% Workshop paper from the DialDoc 2023 proceedings (pages 80--85).
% The opaque citation key is kept unchanged so existing citations keep resolving.
% Case-sensitive acronyms are brace-protected so sentence-casing styles preserve them;
% names use the unambiguous "Last, First" form; the percent sign in the abstract is
% escaped so styles that print the abstract do not treat it as a TeX comment.
@inproceedings{4a92742130274eb996e4d318ed5133b7,
  author    = {Ren, Liliang and Sidhu, Mankeerat and Zeng, Qi and Reddy, Revanth Gangi and Ji, Heng and Zhai, Cheng Xiang},
  title     = {{C-PMI}: Conditional Pointwise Mutual Information for Turn-level Dialogue Evaluation},
  booktitle = {{DialDoc} 2023 - Proceedings of the 3rd {DialDoc} Workshop on Document-Grounded Dialogue and Conversational Question Answering, Proceedings of the Workshop},
  editor    = {Muresan, Smaranda and Chen, Vivian and Kennington, Casey and Vandyke, David and Dethlefs, Nina and Inoue, Koji and Ekstedt, Erik and Ultes, Stefan},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  pages     = {80--85},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2023},
  month     = jul,
  language  = {English (US)},
  note      = {3rd Workshop on Document-grounded Dialogue and Conversational Question Answering, DialDoc 2023, co-located with ACL 2023; conference date: 13 July 2023. Publisher copyright: {\textcopyright} 2023 Association for Computational Linguistics.},
  abstract  = {Existing reference-free turn-level evaluation metrics for chatbots inadequately capture the interaction between the user and the system. Consequently, they often correlate poorly with human evaluations. To address this issue, we propose a novel model-agnostic approach that leverages Conditional Pointwise Mutual Information (C-PMI) to measure the turn-level interaction between the system and the user based on a given evaluation dimension. Experimental results on the widely used FED dialogue evaluation dataset demonstrate that our approach significantly improves the correlation with human judgment compared with existing evaluation systems. By replacing the negative log-likelihood-based scorer with our proposed C-PMI scorer, we achieve a relative 60.5\% higher Spearman correlation on average for the FED evaluation metric. Our code is publicly available at https://github.com/renll/C-PMI.},
}