@inproceedings{b8a820ca4adc41a687480d8b42a4e4ef,
  author    = {Zhang, Hanning and Diao, Shizhe and Lin, Yong and Fung, Yi R. and Lian, Qing and Wang, Xingyao and Chen, Yangyi and Ji, Heng and Zhang, Tong},
  editor    = {Duh, Kevin and Gomez, Helena and Bethard, Steven},
  title     = {{R-Tuning}: Instructing Large Language Models to Say {\textquoteleft}{I} Don{\textquoteright}t Know{\textquoteright}},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2024},
  month     = jun,
  pages     = {7106--7132},
  doi       = {10.18653/v1/2024.naacl-long.394},
  language  = {English (US)},
  abstract  = {Large language models (LLMs) have revolutionized numerous domains with their impressive performance but still face their challenges. A predominant issue is the propensity for these models to generate non-existent facts, a concern termed hallucination. Our research is motivated by the observation that previous instruction tuning methods force the model to complete a sentence no matter whether the model knows the knowledge or not. When the question is out of the parametric knowledge, it will try to make up something and fail to indicate when it lacks knowledge. In this paper, we present a new approach called Refusal-Aware Instruction Tuning (R-Tuning). This approach is formalized by first identifying the disparity in knowledge encompassed by pre-trained parameters compared to that of instruction tuning data. Then, we construct the refusal-aware data based on the knowledge intersection, to tune LLMs to refrain from responding to questions beyond its parametric knowledge. Experimental results demonstrate R-Tuning effectively improves a model{\textquoteright}s ability to answer known questions and refrain from answering unknown questions. Furthermore, when tested on out-of-domain datasets, the refusal ability was found to be a meta-skill that could be generalized to other tasks. Further analysis surprisingly finds that learning the uncertainty results in better calibration and an improved ability to estimate the uncertainty than uncertainty-based testing.},
  note      = {We thank the anonymous reviewers for their valuable suggestions and comments. Shizhe Diao was supported by the Hong Kong Ph.D. Fellowship Scheme (HKPFS) and the Hong Kong University of Science and Technology Overseas Research Award. This research is partially supported by U.S. DARPA ITM Program No. FA8650-23-C-7316. The views and conclusions contained herein are those of the authors and should not be interpreted as necessarily representing the official policies, either expressed or implied, of DARPA, or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for governmental purposes notwithstanding any copyright annotation therein.; 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL 2024 ; Conference date: 16-06-2024 Through 21-06-2024},
}