% Conference paper in the ICTIR 2015 proceedings (see `booktitle`).
% Field notes:
%   - No `series` field: the proceedings name lives only in `booktitle`, so
%     styles that print both fields do not render it twice.
%   - `address` is the publisher's location (ACM), not the conference venue;
%     the event name and dates are recorded in `note`.
%   - Authors use the "Last, First" form; the braces around {Cheng Xiang}
%     keep the two-word given name together.
@inproceedings{f944cb3117e846dea9b4f65ab720f373,
  author    = {Hazimeh, Hussein and Zhai, {Cheng Xiang}},
  title     = {Axiomatic analysis of smoothing methods in language models for Pseudo-Relevance Feedback},
  booktitle = {ICTIR 2015 - Proceedings of the 2015 ACM SIGIR International Conference on the Theory of Information Retrieval},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {141--150},
  year      = {2015},
  month     = sep,
  day       = {27},
  doi       = {10.1145/2808194.2809471},
  language  = {English (US)},
  keywords  = {Divergence minimization model, Geometric relevance model, Pseudo relevance feedback, Relevance model, Smoothing},
  abstract  = {Pseudo-Relevance Feedback (PRF) is an important general technique for improving retrieval effectiveness without requiring any user effort. Several state-of-the-art PRF models are based on the language modeling approach where a query language model is learned based on feedback documents. In all these models, feedback documents are represented with unigram language models smoothed with a collection language model. While collection language model-based smoothing has proven both effective and necessary in using language models for retrieval, we use axiomatic analysis to show that this smoothing scheme inherently causes the feedback model to favor frequent terms and thus violates the IDF constraint needed to ensure selection of discriminative feedback terms. To address this problem, we propose replacing collection language model-based smoothing in the feedback stage with additive smoothing, which is analytically shown to select more discriminative terms. Empirical evaluation further confirms that additive smoothing indeed significantly outperforms collection-based smoothing methods in multiple language model-based PRF models.},
  note      = {Publisher Copyright: {\textcopyright} 2015 ACM.; 5th ACM SIGIR International Conference on the Theory of Information Retrieval, ICTIR 2015 ; Conference date: 27-09-2015 Through 30-09-2015},
}