% Conference paper from the CIKM'11 proceedings (pages 1985--1988).
% - Acronyms in title/booktitle are brace-protected so styles that apply
%   sentence casing do not lowercase them (e.g. "BM25" -> "bm25").
% - All authors use the unambiguous "Last, First" form.
% - The citation key is kept as exported so existing \cite commands still resolve.
@inproceedings{55c98815accf4e48a83a3cf6e83f126f,
  author    = {Lv, Yuanhua and Zhai, {Cheng Xiang}},
  title     = {Adaptive term frequency normalization for {BM25}},
  booktitle = {{CIKM}'11 - Proceedings of the 2011 {ACM} International Conference on Information and Knowledge Management},
  series    = {International Conference on Information and Knowledge Management, Proceedings},
  year      = {2011},
  pages     = {1985--1988},
  doi       = {10.1145/2063576.2063871},
  isbn      = {9781450307178},
  language  = {English (US)},
  keywords  = {adaptation, bm25, information gain, term frequency},
  note      = {20th ACM Conference on Information and Knowledge Management, CIKM'11 ; Conference date: 24-10-2011 Through 28-10-2011},
  abstract  = {A key component of BM25 contributing to its success is its sub linear term frequency (TF) normalization formula. The scale and shape of this TF normalization component is controlled by a parameter k1, which is generally set to a term-independent constant. We hypothesize and show empirically that in order to optimize retrieval performance, this parameter should be set in a term-specific way. Following this intuition, we propose an information gain measure to directly estimate the contributions of repeated term occurrences, which is then exploited to fit the BM25 function to predict a term-specific k1. Our experiment results show that the proposed approach, without needing any training data, can efficiently and automatically estimate a term-specific k1, and is more effective and robust than the standard BM25.},
}