% Lv and Zhai, SIGIR 2011: introduces BM25L, an extension of Okapi BM25 that
% boosts the scores of very long documents (see the abstract field).
% The citation key is an opaque export identifier; it is kept unchanged so
% that existing citations continue to resolve.
@inproceedings{7336ef14928941939b72a6b8f5aaa584,
  author    = {Lv, Yuanhua and Zhai, {Cheng Xiang}},
  title     = {When documents are very long, {BM25} fails},
  booktitle = {{SIGIR}'11 - Proceedings of the 34th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  pages     = {1103--1104},
  year      = {2011},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450309349},
  doi       = {10.1145/2009916.2010070},
  language  = {English (US)},
  keywords  = {BM25, BM25L, Term frequency, Very long documents},
  abstract  = {We reveal that the Okapi BM25 retrieval function tends to overly penalize very long documents. To address this problem, we present a simple yet effective extension of BM25, namely BM25L, which ``shifts'' the term frequency normalization formula to boost scores of very long documents. Our experiments show that BM25L, with the same computation cost, is more effective and robust than the standard BM25.},
  note      = {34th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2011 ; Conference date: 24-07-2011 Through 28-07-2011},
}