% Conference paper: Efron, Willis and Sherman, SIGIR 2014 short paper (pp. 1091--1094).
% Cleaned from a repository export:
%   - the proceedings title is stored once, in booktitle (the export had
%     copied it into series as well, which made styles print it twice);
%   - event name and dates live in eventtitle/eventdate (biblatex fields,
%     ignored by classic BibTeX styles) instead of the printed note field;
%   - the exporter's record-copyright line is kept in internal-note, a field
%     no standard style prints.
% The citation key is the exporter's identifier and is kept unchanged so
% existing \cite commands continue to resolve.
@inproceedings{03a1cf26e9f1406d9ac4936c04b453f0,
  author        = {Efron, Miles and Willis, Craig and Sherman, Garrick},
  title         = {Learning sufficient queries for entity filtering},
  booktitle     = {{SIGIR} 2014 - Proceedings of the 37th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  year          = {2014},
  pages         = {1091--1094},
  publisher     = {Association for Computing Machinery},
  isbn          = {9781450322591},
  doi           = {10.1145/2600428.2609517},
  language      = {English (US)},
  eventtitle    = {37th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval, {SIGIR} 2014},
  eventdate     = {2014-07-06/2014-07-11},
  keywords      = {Boolean models, Document filtering, Entity retrieval},
  abstract      = {Entity-centric document filtering is the task of analyzing a time-ordered stream of documents and emitting those that are relevant to a specified set of entities (e.g., people, places, organizations). This task is exemplified by the TREC Knowledge Base Acceleration (KBA) track and has broad applicability in other modern IR settings. In this paper, we present a simple yet effective approach based on learning high-quality Boolean queries that can be applied deterministically during filtering. We call these Boolean statements sufficient queries. We argue that using deterministic queries for entity-centric filtering can reduce confounding factors seen in more familiar ``score-then-threshold'' filtering methods. Experiments on two standard datasets show significant improvements over state-of-the-art baseline models.},
  internal-note = {Export metadata: Copyright 2014 Elsevier B.V., All rights reserved.},
}