% Conference paper in the SIGIR'11 proceedings (see the booktitle field).
% The citation key is kept exactly as before so existing citations still resolve.
% Notes on field choices:
%   - author:    every name is in "Last, First" form so parsing never depends on
%                word order or capitalisation.
%   - booktitle: acronyms are braced so styles that recase titles keep them upper-case.
%   - address:   the publisher's location, not the conference venue.
%   - series:    intentionally absent; repeating the booktitle there makes standard
%                styles print the proceedings title twice.
%   - note:      carries the event name and conference dates as free text.
@inproceedings{d0d7271572034dcc9e2063ad24d28f13,
  author    = {Li, Yanen and Hsu, Bo June and Zhai, Cheng Xiang and Wang, Kuansan},
  title     = {Unsupervised query segmentation using clickthrough for information retrieval},
  booktitle = {{SIGIR}'11 - Proceedings of the 34th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  year      = {2011},
  pages     = {285--294},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/2009916.2009957},
  isbn      = {9781450309349},
  language  = {English (US)},
  keywords  = {Expectation maximization algorithm, Language modeling, QSLM, Query segmentation},
  note      = {34th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2011 ; Conference date: 24-07-2011 Through 28-07-2011},
  abstract  = {Query segmentation is an important task toward understanding queries accurately, which is essential for improving search results. Existing segmentation models either use labeled data to predict the segmentation boundaries, for which the training data is expensive to collect, or employ unsupervised strategy based on a large text corpus, which might be inaccurate because of the lack of relevant information. In this paper, we propose a probabilistic model to exploit clickthrough data for query segmentation, where the model parameters are estimated via an efficient EM algorithm. We further study how to properly interpret the segmentation results and utilize them to improve retrieval accuracy. Specifically, we propose an integrated language model based on the standard bigram language model to exploit the probabilistic structure obtained through query segmentation. Experiment results on two datasets show that our segmentation model outperforms existing segmentation models. Furthermore, extensive experiments on a large retrieval dataset reveals that the results of query segmentation can be leveraged to improve retrieval relevance by using the proposed integrated language model.},
}