% Conference paper by Celikyilmaz and Hakkani-Tur in the ACL 2010 proceedings.
% `month` reflects the conference dates recorded in `note` (11-16 July 2010).
% NOTE(review): `address` holds a country rather than a publisher city, which
% is what standard styles expect in this field -- confirm the correct place
% against the publisher record before relying on it.
@inproceedings{0bd11c8f77b54113bb9673a1e2f2b73b,
  author    = {Celikyilmaz, Asli and Hakkani-Tur, Dilek},
  title     = {A hybrid hierarchical model for multi-document summarization},
  booktitle = {{ACL} 2010 - 48th Annual Meeting of the Association for Computational Linguistics, Conference Proceedings},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  editor    = {Hajic, Jan and Carberry, Sandra and Clark, Stephen},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2010},
  month     = jul,
  pages     = {815--824},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2010 Association for Computational Linguistics.; 48th Annual Meeting of the Association for Computational Linguistics, ACL 2010 ; Conference date: 11-07-2010 Through 16-07-2010},
  abstract  = {Scoring sentences in documents given abstract summaries created by humans is important in extractive multi-document summarization. In this paper, we formulate extractive summarization as a two step learning problem building a generative model for pattern discovery and a regression model for inference. We calculate scores for sentences in document clusters based on their latent characteristics using a hierarchical topic model. Then, using these scores, we train a regression model based on the lexical and structural characteristics of the sentences, and use the model to score sentences of new documents to form a summary. Our system advances current state-of-the-art improving ROUGE scores by {$\sim$}7\%. Generated summaries are less redundant and more coherent based upon manual quality evaluations.},
}