% Workshop paper by Heng Ji and Ralph Grishman (2006), pp. 48--55.
% Cleanup notes:
%   - author/editor names are stored uniformly in "Last, First" form;
%   - the editor list previously used escaped braces, which typeset as
%     literal brace characters, so they were dropped;
%   - the former "series" field was an exact copy of "booktitle" (not a
%     real series name) and was removed so the proceedings title is not
%     printed twice.
@inproceedings{80bcea1a50a1411da21f0f1c0fa00c6c,
  author    = {Ji, Heng and Grishman, Ralph},
  title     = {Data Selection in Semi-supervised Learning for Name Tagging},
  editor    = {Califf, Mary Elaine and Greenwood, Mark A. and Stevenson, Mark and Yangarber, Roman},
  booktitle = {COLING ACL 2006 - Information Extraction Beyond The Document, Proceedings of the Workshop},
  publisher = {Association for Computational Linguistics (ACL)},
  pages     = {48--55},
  year      = {2006},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2006 Association for Computational Linguistics.; 2006 Workshop on Information Extraction Beyond The Document, IE 2006 ; Conference date: 22-07-2006},
  abstract  = {We present two semi-supervised learning techniques to improve a state-of-the-art multi-lingual name tagger. For English and Chinese, the overall system obtains 1.7\%-2.1\% improvement in F-measure, representing a 13.5\%-17.4\% relative reduction in the spurious, missing, and incorrect tags. We also conclude that simply relying upon large corpora is not in itself sufficient: we must pay attention to unlabeled data selection too. We describe effective measures to automatically select documents and sentences.},
}