@comment{
  Conference paper: Pattern-enhanced Named Entity Recognition with Distant
  Supervision, in the proceedings of IEEE Big Data 2020, pages 818--827.

  Maintenance notes for the entry below:
  - The funding acknowledgment and disclaimer are stored in the non-standard
    acknowledgment field, which bibliography styles ignore, so that the
    note field carries only the conference event information.
  - No series field is given: the exported value was an exact copy of
    booktitle and would have printed the proceedings title twice.
  - Conference dates in note are written as ISO dates (10 to 13 Dec 2020,
    consistent with the month and day fields).
  - NOTE(review): address holds a country, not the publisher city that the
    field is meant for; the city is not known from this record -- confirm
    and replace.
  - day and language are kept as exported; styles that do not know these
    fields ignore them.
}
@inproceedings{a25c317790cd4dcb93d906a8f052ff8e,
  author         = {Wang, Xuan and Guan, Yingjun and Zhang, Yu and Li, Qi and Han, Jiawei},
  title          = {Pattern-enhanced Named Entity Recognition with Distant Supervision},
  booktitle      = {Proceedings - 2020 {IEEE} International Conference on Big Data, Big Data 2020},
  editor         = {Wu, Xintao and Jermaine, Chris and Xiong, Li and Hu, {Xiaohua Tony} and Kotevska, Olivera and Lu, Siyuan and Xu, Weijia and Aluru, Srinivas and Zhai, Chengxiang and Al-Masri, Eyhab and Chen, Zhiyuan and Saltz, Jeff},
  publisher      = {Institute of Electrical and Electronics Engineers Inc.},
  address        = {United States},
  year           = {2020},
  month          = dec,
  day            = {10},
  pages          = {818--827},
  doi            = {10.1109/BigData50022.2020.9378052},
  language       = {English (US)},
  keywords       = {distant supervision, named entity recognition, neural network, pattern mining},
  abstract       = {Supervised deep learning methods have achieved state-of-the-art performance on the task of named entity recognition (NER). However, such methods suffer from high cost and low efficiency in training data annotation, leading to highly specialized NER models that cannot be easily adapted to new domains. Recently, distant supervision has been applied to replace human annotation, thanks to the fast development of domain-specific knowledge bases. However, the generated noisy labels pose significant challenges in learning effective neural models with distant supervision. We propose PatNER, a distantly supervised NER model that effectively deals with noisy distant supervision from domain-specific dictionaries. PatNER does not require human-annotated training data but only relies on unlabeled data and incomplete domain-specific dictionaries for distant supervision. It incorporates the distant labeling uncertainty into the neural model training to enhance distant supervision. We go beyond the traditional sequence labeling framework and propose a more effective fuzzy neural model using the tie-or-break tagging scheme for the NER task. Extensive experiments on three benchmark datasets in two domains demonstrate the power of PatNER. Case studies on two additional real-world datasets demonstrate that PatNER improves the distant NER performance in both entity boundary detection and entity type recognition. The results show a great promise in supporting high quality named entity recognition with domain-specific dictionaries on a wide variety of entity types.},
  note           = {8th IEEE International Conference on Big Data, Big Data 2020; conference dates: 2020-12-10 through 2020-12-13},
  acknowledgment = {Research was sponsored in part by US DARPA KAIROS Program No. FA8750-19-2-1004 and SocialSim Program No. W911NF-17-C-0099, National Science Foundation IIS-19-56151, IIS-17-41317, IIS 17-04532, and IIS 16-18481, and DTRA HDTRA11810026. Any opinions, findings, and conclusions or recommendations expressed herein are those of the authors and should not be interpreted as necessarily representing the views, either expressed or implied, of DARPA or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for government purposes notwithstanding any copyright annotation hereon.},
}