% Wang et al., "Distantly Supervised Biomedical Named Entity Recognition with
% Dictionary Expansion" (IEEE BIBM 2019, pp. 496--503).
% The funding, copyright and conference-date fields below are non-standard:
% they hold export metadata that used to sit in the note field, where standard
% styles would have printed it verbatim in the bibliography.
@inproceedings{fa6a3fddfd824c4ca1584256e967f552,
  author          = {Wang, Xuan and Zhang, Yu and Li, Qi and Ren, Xiang and Shang, Jingbo and Han, Jiawei},
  title           = {Distantly Supervised Biomedical Named Entity Recognition with Dictionary Expansion},
  booktitle       = {Proceedings - 2019 {IEEE} International Conference on Bioinformatics and Biomedicine, {BIBM} 2019},
  editor          = {Yoo, Illhoi and Bi, Jinbo and Hu, {Xiaohua Tony}},
  pages           = {496--503},
  publisher       = {Institute of Electrical and Electronics Engineers Inc.},
  address         = {United States},
  year            = {2019},
  month           = nov,
  doi             = {10.1109/BIBM47256.2019.8983212},
  language        = {English (US)},
  keywords        = {biomedical named entity recognition, distantly supervised learning, entity expansion},
  abstract        = {State-of-the-art biomedical named entity recognition (BioNER) systems apply supervised machine learning models (i.e., relying on human effort for training data annotation) which are not easy to be generalized to new entity types and datasets. We propose a distantly supervised approach, AutoBioNER, that automatically recognizes biomedical entities from massive corpora with user-input dictionaries. AutoBioNER does not need any human annotated data. It relies on incomplete entity dictionaries to provide seeds for each entity type and performs a novel entity set expansion step for corpus-level new entity recognition and dictionary completion. The expanded dictionaries are used as distant supervision to train a neural model for BioNER. Experimental results show that AutoBioNER achieves the best performance among the methods that only use dictionaries with no additional human effort on BioNER benchmark datasets. It is also demonstrated that the dictionary expansion step plays an important role in the great performances.},
  funding         = {Research was sponsored in part by U.S. Army Research Lab. under Cooperative Agreement No. W911NF-09-2-0053 (NSCTA), DARPA under Agreements No. W911NF-17-C-0099 and FA8750-19-2-1004, National Science Foundation IIS 16-18481, IIS 17-04532, and IIS-17-41317, DTRA HDTRA11810026, and grant 1U54GM114838 awarded by NIGMS through funds provided by the trans-NIH Big Data to Knowledge (BD2K) initiative (www.bd2k.nih.gov). Any opinions, findings, and conclusions or recommendations expressed in this document are those of the author(s) and should not be interpreted as the views of any U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation hereon.},
  copyright       = {Publisher Copyright: {\textcopyright} 2019 IEEE.},
  conference-date = {2019 IEEE International Conference on Bioinformatics and Biomedicine, BIBM 2019 ; Conference date: 18-11-2019 Through 21-11-2019},
}