% Journal article: Ji, Deng and Han (2012), Proceedings of the IEEE, vol. 100, no. 9.
% The note field carries the funding and affiliation text that came with the export.
@article{51335c56b8be48a389b56acacb4b1a5f,
title = "Uncertainty reduction for knowledge discovery and information extraction on the {World Wide Web}",
abstract = "In this paper, we give an overview of knowledge discovery (KD) and information extraction (IE) techniques on the World Wide Web (WWW). We intend to answer the following questions: What kind of additional uncertainty challenges are introduced by the WWW setting to basic KD and IE techniques? What are the fundamental techniques that can be used to reduce such uncertainty and achieve reasonable KD and IE performance on the WWW? What is the impact of each novel method? What types of interactions can be conducted between these techniques and information networks to make them benefit from each other? In what way can we utilize the results in more interesting applications? What are the remaining challenges and what are the possible ways to address these challenges? We hope this can provide a road map to advance KD and IE on the WWW to a higher level of performance, portability and utilization.",
keywords = "natural language processing, text analysis, text mining",
author = "Ji, Heng and Deng, Hongbo and Han, Jiawei",
note = "Funding Information: Dr. Ji received the Google Research Award in 2009, Faculty Early Career Development (CAREER) Award from the U.S. National Science Foundation (NSF) in 2010, CUNY Chancellor{\textquoteright}s ``Salute to Scholar'' award in 2011, and Sloan Junior Faculty Award in 2012. She served as the coordinator of the NIST TAC Knowledge Base Population task in 2010 and 2011, the Information Extraction area chair for the North American Chapter of Association for Computational Linguistics---Human Language Technologies, and the coleader of the cross-genre information fusion, inference and transfer learning task of the U.S. Information Network Academic Research Center supported by the U.S. Army Research Lab in 2011--2012. Funding Information: Manuscript received June 11, 2011; revised December 10, 2011; accepted March 4, 2012. Date of publication June 5, 2012; date of current version August 16, 2012. The work was supported in part by the U.S. National Science Foundation grants IIS-0953149, IIS-1144111, IIS-0905215, CNS-0931975, the U.S. Army Research Laboratory under Cooperative Agreement No. W911NF-09-2-0053 (NS-CTA), the U.S. DARPA Broad Operational Language Translations program, the U.S. Air Force Office of Scientific Research MURI award FA9550-08-1-0265. The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation here on. H. Ji is with the Department of Computer Science, City University of New York, New York, NY 10031 USA (e-mail: hengji@cs.qc.cuny.edu). H. Deng and J. Han are with the Department of Computer Science, University of Illinois at Urbana-Champaign, Urbana, IL 61801 USA (e-mail: hbdeng@uiuc.edu; hanj@uiuc.edu).",
year = "2012",
doi = "10.1109/JPROC.2012.2190489",
language = "English (US)",
volume = "100",
pages = "2658--2674",
journal = "Proceedings of the IEEE",
issn = "0018-9219",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
number = "9",
}