% Conference paper in the proceedings of the 2018 IEEE International
% Conference on Big Data (pp. 1407--1416).
% - `month` is the conference month; the `note` field records the event as
%   running 10--13 December 2018.
% - The proceedings title is stored once, in `booktitle`; no separate
%   `series` is given because the export carried only a copy of `booktitle`.
% - `address` holds only a country, as exported; the publisher city is not
%   recorded here.
@inproceedings{82d55e4696a9412295b526bd43e14634,
  author    = {Jiang, Shan and Zhai, Chengxiang and Mei, Qiaozhu},
  title     = {Exploiting Knowledge Graph to Improve Text-based Prediction},
  booktitle = {Proceedings - 2018 {IEEE} International Conference on Big Data, Big Data 2018},
  editor    = {Abe, Naoki and Liu, Huan and Pu, Calton and Hu, Xiaohua and Ahmed, Nesreen and Qiao, Mu and Song, Yang and Kossmann, Donald and Liu, Bing and Lee, Kisung and Tang, Jiliang and He, Jingrui and Saltz, Jeffrey},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {1407--1416},
  year      = {2018},
  month     = dec,
  doi       = {10.1109/BigData.2018.8622123},
  keywords  = {Knowledge graph, text representation, text-based prediction},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 IEEE International Conference on Big Data, Big Data 2018 ; Conference date: 10-12-2018 Through 13-12-2018},
  abstract  = {As a special kind of ``big data,'' text data can be regarded as data reported by human sensors. Since humans are far more intelligent than physical sensors, text data contains useful information and knowledge about the real world, making it possible to make predictions about real-world phenomena based on text. As all application domains involve humans, text-based prediction has widespread applications, especially for optimization of decision making. While the problem of text-based prediction resembles text classification when formulated as a supervised learning problem, it is more challenging because the variable to be predicted may not be directly derivable from the text and thus there is a semantic gap between the target variable and the surface features that are often used for representing text data in conventional approaches. In this paper, we propose to bridge this gap by using knowledge graph to construct more effective features for text representation. We propose a two-step filtering algorithm to enhance such a knowledge-aware text representation for a family of entity-centric text regression tasks where the response variable can be treated as an attribute of a group of central entities. We evaluate the proposed algorithm by using two revenue prediction tasks based on reviews. The results show that the proposed algorithm can effectively leverage knowledge graphs to construct interpretable features, leading to significant improvement of the prediction accuracy over traditional features.},
}