% Massung and Zhai, SyntacticDiff (IEEE Big Data 2015).
% Cleaned from an automated export: names are in "Last, First" form, the
% camel-case system name in the title is brace-protected against style
% recasing, the keyword list uses commas throughout, and the series field
% that merely repeated booktitle has been dropped to avoid printing the
% proceedings name twice.
@inproceedings{6363e9bb9e084684a4b8d28c2120a057,
  author    = {Massung, Sean and Zhai, Chengxiang},
  title     = {{SyntacticDiff}: Operator-Based Transformation for Comparative Text Mining},
  booktitle = {Proceedings - 2015 IEEE International Conference on Big Data, IEEE Big Data 2015},
  editor    = {Luo, Feng and Ogan, Kemafor and Zaki, Mohammed J. and Haas, Laura and Ooi, Beng Chin and Kumar, Vipin and Rachuri, Sudarsan and Pyne, Saumyadipta and Ho, Howard and Hu, Xiaohua and Yu, Shipeng and Hsiao, Morris Hui-I and Li, Jian},
  pages     = {571--580},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2015},
  month     = dec,
  day       = {22},
  doi       = {10.1109/BigData.2015.7363801},
  language  = {English (US)},
  keywords  = {Comparative Text Mining, Monolingual Translation, Corpus Summarization, Text Categorization},
  abstract  = {We describe SyntacticDiff, a novel, general, and efficient edit-based method for transforming sequences of words given a reference text collection. These transformations can be used directly or can be employed as features to represent text data in a wide variety of text mining applications. As case studies, we apply SyntacticDiff to three quite different tasks, including grammatical error correction, student essay clustering and analysis, and native language identification, showing its benefit in each case. SyntacticDiff is completely general and can thus be potentially applied to any text data in any natural language. It is highly efficient, customizable, and able to capture syntactic differences from a reference text collection at the sentence, document, and subcollection levels. This enables both a rich translation method and feature representation for many text mining tasks that deal with word usage and syntax beyond bag-of-words.},
  note      = {Funding Information: This material is supported in part by the National Science Foundation under grants CNS-1513939 and CNS-1027965. Publisher Copyright: {\textcopyright} 2015 IEEE. 3rd IEEE International Conference on Big Data, IEEE Big Data 2015; Conference date: 29-10-2015 through 01-11-2015},
}