% Zubiaga and Ji (2013): tweet classification using web page directories, WWW 2013 Companion proceedings.
@inproceedings{ab629b2975da4b2d8166826c2da8eda3,
author = "Zubiaga, Arkaitz and Ji, Heng",
title = "Harnessing web page directories for large-scale classification of tweets",
booktitle = "{WWW} 2013 Companion - Proceedings of the 22nd International Conference on World Wide Web",
pages = "225--226",
year = "2013",
month = may,
publisher = "Association for Computing Machinery",
address = "New York, NY, USA",
doi = "10.1145/2487788.2487904",
isbn = "9781450320382",
language = "English (US)",
keywords = "Classification, Distant, Large-scale, Tweets",
abstract = "Classification is paramount for an optimal processing of tweets, albeit performance of classifiers is hindered by the need of large sets of training data to encompass the diversity of contents one can find on Twitter. In this paper, we introduce an inexpensive way of labeling large sets of tweets, which can be easily regenerated or updated when needed. We use human-edited web page directories to infer categories from URLs contained in tweets. By experimenting with a large set of more than 5 million tweets categorized accordingly, we show that our proposed model for tweet classification can achieve 82\% in accuracy, performing only 12.2\% worse than for web page classification.",
note = "Conference held 13--17 May 2013",
}