% JCDL 2018 paper on page-level search for workset creation.
% Author names use the unambiguous "Last, First" form; the second author's
% surname is Downie (given names "J. Stephen"), not "Stephen Downie".
@inproceedings{da6db130686b4409adc25555be5655d7,
title = "Providing Pin-point Page-level Precision to 1 Trillion Tokens of Text for Workset Creation",
abstract = "We report on the work undertaken developing a web environment that allows users to search over 1 trillion tokens of text - down to the page-level - of the HathiTrust Part-of-Speech Extracted Features Dataset to help produce worksets for scholarly analysis. We present an extended example of the web environment in use, along with details about its implementation.",
keywords = "extract feature text analysis, very large digital libraries, workset creation",
author = "Bainbridge, David and Downie, J. Stephen and Capitanu, Boris",
note = "Publisher Copyright: {\textcopyright} 2018 Authors.; 18th ACM/IEEE Joint Conference on Digital Libraries, JCDL 2018 ; Conference date: 03-06-2018 Through 07-06-2018",
year = "2018",
month = may,
day = "23",
doi = "10.1145/3197026.3203875",
language = "English (US)",
series = "Proceedings of the ACM/IEEE Joint Conference on Digital Libraries",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "407--408",
booktitle = "JCDL 2018 - Proceedings of the 18th ACM/IEEE Joint Conference on Digital Libraries",
address = "United States",
}