% Organisciak, Schmidt and Downie (2022): journal article on exploratory data analysis of large digital libraries (HathiTrust+Bookworm).
@article{ae999e3d5a674afeb0917d7ebabc33fa,
  author    = {Organisciak, Peter and Schmidt, Benjamin M. and Downie, J. Stephen},
  title     = {Giving shape to large digital libraries through exploratory data analysis},
  journal   = {Journal of the Association for Information Science and Technology},
  year      = {2022},
  month     = feb,
  volume    = {73},
  number    = {2},
  pages     = {317--332},
  doi       = {10.1002/asi.24547},
  issn      = {2330-1635},
  publisher = {John Wiley \& Sons, Ltd.},
  language  = {English (US)},
  abstract  = {The emergence of large multi-institutional digital libraries has opened the door to aggregate-level examinations of the published word. Such large-scale analysis offers a new way to pursue traditional problems in the humanities and social sciences, using digital methods to ask routine questions of large corpora. However, inquiry into multiple centuries of books is constrained by the burdens of scale, where statistical inference is technically complex and limited by hurdles to access and flexibility. This work examines the role that exploratory data analysis and visualization tools may play in understanding large bibliographic datasets. We present one such tool, HathiTrust+Bookworm, which allows multifaceted exploration of the multimillion work HathiTrust Digital Library, and center it in the broader space of scholarly tools for exploratory data analysis.},
  note      = {Funding Information: The project was made possible by the National Endowment for the Humanities, award number HK-50176-14 (PI: J. Stephen Downie). Any views, findings, conclusions, or recommendations expressed in this article do not necessarily represent those of the National Endowment for the Humanities. Bookworm was initially developed at the Harvard Cultural Observatory under the direction of Erez Lieberman Aiden and Jean-Baptiste Michel. Notable contributions to Bookworm have been made by Benjamin Schmidt, Martin Camacho, Billy Janitsch, Neva Cherniavsky, Erez Aiden, Matt Nicklay, JB Michel, Peter Organisciak, and Colleen Fallaw. Further funding and institutional support has been provided by the Harvard Cultural Observatory, the Digital Public Library of America, the HathiTrust Research Center, University of Illinois, Northeastern University, and Rice University. Additional thanks to Loretta Auvil for contributions to the project, Andy Lawder, Adrienne VandenBosch and Danielle Francisco Vasquez Albuquerque for assistance in preparing this manuscript, and Danielle Albers Szafir and Glen Worthey for notes and advice. The Bookworm GUI is available at http://bookworm.htrc.illinois.edu . The Bookworm Playground is available at https://bookworm.htrc.illinois.edu/app . The advanced interface is an instance of the Bookworm D3 library (Schmidt, 2015). It is available at https://bookworm.htrc.illinois.edu/advanced and its declarative grammar is documented by Schmidt (2015). BookwormPython is available at https://github.com/organisciak/BookwormPython . Bookworm R Library is available at https://github.com/bmschmidt/edinburgh . Publisher Copyright: {\textcopyright} 2021 Association for Information Science and Technology.},
}