@comment{
  Journal article by Parulian and five co-authors, 2022, volume 59, number 1.
  The citation key is an opaque hexadecimal identifier; it is kept unchanged
  so that existing citations of this entry keep resolving.
  Cleanup applied to this record: ampersands are escaped for LaTeX, author
  names use the "Last, First" form throughout, the genre name in the title is
  brace-protected against sentence-casing styles, Unicode hyphens and curly
  quotes are replaced by ASCII / TeX equivalents, and the acknowledgement
  paragraph that was present twice in the note field is kept only once.
}
@article{c827994ccb5e4931b04bfbd5b7884141,
  author    = {Parulian, Nikolaus Nova and Dubnicek, Ryan and Worthey, Glen and Evans, Daniel J. and Walsh, John A. and Downie, J. Stephen},
  title     = {Uncovering {Black} {Fantastic}: Piloting A Word Feature Analysis and Machine Learning Approach for Genre Classification},
  journal   = {Proceedings of the Association for Information Science and Technology},
  year      = {2022},
  volume    = {59},
  number    = {1},
  pages     = {242--250},
  publisher = {John Wiley \& Sons, Ltd.},
  issn      = {2373-9231},
  doi       = {10.1002/pra2.620},
  language  = {English (US)},
  keywords  = {digital library, genre classification, hathitrust, machine learning, natural language processing},
  abstract  = {Given the size of digital library collections and the inconsistencies in their genre-related bibliographic metadata, as digital libraries grow and their contents are opened for computational analysis, finding materials of interest becomes a major challenge. This challenge increases for sub-genres and other categories of text data that are less distinct from the whole. This project pilots machine learning methods and word feature analysis for identifying Black Fantastic genre texts within the HathiTrust Digital Library. These texts are sometimes referred to as ``Afrofuturism'' but more commonly today described as ``Black Fantastic,'' in which African Diaspora artists and creators engage with the intersections of race and technology in their works with a primary focus on world-building. Black Fantastic texts pose a challenge to genre classification, as they incorporate aspects of science fiction and fantasy with typical characteristics of African Diaspora-produced literature. This paper presents and reports on results from a pilot predictive modeling process to computationally identify Black Fantastic texts using curated word feature sets for each class of data: general English-language fiction, Black-authored fiction, and Black Fantastic fiction.},
  note      = {We acknowledge the crucial role played in this research by Dr. Clarissa West-White (Bethune Cookman University) and Dr. Seretha Williams (Augusta University), project leaders for ``The Black Fantastic: Curated Vocabularies, Artifact Analysis and Identification'' and our close collaborators on the analyses described here. Dr. West-White{\textquoteright}s and Dr. Williams{\textquoteright}s project is part of the ``Scholar-Curated Worksets for Analysis, Reuse \& Dissemination'' (SCWAReD) project generously funded by the Andrew W. Mellon Foundation, grant reference number 2003-07550, and managed by the HathiTrust Research Center. Many of the challenges and opportunities in digital libraries are due to the size of their collections. The HathiTrust Digital Library (HTDL) houses over 17.5 million scanned items, all of which are available for computational analysis. Having access to cultural data of this scale brings incredible promise but also unique challenges. Primary amongst these challenges is identifying items of interest. Amongst the massive library collection, bibliographic metadata is inconsistent and sometimes inaccurate, especially when it comes to capturing genre information. This project seeks to overcome these hurdles, as part of HTRC{\textquoteright}s Scholar-Curated Worksets for Analysis, Reuse and Dissemination (SCWAReD) project, funded by the Andrew W. Mellon Foundation.},
}