% Conference paper from the ICDAR 2021 proceedings (Springer, Lecture Notes in Computer Science).
% The citation key is kept exactly as exported so that existing \cite commands still resolve.
% NOTE(review): address now holds the publisher city (Cham, matching the Springer Nature
% Switzerland AG imprint named in the note field) instead of a country -- confirm against the volume.
% NOTE(review): the LNCS volume number is not recorded here; add a volume field once verified.
@inproceedings{35dd8f8adac84270a05d0cac30d263a4,
  author    = {Badirli, Sarkhan and {Borgo Ton}, Mary and Gungor, Abdulmecit and Dundar, Murat},
  title     = {Open Set Authorship Attribution Toward Demystifying {Victorian} Periodicals},
  booktitle = {Document Analysis and Recognition -- {ICDAR} 2021},
  editor    = {Llad{\'o}s, Josep and Lopresti, Daniel and Uchida, Seiichi},
  series    = {Lecture Notes in Computer Science},
  pages     = {221--235},
  publisher = {Springer},
  address   = {Cham},
  year      = {2021},
  doi       = {10.1007/978-3-030-86337-1_15},
  isbn      = {9783030863364},
  language  = {English (US)},
  keywords  = {Author attribution, Open-set classification, Victorian literature},
  abstract  = {Existing research in computational authorship attribution (AA) has primarily focused on attribution tasks with a limited number of authors in a closed-set configuration. This restricted set-up is far from being realistic in dealing with highly entangled real-world AA tasks that involve a large number of candidate authors for attribution during test time. In this paper, we study AA in historical texts using a new data set compiled from the Victorian literature. We investigate the predictive capacity of most common English words in distinguishing writings of most prominent Victorian novelists. We challenged the closed-set classification assumption and discussed the limitations of standard machine learning techniques in dealing with the open set AA task. Our experiments suggest that a linear classifier can achieve near perfect attribution accuracy under closed set assumption yet, the need for more robust approaches becomes evident once a large candidate pool has to be considered in the open-set classification setting.},
  note      = {Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.; 16th International Conference on Document Analysis and Recognition, ICDAR 2021 ; Conference date: 05-09-2021 Through 10-09-2021},
}