% Conference paper published in the IEEE Big Data 2014 proceedings.
% Maintenance notes:
%   - Percent signs in the abstract are escaped so the field is safe when a style typesets it.
%   - The publisher is double-braced so its literal " and " is never read as a list separator.
%   - No series field: the proceedings title is carried by booktitle only, so it prints once.
%   - NOTE(review): address holds a country rather than a publisher city; confirm and refine.
@inproceedings{5085ba674ec04b44841dc7efcad0e463,
  author    = {Kim, Jinseok and Diesner, Jana and Kim, Heejun and Aleyasen, Amirhossein and Kim, {Hwan Min}},
  title     = {Why name ambiguity resolution matters for scholarly big data research},
  booktitle = {Proceedings - 2014 {IEEE} International Conference on Big Data, {IEEE} Big Data 2014},
  editor    = {Lin, Jimmy and Pei, Jian and Hu, {Xiaohua Tony} and Chang, Wo and Nambiar, Raghunath and Aggarwal, Charu and Cercone, Nick and Honavar, Vasant and Huan, Jun and Mobasher, Bamshad and Pyne, Saumyadipta},
  publisher = {{Institute of Electrical and Electronics Engineers Inc.}},
  address   = {United States},
  year      = {2014},
  pages     = {1--6},
  doi       = {10.1109/BigData.2014.7004345},
  language  = {English (US)},
  keywords  = {Bibliometrics, Collaboration, Disambiguation, Network analysis},
  abstract  = {This paper illustrates how data pre-processing choices about author name disambiguation can affect research findings about scholarly networks and hypotheses about underlying social mechanisms. We have analyzed three big scholarly datasets that were disambiguated algorithmically and via two common initial-based disambiguation methods; namely first-initial and all-initials disambiguation. The comparison of resulting bibliometric and network properties revealed that initial-disambiguation bears the prevalent risks of incorrectly merging author identities, underestimating the number of unique authors and inflating the average productivity and number of collaborators per author. The gaps between outcomes of name ambiguity resolution methods range from -4.23\% to -87.36\% per dataset for the number of unique authors, from 3.75\% to 691.20\% for average productivity, and from 5.06\% to 285.28\% for degree centrality for initial based methods compared to algorithmic disambiguation. This calls for special attention to data pre-processing choices in scholarly big data research.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE. 2nd IEEE International Conference on Big Data, IEEE Big Data 2014. Conference date: 27-10-2014 through 30-10-2014.},
}