% Conference paper in the proceedings of the 2018 IEEE Data Science Workshop (DSW 2018).
% The title is stored in Title Case with the acronym brace-protected so that
% bibliography styles can recase it safely.
% NOTE(review): the address field holds a country rather than the publisher's city -- confirm and replace.
@inproceedings{bfac439772da48b29ae839cbda909fc4,
  author    = {Stodden, Victoria and Wu, Xiaomian and Sochat, Vanessa},
  title     = {{AIM}: An Abstraction for Improving Machine Learning Prediction},
  booktitle = {2018 {IEEE} Data Science Workshop, {DSW} 2018 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {150--154},
  year      = {2018},
  month     = aug,
  day       = {17},
  doi       = {10.1109/DSW.2018.8439914},
  isbn      = {9781538644102},
  language  = {English (US)},
  keywords  = {Scientific Filesystem, containers, cyberinfrastructure, machine learning, programming abstraction, reproducible research},
  abstract  = {We introduce a structured and portable Abstraction for Improving Machine learning (AIM) to improve prediction outcomes and enable meaningful comparisons of ML pipelines. We implement AIM for a well-known acute leukemia classification problem using the Scientific Filesystem, enabling direct performance comparisons across a variety of classifiers. AIM provides three direct efficiency benefits: 1) the sources of performance differences between ML pipelines can identified at the algorithm implementation level as defined by the AIM, 2) improvements can be made to specific aspects of the pipeline and thus better understood, and 3) the reuse of these defined abstraction components across different pipelines is facilitated. When the AIM is defined at the outset of the prediction challenge, these benefits can come at minimal cost. We show these benefits by implementing AIM and the Scientific Filesystem on the well-known Golub AML/ALL cancer dataset.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 IEEE Data Science Workshop, DSW 2018 ; Conference date: 04-06-2018 Through 06-06-2018},
}