@comment{Book chapter, pages 347--360, in the edited volume System Dependability and Analytics. Typed as incollection because the chapter authors differ from the volume editors and the entry carries a booktitle. The percent sign in the abstract and the hash sign in the note are backslash-escaped so both fields survive LaTeX typesetting.}
@incollection{056f56a501ab43a4a6ccf4d7c53bb643,
author = "Basu, Sayantani and Campbell, Roy H.",
title = "Classifying {COVID-19} Variants Based on Genetic Sequences Using Deep Learning Models",
booktitle = "System Dependability and Analytics",
editor = "Wang, Long and Pattabiraman, Karthik and {Di Martino}, Catello and Athreya, Arjun and Bagchi, Saurabh",
series = "Springer Series in Reliability Engineering",
publisher = "Springer",
address = "Germany",
year = "2022",
month = jul,
pages = "347--360",
doi = "10.1007/978-3-031-02063-6_19",
isbn = "978-3-031-02062-9",
language = "English (US)",
keywords = "COVID-19, Classification, Deep learning, Gene sequences, LSTMs, Variants",
abstract = "The COrona VIrus Disease (COVID-19) pandemic led to the occurrence of several variants with time. This has led to an increased importance of understanding sequence data related to COVID-19. In this chapter, we propose an alignment-free k-mer based LSTM (Long Short-Term Memory) deep learning model that can classify 20 different variants of COVID-19. We handle the class imbalance problem by sampling a fixed number of sequences for each class label. We handle the vanishing gradient problem in LSTMs arising from long sequences by dividing the sequence into fixed lengths and obtaining results on individual runs. Our results show that one-vs-all classifiers have test accuracies as high as 92.5\% with tuned hyperparameters compared to the multi-class classifier model. Our experiments show higher overall accuracies for B.1.1.214, B.1.177.21, B.1.1.7, B.1.526, and P.1 on the one-vs-all classifiers, suggesting the presence of distinct mutations in these variants. Our results show that embedding vector size and batch sizes have insignificant improvement in accuracies, but changing from 2-mers to 3-mers mostly improves accuracies. We also studied individual runs which show that most accuracies improved after the 20th run, indicating that these sequence positions may have more contributions to distinguishing among different COVID-19 variants.",
note = "Acknowledgements This project has been funded by the Jump ARCHES endowment through the Health Care Engineering Systems Center. This work uses resources from GISAID (https://www.gisaid.org). We would like to acknowledge all laboratories that have contributed their COVID-19 sequence data to GISAID. This work utilizes resources supported by the National Science Foundation{\textquoteright}s Major Research Instrumentation program, grant \#1725729, as well as the University of Illinois at Urbana-Champaign.",
}