% Conference paper (ITiCSE 2018) describing CLaDS, a cloud-based virtual lab.
% The citation key is the exporter-generated identifier; it is kept unchanged
% so that existing \cite commands continue to resolve.
% Conventions used in this entry: names are in "Last, First" form, acronyms
% that must keep their case are brace-protected, and all text is 7-bit ASCII
% (TeX quotes instead of Unicode curly quotes) for classic BibTeX.
@inproceedings{b562c6aa2a8242178454a50360051cbb,
  author    = {Geigle, Chase and Sundaram, Hari and Lourentzou, Ismini and Zhai, {Cheng Xiang}},
  title     = {{CLaDS}: A cloud-based virtual lab for the delivery of scalable hands-on assignments for practical data science education},
  booktitle = {{ITiCSE} 2018 - Proceedings of the 23rd Annual {ACM} Conference on Innovation and Technology in Computer Science Education},
  editor    = {Andreou, Panayiotis and Armoni, Michal and Read, {Janet C.} and Polycarpou, Irene},
  series    = {Annual Conference on Innovation and Technology in Computer Science Education, {ITiCSE}},
  pages     = {176--181},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2018},
  month     = jul,
  day       = {2},
  doi       = {10.1145/3197091.3197135},
  language  = {English (US)},
  keywords  = {Cloud computing, Data science education, Virtual lab},
  abstract  = {The rise of the ``big data'' era has created a pressing demand for educating many data scientists and engineers quickly at low cost. It is essential they learn by working on assignments that involve real world data sets to develop the skills needed to be successful in the workplace. However, enabling instructors to flexibly deliver all kinds of data science assignments using real world data sets to large numbers of learners (both on-campus and off-campus) at low cost is a significant open challenge. To address this emerging challenge generally, we develop and deploy a novel Cloud-based Lab for Data Science (CLaDS) to enable many learners around the world to work on real-world data science problems without having to move or otherwise distribute prohibitively large data sets. Leveraging version control and continuous integration, CLaDS provides a general infrastructure to enable any instructor to conveniently deliver any hands-on data science assignment that uses large real world data sets to as many learners as our cloud-computing infrastructure allows at very low cost. In this paper, we present the design and implementation of CLaDS and discuss our experience with using CLaDS to deploy seven major text data assignments for students in both an on-campus course and an online course to work on for learning about text data retrieval and mining techniques; this shows that CLaDS is a very promising novel general infrastructure for efficiently delivering a wide range of hands-on data science assignments to a large number of learners at very low cost.},
  note      = {Funding Information: This material is based upon work supported by the NSF GRFP under Grant Number DGE-1144245, by the NSF Research Program under Grant Number IIS-1629161, by Microsoft Azure, and by a gift fund from Intel under its support program for Big Data Education. Publisher Copyright: {\textcopyright} 2018 Copyright held by the owner/author(s).; 23rd Annual ACM Conference on Innovation and Technology in Computer Science Education, ITiCSE 2018 ; Conference date: 02-07-2018 Through 04-07-2018},
}