% Two-page SIGCSE 2024 proceedings paper (Smith and Zilles) on grading
% "Explain in Plain English" (EiPE) responses by running test cases on
% GPT-4-generated code. The address field holds the publisher's location;
% the conference dates are recorded in the note field.
@inproceedings{7fd9aa1c6208432d81dc4fb69fc7a4f8,
  author    = {Smith, David H. and Zilles, Craig},
  title     = {Evaluating Large Language Model Code Generation as an Autograding Mechanism for {``Explain in Plain English''} Questions},
  booktitle = {SIGCSE 2024 - Proceedings of the 55th ACM Technical Symposium on Computer Science Education},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {1824--1825},
  year      = {2024},
  month     = mar,
  day       = {14},
  doi       = {10.1145/3626253.3635542},
  language  = {English (US)},
  keywords  = {EiPE, GPT-4, autograding, large language models},
  abstract  = {The ability of students to ``Explain in Plain English'' (EiPE) the purpose of code is a critical skill for students in introductory programming courses to develop. EiPE questions serve as both a mechanism for students to develop and demonstrate code comprehension skills. However, evaluating this skill has been challenging as manual grading is time consuming and not easily automated. The process of constructing a prompt for the purposes of code generation for a Large Language Model, such as OpenAI's GPT-4, bears a striking resemblance to constructing EiPE responses. In this paper, we explore the potential of using test cases run on code generated by GPT-4 from students' EiPE responses as a grading mechanism for EiPE questions. We applied this proposed grading method to a corpus of EiPE responses collected from past exams, then measured agreement between the results of this grading method and human graders. Overall, we find moderate agreement between the human raters and the results of the unit tests run on the generated code. This appears to be attributable to GPT-4's code generation being more lenient than human graders on low-level descriptions of code.},
  note      = {Publisher Copyright: {\textcopyright} 2024 Owner/Author.; 55th ACM Technical Symposium on Computer Science Education, SIGCSE 2024 ; Conference date: 20-03-2024 Through 23-03-2024},
}