% Conference paper in the ITP 2023 proceedings (LIPIcs series).
% Names are stored in "Last, First" form; accented letters use BibTeX
% special-character escapes so that sorting and labels work under classic BibTeX.
@inproceedings{0e819b17740448e49b2327111b433c36,
  author    = {Reichel, Tom and Henderson, R. Wesley and Touchet, Andrew and Gardner, Andrew and Ringer, Talia},
  title     = {Proof Repair Infrastructure for Supervised Models: Building a Large Proof Repair Dataset},
  booktitle = {14th International Conference on Interactive Theorem Proving, {ITP} 2023},
  editor    = {Naumowicz, Adam and Thiemann, Ren{\'e}},
  series    = {Leibniz International Proceedings in Informatics, {LIPIcs}},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik GmbH, Dagstuhl Publishing},
  year      = {2023},
  month     = jul,
  doi       = {10.4230/LIPIcs.ITP.2023.26},
  language  = {English (US)},
  keywords  = {benchmarks, datasets, formal proof, machine learning, proof repair},
  abstract  = {We report on our efforts building a new, large proof-repair dataset and benchmark suite for the Coq proof assistant. The dataset is made up of Git commits from open-source projects with old and new versions of definitions and proofs aligned across commits. Building this dataset has been a significant undertaking, highlighting a number of challenges and gaps in existing infrastructure. We discuss these challenges and gaps, and we provide recommendations for how the proof assistant community can address them. Our hope is to make it easier to build datasets and benchmark suites so that machine-learning tools for proofs will move to target the tasks that matter most and do so equitably across proof assistants.},
  note      = {Publisher Copyright: {\textcopyright} Tom Reichel, R. Wesley Henderson, Andrew Touchet, Andrew Gardner, and Talia Ringer; licensed under Creative Commons License CC-BY 4.0. 14th International Conference on Interactive Theorem Proving, ITP 2023; Conference date: 31-07-2023 through 04-08-2023},
}