% KDD 2022 conference paper by Wang, Huang, Wu and Xing. The funding acknowledgement is stored in the non-printing annote field.
@inproceedings{daaed72ba9904ef9b8f4c6ded49d6c0b,
  author    = {Wang, Haohan and Huang, Zeyi and Wu, Xindi and Xing, Eric P.},
  title     = {Toward Learning Robust and Invariant Representations with Alignment Regularization and Data Augmentation},
  booktitle = {{KDD} 2022 -- Proceedings of the 28th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  series    = {Proceedings of the {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  month     = aug,
  pages     = {1846--1856},
  doi       = {10.1145/3534678.3539438},
  isbn      = {9781450393850},
  language  = {English (US)},
  keywords  = {robustness, trustworthy, data augmentation, machine learning},
  abstract  = {Data augmentation has been proven to be an effective technique for developing machine learning models that are robust to known classes of distributional shifts (e.g., rotations of images), and alignment regularization is a technique often used together with data augmentation to further help the model learn representations invariant to the shifts used to augment the data. In this paper, motivated by a proliferation of options of alignment regularizations, we seek to evaluate the performances of several popular design choices along the dimensions of robustness and invariance, for which we introduce a new test procedure. Our synthetic experiment results speak to the benefits of squared $\ell_2$ norm regularization. Further, we also formally analyze the behavior of alignment regularization to complement our empirical study under assumptions we consider realistic. Finally, we test this simple technique we identify (worst-case data augmentation with squared $\ell_2$ norm alignment regularization) and show that the benefits of this method outrun those of the specially designed methods. We also release a software package in both TensorFlow and PyTorch for users to use the method with a couple of lines at https://github.com/jyanln/AlignReg.},
  annote    = {This work was supported by NIH R01GM114311, NIH P30DA035778, and NSF IIS1617583; NSF CAREER IIS-2150012 and IIS-2204808. The authors would like to thank Hanru Yan for the implementation of the software package.},
}