% Conference paper "Kernel Multimodal Continuous Attention" (NeurIPS 2022).
% The citation key is kept unchanged so existing citations still resolve.
% Funding and acknowledgement text is stored in the annote field rather than
% note, so that standard bibliography styles do not print it in the reference list.
@inproceedings{c8d629315c434c9282f67639eb5ba806,
  author    = {Moreno, Alexander and Wu, Zhenke and Nagesh, Supriya and Dempsey, Walter and Rehg, James M.},
  title     = {Kernel Multimodal Continuous Attention},
  booktitle = {36th Conference on Neural Information Processing Systems, NeurIPS 2022},
  editor    = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.},
  series    = {Advances in Neural Information Processing Systems},
  publisher = {Curran Associates Inc.},
  pages     = {18046--18059},
  year      = {2022},
  language  = {English (US)},
  abstract  = {Attention mechanisms average a data representation with respect to probability weights. Recently, [23-25] proposed continuous attention, focusing on unimodal exponential and deformed exponential family attention densities: the latter can have sparse support. [8] extended to multimodality via Gaussian mixture attention densities. In this paper, we propose using kernel exponential families [4] and our new sparse counterpart, kernel deformed exponential families. Theoretically, we show new existence results for both families, and approximation capabilities for the deformed case. Lacking closed form expressions for the context vector, we use numerical integration: we prove exponential convergence for both families. Experiments show that kernel continuous attention often outperforms unimodal continuous attention, and the sparse variant tends to highlight time series peaks.},
  annote    = {Alexander Moreno is supported by Luminous Computing, and was previously supported by NIH 1-P41-EB028242-01A1, U01CA229437, and a Google CMD-IT Flip-Alliance fellowship. Zhenke Wu is partly supported by NIH U01CA229437 and an investigator grant from Precision Health Initiative at University of Michigan, Ann Arbor. Supriya Nagesh is supported by NSF CNS1823201. Walter Dempsey is supported by P50 DA054039. We thank Marcos Treviso and Andr{\'e} Martins for helpful discussions about their prior work and help with understanding their code.},
}