% InfoMAE paper, WWW 2025 proceedings (ACM).
% The opaque citation key is kept unchanged so existing \cite commands still resolve.
% The funding acknowledgement and the conference name/dates live in dedicated
% fields (funding, eventtitle, eventdate) instead of "note": classic BibTeX
% styles silently ignore unknown fields, whereas "note" would be printed
% verbatim in every bibliography entry.
% Percent signs in the abstract are escaped so that styles which typeset the
% abstract do not treat them as LaTeX comment characters.
@inproceedings{6d91975ce1004265a199633090ae5ef1,
  author     = {Kimura, Tomoyoshi and Li, Xinlin and Hanna, Osama and Chen, Yatong and Chen, Yizhuo and Kara, Denizhan and Wang, Tianshi and Li, Jinyang and Ouyang, Xiaomin and Liu, Shengzhong and Srivastava, Mani and Diggavi, Suhas and Abdelzaher, Tarek},
  title      = {{InfoMAE}: Pair-Efficient Cross-Modal Alignment for Multimodal Time-Series Sensing Signals},
  booktitle  = {{WWW} 2025 - Proceedings of the {ACM} Web Conference},
  series     = {{WWW} '25},
  pages      = {3084--3095},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {2025},
  month      = apr,
  day        = {28},
  doi        = {10.1145/3696410.3714853},
  eventtitle = {34th ACM Web Conference, WWW 2025},
  eventdate  = {2025-04-28/2025-05-02},
  language   = {English (US)},
  keywords   = {Internet of Things, Multimodal sensing, Self-supervised learning},
  abstract   = {Standard multimodal self-supervised learning (SSL) algorithms regard cross-modal synchronization as implicit supervisory labels during pretraining, thus posing high requirements on the scale and quality of multimodal samples. These constraints significantly limit the performance of sensing intelligence in IoT applications, as the heterogeneity and the non-interpretability of time-series signals result in abundant unimodal data but scarce high-quality multimodal pairs. This paper proposes InfoMAE, a cross-modal alignment framework that tackles the challenge of multimodal pair efficiency under the SSL setting by facilitating efficient cross-modal alignment of pretrained unimodal representations. InfoMAE achieves efficient cross-modal alignment with limited data pairs through a novel information theory-inspired formulation that simultaneously addresses distribution-level and instance-level alignment. Extensive experiments on two real-world IoT applications are performed to evaluate InfoMAE{\textquoteright}s pairing efficiency to bridge pretrained unimodal models into a cohesive joint multimodal model. InfoMAE enhances downstream multimodal tasks by over 60\% with significantly improved multimodal pairing efficiency. It also improves unimodal task accuracy by an average of 22\%.},
  funding    = {Research reported in this paper was sponsored in part by the Army Research Laboratory under Cooperative Agreement W911NF-17-20196, NSF CNS 20-38817, and the Boeing Company. It was also supported in part by ACE, one of the seven centers in JUMP 2.0, a Semiconductor Research Corporation (SRC) program sponsored by DARPA. The views and conclusions contained in this document are those of the author(s) and should not be interpreted as representing the official policies of the CCDC Army Research Laboratory, or the US government. The US government is authorized to reproduce and distribute reprints for government purposes notwithstanding any copyright notation hereon.},
}