@comment{Conference paper in the AVEC 2016 workshop proceedings (ACM).
  The proceedings title is stored in booktitle only (no series field, which
  is reserved for a named book series), and address holds the publisher's
  city rather than a country. The citation key is kept as exported so that
  existing citations continue to resolve.}
@inproceedings{179b3ef6b74e416f9456294e8bc6dc35,
  author    = {Brady, Kevin and Gwon, Youngjune and Khorrami, Pooya and Godoy, Elizabeth and Campbell, William and Dagli, Charlie and Huang, Thomas S.},
  title     = {Multi-modal audio, video and physiological sensor learning for continuous emotion prediction},
  booktitle = {{AVEC} 2016 - Proceedings of the 6th International Workshop on Audio/Visual Emotion Challenge, co-located with {ACM} Multimedia 2016},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {97--104},
  year      = {2016},
  month     = oct,
  day       = {16},
  doi       = {10.1145/2988257.2988264},
  keywords  = {Affective Computing, CNN, Challenge, Deep Learning, Emotion Recognition, Facial Expression, Sparse Coding, Speech},
  abstract  = {The automatic determination of emotional state from multimedia content is an inherently challenging problem with a broad range of applications including biomedical diagnostics, multimedia retrieval, and human computer interfaces. The Audio Video Emotion Challenge (AVEC) 2016 provides a well-defined framework for developing and rigorously evaluating innovative approaches for estimating the arousal and valence states of emotion as a function of time. It presents the opportunity for investigating multimodal solutions that include audio, video, and physiological sensor signals. This paper provides an overview of our AVEC Emotion Challenge system, which uses multi-feature learning and fusion across all available modalities. It includes a number of technical contributions, including the development of novel high- and low-level features for modeling emotion in the audio, video, and physiological channels. Low-level features include modeling arousal in audio with minimal prosodic-based descriptors. High-level features are derived from supervised and unsupervised machine learning approaches based on sparse coding and deep learning. Finally, a state space estimation approach is applied for score fusion that demonstrates the importance of exploiting the time-series nature of the arousal and valence states. The resulting system outperforms the baseline systems [10] on the test evaluation set with an achieved Concordant Correlation Coefficient (CCC) for arousal of 0.770 vs 0.702 (baseline) and for valence of 0.687 vs 0.638. Future work will focus on exploiting the time-varying nature of individual channels in the multi-modal framework.},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2016 ACM; 6th International Workshop on Audio/Visual Emotion Challenge, AVEC 2016; Conference date: 16-10-2016},
}