% Conference paper (INTERSPEECH 2012). The conference name is carried by
% booktitle alone; the note field holds only the conference dates.
@inproceedings{27355053f36e4d3e99fb3ff584cf84d8,
  author    = {Huang, {Po Sen} and Yang, Jianchao and Hasegawa-Johnson, Mark and Liang, Feng and Huang, {Thomas S.}},
  title     = {Pooling robust shift-invariant sparse representations of acoustic signals},
  booktitle = {13th Annual Conference of the International Speech Communication Association 2012, {INTERSPEECH} 2012},
  pages     = {2517--2520},
  year      = {2012},
  isbn      = {9781622767595},
  language  = {English (US)},
  keywords  = {Acoustic event classification, Pooling, Sparse coding},
  abstract  = {In recent years, designing the coding and pooling structures in layered networks has been shown to be a useful method for learning high-level feature representations for visual data. Yet, such learning structures have not been extensively studied for audio signals. In this paper, we investigate different pooling strategies based on the sparse coding scheme and propose a temporal pyramid pooling method to extract discriminative and shift invariant feature representations. We demonstrate the superiority of our new feature representation over traditional features on the acoustic event classification task.},
  note      = {Conference date: 9--13 September 2012},
}