% Conference paper (ICASSP 2011). The citation key is the exporter-generated
% identifier and is kept unchanged so existing \cite commands still resolve.
% Acronyms are brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{8a83f8490d46441e9ad1038e72b4fad4,
  author    = {Zhao, Xiaojia and Shao, Yang and Wang, De Liang},
  title     = {Robust speaker identification using a {CASA} front-end},
  booktitle = {2011 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2011 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  pages     = {5468--5471},
  year      = {2011},
  doi       = {10.1109/ICASSP.2011.5947596},
  isbn      = {9781457705397},
  language  = {English (US)},
  keywords  = {CASA, GFCC, Robust speaker identification, gammatone frequency cepstral coefficient, ideal binary mask},
  abstract  = {Speaker recognition remains a challenging task under noisy conditions. Inspired by auditory perception, computational auditory scene analysis (CASA) typically segregates speech by producing a binary time-frequency mask. We first show that a recently introduced speaker feature, Gammatone Frequency Cepstral Coefficient, performs substantially better than conventional speaker features under noisy conditions. To deal with noisy speech, we apply CASA separation and then either reconstruct or marginalize corrupted components indicated by the CASA mask. Both methods are effective. We further combine them into a single system depending on the detected signal to noise ratio (SNR). This system achieves significant performance improvements over related systems under a wide range of SNR conditions.},
  note      = {36th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2011 ; Conference date: 22-05-2011 Through 27-05-2011},
}