% Conference paper (ICASSP 2018) on blind estimation of the speech transmission index.
% The citation key is the identifier from the original export; it is kept unchanged
% so that existing citations continue to resolve.
% LaTeX-special characters in free-text fields (the hash sign in "note", the percent
% sign in "abstract") are escaped so the entry is safe under styles that print them.
% NOTE(review): "address" holds a country rather than the publisher's city, and
% month/day differ from the conference dates quoted in "note" -- presumably the
% online publication date; confirm against the publisher record.
@inproceedings{a6225c1897054ac082b08928a5146ba4,
  author    = {Seetharaman, Prem and Mysore, Gautham J. and Smaragdis, Paris and Pardo, Bryan},
  title     = {Blind Estimation of the Speech Transmission Index for Speech Quality Prediction},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2018 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {591--595},
  year      = {2018},
  month     = sep,
  day       = {10},
  doi       = {10.1109/ICASSP.2018.8461827},
  isbn      = {9781538646588},
  language  = {English (US)},
  keywords  = {Speech enhancement, Speech quality, Speech transmission index},
  abstract  = {The speech transmission index (STI) of a listening position within a given room indicates the quality and intelligibility of speech uttered in that room. The measure is very reliable for predicting speech intelligibility in many room conditions but requires an STI measurement of the impulse response for the room. We present a method for blindly estimating the STI without measuring or modeling the impulse response of the room using deep convolutional neural networks. Our model is trained entirely using simulated room impulse responses combined with clean speech examples from the DAPS dataset [1] and works directly on PCM audio. Our experiments show that our method predicts true STI with a high degree of accuracy---an average error of under 4\%. It can also distinguish between different STI conditions to a level of granularity that is comparable to humans.},
  note      = {Funding Information: This work was done while on an Adobe Research internship. Paris Smaragdis was supported through NSF grant \#1451380.; 2018 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2018 ; Conference date: 15-04-2018 Through 20-04-2018},
}