@inproceedings{4dd617927b8543048c22b328a76ae281,
  title     = {Efficient and Scalable Neural Residual Waveform Coding with Collaborative Quantization},
  abstract  = {Scalability and efficiency are desired in neural speech codecs, which supports a wide range of bitrates for applications on various devices. We propose a collaborative quantization (CQ) scheme to jointly learn the codebook of LPC coefficients and the corresponding residuals. CQ does not simply shoehorn LPC to a neural network, but bridges the computational capacity of advanced neural network models and traditional, yet efficient and domain-specific digital signal processing methods in an integrated manner. We demonstrate that CQ achieves much higher quality than its predecessor at 9 kbps with even lower model complexity. We also show that CQ can scale up to 24 kbps where it outperforms AMR-WB and Opus. As a neural waveform codec, CQ models are with less than 1 million parameters, significantly less than many other generative models.},
  keywords  = {deep neural network, linear predictive coding, model complexity, residual learning, speech coding},
  author    = {Zhen, Kai and Lee, Mi Suk and Sung, Jongmo and Beack, Seungkwon and Kim, Minje},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 2020 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2020 ; Conference date: 04-05-2020 Through 08-05-2020},
  year      = {2020},
  month     = may,
  doi       = {10.1109/ICASSP40776.2020.9054347},
  language  = {English (US)},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {361--365},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2020 - Proceedings},
  address   = {United States},
}