% Conference paper "Parallelizing WFST speech decoders" (ICASSP 2016 proceedings).
% Conventions used in this record:
%   - authors are stored as "Last, First" so name parsing is unambiguous;
%   - acronyms (WFST, IEEE, ICASSP) are brace-protected so styles that
%     recase titles keep them in capitals;
%   - the percent sign in the abstract is escaped so the field is safe to
%     typeset if a style prints it.
% NOTE(review): the address field holds a country rather than a publisher
% city; kept as exported -- confirm the intended location before relying on it.
@inproceedings{2961307da9ba459298d25c4ac4ddbcef,
  author    = {Mendis, Charith and Droppo, Jasha and Maleki, Saeed and Musuvathi, Madanlal and Mytkowicz, Todd and Zweig, Geoffrey},
  title     = {Parallelizing {WFST} speech decoders},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2016},
  month     = may,
  day       = {18},
  pages     = {5325--5329},
  doi       = {10.1109/ICASSP.2016.7472694},
  language  = {English (US)},
  keywords  = {Large vocabulary, Parallel Viterbi, WFST Decoder},
  abstract  = {The performance-intensive part of a large-vocabulary continuous speech-recognition system is the Viterbi computation that determines the sequence of words that are most likely to generate the acoustic-state scores extracted from an input utterance. This paper presents an efficient parallel algorithm for Viterbi. The key idea is to partition the per-frame computation among threads to minimize inter-thread communication despite traversing a large irregular acoustic and language model graphs. Together with a per-thread beam search, load balancing language-model lookups, and memory optimizations, we achieve a 6.67x speedup over an highly-optimized production-quality WFST-based speech decoder. On a 200,000 word vocabulary and a 59 million ngram model, our decoder runs at 0.27x real time while achieving a word-error rate of 14.81\% on 6214 labeled utterances from Voice Search data.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 41st IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2016 ; Conference date: 20-03-2016 Through 25-03-2016},
}