% Conference paper (INTERSPEECH 2012) on joint decoding for ASR and slot tagging.
% The citation key is an opaque hex ID (it looks auto-generated); it is kept
% unchanged so that existing \cite commands continue to resolve.
@inproceedings{21db9ef258dd4b1ebf019318b47b24fa,
  author    = {Deoras, Anoop and Sarikaya, Ruhi and Tur, Gokhan and Hakkani-T{\"u}r, Dilek},
  title     = {Joint decoding for speech recognition and semantic tagging},
  booktitle = {13th Annual Conference of the International Speech Communication Association 2012, {INTERSPEECH} 2012},
  pages     = {1066--1069},
  year      = {2012},
  month     = sep,
  isbn      = {9781622767595},
  language  = {English (US)},
  keywords  = {ASR, CRF, CU, ME, SLU},
  note      = {Conference date: 2012-09-09 through 2012-09-13},
  abstract  = {Most conversational understanding (CU) systems today employ a cascade approach, where the best hypothesis from automatic speech recognizer (ASR) is fed into spoken language understanding (SLU) module, whose best hypothesis is then fed into other systems such as interpreter or dialog manager. In such approaches, errors from one statistical module irreversibly propagates into another module causing a serious degradation in the overall performance of the conversational understanding system. Thus it is desirable to jointly optimize all the statistical modules together. As a first step towards this, in this paper, we propose a joint decoding framework in which we predict the optimal word as well as slot (semantic tag) sequence jointly given the input acoustic stream. On Microsoft's CU system, we show 1.3\% absolute reduction in word error rate (WER) and 1.2\% absolute improvement in F measure for slot prediction when compared to a very strong cascade baseline comprising of the state-of-the-art recognizer followed by a slot sequence tagger.},
}