% Paper by Velivelli, Ngo and Huang in an edited Springer LNCS volume (pp. 227--237, 2003).
% Entry type is inproceedings rather than inbook: the record has its own authors, separate
% volume editors and a booktitle, which classic inbook neither expects nor prints.
% NOTE(review): the exported record repeated the series name in booktitle and had no volume
% number. The booktitle and volume below were filled in from the publisher listing for the
% DOI in this entry -- confirm them against that listing before relying on them.
% NOTE(review): the copyright line looks like a database-export artifact; it is kept in the
% ignored field internal-note so that it is not printed in the bibliography.
@inproceedings{e9b854a2107241acb66a8fed2d7bb23c,
  author        = {Velivelli, Atulya and Ngo, Chong Wah and Huang, Thomas S.},
  title         = {Detection of Documentary Scene Changes by Audio-Visual Fusion},
  booktitle     = {Image and Video Retrieval: Second International Conference, {CIVR} 2003},
  editor        = {Bakker, Erwin M. and Lew, Michael S. and Huang, Thomas S. and Sebe, Nicu and Zhou, Xiang},
  series        = {Lecture Notes in Computer Science},
  volume        = {2728},
  pages         = {227--237},
  publisher     = {Springer},
  address       = {Berlin, Heidelberg},
  year          = {2003},
  doi           = {10.1007/3-540-45113-7_23},
  isbn          = {978-3-540-45113-6},
  langid        = {american},
  abstract      = {The concept of a documentary scene was inferred from the audio-visual characteristics of certain documentary videos. It was observed that the amount of information from the visual component alone was not enough to convey a semantic context to most portions of these videos, but a joint observation of the visual component and the audio component conveyed a better semantic context. From the observations that we made on the video data, we generated an audio score and a visual score. We later generated a weighted audio-visual score within an interval and adaptively expanded or shrunk this interval until we found a local maximum score value. The video ultimately will be divided into a set of intervals that correspond to the documentary scenes in the video. After we obtained a set of documentary scenes, we made a check for any redundant detections.},
  internal-note = {Copyright: Copyright 2020 Elsevier B.V., All rights reserved.},
}