% Conference paper: Chen and Nahrstedt, "EScALation" (MMSys 2021).
% The citation key is an opaque exporter-generated ID; it is kept unchanged so
% that existing \cite commands keep resolving.
% NOTE(review): `address` holds a country rather than the publisher's city --
% TODO confirm the city and replace it.
% The exporter's `series` field was dropped because it repeated `booktitle`
% verbatim, which makes standard styles print the proceedings name twice.
@inproceedings{87070cf58a9b4997bad33248a2fac154,
  author    = {Chen, Bo and Nahrstedt, Klara},
  title     = {{EScALation}: A Framework for Efficient and Scalable Spatio-Temporal Action Localization},
  booktitle = {{MMSys} 2021 - Proceedings of the 2021 Multimedia Systems Conference},
  publisher = {Association for Computing Machinery},
  address   = {United States},
  pages     = {147--158},
  year      = {2021},
  month     = jul,
  day       = {15},
  doi       = {10.1145/3458305.3459598},
  keywords  = {scalability, spatio-temporal action localization, video analytics},
  abstract  = {Spatio-temporal action localization aims to detect the spatial location and the start/end time of the action in a video. The state-of-the-art approach uses convolutional neural networks to extract possible bounding boxes for the action in each frame and then link bounding boxes into action tubes based on the location and the class-specific score of each bounding box. Though this approach has been successful at achieving a good localization accuracy, it is computation-intensive. High-end GPUs are usually demanded for it to achieve real-time performance. In addition, this approach does not scale well on a large number of action classes. In this work, we present a framework, EScALation, for making spatio-temporal action localization efficient and scalable. Our framework involves two main strategies. One is the frame sampling technique that utilizes the temporal correlation between frames and selects key frame(s) from a temporally correlated set of frames to perform bounding box detection. The other is the class filtering technique that exploits bounding box information to predict the action class prior to linking bounding boxes. We compare EScALation with the state-of-the-art approach on UCF101-24 and J-HMDB-21 datasets. One of our experiments shows EScALation is able to save 72.2\% of the time with only 6.1\% loss of mAP. In addition, we show that EScALation scales better to a large number of action classes than the state-of-the-art approach.},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2021 ACM.; 12th ACM Multimedia Systems Conference, MMSys 2021 ; Conference date: 28-09-2021 Through 01-10-2021},
}