% Conference paper in the ICRA 2016 proceedings.
% Acronyms in title and booktitle are brace-protected so that styles which
% sentence-case these fields keep their capitals. Author names use the
% unambiguous "Last, First" form.
% NOTE(review): address holds a country rather than a publisher city, and
% day is not a standard field (classic styles ignore it) -- confirm both
% against the target bibliography style before relying on them.
@inproceedings{b7a96219b40f4d7ea4072114cd84a547,
  author    = {Hoffman, Judy and Gupta, Saurabh and Leong, Jian and Guadarrama, Sergio and Darrell, Trevor},
  title     = {Cross-modal adaptation for {RGB-D} detection},
  booktitle = {2016 {IEEE} International Conference on Robotics and Automation, {ICRA} 2016},
  series    = {Proceedings - IEEE International Conference on Robotics and Automation},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {5032--5039},
  year      = {2016},
  month     = jun,
  day       = {8},
  doi       = {10.1109/ICRA.2016.7487708},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE; 2016 IEEE International Conference on Robotics and Automation, ICRA 2016; Conference date: 16-05-2016 through 21-05-2016},
  abstract  = {In this paper we propose a technique to adapt convolutional neural network (CNN) based object detectors trained on RGB images to effectively leverage depth images at test time to boost detection performance. Given labeled depth images for a handful of categories we adapt an RGB object detector for a new category such that it can now use depth images in addition to RGB images at test time to produce more accurate detections. Our approach is built upon the observation that lower layers of a CNN are largely task and category agnostic and domain specific while higher layers are largely task and category specific while being domain agnostic. We operationalize this observation by proposing a mid-level fusion of RGB and depth CNNs. Experimental evaluation on the challenging NYUD2 dataset shows that our proposed adaptation technique results in an average 21% relative improvement in detection performance over an RGB-only baseline even when no depth training data is available for the particular category evaluated. We believe our proposed technique will extend advances made in computer vision to RGB-D data leading to improvements in performance at little additional annotation effort.},
}