% Conference paper: "Contrastive Learning for Weakly Supervised Phrase Grounding" (ECCV 2020, LNCS).
% The citation key is kept unchanged so existing citations of this entry still resolve.
% NOTE(review): `address` holds a country rather than the publisher's city -- confirm and replace.
% NOTE(review): no LNCS `volume` number is present in this record -- add it once verified.
@inproceedings{8cf4ecdd59c2408dbf9773ea6a4cf0a2,
  author    = {Gupta, Tanmay and Vahdat, Arash and Chechik, Gal and Yang, Xiaodong and Kautz, Jan and Hoiem, Derek},
  title     = {Contrastive Learning for Weakly Supervised Phrase Grounding},
  booktitle = {Computer Vision -- {ECCV} 2020 -- 16th European Conference 2020, Proceedings},
  editor    = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {752--768},
  publisher = {Springer},
  address   = {Germany},
  year      = {2020},
  doi       = {10.1007/978-3-030-58580-8_44},
  isbn      = {9783030585792},
  language  = {English (US)},
  keywords  = {Attention, Grounding, InfoNCE, Mutual information},
  abstract  = {Phrase grounding, the problem of associating image regions to caption words, is a crucial component of vision-language tasks. We show that phrase grounding can be learned by optimizing word-region attention to maximize a lower bound on mutual information between images and caption words. Given pairs of images and captions, we maximize compatibility of the attention-weighted regions and the words in the corresponding caption, compared to non-corresponding pairs of images and captions. A key idea is to construct effective negative captions for learning through language model guided word substitutions. Training with our negatives yields a $\sim$10\% absolute gain in accuracy over randomly-sampled negatives from the training data. Our weakly supervised phrase grounding model trained on COCO-Captions shows a healthy gain of 5.7\% to achieve 76.7\% accuracy on Flickr30K Entities benchmark. Our code and project material will be available at http://tanmaygupta.info/info-ground.},
  note      = {Acknowledgement. This work was done partly at NVIDIA and is partly supported by ONR MURI Award N00014-16-1-2007. 16th European Conference on Computer Vision, ECCV 2020; Conference date: 23-08-2020 through 28-08-2020},
}