% Conference paper in Proceedings of Machine Learning Research, volume 238.
@inproceedings{eb54cae967454041b040a2efa4687473,
  author    = {Alizadeh, Shima and Bhargava, Aniruddha and Gopalswamy, Karthick and Jain, Lalit and Kveton, Branislav and Liu, Ge},
  title     = {Pessimistic Off-Policy Multi-Objective Optimization},
  booktitle = {Proceedings of The 27th International Conference on Artificial Intelligence and Statistics},
  editor    = {Dasgupta, Sanjoy and Mandt, Stephan and Li, Yingzhen},
  series    = {Proceedings of Machine Learning Research},
  volume    = {238},
  pages     = {2980--2988},
  publisher = {PMLR},
  year      = {2024},
  month     = feb,
  day       = {1},
  language  = {English (US)},
  abstract  = {Multi-objective optimization is a class of optimization problems with multiple conflicting objectives. We study offline optimization of multi-objective policies from data collected by a previously deployed policy. We propose a pessimistic estimator for policy values that can be easily plugged into existing formulas for hypervolume computation and optimized. The estimator is based on inverse propensity scores (IPS), and improves upon a naive IPS estimator in both theory and experiments. Our analysis is general, and applies beyond our IPS estimators and methods for optimizing them.},
}