% Conference paper by Cho and Liu in the proceedings of the 2019 IEEE
% International Conference on Big Data. The citation key is kept as-is so
% existing citations continue to resolve.
% The dollar sign in the copyright line of the note field is escaped so that
% it is typeset literally instead of opening math mode.
@inproceedings{7757a65b7faa4796b702ccf74728a00c,
  author    = "Cho, Wendy K. Tam and Liu, Yan Y.",
  title     = "Parallel Hybrid Metaheuristics with Distributed Intensification and Diversification for Large-scale Optimization in Big Data Statistical Analysis",
  booktitle = "Proceedings - 2019 {IEEE} International Conference on Big Data, Big Data 2019",
  editor    = "Baru, Chaitanya and Huan, Jun and Khan, Latifur and Hu, Xiaohua Tony and Ak, Ronay and Tian, Yuanyuan and Barga, Roger and Zaniolo, Carlo and Lee, Kisung and Ye, Yanfang Fanny",
  pages     = "3312--3320",
  publisher = "Institute of Electrical and Electronics Engineers Inc.",
  address   = "United States",
  year      = "2019",
  month     = dec,
  doi       = "10.1109/BigData47090.2019.9006045",
  isbn      = "978-1-7281-0858-2",
  language  = "English (US)",
  keywords  = "Causal Inference, Diversification and Intensification, Optimization, Statistics",
  abstract  = "Important insights into many data science problems that are traditionally analyzed via statistical models can be obtained by re-formulating and evaluating within a large-scale optimization framework. However, the theoretical underpinnings of the statistical model may shift the goal of the decision space traversal from a traditional search for a single optimal solution to a traversal with the purpose of yielding a set of high quality, independent solutions. We examine statistical frameworks with astronomical decision spaces that translate to optimization problem but are challenging for standard optimization methodologies. We address the new challenges by designing a hybrid metaheuristic with specialized intensification and diversification protocols in the base search algorithm. Our algorithm is extended to the high performance computing realm using the Stampede2 supercomputer where we experimentally demonstrate the effectiveness of our algorithm to utilize multiple processors to collaboratively hill climb, broadcast messages to one another regarding landscape characteristics, diversify across the solution landscape, and request aid in climbing particularly difficult peaks.",
  note      = "Funding Information: Yan Y. Liu{\textquoteright}s work in this paper is partly supported by the Laboratory Directed Research and Development Program of Oak Ridge National Laboratory, managed by UT-Battelle, LLC, for the US Department of Energy under contract DE-AC05-00OR22725. The United States Government retains and the publisher, by accepting the article for publication, acknowledges that the United States Government retains a non-exclusive, paid-up, irrevocable, world-wide license to publish or reproduce the published form of this manuscript, or allow others to do so, for United States Government purposes. The Department of Energy will provide public access to these results of federally sponsored research in accordance with the DOE Public Access Plan. Both authors contributed equally to this project. 978-1-7281-0858-2/19/\$31.00 {\textcopyright}2019 IEEE. Funding Information: The experiments conducted in this paper used the Extreme Science and Engineering Discovery Environment (XSEDE) resources, which are supported by National Science Foundation grant number ACI-1548562. Specifically, the authors acknowledge the Texas Advanced Computing Center (TACC) at The University of Texas at Austin for providing HPC resources, i.e., the Stampede2 system, that have contributed to the research results reported within this paper.; 2019 IEEE International Conference on Big Data, Big Data 2019 ; Conference date: 09-12-2019 Through 12-12-2019",
}