% Journal article: Deep Mixed Model for marginal epistasis detection (BMC Bioinformatics, vol. 20, 2019).
% Maintenance notes:
%   - Literal ampersands in `note` are written as \& so the field typesets without a LaTeX alignment-tab error.
%   - Authors use the unambiguous "Last, First" form; given names are left unbraced so styles can abbreviate them.
%   - The citation key is kept as exported so existing \cite commands continue to resolve.
@article{5e8d76669c384222ad32c5c3fdebeeae,
title = "Deep mixed model for marginal epistasis detection and population stratification correction in genome-wide association studies",
abstract = "Background: Genome-wide Association Studies (GWAS) have contributed to unraveling associations between genetic variants in the human genome and complex traits for more than a decade. While many works have been invented as follow-ups to detect interactions between SNPs, epistasis are still yet to be modeled and discovered more thoroughly. Results: In this paper, following the previous study of detecting marginal epistasis signals, and motivated by the universal approximation power of deep learning, we propose a neural network method that can potentially model arbitrary interactions between SNPs in genetic association studies as an extension to the mixed models in correcting confounding factors. Our method, namely Deep Mixed Model, consists of two components: 1) a confounding factor correction component, which is a large-kernel convolution neural network that focuses on calibrating the residual phenotypes by removing factors such as population stratification, and 2) a fixed-effect estimation component, which mainly consists of an Long-short Term Memory (LSTM) model that estimates the association effect size of SNPs with the residual phenotype. Conclusions: After validating the performance of our method using simulation experiments, we further apply it to Alzheimer's disease data sets. Our results help gain some explorative understandings of the genetic architecture of Alzheimer's disease.",
keywords = "Deep learning, GWAS, Marginal epistasis, Mixed model",
author = "Wang, Haohan and Yue, Tianwei and Yang, Jingkang and Wu, Wei and Xing, Eric P.",
note = "Funding Information: Publication costs are funded and supported by the Department of Defense under Contract No. FA8721-05-C-0003 with Carnegie Mellon University for the operation of the Software Engineering Institute, a federally funded research and development center. This work is also supported by the National Institutes of Health grants R01-GM093156 and P30-DA035778. Funding Information: Data collection and sharing for this project were funded by the Alzheimer{\textquoteright}s Disease Neuroimaging Initiative (ADNI) (National Institutes of Health Grant U01 AG024904). The ADNI is funded by the National Institute on Aging and the National Institute of Biomedical Imaging and Bioengineering and through generous contributions from the following: Abbott; the Alzheimer{\textquoteright}s Association; the Alzheimer{\textquoteright}s Drug Discovery Foundation; Amorfix Life Sciences, Ltd.; AstraZeneca; Bayer HealthCare; BioClinica, Inc.; Biogen Idec, Inc.; Bristol-Myers Squibb Co.; Eisai, Inc.; Elan Pharmaceuticals, Inc.; Eli Lilly and Co.; F. Hoffmann-La Roche, Ltd., and its affiliated company Genentech, Inc.; GE Healthcare; Innogenetics, N.V.; IXICO, Ltd.; Janssen Alzheimer Immunotherapy Research \& Development, LLC.; Johnson \& Johnson Pharmaceutical Research \& Development, LLC.; Medpace, Inc.; Merck \& Co., Inc.; Meso Scale Diagnostics, LLC.; Novartis Pharmaceuticals Corp.; Pfizer, Inc.; Servier; Synarc, Inc.; Takeda Pharmaceutical Co. The Canadian Institutes of Health Research is providing funds to support the ADNI clinical sites in Canada. Private sector contributions are facilitated by the Foundation for the National Institutes of Health (www.fnih.org). The grantee organization is the Northern California Institute for Research and Education, and the study is coordinated by the Alzheimer{\textquoteright}s Disease Cooperative Study at the University of California, San Diego. 
ADNI data are disseminated by the Laboratory for Neuro Imaging at the University of California, Los Angeles. Publisher Copyright: {\textcopyright} 2019 The Author(s).",
year = "2019",
month = dec,
day = "27",
doi = "10.1186/s12859-019-3300-9",
language = "English (US)",
volume = "20",
journal = "BMC Bioinformatics",
issn = "1471-2105",
publisher = "BioMed Central Ltd.",
}