% Journal article: Nambiar, Forsyth, Liu and Maslov (2024), Structure 32(8) -- DR-BERT protein language model.
@article{a31a1b8abc064643867438fae4581575,
  author    = {Nambiar, Ananthan and Forsyth, John Malcolm and Liu, Simon and Maslov, Sergei},
  title     = {{DR-BERT}: A protein language model to annotate disordered regions},
  journal   = {Structure},
  year      = {2024},
  month     = aug,
  day       = {8},
  volume    = {32},
  number    = {8},
  pages     = {1260--1268.e3},
  doi       = {10.1016/j.str.2024.04.010},
  issn      = {0969-2126},
  publisher = {Cell Press},
  language  = {English (US)},
  keywords  = {IDP, IDR, deep learning, disorder, machine learning, protein language model, protein structure prediction},
  abstract  = {Despite their lack of a rigid structure, intrinsically disordered regions (IDRs) in proteins play important roles in cellular functions, including mediating protein-protein interactions. Therefore, it is important to computationally annotate IDRs with high accuracy. In this study, we present Disordered Region prediction using Bidirectional Encoder Representations from Transformers (DR-BERT), a compact protein language model. Unlike most popular tools, DR-BERT is pretrained on unannotated proteins and trained to predict IDRs without relying on explicit evolutionary or biophysical data. Despite this, DR-BERT demonstrates significant improvement over existing methods on the Critical Assessment of protein Intrinsic Disorder (CAID) evaluation dataset and outperforms competitors on two out of four test cases in the CAID 2 dataset, while maintaining competitiveness in the others. This performance is due to the information learned during pretraining and DR-BERT's ability to use contextual information.},
  note      = {This work utilizes resources supported by the National Science Foundation's Major Research Instrumentation program, grant \#1725729, as well as the University of Illinois at Urbana-Champaign. This work was partially supported by NSF grant \#2107344. Part of this work was performed under the auspices of the U.S. Department of Energy by Argonne National Laboratory under Contract DE-AC02-06-CH11357. J.M.F. and S.L. have been supported by the James Scholar Honors Program and the Illinois Scholars Undergraduate Research Program. We thank Mark Hopkins, Anna Ritz, Ashley Blystone, and Desiree Odgers for insightful discussions. All authors designed the study. S.M. supervised the study and A.N., J.M.F., and S.L. performed simulations and calculations. All authors discussed and wrote the paper. The authors declare no competing interests.},
}