% Chapter by Warnow and Mirarab in the Methods in Molecular Biology series (2021).
% Typed as incollection because the record carries chapter authors plus a booktitle.
% NOTE(review): booktitle is identical to series, which looks like an export
% artifact; the volume's own title, editor and number are not in this record --
% confirm against the DOI and fill them in.
@incollection{9cbd33c39d1144c981d4a9f75ecc9478,
  author    = {Warnow, Tandy and Mirarab, Siavash},
  title     = {Multiple Sequence Alignment for Large Heterogeneous Datasets Using {SAT{\'e}}, {PASTA}, and {UPP}},
  booktitle = {Methods in Molecular Biology},
  series    = {Methods in Molecular Biology},
  publisher = {Humana Press Inc.},
  year      = {2021},
  pages     = {99--119},
  doi       = {10.1007/978-1-0716-1036-7_7},
  language  = {English (US)},
  keywords  = {Ensembles of Hidden Markov Models, Multiple sequence alignment, PASTA, SAT{\'e}, UPP},
  abstract  = {The estimation of very large multiple sequence alignments is a challenging problem that requires special techniques in order to achieve high accuracy. Here we describe two software packages---PASTA and UPP---for constructing alignments on large and ultra-large datasets. Both methods have been able to produce highly accurate alignments on 1,000,000 sequences, and trees computed on these alignments are also highly accurate. PASTA provides the best tree accuracy when the input sequences are all full-length, but UPP provides improved accuracy compared to PASTA and other methods when the input contains a large number of fragmentary sequences. Both methods are available in open source form on GitHub.},
  note      = {Publisher Copyright: {\textcopyright} 2021, Springer Science+Business Media, LLC, part of Springer Nature.},
}