% Elsik et al. (2014), BMC Genomics 15:86 -- honey bee genome upgrade (Amel_4.5, OGSv3.2).
% The citation key is the exporter-generated identifier; it is kept unchanged so that
% existing \cite commands continue to resolve.
% - The funding/acknowledgement text is stored in the non-standard "funding" field, which
%   bibliography styles ignore; as "note" it would be printed in every rendered reference.
% - "pages" carries the article number (the journal has no page ranges), taken from the DOI suffix.
% - LaTeX specials in the abstract (underscore, tilde, percent) are escaped so that styles
%   which print abstracts do not break.
% - NOTE(review): the group author was exported in first position; it is placed in its
%   alphabetical slot so the entry cites as "Elsik et al." -- confirm position against the
%   publisher record. The surname "Van Vaerenbergh" was exported as "Vaerenbergh, Matthias V."
%   -- confirm as well.
@article{15bf66e458bc465691caf3969d35d9f3,
  author    = {Elsik, Christine G.
               and Worley, Kim C.
               and Bennett, Anna K.
               and Beye, Martin
               and Camara, Francisco
               and Childers, Christopher P.
               and {de Graaf}, Dirk C.
               and Debyser, Griet
               and Deng, Jixin
               and Devreese, Bart
               and Elhaik, Eran
               and Evans, Jay D.
               and Foster, Leonard J.
               and Graur, Dan
               and Guigo, Roderic
               and {HGSC production teams}
               and Hoff, Katharina J.
               and Holder, Michael E.
               and Hudson, Matthew E.
               and Hunt, Greg J.
               and Jiang, Huaiyang
               and Joshi, Vandita
               and Khetani, Radhika S.
               and Kosarev, Peter
               and Kovar, Christie L.
               and Ma, Jian
               and Maleszka, Ryszard
               and Moritz, Robin F. A.
               and Munoz-Torres, Monica C.
               and Murphy, Terence D.
               and Muzny, Donna M.
               and Newsham, Irene F.
               and Reese, Justin T.
               and Robertson, Hugh M.
               and Robinson, Gene E.
               and Rueppell, Olav
               and Solovyev, Victor
               and Stanke, Mario
               and Stolle, Eckart
               and Tsuruda, Jennifer M.
               and {Van Vaerenbergh}, Matthias
               and Waterhouse, Robert M.
               and Weaver, Daniel B.
               and Whitfield, Charles W.
               and Wu, Yuanqing
               and Zdobnov, Evgeny M.
               and Zhang, Lan
               and Zhu, Dianhui
               and Gibbs, Richard A.
               and Patil, S.
               and Gubbala, S.},
  title     = {Finding the missing honey bee genes: Lessons learned from a genome upgrade},
  journal   = {BMC Genomics},
  volume    = {15},
  number    = {1},
  pages     = {86},
  year      = {2014},
  month     = jan,
  day       = {30},
  publisher = {BioMed Central Ltd.},
  issn      = {1471-2164},
  doi       = {10.1186/1471-2164-15-86},
  language  = {English (US)},
  keywords  = {Apis mellifera, GC content, Gene annotation, Gene prediction, Genome assembly, Genome improvement, Genome sequencing, Repetitive DNA, Transcriptome},
  abstract  = {Background: The first generation of genome sequence assemblies and annotations have had a significant impact upon our understanding of the biology of the sequenced species, the phylogenetic relationships among species, the study of populations within and across species, and have informed the biology of humans. As only a few Metazoan genomes are approaching finished quality (human, mouse, fly and worm), there is room for improvement of most genome assemblies. The honey bee (Apis mellifera) genome, published in 2006, was noted for its bimodal GC content distribution that affected the quality of the assembly in some regions and for fewer genes in the initial gene set (OGSv1.0) compared to what would be expected based on other sequenced insect genomes. Results: Here, we report an improved honey bee genome assembly (Amel\_4.5) with a new gene annotation set (OGSv3.2), and show that the honey bee genome contains a number of genes similar to that of other insect genomes, contrary to what was suggested in OGSv1.0. The new genome assembly is more contiguous and complete and the new gene set includes {\textasciitilde}5000 more protein-coding genes, 50\% more than previously reported. About 1/6 of the additional genes were due to improvements to the assembly, and the remaining were inferred based on new RNAseq and protein data. Conclusions: Lessons learned from this genome upgrade have important implications for future genome sequencing projects. Furthermore, the improvements significantly enhance genomic resources for the honey bee, a key model for social behavior and essential to global ecology through pollination.},
  funding   = {Funding Information: Funding for the project was provided by a grant to RG from the National Human Genome Research Institute, National Institutes of Health (NHGRI, NIH) U54 HG003273. Contributions from members of the CGE lab were supported by Agriculture and Food Research Initiative Competitive grant no. 2010-65205-20407 from the USDA National Institute of Food Agriculture. AKB was supported by a Clare Luce Booth Fellowship at Georgetown University. The authors are grateful for the HGSC sequence production teams (Patil,S., Gubbala,S., Aqrawi,P., Arias,F., Bess,C., Blankenburg,K.B., Brocchini,M., Buhay,C., Challis,D., Chang,K., Chen,D., Coleman,P., Drummond,J., English,A., Evani,U., Francisco,L., Fu,Q., Goodspeed,R., Haessly,T.H., Hale,W., Han,H., Holder,M., Hu,Y., Jackson,L., Jakkamsetti,A., Jayaseelan,J.C., Kakkar,N., Kalra,D., Kandadi,H., Lee,S., Li,H., Liu,Y., Macmil,S., Mandapat,C.M., Mata,R., Mathew,T., Matskevitch,T., Munidasa,M., Nagaswamy,U., Najjar,R., Nguyen,N., Niu,J., Opheim,D., Palculict,T., Paul,S., Pellon,M., Perales,L., Pham,C., Pham,P., Pu,L.-L., Qi,S., Qu,J., Ren,Y., Ruth, R.T., Saada,N., Sabo,A., San Lucas,F., Sershen,C., Shafer,J., Shah,N., Shelton,R., Song,X.-Z., Tabassum,N., Tang,L., Taylor,A., Taylor,M., Velamala,V., Wan,Z., Wang,L., Wang,Y., Warren,J., Weissenberger,G., Wilczek-Boney,K.B., Yao,J., Yin,B., Yu,J., Zhang,J., Zhang,L., Zhou,C., Zhu,D., Zhu,Y., and Zou,X.), and the input of other members of the HGSC genome assembly team.},
}