@inproceedings{24cac5da96054f82af6433c83908b6f1,
title = "Brown Dog: Leveraging everything towards autocuration",
abstract = "We present Brown Dog, two highly extensible services that aim to leverage any existing pieces of code, libraries, services, or standalone software (past or present) towards providing users with a simple to use and programmable means of automated aid in the curation and indexing of distributed collections of uncurated and/or unstructured data. Data collections such as these encompassing large varieties of data, in addition to large amounts of data, pose a significant challenge within modern day «Big Data» efforts. The two services, the Data Access Proxy (DAP) and the Data Tilling Service (DTS), focusing on format conversions and content based analysis/extraction respectively, wrap relevant conversion and extraction operations within arbitrary software, manages their deployment in an elastic manner, and manages job execution from behind a deliberately compact REST API. We describe both the motivation and need/scientific drivers for such services, the constituent components that allow for arbitrary software/code to be used and managed, and lastly an evaluation of the systems capabilities and scalability.",
keywords = "digital preservation, unstructured data, web services",
author = "Smruti Padhy and Greg Jansen and Jay Alameda and Edgar Black and Liana Diesendruck and Mike Dietze and Praveen Kumar and Rob Kooper and Lee, {Jong Sung} and Rui Liu and Richard Marciano and Luigi Marini and Dave Mattson and Barbara Minsker and Chris Navarro and Marcus Slavenas and Sullivan, {William C} and Jason Votava and Inna Zharnitsky and McHenry, {Kenton Guadron}",
note = "Publisher Copyright: {\textcopyright} 2015 IEEE.; 3rd IEEE International Conference on Big Data, IEEE Big Data 2015 ; Conference date: 29-10-2015 Through 01-11-2015",
year = "2015",
month = dec,
day = "22",
doi = "10.1109/BigData.2015.7363791",
language = "English (US)",
series = "Proceedings - 2015 IEEE International Conference on Big Data, IEEE Big Data 2015",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "493--500",
editor = "Feng Luo and Kemafor Ogan and Zaki, {Mohammed J.} and Laura Haas and Ooi, {Beng Chin} and Vipin Kumar and Sudarsan Rachuri and Saumyadipta Pyne and Howard Ho and Xiaohua Hu and Shipeng Yu and Hsiao, {Morris Hui-I} and Jian Li",
booktitle = "Proceedings - 2015 IEEE International Conference on Big Data, IEEE Big Data 2015",
address = "United States",
}