% Conference paper from the OOPSLA 2015 proceedings (key kept as exported).
% - `address` is the publisher's location, not the conference venue.
% - Acknowledgement, copyright, and conference-date text is kept in `annote`
%   (not rendered by the standard styles) instead of `note`, which would be
%   printed verbatim in the bibliography.
% - Braces inside `title`/`booktitle` protect words from style recasing.
@inproceedings{7983651961564c70a8cab061c60bed0d,
  author    = {Wang, Haichuan and Padua, David and Wu, Peng},
  title     = {Vectorization of apply to reduce interpretation overhead of {R}},
  booktitle = {{OOPSLA} 2015 - Proceedings of the 2015 {ACM} {SIGPLAN} International Conference on Object-Oriented Programming Systems, Languages, and Applications},
  editor    = {Eugster, Patrick and Aldrich, Jonathan},
  series    = {Proceedings of the Conference on Object-Oriented Programming Systems, Languages, and Applications, OOPSLA},
  pages     = {400--415},
  year      = {2015},
  month     = oct,
  day       = {23},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/2814270.2814273},
  keywords  = {Dynamic Language, R, Vectorization},
  language  = {English (US)},
  abstract  = {R is a popular dynamic language designed for statistical computing. Despite R's huge user base, the inefficiency in R's language implementation becomes a major pain-point in everyday use as well as an obstacle to apply R to solve large scale analytics problems. The two most common approaches to improve the performance of dynamic languages are: implementing more efficient interpretation strategies and extending the interpreter with Just-In-Time (JIT) compiler. However, both approaches require significant changes to the interpreter, and complicate the adoption by development teams as a result. This paper presents a new approach to improve execution efficiency of R programs by vectorizing the widely used Apply class of operations. Apply accepts two parameters: a function and a collection of input data elements. The standard implementation of Apply iteratively invokes the input function with each element in the data collection. Our approach combines data transformation and function vectorization to convert the looping-over-data execution of the standard Apply into a single invocation of a vectorized function that contains a sequence of vector operations over the input data. This conversion can significantly speed-up the execution of Apply operations in R by reducing the number of interpretation steps. We implemented the vectorization transformation as an R package. To enable the optimization, all that is needed is to invoke the package, and the user can use a normal R interpreter without any changes. The evaluation shows that the proposed method delivers significant performance improvements for a collection of data analysis algorithm benchmarks. This is achieved without any native code generation and using only a single-thread of execution.},
  annote    = {Funding Information: We thank Olivier Tardieu, Luke Tierney, Jan Vitek and Michael Lawrence for their suggestions and help. We also thank anonymous reviewers and attendees of the RIOT2015 workshop for their valuable comments. This material is based upon work supported by Huawei gift funding, IBM Fellowship, and the National Science Foundation under Award CNS 1111407. Publisher Copyright: {\textcopyright} 2015 ACM.; 2015 ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications, OOPSLA 2015 ; Conference date: 25-10-2015 Through 30-10-2015},
}