% Workshop paper on the Loo.py program transformation system (ARRAY 2016, co-located with PLDI 2016).
% The abstract, copyright, keywords, day and language fields are not used by the standard BibTeX styles.
@inproceedings{1b0e89222d7d4a99ba64335a359c5614,
title = "Array program transformation with {Loo.py} by example: High-order finite elements",
abstract = "To concisely and effectively demonstrate the capabilities of our program transformation system Loo.py, we examine a transformation path from two real-world Fortran subroutines as found in a weather model to a single high-performance computational kernel suitable for execution on modern GPU hardware. Along the transformation path, we encounter kernel fusion, vectorization, prefetching, parallelization, and algorithmic changes achieved by mechanized conversion between imperative and functional/substitution-based code, among a number more. We conclude with performance results that demonstrate the effects and support the effectiveness of the applied transformations.",
copyright = "Copyright is held by the owner/author(s). Publication rights licensed to ACM.",
keywords = "Code generation, Embedded language, GPU, High-level language, High-performance, OpenCL, Program transformation, Substitution rule",
author = "Kl{\"o}ckner, Andreas and Wilcox, Lucas C. and Warburton, T.",
year = "2016",
month = jun,
day = "14",
doi = "10.1145/2935323.2935325",
language = "English (US)",
publisher = "Association for Computing Machinery",
pages = "9--16",
editor = "Grelck, Clemens and Elsman, Martin and Kl{\"o}ckner, Andreas and Padua, David",
booktitle = "ARRAY 2016 - Proceedings of the 3rd ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming, Co-located with PLDI 2016",
address = "New York, NY, USA",
note = "3rd ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming, ARRAY 2016 ; Conference date: 14-06-2016",
}