% Conference paper in the SAAHPC 2011 proceedings.
% There is deliberately no `series` field: the exported value was a verbatim
% copy of `booktitle`, which makes standard styles print the proceedings
% title twice.
@inproceedings{d3fb120c71a649f799650e844b8b31cb,
  author    = {Ye, Dong and Titov, Alexey and Kindratenko, Volodymyr and Ufimtsev, Ivan and Martinez, Todd},
  title     = {Porting optimized {GPU} kernels to a multi-core {CPU}: Computational quantum chemistry application example},
  booktitle = {Proceedings - 2011 Symposium on Application Accelerators in High-Performance Computing, {SAAHPC} 2011},
  year      = {2011},
  pages     = {72--75},
  doi       = {10.1109/SAAHPC.2011.8},
  isbn      = {9780769544489},
  language  = {english},
  keywords  = {Common sub-expression elimination, GPU, Loop merging, OpenMP, Register reuse, Unrolling},
  abstract  = {We investigate techniques for optimizing a multicore CPU code backported from a highly optimized GPU kernel. We show that common sub-expression elimination and loop unrolling optimization techniques improve code performance on the GPU, but not on the CPU. On the other hand, register reuse and loop merging are effective on the CPU and in combination they improve performance of the ported code by 16\%.},
  note      = {2011 Symposium on Application Accelerators in High-Performance Computing, SAAHPC 2011; Conference date: 19-07-2011 Through 20-07-2011},
}