% Conference paper from the DATE 2016 proceedings: "Large vector extensions
% inside the HMC" (Alves, Diener, Santos, Carro), pages 1249--1254.
%
% Maintenance notes for this entry:
%   - Names use the unambiguous "Last, First" form with spaced initials.
%   - Acronyms (HMC, DATE) are brace-protected so sentence-casing styles keep
%     them upper-case.
%   - The publisher name has an extra brace pair so the literal " and " in it
%     is not treated as a list separator by biblatex.
%   - The exporter's duplicate "series" field (identical to "booktitle") was
%     dropped; funding and copyright boilerplate was moved out of "note" into
%     the non-standard fields "funding" and "copyright", which standard
%     styles do not print.
%   - NOTE(review): "address" holds a country, not the publisher's city;
%     the city is not recoverable from this record -- confirm and refine.
%   - NOTE(review): the abstract contains the non-ASCII multiplication sign;
%     fine for UTF-8 toolchains, verify if a style prints abstracts under
%     classic 8-bit BibTeX.
@inproceedings{fa02767b439e44a8b368a885fd1b8a8c,
  author    = {Alves, Marco A. Z. and Diener, Matthias and Santos, Paulo C. and Carro, Luigi},
  title     = {Large vector extensions inside the {HMC}},
  booktitle = {Proceedings of the 2016 Design, Automation and Test in Europe Conference and Exhibition, {DATE} 2016},
  publisher = {{Institute of Electrical and Electronics Engineers Inc.}},
  address   = {United States},
  year      = {2016},
  month     = apr,
  day       = {25},
  pages     = {1249--1254},
  language  = {English (US)},
  keywords  = {HMC, Near-data processing, vector instructions},
  abstract  = {One of the main challenges for embedded systems is the transfer of data between memory and processor. In this context, Hybrid Memory Cubes (HMCs) can provide substantial energy and bandwidth improvements compared to traditional memory organizations, while also allowing the execution of simple atomic instructions in the memory. However, the complex memory hierarchy still remains a bottleneck, especially for applications with a low reuse of data, limiting the usable parallelism of the HMC vaults and banks. In this paper, we introduce the HIVE architecture, which allows performing common vector operations directly inside the HMC, avoiding contention on the interconnections as well as cache pollution. Our mechanism achieves substantial speedups of up to 17.3× (9.4× on average) compared to a baseline system that performs vector operations in a 8-core processor. We show that the simple instructions provided by HMC actually hurt performance for streaming applications.},
  note      = {19th Design, Automation and Test in Europe Conference and Exhibition, {DATE} 2016; Conference date: 14-03-2016 through 18-03-2016},
  funding   = {The authors gratefully acknowledge the support of CNPq and CAPES.},
  copyright = {{\textcopyright} 2016 EDAA.},
}