% ASP-DAC 2021 conference paper. The publisher is recorded as ACM to agree with
% the 10.1145 DOI prefix and the copyright notice kept in the note field.
@inproceedings{bfef6707d5804fa1a7ec1a72fc70217b,
  author    = {Huang, Sitao and Ankit, Aayush and Silveira, Plinio and Antunes, Rodrigo and Chalamalasetti, Sai Rahul and El Hajj, Izzat and Kim, Dong Eun and Aguiar, Glaucimar and Bruel, Pedro and Serebryakov, Sergey and Xu, Cong and Li, Can and Faraboschi, Paolo and Strachan, John Paul and Chen, Deming and Roy, Kaushik and Hwu, Wen Mei and Milojicic, Dejan},
  title     = {Mixed Precision Quantization for {ReRAM}-based {DNN} Inference Accelerators},
  booktitle = {Proceedings of the 26th Asia and South Pacific Design Automation Conference, {ASP-DAC} 2021},
  series    = {Proceedings of the Asia and South Pacific Design Automation Conference, {ASP-DAC}},
  pages     = {372--377},
  year      = {2021},
  month     = jan,
  day       = {18},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3394885.3431554},
  language  = {English (US)},
  keywords  = {DNN inference accelerators, Mixed precision quantization, ReRAM},
  note      = {Publisher Copyright: {\textcopyright} 2021 Association for Computing Machinery.; 26th Asia and South Pacific Design Automation Conference, ASP-DAC 2021 ; Conference date: 18-01-2021 Through 21-01-2021},
  abstract  = {ReRAM-based accelerators have shown great potential for accelerating DNN inference because ReRAM crossbars can perform analog matrix-vector multiplication operations with low latency and energy consumption. However, these crossbars require the use of ADCs which constitute a significant fraction of the cost of MVM operations. The overhead of ADCs can be mitigated via partial sum quantization. However, prior quantization flows for DNN inference accelerators do not consider partial sum quantization which is not highly relevant to traditional digital architectures. To address this issue, we propose a mixed precision quantization scheme for ReRAM-based DNN inference accelerators where weight quantization, input quantization, and partial sum quantization are jointly applied for each DNN layer. We also propose an automated quantization flow powered by deep reinforcement learning to search for the best quantization configuration in the large design space. Our evaluation shows that the proposed mixed precision quantization scheme and quantization flow reduce inference latency and energy consumption by up to 3.89$\times$ and 4.84$\times$, respectively, while only losing 1.18\% in DNN inference accuracy.},
}