@inproceedings{f50dd036e40e4816b09e2deda0a0108e,
title = "Efficient mini-batch training for stochastic optimization",
abstract = "Stochastic gradient descent (SGD) is a popular technique for large-scale optimization problems in machine learning. In order to parallelize SGD, minibatch training needs to be employed to reduce the communication cost. However, an increase in minibatch size typically decreases the rate of convergence. This paper introduces a technique based on approximate optimization of a conservatively regularized objective function within each minibatch. We prove that the convergence rate does not decrease with increasing minibatch size. Experiments demonstrate that with suitable implementations of approximate optimization, the resulting algorithm can outperform standard SGD in many scenarios.",
keywords = "big data, distributed computing, machine learning, minibatch, stochastic gradient descent",
author = "Mu Li and Tong Zhang and Yuqiang Chen and Smola, {Alexander J.}",
year = "2014",
doi = "10.1145/2623330.2623612",
language = "English (US)",
isbn = "9781450329569",
series = "Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
publisher = "Association for Computing Machinery",
pages = "661--670",
booktitle = "KDD 2014 - Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
address = "United States",
note = "20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2014 ; Conference date: 24-08-2014 Through 27-08-2014",
}