% Otoo, Rotem and Seshadri (2007): workshop paper on optimal chunking of
% large multidimensional arrays, published in the DOLAP'07 proceedings.
% The citation key looks like an exporter-generated identifier; it is kept
% unchanged so that existing citations of this entry continue to resolve.
% The month is taken from the conference dates recorded in the note field
% (read as dd-mm-yyyy).
@inproceedings{2aaf91b206c44c84bb1a9c7ab454a446,
  author    = {Otoo, E. J. and Rotem, Doron and Seshadri, Sridhar},
  title     = {Optimal chunking of large multidimensional arrays for data warehousing},
  booktitle = {{CIKM} 2007 Co-Located Workshops - Proceedings of {DOLAP}'07},
  series    = {{DOLAP}: Proceedings of the {ACM} International Workshop on Data Warehousing and {OLAP}},
  pages     = {25--32},
  year      = {2007},
  month     = nov,
  isbn      = {9781595938275},
  doi       = {10.1145/1317331.1317337},
  language  = {English (US)},
  keywords  = {chunking, data warehousing, multi-dimensional arrays},
  note      = {10th ACM International Workshop on Data Warehousing and OLAP, DOLAP'07 - Co-Located with CIKM 2007 ; Conference date: 06-11-2007 Through 09-11-2007},
  abstract  = {Very large multidimensional arrays are commonly used in data intensive scientific computations as well as on-line analytical processing applications referred to as MOLAP. The storage organization of such arrays on disks is done by partitioning the large global array into fixed size sub-arrays called chunks or tiles that form the units of data transfer between disk and memory. Typical queries involve the retrieval of sub-arrays in a manner that access all chunks that overlap the query results. An important metric of the storage efficiency is the expected number of chunks retrieved over all such queries. The question that immediately arises is ``what shapes of array chunks give the minimum expected number of chunks over a query workload?'' The problem of optimal chunking was first introduced by Sarawagi and Stonebraker [11] who gave an approximate solution. In this paper we develop exact mathematical models of the problem and provide exact solutions using steepest descent and geometric programming methods. Experimental results, using synthetic and real life workloads, show that our solutions are consistently within 2.0\% of the true number of chunks retrieved for any number of dimensions. In contrast, the approximate solution of [11] can deviate considerably from the true result with increasing number of dimensions and also may lead to suboptimal chunk shapes.},
}