@inproceedings{57ff6f7db06a40fd97b4ad49b8af7a30,
title = "PARSEC: PARallel Subgraph Enumeration in CUDA",
abstract = "Subgraph enumeration is an important problem in the field of Graph Analytics with numerous applications. The problem is provably NP-complete and requires sophisticated heuristics and highly efficient implementations to be feasible on problem sizes of realistic scales. Parallel solutions have shown a lot of promise on CPUs and distributed environments. Recently, GPU-based parallel solutions have also been proposed to take advantage of the massive execution resources in modern GPUs. Subgraph enumeration involves traversing a search tree for each vertex of the data graph to find matches of a query in a graph. Most GPU-based solutions traverse the tree in breadth-first manner that exploits parallelism at the cost of high memory requirement and presents a formidable challenge for processing large graphs with high-degree vertices since the memory capacity of GPUs is significantly lower than that of CPUs. In this work, we propose a novel GPU solution based on a hybrid BFS and DFS approach where the top level(s) of the search trees are traversed in a fully parallel, breadth-first manner while each subtree is traversed in a more space-efficient, depth-first manner. The depth-first traversal of subtrees requires less memory but presents more challenges for parallel execution. To overcome the less parallel nature of depth-first traversal, we exploit fine-grained parallelism in each step of the depth-first traversal of sub-trees. We further identify and implement various optimizations to efficiently utilize memory and compute resources of the GPUs. We evaluate our performance in comparison with the state-of-the-art GPU and CPU implementations. We outperform the GPU and CPU implementations with a geometric mean speedup of 9.47× (up to 92.01×) and 2.37× (up to 12.70×), respectively. We also show that the proposed approach can efficiently process the graphs that previously cannot be processed by the state-of-the-art GPU solutions due to their excessive memory requirement.",
keywords = "CUDA, DFS, Nvidia GPU, graph pattern matching, hybrid BFS+DFS, parallel, subgraph isomorphism, subgraph matching, tree traversal",
author = "Vibhor Dodeja and Mohammad Almasri and Rakesh Nagi and Jinjun Xiong and Hwu, {Wen Mei}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 36th IEEE International Parallel and Distributed Processing Symposium, IPDPS 2022 ; Conference date: 30-05-2022 Through 03-06-2022",
year = "2022",
doi = "10.1109/IPDPS53621.2022.00025",
language = "English (US)",
series = "Proceedings - 2022 IEEE 36th International Parallel and Distributed Processing Symposium, IPDPS 2022",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "168--178",
booktitle = "Proceedings - 2022 IEEE 36th International Parallel and Distributed Processing Symposium, IPDPS 2022",
address = "United States",
}