@inproceedings{78458c88fe014e778ac53370e9c807cc,
title = "YACC: A Framework Generalizing Tur{\'a}nShadow for Counting Large Cliques",
abstract = "Clique-counting is a fundamental problem that has application in many areas eg. dense subgraph discovery, community detection, spam detection, etc. The problem of k-clique-counting is difficult because as k increases, the number of k-cliques goes up exponentially. Enumeration algorithms (even parallel ones) fail to count k-cliques beyond a small k. Approximation algorithms, like Tur{\'a}nShadow have been shown to perform well upto k = 10, but are inefficient for larger cliques. The recently proposed Pivoter algorithm significantly improved the state-of-the-art and was able to give exact counts of all k-cliques in a large number of graphs. However, the clique counts of some graphs (for example, com-lj) are still out of reach of these algorithms. We revisit the Tur{\'a}nShadow algorithm and propose a generalized framework called YACC that leverages several insights about real-world graphs to achieve faster clique-counting. The bottleneck in Tur{\'a}nShadow is a recursive subroutine whose stopping condition is based on a classic result from extremal combinatorics called Tur{\'a}n{\textquoteright}s theorem. This theorem gives a lower bound for the k-clique density in a subgraph in terms of its edge density. However, this stopping condition is based on a worst-case graph that does not reflect the nature of real-world graphs. Using techniques for quickly discovering dense subgraphs, we relax the stopping condition in a systematic way such that we get a smaller recursion tree while still maintaining the guarantees provided by Tur{\'a}nShadow. We deploy our algorithm on several real-world data sets and show that YACC reduces the size of the recursion tree and the running time by over an order of magnitude. Using YACC, we are able to obtain clique counts for several graphs for which clique-counting was infeasible before, including com-lj.",
keywords = "Cliques, Degeneracy, Sampling, Tur{\'a}nShadow",
author = "Shweta Jain and Hanghang Tong",
note = "∗This work is supported by NSF (1947135, 2134079, and 1939725), and by NIFA award 2020-67021-32799. †Work done while at the University of Illinois, Urbana-Champaign.; 2022 SIAM International Conference on Data Mining, SDM 2022 ; Conference date: 28-04-2022 Through 30-04-2022",
year = "2022",
language = "English (US)",
series = "Proceedings of the 2022 SIAM International Conference on Data Mining, SDM 2022",
publisher = "Society for Industrial and Applied Mathematics Publications",
pages = "684--692",
booktitle = "Proceedings of the 2022 SIAM International Conference on Data Mining, SDM 2022",
address = "United States",
}