@inproceedings{58ce18e427e34473a8812664ddd0fcb3,
  author    = {Salvemini, Eliana and Fumarola, Fabio and Malerba, Donato and Han, Jiawei},
  title     = {{FAST} Sequence Mining Based on Sparse Id-Lists},
  booktitle = {Foundations of Intelligent Systems -- 19th International Symposium, {ISMIS} 2011, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  pages     = {316--325},
  year      = {2011},
  month     = jul,
  day       = {14},
  doi       = {10.1007/978-3-642-21916-0_35},
  isbn      = {9783642219153},
  language  = {English (US)},
  keywords  = {Data Mining, Sequential Pattern Discovery, Sparse Id-List},
  abstract  = {Sequential pattern mining is an important data mining task with applications in basket analysis, world wide web, medicine and telecommunication. This task is challenging because sequence databases are usually large with many and long sequences and the number of possible sequential patterns to mine can be exponential. We proposed a new sequential pattern mining algorithm called FAST which employs a representation of the dataset with indexed sparse id-lists to fast counting the support of sequential patterns. We also use a lexicographic tree to improve the efficiency of candidates generation. FAST mines the complete set of patterns by greatly reducing the effort for support counting and candidate sequences generation. Experimental results on artificial and real data show that our method outperforms existing methods in literature up to an order of magnitude or two for large datasets.},
  note      = {19th International Symposium on Methodologies for Intelligent Systems, ISMIS 2011 ; Conference date: 28-06-2011 Through 30-06-2011},
}