% Banerjee and Langford, KDD 2004 conference paper (inproceedings entry).
% Cleaned-up repository export:
%   - the redundant "series" field (an exact copy of "booktitle") was removed
%     so the proceedings name is not printed twice;
%   - "address" now holds the publisher's location rather than a country;
%   - the exporter's "note" was split: the conference dates live in
%     "month"/"eventdate", and the copyright boilerplate is kept only in the
%     non-printing "internal-note" field.
% The citation key is unchanged so existing citations keep working.
@inproceedings{473e612a05ed48a0998e49dc340b437a,
  author        = {Banerjee, Arindam and Langford, John},
  title         = {An Objective Evaluation Criterion for Clustering},
  booktitle     = {{KDD-2004} - Proceedings of the Tenth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  year          = {2004},
  month         = aug,
  eventdate     = {2004-08-22/2004-08-25},
  pages         = {515--520},
  publisher     = {Association for Computing Machinery},
  address       = {New York, NY, USA},
  doi           = {10.1145/1014052.1014112},
  isbn          = {1581138881},
  language      = {English (US)},
  keywords      = {Clustering, Evaluation, MDL, PAC bounds},
  abstract      = {We propose and test an objective criterion for evaluation of clustering performance: How well does a clustering algorithm run on unlabeled data aid a classification algorithm? The accuracy is quantified using the PAC-MDL bound [3] in a semisupervised setting. Clustering algorithms which naturally separate the data according to (hidden) labels with a small number of clusters perform well. A simple extension of the argument leads to an objective model selection method. Experimental results on text analysis datasets demonstrate that this approach empirically results in very competitive bounds on test set performance on natural datasets.},
  internal-note = {Copyright 2020 Elsevier B.V., All rights reserved.},
}