% Conference paper in the AMCIS 2005 proceedings (pages 2495--2505).
% The citation key is kept as exported so existing citations keep resolving.
% The proceedings subtitle lives in booktitle; no separate series field is used
% because the proceedings are not part of a numbered publication series.
@inproceedings{3ae8b86e150649fdb9d6d28b08db901e,
  author    = {Wang, G. Alan},
  title     = {A multi-layer graphical model for approximate identity matching},
  booktitle = {Association for Information Systems -- 11th Americas Conference on Information Systems, {AMCIS} 2005: A Conference on a Human Scale},
  pages     = {2495--2505},
  year      = {2005},
  month     = aug,
  isbn      = {9781604235531},
  language  = {English (US)},
  keywords  = {Bayesian network, Graphical model, Identity matching, Record linkage, Semi-supervised learning, Similarity},
  abstract  = {Many organizations maintain identity information for their customers, vendors, and employees, etc. However, identities being compromised cannot be retrieved effectively. In this paper we first present a case study on identity problems existing in a local police department. The study shows that more than half of the sampled suspects have altered identities existing in the police information system due to deception and errors. We build a taxonomy of identity problems based on our findings. The decision to determine matching identities involves some uncertainty because of the problems identified. We propose a probability-based multi-layer graphical model to capture the uncertainty. Experiments show that the proposed model performs significantly better than the searching technique based on exact-match. With 20\% of training data labeled, the model with semi-supervised learning achieved performance comparable to that of fully supervised learning.},
  note      = {Conference held 11--15 August 2005},
}