% Survey article on data mining techniques, IEEE TKDE 8(6), 1996.
% The citation key is an opaque hexadecimal identifier and is kept unchanged
% so that existing citations of this entry keep resolving.
% The funding acknowledgement lives in annote (not printed by the standard
% styles) instead of note, which the standard styles print verbatim.
@article{dc72add9f0984e14b9d7251af1964501,
  author    = {Chen, Ming-Syan and Han, Jiawei and Yu, Philip S.},
  title     = {Data Mining: An Overview from a Database Perspective},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {8},
  number    = {6},
  pages     = {866--883},
  year      = {1996},
  publisher = {IEEE Computer Society},
  issn      = {1041-4347},
  doi       = {10.1109/69.553155},
  language  = {English (US)},
  keywords  = {Association rules, Classification, Data clustering, Data cubes, Data generalization and characterization, Data mining, Knowledge discovery, Multiple-dimensional databases, Pattern matching algorithms},
  abstract  = {Mining information and knowledge from large databases has been recognized by many researchers as a key research topic in database systems and machine learning, and by many industrial companies as an important area with an opportunity of major revenues. Researchers in many different fields have shown great interest in data mining. Several emerging applications in information providing services, such as data warehousing and on-line services over the Internet, also call for various data mining techniques to better understand user behavior, to improve the service provided, and to increase the business opportunities. In response to such a demand, this article is to provide a survey, from a database researcher's point of view, on the data mining techniques developed recently. A classification of the available data mining techniques is provided, and a comparative study of such techniques is presented.},
  annote    = {Funding Information: Jiawei Han was supported, in part, by Research Grant NSERC-A3723 from the Natural Sciences and Engineering Research Council of Canada, Research Grant NCE: IRIS/Precarn-HMI5 from the Networks of Centers of Excellence of Canada, and research grants from MPR Teltech Ltd. and Hughes Research Laboratories.},
}