% SCStory conference paper (WWW 2023 proceedings, ACM).
% The funding acknowledgement is stored in the non-printing "funding" field and
% the conference details in eventtitle/eventdate, so that neither is rendered
% in the bibliography through the "note" field.
@inproceedings{7002bf2a2182482281e0c9cdacb4ef41,
  author     = "Yoon, Susik and Meng, Yu and Lee, Dongha and Han, Jiawei",
  title      = "{SCStory}: Self-supervised and Continual Online Story Discovery",
  booktitle  = "{ACM} Web Conference 2023 - Proceedings of the World Wide Web Conference, {WWW} 2023",
  publisher  = "Association for Computing Machinery",
  address    = "New York, NY, USA",
  year       = "2023",
  month      = apr,
  day        = "30",
  pages      = "1853--1864",
  doi        = "10.1145/3543507.3583507",
  language   = "English (US)",
  keywords   = "Document Embedding, News Story Discovery, News Stream Mining",
  abstract   = "We present a framework SCStory for online story discovery, that helps people digest rapidly published news article streams in real-time without human annotations. To organize news article streams into stories, existing approaches directly encode the articles and cluster them based on representation similarity. However, these methods yield noisy and inaccurate story discovery results because the generic article embeddings do not effectively reflect the story-indicative semantics in an article and cannot adapt to the rapidly evolving news article streams. SCStory employs self-supervised and continual learning with a novel idea of story-indicative adaptive modeling of news article streams. With a lightweight hierarchical embedding module that first learns sentence representations and then article representations, SCStory identifies story-relevant information of news articles and uses them to discover stories. The embedding module is continuously updated to adapt to evolving news streams with a contrastive learning objective, backed up by two unique techniques, confidence-aware memory replay and prioritized-augmentation, employed for label absence and data scarcity problems. Thorough experiments on real and the latest news data sets demonstrate that SCStory outperforms existing state-of-the-art algorithms for unsupervised online story discovery.",
  eventtitle = "32nd {ACM} World Wide Web Conference, {WWW} 2023",
  eventdate  = "2023-04-30/2023-05-04",
  funding    = "The first author was supported by Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Education (2021R1A6A3A14043765). The research was supported in part by US DARPA KAIROS Program No. FA8750-19-2-1004 and INCAS Program No. HR001121C0165, National Science Foundation IIS-19-56151, IIS-17-41317, and IIS 17-04532, and the Molecule Maker Lab Institute: An AI Research Institutes program supported by NSF under Award No. 2019897, and the Institute for Geospatial Understanding through an Integrative Discovery Environment (I-GUIDE) by NSF under Award No. 2118329. The views and conclusions contained in this paper are those of the authors and should not be interpreted as representing any funding agencies.",
}