% Conference paper (ICDCS 2017). The citation key is left unchanged so that
% existing \cite commands keep resolving. Names are stored in "Last, First"
% form; the style decides how they are abbreviated and ordered.
@inproceedings{2f33765f38c9417798f56691a5366f19,
  author    = {Zhang, Daniel Yue and Zheng, Chao and Wang, Dong and Thain, Doug and Mu, Xin and Madey, Greg and Huang, Chao},
  title     = {Towards Scalable and Dynamic Social Sensing Using A Distributed Computing Framework},
  booktitle = {Proceedings - IEEE 37th International Conference on Distributed Computing Systems, ICDCS 2017},
  editor    = {Lee, Kisung and Liu, Ling},
  series    = {Proceedings - International Conference on Distributed Computing Systems},
  pages     = {966--976},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2017},
  month     = jul,
  day       = {13},
  doi       = {10.1109/ICDCS.2017.196},
  language  = {English (US)},
  keywords  = {Control Theory, Crowdsourcing, Distributed Computing, Hidden Markov Model, Social Sensing, Truth Discovery},
  abstract  = {With the rapid growth of online social media and ubiquitous Internet connectivity, social sensing has emerged as a new crowdsourcing application paradigm of collecting observations (often called claims) about the physical environment from humans or devices on their behalf. A fundamental problem in social sensing applications lies in effectively ascertaining the correctness of claims and the reliability of data sources without knowing either of them a priori, which is referred to as truth discovery. While significant progress has been made to solve the truth discovery problem, some important challenges have not been well addressed yet. First, existing truth discovery solutions did not fully solve the dynamic truth discovery problem where the ground truth of claims changes over time. Second, many current solutions are not scalable to large-scale social sensing events because of the centralized nature of their truth discovery algorithms. Third, the heterogeneity and unpredictability of the social sensing data traffic pose additional challenges to the resource allocation and system responsiveness. In this paper, we developed a Scalable Streaming Truth Discovery (SSTD) solution to address the above challenges. In particular, we first developed a dynamic truth discovery scheme based on Hidden Markov Models (HMM) to effectively infer the evolving truth of reported claims. We further developed a distributed framework to implement the dynamic truth discovery scheme using Work Queue in HTCondor system. We also integrated the SSTD scheme with an optimal workload allocation mechanism to dynamically allocate the resources (e.g., cores, memories) to the truth discovery tasks based on their computation requirements. We evaluated SSTD through real world social sensing applications using Twitter data feeds. The evaluation results on three real-world data traces (i.e., Boston Bombing, Paris Shooting and College Football) show that the SSTD scheme is scalable and outperforms the state-of-the-art truth discovery methods in terms of both effectiveness and efficiency.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 37th IEEE International Conference on Distributed Computing Systems, ICDCS 2017 ; Conference date: 05-06-2017 Through 08-06-2017},
}