@comment{Conference paper in the ICDE 2019 proceedings. The day and language fields are not standard BibTeX fields; classic styles ignore unknown fields, and they are kept here as metadata.}
@inproceedings{1109b583d3354c58b56ead52d534133a,
  author    = {Zachariah, Arun and Rao, Praveen and Katib, Anas and Senapati, Monica and Barnard, Jacobus J.},
  title     = {A gossip-based system for fast approximate score computation in multinomial {Bayesian} networks},
  booktitle = {Proceedings - 2019 {IEEE} 35th International Conference on Data Engineering, {ICDE} 2019},
  series    = {Proceedings - International Conference on Data Engineering},
  publisher = {IEEE Computer Society},
  year      = {2019},
  month     = apr,
  day       = {1},
  pages     = {1968--1971},
  doi       = {10.1109/ICDE.2019.00216},
  language  = {English (US)},
  keywords  = {Approximate score computation, Bayesian networks, Gossip algorithms, Large scale data},
  abstract  = {In this paper, we present a system for fast approximate score computation, a fundamental task for score-based structure learning of multinomial Bayesian networks. Our work is motivated by the fact that exact score computation on large datasets is very time consuming. Our system enables approximate score computation on large datasets in an efficient and scalable manner with probabilistic error bounds on the statistics required for score computation. Our system has several novel features including gossip-based decentralized computation of statistics, lower resource consumption via a probabilistic approach of maintaining statistics, and effective distribution of tasks for score computation using hashing techniques. The demo will provide a real-time and interactive experience to a user on how our system employs the principle of gossiping and hashing techniques in a novel way for fast approximate score computation. The user will be able to control different aspects of our system's execution on a cluster with up to 32 nodes. The approximate scores output by our system can be then used by existing score-based structure learning algorithms.},
  note      = {35th IEEE International Conference on Data Engineering, ICDE 2019; conference date: 2019-04-08 through 2019-04-11},
}