2019
Dutta, Soumya; Biswas, Ayan; Ahrens, James
Multivariate Pointwise Information-Driven Data Sampling and Visualization Journal Article
In: Entropy, vol. 21, no. 7, 2019, ISSN: 1099-4300, (LA-UR-19-24243).
Abstract | Links | BibTeX | Tags: data reduction, multivariate sampling, query-driven visualization
@article{e21070699,
  author    = {Dutta, Soumya and Biswas, Ayan and Ahrens, James},
  title     = {Multivariate Pointwise Information-Driven Data Sampling and Visualization},
  journal   = {Entropy},
  volume    = {21},
  number    = {7},
  year      = {2019},
  date      = {2019-01-01},
  issn      = {1099-4300},
  doi       = {10.3390/e21070699},
  url       = {https://www.mdpi.com/1099-4300/21/7/699},
  abstract  = {With increasing computing capabilities of modern supercomputers, the size of the data generated from the scientific simulations is growing rapidly. As a result, application scientists need effective data summarization techniques that can reduce large-scale multivariate spatiotemporal data sets while preserving the important data properties so that the reduced data can answer domain-specific queries involving multiple variables with sufficient accuracy. While analyzing complex scientific events, domain experts often analyze and visualize two or more variables together to obtain a better understanding of the characteristics of the data features. Therefore, data summarization techniques are required to analyze multi-variable relationships in detail and then perform data reduction such that the important features involving multiple variables are preserved in the reduced data. To achieve this, in this work, we propose a data sub-sampling algorithm for performing statistical data summarization that leverages pointwise information theoretic measures to quantify the statistical association of data points considering multiple variables and generates a sub-sampled data that preserves the statistical association among multi-variables. Using such reduced sampled data, we show that multivariate feature query and analysis can be done effectively. The efficacy of the proposed multivariate association driven sampling algorithm is presented by applying it on several scientific data sets.},
  note      = {LA-UR-19-24243},
  keywords  = {data reduction, multivariate sampling, query-driven visualization},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Pulido, Jesus; Livescu, Daniel; Kanov, Kalin; Burns, Randal; Canada, Curtis; Ahrens, James; Hamann, Bernd
Remote Visual Analysis of Large Turbulence Databases at Multiple Scales Journal Article
In: Journal of Parallel and Distributed Computing, 2018, ISSN: 0743-7315, (LA-UR-17-20757).
Abstract | Links | BibTeX | Tags: Computer Science, data reduction, Databases, Distributed Systems, Mathematics and Computing, remote visualization, turbulence, Wavelets
@comment{0743-7315 is the journal's serial number, so it is stored in issn rather than isbn; the doi field holds the bare DOI without a resolver prefix.}
@article{info:lanl-repo/lareport/LA-UR-17-20757,
  author    = {Pulido, Jesus and Livescu, Daniel and Kanov, Kalin and Burns, Randal and Canada, Curtis and Ahrens, James and Hamann, Bernd},
  title     = {Remote Visual Analysis of Large Turbulence Databases at Multiple Scales},
  journal   = {Journal of Parallel and Distributed Computing},
  year      = {2018},
  date      = {2018-01-01},
  issn      = {0743-7315},
  doi       = {10.1016/j.jpdc.2018.05.011},
  url       = {https://www.sciencedirect.com/science/article/pii/S0743731518303927},
  abstract  = {The remote analysis and visualization of raw large turbulence datasets is challenging. Current accurate direct numerical simulations (DNS) of turbulent flows generate datasets with billions of points per time-step and several thousand time-steps per simulation. Until recently, the analysis and visualization of such datasets was restricted to scientists with access to large supercomputers. The public Johns Hopkins Turbulence database simplifies access to multi-terabyte turbulence datasets and facilitates the computation of statistics and extraction of features through the use of commodity hardware. We present a framework designed around wavelet-based compression for high-speed visualization of large datasets and methods supporting multi-resolution analysis of turbulence. By integrating common technologies, this framework enables remote access to tools available on supercomputers and over 230 terabytes of DNS data over the Web. The database toolset is expanded by providing access to exploratory data analysis tools, such as wavelet decomposition capabilities and coherent feature extraction.},
  note      = {LA-UR-17-20757},
  keywords  = {Computer Science, data reduction, Databases, Distributed Systems, Mathematics and Computing, remote visualization, turbulence, Wavelets},
  pubstate  = {published},
  tppubtype = {article}
}
2010
Ahrens, James; Fout, Nathaniel; Ma, Kwan-Liu
Time varying, multivariate volume data reduction Proceedings Article
In: 2010, (LA-UR-10-02243).
Abstract | Links | BibTeX | Tags: data reduction, multivariate, time-varying
@inproceedings{Ahrens2010,
  author      = {Ahrens, James and Fout, Nathaniel and Ma, Kwan-Liu},
  title       = {Time varying, multivariate volume data reduction},
  institution = {Los Alamos National Laboratory (LANL)},
  year        = {2010},
  date        = {2010-01-01},
  url         = {http://datascience.dsscale.org/wp-content/uploads/2016/06/Time-VaryingMltivariateVolumeDataReduction.pdf},
  abstract    = {Large-scale supercomputing is revolutionizing the way science is conducted. A growing challenge, however, is understanding the massive quantities of data produced by large-scale simulations. The data, typically time-varying, multi-variate, and volumetric, can occupy from hundreds of giga-bytes to several terabytes of storage space. Transferring and processing volume data of such sizes is prohibitively expensive and resource intensive. Although it may not be possible to entirely alleviate these problems, data compression should be considered as part of a viable solution, especially when the primary means of data analysis is volume rendering. In this paper we present our study of multivariate compression, which exploits correlations among related variables, for volume rendering. Two configurations for multidimensional compression based on vector quantization are examined. We emphasize quality reconstruction and interactive rendering, which leads us to a solution using graphics hardware to perform on-the-fly decompression during rendering.},
  note        = {LA-UR-10-02243},
  keywords    = {data reduction, multivariate, time-varying},
  pubstate    = {published},
  tppubtype   = {inproceedings}
}
Dutta, Soumya; Biswas, Ayan; Ahrens, James
Multivariate Pointwise Information-Driven Data Sampling and Visualization Journal Article
In: Entropy, vol. 21, no. 7, 2019, ISSN: 1099-4300, (LA-UR-19-24243).
@comment{Second copy of entry e21070699 in this file. BibTeX and Biber report a repeated entry for duplicate keys, so keep only one copy when compiling.}
@article{e21070699,
  author    = {Dutta, Soumya and Biswas, Ayan and Ahrens, James},
  title     = {Multivariate Pointwise Information-Driven Data Sampling and Visualization},
  journal   = {Entropy},
  volume    = {21},
  number    = {7},
  year      = {2019},
  date      = {2019-01-01},
  issn      = {1099-4300},
  doi       = {10.3390/e21070699},
  url       = {https://www.mdpi.com/1099-4300/21/7/699},
  abstract  = {With increasing computing capabilities of modern supercomputers, the size of the data generated from the scientific simulations is growing rapidly. As a result, application scientists need effective data summarization techniques that can reduce large-scale multivariate spatiotemporal data sets while preserving the important data properties so that the reduced data can answer domain-specific queries involving multiple variables with sufficient accuracy. While analyzing complex scientific events, domain experts often analyze and visualize two or more variables together to obtain a better understanding of the characteristics of the data features. Therefore, data summarization techniques are required to analyze multi-variable relationships in detail and then perform data reduction such that the important features involving multiple variables are preserved in the reduced data. To achieve this, in this work, we propose a data sub-sampling algorithm for performing statistical data summarization that leverages pointwise information theoretic measures to quantify the statistical association of data points considering multiple variables and generates a sub-sampled data that preserves the statistical association among multi-variables. Using such reduced sampled data, we show that multivariate feature query and analysis can be done effectively. The efficacy of the proposed multivariate association driven sampling algorithm is presented by applying it on several scientific data sets.},
  note      = {LA-UR-19-24243},
  keywords  = {data reduction, multivariate sampling, query-driven visualization},
  pubstate  = {published},
  tppubtype = {article}
}
Pulido, Jesus; Livescu, Daniel; Kanov, Kalin; Burns, Randal; Canada, Curtis; Ahrens, James; Hamann, Bernd
Remote Visual Analysis of Large Turbulence Databases at Multiple Scales Journal Article
In: Journal of Parallel and Distributed Computing, 2018, ISSN: 0743-7315, (LA-UR-17-20757).
@comment{Second copy of entry info:lanl-repo/lareport/LA-UR-17-20757 in this file. BibTeX and Biber report a repeated entry for duplicate keys, so keep only one copy when compiling. 0743-7315 is the journal's serial number, so it is stored in issn rather than isbn; the doi field holds the bare DOI without a resolver prefix.}
@article{info:lanl-repo/lareport/LA-UR-17-20757,
  author    = {Pulido, Jesus and Livescu, Daniel and Kanov, Kalin and Burns, Randal and Canada, Curtis and Ahrens, James and Hamann, Bernd},
  title     = {Remote Visual Analysis of Large Turbulence Databases at Multiple Scales},
  journal   = {Journal of Parallel and Distributed Computing},
  year      = {2018},
  date      = {2018-01-01},
  issn      = {0743-7315},
  doi       = {10.1016/j.jpdc.2018.05.011},
  url       = {https://www.sciencedirect.com/science/article/pii/S0743731518303927},
  abstract  = {The remote analysis and visualization of raw large turbulence datasets is challenging. Current accurate direct numerical simulations (DNS) of turbulent flows generate datasets with billions of points per time-step and several thousand time-steps per simulation. Until recently, the analysis and visualization of such datasets was restricted to scientists with access to large supercomputers. The public Johns Hopkins Turbulence database simplifies access to multi-terabyte turbulence datasets and facilitates the computation of statistics and extraction of features through the use of commodity hardware. We present a framework designed around wavelet-based compression for high-speed visualization of large datasets and methods supporting multi-resolution analysis of turbulence. By integrating common technologies, this framework enables remote access to tools available on supercomputers and over 230 terabytes of DNS data over the Web. The database toolset is expanded by providing access to exploratory data analysis tools, such as wavelet decomposition capabilities and coherent feature extraction.},
  note      = {LA-UR-17-20757},
  keywords  = {Computer Science, data reduction, Databases, Distributed Systems, Mathematics and Computing, remote visualization, turbulence, Wavelets},
  pubstate  = {published},
  tppubtype = {article}
}
Ahrens, James; Fout, Nathaniel; Ma, Kwan-Liu
Time varying, multivariate volume data reduction Proceedings Article
In: 2010, (LA-UR-10-02243).
@comment{Second copy of entry Ahrens2010 in this file. BibTeX and Biber report a repeated entry for duplicate keys, so keep only one copy when compiling.}
@inproceedings{Ahrens2010,
  author      = {Ahrens, James and Fout, Nathaniel and Ma, Kwan-Liu},
  title       = {Time varying, multivariate volume data reduction},
  institution = {Los Alamos National Laboratory (LANL)},
  year        = {2010},
  date        = {2010-01-01},
  url         = {http://datascience.dsscale.org/wp-content/uploads/2016/06/Time-VaryingMltivariateVolumeDataReduction.pdf},
  abstract    = {Large-scale supercomputing is revolutionizing the way science is conducted. A growing challenge, however, is understanding the massive quantities of data produced by large-scale simulations. The data, typically time-varying, multi-variate, and volumetric, can occupy from hundreds of giga-bytes to several terabytes of storage space. Transferring and processing volume data of such sizes is prohibitively expensive and resource intensive. Although it may not be possible to entirely alleviate these problems, data compression should be considered as part of a viable solution, especially when the primary means of data analysis is volume rendering. In this paper we present our study of multivariate compression, which exploits correlations among related variables, for volume rendering. Two configurations for multidimensional compression based on vector quantization are examined. We emphasize quality reconstruction and interactive rendering, which leads us to a solution using graphics hardware to perform on-the-fly decompression during rendering.},
  note        = {LA-UR-10-02243},
  keywords    = {data reduction, multivariate, time-varying},
  pubstate    = {published},
  tppubtype   = {inproceedings}
}