2018

Pulido, Jesus; Livescu, Daniel; Kanov, Kalin; Burns, Randal; Canada, Curtis; Ahrens, James; Hamann, Bernd
Remote Visual Analysis of Large Turbulence Databases at Multiple Scales Journal Article
In: Journal of Parallel and Distributed Computing, 2018, ISSN: 0743-7315, (LA-UR-17-20757).
Abstract | Links | BibTeX | Tags: Computer Science, data reduction, Databases, Distributed Systems, Mathematics and Computing, remote visualization, turbulence, Wavelets
@article{info:lanl-repo/lareport/LA-UR-17-20757,
  title     = {Remote Visual Analysis of Large Turbulence Databases at Multiple Scales},
  author    = {Pulido, Jesus and Livescu, Daniel and Kanov, Kalin and Burns, Randal and Canada, Curtis and Ahrens, James and Hamann, Bernd},
  url       = {https://www.sciencedirect.com/science/article/pii/S0743731518303927},
  doi       = {10.1016/j.jpdc.2018.05.011},
  issn      = {0743-7315},
  year      = {2018},
  date      = {2018-01-01},
  journal   = {Journal of Parallel and Distributed Computing},
  abstract  = {The remote analysis and visualization of raw large turbulence datasets is challenging. Current accurate direct numerical simulations (DNS) of turbulent flows generate datasets with billions of points per time-step and several thousand time-steps per simulation. Until recently, the analysis and visualization of such datasets was restricted to scientists with access to large supercomputers. The public Johns Hopkins Turbulence database simplifies access to multi-terabyte turbulence datasets and facilitates the computation of statistics and extraction of features through the use of commodity hardware. We present a framework designed around wavelet-based compression for high-speed visualization of large datasets and methods supporting multi-resolution analysis of turbulence. By integrating common technologies, this framework enables remote access to tools available on supercomputers and over 230 terabytes of DNS data over the Web. The database toolset is expanded by providing access to exploratory data analysis tools, such as wavelet decomposition capabilities and coherent feature extraction.},
  note      = {LA-UR-17-20757},
  keywords  = {Computer Science, data reduction, Databases, Distributed Systems, Mathematics and Computing, remote visualization, turbulence, Wavelets},
  pubstate  = {published},
  tppubtype = {article}
}

Biswas, Ayan; Dutta, Soumya; Pulido, Jesus; Ahrens, James
In Situ Data-driven Adaptive Sampling for Large-scale Simulation Data Summarization Inproceedings
In: Proceedings of the Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization, pp. 13–18, ACM, Dallas, Texas, 2018, ISBN: 978-1-4503-6579-6.
Abstract | Links | BibTeX | Tags: human-centered computing, Mathematics and Computing, scientific visualization, statistical paradigms
@inproceedings{Biswas:2018:SDA:3281464.3281467,
  title     = {In Situ Data-driven Adaptive Sampling for Large-scale Simulation Data Summarization},
  author    = {Biswas, Ayan and Dutta, Soumya and Pulido, Jesus and Ahrens, James},
  url       = {https://datascience.dsscale.org/wp-content/uploads/2019/01/LA-UR-18-28035.pdf},
  doi       = {10.1145/3281464.3281467},
  isbn      = {978-1-4503-6579-6},
  year      = {2018},
  date      = {2018-01-01},
  booktitle = {Proceedings of the Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization},
  pages     = {13--18},
  publisher = {ACM},
  address   = {Dallas, Texas},
  series    = {ISAV '18},
  abstract  = {Recent advancements in high-performance computing have enabled scientists to model various scientific phenomena in great detail. However, the analysis and visualization of the output data from such large-scale simulations are posing significant challenges due to their excessive size and disk I/O bottlenecks. One viable solution to this problem is to create a sub-sampled dataset which is able to preserve the important information of the data and also is significantly smaller in size compared to the raw data. Creating an in situ workflow for generating such intelligently sub-sampled datasets is of prime importance for such simulations. In this work, we propose an information-driven data sampling technique and compare it with two well-known sampling methods to demonstrate the superiority of the proposed method. The in situ performance of the proposed method is evaluated by applying it to the Nyx Cosmology simulation. We compare and contrast the performance of these various sampling algorithms and provide a holistic view of all the methods so that the scientists can choose appropriate sampling schemes based on their analysis requirements.},
  keywords  = {human-centered computing, Mathematics and Computing, scientific visualization, statistical paradigms},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pulido, Jesus; Livescu, Daniel; Kanov, Kalin; Burns, Randal; Canada, Curtis; Ahrens, James; Hamann, Bernd
Remote Visual Analysis of Large Turbulence Databases at Multiple Scales Journal Article
In: Journal of Parallel and Distributed Computing, 2018, ISSN: 0743-7315, (LA-UR-17-20757).
% NOTE(review): this entry is a verbatim duplicate of @article
% info:lanl-repo/lareport/LA-UR-17-20757 defined earlier in this file. Two
% entries with the same citation key make BibTeX/Biber fail with a
% "repeated entry" error. The leading @ has been removed so this copy is
% treated as ignorable text (BibTeX skips everything outside @-entries);
% citations of the key still resolve against the first copy above.
article{info:lanl-repo/lareport/LA-UR-17-20757,
title = {Remote Visual Analysis of Large Turbulence Databases at Multiple Scales},
author = {Jesus Pulido and Daniel Livescu and Kalin Kanov and Randal Burns and Curtis Canada and James Ahrens and Bernd Hamann},
url = {https://www.sciencedirect.com/science/article/pii/S0743731518303927},
doi = {https://doi.org/10.1016/j.jpdc.2018.05.011},
isbn = {0743-7315},
year = {2018},
date = {2018-01-01},
journal = {Journal of Parallel and Distributed Computing},
abstract = {The remote analysis and visualization of raw large turbulence datasets is challenging. Current accurate direct numerical simulations (DNS) of turbulent flows generate datasets with billions of points per time-step and several thousand time-steps per simulation. Until recently, the analysis and visualization of such datasets was restricted to scientists with access to large supercomputers. The public Johns Hopkins Turbulence database simplifies access to multi-terabyte turbulence datasets and facilitates the computation of statistics and extraction of features through the use of commodity hardware. We present a framework designed around wavelet-based compression for high-speed visualization of large datasets and methods supporting multi-resolution analysis of turbulence. By integrating common technologies, this framework enables remote access to tools available on supercomputers and over 230 terabytes of DNS data over the Web. The database toolset is expanded by providing access to exploratory data analysis tools, such as wavelet decomposition capabilities and coherent feature extraction.},
note = {LA-UR-17-20757},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Biswas, Ayan; Dutta, Soumya; Pulido, Jesus; Ahrens, James
In Situ Data-driven Adaptive Sampling for Large-scale Simulation Data Summarization Inproceedings
In: Proceedings of the Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization, pp. 13–18, ACM, Dallas, Texas, 2018, ISBN: 978-1-4503-6579-6.
% NOTE(review): this entry is a verbatim duplicate of @inproceedings
% Biswas:2018:SDA:3281464.3281467 defined earlier in this file. Two entries
% with the same citation key make BibTeX/Biber fail with a "repeated entry"
% error. The leading @ has been removed so this copy is treated as ignorable
% text (BibTeX skips everything outside @-entries); citations of the key
% still resolve against the first copy above.
inproceedings{Biswas:2018:SDA:3281464.3281467,
title = {In Situ Data-driven Adaptive Sampling for Large-scale Simulation Data Summarization},
author = {Ayan Biswas and Soumya Dutta and Jesus Pulido and James Ahrens},
url = {http://doi.acm.org/10.1145/3281464.3281467
https://datascience.dsscale.org/wp-content/uploads/2019/01/LA-UR-18-28035.pdf},
doi = {10.1145/3281464.3281467},
isbn = {978-1-4503-6579-6},
year = {2018},
date = {2018-01-01},
booktitle = {Proceedings of the Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization},
pages = {13--18},
publisher = {ACM},
address = {Dallas, Texas},
series = {ISAV '18},
abstract = {Recent advancements in high-performance computing have enabled scientists to model various scientific phenomena in great detail. However, the analysis and visualization of the output data from such large-scale simulations are posing significant challenges due to their excessive size and disk I/O bottlenecks. One viable solution to this problem is to create a sub-sampled dataset which is able to preserve the important information of the data and also is significantly smaller in size compared to the raw data. Creating an in situ workflow for generating such intelligently sub-sampled datasets is of prime importance for such simulations. In this work, we propose an information-driven data sampling technique and compare it with two well-known sampling methods to demonstrate the superiority of the proposed method. The in situ performance of the proposed method is evaluated by applying it to the Nyx Cosmology simulation. We compare and contrast the performance of these various sampling algorithms and provide a holistic view of all the methods so that the scientists can choose appropriate sampling schemes based on their analysis requirements.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}