2014
Sewell, Christopher; Lo, Li-ta; Francois, Marianne; Ahrens, James
Data-Parallel Programming with PISTON and PINION Presentation
30.08.2014, (LA-UR-14-26186).
Abstract | Links | BibTeX | Tags: data parallel, PINION, PISTON
% Presentation entry; the LA-UR identifier is kept in the note field.
% Project acronyms in the title are braced so bibliography styles keep their case.
@misc{Sewell2014b,
  title = {Data-Parallel Programming with {PISTON} and {PINION}},
  author = {Christopher Sewell and Li-ta Lo and Marianne Francois and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/Data-Parallel_Programming_with_PISTON_and_PINION.pdf},
  year = {2014},
  date = {2014-08-30},
  abstract = {This presentation provides an introduction to data-parallel programming, NVIDIA's Thrust library, and our PISTON and PINION projects, which use this programming model to implement visualization and analysis operators, as well as simulation code.},
  note = {LA-UR-14-26186},
  keywords = {data parallel, PINION, PISTON},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Heitmann, Katrin; Lo, Li-Ta; Habib, Salman; Ahrens, James
Portable Parallel Halo and Center Finders for HACC Presentation
31.07.2014, (LA-UR-14-25437).
Abstract | Links | BibTeX | Tags: halo finding, PISTON, VTK-m
% Key changed from Sewell2014b: that key is shared by three different 2014
% presentations in this listing, and citation keys must be unique.
@misc{Sewell2014halo,
  title = {Portable Parallel Halo and Center Finders for {HACC}},
  author = {Christopher Sewell and Katrin Heitmann and Li-Ta Lo and Salman Habib and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/Portable_Parallel_Halo_and_Center_Finders_for_HACC.pdf},
  year = {2014},
  date = {2014-07-31},
  abstract = {This presentation describes our work on finding halos and halo centers for the HACC cosmology code using our portable, data-parallel framework, which allows us to run on accelerators such as GPUs, providing significant speed-up. This work, which is part of the SDAV VTK-m project, enabled halo analysis to be performed on a very large data set (8192^3 particles across 16,384 nodes on Titan) for which analysis using the traditional CPU algorithms was not feasible.},
  note = {LA-UR-14-25437},
  keywords = {halo finding, PISTON, VTK-m},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
PISTON Presentation
31.01.2014, (LA-UR-14-20028).
Abstract | Links | BibTeX | Tags: data parallel, PINION, PISTON
% Key changed from Sewell2014b: that key is shared by three different 2014
% presentations in this listing, and citation keys must be unique.
@misc{Sewell2014piston,
  title = {{PISTON}},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PISTON.pdf},
  year = {2014},
  date = {2014-01-31},
  abstract = {This set of two guest lectures provides an introductory tutorial to data-parallel programming and NVIDIA's Thrust library, as well as an overview of our research in our PISTON and PINION projects.},
  note = {LA-UR-14-20028},
  keywords = {data parallel, PINION, PISTON},
  pubstate = {published},
  tppubtype = {presentation}
}
Francois, Marianne; Lo, Li-Ta; Sewell, Christopher
VOLUME-OF-FLUID INTERFACE RECONSTRUCTION ALGORITHMS ON NEXT-GENERATION COMPUTER ARCHITECTURES Proceedings Article
In: Proceedings of the ASME, 2014, (LA-UR-14-20777).
Abstract | Links | BibTeX | Tags: COMPUTER ARCHITECTURES, INTERFACE RECONSTRUCTION, PISTON
% Proceedings article. The title is stored in Title Case rather than the
% all-caps form, so that the bibliography style decides the final casing.
@inproceedings{francois2014volume,
  title = {Volume-of-Fluid Interface Reconstruction Algorithms on Next-Generation Computer Architectures},
  author = {Marianne Francois and Li-Ta Lo and Christopher Sewell},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/Volume-Of-FluidInterfaceReconstructionAlgorighmsOnNext-GenerationComputerArchitectures.pdf},
  year = {2014},
  date = {2014-01-01},
  booktitle = {Proceedings of the ASME},
  abstract = {With the increasing heterogeneity and on-node parallelism of high-performance computing hardware, a major challenge to computational physicists is to work in close collaboration with computer scientists to develop portable and efficient algorithms and software. The objective of our work is to implement a portable code to perform interface reconstruction using NVIDIA’s Thrust library. Interface reconstruction is a technique commonly used in volume tracking methods for simulations of interfacial flows. For that, we have designed a two-dimensional mesh data structure that is easily mapped to the 1D vectors used by Thrust and at the same time is simple to work with using familiar data structures terminology (such as cell, vertices and edges). With this new data structure in place, we have implemented a recursive volume-of-fluid initialization algorithm and a standard piecewise interface reconstruction algorithm. Our interface reconstruction algorithm makes use of a table look-up to easily identify all intersection cases, as this design is efficient on parallel architectures such as GPUs. Finally, we report performance results which show that a single implementation of these algorithms can be compiled to multiple backends (specifically, multi-core CPUs, NVIDIA GPUs, and Intel Xeon Phi coprocessors), making efficient use of the available parallelism on each.},
  note = {LA-UR-14-20777},
  keywords = {COMPUTER ARCHITECTURES, INTERFACE RECONSTRUCTION, PISTON},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2013
Sewell, Christopher
Portable Data-Parallel Visualization and Analysis Operators Presentation
20.03.2013, (LA-UR-13-21884).
Abstract | Links | BibTeX | Tags: data parallel, gpu, PINION, PISTON
% Single-author presentation entry; the LA-UR identifier is kept in the note field.
@misc{Sewell2013,
  author    = {Christopher Sewell},
  title     = {Portable Data-Parallel Visualization and Analysis Operators},
  year      = {2013},
  date      = {2013-03-20},
  url       = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PortableDataParallelVisualizationAndAnalysisOperators.pdf},
  note      = {LA-UR-13-21884},
  keywords  = {data parallel, gpu, PINION, PISTON},
  abstract  = {This presentation describes the overall goal of PISTON and PINION (to provide high parallel performance on current and next-generation supercomputers using portable, data-parallel code), and summarizes the work on these projects to date. It is intended for an audience at NVIDIA's GPU Technology Conference, and thus has an emphasis on how it uses Thrust to write code that obtains good parallel performance when compiled to different backends, including CUDA.},
  pubstate  = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
PISTON: An SDAV Framework for Portable High-Performance Data-Parallel Visualization and Analysis Operators Presentation
22.02.2013, (LA-UR-13-21083).
Abstract | Links | BibTeX | Tags: PISTON, VTK-m
% Presentation entry; the LA-UR identifier is kept in the note field.
% Acronyms in the title are braced so bibliography styles keep their case.
@misc{Sewell2013b,
  title = {{PISTON}: An {SDAV} Framework for Portable High-Performance Data-Parallel Visualization and Analysis Operators},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PISTON2.pdf},
  year = {2013},
  date = {2013-02-22},
  abstract = {This presentation describes the overall goal of PISTON (to provide portability and performance for visualization and analysis operators on current and next-generation supercomputers), and summarizes the work on PISTON in relation to the SDAV (The SciDac Institute of Scalable Data Management, Analysis, and Visualization) Milestones. Specifically, it presents work related to general PISTON algorithm and infrastructure development; the halo finder operator; PISTON integration into VTK and ParaView; VPIC in-situ PISTON pipelines; and publications, presentations, and tutorials.},
  note = {LA-UR-13-21083},
  keywords = {PISTON, VTK-m},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
Portable data-parallel visualization and analysis in distributed memory environments Proceedings Article
In: Large-Scale Data Analysis and Visualization (LDAV), 2013 IEEE Symposium on, pp. 25–33, IEEE 2013, (LA-UR-13-23809).
Abstract | Links | BibTeX | Tags: analysis, Concurrent Programming, data-parallel, distributed memory, parallel programming, PISTON, visualization
% Proceedings article. The title is stored in Title Case so that styles can
% downcase it if they wish; the abstract has its mis-encoded apostrophe repaired.
@inproceedings{sewell2013portable,
  title = {Portable Data-Parallel Visualization and Analysis in Distributed Memory Environments},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/PortableData-ParallelVisualizationAndAnalysisInDistributedMemoryEnvironments.pdf},
  year = {2013},
  date = {2013-01-01},
  booktitle = {Large-Scale Data Analysis and Visualization (LDAV), 2013 IEEE Symposium on},
  pages = {25--33},
  organization = {IEEE},
  abstract = {Data-parallelism is a programming model that maps well to architectures with a high degree of concurrency. Algorithms written using data-parallel primitives can be easily ported to any architecture for which an implementation of these primitives exists, making efficient use of the available parallelism on each. We have previously published results demonstrating our ability to compile the same data-parallel code for several visualization algorithms onto different on-node parallel architectures (GPUs and multi-core CPUs) using our extension of NVIDIA's Thrust library. In this paper, we discuss our extension of Thrust to support concurrency in distributed memory environments across multiple nodes. This enables the application developer to write data-parallel algorithms while viewing the data as single, long vectors, essentially without needing to explicitly take into consideration whether the values are actually distributed across nodes. Our distributed wrapper for Thrust handles the communication in the backend using MPI, while still using the standard Thrust library to take advantage of available on-node parallelism. We describe the details of our distributed implementations of several key data-parallel primitives, including scan, scatter/gather, sort, reduce, and upper/lower bound. We also present two higher-level distributed algorithms developed using these primitives: isosurface and KD-tree construction. Finally, we provide timing results demonstrating the ability of these algorithms to take advantage of available parallelism on nodes and across multiple nodes, and discuss scaling limitations for communication-intensive algorithms such as KD-tree construction.},
  note = {LA-UR-13-23809},
  keywords = {analysis, Concurrent Programming, data-parallel, distributed memory, parallel programming, PISTON, visualization},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2012
Lo, Li-ta; Sewell, Christopher; Ahrens, James
PISTON: A Portable Cross-Platform Framework for Data-Parallel Visualization Operators. Proceedings Article
In: EGPGV, pp. 11–20, 2012, (LA-UR-12-10227).
Abstract | Links | BibTeX | Tags: Concurrent Programming, parallel programming, PISTON
% Proceedings article. The title carries no trailing period (styles add their
% own punctuation) and the project acronym is braced to keep its case.
@inproceedings{lo2012piston,
  title = {{PISTON}: A Portable Cross-Platform Framework for Data-Parallel Visualization Operators},
  author = {Li-ta Lo and Christopher Sewell and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/PISTONAPortableCrossPlatformFrameworkForData-ParallelVisualizationOperators.pdf},
  year = {2012},
  date = {2012-01-01},
  booktitle = {EGPGV},
  pages = {11--20},
  abstract = {Due to the wide variety of current and next-generation supercomputing architectures, the development of high-performance parallel visualization and analysis operators frequently requires re-writing the underlying algorithms for many different platforms. In order to facilitate portability, we have devised a framework for creating such operators that employs the data-parallel programming model. By writing the operators using only data-parallel primitives (such as scans, transforms, stream compactions, etc.), the same code may be compiled to multiple targets using architecture-specific backend implementations of these primitives. Specifically, we make use of and extend NVIDIA's Thrust library, which provides CUDA and OpenMP backends. Using this framework, we have implemented isosurface, cut surface, and threshold operators, and have achieved good parallel performance on two different architectures (multi-core CPUs and NVIDIA GPUs) using the exact same operator code. We have applied these operators to several large, real scientific data sets, and have open-source released a beta version of our code base.},
  note = {LA-UR-12-10227},
  keywords = {Concurrent Programming, parallel programming, PISTON},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Sewell, Christopher; Lo, Li-ta; Francois, Marianne; Ahrens, James
Data-Parallel Programming with PISTON and PINION Presentation
30.08.2014, (LA-UR-14-26186).
% Presentation entry; the LA-UR identifier is kept in the note field.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@misc{Sewell2014b,
  title = {Data-Parallel Programming with {PISTON} and {PINION}},
  author = {Christopher Sewell and Li-ta Lo and Marianne Francois and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/Data-Parallel_Programming_with_PISTON_and_PINION.pdf},
  year = {2014},
  date = {2014-08-30},
  abstract = {This presentation provides an introduction to data-parallel programming, NVIDIA's Thrust library, and our PISTON and PINION projects, which use this programming model to implement visualization and analysis operators, as well as simulation code.},
  note = {LA-UR-14-26186},
  keywords = {data parallel, PINION, PISTON},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Heitmann, Katrin; Lo, Li-Ta; Habib, Salman; Ahrens, James
Portable Parallel Halo and Center Finders for HACC Presentation
31.07.2014, (LA-UR-14-25437).
% Key changed from Sewell2014b: that key is shared by different 2014
% presentations in this listing, and citation keys must be unique.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@misc{Sewell2014halo,
  title = {Portable Parallel Halo and Center Finders for {HACC}},
  author = {Christopher Sewell and Katrin Heitmann and Li-Ta Lo and Salman Habib and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/Portable_Parallel_Halo_and_Center_Finders_for_HACC.pdf},
  year = {2014},
  date = {2014-07-31},
  abstract = {This presentation describes our work on finding halos and halo centers for the HACC cosmology code using our portable, data-parallel framework, which allows us to run on accelerators such as GPUs, providing significant speed-up. This work, which is part of the SDAV VTK-m project, enabled halo analysis to be performed on a very large data set (8192^3 particles across 16,384 nodes on Titan) for which analysis using the traditional CPU algorithms was not feasible.},
  note = {LA-UR-14-25437},
  keywords = {halo finding, PISTON, VTK-m},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
PISTON Presentation
31.01.2014, (LA-UR-14-20028).
% Key changed from Sewell2014b: that key is shared by different 2014
% presentations in this listing, and citation keys must be unique.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@misc{Sewell2014piston,
  title = {{PISTON}},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PISTON.pdf},
  year = {2014},
  date = {2014-01-31},
  abstract = {This set of two guest lectures provides an introductory tutorial to data-parallel programming and NVIDIA's Thrust library, as well as an overview of our research in our PISTON and PINION projects.},
  note = {LA-UR-14-20028},
  keywords = {data parallel, PINION, PISTON},
  pubstate = {published},
  tppubtype = {presentation}
}
Francois, Marianne; Lo, Li-Ta; Sewell, Christopher
VOLUME-OF-FLUID INTERFACE RECONSTRUCTION ALGORITHMS ON NEXT-GENERATION COMPUTER ARCHITECTURES Proceedings Article
In: Proceedings of the ASME, 2014, (LA-UR-14-20777).
% Proceedings article. The title is stored in Title Case rather than the
% all-caps form, so that the bibliography style decides the final casing.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@inproceedings{francois2014volume,
  title = {Volume-of-Fluid Interface Reconstruction Algorithms on Next-Generation Computer Architectures},
  author = {Marianne Francois and Li-Ta Lo and Christopher Sewell},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/Volume-Of-FluidInterfaceReconstructionAlgorighmsOnNext-GenerationComputerArchitectures.pdf},
  year = {2014},
  date = {2014-01-01},
  booktitle = {Proceedings of the ASME},
  abstract = {With the increasing heterogeneity and on-node parallelism of high-performance computing hardware, a major challenge to computational physicists is to work in close collaboration with computer scientists to develop portable and efficient algorithms and software. The objective of our work is to implement a portable code to perform interface reconstruction using NVIDIA’s Thrust library. Interface reconstruction is a technique commonly used in volume tracking methods for simulations of interfacial flows. For that, we have designed a two-dimensional mesh data structure that is easily mapped to the 1D vectors used by Thrust and at the same time is simple to work with using familiar data structures terminology (such as cell, vertices and edges). With this new data structure in place, we have implemented a recursive volume-of-fluid initialization algorithm and a standard piecewise interface reconstruction algorithm. Our interface reconstruction algorithm makes use of a table look-up to easily identify all intersection cases, as this design is efficient on parallel architectures such as GPUs. Finally, we report performance results which show that a single implementation of these algorithms can be compiled to multiple backends (specifically, multi-core CPUs, NVIDIA GPUs, and Intel Xeon Phi coprocessors), making efficient use of the available parallelism on each.},
  note = {LA-UR-14-20777},
  keywords = {COMPUTER ARCHITECTURES, INTERFACE RECONSTRUCTION, PISTON},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Sewell, Christopher
Portable Data-Parallel Visualization and Analysis Operators Presentation
20.03.2013, (LA-UR-13-21884).
% Single-author presentation entry; the LA-UR identifier is kept in the note field.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@misc{Sewell2013,
  title = {Portable Data-Parallel Visualization and Analysis Operators},
  author = {Christopher Sewell},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PortableDataParallelVisualizationAndAnalysisOperators.pdf},
  year = {2013},
  date = {2013-03-20},
  abstract = {This presentation describes the overall goal of PISTON and PINION (to provide high parallel performance on current and next-generation supercomputers using portable, data-parallel code), and summarizes the work on these projects to date. It is intended for an audience at NVIDIA's GPU Technology Conference, and thus has an emphasis on how it uses Thrust to write code that obtains good parallel performance when compiled to different backends, including CUDA.},
  note = {LA-UR-13-21884},
  keywords = {data parallel, gpu, PINION, PISTON},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
PISTON: An SDAV Framework for Portable High-Performance Data-Parallel Visualization and Analysis Operators Presentation
22.02.2013, (LA-UR-13-21083).
% Presentation entry; the LA-UR identifier is kept in the note field.
% Acronyms in the title are braced so bibliography styles keep their case.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@misc{Sewell2013b,
  title = {{PISTON}: An {SDAV} Framework for Portable High-Performance Data-Parallel Visualization and Analysis Operators},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/08/PISTON2.pdf},
  year = {2013},
  date = {2013-02-22},
  abstract = {This presentation describes the overall goal of PISTON (to provide portability and performance for visualization and analysis operators on current and next-generation supercomputers), and summarizes the work on PISTON in relation to the SDAV (The SciDac Institute of Scalable Data Management, Analysis, and Visualization) Milestones. Specifically, it presents work related to general PISTON algorithm and infrastructure development; the halo finder operator; PISTON integration into VTK and ParaView; VPIC in-situ PISTON pipelines; and publications, presentations, and tutorials.},
  note = {LA-UR-13-21083},
  keywords = {PISTON, VTK-m},
  pubstate = {published},
  tppubtype = {presentation}
}
Sewell, Christopher; Lo, Li-ta; Ahrens, James
Portable data-parallel visualization and analysis in distributed memory environments Proceedings Article
In: Large-Scale Data Analysis and Visualization (LDAV), 2013 IEEE Symposium on, pp. 25–33, IEEE 2013, (LA-UR-13-23809).
% Proceedings article. The title is stored in Title Case so that styles can
% downcase it if they wish; the abstract has its mis-encoded apostrophe repaired.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@inproceedings{sewell2013portable,
  title = {Portable Data-Parallel Visualization and Analysis in Distributed Memory Environments},
  author = {Christopher Sewell and Li-ta Lo and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/PortableData-ParallelVisualizationAndAnalysisInDistributedMemoryEnvironments.pdf},
  year = {2013},
  date = {2013-01-01},
  booktitle = {Large-Scale Data Analysis and Visualization (LDAV), 2013 IEEE Symposium on},
  pages = {25--33},
  organization = {IEEE},
  abstract = {Data-parallelism is a programming model that maps well to architectures with a high degree of concurrency. Algorithms written using data-parallel primitives can be easily ported to any architecture for which an implementation of these primitives exists, making efficient use of the available parallelism on each. We have previously published results demonstrating our ability to compile the same data-parallel code for several visualization algorithms onto different on-node parallel architectures (GPUs and multi-core CPUs) using our extension of NVIDIA's Thrust library. In this paper, we discuss our extension of Thrust to support concurrency in distributed memory environments across multiple nodes. This enables the application developer to write data-parallel algorithms while viewing the data as single, long vectors, essentially without needing to explicitly take into consideration whether the values are actually distributed across nodes. Our distributed wrapper for Thrust handles the communication in the backend using MPI, while still using the standard Thrust library to take advantage of available on-node parallelism. We describe the details of our distributed implementations of several key data-parallel primitives, including scan, scatter/gather, sort, reduce, and upper/lower bound. We also present two higher-level distributed algorithms developed using these primitives: isosurface and KD-tree construction. Finally, we provide timing results demonstrating the ability of these algorithms to take advantage of available parallelism on nodes and across multiple nodes, and discuss scaling limitations for communication-intensive algorithms such as KD-tree construction.},
  note = {LA-UR-13-23809},
  keywords = {analysis, Concurrent Programming, data-parallel, distributed memory, parallel programming, PISTON, visualization},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Lo, Li-ta; Sewell, Christopher; Ahrens, James
PISTON: A Portable Cross-Platform Framework for Data-Parallel Visualization Operators. Proceedings Article
In: EGPGV, pp. 11–20, 2012, (LA-UR-12-10227).
% Proceedings article. The title carries no trailing period (styles add their
% own punctuation) and the project acronym is braced to keep its case.
% Keywords are filled in from the tagged copy of this entry earlier in the listing.
@inproceedings{lo2012piston,
  title = {{PISTON}: A Portable Cross-Platform Framework for Data-Parallel Visualization Operators},
  author = {Li-ta Lo and Christopher Sewell and James Ahrens},
  url = {http://datascience.dsscale.org/wp-content/uploads/2016/06/PISTONAPortableCrossPlatformFrameworkForData-ParallelVisualizationOperators.pdf},
  year = {2012},
  date = {2012-01-01},
  booktitle = {EGPGV},
  pages = {11--20},
  abstract = {Due to the wide variety of current and next-generation supercomputing architectures, the development of high-performance parallel visualization and analysis operators frequently requires re-writing the underlying algorithms for many different platforms. In order to facilitate portability, we have devised a framework for creating such operators that employs the data-parallel programming model. By writing the operators using only data-parallel primitives (such as scans, transforms, stream compactions, etc.), the same code may be compiled to multiple targets using architecture-specific backend implementations of these primitives. Specifically, we make use of and extend NVIDIA's Thrust library, which provides CUDA and OpenMP backends. Using this framework, we have implemented isosurface, cut surface, and threshold operators, and have achieved good parallel performance on two different architectures (multi-core CPUs and NVIDIA GPUs) using the exact same operator code. We have applied these operators to several large, real scientific data sets, and have open-source released a beta version of our code base.},
  note = {LA-UR-12-10227},
  keywords = {Concurrent Programming, parallel programming, PISTON},
  pubstate = {published},
  tppubtype = {inproceedings}
}