Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/source' into source
Browse files Browse the repository at this point in the history
# Conflicts:
#	about/people.html
#	collections/_events_past/2023-08-21-16th-jlesc-workshop.md
#	collections/_projects/Compression_for_instruments.md
#	collections/_projects/arch_reconf.md
#	collections/_projects/dmr-dpp-oar.md
#	collections/_projects/fpga_project.md
#	collections/_projects/ft_workflow_project.md
#	collections/_projects/insitu_ippl_libyt.md
#	collections/_projects/linear_algebra_on_fpga.md
#	collections/_projects/ml4protein_diffraction.md
#	collections/_projects/serghei-dmr.md
#	collections/_projects/tool_pandt_project.md
  • Loading branch information
maierj committed Jun 12, 2024
2 parents b83a0e7 + 95c5909 commit a590c8e
Show file tree
Hide file tree
Showing 36 changed files with 1,316 additions and 206 deletions.
28 changes: 28 additions & 0 deletions _bibliography/external/dmr-dpp-oar.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
% DMRlib journal article (IEEE Transactions on Computers, vol. 70, no. 9, 2021).
% The exporter-generated "Conference Name" note and the local Zotero attachment
% path were dropped: the note would be printed verbatim in the bibliography and
% the path is only meaningful on the machine that produced the export.
@article{iserte_dmrlib_2021,
  author     = {Iserte, Sergio and Mayo, Rafael and Quintana-Ortí, Enrique S. and Peña, Antonio J.},
  title      = {{DMRlib}: {Easy}-{Coding} and {Efficient} {Resource} {Management} for {Job} {Malleability}},
  shorttitle = {{DMRlib}},
  journal    = {IEEE Transactions on Computers},
  volume     = {70},
  number     = {9},
  pages      = {1443--1457},
  month      = sep,
  year       = {2021},
  issn       = {1557-9956},
  doi        = {10.1109/TC.2020.3022933},
  url        = {https://ieeexplore.ieee.org/document/9190024},
  urldate    = {2024-01-23},
  copyright  = {All rights reserved},
  abstract   = {Process malleability has proved to have a highly positive impact on the resource utilization and global productivity in data centers compared with the conventional static resource allocation policy. However, the non-negligible additional development effort this solution imposes has constrained its adoption by the scientific programming community. In this work, we present DMRlib, a library designed to offer the global advantages of process malleability while providing a minimalist MPI-like syntax. The library includes a series of predefined communication patterns that greatly ease the development of malleable applications. In addition, we deploy several scenarios to demonstrate the positive impact of process malleability featuring different scalability patterns. Concretely, we study two job submission modes (rigid and moldable) in order to identify the best-case scenarios for malleability using metrics such as resource allocation rate, completed jobs per second, and energy consumption. The experiments prove that our elastic approach may improve global throughput by a factor higher than 3x compared to the traditional workloads of non-malleable jobs.},
}

% arXiv preprint 2403.17107 (cs.DC) on design principles of dynamic resource management.
@misc{huber2024design,
  author        = {Huber, Dominik and Schreiber, Martin and Schulz, Martin and Pritchard, Howard and Holmes, Daniel},
  title         = {Design Principles of Dynamic Resource Management for High-Performance Parallel Programming Models},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2403.17107},
  primaryClass  = {cs.DC},
}
117 changes: 101 additions & 16 deletions _bibliography/external/e2clab_project.bib
Original file line number Diff line number Diff line change
Expand Up @@ -54,33 +54,118 @@
%
% Grid'5000 testbed article (IJHPCA, vol. 20, no. 4, 2006); HAL record hal-00684943.
% Each field appears exactly once; repeated fields trigger BibTeX warnings.
@article{RaphaEtAl2006,
  AUTHOR      = {Bolze, Rapha{\"e}l and Cappello, Franck and Caron, Eddy and Dayde, Michel and Desprez, Fr{\'e}d{\'e}ric and Jeannot, Emmanuel and J{\'e}gou, Yvon and Lanteri, Stephane and Leduc, Julien and Melab, Nouredine and Mornet, Guillaume and Namyst, Raymond and Primet, Pascale and Qu{\'e}tier, Benjamin and Richard, Olivier and Talbi, El-Ghazali and Touche, Ir{\'e}a},
  TITLE       = {{Grid'5000: A Large Scale And Highly Reconfigurable Experimental Grid Testbed}},
  JOURNAL     = {{International Journal of High Performance Computing Applications}},
  PUBLISHER   = {{SAGE Publications}},
  VOLUME      = {20},
  NUMBER      = {4},
  PAGES       = {481--494},
  YEAR        = {2006},
  DOI         = {10.1177/1094342006070078},
  URL         = {https://hal.inria.fr/hal-00684943},
  HAL_ID      = {hal-00684943},
  HAL_VERSION = {v1},
}

% Chameleon testbed experience paper, USENIX ATC 2020.
% "Chameleon" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{KateEtAl2020,
  author    = {Keahey, Kate and Anderson, Jason and Zhen, Zhuo and Riteau, Pierre and Ruth, Paul and Stanzione, Dan and Cevik, Mert and Colleran, Jacob and Gunawi, Haryadi S. and Hammock, Cody and others},
  title     = {Lessons learned from the {Chameleon} testbed},
  booktitle = {2020 USENIX Annual Technical Conference (USENIX ATC 20)},
  pages     = {219--233},
  year      = {2020},
}

% Web reference for Trovi, hosted at chameleoncloud.gitbook.io.
% The access date is kept in the addendum field exactly as originally recorded.
@online{ChameleonEtAl2022,
  title    = {{Trovi: Practical Open Reproducibility}},
  url      = {https://chameleoncloud.gitbook.io/trovi/},
  year     = {2022},
  addendum = {(accessed: 07.14.2022)},
}

% SC 2022 poster on federated-learning workflow deployment; HAL record hal-03878254.
% month uses the predefined BibTeX macro so each style can format it.
@misc{PrigentEtAl2022,
  author    = {Prigent, C{\'e}dric and Costan, Alexandru and Antoniu, Gabriel and Cudennec, Lo{\"i}c},
  title     = {{Supporting Efficient Workflow Deployment of Federated Learning Systems across the Computing Continuum}},
  booktitle = {{SC 2022 - International Conference for High Performance Computing, Networking, Storage, and Analysis (Posters)}},
  month     = nov,
  year      = {2022},
  note      = {Poster},
  url       = {https://inria.hal.science/hal-03878254},
  pdf       = {https://inria.hal.science/hal-03878254/file/Poster.pdf},
  keywords  = {Computing Continuum ; Federated Learning ; Workflow ; Hyperparameter optimization},
}

% FedGuard paper, IEEE CLUSTER 2023.
% The empty number/volume placeholders were removed (BibTeX warns on empty fields).
@INPROCEEDINGS{ChelliEtAl2023,
  author    = {Chelli, Melvin and Prigent, Cédric and Schubotz, René and Costan, Alexandru and Antoniu, Gabriel and Cudennec, Loïc and Slusallek, Philipp},
  title     = {{FedGuard}: Selective Parameter Aggregation for Poisoning Attack Mitigation in Federated Learning},
  booktitle = {2023 IEEE International Conference on Cluster Computing (CLUSTER)},
  pages     = {72--81},
  year      = {2023},
  doi       = {10.1109/CLUSTER52292.2023.00014},
  keywords  = {Training;Federated learning;Computational modeling;Image edge detection;Cluster computing;Sensor systems and applications;Data models;federated learning;malicious peer detection;robust federated learning;adversarial attacks;generative models},
}

% "Three Pillars of Practical Reproducibility", IEEE e-Science 2023.
% The empty number/volume placeholders were removed (BibTeX warns on empty fields).
@INPROCEEDINGS{KeaheyEtAl2023,
  author    = {Keahey, Kate and Anderson, Jason and Powers, Mark and Cooper, Adam},
  title     = {Three Pillars of Practical Reproducibility},
  booktitle = {2023 IEEE 19th International Conference on e-Science (e-Science)},
  pages     = {1--6},
  year      = {2023},
  doi       = {10.1109/e-Science58273.2023.10254846},
  keywords  = {Computer science;Ecosystems;Buildings;Refining;Packaging;Information age;Reproducibility of results;reproducibility;infrastructure;scientific platforms;resource management},
}

% Discovery Testbed paper, IEEE e-Science 2023.
% The empty number/volume placeholders were removed (BibTeX warns on empty fields).
@INPROCEEDINGS{KeaheyEtAl2023b,
  author    = {Keahey, Kate and Feamster, Nick and Martins, Guilherme and Powers, Mark and Richardson, Marc and Schrubbe, Alexis and Sherman, Michael},
  title     = {Discovery Testbed: An Observational Instrument for Broadband Research},
  booktitle = {2023 IEEE 19th International Conference on e-Science (e-Science)},
  pages     = {1--4},
  year      = {2023},
  doi       = {10.1109/e-Science58273.2023.10254876},
  keywords  = {Computers;Instruments;Distributed databases;Data collection;Hardware;Broadband communication;Reliability;infrastructure;instruments;broadband;scientific platforms},
}

% ProvLight paper, IEEE CLUSTER 2023.
% Given names are kept as initials because that is all the exported record provides.
% The empty issn/volume placeholders were removed; month uses the BibTeX macro.
@INPROCEEDINGS{RosendoEtAl2023b,
  author    = {Rosendo, D. and Mattoso, M. and Costan, A. and Souza, R. and Pina, D. and Valduriez, P. and Antoniu, G.},
  title     = {{ProvLight}: Efficient Workflow Provenance Capture on the Edge-to-Cloud Continuum},
  booktitle = {2023 IEEE International Conference on Cluster Computing (CLUSTER)},
  pages     = {221--233},
  month     = nov,
  year      = {2023},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  doi       = {10.1109/CLUSTER52292.2023.00026},
  url       = {https://doi.ieeecomputersociety.org/10.1109/CLUSTER52292.2023.00026},
  keywords  = {protocols;memory management;key performance indicator;data compression;cluster computing;data models;performance analysis},
}

% Web reference for the REPETO project site; access date is recorded in addendum.
@online{Repeto2024,
  title    = {{REPETO: Reimagining Experimentation - The Path to Replicable Science}},
  url      = {https://repeto.cs.uchicago.edu/},
  year     = {2024},
  addendum = {(accessed: 01.31.2024)},
}

% Web reference for the Chameleon edge testbed page; access date is recorded in addendum.
@online{ChiEdge2024,
  title    = {{CHI@Edge}},
  url      = {https://www.chameleoncloud.org/experiment/chiedge/},
  year     = {2024},
  addendum = {(accessed: 01.31.2024)},
}

% Web reference for the Engage project site; access date is recorded in addendum.
@online{Engage2024,
  title    = {{Engage Project}},
  url      = {https://engage.inria.fr/},
  year     = {2024},
  addendum = {(accessed: 01.31.2024)},
}


9 changes: 9 additions & 0 deletions _bibliography/external/hyperp_sr_project.bib
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,12 @@ @inproceedings{ronneberger2015u
year={2015},
organization={Springer}
}

% ICDM 2023 paper on biased SGD sampling.
% The acronym SGD is brace-protected so styles cannot downcase it; the empty
% pages placeholder was removed (BibTeX warns on empty fields).
@inproceedings{quercia2023,
  title        = {{SGD} biased towards early important samples for efficient training},
  author       = {Quercia, Alessio and Morrison, Abigail and Scharr, Hanno and Assent, Ira},
  booktitle    = {IEEE International Conference on Data Mining (ICDM)},
  year         = {2023},
  organization = {IEEE},
}
34 changes: 34 additions & 0 deletions _bibliography/external/serghei-dmr.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
% DMRlib journal article (IEEE Transactions on Computers, vol. 70, no. 9, 2021).
% The exporter-generated "Conference Name" note and the local Zotero attachment
% path were dropped: the note would be printed verbatim in the bibliography and
% the path is only meaningful on the machine that produced the export.
@article{iserte_dmrlib_2021,
  author     = {Iserte, Sergio and Mayo, Rafael and Quintana-Ortí, Enrique S. and Peña, Antonio J.},
  title      = {{DMRlib}: {Easy}-{Coding} and {Efficient} {Resource} {Management} for {Job} {Malleability}},
  shorttitle = {{DMRlib}},
  journal    = {IEEE Transactions on Computers},
  volume     = {70},
  number     = {9},
  pages      = {1443--1457},
  month      = sep,
  year       = {2021},
  issn       = {1557-9956},
  doi        = {10.1109/TC.2020.3022933},
  url        = {https://ieeexplore.ieee.org/document/9190024},
  urldate    = {2024-01-23},
  copyright  = {All rights reserved},
  abstract   = {Process malleability has proved to have a highly positive impact on the resource utilization and global productivity in data centers compared with the conventional static resource allocation policy. However, the non-negligible additional development effort this solution imposes has constrained its adoption by the scientific programming community. In this work, we present DMRlib, a library designed to offer the global advantages of process malleability while providing a minimalist MPI-like syntax. The library includes a series of predefined communication patterns that greatly ease the development of malleable applications. In addition, we deploy several scenarios to demonstrate the positive impact of process malleability featuring different scalability patterns. Concretely, we study two job submission modes (rigid and moldable) in order to identify the best-case scenarios for malleability using metrics such as resource allocation rate, completed jobs per second, and energy consumption. The experiments prove that our elastic approach may improve global throughput by a factor higher than 3x compared to the traditional workloads of non-malleable jobs.},
}

% SERGHEI-SWE v1.0 model description (Geoscientific Model Development 16(3), 2023).
% Accented letters are written as brace-wrapped BibTeX special characters so
% sorting and label generation treat each as a single letter; month uses the macro.
@Article{Caviedes2023,
  author    = {Caviedes-Voulli{\`e}me, Daniel and Morales-Hern{\'a}ndez, Mario and Norman, Matthew R. and {\"O}zgen-Xian, Ilhan},
  title     = {{SERGHEI} ({SERGHEI}-{SWE}) v1.0: a performance-portable high-performance parallel-computing shallow-water solver for hydrology and environmental hydraulics},
  journal   = {Geoscientific Model Development},
  year      = {2023},
  month     = feb,
  volume    = {16},
  number    = {3},
  pages     = {977--1008},
  publisher = {Copernicus {GmbH}},
  doi       = {10.5194/gmd-16-977-2023},
  file      = {:Caviedes2023.pdf:PDF},
  groups    = {Surface flows},
}
142 changes: 142 additions & 0 deletions _bibliography/jlesc.bib
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,77 @@
% Please, treat this file not different from usual source code files.
%
% arXiv preprint 2401.03378 (cs.DC) describing CG-Kit.
% Tool and code names in the title are brace-protected against sentence-casing styles.
@misc{rudi2024cgkit,
  author        = {Rudi, Johann and Lee, Youngjun and Chadha, Aidan H. and Wahib, Mohamed and Weide, Klaus and O'Neal, Jared P. and Dubey, Anshu},
  title         = {{CG-Kit}: Code Generation Toolkit for Performant and Maintainable Variants of Source Code Applied to {Flash-X} Hydrodynamics Simulations},
  year          = {2024},
  eprint        = {2401.03378},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  note          = {submitted to FGCS},
}

% ICPP '23 paper on GPU-accelerated incremental checkpointing.
% The author list is a plain "and"-separated list: wrapping the whole list in an
% extra brace pair would make BibTeX treat all nine people as one corporate author.
@inproceedings{TanEtAl2023,
  author    = {Tan, Nigel and Luettgau, Jakob and Marquez, Jack and Teranishi, Keita and Morales, Nicolas and Bhowmick, Sanjukta and Cappello, Franck and Taufer, Michela and Nicolae, Bogdan},
  title     = {{Scalable Incremental Checkpointing using GPU-Accelerated De-Duplication}},
  booktitle = {{Proceedings of the 52nd International Conference on Parallel Processing}},
  series    = {ICPP '23},
  pages     = {665--674},
  numpages  = {10},
  year      = {2023},
  publisher = {{Association for Computing Machinery}},
  address   = {New York, NY, USA},
  location  = {Salt Lake City, UT, USA},
  isbn      = {9798400708435},
  doi       = {10.1145/3605573.3605639},
  url       = {https://doi.org/10.1145/3605573.3605639},
  keywords  = {incremental storage, de-duplication, data versioning, GPU parallelization, Checkpointing},
}

% ICPP '23 entry on a composable neural-architecture-search workflow.
% The author list is a plain "and"-separated list: wrapping the whole list in an
% extra brace pair would make BibTeX treat all nine people as one corporate author.
@inproceedings{ChanningEtAl2023,
  author    = {Channing, Georgia and Patel, Ria and Olaya, Paula and Rorabaugh, Ariel and Miyashita, Osamu and Caino-Lores, Silvina and Schuman, Catherine and Tama, Florence and Taufer, Michela},
  title     = {{Composable Workflow for Accelerating Neural Architecture Search Using In Situ Analytics for Protein Classification}},
  booktitle = {{Proceedings of the 52nd International Conference on Parallel Processing}},
  series    = {ICPP '23},
  pages     = {1},
  numpages  = {1},
  year      = {2023},
  publisher = {{Association for Computing Machinery}},
  address   = {New York, NY, USA},
  location  = {Salt Lake City, UT, USA},
  isbn      = {9798400708435},
  doi       = {10.1145/3605573.3605636},
  url       = {https://doi.org/10.1145/3605573.3605636},
  keywords  = {Deep learning, Early termination, Neural architecture search, Neural networks, Predictive modeling, Protein diffraction},
}

% KheOps paper, ACM REP '23.
% The page range uses the ASCII double hyphen rather than a Unicode en-dash, and
% the tool name is brace-protected against sentence-casing styles.
@inproceedings{RosendoEtAl2023,
  author    = {Rosendo, Daniel and Keahey, Kate and Costan, Alexandru and Simonin, Matthieu and Valduriez, Patrick and Antoniu, Gabriel},
  title     = {{KheOps}: Cost-effective Repeatability, Reproducibility, and Replicability of Edge-to-Cloud Experiments},
  booktitle = {Proceedings of the 2023 ACM Conference on Reproducibility and Replicability},
  series    = {ACM REP '23},
  pages     = {62--73},
  numpages  = {12},
  year      = {2023},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Santa Cruz, CA, USA},
  isbn      = {9798400701764},
  doi       = {10.1145/3589806.3600032},
  url       = {https://doi.org/10.1145/3589806.3600032},
  keywords  = {Workflows, Reproducibility, Replicability, Repeatability, Edge Computing, Computing Continuum, Cloud Computing},
}

% FTXS workshop paper on checkpoint timing within fixed-length reservations; HAL record hal-04215554.
@inproceedings{BarbutEtAl2023,
  title     = {When to checkpoint at the end of a fixed-length reservation?},
  author    = {Barbut, Quentin and Benoit, Anne and Herault, Thomas and Robert, Yves and Vivien, Frédéric},
  booktitle = {Proceedings of Fault Tolerance for HPC at eXtreme Scales (FTXS) Workshop},
  location  = {Denver, United States},
  year      = {2023},
  date      = {2023-11-12},
  url       = {https://inria.hal.science/hal-04215554},
}

@inproceedings{onealEtAl2022,
title={Domain-specific runtime to orchestrate computation on heterogeneous platforms},
author={O’Neal, Jared and Wahib, Mohamed and Dubey, Anshu and Weide, Klaus and Klosterman, Tom and Rudi, Johann},
Expand Down Expand Up @@ -134,6 +205,22 @@ @article{Ogoke2021
doi = {10.1088/2632-2153/ac1fc9}
}

% SC'23 Workshops paper on estimating performance and area of a cross-section lookup kernel.
% "Monte Carlo" is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{yoshii2023hardware,
  author    = {Yoshii, Kazutomo and Tramm, John and Allen, Bryce and Ueno, Tomohiro and Sano, Kentaro and Siegel, Andrew and Beckman, Pete},
  title     = {Hardware specialization: Estimating {Monte Carlo} cross-section lookup kernel performance and area},
  booktitle = {Proceedings of the SC'23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis},
  pages     = {1274--1278},
  year      = {2023},
}

% SC'23 Workshops paper on a streaming hardware compressor generator framework.
@inproceedings{yoshii2023streaming,
  author    = {Yoshii, Kazutomo and Ueno, Tomohiro and Sano, Kentaro and Miceli, Antonino and Cappello, Franck},
  title     = {Streaming Hardware Compressor Generator Framework},
  booktitle = {Proceedings of the SC'23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis},
  year      = {2023},
  pages     = {289--297},
}

@misc{yoshii2021hardware,
title={A Hardware Co-design Workflow for Scientific Instruments at the Edge},
author={Kazutomo Yoshii and Rajesh Sankaran and Sebastian Strempfer and Maksim Levental and Mike Hammer and Antonino Miceli},
Expand Down Expand Up @@ -1275,3 +1362,58 @@ @INPROCEEDINGS{PatelEtAl2022
pages={389-390},
doi={10.1109/eScience55777.2022.00052}
}

% SC-W 2023 paper on in situ visualization of NekRS using SENSEI.
@inproceedings{MateevitsiEtAl2023,
  title     = {{Scaling Computational Fluid Dynamics: In Situ Visualization of NekRS using SENSEI}},
  author    = {Mateevitsi, Victor A. and Bode, Mathis and Ferrier, Nicola and Fischer, Paul and G{\"{o}}bbert, Jens Henrik and Insley, Joseph A. and Lan, Yu-Hsiang and Min, Misun and Papka, Michael E. and Patel, Saumil and Rizzi, Silvio and Windgassen, Jonathan},
  booktitle = {Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis (SC-W 2023)},
  doi       = {10.1145/3624062.3624159},
  year      = {2023},
}

% ROIBIN-SZ article (Synchrotron Radiation News 36(4), 2023).
% Authors are separated with "and" (BibTeX does not accept commas between names),
% the ampersand in the publisher is escaped for LaTeX, and the eprint field that
% merely repeated the DOI link was removed.
@article{UnderwoodEtAl2023,
  author    = {Underwood, Robert and Yoon, Chunhong and Gok, Ali and Di, Sheng and Cappello, Franck},
  title     = {{ROIBIN-SZ}: Fast and Science-Preserving Compression for Serial Crystallography},
  journal   = {Synchrotron Radiation News},
  volume    = {36},
  number    = {4},
  pages     = {17--22},
  year      = {2023},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/08940886.2023.2245722},
  url       = {https://doi.org/10.1080/08940886.2023.2245722},
}

% 2023 case study on integrating TEZip into LibPressio (no venue recorded).
% Authors are separated with "and" (BibTeX does not accept commas between names);
% tool and language names are brace-protected against sentence-casing styles.
@misc{TalukdarEtAl2023,
  author = {Talukdar, Isita and Singh, Amarjit and Underwood, Robert and Sato, Kento and Yu, Weikuan},
  title  = {Integrating {TEZip} into {LibPressio}: A Case Study of Integrating a Dynamic Application into a Static {C} Environment},
  year   = {2023},
}

% LowFive paper, IPDPS'23.
% The library name is brace-protected so sentence-casing styles keep its camel case.
@inproceedings{peterka2023lowfive,
  author    = {Peterka, Tom and Morozov, Dmitriy and Nigmetov, Arnur and Yildiz, Orcun and Nicolae, Bogdan and Davis, Philip E.},
  title     = {{LowFive}: In Situ Data Transport for High-Performance Workflows},
  booktitle = {IPDPS'23: The 37th IEEE International Parallel and Distributed Processing Symposium},
  year      = {2023},
}

% arXiv e-print 2401.11952 (physics.ao-ph) describing MITgcm-AD v2.
@article{GaikwadEtAl2024,
  author        = {{Gaikwad}, Shreyas Sunil and {Krishna Narayanan}, Sri Hari and {Hascoet}, Laurent and {Campin}, Jean-Michel and {Pillar}, Helen and {Nguyen}, An and {H{\"u}ckelheim}, Jan and {Hovland}, Paul and {Heimbach}, Patrick},
  title         = {{{MITgcm-AD} v2: Open source tangent linear and adjoint modeling framework for the oceans and atmosphere enabled by the Automatic Differentiation tool Tapenade}},
  journal       = {arXiv e-prints},
  year          = {2024},
  month         = jan,
  eid           = {arXiv:2401.11952},
  pages         = {arXiv:2401.11952},
  archivePrefix = {arXiv},
  eprint        = {2401.11952},
  primaryClass  = {physics.ao-ph},
  doi           = {10.48550/arXiv.2401.11952},
  keywords      = {Physics - Atmospheric and Oceanic Physics},
}


Loading

0 comments on commit a590c8e

Please sign in to comment.