BibTeX records: Kurt B. Ferreira

download as .bib file

@inproceedings{DBLP:conf/pvm/BaconBLFB23,
  author       = {Nicholas H. Bacon and
                  Patrick G. Bridges and
                  Scott Levy and
                  Kurt B. Ferreira and
                  Amanda Bienz},
  title        = {Evaluating the Viability of {LogGP} for Modeling {MPI} Performance with
                  Non-contiguous Datatypes on Modern Architectures},
  booktitle    = {Proceedings of the 30th European {MPI} Users' Group Meeting, {EuroMPI}
                  2023, Bristol, United Kingdom, September 11-13, 2023},
  pages        = {8:1--8:10},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3615318.3615326},
  doi          = {10.1145/3615318.3615326},
  timestamp    = {Sat, 14 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/pvm/BaconBLFB23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/FerreiraL23,
  author       = {Kurt B. Ferreira and
                  Scott Levy},
  title        = {Using {Benford's} Law to Identify Unusual Failure Regions},
  booktitle    = {Proceedings of the {SC} '23 Workshops of The International Conference
                  on High Performance Computing, Network, Storage, and Analysis, {SC-W}
                  2023, Denver, CO, USA, November 12-17, 2023},
  pages        = {516--519},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3624062.3624121},
  doi          = {10.1145/3624062.3624121},
  timestamp    = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/FerreiraL23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpdc/FerreiraLHP22,
  author       = {Kurt B. Ferreira and
                  Scott Levy and
                  Joshua Hemmert and
                  Kevin T. Pedretti},
  editor       = {Jon B. Weissman and
                  Abhishek Chandra and
                  Ada Gavrilovska and
                  Devesh Tiwari},
  title        = {Understanding Memory Failures on a Petascale {Arm} System},
  booktitle    = {{HPDC} '22: The 31st International Symposium on High-Performance Parallel
                  and Distributed Computing, Minneapolis, MN, USA, 27 June 2022 - 1
                  July 2022},
  pages        = {84--96},
  publisher    = {{ACM}},
  year         = {2022},
  url          = {https://doi.org/10.1145/3502181.3531465},
  doi          = {10.1145/3502181.3531465},
  timestamp    = {Fri, 24 Jun 2022 12:30:13 +0200},
  biburl       = {https://dblp.org/rec/conf/hpdc/FerreiraLHP22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pc/FerreiraL21,
  author       = {Kurt B. Ferreira and
                  Scott Levy},
  title        = {Evaluating {MPI} resource usage summary statistics},
  journal      = {Parallel Computing},
  volume       = {108},
  pages        = {102825},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.parco.2021.102825},
  doi          = {10.1016/J.PARCO.2021.102825},
  timestamp    = {Wed, 15 Dec 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/pc/FerreiraL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cluster/FerreiraLKDB21,
  author    = {Ferreira, Kurt B. and Levy, Scott and Kuhns, Victor and
               DeBardeleben, Nathan and Blanchard, Sean},
  title     = {Understanding the Effects of {DRAM} Correctable Error Logging at Scale},
  booktitle = {{IEEE} International Conference on Cluster Computing, {CLUSTER} 2021, Portland, OR, USA, September 7-10, 2021},
  pages     = {421--432},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/Cluster48925.2021.00060},
  doi       = {10.1109/CLUSTER48925.2021.00060},
  timestamp = {Fri, 15 Oct 2021 14:43:26 +0200},
  biburl    = {https://dblp.org/rec/conf/cluster/FerreiraLKDB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/FerreiraL21,
  author       = {Kurt B. Ferreira and
                  Scott Levy},
  editor       = {Ricardo Chaves and
                  Dora B. Heras and
                  Aleksandar Ilic and
                  Didem Unat and
                  Rosa M. Badia and
                  Andrea Bracciali and
                  Patrick Diehl and
                  Anshu Dubey and
                  Oh, Sangyoon and
                  Stephen L. Scott and
                  Laura Ricci},
  title        = {Characterizing Memory Failures Using {Benford's} Law},
  booktitle    = {Euro-Par 2021: Parallel Processing Workshops - Euro-Par 2021 International
                  Workshops, Lisbon, Portugal, August 30-31, 2021, Revised Selected
                  Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {13098},
  pages        = {310--321},
  publisher    = {Springer},
  year         = {2021},
  url          = {https://doi.org/10.1007/978-3-031-06156-1\_25},
  doi          = {10.1007/978-3-031-06156-1\_25},
  timestamp    = {Tue, 14 Feb 2023 22:22:15 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/FerreiraL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/FerreiraGLLG20,
  author       = {Kurt B. Ferreira and
                  Ryan E. Grant and
                  Michael J. Levenhagen and
                  Scott Levy and
                  Taylor L. Groves},
  title        = {Hardware {MPI} message matching: Insights into {MPI} matching behavior
                  to inform design},
  journal      = {Concurrency and Computation: Practice and Experience},
  volume       = {32},
  number       = {3},
  year         = {2020},
  url          = {https://doi.org/10.1002/cpe.5150},
  doi          = {10.1002/CPE.5150},
  timestamp    = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/concurrency/FerreiraGLLG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/LevyFW20,
  author       = {Scott Levy and
                  Kurt B. Ferreira and
                  Patrick M. Widener},
  title        = {The unexpected virtue of almost: Exploiting {MPI} collective operations
                  to approximately coordinate checkpoints},
  journal      = {Concurrency and Computation: Practice and Experience},
  volume       = {32},
  number       = {3},
  year         = {2020},
  url          = {https://doi.org/10.1002/cpe.4890},
  doi          = {10.1002/CPE.4890},
  timestamp    = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/concurrency/LevyFW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/FerreiraL20,
  author    = {Ferreira, Kurt B. and Levy, Scott},
  editor    = {Bland, Wesley and Mohror, Kathryn M. and Pena, Toni},
  title     = {Evaluating {MPI} Message Size Summary Statistics},
  booktitle = {EuroMPI/USA '20: 27th European {MPI} Users' Group Meeting, Virtual Meeting, Austin, TX, USA, September 21-24, 2020},
  pages     = {61--70},
  publisher = {{ACM}},
  year      = {2020},
  url       = {https://doi.org/10.1145/3416315.3416322},
  doi       = {10.1145/3416315.3416322},
  timestamp = {Tue, 25 Jul 2023 13:44:09 +0200},
  biburl    = {https://dblp.org/rec/conf/pvm/FerreiraL20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/smc2/BrightwellFGLLO20,
  author       = {Ron Brightwell and
                  Kurt B. Ferreira and
                  Ryan E. Grant and
                  Scott Levy and
                  Jay F. Lofstead and
                  Stephen L. Olivier and
                  Kevin T. Pedretti and
                  Andrew J. Younge and
                  Ann C. Gentile and
                  Jim M. Brandt},
  editor       = {Jeffrey Nichols and
                  Becky Verastegui and
                  Arthur Barney Maccabe and
                  Oscar R. Hernandez and
                  Suzanne Parete{-}Koon and
                  Theresa Ahearn},
  title        = {{ALAMO}: Autonomous Lightweight Allocation, Management, and Optimization},
  booktitle    = {Driving Scientific and Engineering Discoveries Through the Convergence
                  of {HPC}, Big Data and {AI} - 17th Smoky Mountains Computational Sciences
                  and Engineering Conference, {SMC} 2020, Oak Ridge, TN, USA, August
                  26-28, 2020, Revised Selected Papers},
  series       = {Communications in Computer and Information Science},
  volume       = {1315},
  pages        = {408--422},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-63393-6\_27},
  doi          = {10.1007/978-3-030-63393-6\_27},
  timestamp    = {Sun, 26 Nov 2023 00:57:51 +0100},
  biburl       = {https://dblp.org/rec/conf/smc2/BrightwellFGLLO20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijnc/HeraultRBAFBD19,
  author       = {Thomas H{\'{e}}rault and
                  Yves Robert and
                  Aur{\'{e}}lien Bouteiller and
                  Dorian C. Arnold and
                  Kurt B. Ferreira and
                  George Bosilca and
                  Jack J. Dongarra},
  title        = {Checkpointing Strategies for Shared High-Performance Computing Platforms},
  journal      = {International Journal of Networking and Computing},
  volume       = {9},
  number       = {1},
  pages        = {28--52},
  year         = {2019},
  url          = {http://www.ijnc.org/index.php/ijnc/article/view/195},
  timestamp    = {Tue, 16 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijnc/HeraultRBAFBD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pc/LevyFSGD19,
  author       = {Scott Levy and
                  Kurt B. Ferreira and
                  Whit Schonbein and
                  Ryan E. Grant and
                  Matthew G. F. Dosanjh},
  title        = {Using simulation to examine the effect of {MPI} message matching costs
                  on application performance},
  journal      = {Parallel Computing},
  volume       = {84},
  pages        = {63--74},
  year         = {2019},
  url          = {https://doi.org/10.1016/j.parco.2019.02.008},
  doi          = {10.1016/J.PARCO.2019.02.008},
  timestamp    = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/pc/LevyFSGD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/LevyF19,
  author       = {Scott Levy and
                  Kurt B. Ferreira},
  editor       = {Ulrich Schwardmann and
                  Christian Boehme and
                  Dora B. Heras and
                  Valeria Cardellini and
                  Emmanuel Jeannot and
                  Antonio Salis and
                  Claudio Schifanella and
                  Ravi Reddy Manumachu and
                  Dieter Schwamborn and
                  Laura Ricci and
                  Oh, Sangyoon and
                  Thomas Gruber and
                  Laura Antonelli and
                  Stephen L. Scott},
  title        = {Space-Efficient {Reed-Solomon} Encoding to Detect and Correct Pointer
                  Corruption},
  booktitle    = {Euro-Par 2019: Parallel Processing Workshops - Euro-Par 2019 International
                  Workshops, G{\"{o}}ttingen, Germany, August 26-30, 2019, Revised
                  Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {11997},
  pages        = {657--668},
  publisher    = {Springer},
  year         = {2019},
  url          = {https://doi.org/10.1007/978-3-030-48340-1\_50},
  doi          = {10.1007/978-3-030-48340-1\_50},
  timestamp    = {Tue, 14 Feb 2023 22:22:15 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/LevyF19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/LevyF19,
  author    = {Levy, Scott and Ferreira, Kurt B.},
  editor    = {Hoefler, Torsten and Tr{\"{a}}ff, Jesper Larsson},
  title     = {Evaluating tradeoffs between {MPI} message matching offload hardware capacity and performance},
  booktitle = {Proceedings of the 26th European {MPI} Users' Group Meeting, EuroMPI 2019, Z{\"{u}}rich, Switzerland, September 11-13, 2019},
  pages     = {12:1--12:11},
  publisher = {{ACM}},
  year      = {2019},
  url       = {https://doi.org/10.1145/3343211.3343223},
  doi       = {10.1145/3343211.3343223},
  timestamp = {Wed, 11 Sep 2019 12:26:40 +0200},
  biburl    = {https://dblp.org/rec/conf/pvm/LevyF19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/sp/19/BrightwellFMPR19,
  author       = {Ron Brightwell and
                  Kurt B. Ferreira and
                  Arthur B. Maccabe and
                  Kevin T. Pedretti and
                  Rolf Riesen},
  editor       = {Balazs Gerofi and
                  Yutaka Ishikawa and
                  Rolf Riesen and
                  Robert W. Wisniewski},
  title        = {Sandia Line of {LWKs}},
  booktitle    = {Operating Systems for Supercomputers and High Performance Computing},
  series       = {High-Performance Computing Series},
  volume       = {1},
  pages        = {23--46},
  publisher    = {Springer},
  year         = {2019},
  url          = {https://doi.org/10.1007/978-981-13-6624-6\_3},
  doi          = {10.1007/978-981-13-6624-6\_3},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/sp/19/BrightwellFMPR19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pc/FerreiraLPG18,
  author       = {Kurt B. Ferreira and
                  Scott Levy and
                  Kevin T. Pedretti and
                  Ryan E. Grant},
  title        = {Characterizing {MPI} matching via trace-based simulation},
  journal      = {Parallel Computing},
  volume       = {77},
  pages        = {57--83},
  year         = {2018},
  url          = {https://doi.org/10.1016/j.parco.2018.05.005},
  doi          = {10.1016/J.PARCO.2018.05.005},
  timestamp    = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/pc/FerreiraLPG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dft/BasemanDBMTFSS18,
  author    = {Baseman, Elisabeth and DeBardeleben, Nathan and Blanchard, Sean and
               Moore, Juston S. and Tkachenko, Olena and Ferreira, Kurt B. and
               Siddiqua, Taniya and Sridharan, Vilas},
  title     = {Physics-Informed Machine Learning for {DRAM} Error Modeling},
  booktitle = {2018 {IEEE} International Symposium on Defect and Fault Tolerance in {VLSI} and Nanotechnology Systems, {DFT} 2018, Chicago, IL, USA, October 8-10, 2018},
  pages     = {1--6},
  publisher = {{IEEE} Computer Society},
  year      = {2018},
  url       = {https://doi.org/10.1109/DFT.2018.8602983},
  doi       = {10.1109/DFT.2018.8602983},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dft/BasemanDBMTFSS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icppw/LevyPF18,
  author       = {Scott Levy and
                  Kevin T. Pedretti and
                  Kurt B. Ferreira},
  title        = {Open Science on {Trinity's} {Knights Landing} Partition: An Analysis of
                  User Job Data},
  booktitle    = {The 47th International Conference on Parallel Processing, {ICPP} 2018,
                  Workshop Proceedings, Eugene, OR, USA, August 13-16, 2018},
  pages        = {42:1--42:9},
  publisher    = {{ACM}},
  year         = {2018},
  url          = {https://doi.org/10.1145/3229710.3229753},
  doi          = {10.1145/3229710.3229753},
  timestamp    = {Mon, 14 Jan 2019 13:55:42 +0100},
  biburl       = {https://dblp.org/rec/conf/icppw/LevyPF18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/HeraultRBAFBD18,
  author    = {H{\'{e}}rault, Thomas and Robert, Yves and Bouteiller, Aur{\'{e}}lien and
               Arnold, Dorian C. and Ferreira, Kurt B. and Bosilca, George and
               Dongarra, Jack J.},
  title     = {Optimal Cooperative Checkpointing for Shared High-Performance Computing Platforms},
  booktitle = {2018 {IEEE} International Parallel and Distributed Processing Symposium Workshops, {IPDPS} Workshops 2018, Vancouver, BC, Canada, May 21-25, 2018},
  pages     = {803--812},
  publisher = {{IEEE} Computer Society},
  year      = {2018},
  url       = {https://doi.org/10.1109/IPDPSW.2018.00127},
  doi       = {10.1109/IPDPSW.2018.00127},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/HeraultRBAFBD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/LevyF18,
  author    = {Levy, Scott and Ferreira, Kurt B.},
  title     = {Using Simulation to Examine the Effect of {MPI} Message Matching Costs on Application Performance},
  booktitle = {Proceedings of the 25th European {MPI} Users' Group Meeting, Barcelona, Spain, September 23-26, 2018},
  pages     = {16:1--16:11},
  publisher = {{ACM}},
  year      = {2018},
  url       = {https://doi.org/10.1145/3236367.3236375},
  doi       = {10.1145/3236367.3236375},
  timestamp = {Wed, 21 Nov 2018 12:44:23 +0100},
  biburl    = {https://dblp.org/rec/conf/pvm/LevyF18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/LevyFDSSB18,
  author       = {Scott Levy and
                  Kurt B. Ferreira and
                  Nathan DeBardeleben and
                  Taniya Siddiqua and
                  Vilas Sridharan and
                  Elisabeth Baseman},
  title        = {Lessons learned from memory errors observed over the lifetime of {Cielo}},
  booktitle    = {Proceedings of the International Conference for High Performance Computing,
                  Networking, Storage, and Analysis, {SC} 2018, Dallas, TX, USA, November
                  11-16, 2018},
  pages        = {43:1--43:12},
  publisher    = {{IEEE} / {ACM}},
  year         = {2018},
  url          = {http://dl.acm.org/citation.cfm?id=3291714},
  timestamp    = {Mon, 12 Nov 2018 09:05:15 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/LevyFDSSB18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cluster/LevyFB17,
  author    = {Levy, Scott and Ferreira, Kurt B. and Bridges, Patrick G.},
  title     = {Evaluating the Viability of Using Compression to Mitigate Silent Corruption of Read-Mostly Application Data},
  booktitle = {2017 {IEEE} International Conference on Cluster Computing, {CLUSTER} 2017, Honolulu, HI, USA, September 5-8, 2017},
  pages     = {603--607},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/CLUSTER.2017.99},
  doi       = {10.1109/CLUSTER.2017.99},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cluster/LevyFB17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dft/SiddiquaSRDFLBG17,
  author    = {Siddiqua, Taniya and Sridharan, Vilas and Raasch, Steven E. and
               DeBardeleben, Nathan and Ferreira, Kurt B. and Levy, Scott and
               Baseman, Elisabeth and Guan, Qiang},
  title     = {Lifetime memory reliability data from the field},
  booktitle = {{IEEE} International Symposium on Defect and Fault Tolerance in {VLSI} and Nanotechnology Systems, {DFT} 2017, Cambridge, United Kingdom, October 23-25, 2017},
  pages     = {1--6},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/DFT.2017.8244428},
  doi       = {10.1109/DFT.2017.8244428},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dft/SiddiquaSRDFLBG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dsn/BasemanDFSST17,
  author    = {Baseman, Elisabeth and DeBardeleben, Nathan and Ferreira, Kurt B. and
               Sridharan, Vilas and Siddiqua, Taniya and Tkachenko, Olena},
  title     = {Automating {DRAM} Fault Mitigation By Learning From Experience},
  booktitle = {47th Annual {IEEE/IFIP} International Conference on Dependable Systems and Networks Workshops, {DSN} Workshops 2017, Denver, CO, USA, June 26-29, 2017},
  pages     = {137--140},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/DSN-W.2017.39},
  doi       = {10.1109/DSN-W.2017.39},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dsn/BasemanDFSST17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/WidenerFL17,
  author    = {Widener, Patrick M. and Ferreira, Kurt B. and Levy, Scott},
  editor    = {Heras, Dora Blanco and Boug{\'{e}}, Luc and Mencagli, Gabriele and
               Jeannot, Emmanuel and Sakellariou, Rizos and Badia, Rosa M. and
               Barbosa, Jorge G. and Ricci, Laura and Scott, Stephen L. and
               Lankes, Stefan and Weidendorfer, Josef},
  title     = {It's Not the Heat, It's the Humidity: Scheduling Resilience Activity at Scale},
  booktitle = {Euro-Par 2017: Parallel Processing Workshops - Euro-Par 2017 International Workshops, Santiago de Compostela, Spain, August 28-29, 2017, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {10659},
  pages     = {581--592},
  publisher = {Springer},
  year      = {2017},
  url       = {https://doi.org/10.1007/978-3-319-75178-8\_47},
  doi       = {10.1007/978-3-319-75178-8\_47},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/WidenerFL17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/FerreiraLPG17,
  author    = {Ferreira, Kurt B. and Levy, Scott and Pedretti, Kevin T. and Grant, Ryan E.},
  editor    = {Pe{\~{n}}a, Antonio J. and Balaji, Pavan and Gropp, William and Thakur, Rajeev},
  title     = {Characterizing {MPI} matching via trace-based simulation},
  booktitle = {Proceedings of the 24th European {MPI} Users' Group Meeting, EuroMPI/USA 2017, Chicago, IL, USA, September 25-28, 2017},
  pages     = {8:1--8:11},
  publisher = {{ACM}},
  year      = {2017},
  url       = {https://doi.org/10.1145/3127024.3127040},
  doi       = {10.1145/3127024.3127040},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pvm/FerreiraLPG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijhpca/WidenerLFH16,
  author       = {Patrick M. Widener and
                  Scott Levy and
                  Kurt B. Ferreira and
                  Torsten Hoefler},
  title        = {On noise and the performance benefit of nonblocking collectives},
  journal      = {The International Journal of High Performance Computing Applications},
  volume       = {30},
  number       = {1},
  pages        = {121--133},
  year         = {2016},
  url          = {https://doi.org/10.1177/1094342015611952},
  doi          = {10.1177/1094342015611952},
  timestamp    = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijhpca/WidenerLFH16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ccgrid/MondragonBLFW16,
  author    = {Mondragon, Oscar H. and Bridges, Patrick G. and Levy, Scott and
               Ferreira, Kurt B. and Widener, Patrick M.},
  title     = {Scheduling In-Situ Analytics in Next-Generation Applications},
  booktitle = {{IEEE/ACM} 16th International Symposium on Cluster, Cloud and Grid Computing, CCGrid 2016, Cartagena, Colombia, May 16-19, 2016},
  pages     = {102--105},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  url       = {https://doi.org/10.1109/CCGrid.2016.42},
  doi       = {10.1109/CCGRID.2016.42},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ccgrid/MondragonBLFW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dsn/BasemanDFLRSSG16,
  author    = {Baseman, Elisabeth and DeBardeleben, Nathan and Ferreira, Kurt B. and
               Levy, Scott and Raasch, Steven and Sridharan, Vilas and
               Siddiqua, Taniya and Guan, Qiang},
  title     = {Improving {DRAM} Fault Characterization through Machine Learning},
  booktitle = {46th Annual {IEEE/IFIP} International Conference on Dependable Systems and Networks Workshops, {DSN} Workshops 2016, Toulouse, France, June 28 - July 1, 2016},
  pages     = {250--253},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  url       = {https://doi.org/10.1109/DSN-W.2016.13},
  doi       = {10.1109/DSN-W.2016.13},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dsn/BasemanDFLRSSG16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dsrt/FialaMF16,
  author       = {David Fiala and
                  Frank Mueller and
                  Kurt B. Ferreira},
  title        = {{FlipSphere}: {A} Software-Based {DRAM} Error Detection and Correction
                  Library for {HPC}},
  booktitle    = {20th {IEEE/ACM} International Symposium on Distributed Simulation
                  and Real Time Applications, {DS-RT} 2016, London, United Kingdom,
                  September 21-23, 2016},
  pages        = {19--28},
  publisher    = {{IEEE} Computer Society},
  year         = {2016},
  url          = {https://doi.org/10.1109/DS-RT.2016.27},
  doi          = {10.1109/DS-RT.2016.27},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/dsrt/FialaMF16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/WidenerFL16,
  author    = {Widener, Patrick M. and Ferreira, Kurt B. and Levy, Scott},
  editor    = {Desprez, Fr{\'{e}}d{\'{e}}ric and Dutot, Pierre{-}Fran{\c{c}}ois and
               Kaklamanis, Christos and Marchal, Loris and Molitorisz, Korbinian and
               Ricci, Laura and Scarano, Vittorio and
               Vega{-}Rodr{\'{\i}}guez, Miguel A. and Varbanescu, Ana Lucia and
               Hunold, Sascha and Scott, Stephen L. and Lankes, Stefan and
               Weidendorfer, Josef},
  title     = {Horseshoes and Hand Grenades: The Case for Approximate Coordination in Local Checkpointing Protocols},
  booktitle = {Euro-Par 2016: Parallel Processing Workshops - Euro-Par 2016 International Workshops, Grenoble, France, August 24-26, 2016, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {10104},
  pages     = {623--634},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-58943-5\_50},
  doi       = {10.1007/978-3-319-58943-5\_50},
  timestamp = {Sun, 12 Nov 2023 02:07:45 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/WidenerFL16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpdc/LevyF16,
  author    = {Levy, Scott and Ferreira, Kurt B.},
  editor    = {DeBardeleben, Nathan},
  title     = {An Examination of the Impact of Failure Distribution on Coordinated Checkpoint/Restart},
  booktitle = {Proceedings of the {ACM} Workshop on Fault-Tolerance for {HPC} at Extreme Scale, FTXS@HPDC 2016, Kyoto, Japan, May 31, 2016},
  pages     = {35--42},
  publisher = {{ACM}},
  year      = {2016},
  url       = {https://doi.org/10.1145/2909428.2909430},
  doi       = {10.1145/2909428.2909430},
  timestamp = {Tue, 06 Nov 2018 11:07:20 +0100},
  biburl    = {https://dblp.org/rec/conf/hpdc/LevyF16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ics/FialaMFE16,
  author       = {David Fiala and
                  Frank Mueller and
                  Kurt B. Ferreira and
                  Christian Engelmann},
  editor       = {Ozcan Ozturk and
                  Kemal Ebcioglu and
                  Mahmut T. Kandemir and
                  Onur Mutlu},
  title        = {{Mini-Ckpts}: Surviving {OS} Failures in Persistent Memory},
  booktitle    = {Proceedings of the 2016 International Conference on Supercomputing,
                  {ICS} 2016, Istanbul, Turkey, June 1-3, 2016},
  pages        = {7:1--7:14},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {https://doi.org/10.1145/2925426.2926295},
  doi          = {10.1145/2925426.2926295},
  timestamp    = {Mon, 22 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ics/FialaMFE16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/LevyFWBM16,
  author    = {Levy, Scott and
               Ferreira, Kurt B. and
               Widener, Patrick M. and
               Bridges, Patrick G. and
               Mondragon, Oscar H.},
  editor    = {Dongarra, Jack J. and
               Holmes, Daniel J. and
               Collis, Antonia B. K. and
               Tr{\"{a}}ff, Jesper Larsson and
               Smith, Lorna},
  title     = {How {I} Learned to Stop Worrying and Love In Situ Analytics:
               Leveraging Latent Synchronization in {MPI} Collective Algorithms},
  booktitle = {Proceedings of the 23rd European {MPI} Users' Group Meeting,
               EuroMPI 2016, Edinburgh, United Kingdom, September 25-28, 2016},
  pages     = {140--153},
  publisher = {{ACM}},
  year      = {2016},
  url       = {https://doi.org/10.1145/2966884.2966920},
  doi       = {10.1145/2966884.2966920},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pvm/LevyFWBM16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/LevyFB16,
  author    = {Levy, Scott and
               Ferreira, Kurt B. and
               Bridges, Patrick G.},
  editor    = {West, John and
               Pancake, Cherri M.},
  title     = {Improving application resilience to memory errors
               with lightweight compression},
  booktitle = {Proceedings of the International Conference for
               High Performance Computing, Networking, Storage and Analysis,
               {SC} 2016, Salt Lake City, UT, USA, November 13-18, 2016},
  pages     = {323--334},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  url       = {https://doi.org/10.1109/SC.2016.27},
  doi       = {10.1109/SC.2016.27},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/LevyFB16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/MondragonBLFW16,
  author    = {Mondragon, Oscar H. and
               Bridges, Patrick G. and
               Levy, Scott and
               Ferreira, Kurt B. and
               Widener, Patrick M.},
  editor    = {West, John and
               Pancake, Cherri M.},
  title     = {Understanding performance interference in next-generation {HPC} systems},
  booktitle = {Proceedings of the International Conference for
               High Performance Computing, Networking, Storage and Analysis,
               {SC} 2016, Salt Lake City, UT, USA, November 13-18, 2016},
  pages     = {384--395},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  url       = {https://doi.org/10.1109/SC.2016.32},
  doi       = {10.1109/SC.2016.32},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/MondragonBLFW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijhpca/LevyFBTT15,
  author    = {Levy, Scott and
               Ferreira, Kurt B. and
               Bridges, Patrick G. and
               Thompson, Aidan P. and
               Trott, Christian R.},
  title     = {A study of the viability of exploiting memory content similarity
               to improve resilience to memory errors},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {29},
  number    = {1},
  pages     = {5--20},
  year      = {2015},
  url       = {https://doi.org/10.1177/1094342014560354},
  doi       = {10.1177/1094342014560354},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijhpca/LevyFBTT15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijhpca/IbteshamFA15,
  author    = {Ibtesham, Dewan and
               Ferreira, Kurt B. and
               Arnold, Dorian C.},
  title     = {A checkpoint compression study for high-performance computing systems},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {29},
  number    = {4},
  pages     = {387--402},
  year      = {2015},
  url       = {https://doi.org/10.1177/1094342015570921},
  doi       = {10.1177/1094342015570921},
  timestamp = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijhpca/IbteshamFA15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asplos/SridharanDBFSSG15,
  author    = {Sridharan, Vilas and
               DeBardeleben, Nathan and
               Blanchard, Sean and
               Ferreira, Kurt B. and
               Stearley, Jon and
               Shalf, John and
               Gurumurthi, Sudhanva},
  editor    = {{\"{O}}zturk, {\"{O}}zcan and
               Ebcioglu, Kemal and
               Dwarkadas, Sandhya},
  title     = {Memory Errors in Modern Systems: The Good, The Bad, and The Ugly},
  booktitle = {Proceedings of the Twentieth International Conference on
               Architectural Support for Programming Languages and Operating Systems,
               {ASPLOS} 2015, Istanbul, Turkey, March 14-18, 2015},
  pages     = {297--310},
  publisher = {{ACM}},
  year      = {2015},
  url       = {https://doi.org/10.1145/2694344.2694348},
  doi       = {10.1145/2694344.2694348},
  timestamp = {Wed, 07 Jul 2021 13:23:08 +0200},
  biburl    = {https://dblp.org/rec/conf/asplos/SridharanDBFSSG15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Editor list: the compound surname Gomez Requena is entered in comma form so
% both words land in the last-name part instead of only the final word.
@inproceedings{DBLP:conf/europar/WidenerFLF15,
  author    = {Widener, Patrick M. and
               Ferreira, Kurt B. and
               Levy, Scott and
               Fabian, Nathan},
  editor    = {Hunold, Sascha and
               Costan, Alexandru and
               Gim{\'{e}}nez, Domingo and
               Iosup, Alexandru and
               Ricci, Laura and
               G{\'{o}}mez Requena, Mar{\'{\i}}a Engracia and
               Scarano, Vittorio and
               Varbanescu, Ana Lucia and
               Scott, Stephen L. and
               Lankes, Stefan and
               Weidendorfer, Josef and
               Alexander, Michael},
  title     = {Canaries in a Coal Mine: Using Application-Level Checkpoints
               to Detect Memory Failures},
  booktitle = {Euro-Par 2015: Parallel Processing Workshops -
               Euro-Par 2015 International Workshops, Vienna, Austria,
               August 24-25, 2015, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {9523},
  pages     = {669--681},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-27308-2\_54},
  doi       = {10.1007/978-3-319-27308-2\_54},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/WidenerFLF15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpdc/GoudarziASFF15,
  author    = {Goudarzi, Alireza and
               Arnold, Dorian C. and
               Stefanovic, Darko and
               Ferreira, Kurt B. and
               Feldman, Guy},
  editor    = {DeBardeleben, Nathan and
               Cappello, Franck and
               Clay, Robert L.},
  title     = {A Principled Approach to {HPC} Event Monitoring},
  booktitle = {Proceedings of the 5th Workshop on Fault Tolerance for {HPC}
               at eXtreme Scale, {FTXS} 2015, Portland, Oregon, USA, June 15, 2015},
  pages     = {3--10},
  publisher = {{ACM}},
  year      = {2015},
  url       = {https://doi.org/10.1145/2751504.2751506},
  doi       = {10.1145/2751504.2751506},
  timestamp = {Tue, 06 Nov 2018 11:07:20 +0100},
  biburl    = {https://dblp.org/rec/conf/hpdc/GoudarziASFF15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpdc/RiesenMGLLPFLKW15,
  author    = {Riesen, Rolf and
               Maccabe, Arthur Barney and
               Gerofi, Balazs and
               Lombard, David N. and
               Lange, John Jack and
               Pedretti, Kevin T. and
               Ferreira, Kurt B. and
               Lang, Mike and
               Keppel, Pardo and
               Wisniewski, Robert W. and
               Brightwell, Ron and
               Inglett, Todd and
               Park, Yoonho and
               Ishikawa, Yutaka},
  editor    = {Hoefler, Torsten and
               Iskra, Kamil},
  title     = {What is a Lightweight Kernel?},
  booktitle = {Proceedings of the 5th International Workshop on
               Runtime and Operating Systems for Supercomputers,
               {ROSS} 2015, Portland, OR, USA, June 16, 2015},
  pages     = {9:1--9:8},
  publisher = {{ACM}},
  year      = {2015},
  url       = {https://doi.org/10.1145/2768405.2768414},
  doi       = {10.1145/2768405.2768414},
  timestamp = {Sun, 19 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/hpdc/RiesenMGLLPFLKW15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun Cray is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/sc/PedrettiOFSS15,
  author    = {Pedretti, Kevin T. and
               Olivier, Stephen L. and
               Ferreira, Kurt B. and
               Shipman, Galen M. and
               Shu, Wei},
  editor    = {Cameron, Kirk W. and
               Hoisie, Adolfy and
               Kerbyson, Darren J. and
               Lowenthal, David K. and
               Nikolopoulos, Dimitrios S. and
               Yalamanchili, Sudha and
               Carrington, Laura and
               Manzano, Joseph B.},
  title     = {Early experiences with node-level power capping
               on the {Cray} {XC40} platform},
  booktitle = {Proceedings of the 3rd International Workshop on
               Energy Efficient Supercomputing, {E2SC} 2015,
               Austin, Texas, USA, November 15, 2015},
  pages     = {1:1--1:10},
  publisher = {{ACM}},
  year      = {2015},
  url       = {https://doi.org/10.1145/2834800.2834801},
  doi       = {10.1145/2834800.2834801},
  timestamp = {Tue, 06 Nov 2018 16:59:29 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/PedrettiOFSS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fgcs/FerreiraRBAB14,
  author    = {Ferreira, Kurt B. and
               Riesen, Rolf and
               Bridges, Patrick G. and
               Arnold, Dorian C. and
               Brightwell, Ron},
  title     = {Accelerating incremental checkpointing for extreme-scale computing},
  journal   = {Future Gener. Comput. Syst.},
  volume    = {30},
  pages     = {66--77},
  year      = {2014},
  url       = {https://doi.org/10.1016/j.future.2013.04.017},
  doi       = {10.1016/J.FUTURE.2013.04.017},
  timestamp = {Wed, 19 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/fgcs/FerreiraRBAB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dsn/IbteshamDAF14,
  author    = {Ibtesham, Dewan and
               Debonis, David and
               Arnold, Dorian C. and
               Ferreira, Kurt B.},
  title     = {Coarse-Grained Energy Modeling of Rollback/Recovery Mechanisms},
  booktitle = {44th Annual {IEEE/IFIP} International Conference on
               Dependable Systems and Networks, {DSN} 2014,
               Atlanta, GA, USA, June 23-26, 2014},
  pages     = {708--713},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/DSN.2014.71},
  doi       = {10.1109/DSN.2014.71},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dsn/IbteshamDAF14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpp/LevyFB14,
  author    = {Levy, Scott and
               Ferreira, Kurt B. and
               Bridges, Patrick G.},
  title     = {Characterizing the Impact of Rollback Avoidance at
               Extreme-Scale: {A} Modeling Approach},
  booktitle = {43rd International Conference on Parallel Processing,
               {ICPP} 2014, Minneapolis, MN, USA, September 9-12, 2014},
  pages     = {401--410},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/ICPP.2014.49},
  doi       = {10.1109/ICPP.2014.49},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icpp/LevyFB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pdp/MillsZMFG14,
  author    = {Mills, Bryan N. and
               Znati, Taieb and
               Melhem, Rami G. and
               Ferreira, Kurt B. and
               Grant, Ryan E.},
  title     = {Energy Consumption of Resilience Mechanisms in Large Scale Systems},
  booktitle = {22nd Euromicro International Conference on Parallel,
               Distributed, and Network-Based Processing, {PDP} 2014,
               Torino, Italy, February 12-14, 2014},
  pages     = {528--535},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/PDP.2014.111},
  doi       = {10.1109/PDP.2014.111},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pdp/MillsZMFG14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pvm/WidenerFLH14,
  author    = {Widener, Patrick M. and
               Ferreira, Kurt B. and
               Levy, Scott and
               Hoefler, Torsten},
  editor    = {Dongarra, Jack J. and
               Ishikawa, Yutaka and
               Hori, Atsushi},
  title     = {Exploring the effect of noise on the performance benefit
               of nonblocking allreduce},
  booktitle = {21st European {MPI} Users' Group Meeting, EuroMPI/ASIA '14,
               Kyoto, Japan - September 09 - 12, 2014},
  pages     = {77},
  publisher = {{ACM}},
  year      = {2014},
  url       = {https://doi.org/10.1145/2642769.2642786},
  doi       = {10.1145/2642769.2642786},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pvm/WidenerFLH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/FerreiraWLAH14,
  author    = {Ferreira, Kurt B. and
               Widener, Patrick M. and
               Levy, Scott and
               Arnold, Dorian C. and
               Hoefler, Torsten},
  editor    = {Damkroger, Trish and
               Dongarra, Jack J.},
  title     = {Understanding the Effects of Communication and Coordination
               on Checkpointing at Scale},
  booktitle = {International Conference for High Performance Computing,
               Networking, Storage and Analysis, {SC} 2014,
               New Orleans, LA, USA, November 16-21, 2014},
  pages     = {883--894},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/SC.2014.77},
  doi       = {10.1109/SC.2014.77},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/FerreiraWLAH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Authors: the generational suffix III is entered as the Jr part and the
% two-word surname Van Dyke uses the comma form, so neither is mis-split
% into given-name and last-name parts.
@book{DBLP:books/daglib/0030022,
  author    = {Laros, III, James H. and
               Pedretti, Kevin T. and
               Kelly, Suzanne M. and
               Shu, Wei and
               Ferreira, Kurt B. and
               Van Dyke, John and
               Vaughan, Courtenay T.},
  title     = {Energy-Efficient High Performance Computing - Measurement and Tuning},
  series    = {Springer Briefs in Computer Science},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-1-4471-4492-2},
  doi       = {10.1007/978-1-4471-4492-2},
  isbn      = {978-1-4471-4491-5},
  timestamp = {Tue, 16 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/books/daglib/0030022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cluster/FerreiraBBP13,
  author    = {Ferreira, Kurt B. and
               Bridges, Patrick G. and
               Brightwell, Ron and
               Pedretti, Kevin T.},
  title     = {The impact of system design parameters on application noise sensitivity},
  journal   = {Clust. Comput.},
  volume    = {16},
  number    = {1},
  pages     = {117--129},
  year      = {2013},
  url       = {https://doi.org/10.1007/s10586-011-0178-3},
  doi       = {10.1007/S10586-011-0178-3},
  timestamp = {Tue, 29 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/cluster/FerreiraBBP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/WidenerFLBAB13,
  author    = {Widener, Patrick M. and
               Ferreira, Kurt B. and
               Levy, Scott and
               Bridges, Patrick G. and
               Arnold, Dorian C. and
               Brightwell, Ron},
  editor    = {an Mey, Dieter and
               Alexander, Michael and
               Bientinesi, Paolo and
               Cannataro, Mario and
               Clauss, Carsten and
               Costan, Alexandru and
               Kecskemeti, Gabor and
               Morin, Christine and
               Ricci, Laura and
               Sahuquillo, Julio and
               Schulz, Martin and
               Scarano, Vittorio and
               Scott, Stephen L. and
               Weidendorfer, Josef},
  title     = {Asking the Right Questions: Benchmarking Fault-Tolerant
               Extreme-Scale Systems},
  booktitle = {Euro-Par 2013: Parallel Processing Workshops -
               BigDataCloud, DIHC, FedICI, HeteroPar, HiBB, LSDVE, MHPC, OMHI,
               PADABS, PROPER, Resilience, ROME, and {UCHPC} 2013,
               Aachen, Germany, August 26-27, 2013. Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {8374},
  pages     = {717--726},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-54420-0\_70},
  doi       = {10.1007/978-3-642-54420-0\_70},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/WidenerFLBAB13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpdc/LevyDBF13,
  author    = {Levy, Scott and
               Dosanjh, Matthew G. F. and
               Bridges, Patrick G. and
               Ferreira, Kurt B.},
  editor    = {DeBardeleben, Nathan and
               Stearley, Jon and
               Cappello, Franck},
  title     = {Using unreliable virtual hardware to inject errors
               in extreme-scale systems},
  booktitle = {Proceedings of the 3rd Workshop on Fault-tolerance for {HPC}
               at extreme scale, jointly held with the 22nd International
               Symposium on High-Performance Parallel and Distributed Computing,
               HPDC'13, New York, NY, USA, June 18, 2013},
  pages     = {21--26},
  publisher = {{ACM}},
  year      = {2013},
  url       = {https://doi.org/10.1145/2465813.2465820},
  doi       = {10.1145/2465813.2465820},
  timestamp = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/hpdc/LevyDBF13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ics/LevyBFTT13,
  author    = {Levy, Scott and
               Bridges, Patrick G. and
               Ferreira, Kurt B. and
               Thompson, Aidan P. and
               Trott, Christian R.},
  editor    = {Hoefler, Torsten and
               Iskra, Kamil},
  title     = {Evaluating the feasibility of using memory content similarity
               to improve system resilience},
  booktitle = {Proceedings of the 3rd International Workshop on
               Runtime and Operating Systems for Supercomputers,
               {ROSS} 2013, Eugene, Oregon, USA, June 10, 2013},
  pages     = {7:1--7:8},
  publisher = {{ACM}},
  year      = {2013},
  url       = {https://doi.org/10.1145/2491661.2481432},
  doi       = {10.1145/2491661.2481432},
  timestamp = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ics/LevyBFTT13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/MillsGFR13,
  author    = {Mills, Bryan N. and
               Grant, Ryan E. and
               Ferreira, Kurt B. and
               Riesen, Rolf},
  editor    = {Cameron, Kirk W. and
               Kerbyson, Darren J. and
               Marquez, Andres and
               Nikolopoulos, Dimitrios S. and
               Yalamanchili, Sudha and
               Barker, Kevin J.},
  title     = {Evaluating energy savings for checkpoint/restart},
  booktitle = {Proceedings of the 1st International Workshop on
               Energy Efficient Supercomputing, {E2SC} 2013,
               Denver, Colorado, USA, November 17-21, 2013},
  pages     = {6:1--6:8},
  publisher = {{ACM}},
  year      = {2013},
  url       = {https://doi.org/10.1145/2536430.2536432},
  doi       = {10.1145/2536430.2536432},
  timestamp = {Tue, 06 Nov 2018 16:59:29 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/MillsGFR13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/LevyTFAHW13,
  author    = {Levy, Scott and
               Topp, Bryan and
               Ferreira, Kurt B. and
               Arnold, Dorian C. and
               Hoefler, Torsten and
               Widener, Patrick M.},
  editor    = {Jarvis, Stephen A. and
               Wright, Steven A. and
               Hammond, Simon D.},
  title     = {Using Simulation to Evaluate the Performance of
               Resilience Strategies at Scale},
  booktitle = {High Performance Computing Systems. Performance Modeling,
               Benchmarking and Simulation - 4th International Workshop,
               {PMBS} 2013, Denver, CO, USA, November 18, 2013.
               Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {8551},
  pages     = {91--114},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-319-10214-6\_5},
  doi       = {10.1007/978-3-319-10214-6\_5},
  timestamp = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/LevyTFAHW13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dsn/StearleyFRLPABR12,
  author    = {Stearley, Jon and
               Ferreira, Kurt B. and
               Robinson, David J. and
               Laros, Jim and
               Pedretti, Kevin T. and
               Arnold, Dorian C. and
               Bridges, Patrick G. and
               Riesen, Rolf},
  title     = {Does partial replication pay off?},
  booktitle = {{IEEE/IFIP} International Conference on Dependable Systems
               and Networks Workshops, {DSN} 2012,
               Boston, MA, USA, June 25-28, 2012},
  pages     = {1--6},
  publisher = {{IEEE} Computer Society},
  year      = {2012},
  url       = {https://doi.org/10.1109/DSNW.2012.6264669},
  doi       = {10.1109/DSNW.2012.6264669},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/dsn/StearleyFRLPABR12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/FerreiraRAIB12,
  author    = {Ferreira, Kurt B. and
               Riesen, Rolf and
               Arnold, Dorian C. and
               Ibtesham, Dewan and
               Brightwell, Ron},
  editor    = {Caragiannis, Ioannis and
               Alexander, Michael and
               Badia, Rosa M. and
               Cannataro, Mario and
               Costan, Alexandru and
               Danelutto, Marco and
               Desprez, Fr{\'{e}}d{\'{e}}ric and
               Krammer, Bettina and
               Sahuquillo, Julio and
               Scott, Stephen L. and
               Weidendorfer, Josef},
  title     = {The Viability of Using Compression to Decrease Message Log Sizes},
  booktitle = {Euro-Par 2012: Parallel Processing Workshops -
               BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER,
               Resilience, UCHPC, VHPC, Rhodes Islands, Greece,
               August 27-31, 2012. Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {7640},
  pages     = {484--493},
  publisher = {Springer},
  year      = {2012},
  url       = {https://doi.org/10.1007/978-3-642-36949-0\_54},
  doi       = {10.1007/978-3-642-36949-0\_54},
  timestamp = {Wed, 19 Feb 2020 14:52:57 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/FerreiraRAIB12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdcs/ElliottKFMFE12,
  author    = {Elliott, James and
               Kharbas, Kishor and
               Fiala, David and
               Mueller, Frank and
               Ferreira, Kurt B. and
               Engelmann, Christian},
  title     = {Combining Partial Redundancy and Checkpointing for {HPC}},
  booktitle = {2012 {IEEE} 32nd International Conference on
               Distributed Computing Systems, Macau, China, June 18-21, 2012},
  pages     = {615--626},
  publisher = {{IEEE} Computer Society},
  year      = {2012},
  url       = {https://doi.org/10.1109/ICDCS.2012.56},
  doi       = {10.1109/ICDCS.2012.56},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icdcs/ElliottKFMFE12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpp/IbteshamABFB12,
  author    = {Ibtesham, Dewan and
               Arnold, Dorian C. and
               Bridges, Patrick G. and
               Ferreira, Kurt B. and
               Brightwell, Ron},
  title     = {On the Viability of Compression for Reducing the Overheads
               of Checkpoint/Restart-Based Fault Tolerance},
  booktitle = {41st International Conference on Parallel Processing,
               {ICPP} 2012, Pittsburgh, PA, USA, September 10-13, 2012},
  pages     = {148--157},
  publisher = {{IEEE} Computer Society},
  year      = {2012},
  url       = {https://doi.org/10.1109/ICPP.2012.45},
  doi       = {10.1109/ICPP.2012.45},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icpp/IbteshamABFB12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ics/FerreiraPBBF012,
  author    = {Ferreira, Kurt B. and
               Pedretti, Kevin T. and
               Brightwell, Ron and
               Bridges, Patrick G. and
               Fiala, David and
               Mueller, Frank},
  editor    = {Hoefler, Torsten and
               Iskra, Kamil},
  title     = {Evaluating operating system vulnerability to memory errors},
  booktitle = {Proceedings of the 2nd International Workshop on
               Runtime and Operating Systems for Supercomputers,
               {ROSS} '12, Venice, Italy, June 29, 2012},
  pages     = {11:1--11:8},
  publisher = {{ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1145/2318916.2318930},
  doi       = {10.1145/2318916.2318930},
  timestamp = {Wed, 12 Jan 2022 14:52:46 +0100},
  biburl    = {https://dblp.org/rec/conf/ics/FerreiraPBBF012.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Authors: the two-word surname Da Silva is entered in comma form so that
% Da is not parsed as part of the given name.
@inproceedings{DBLP:conf/sc/RiesenFSLAB12,
  author    = {Riesen, Rolf and
               Ferreira, Kurt B. and
               Da Silva, Dilma and
               Lemarinier, Pierre and
               Arnold, Dorian C. and
               Bridges, Patrick G.},
  editor    = {Hollingsworth, Jeffrey K.},
  title     = {Alleviating scalability issues of checkpointing protocols},
  booktitle = {{SC} Conference on High Performance Computing Networking,
               Storage and Analysis, {SC} '12, Salt Lake City, UT,
               {USA} - November 11 - 15, 2012},
  pages     = {18},
  publisher = {{IEEE/ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1109/SC.2012.18},
  doi       = {10.1109/SC.2012.18},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/RiesenFSLAB12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/FialaMERFB12,
  author    = {Fiala, David and
               Mueller, Frank and
               Engelmann, Christian and
               Riesen, Rolf and
               Ferreira, Kurt B. and
               Brightwell, Ron},
  editor    = {Hollingsworth, Jeffrey K.},
  title     = {Detection and correction of silent data corruption
               for large-scale high-performance computing},
  booktitle = {{SC} Conference on High Performance Computing Networking,
               Storage and Analysis, {SC} '12, Salt Lake City, UT,
               {USA} - November 11 - 15, 2012},
  pages     = {78},
  publisher = {{IEEE/ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1109/SC.2012.49},
  doi       = {10.1109/SC.2012.49},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/FialaMERFB12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/IbteshamAFB12,
  author    = {Ibtesham, Dewan and
               Arnold, Dorian C. and
               Ferreira, Kurt B. and
               Brightwell, Ronald},
  title     = {Abstract: Comparing {GPU} and Increment-Based Checkpoint Compression},
  booktitle = {2012 {SC} Companion: High Performance Computing,
               Networking Storage and Analysis,
               Salt Lake City, UT, USA, November 10-16, 2012},
  pages     = {1505--1506},
  publisher = {{IEEE} Computer Society},
  year      = {2012},
  url       = {https://doi.org/10.1109/SC.Companion.2012.290},
  doi       = {10.1109/SC.COMPANION.2012.290},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/IbteshamAFB12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/IbteshamAFB12a,
  author       = {Ibtesham, Dewan and
                  Arnold, Dorian C. and
                  Ferreira, Kurt B. and
                  Brightwell, Ronald},
  title        = {Poster: Comparing {GPU} and Increment-Based
                  Checkpoint Compression},
  booktitle    = {2012 {SC} Companion: High Performance Computing,
                  Networking Storage and Analysis, Salt Lake City, UT, USA,
                  November 10-16, 2012},
  pages        = {1507},
  publisher    = {{IEEE} Computer Society},
  year         = {2012},
  url          = {https://doi.org/10.1109/SC.Companion.2012.291},
  doi          = {10.1109/SC.COMPANION.2012.291},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/IbteshamAFB12a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/simutools/RodriguesCBFBH12,
  author       = {Rodrigues, Arun and
                  Cooper{-}Balis, Elliott and
                  Bergman, Keren and
                  Ferreira, Kurt B. and
                  Bunde, David P. and
                  Hemmert, K. Scott},
  editor       = {Riley, George F. and
                  Quaglia, Francesco and
                  Himmelspach, Jan},
  title        = {Improvements to the structural simulation toolkit},
  booktitle    = {International {ICST} Conference on Simulation Tools
                  and Techniques, {SIMUTOOLS} '12, Sirmione-Desenzano, Italy,
                  March 19-23, 2012},
  pages        = {190--195},
  publisher    = {{ICST/ACM}},
  year         = {2012},
  url          = {https://doi.org/10.4108/icst.simutools.2012.247848},
  doi          = {10.4108/ICST.SIMUTOOLS.2012.247848},
  timestamp    = {Fri, 28 Feb 2020 13:12:27 +0100},
  biburl       = {https://dblp.org/rec/conf/simutools/RodriguesCBFBH12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint; the eprint/eprinttype pair carries the identifier, url is the abstract page.
@article{DBLP:journals/corr/abs-1206-1390,
  author       = {Bridges, Patrick G. and
                  Ferreira, Kurt B. and
                  Heroux, Michael A. and
                  Hoemmen, Mark},
  title        = {Fault-tolerant linear solvers via selective reliability},
  journal      = {CoRR},
  volume       = {abs/1206.1390},
  year         = {2012},
  url          = {https://arxiv.org/abs/1206.1390},
  eprinttype   = {arXiv},
  eprint       = {1206.1390},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1206-1390.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% Names use the "Last, First" form so the compound surname "Di Martino" is not split by BibTeX.
@inproceedings{DBLP:conf/europar/RiesenFVTR11,
  author       = {Riesen, Rolf and
                  Ferreira, Kurt B. and
                  Varela, Maria Ruiz and
                  Taufer, Michela and
                  Rodrigues, Arun},
  editor       = {Alexander, Michael and
                  D'Ambra, Pasqua and
                  Belloum, Adam and
                  Bosilca, George and
                  Cannataro, Mario and
                  Danelutto, Marco and
                  Di Martino, Beniamino and
                  Gerndt, Michael and
                  Jeannot, Emmanuel and
                  Namyst, Raymond and
                  Roman, Jean and
                  Scott, Stephen L. and
                  Tr{\"{a}}ff, Jesper Larsson and
                  Vall{\'{e}}e, Geoffroy and
                  Weidendorfer, Josef},
  title        = {Simulating Application Resilience at Exascale},
  booktitle    = {Euro-Par 2011: Parallel Processing Workshops - CCPI, CGWS, HeteroPar,
                  HiBB, HPCVirt, HPPC, HPSS, MDGS, ProPer, Resilience, UCHPC, VHPC,
                  Bordeaux, France, August 29 - September 2, 2011, Revised Selected
                  Papers, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {7156},
  pages        = {221--230},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-29740-3\_26},
  doi          = {10.1007/978-3-642-29740-3\_26},
  timestamp    = {Wed, 19 Feb 2020 14:52:57 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/RiesenFVTR11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% Names use the "Last, First" form so the compound surname "Di Martino" is not split by BibTeX.
% The acronym OS is braced in the title so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/europar/BridgesHFHSB11,
  author       = {Bridges, Patrick G. and
                  Hoemmen, Mark and
                  Ferreira, Kurt B. and
                  Heroux, Michael A. and
                  Soltero, Philip and
                  Brightwell, Ron},
  editor       = {Alexander, Michael and
                  D'Ambra, Pasqua and
                  Belloum, Adam and
                  Bosilca, George and
                  Cannataro, Mario and
                  Danelutto, Marco and
                  Di Martino, Beniamino and
                  Gerndt, Michael and
                  Jeannot, Emmanuel and
                  Namyst, Raymond and
                  Roman, Jean and
                  Scott, Stephen L. and
                  Tr{\"{a}}ff, Jesper Larsson and
                  Vall{\'{e}}e, Geoffroy and
                  Weidendorfer, Josef},
  title        = {Cooperative Application/{OS} {DRAM} Fault Recovery},
  booktitle    = {Euro-Par 2011: Parallel Processing Workshops - CCPI, CGWS, HeteroPar,
                  HiBB, HPCVirt, HPPC, HPSS, MDGS, ProPer, Resilience, UCHPC, VHPC,
                  Bordeaux, France, August 29 - September 2, 2011, Revised Selected
                  Papers, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {7156},
  pages        = {241--250},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-29740-3\_28},
  doi          = {10.1007/978-3-642-29740-3\_28},
  timestamp    = {Fri, 02 Jun 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/europar/BridgesHFHSB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% Names use the "Last, First" form so the compound surname "Di Martino" is not split by BibTeX.
@inproceedings{DBLP:conf/europar/FialaFME11,
  author       = {Fiala, David and
                  Ferreira, Kurt B. and
                  Mueller, Frank and
                  Engelmann, Christian},
  editor       = {Alexander, Michael and
                  D'Ambra, Pasqua and
                  Belloum, Adam and
                  Bosilca, George and
                  Cannataro, Mario and
                  Danelutto, Marco and
                  Di Martino, Beniamino and
                  Gerndt, Michael and
                  Jeannot, Emmanuel and
                  Namyst, Raymond and
                  Roman, Jean and
                  Scott, Stephen L. and
                  Tr{\"{a}}ff, Jesper Larsson and
                  Vall{\'{e}}e, Geoffroy and
                  Weidendorfer, Josef},
  title        = {A Tunable, Software-Based {DRAM} Error Detection and Correction Library
                  for {HPC}},
  booktitle    = {Euro-Par 2011: Parallel Processing Workshops - CCPI, CGWS, HeteroPar,
                  HiBB, HPCVirt, HPPC, HPSS, MDGS, ProPer, Resilience, UCHPC, VHPC,
                  Bordeaux, France, August 29 - September 2, 2011, Revised Selected
                  Papers, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {7156},
  pages        = {251--261},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-29740-3\_29},
  doi          = {10.1007/978-3-642-29740-3\_29},
  timestamp    = {Mon, 22 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/FialaFME11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% Names use the "Last, First" form so the compound surname "Di Martino" is not split by BibTeX.
@inproceedings{DBLP:conf/europar/IbteshamAFB11,
  author       = {Ibtesham, Dewan and
                  Arnold, Dorian C. and
                  Ferreira, Kurt B. and
                  Bridges, Patrick G.},
  editor       = {Alexander, Michael and
                  D'Ambra, Pasqua and
                  Belloum, Adam and
                  Bosilca, George and
                  Cannataro, Mario and
                  Danelutto, Marco and
                  Di Martino, Beniamino and
                  Gerndt, Michael and
                  Jeannot, Emmanuel and
                  Namyst, Raymond and
                  Roman, Jean and
                  Scott, Stephen L. and
                  Tr{\"{a}}ff, Jesper Larsson and
                  Vall{\'{e}}e, Geoffroy and
                  Weidendorfer, Josef},
  title        = {On the Viability of Checkpoint Compression for Extreme Scale Fault
                  Tolerance},
  booktitle    = {Euro-Par 2011: Parallel Processing Workshops - CCPI, CGWS, HeteroPar,
                  HiBB, HPCVirt, HPPC, HPSS, MDGS, ProPer, Resilience, UCHPC, VHPC,
                  Bordeaux, France, August 29 - September 2, 2011, Revised Selected
                  Papers, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {7156},
  pages        = {302--311},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-29740-3\_34},
  doi          = {10.1007/978-3-642-29740-3\_34},
  timestamp    = {Wed, 24 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/europar/IbteshamAFB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/hpdc/LeonRFM11,
  author       = {Le{\'{o}}n, Edgar A. and
                  Riesen, Rolf and
                  Ferreira, Kurt B. and
                  Maccabe, Arthur B.},
  editor       = {Maccabe, Arthur B. and
                  Thain, Douglas},
  title        = {Cache injection for parallel applications},
  booktitle    = {Proceedings of the 20th {ACM} International Symposium
                  on High Performance Distributed Computing, {HPDC} 2011,
                  San Jose, CA, USA, June 8-11, 2011},
  pages        = {15--26},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/1996130.1996135},
  doi          = {10.1145/1996130.1996135},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/hpdc/LeonRFM11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The acronym GPU is braced in the title so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/pvm/FerreiraRBBA11,
  author       = {Ferreira, Kurt B. and
                  Riesen, Rolf and
                  Brightwell, Ron and
                  Bridges, Patrick G. and
                  Arnold, Dorian C.},
  editor       = {Cotronis, Yiannis and
                  Danalis, Anthony and
                  Nikolopoulos, Dimitrios S. and
                  Dongarra, Jack J.},
  title        = {libhashckpt: Hash-Based Incremental Checkpointing Using {GPU}'s},
  booktitle    = {Recent Advances in the Message Passing Interface - 18th European {MPI}
                  Users' Group Meeting, EuroMPI 2011, Santorini, Greece, September 18-21,
                  2011. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {6960},
  pages        = {272--281},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-24449-0\_31},
  doi          = {10.1007/978-3-642-24449-0\_31},
  timestamp    = {Tue, 14 May 2019 10:00:52 +0200},
  biburl       = {https://dblp.org/rec/conf/pvm/FerreiraRBBA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% "Laros, III, James H." uses the "Last, Jr, First" form so the suffix III is not parsed as the surname.
@inproceedings{DBLP:conf/sc/FerreiraSLOPBRBA11,
  author       = {Ferreira, Kurt B. and
                  Stearley, Jon and
                  Laros, III, James H. and
                  Oldfield, Ron A. and
                  Pedretti, Kevin T. and
                  Brightwell, Ron and
                  Riesen, Rolf and
                  Bridges, Patrick G. and
                  Arnold, Dorian C.},
  editor       = {Lathrop, Scott A. and
                  Costa, Jim and
                  Kramer, William},
  title        = {Evaluating the viability of process replication reliability for exascale
                  systems},
  booktitle    = {Conference on High Performance Computing Networking, Storage and Analysis,
                  {SC} 2011, Seattle, WA, USA, November 12-18, 2011},
  pages        = {44:1--44:12},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/2063384.2063443},
  doi          = {10.1145/2063384.2063443},
  timestamp    = {Fri, 23 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/FerreiraSLOPBRBA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/sc/FialaMERF11,
  author       = {Fiala, David and
                  Mueller, Frank and
                  Engelmann, Christian and
                  Riesen, Rolf and
                  Ferreira, Kurt B.},
  editor       = {Lathrop, Scott A. and
                  Costa, Jim and
                  Kramer, William},
  title        = {Poster: detection and correction of silent data corruption
                  for large-scale high-performance computing},
  booktitle    = {Conference on High Performance Computing Networking,
                  Storage and Analysis - Companion Volume, {SC} 2011,
                  Seattle, WA, USA, November 12-18, 2011},
  pages        = {47--48},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/2148600.2148625},
  doi          = {10.1145/2148600.2148625},
  timestamp    = {Mon, 22 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/FialaMERF11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/sc/FialaFME11,
  author       = {Fiala, David and
                  Ferreira, Kurt B. and
                  Mueller, Frank and
                  Engelmann, Christian},
  editor       = {Lathrop, Scott A. and
                  Costa, Jim and
                  Kramer, William},
  title        = {Poster: a tunable, software-based {DRAM} error detection
                  and correction library for {HPC}},
  booktitle    = {Conference on High Performance Computing Networking,
                  Storage and Analysis - Companion Volume, {SC} 2011,
                  Seattle, WA, USA, November 12-18, 2011},
  pages        = {49--50},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/2148600.2148626},
  doi          = {10.1145/2148600.2148626},
  timestamp    = {Mon, 22 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/FialaFME11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/cluster/FerreiraBBP10,
  author       = {Ferreira, Kurt B. and
                  Bridges, Patrick G. and
                  Brightwell, Ron and
                  Pedretti, Kevin T.},
  title        = {The Impact of System Design Parameters
                  on Application Noise Sensitivity},
  booktitle    = {Proceedings of the 2010 {IEEE} International Conference
                  on Cluster Computing, Heraklion, Crete, Greece,
                  20-24 September, 2010},
  pages        = {146--155},
  publisher    = {{IEEE} Computer Society},
  year         = {2010},
  url          = {https://doi.org/10.1109/CLUSTER.2010.41},
  doi          = {10.1109/CLUSTER.2010.41},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cluster/FerreiraBBP10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/dsn/RiesenFS10,
  author       = {Riesen, Rolf and
                  Ferreira, Kurt B. and
                  Stearley, Jon},
  title        = {See applications run and throughput jump:
                  The case for redundant computing in {HPC}},
  booktitle    = {{IEEE/IFIP} International Conference on Dependable Systems
                  and Networks Workshops {(DSN-W} 2010), Chicago, Illinois, USA,
                  June 28 - July 1, 2010},
  pages        = {29--34},
  publisher    = {{IEEE} Computer Society},
  year         = {2010},
  url          = {https://doi.org/10.1109/DSNW.2010.5542625},
  doi          = {10.1109/DSNW.2010.5542625},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/dsn/RiesenFS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/pvm/BrightwellFR10,
  author       = {Brightwell, Ron and
                  Ferreira, Kurt B. and
                  Riesen, Rolf},
  editor       = {Keller, Rainer and
                  Gabriel, Edgar and
                  Resch, Michael M. and
                  Dongarra, Jack J.},
  title        = {Transparent Redundant Computing with {MPI}},
  booktitle    = {Recent Advances in the Message Passing Interface -
                  17th European {MPI} Users' Group Meeting, EuroMPI 2010,
                  Stuttgart, Germany, September 12-15, 2010. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {6305},
  pages        = {208--218},
  publisher    = {Springer},
  year         = {2010},
  url          = {https://doi.org/10.1007/978-3-642-15646-5\_22},
  doi          = {10.1007/978-3-642-15646-5\_22},
  timestamp    = {Tue, 14 May 2019 10:00:52 +0200},
  biburl       = {https://dblp.org/rec/conf/pvm/BrightwellFR10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/concurrency/RiesenBBHMWF09,
  author       = {Riesen, Rolf and
                  Brightwell, Ron and
                  Bridges, Patrick G. and
                  Hudson, Trammell and
                  Maccabe, Arthur B. and
                  Widener, Patrick M. and
                  Ferreira, Kurt B.},
  title        = {Designing and implementing lightweight kernels
                  for capability computing},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {21},
  number       = {6},
  pages        = {793--817},
  year         = {2009},
  url          = {https://doi.org/10.1002/cpe.1361},
  doi          = {10.1002/CPE.1361},
  timestamp    = {Sun, 12 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/concurrency/RiesenBBHMWF09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% "Laros, III, James H." uses the "Last, Jr, First" form so the suffix III is not parsed as the surname.
@inproceedings{DBLP:conf/cluster/LarosPKVFVS09,
  author       = {Laros, III, James H. and
                  Pedretti, Kevin T. and
                  Kelly, Suzanne M. and
                  Vandyke, John P. and
                  Ferreira, Kurt B. and
                  Vaughan, Courtenay T. and
                  Swan, Mark},
  title        = {Topics on measuring real power usage on high performance computing
                  platforms},
  booktitle    = {Proceedings of the 2009 {IEEE} International Conference on Cluster
                  Computing, August 31 - September 4, 2009, New Orleans, Louisiana,
                  {USA}},
  pages        = {1--8},
  publisher    = {{IEEE} Computer Society},
  year         = {2009},
  url          = {https://doi.org/10.1109/CLUSTR.2009.5289179},
  doi          = {10.1109/CLUSTR.2009.5289179},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cluster/LarosPKVFVS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The product name SeaStar is braced in the title so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/icccn/BrightwellPF08,
  author       = {Brightwell, Ron and
                  Pedretti, Kevin T. and
                  Ferreira, Kurt B.},
  title        = {Instrumentation and Analysis of {MPI} Queue Times on the {SeaStar}
                  High-Performance Network},
  booktitle    = {Proceedings of the 17th International Conference on Computer Communications
                  and Networks, {IEEE} {ICCCN} 2008, St. Thomas, {U.S.} Virgin Islands,
                  August 3-7, 2008},
  pages        = {590--596},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/ICCCN.2008.ECP.116},
  doi          = {10.1109/ICCCN.2008.ECP.116},
  timestamp    = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl       = {https://dblp.org/rec/conf/icccn/BrightwellPF08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/sc/FerreiraBB08,
  author       = {Ferreira, Kurt B. and
                  Bridges, Patrick G. and
                  Brightwell, Ron},
  title        = {Characterizing application sensitivity to {OS} interference
                  using kernel-level noise injection},
  booktitle    = {Proceedings of the {ACM/IEEE} Conference on High Performance
                  Computing, {SC} 2008, November 15-21, 2008, Austin, Texas,
                  {USA}},
  pages        = {19},
  publisher    = {{IEEE/ACM}},
  year         = {2008},
  url          = {https://doi.org/10.1109/SC.2008.5219920},
  doi          = {10.1109/SC.2008.5219920},
  timestamp    = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/FerreiraBB08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% NOTE(review): upstream metadata fuses the title words as MemoryWall; restored to Memory Wall, confirm against the DOI landing page.
@inproceedings{DBLP:conf/hoti/LeonFM07,
  author       = {Le{\'{o}}n, Edgar A. and
                  Ferreira, Kurt B. and
                  Maccabe, Arthur B.},
  editor       = {Lockwood, John W. and
                  Petrini, Fabrizio and
                  Brightwell, Ron and
                  Panda, Dhabaleswar K.},
  title        = {Reducing the Impact of the Memory Wall for {I/O} Using Cache Injection},
  booktitle    = {15th Annual {IEEE} Symposium on High-Performance Interconnects, {HOTI}
                  2007, Stanford, CA, USA, August 22-24, 2007},
  pages        = {143--150},
  publisher    = {{IEEE} Computer Society},
  year         = {2007},
  url          = {https://doi.org/10.1109/HOTI.2007.8},
  doi          = {10.1109/HOTI.2007.8},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/hoti/LeonFM07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics