-
Notifications
You must be signed in to change notification settings - Fork 5
/
thesis.bib
3936 lines (3660 loc) · 139 KB
/
thesis.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Bass, Clements, Kazman: "Software Architecture in Practice" (book).
% NOTE(review): this entry says 3rd edition but carries year 2003, the same
% year as the 2nd-edition entry SA-in-practise-2nd -- confirm edition/year.
@book{saip,
  author               = {Bass, Len and Clements, Paul and Kazman, Rick},
  title                = {{Software Architecture in Practice}},
  edition              = {3rd},
  publisher            = {Addison-Wesley},
  year                 = {2003},
  keywords             = {bibtex-import},
  citeulike-article-id = {1650767},
  posted-at            = {2007-09-13 08:18:02},
  priority             = {0}
}
% Amazon EC2 service level agreement (web page; the URL is carried in note).
@misc{amazon-ec2-sla,
  author = {Amazon},
  title  = {{EC2 Service Level Agreement}},
  month  = oct,
  year   = {2008},
  note   = {\url{http://aws.amazon.com/ec2-sla/}}
}
% Lecture material by Francesco Bongiovanni (unpublished; URL in note).
% Braces in the title must balance: an extra closing brace ends the entry
% early and silently drops note, month and year.
@unpublished{distributed-algorithms-lecture,
  author    = {Francesco Bongiovanni},
  title     = {{Distributed Algorithms: Failure Detectors, Consensus, Self-Stabilization}},
  note      = {\url{http://goo.gl/bZBtp}},
  month     = nov,
  year      = {2009},
  institute = {INRIA Sophia Antipolis Research Center},
}
% Scott and Kazman: availability tactics report issued by CMU/SEI.
% Typed as a technical report so the issuing institution is printed.
@techreport{scott2009,
  author      = {Scott, J. and Kazman, R.},
  title       = {{Realizing and Refining Architectural Tactics: Availability}},
  type        = {Technical Report},
  institution = {Carnegie Mellon University, Software Engineering Institute},
  year        = {2009},
  url         = {http://books.google.co.uk/books?id=DlzbYgEACAAJ}
}
% Bass, Clements, Kazman: "Software Architecture in Practice", 2nd edition.
% The edition lives only in the edition field so styles do not print it twice.
@book{SA-in-practise-2nd,
  author       = {Bass, Len and Clements, Paul and Kazman, Rick},
  title        = {{Software Architecture in Practice}},
  edition      = {2nd},
  publisher    = {Addison-Wesley Professional},
  month        = apr,
  year         = {2003},
  isbn         = {0321154959},
  howpublished = {Hardcover},
  keywords     = {architecture, best-practice, efficiency, management, programming, software-design, software-development, software-engineering},
  posted-at    = {2009-05-13 13:36:47},
  priority     = {0}
}
% Laprie, COMPSAC 2004 paper; venue fields come from the crossref parent.
@inproceedings{DBLP:conf/compsac/Laprie04,
  author    = {Jean-Claude Laprie},
  title     = {{Dependable Computing: Concepts, Challenges, Directions}},
  crossref  = {DBLP:conf/compsac/2004},
  pages     = {242},
  year      = {2004},
  ee        = {http://csdl.computer.org/comp/proceedings/compsac/2004/2209/01/220910242.pdf},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% COMPSAC 2004 proceedings; crossref parent of DBLP:conf/compsac/Laprie04.
% title is required when the volume is listed on its own; booktitle is what
% classic BibTeX copies into the child entry.
@proceedings{DBLP:conf/compsac/2004,
  title     = {28th International Computer Software and Applications Conference
               (COMPSAC 2004), Design and Assessment of Trustworthy Software-Based
               Systems, 27-30 September 2004, Hong Kong, China, Proceedings},
  booktitle = {28th International Computer Software and Applications Conference
               (COMPSAC 2004), Design and Assessment of Trustworthy Software-Based
               Systems, 27-30 September 2004, Hong Kong, China, Proceedings},
  publisher = {IEEE Computer Society},
  year      = {2004},
  isbn      = {0-7695-2209-2},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Sommerville: "Software Engineering", 9th edition (BibSonomy export).
% Bibliographic fields first, export bookkeeping fields last.
@book{Sommerville10,
  author    = {Sommerville, Ian},
  title     = {{Software Engineering}},
  edition   = {9th},
  publisher = {Addison-Wesley},
  address   = {Harlow, England},
  year      = {2010},
  isbn      = {978-0-13-703515-1},
  abstract  = {The book presents a broad perspective on software systems engineering, concentrating on widely used techniques for developing large-scale systems. Building on the widely acclaimed strengths of the 8th edition, the 9th edition updates readers with the latest developments in the field while remaining the most current Software Engineering text in the market with quality trusted coverage and practical case studies. This text is structured into 6 parts: Introduction; Requirements Engineering; Design; Software Development; Verification and Validation; Management. An up-to-date reference for software engineers.},
  keywords  = {v1205 book software engineering},
  file      = {Amazon Search inside:http\://www.amazon.de/gp/reader/0137035152/:URL;Google Books:http\://books.google.de/books?isbn=978-0-13-703515-1:URL},
  biburl    = {http://www.bibsonomy.org/bibtex/2094b37a4bb8b242d7694cdc9142e0d80/flint63},
  groups    = {public},
  username  = {flint63},
  interhash = {3cb472cdd9d5e4f37480b38846de450e},
  intrahash = {094b37a4bb8b242d7694cdc9142e0d80},
  added-at  = {2012-05-30T11:07:40.000+0200},
  timestamp = {2012-05-30T11:07:40.000+0200}
}
% Mattsson, Nilsson, Wikstrom: Mnesia paper; venue inherited via crossref.
% NOTE(review): third author's surname restored with its umlaut -- confirm.
@inproceedings{DBLP:conf/padl/MattssonNW99,
  author    = {H{\aa}kan Mattsson and
               Hans Nilsson and
               Claes Wikstr{\"o}m},
  title     = {{Mnesia - A Distributed Robust DBMS for Telecommunications
               Applications}},
  year      = {1999},
  pages     = {152--163},
  ee        = {http://link.springer.de/link/service/series/0558/bibs/1551/15510152.htm},
  crossref  = {DBLP:conf/padl/1999},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% PADL '99 proceedings (LNCS 1551); crossref parent of
% DBLP:conf/padl/MattssonNW99. Both title and booktitle are supplied so the
% volume is valid on its own and the child still inherits booktitle.
@proceedings{DBLP:conf/padl/1999,
  editor    = {Gopal Gupta},
  title     = {Practical Aspects of Declarative Languages, First International
               Workshop, PADL '99, San Antonio, Texas, USA, January 18-19,
               1999, Proceedings},
  booktitle = {Practical Aspects of Declarative Languages, First International
               Workshop, PADL '99, San Antonio, Texas, USA, January 18-19,
               1999, Proceedings},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {1551},
  year      = {1998},
  isbn      = {3-540-65527-1},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Kaashoek and Karger: Koorde DHT paper; venue inherited via crossref.
@inproceedings{DBLP:conf/iptps/KaashoekK03,
  author    = {M. Frans Kaashoek and
               David R. Karger},
  title     = {{Koorde: A Simple Degree-Optimal Distributed Hash Table}},
  year      = {2003},
  pages     = {98--107},
  ee        = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=2735{\&}spage=98},
  crossref  = {DBLP:conf/iptps/2003},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% IPTPS 2003 proceedings (LNCS 2735); crossref parent of
% DBLP:conf/iptps/KaashoekK03. Classic BibTeX copies fields by name, so
% booktitle must be present here for the child to receive a venue.
@proceedings{DBLP:conf/iptps/2003,
  editor    = {M. Frans Kaashoek and
               Ion Stoica},
  title     = {Peer-to-Peer Systems II, Second International Workshop,
               IPTPS 2003, Berkeley, CA, USA, February 21-22, 2003, Revised
               Papers},
  booktitle = {Peer-to-Peer Systems II, Second International Workshop,
               IPTPS 2003, Berkeley, CA, USA, February 21-22, 2003, Revised
               Papers},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {2735},
  year      = {2003},
  isbn      = {3-540-40724-3},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% ZIB technical report 09-34 on transactional DHT algorithms.
% Accented letters in fields use LaTeX escapes, as elsewhere in this file.
% NOTE(review): the citation key itself contains non-ASCII characters; it is
% left unchanged so existing citations keep resolving.
@techreport{HögqvistSchüttShafaatetal2009,
  author      = {H{\"o}gqvist, Mikael and Sch{\"u}tt, Thorsten and Shafaat, Tallat and Haridi, Seif and Moser, Monika and Reinefeld, Alexander},
  title       = {{Transactional DHT Algorithms}},
  type        = {ZIB-Report},
  number      = {09-34},
  institution = {Konrad-Zuse-Zentrum f{\"u}r Informationstechnik Berlin},
  year        = {2009}
}
% Rinard, ICS '06 paper (ACM DL export). doi holds the bare identifier; the
% resolver URL stays in url.
@inproceedings{Rinard:2006:PAB:1183401.1183447,
  author    = {Rinard, Martin},
  title     = {{Probabilistic Accuracy Bounds for Fault-Tolerant Computations that Discard Tasks}},
  booktitle = {Proceedings of the 20th annual international conference on Supercomputing},
  series    = {ICS '06},
  year      = {2006},
  isbn      = {1-59593-282-8},
  location  = {Cairns, Queensland, Australia},
  pages     = {324--334},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/1183401.1183447},
  doi       = {10.1145/1183401.1183447},
  acmid     = {1183447},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Dijkstra, CACM 17(11), 1974 (ACM DL export). The issue is stored in
% number, which is the field classic BibTeX styles print.
@article{Dijkstra:1974:SSS:361179.361202,
  author    = {Dijkstra, Edsger W.},
  title     = {{Self-Stabilizing Systems in Spite of Distributed Control}},
  journal   = {Communications of the ACM},
  volume    = {17},
  number    = {11},
  month     = nov,
  year      = {1974},
  issn      = {0001-0782},
  pages     = {643--644},
  numpages  = {2},
  url       = {http://doi.acm.org/10.1145/361179.361202},
  doi       = {10.1145/361179.361202},
  acmid     = {361202},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {distributed control, error recovery, harmonious cooperation, multiprocessing, mutual exclusion, networks, robustness, self-repair, self-stabilization, sharing, synchronization},
}
% Huang and Abraham, IEEE Trans. Computers 33(6), 1984 (ACM DL export).
% Both authors use the "Last, First" form; the issue is stored in number.
@article{Kuang-Hua-Huang:1984:AFT:1310169.1310842,
  author    = {Huang, Kuang-Hua and Abraham, J. A.},
  title     = {{Algorithm-Based Fault Tolerance for Matrix Operations}},
  journal   = {IEEE Transactions on Computers},
  volume    = {33},
  number    = {6},
  month     = jun,
  year      = {1984},
  issn      = {0018-9340},
  pages     = {518--528},
  numpages  = {11},
  url       = {http://portal.acm.org/citation.cfm?id=1310169.1310842},
  doi       = {10.1109/TC.1984.1676475},
  acmid     = {1310842},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {transient errors, Algorithm-based fault tolerance, checksum matrix, error correction, error detection, matrix operations, multiple processor systems, processor arrays, systolic arrays}
}
% Roche, Roch, Cunche: ABFT for P2P networks (AP2PS 2009).
% No trailing period in the title (the style adds punctuation); doi is the
% bare identifier.
% NOTE(review): address holds the conference location, not the publisher's.
@inproceedings{InProceedingsRoche.RRC_aftatp_09,
  author    = {Thomas Roche and Jean-Louis Roch and Matthieu Cunche},
  title     = {{Algorithm-Based Fault Tolerance Applied to P2P Computing Networks}},
  booktitle = {The First International Conference on Advances in P2P Systems},
  address   = {Sliema, Malta},
  month     = oct,
  year      = {2009},
  pages     = {144--149},
  publisher = {IEEE},
  url       = {http://www.iaria.org/conferences2009/AP2PS09.html},
  pdf       = {http://www.computer.org/portal/web/csdl/doi/10.1109/AP2PS.2009.30},
  doi       = {10.1109/AP2PS.2009.30}
}
% Chen, IPDPS 2008 paper; venue inherited via crossref.
@inproceedings{DBLP:conf/ipps/Chen08,
  author    = {Zizhong Chen},
  title     = {{Extending Algorithm-Based Fault Tolerance to Tolerate Fail-Stop
               Failures in High Performance Distributed Environments}},
  year      = {2008},
  pages     = {1--8},
  ee        = {http://dx.doi.org/10.1109/IPDPS.2008.4536158},
  crossref  = {DBLP:conf/ipps/2008},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% IPDPS 2008 proceedings; crossref parent of DBLP:conf/ipps/Chen08.
% title makes the volume valid on its own; booktitle is what the child inherits.
@proceedings{DBLP:conf/ipps/2008,
  title     = {22nd IEEE International Symposium on Parallel and Distributed
               Processing, IPDPS 2008, Miami, Florida USA, April 14-18,
               2008},
  booktitle = {22nd IEEE International Symposium on Parallel and Distributed
               Processing, IPDPS 2008, Miami, Florida USA, April 14-18,
               2008},
  publisher = {IEEE},
  year      = {2008},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Schlichting and Thomas, IEEE Trans. Computers 44, 1995.
% Journal name spelled as in the Huang/Abraham entry so both render alike.
% NOTE(review): no issue number is recorded for this article.
@article{Schlichting95programminglanguage,
  author  = {Richard D. Schlichting and Vicraj T. Thomas},
  title   = {{Programming Language Support for Writing Fault-Tolerant Distributed Software}},
  journal = {IEEE Transactions on Computers},
  volume  = {44},
  year    = {1995},
  pages   = {203--212}
}
% Andrews and Olsson: the SR programming language (book, ACM DL export).
@book{Andrews:1993:SPL:135321,
  author    = {Andrews, Gregory R. and Olsson, Ronald A.},
  title     = {{The SR Programming Language: Concurrency in Practice}},
  publisher = {Benjamin-Cummings Publishing Co., Inc.},
  address   = {Redwood City, CA, USA},
  year      = {1993},
  isbn      = {0-8053-0088-0},
}
% Shrivastava, Dixon, Parrington: Arjuna overview, IEEE Software 8(1), 1991.
% Full journal name, issue in number, bare DOI.
@article{Shrivastava:1991:OAD:624585.625019,
  author    = {Shrivastava, Santosh K. and Dixon, Graeme N. and Parrington, Graham D.},
  title     = {{An Overview of the Arjuna Distributed Programming System}},
  journal   = {IEEE Software},
  volume    = {8},
  number    = {1},
  month     = jan,
  year      = {1991},
  issn      = {0740-7459},
  pages     = {66--73},
  numpages  = {8},
  url       = {http://dx.doi.org/10.1109/52.62934},
  doi       = {10.1109/52.62934},
  acmid     = {625019},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  keywords  = {Arjuna distributed programming system, C++, distributed processing, fault tolerant computing, network operating systems, object-oriented programming, objects and actions computational model, programming environments, system architecture},
}
% Maes, OOPSLA '87 paper (ACM DL export).
% The title is case-protected as a whole, so it is written in title case
% like the other entries; doi is the bare identifier.
@inproceedings{Maes:1987:CEC:38765.38821,
  author    = {Maes, Pattie},
  title     = {{Concepts and Experiments in Computational Reflection}},
  booktitle = {Conference proceedings on Object-oriented programming systems, languages and applications},
  series    = {OOPSLA '87},
  year      = {1987},
  isbn      = {0-89791-247-0},
  location  = {Orlando, Florida, United States},
  pages     = {147--155},
  numpages  = {9},
  url       = {http://doi.acm.org/10.1145/38765.38821},
  doi       = {10.1145/38765.38821},
  acmid     = {38821},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Benac Earle and Fredlund, EUROCAST 2005.
% booktitle carries no leading "In": styles add that word themselves.
% NOTE(review): "Benac Earle" is braced as a compound surname and the second
% author's given name is written with its accent -- confirm both spellings.
@inproceedings{Earle05verificationof,
  author    = {{Benac Earle}, Clara and Fredlund, Lars-{\AA}ke},
  title     = {{Verification of Language based Fault-Tolerance}},
  booktitle = {EUROCAST},
  year      = {2005},
  pages     = {140--149}
}
% Cooper, ACM SIGOPS European Workshop 1990 (ACM DL export); bare DOI.
@inproceedings{Cooper:1990:PLS:504136.504162,
  author    = {Cooper, Eric C.},
  title     = {{Programming Language Support for Replication in Fault-Tolerant Distributed Systems}},
  booktitle = {Proceedings of the 4th workshop on ACM SIGOPS European workshop},
  series    = {EW 4},
  year      = {1990},
  location  = {Bologna, Italy},
  pages     = {1--6},
  numpages  = {6},
  url       = {http://doi.acm.org/10.1145/504136.504162},
  doi       = {10.1145/504136.504162},
  acmid     = {504162},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Chen, Yang, Kuo, Tseng: task replication for fault tolerance (RTAS 2007).
@inproceedings{Chen:2007:RTR:1263546.1263948,
  author    = {Chen, Jian-Jia and Yang, Chuan-Yue and Kuo, Tei-Wei and Tseng, Shau-Yin},
  title     = {{Real-Time Task Replication for Fault Tolerance in Identical Multiprocessor Systems}},
  booktitle = {Proceedings of the 13th IEEE Real Time and Embedded Technology and Applications Symposium},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {2007},
  pages     = {249--258},
  numpages  = {10},
  isbn      = {0-7695-2800-7},
  doi       = {10.1109/RTAS.2007.30},
  url       = {http://portal.acm.org/citation.cfm?id=1263546.1263948},
  acmid     = {1263948},
  keywords  = {Real-Time Task Scheduling, Fault Tolerance, Task Replication, Multiprocessor Systems.},
}
% Plank, Li, Puening: diskless checkpointing, IEEE TPDS 9(10), 1998.
@article{DBLP:journals/tpds/PlankLP98,
  author    = {James S. Plank and
               Kai Li and
               Michael A. Puening},
  title     = {{Diskless Checkpointing}},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  volume    = {9},
  number    = {10},
  year      = {1998},
  pages     = {972--986},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/71.730527},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Moody and Bronevetsky: LLNL technical report on node-local storage.
@techreport{node-local-storage,
  author      = {Adam Moody and Greg Bronevetsky},
  title       = {{Scalable I/O systems via Node-Local Storage: Approaching 1 TB/sec file I/O}},
  institution = {Lawrence Livermore National Laboratory},
  year        = {2009}
}
% Alvisi et al.: FT-TCP paper, INFOCOM 2001 (IEEE Xplore export).
% pages holds only the range (the volume is in its own field), initials are
% spaced, and the empty number/ISSN fields are not carried.
@inproceedings{916715,
  author    = {Alvisi, L. and Bressoud, T. C. and El-Khashab, A. and Marzullo, K. and Zagorodnov, D.},
  title     = {{Wrapping Server-Side TCP to Mask Connection Failures}},
  booktitle = {INFOCOM 2001. Twentieth Annual Joint Conference of the IEEE Computer and Communications Societies. Proceedings. IEEE},
  year      = {2001},
  volume    = {1},
  pages     = {329--337},
  keywords  = {FT-TCP;backup;connection failures masking;fault-tolerant TCP;latency;low overhead;server failure;server recovery;software;throughput;computer network reliability;fault tolerant computing;network servers;system recovery;transport protocols;},
  doi       = {10.1109/INFCOM.2001.916715},
}
% Schroeder and Gibson: disk failure study, FAST '07 (ACM DL export).
@inproceedings{Schroeder:2007:DFR:1267903.1267904,
  author    = {Schroeder, Bianca and Gibson, Garth A.},
  title     = {{Disk Failures in the Real World: What Does an MTTF of 1,000,000 Hours Mean To You?}},
  booktitle = {Proceedings of the 5th USENIX conference on File and Storage Technologies},
  series    = {FAST '07},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  location  = {San Jose, CA},
  year      = {2007},
  articleno = {1},
  acmid     = {1267904},
  url       = {http://dl.acm.org/citation.cfm?id=1267903.1267904},
}
% Zhang et al., JSSPP 2004 paper; venue inherited via crossref.
@inproceedings{DBLP:conf/jsspp/ZhangSSS04,
  author    = {Yanyong Zhang and
               Mark S. Squillante and
               Anand Sivasubramaniam and
               Ramendra K. Sahoo},
  title     = {{Performance Implications of Failures in Large-Scale Cluster
               Scheduling}},
  year      = {2004},
  pages     = {233--252},
  ee        = {http://dx.doi.org/10.1007/11407522_13},
  crossref  = {DBLP:conf/jsspp/2004},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% JSSPP 2004 proceedings (LNCS 3277); crossref parent of
% DBLP:conf/jsspp/ZhangSSS04. title makes the volume valid on its own;
% booktitle is what the child inherits.
@proceedings{DBLP:conf/jsspp/2004,
  editor    = {Dror G. Feitelson and
               Larry Rudolph and
               Uwe Schwiegelshohn},
  title     = {Job Scheduling Strategies for Parallel Processing, 10th
               International Workshop, JSSPP 2004, New York, NY, USA, June
               13, 2004, Revised Selected Papers},
  booktitle = {Job Scheduling Strategies for Parallel Processing, 10th
               International Workshop, JSSPP 2004, New York, NY, USA, June
               13, 2004, Revised Selected Papers},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {3277},
  year      = {2005},
  isbn      = {3-540-25330-0},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Schroeder and Gibson, DSN 2006 paper; venue inherited via crossref.
@inproceedings{DBLP:conf/dsn/SchroederG06,
  author    = {Bianca Schroeder and
               Garth A. Gibson},
  title     = {{A Large-Scale Study of Failures in High-Performance Computing
               Systems}},
  year      = {2006},
  pages     = {249--258},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/DSN.2006.5},
  crossref  = {DBLP:conf/dsn/2006},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% DSN 2006 proceedings; crossref parent of DBLP:conf/dsn/SchroederG06.
% Classic BibTeX copies fields by name, so booktitle must be present here
% for the child to receive a venue. key is the sort fallback (no editor).
@proceedings{DBLP:conf/dsn/2006,
  key       = {dsn-2006},
  title     = {2006 International Conference on Dependable Systems and
               Networks (DSN 2006), 25-28 June 2006, Philadelphia, Pennsylvania,
               USA, Proceedings},
  booktitle = {2006 International Conference on Dependable Systems and
               Networks (DSN 2006), 25-28 June 2006, Philadelphia, Pennsylvania,
               USA, Proceedings},
  publisher = {IEEE Computer Society},
  year      = {2006},
  isbn      = {0-7695-2607-1},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Gray: "Why Do Computers Stop and What Can Be Done About It?" (1986).
@inproceedings{DBLP:conf/srds/Gray86,
  author    = {Jim Gray},
  title     = {{Why Do Computers Stop and What Can Be Done About It?}},
  booktitle = {Symposium on Reliability in Distributed Software and Database
               Systems},
  year      = {1986},
  pages     = {3--12},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Xu, Kalbarczyk, Iyer, PRDC 1999 paper; venue inherited via crossref.
@inproceedings{DBLP:conf/prdc/XuKI99,
  author    = {Jun Xu and
               Zbigniew Kalbarczyk and
               Ravishankar K. Iyer},
  title     = {{Networked Windows NT System Field Failure Data Analysis}},
  year      = {1999},
  pages     = {178--185},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/PRDC.1999.816227},
  crossref  = {DBLP:conf/prdc/1999},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% PRDC 1999 proceedings; crossref parent of DBLP:conf/prdc/XuKI99.
% title makes the volume valid on its own; booktitle is what the child inherits.
@proceedings{DBLP:conf/prdc/1999,
  title     = {1999 Pacific Rim International Symposium on Dependable Computing
               (PRDC 1999), 16-17 December 1999, Hong Kong},
  booktitle = {1999 Pacific Rim International Symposium on Dependable Computing
               (PRDC 1999), 16-17 December 1999, Hong Kong},
  publisher = {IEEE Computer Society},
  year      = {1999},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Sankaran et al.: LAM/MPI checkpoint/restart framework, IJHPCA 19(4), 2005.
% month carries the issue's season designation as given by the source record.
@article{sankaran05:_lam_mpi_check_restar_framew,
  author   = {Sriram Sankaran and Jeffrey M. Squyres and Brian Barrett and Andrew Lumsdaine and Jason Duell and Paul Hargrove and Eric Roman},
  title    = {{The {LAM/MPI} Checkpoint/Restart Framework: System-Initiated Checkpointing}},
  journal  = {International Journal of High Performance Computing Applications},
  volume   = {19},
  number   = {4},
  pages    = {479--493},
  month    = {Winter},
  year     = {2005},
  keywords = {MPI, checkpoint/restart, rollback-recovery},
}
% Stellner: CoCheck, IPPS '96 (ACM DL export).
@inproceedings{Stellner:1996:CCP:645606.660853,
  author    = {Stellner, Georg},
  title     = {{CoCheck: Checkpointing and Process Migration for MPI}},
  booktitle = {Proceedings of the 10th International Parallel Processing Symposium},
  series    = {IPPS '96},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {1996},
  pages     = {526--531},
  numpages  = {6},
  isbn      = {0-8186-7255-2},
  acmid     = {660853},
  url       = {http://portal.acm.org/citation.cfm?id=645606.660853},
}
% Gropp, Lusk, Doss, Skjellum: MPICH paper, Parallel Computing 22(6), 1996.
@article{DBLP:journals/pc/GroppLDS96,
  author    = {William Gropp and
               Ewing L. Lusk and
               Nathan E. Doss and
               Anthony Skjellum},
  title     = {{A High-Performance, Portable Implementation of the MPI Message
               Passing Interface Standard}},
  journal   = {Parallel Computing},
  volume    = {22},
  number    = {6},
  year      = {1996},
  pages     = {789--828},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Stellner, Bode, Lamberts, Ludwig: NXLib, PARLE '94; venue via crossref.
% DBLP's numeric homonym suffix is not part of the author's name and would
% otherwise be parsed as the surname.
@inproceedings{DBLP:conf/parle/StellnerBLL94,
  author    = {Georg Stellner and
               Arndt Bode and
               Stefan Lamberts and
               Thomas Ludwig},
  title     = {{NXLib - A Parallel Programming Environment for Workstation
               Clusters}},
  year      = {1994},
  pages     = {745--748},
  crossref  = {DBLP:conf/parle/1994},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% PARLE '94 proceedings (LNCS 817); crossref parent of
% DBLP:conf/parle/StellnerBLL94. title makes the volume valid on its own;
% booktitle is what the child inherits.
@proceedings{DBLP:conf/parle/1994,
  editor    = {Constantine Halatsis and
               Dimitris G. Maritsas and
               George Philokyprou and
               Sergios Theodoridis},
  title     = {PARLE '94: Parallel Architectures and Languages Europe,
               6th International PARLE Conference, Athens, Greece, July
               4-8, 1994, Proceedings},
  booktitle = {PARLE '94: Parallel Architectures and Languages Europe,
               6th International PARLE Conference, Athens, Greece, July
               4-8, 1994, Proceedings},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {817},
  year      = {1994},
  isbn      = {3-540-58184-7},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Janssens and Fuchs, SRDS 1994.
% NOTE(review): booktitle expanded from the bare acronym -- confirm the
% exact proceedings title.
@inproceedings{DBLP:conf/srds/JanssensF94,
  author    = {Bob Janssens and
               W. Kent Fuchs},
  title     = {{Reducing Interprocessor Dependence in Recoverable Distributed
               Shared Memory}},
  booktitle = {Symposium on Reliable Distributed Systems (SRDS)},
  year      = {1994},
  pages     = {34--41},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Fagg, Moore, Dongarra, Geist: SNIPE, Supercomputing '97 (ACM DL export).
% Middle initials carry their periods (cf. "Graham E. Fagg" elsewhere in the
% file); doi is the bare identifier.
@inproceedings{Fagg:1997:SNI:509593.509617,
  author    = {Fagg, Graham E. and Moore, Keith and Dongarra, Jack J. and Geist, Al},
  title     = {{Scalable Networked Information Processing Environment (SNIPE)}},
  booktitle = {Proceedings of the 1997 ACM/IEEE conference on Supercomputing (CDROM)},
  series    = {Supercomputing '97},
  year      = {1997},
  isbn      = {0-89791-985-8},
  location  = {San Jose, CA},
  pages     = {1--13},
  numpages  = {13},
  url       = {http://doi.acm.org/10.1145/509593.509617},
  doi       = {10.1145/509593.509617},
  acmid     = {509617},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {MetaComputing, RCDS, SNIPE, reliable, scalable, secure},
}
% Billings, Sewell, Shinwell, Strnisa: HashCaml, ML '06 (ACM DL export).
% The accented letter is wrapped as a BibTeX special character so sorting
% and labels treat it as one letter; doi is the bare identifier.
@inproceedings{Billings:2006:TDP:1159876.1159881,
  author    = {Billings, John and Sewell, Peter and Shinwell, Mark and Strni{\v{s}}a, Rok},
  title     = {{Type-Safe Distributed Programming for OCaml}},
  booktitle = {Proceedings of the 2006 workshop on ML},
  series    = {ML '06},
  year      = {2006},
  isbn      = {1-59593-483-9},
  location  = {Portland, Oregon, USA},
  pages     = {20--31},
  numpages  = {12},
  url       = {http://doi.acm.org/10.1145/1159876.1159881},
  doi       = {10.1145/1159876.1159881},
  acmid     = {1159881},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {HashCaml, objective Caml, type-safe marshalling},
}
% Bouteiller et al.: MPICH-V2, SC2003; venue inherited via crossref.
@inproceedings{DBLP:conf/sc/BouteillerCHKLM03,
  author    = {Aurelien Bouteiller and Franck Cappello and
               Thomas H{\'e}rault and G{\'e}raud Krawezik and
               Pierre Lemarinier and Fr{\'e}d{\'e}ric Magniette},
  title     = {{MPICH-V2: a Fault Tolerant MPI for Volatile Nodes based
               on Pessimistic Sender Based Message Logging}},
  crossref  = {DBLP:conf/sc/2003},
  pages     = {25},
  year      = {2003},
  ee        = {http://doi.acm.org/10.1145/1048935.1050176},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% SC2003 proceedings; crossref parent of DBLP:conf/sc/BouteillerCHKLM03.
% title makes the volume valid on its own; booktitle is what the child inherits.
@proceedings{DBLP:conf/sc/2003,
  title     = {Proceedings of the ACM/IEEE SC2003 Conference on High Performance
               Networking and Computing, 15-21 November 2003, Phoenix,
               AZ, USA},
  booktitle = {Proceedings of the ACM/IEEE SC2003 Conference on High Performance
               Networking and Computing, 15-21 November 2003, Phoenix,
               AZ, USA},
  publisher = {ACM},
  year      = {2003},
  isbn      = {1-58113-695-1},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Litzkow, Livny, Mutka: Condor, ICDCS 1988.
% NOTE(review): booktitle expanded from the bare acronym -- confirm the
% exact proceedings title.
@inproceedings{DBLP:conf/icdcs/LitzkowLM88,
  author    = {Michael J. Litzkow and
               Miron Livny and
               Matt W. Mutka},
  title     = {{Condor - A Hunter of Idle Workstations}},
  booktitle = {International Conference on Distributed Computing Systems (ICDCS)},
  year      = {1988},
  pages     = {104--111},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Louca et al.: MPI-FT, Parallel Processing Letters 10(4), 2000.
@article{DBLP:journals/ppl/LoucaNLE00,
  author    = {Soulla Louca and
               Neophytos Neophytou and
               Adrianos Lachanas and
               Paraskevas Evripidou},
  title     = {{MPI-FT: Portable Fault Tolerance Scheme for MPI}},
  journal   = {Parallel Processing Letters},
  volume    = {10},
  number    = {4},
  year      = {2000},
  pages     = {371--382},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Dewolfs et al.: FT-MPI case study, EuroPVM/MPI 2006; venue via crossref.
@inproceedings{DBLP:conf/pvm/DewolfsBSF06,
  author    = {David Dewolfs and
               Jan Broeckhove and
               Vaidy S. Sunderam and
               Graham E. Fagg},
  title     = {{FT-MPI, Fault-Tolerant Metacomputing and Generic Name Services:
               A Case Study}},
  year      = {2006},
  pages     = {133--140},
  ee        = {http://dx.doi.org/10.1007/11846802_24},
  crossref  = {DBLP:conf/pvm/2006},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% EuroPVM/MPI 2006 proceedings (LNCS 4192); crossref parent of
% DBLP:conf/pvm/DewolfsBSF06. title makes the volume valid on its own;
% booktitle is what the child inherits.
@proceedings{DBLP:conf/pvm/2006,
  editor    = {Bernd Mohr and
               Jesper Larsson Tr{\"a}ff and
               Joachim Worringen and
               Jack Dongarra},
  title     = {Recent Advances in Parallel Virtual Machine and Message
               Passing Interface, 13th European PVM/MPI User's Group Meeting,
               Bonn, Germany, September 17-20, 2006, Proceedings},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message
               Passing Interface, 13th European PVM/MPI User's Group Meeting,
               Bonn, Germany, September 17-20, 2006, Proceedings},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {4192},
  year      = {2006},
  isbn      = {3-540-39110-X},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Fagg and Dongarra: FT-MPI, EuroPVM/MPI 2000; venue inherited via crossref.
@inproceedings{DBLP:conf/pvm/FaggD00,
  author    = {Graham E. Fagg and
               Jack Dongarra},
  title     = {{FT-MPI: Fault Tolerant MPI, Supporting Dynamic Applications in a Dynamic World}},
  year      = {2000},
  pages     = {346--353},
  ee        = {http://dx.doi.org/10.1007/3-540-45255-9_47},
  crossref  = {DBLP:conf/pvm/2000},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% EuroPVM/MPI 2000 proceedings (LNCS 1908); crossref parent of
% DBLP:conf/pvm/FaggD00. title makes the volume valid on its own;
% booktitle is what the child inherits.
@proceedings{DBLP:conf/pvm/2000,
  editor    = {Jack Dongarra and
               P{\'e}ter Kacsuk and
               Norbert Podhorszki},
  title     = {Recent Advances in Parallel Virtual Machine and Message
               Passing Interface, 7th European PVM/MPI Users' Group Meeting,
               Balatonf{\"u}red, Hungary, September 2000, Proceedings},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message
               Passing Interface, 7th European PVM/MPI Users' Group Meeting,
               Balatonf{\"u}red, Hungary, September 2000, Proceedings},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {1908},
  year      = {2000},
  isbn      = {3-540-41010-4},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Aulwes et al.: network fault tolerance in LA-MPI (EuroPVM/MPI 2003).
% booktitle carries no leading "In": styles add that word themselves.
% NOTE(review): the page range 110--2 looks truncated -- verify against the
% published proceedings.
@inproceedings{Aulwes03networkfault,
  author    = {Rob T. Aulwes and David J. Daniel and Nehal N. Desai and Richard L. Graham and L. Dean Risinger and Mitchel W. Sukalski and Mark A. Taylor},
  title     = {{Network Fault Tolerance in LA-MPI}},
  booktitle = {Proceedings of EuroPVM/MPI03},
  year      = {2003},
  pages     = {110--2}
}
% Gropp and Lusk: fault tolerance in MPI programs (IJHPCA, volume 18).
% Journal name spelled as in the sankaran05 entry for the same journal.
% NOTE(review): that entry pairs volume 19 with 2005, so volume 18 with
% year 2002 here looks inconsistent -- confirm the publication year.
@article{Gropp02faulttolerance,
  author  = {William Gropp and Ewing Lusk},
  title   = {{Fault Tolerance in MPI Programs}},
  journal = {International Journal of High Performance Computing Applications},
  volume  = {18},
  year    = {2002},
  pages   = {363--372}
}
% Epstein, Black, Peyton Jones: Cloud Haskell paper; venue via crossref.
% The crossref target DBLP:conf/haskell/2011 must appear later in the file.
@inproceedings{DBLP:conf/haskell/EpsteinBJ11,
  author    = {Jeff Epstein and
               Andrew P. Black and
               Simon L. Peyton Jones},
  title     = {{Towards Haskell in the Cloud}},
  year      = {2011},
  pages     = {118--129},
  ee        = {http://doi.acm.org/10.1145/2034675.2034690},
  crossref  = {DBLP:conf/haskell/2011},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Elnozahy and Plank: checkpointing for peta-scale systems, IEEE TDSC 1(2).
@article{DBLP:journals/tdsc/ElnozahyP04,
  author    = {E. N. Elnozahy and
               James S. Plank},
  title     = {{Checkpointing for Peta-Scale Systems: A Look into the Future
               of Practical Rollback-Recovery}},
  journal   = {IEEE Transactions on Dependable and Secure Computing},
  volume    = {1},
  number    = {2},
  year      = {2004},
  pages     = {97--108},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/TDSC.2004.15},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Elnozahy et al.: rollback-recovery survey, ACM Computing Surveys 34(3).
@article{DBLP:journals/csur/ElnozahyAWJ02,
  author    = {E. N. Elnozahy and
               Lorenzo Alvisi and
               Yi-Min Wang and
               David B. Johnson},
  title     = {{A Survey of Rollback-Recovery Protocols in Message-Passing
               Systems}},
  journal   = {ACM Computing Surveys},
  volume    = {34},
  number    = {3},
  year      = {2002},
  pages     = {375--408},
  ee        = {http://doi.acm.org/10.1145/568522.568525},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Coutts and de Vries: Cloud Haskell 2.0 talk (unpublished; venue in note).
% NOTE(review): workshop name normalised -- confirm the official spelling.
@unpublished{ch2-hiw2012,
  author = {Duncan Coutts and Edsko de Vries},
  title  = {{Cloud Haskell 2.0}},
  note   = {Haskell Implementors Workshop. Copenhagen, Denmark},
  month  = sep,
  year   = {2012}
}
% Linton et al.: SCSCP paper, Journal of Symbolic Computation 49, 2013.
% Volume, pages and year are all recorded, so no "to appear" note is carried.
@article{ISSAC,
  author  = {S. Linton and K. Hammond and A. Konovalov and C. Brown and
             P. W. Trinder and H-W. Loidl},
  title   = {{Easy Composition of Symbolic Computation Software using {SCSCP}:
             A New Lingua Franca for Symbolic Computation}},
  journal = {Journal of Symbolic Computation},
  volume  = {49},
  pages   = {95--119},
  year    = 2013
}
% Avizienis, Laprie, Randell: "Fundamental Concepts of Dependability"
% (ISW-2000).
% NOTE(review): a stray fourth author token ("Vytautas") was removed as an
% import artefact -- confirm the author list against the paper.
@inproceedings{avizienis2000,
  author    = {Avizienis, Algirdas and Laprie, Jean-Claude and Randell, Brian},
  title     = {{Fundamental Concepts of Dependability}},
  booktitle = {Proceedings of the 3rd IEEE Information Survivability
               Workshop (ISW-2000), Boston, Massachusetts, USA},
  month     = oct,
  year      = {2000},
  pages     = {7--12},
  keywords  = {defect, definition, error, failure, fault}
}
% Dean and Ghemawat: MapReduce, Communications of the ACM 51(1), 2008.
@article{DBLP:journals/cacm/DeanG08,
  author    = {Jeffrey Dean and
               Sanjay Ghemawat},
  title     = {{MapReduce: Simplified Data Processing on Large Clusters}},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  year      = {2008},
  pages     = {107--113},
  ee        = {http://doi.acm.org/10.1145/1327452.1327492},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Aguilar and Hernandez, MASCOTS 2000 paper; venue inherited via crossref.
@inproceedings{DBLP:conf/mascots/AguilarH00,
  author    = {Jose Aguilar and Marisela Hern{\'a}ndez},
  title     = {{Fault Tolerance Protocols for Parallel Programs Based on
               Tasks Replication}},
  crossref  = {DBLP:conf/mascots/2000},
  year      = {2000},
  ee        = {http://computer.org/proceedings/mascots/0728/07280397abs.htm},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% MASCOTS 2000 proceedings; crossref parent of DBLP:conf/mascots/AguilarH00.
% title makes the volume valid on its own; booktitle is what the child inherits.
@proceedings{DBLP:conf/mascots/2000,
  title     = {MASCOTS 2000, Proceedings of the 8th International Symposium
               on Modeling, Analysis and Simulation of Computer and Telecommunication
               Systems, 29 August - 1 September 2000, San Francisco, California,
               USA},
  booktitle = {MASCOTS 2000, Proceedings of the 8th International Symposium
               on Modeling, Analysis and Simulation of Computer and Telecommunication
               Systems, 29 August - 1 September 2000, San Francisco, California,
               USA},
  publisher = {IEEE Computer Society},
  year      = {2000},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Workshop paper on real-time versus fault-tolerance trade-offs in middleware.
% Typed as a conference paper so the workshop name prints (a journal field
% on a misc entry is ignored by classic styles).
% NOTE(review): author surname corrected in the field only; the citation key
% keeps its original spelling so existing citations resolve -- confirm.
@inproceedings{Narashimhan02trade-offsbetween,
  author    = {Priya Narasimhan},
  title     = {{Trade-Offs Between Real-Time and Fault Tolerance For Middleware Applications}},
  booktitle = {Workshop on Foundations of Middleware Technologies},
  year      = {2002}
}
% Laprie: dependable computing terminology paper, 1985 (CiteULike export).
% NOTE(review): symposium acronym written as FTCS-15 (was FTSC-15) -- confirm.
@inproceedings{Laprie85,
  author               = {Laprie, J. C.},
  title                = {{Dependable computing and fault tolerance: concepts and terminology}},
  booktitle            = {Proceedings of 15th International Symposium on Fault-Tolerant Computing (FTCS-15)},
  year                 = {1985},
  pages                = {2--11},
  keywords             = {dependability},
  citeulike-article-id = {9950242},
  posted-at            = {2011-10-27 09:13:04},
  priority             = {2}
}
% Blochinger, Buendgen, Heinemann, PDPTA 2000 paper; venue via crossref.
@inproceedings{DBLP:conf/pdpta/BlochingerBH00,
  author    = {Wolfgang Blochinger and Reinhard B{\"u}ndgen and Andreas Heinemann},
  title     = {{Dependable High Performance Computing on a Parallel Sysplex
               Cluster}},
  crossref  = {DBLP:conf/pdpta/2000},
  year      = {2000},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% PDPTA 2000 proceedings; crossref parent of DBLP:conf/pdpta/BlochingerBH00.
% title makes the volume valid on its own; booktitle is what the child inherits.
@proceedings{DBLP:conf/pdpta/2000,
  editor    = {Hamid R. Arabnia},
  title     = {Proceedings of the International Conference on Parallel
               and Distributed Processing Techniques and Applications,
               PDPTA 2000, June 24-29, 2000, Las Vegas, Nevada, USA},
  booktitle = {Proceedings of the International Conference on Parallel
               and Distributed Processing Techniques and Applications,
               PDPTA 2000, June 24-29, 2000, Las Vegas, Nevada, USA},
  publisher = {CSREA Press},
  year      = {2000},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Nick et al.: Parallel Sysplex, IBM Systems Journal 36(2), 1997.
@article{DBLP:journals/ibmsj/NickMCB97,
  author    = {Jeffrey M. Nick and
               Brian B. Moore and
               Jen-Yao Chung and
               Nicholas S. Bowen},
  title     = {{S/390 Cluster Technology: Parallel Sysplex}},
  journal   = {IBM Systems Journal},
  volume    = {36},
  number    = {2},
  year      = {1997},
  pages     = {172--201},
  ee        = {http://dx.doi.org/10.1147/sj.362.0172},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% IRREGULAR '98 paper. Venue data is inherited from DBLP:conf/irregular/1998
% through crossref. doi holds the bare identifier; ee is the original DBLP
% resolver link, kept only for provenance.
@inproceedings{DBLP:conf/irregular/BlochingerKW98,
  author    = {Wolfgang Blochinger and
               Wolfgang K{\"u}chlin and
               Andreas Weber},
  title     = {{The Distributed Object-Oriented Threads System DOTS}},
  year      = {1998},
  pages     = {206--217},
  doi       = {10.1007/BFb0018540},
  ee        = {http://dx.doi.org/10.1007/BFb0018540},
  crossref  = {DBLP:conf/irregular/1998},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Journal article in Mathematics and Computers in Simulation, volume 49.
% The citation key is kept unchanged so existing \cite commands still resolve.
% The umlaut uses the escaped form used by the other entries in this file.
@article{Blochinger99a.:an,
  author  = {Blochinger, Wolfgang and K{\"u}chlin, Wolfgang and Ludwig, Christoph and Weber, Andreas},
  title   = {{An object-oriented platform for distributed high-performance Symbolic Computation}},
  journal = {Mathematics and Computers in Simulation},
  volume  = {49},
  year    = {1999},
  pages   = {161--178}
}
% Proceedings record for IRREGULAR '98 (LNCS 1457);
% DBLP:conf/irregular/BlochingerKW98 points here via crossref, so this record
% must stay below that entry for classic BibTeX. title duplicates booktitle on
% purpose: title is needed when the record is listed on its own, booktitle is
% what crossref children inherit.
@proceedings{DBLP:conf/irregular/1998,
  editor    = {Afonso Ferreira and
               Jos{\'e} D. P. Rolim and
               Horst D. Simon and
               Shang-Hua Teng},
  title     = {{Solving Irregularly Structured Problems in Parallel, 5th
               International Symposium, IRREGULAR '98, Berkeley, California,
               USA, August 9--11, 1998, Proceedings}},
  booktitle = {Solving Irregularly Structured Problems in Parallel, 5th
               International Symposium, IRREGULAR '98, Berkeley, California,
               USA, August 9--11, 1998, Proceedings},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {1457},
  year      = {1998},
  isbn      = {3-540-64809-7},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% DSN 2003 paper. Venue data is inherited from DBLP:conf/dsn/2003 through
% crossref. doi holds the bare identifier; ee is the original DBLP resolver
% link, kept only for provenance.
@inproceedings{DBLP:conf/dsn/ZagorodnovMAB03,
  author    = {Dmitrii Zagorodnov and
               Keith Marzullo and
               Lorenzo Alvisi and
               Thomas C. Bressoud},
  title     = {{Engineering Fault-Tolerant TCP/IP Servers Using FT-TCP}},
  year      = {2003},
  pages     = {393--402},
  doi       = {10.1109/DSN.2003.1209950},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/DSN.2003.1209950},
  crossref  = {DBLP:conf/dsn/2003},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Proceedings record for DSN 2003; DBLP:conf/dsn/ZagorodnovMAB03 points here
% via crossref, so this record must stay below that entry for classic BibTeX.
% title duplicates booktitle on purpose: title is needed when the record is
% listed on its own, booktitle is what crossref children inherit.
@proceedings{DBLP:conf/dsn/2003,
  title     = {{2003 International Conference on Dependable Systems and
               Networks (DSN 2003), 22--25 June 2003, San Francisco, CA,
               USA, Proceedings}},
  booktitle = {2003 International Conference on Dependable Systems and
               Networks (DSN 2003), 22--25 June 2003, San Francisco, CA,
               USA, Proceedings},
  publisher = {IEEE Computer Society},
  year      = {2003},
  isbn      = {0-7695-1952-0},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Textbook reference (single author, single volume).
@book{DBLP:books/aw/Stevens94,
  author    = {Stevens, W. Richard},
  title     = {{TCP/IP Illustrated, Volume 1: The Protocols}},
  year      = {1994},
  publisher = {Addison-Wesley},
  isbn      = {0-201-63346-9},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Journal article. The journal name is spelled out, matching the other article
% entries in this file. ee is the original DBLP link to the publisher page,
% a non-standard field kept only for provenance.
@article{DBLP:journals/jise/EkwallUS03,
  author    = {Richard Ekwall and
               P{\'e}ter Urb{\'a}n and
               Andr{\'e} Schiper},
  title     = {{Robust TCP Connections for Fault Tolerant Computing}},
  journal   = {Journal of Information Science and Engineering},
  volume    = {19},
  number    = {3},
  year      = {2003},
  pages     = {503--516},
  ee        = {http://www.iis.sinica.edu.tw/page/jise/2003/200305_07.html},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Cornell technical report TR96-1609. Author names are spelled out to match
% the DBLP:conf/wdag/BasuCT96 entry by the same three authors, so the
% bibliography does not show two spellings of one person.
% The url carries an explicit scheme so it is not treated as a relative link.
@techreport{basu96solving,
  author      = {Basu, Anindya and Charron-Bost, Bernadette and Toueg, Sam},
  title       = {{Solving Problems in the Presence of Process Crashes and Lossy Links}},
  institution = {Cornell University, Computer Science Department},
  number      = {TR96-1609},
  year        = {1996},
  url         = {http://citeseer.ist.psu.edu/basu96solving.html}
}
% WDAG '96 paper. Venue data is inherited from DBLP:conf/wdag/1996 through
% crossref. ee is the original DBLP resolver link, a non-standard field kept
% only for provenance.
@inproceedings{DBLP:conf/wdag/BasuCT96,
  author    = {Anindya Basu and
               Bernadette Charron-Bost and
               Sam Toueg},
  title     = {{Simulating Reliable Links with Unreliable Links in the Presence
               of Process Crashes}},
  year      = {1996},
  pages     = {105--122},
  ee        = {http://dx.doi.org/10.1007/3-540-61769-8_8},
  crossref  = {DBLP:conf/wdag/1996},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Proceedings record for WDAG '96 (LNCS 1151); DBLP:conf/wdag/BasuCT96 points
% here via crossref, so this record must stay below that entry for classic
% BibTeX. title duplicates booktitle on purpose: title is needed when the
% record is listed on its own, booktitle is what crossref children inherit.
% month uses the predefined macro so the style controls how it is printed.
@proceedings{DBLP:conf/wdag/1996,
  editor    = {{\"O}zalp Babaoglu and
               Keith Marzullo},
  title     = {{Distributed Algorithms, 10th International Workshop, WDAG
               '96, Bologna, Italy}},
  booktitle = {Distributed Algorithms, 10th International Workshop, WDAG
               '96, Bologna, Italy},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {1151},
  year      = {1996},
  month     = oct,
  isbn      = {3-540-61769-8},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Journal article. doi holds the bare identifier; ee is the original DBLP
% resolver link, a non-standard field kept only for provenance.
@article{DBLP:journals/cacm/Armstrong10,
  author    = {Joe Armstrong},
  title     = {{Erlang}},
  journal   = {Communications of the ACM},
  volume    = {53},
  number    = {9},
  year      = {2010},
  pages     = {68--75},
  doi       = {10.1145/1810891.1810910},
  ee        = {http://doi.acm.org/10.1145/1810891.1810910},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Project deliverable D3.1. The author is a group, so it is wrapped in an
% extra pair of braces to stop BibTeX splitting it into given and family names.
% NOTE(review): institution is taken from the project name in the author and
% url fields; replace it with the issuing organisation if that is known.
@techreport{sd-erlang,
  author      = {{RELEASE Project Team}},
  title       = {{D3.1 (WP3): Scalable Reliable SD Erlang Design}},
  type        = {Project Deliverable},
  institution = {RELEASE Project},
  year        = {2012},
  month       = jun,
  url         = {http://www.release-project.eu/documents/D3.1.pdf}
}
@INPROCEEDINGS{wojciechowski2011thermal,
author={Wojciechowski, B. and Berezowski, K.S. and Patronik, P. and
Biernat, J.},