From 220c2bfbeba7dfbfeb77bf3287827ad940a1c267 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Wed, 8 May 2024 10:28:30 -0400 Subject: [PATCH 1/3] Do not create file if not writing --- CHANGELOG.md | 1 + benchmarks/io/checkpoint_simulator/README.md | 6 +-- .../checkpoint_simulator.F90 | 40 +++++++++---------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 757a80647b1d..77ec80229c15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update `components.yaml` - ESMA_cmake v3.45.1 - Fix bug in meson detection +- Updated `checkpoint_simulator` to not create and close file if not writing ### Fixed diff --git a/benchmarks/io/checkpoint_simulator/README.md b/benchmarks/io/checkpoint_simulator/README.md index d2cba319adc8..4466e69af71f 100644 --- a/benchmarks/io/checkpoint_simulator/README.md +++ b/benchmarks/io/checkpoint_simulator/README.md @@ -5,7 +5,7 @@ The code has the following options and needs an ESMF rc file named checkpoint\_b - "NX:" the x distribution for each face - "NY:" the y distribution for each face - "IM\_WORLD:" the cube resolution -- "LM:" the nubmer of levels +- "LM:" the number of levels - "NUM\_WRITERS:" the number of writing processes either to a single or independent files - "NUM\_ARRAYS:" the number of 3D variables to write to the file - "CHUNK:" whether to chunk, default true @@ -13,7 +13,7 @@ The code has the following options and needs an ESMF rc file named checkpoint\_b - "SPLIT\_FILE:" default false, if true, each writer writes to and independent file - "WRITE\_BARRIER:" default false, add a barrier before each write to for synchronization - "DO\_WRITES:" default true, if false skips writing (so just an mpi test at that point) -- "NTRIAL:" default 1, the number of trials to make writing +- "NTRIALS:" default 1, the number of trials to make writing - "RANDOM\_DATA:" default true, if true will arrays with random data, if false sets the array to the rank of the process -Note that whatever you set NX and NY to the program must be run on 6*NY*NY processors and the number of writers must evenly divide 6*NY +Note that whatever you set NX and NY to the program must be run on `6*NX*NY` processors and the number of writers must evenly divide `6*NY` diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index c82f395c3c11..d98d243b4839 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -55,7 +55,7 @@ module mapl_checkpoint_support_mod procedure :: write_level procedure :: write_variable procedure :: reset - end type + end type contains @@ -98,7 +98,7 @@ subroutine set_parameters(this,config_file) this%mpi_time = 0.0 call MPI_COMM_SIZE(MPI_COMM_WORLD,comm_size,status) if (comm_size /= (this%nx*this%ny*6)) call MPI_Abort(mpi_comm_world,error_code,status) - + contains function get_logical_key(config,label,default_val) result(val) @@ -115,7 +115,7 @@ function get_logical_key(config,label,default_val) result(val) val = default_val end if end function - + function get_integer_key(config,label,default_val) result(val) integer :: val type(ESMF_Config), intent(Inout) :: config @@ -130,7 +130,7 @@ function get_integer_key(config,label,default_val) result(val) val = default_val end if end function - + end subroutine subroutine reset(this) @@ -144,7 +144,7 @@ subroutine reset(this) this%time_writing = 0.d0 this%mpi_time = 0.0 end subroutine - + function compute_decomposition(this,axis) result(decomp) integer, allocatable :: decomp(:) class(test_support), intent(inout) :: this @@ -172,7 +172,7 @@ subroutine allocate_n_arrays(this,im,jm) class(test_support), intent(inout) :: this integer, intent(in) :: im integer, intent(in) :: jm - + integer :: n,rank,status character(len=3) :: formatted_int integer :: seed_size @@ -201,7 +201,7 @@ subroutine create_arrays(this) integer, allocatable :: ims(:),jms(:) integer :: rank, status,comm_size,n,i,j,rank_counter,offset,index_offset - call MPI_Comm_Rank(MPI_COMM_WORLD,rank,status) + call MPI_Comm_Rank(MPI_COMM_WORLD,rank,status) call MPI_Comm_Size(MPI_COMM_WORLD,comm_size,status) allocate(this%bundle(this%num_arrays)) ims = this%compute_decomposition(axis=1) @@ -244,13 +244,13 @@ subroutine create_arrays(this) rank_counter = rank_counter + 1 enddo enddo - enddo - + enddo + end subroutine subroutine create_communicators(this) class(test_support), intent(inout) :: this - + integer :: myid,status,nx0,ny0,color,j,ny_by_writers,local_ny,key local_ny = this%ny*6 @@ -280,7 +280,7 @@ subroutine create_communicators(this) call MPI_BARRIER(mpi_comm_world,status) - + end subroutine subroutine close_file(this) @@ -344,7 +344,7 @@ subroutine create_file(this) status = nf90_def_dim(this%ncid,"lon",this%im_world,xdim) if (this%split_file) then y_size = this%im_world*6/this%num_writers - else + else y_size = this%im_world*6 end if status = nf90_def_dim(this%ncid,"lat",y_size,ydim) @@ -384,7 +384,7 @@ subroutine create_file(this) subroutine write_file(this) class(test_support), intent(inout) :: this integer :: status,i,l - + integer(kind=INT64) :: sub_start,sub_end call MPI_BARRIER(MPI_COMM_WORLD,status) @@ -619,7 +619,7 @@ subroutine write_level(this,var_name,local_var,z_index) io_time = end_time-start_time this%data_volume = this%data_volume+byte_to_mega*4.d0*size(var,kind=INT64) this%time_writing = this%time_writing + real(io_time,kind=REAL64)/real(count_rate,kind=REAL64) - + deallocate(VAR, stat=status) endif ! myiorank @@ -662,7 +662,7 @@ program checkpoint_tester call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) - call support%create_arrays() + if (support%do_writes) call support%create_arrays() call MPI_Barrier(MPI_COMM_WORLD,status) call support%create_communicators() @@ -676,13 +676,13 @@ program checkpoint_tester call system_clock(count=start_write) call MPI_Barrier(MPI_COMM_WORLD,status) - call support%create_file() + if (support%do_writes) call support%create_file() call MPI_Barrier(MPI_COMM_WORLD,status) call support%write_file() call MPI_Barrier(MPI_COMM_WORLD,status) - call support%close_file() + if (support%do_writes) call support%close_file() call MPI_Barrier(MPI_COMM_WORLD,status) call system_clock(count=end_time) @@ -707,7 +707,7 @@ program checkpoint_tester all_proc_throughput(i) = real(support%num_writers,kind=REAL32)*average_volume/average_time end if enddo - + call system_clock(count=end_app) application_time = real(end_app - start_app,kind=REAL64)/real(count_rate,kind=REAL64) if (rank == 0) then @@ -741,7 +741,7 @@ program checkpoint_tester std_fs_throughput = sqrt(std_fs_throughput/real(support%n_trials,kind=REAL64)) write(*,'(G16.8,G16.8,G16.8,G16.8)')mean_throughput,std_throughput,mean_fs_throughput,std_fs_throughput end if - - + + call MPI_Finalize(status) end program From 68a0bd72ff69e2b80be665912e6d96ad82922740 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Thu, 9 May 2024 05:04:32 -0700 Subject: [PATCH 2/3] Fix bug. need to create arrays --- benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index d98d243b4839..f2d257c21020 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -662,7 +662,7 @@ program checkpoint_tester call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) - if (support%do_writes) call support%create_arrays() + call support%create_arrays() call MPI_Barrier(MPI_COMM_WORLD,status) call support%create_communicators() From c9975901a8225ac469a2389dc402fdd6e57b7264 Mon Sep 17 00:00:00 2001 From: Matt Thompson Date: Mon, 13 May 2024 15:19:14 -0400 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e139721f792c..5633f40a207e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Update `components.yaml` - - ESMA_cmake v3.45.1 - - Fix bug in meson detection - Updated `checkpoint_simulator` to not create and close file if not writing ### Fixed