-
Notifications
You must be signed in to change notification settings - Fork 0
/
single_comm_same_tag.c
139 lines (118 loc) · 4.63 KB
/
single_comm_same_tag.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/**
* Authors:
* - Agustin Navarro Torres
* - Marcos Canales Mayo
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <float.h>
#define TAG 0
/**
* argv[1] = repetitions
* argv[2] = number of bursts
* argv[3] = packet size in bytes
*/
int main(int argc, char** argv)
{
    int world_size, world_rank, name_len, i, j, n_bursts;
    int senders_size, senders_rank;
    int senders_group_excl_range[1][3];
    int aux, repetitions;
    long packet_size;
    double start_time, end_time, *res_time = NULL;
    double local_worst_lat = DBL_MIN, local_best_lat = DBL_MAX, local_mean_lat = 0;
    double global_worst_lat, global_best_lat, global_mean_lat, bandwith;
    void *dummy;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Status status;
    MPI_Group world_group, senders_group;
    MPI_Comm senders_comm;

    // Get params
    if (argc != 4) exit(1);
    repetitions = atoi(argv[1]);
    n_bursts = atoi(argv[2]);
    // FIX: was atoi(), which truncates values wider than int before the
    // assignment to a long. atol() matches the declared type.
    packet_size = atol(argv[3]);
    // Reject non-positive params: they would produce empty loops, a
    // zero-size malloc, or a division by zero in the final statistics.
    if (repetitions <= 0 || n_bursts <= 0 || packet_size <= 0) exit(1);

    // Initialize the MPI environment
    MPI_Init(&argc, &argv);

    // Allocate the dummy payload buffer; abort the whole job on OOM since
    // a missing partner would otherwise deadlock the ping-pong loop.
    dummy = malloc(packet_size);
    if (dummy == NULL) MPI_Abort(MPI_COMM_WORLD, 1);

    // Get the number of processes, rank and name
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Get_processor_name(processor_name, &name_len);

    // Create senders communicator, needed to gather results.
    // Range {first, last, stride} = {0, world_size-1, 2} excludes the even
    // world ranks, so the senders group is exactly the odd ranks.
    senders_group_excl_range[0][0] = 0;
    senders_group_excl_range[0][1] = world_size - 1;
    senders_group_excl_range[0][2] = 2;
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_range_excl(world_group, 1, senders_group_excl_range, &senders_group);
    MPI_Comm_create(MPI_COMM_WORLD, senders_group, &senders_comm);

    if (senders_comm != MPI_COMM_NULL) {
        MPI_Comm_size(senders_comm, &senders_size);
        MPI_Comm_rank(senders_comm, &senders_rank);
        // Allocate memory on senders, to measure latency
        res_time = malloc(repetitions * sizeof(double));
        if (res_time == NULL) MPI_Abort(MPI_COMM_WORLD, 1);
        // Root process of the senders communicator prints the run info.
        // FIX: packet_size is long, so %ld (the old %d was undefined behavior).
        if (senders_rank == 0) {
            printf("Number of Repetitions %d, Number of Bounces %d, Packet size %ld bytes\n",
                   repetitions, n_bursts, packet_size);
        }
    }

    // Send and receive bursts.
    // NOTE(review): the pairing (odd rank <-> rank-1) assumes an even
    // world_size; with an odd count the last even rank has no partner and
    // this loop deadlocks — confirm launcher always uses an even -np.
    for (i = 0; i < repetitions; ++i) {
        if (world_rank % 2) {
            // Senders measure the time for a burst of n_bursts packets
            // plus one small acknowledgement from the receiver.
            start_time = MPI_Wtime();
            for (j = 0; j < n_bursts; ++j)
                MPI_Send(dummy, (int)packet_size, MPI_BYTE, world_rank - 1, TAG, MPI_COMM_WORLD);
            MPI_Recv(&aux, 1, MPI_INT, world_rank - 1, TAG, MPI_COMM_WORLD, &status);
            end_time = MPI_Wtime();
            res_time[i] = end_time - start_time;
        } else {
            // Receivers drain the burst, then acknowledge with their rank.
            for (j = 0; j < n_bursts; ++j)
                MPI_Recv(dummy, (int)packet_size, MPI_BYTE, world_rank + 1, TAG, MPI_COMM_WORLD, &status);
            MPI_Send(&world_rank, 1, MPI_INT, world_rank + 1, TAG, MPI_COMM_WORLD);
        }
    }

    // Reduce results.
    // Need to check MPI_COMM_NULL because some processes are not in the
    // senders communicator group, which would make the collectives invalid.
    if (senders_comm != MPI_COMM_NULL) {
        // Local reduction over this sender's per-repetition burst times.
        for (i = 0; i < repetitions; i++) {
            local_mean_lat += res_time[i];
            if (res_time[i] > local_worst_lat) local_worst_lat = res_time[i];
            if (res_time[i] < local_best_lat) local_best_lat = res_time[i];
        }
        // Global reduction: root of the senders communicator gets the sum,
        // max and min across all senders.
        MPI_Reduce(&local_mean_lat, &global_mean_lat, 1, MPI_DOUBLE, MPI_SUM, 0, senders_comm);
        MPI_Reduce(&local_worst_lat, &global_worst_lat, 1, MPI_DOUBLE, MPI_MAX, 0, senders_comm);
        MPI_Reduce(&local_best_lat, &global_best_lat, 1, MPI_DOUBLE, MPI_MIN, 0, senders_comm);
        // Root process in senders communicator is 0
        if (senders_rank == 0) {
            // Bandwidth uses the summed time BEFORE it is averaged;
            // the mean is then normalized per repetition per sender.
            // NOTE(review): each res_time[i] covers n_bursts packets, so
            // "latency per packet" is really latency per burst — verify
            // intended units with the report consumers.
            bandwith = (repetitions * n_bursts * packet_size) / global_mean_lat;
            global_mean_lat /= repetitions * senders_size;
            // Print results
            printf("Bandwidth: %.2f MBytes/s\n", bandwith / (1024 * 1024));
            printf("Mean latency per packet: %f\n", global_mean_lat);
            printf("Worst latency per packet: %f\n", global_worst_lat);
            printf("Best latency per packet: %f\n", global_best_lat);
        }
    }

    // Free allocated memory and MPI objects
    free(dummy);
    if (senders_comm != MPI_COMM_NULL) {
        free(res_time);
        MPI_Comm_free(&senders_comm);
    }
    // FIX: the original leaked world_group; both groups must be freed.
    MPI_Group_free(&world_group);
    MPI_Group_free(&senders_group);
    // Finalize the MPI environment
    MPI_Finalize();
    return 0;
}