forked from jack-morrison/OLCF-Support
-
Notifications
You must be signed in to change notification settings - Fork 0
/
commonNodes
executable file
·70 lines (52 loc) · 2.53 KB
/
commonNodes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
# Find overlapping nodes from an arbitrary number of LSF jobs
import sys
import subprocess
import functools
def compareJobs(jobIDs):
    """Print the compute nodes shared by the last job step of every given job.

    For each job ID, ``csm_allocation_query -j <job>`` is run and the last
    token of the last output line containing ``allocation`` is taken as the
    allocation ID. ``csm_allocation_query_details -a <alloc>`` is then run
    and the node list of the step whose ``step_id`` equals ``num_steps`` is
    extracted. The intersection of those node lists across all jobs is
    printed.

    Args:
        jobIDs: Sequence of job ID strings; every entry must consist only of
            digits and at least one entry is required.

    Returns:
        None. All results and error messages are written to stdout.
    """
    # all() is vacuously True for an empty sequence, so emptiness has to be
    # rejected explicitly; otherwise reduce() below fails on an empty iterable.
    if not jobIDs or not all(job.isdigit() for job in jobIDs):
        print("Error: Invalid input. 1 or more primary job IDs required\n usage: common_nodes primaryjobID1 [primaryjobID2 primaryjobID3 ...]")
        return

    print("\njobIDs: " + str(jobIDs))
    jobs_and_nodes = {}
    for job in jobIDs:
        query_lines = _run_csm_command(
            ["/opt/ibm/csm/bin/csm_allocation_query", "-j", job])
        alloc = _last_allocation_id(query_lines)
        if alloc is None:
            # Without an allocation ID there is nothing to query details for.
            print("Error: no allocation found for job ID " + job)
            return
        print("JobID ", job, " allocation ID: ", alloc)

        detail_lines = _run_csm_command(
            ["/opt/ibm/csm/bin/csm_allocation_query_details", "-a", alloc])
        numsteps, node_list = _last_step_nodes(detail_lines)
        print("Compute nodes in last jobstep (step: ", numsteps, "): ", node_list)
        jobs_and_nodes[job] = node_list

    # jobs_and_nodes is guaranteed non-empty here, so reduce() is safe.
    common_compute_nodes = functools.reduce(
        set.intersection, (set(val) for val in jobs_and_nodes.values()))
    print("Common:", common_compute_nodes)
    if common_compute_nodes:
        print("\nCompute nodes common between jobs:")
        # Sorted so the listing is stable from run to run.
        print("\n".join(sorted(common_compute_nodes)) + "\n")
    else:
        print("\nThese jobs did not share any common compute nodes.\n")


def _run_csm_command(command):
    """Run *command* (an argv list) and return its decoded stdout as lines."""
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    return completed.stdout.decode().splitlines()


def _last_allocation_id(query_lines):
    """Return the last token of the last line containing 'allocation', or None."""
    alloc_ids = [line.split()[-1] for line in query_lines if 'allocation' in line]
    return alloc_ids[-1] if alloc_ids else None


def _last_step_nodes(detail_lines):
    """Return ``(num_steps, nodes)`` parsed from allocation-detail output lines.

    ``num_steps`` is the second field of the ``num_steps:`` line (None when no
    such two-field line exists). ``nodes`` holds the last token of every
    non-blank line that follows the first ``compute_nodes`` line seen at or
    after the ``step_id`` line whose value equals ``num_steps``, excluding the
    final line of the output.
    """
    numsteps = None
    in_last_step = False
    for index, line in enumerate(detail_lines):
        if line.startswith("num_steps:"):
            fields = line.split()
            if len(fields) == 2:
                numsteps = fields[1]
        if 'step_id' in line and line.split()[-1] == numsteps:
            in_last_step = True
        if in_last_step and 'compute_nodes' in line:
            # Stop at the first match: collecting the tail again on a later
            # match would duplicate entries and keep the terminator line.
            tail = detail_lines[index + 1:]
            # strip out the "..." at the end
            return numsteps, [entry.split()[-1] for entry in tail[:-1] if entry.strip()]
    return numsteps, []
if __name__ == "__main__":
    # Everything after the script name is treated as a primary job ID.
    requested_job_ids = sys.argv[1:]
    compareJobs(requested_job_ids)