-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
executable file
·440 lines (369 loc) · 15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
#!/usr/bin/python3
import os
import csv
import copy
import toml
import bazel
import argparse
import graphviz
from pathlib import Path
# Name of the synthetic graph node that extract_subtree() links all root packages to.
FAKE_ROOT = 'fake-root'
# RGB end points of the height gradient that main() passes to add_height_color().
RED = (255, 0, 0)
YELLOW = (255, 255, 0)
def read(path):
    """Return the whole content of the text file at `path`.

    The file is decoded as UTF-8 explicitly instead of with the locale's
    default encoding, so the result does not depend on the machine the tool
    runs on (Cargo.toml manifests are UTF-8 by the TOML specification).

    Args:
        path: path of the file to read (anything `open()` accepts).

    Returns:
        The file content as a single string.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
def dev_name(name):
    """Return the name of the graph node holding the dev-dependencies of `name`."""
    return '{}-[dev]'.format(name)
def _has_rule(bazel_rules, rule_types, name):
    """Tell whether a rule of one of `rule_types` carries `name` as name or crate name."""
    for bazel_rule in bazel_rules:
        is_name_found = (
            name in bazel_rule.get('name', [])
            or name in bazel_rule.get('crate_name', [])
        )
        if bazel_rule.get('rule') in rule_types and is_name_found:
            return True
    return False


def _count_missing_bin(cargo_toml, build_bazel):
    """Count `[[bin]]` targets without a Bazel rule, as (regular, dev) counts."""
    missing = 0
    missing_dev = 0
    # `[[bin]]` is an array of tables; it is absent when the crate has no binaries.
    for block in cargo_toml.get('bin') or ():
        name = block.get('name')
        if name is None:
            continue
        if _has_rule(build_bazel, ['rust_binary', 'rust_canister'], name):
            continue
        # Binaries living under `test/` are accounted to the dev node.
        if block.get('path', '').startswith('test/'):
            missing_dev += 1
        else:
            missing += 1
    return missing, missing_dev


def _count_missing_lib(cargo_toml, build_bazel):
    """Count the `[lib]` target if it has no Bazel rule, as (regular, dev) counts."""
    missing = 0
    missing_dev = 0
    block = cargo_toml.get('lib')
    if block is not None and (name := block.get('name')):
        if not _has_rule(build_bazel, ['rust_library'], name):
            if block.get('path', '').startswith('test/'):
                missing_dev += 1
            else:
                missing += 1
    return missing, missing_dev


def _count_missing_bench(cargo_toml, build_bazel, package_name):
    """Count `[[bench]]` targets without a Bazel rule, as (regular, dev) counts."""
    missing_dev = 0
    for block in cargo_toml.get('bench') or ():
        if name := block.get('name'):
            # A bench counts as present under its own name or as `<package>_bench`.
            is_missing = (
                not _has_rule(build_bazel, ['rust_binary'], name)
                and not _has_rule(build_bazel, ['rust_binary'], f'{package_name}_bench')
            )
            if is_missing:
                missing_dev += 1
    # Benches are DEV dependencies always, so the regular count stays 0.
    return 0, missing_dev


def build_graph(source_dir, skip_3rd_party, dev_dependencies, count_missing, force_migrated):
    """Build the package dependency graph from the Cargo.toml files under `source_dir`.

    Every `Cargo.toml` found recursively that declares a `[package]` name
    becomes a node. When `dev_dependencies` is set and the manifest has
    `[dev-dependencies]`, a second node named `dev_name(package)` is added
    whose children are the dev-dependencies plus the package itself.

    Args:
        source_dir: directory searched recursively for `Cargo.toml` files.
        skip_3rd_party: keep only dependencies that are packages found under
            `source_dir`.
        dev_dependencies: also create the dev nodes.
        count_missing: also store the 'missing bin', 'missing lib' and
            'missing bench' counters on the nodes.
        force_migrated: iterable of node names to mark as bazelized regardless
            of what the BUILD.bazel file contains.

    Returns:
        Dict mapping node name to a dict with the keys 'bazelized', 'children'
        (sorted list of node names) and 'force_migrated', plus the three
        'missing ...' counters when `count_missing` is set.

    NOTE(review): the shape of `bazel.loads()` results and the exact meaning of
    `bazel.is_bazelized_*` come from the project's `bazel` module, which is not
    visible here; this function only passes them through.
    """
    force_migrated = set(force_migrated)

    # Collect every Cargo.toml together with its sibling BUILD.bazel, if any.
    data = []
    for cargo_path in Path(source_dir).rglob('Cargo.toml'):
        entry = {
            'cargo_path': cargo_path,
            'cargo_toml': toml.loads(read(cargo_path)),
        }
        # Swap only the file name: a plain string replace over the whole path
        # would also rewrite a directory component called `Cargo.toml`.
        bazel_path = cargo_path.with_name('BUILD.bazel')
        if bazel_path.exists():
            entry['build_bazel'] = bazel.loads(read(bazel_path))
        data.append(entry)

    # Collect all package names (only needed to filter out 3rd party crates).
    packages = set()
    if skip_3rd_party:
        names = (entry['cargo_toml'].get('package', {}).get('name') for entry in data)
        packages = {name for name in names if name is not None}

    def select_children(dependencies):
        """Return dependency names, filtered to local packages if requested, sorted."""
        names = list(dependencies)
        if skip_3rd_party:
            names = [x for x in names if x in packages]
        return sorted(names)  # Stabilize data.

    # Build graph.
    graph = {}
    for entry in data:
        cargo_toml = entry['cargo_toml']
        package_name = cargo_toml.get('package', {}).get('name')
        if package_name is None:
            # Manifest without a [package] name: nothing to add.
            continue
        build_bazel = entry.get('build_bazel', [])

        # Regular node.
        is_force_migrated = package_name in force_migrated
        bazelized = bazel.is_bazelized_bin_or_lib(package_name, build_bazel)
        graph[package_name] = {
            'bazelized': bazelized or is_force_migrated,
            'children': select_children(cargo_toml.get('dependencies', {})),
            'force_migrated': is_force_migrated,
        }

        # DEV node, created only when the manifest lists dev-dependencies.
        package_name_dev = dev_name(package_name)
        dev_deps = cargo_toml.get('dev-dependencies', {})
        if dev_dependencies and len(dev_deps) > 0:
            is_force_migrated = package_name_dev in force_migrated
            bazelized = bazel.is_bazelized_test(package_name, build_bazel)
            graph[package_name_dev] = {
                'bazelized': bazelized or is_force_migrated,
                # The dev node always depends on the package itself.
                'children': select_children(dev_deps) + [package_name],
                'force_migrated': is_force_migrated,
            }

        # Count missing Cargo attributes in Bazel files.
        if count_missing:
            missing = {
                'missing bin': _count_missing_bin(cargo_toml, build_bazel),
                'missing lib': _count_missing_lib(cargo_toml, build_bazel),
                'missing bench': _count_missing_bench(cargo_toml, build_bazel, package_name),
            }
            for column, (count, count_dev) in missing.items():
                graph[package_name][column] = count
                if graph.get(package_name_dev):
                    graph[package_name_dev][column] = count_dev
    return graph
def mark_subtree(graph, current, target, path, is_found=False):
    """Mark `target` and everything reachable from it with status 'found'.

    Walks the graph depth-first from `current` and stores the result in each
    visited node under the key 'traversing_status': 'found' for `target` and
    its descendants, 'not-found' for the other visited nodes ('searching' is a
    transient mark for nodes on the current recursion path).

    Args:
        graph: dict mapping node name to a node dict with a 'children' list.
        current: name of the node to visit; names missing from `graph` are ignored.
        target: name of the node whose subtree is wanted.
        path: list used as the stack of node names currently being visited;
            mutated in place, restored on normal return.
        is_found: True when `current` is reached through `target`.

    Raises:
        ValueError: if a node is reached again while it is still being visited,
            i.e. the graph contains a cycle.
    """
    info = graph.get(current)
    if info is None:
        return
    status = info.get('traversing_status')
    if status == 'found':
        return
    is_found = is_found or current == target
    if status == 'not-found' and not is_found:
        # This node was already fully explored from outside the target subtree.
        # Exploring it again with the same outcome cannot change any mark, and
        # doing so for every incoming edge is exponential on diamond-shaped
        # graphs. Only a visit coming through the target may upgrade it.
        return
    path.append(current)  # Add current to path.
    if status == 'searching':
        raise ValueError(f'Unexpected graph cycle, see path: {path}')
    info['traversing_status'] = 'searching'
    for child in info.get('children', []):
        mark_subtree(graph, child, target, path, is_found)
    info['traversing_status'] = 'found' if is_found else 'not-found'
    path.pop()  # Remove current from path.
def remove_unwanted_nodes(graph):
    """Return deep copies of the nodes whose 'traversing_status' is 'found'.

    The copies no longer carry the 'traversing_status' key; `graph` itself is
    left untouched.
    """
    kept = {}
    for name, info in graph.items():
        if info.get('traversing_status') != 'found':
            continue
        node = copy.deepcopy(info)
        node.pop('traversing_status')
        kept[name] = node
    return kept
def extract_subtree(graph, target_package):
    """Return the part of `graph` reachable from `target_package`.

    A fake root node linking all root nodes is added to `graph` in place. If
    `target_package` is one of the "all packages" keywords, `graph` itself is
    returned. Otherwise a new dict is returned that holds the target (its dev
    node when one exists), everything reachable from it, and a fake root
    pointing at the target.
    """
    # Prefer the dev node as the top of the subtree when the graph has one.
    dev_root = dev_name(target_package)
    if graph.get(dev_root) is not None:
        target_package = dev_root

    # Roots are the nodes that nobody lists as a child.
    roots = set(graph.keys())
    for info in graph.values():
        roots.difference_update(info['children'])

    # Link all the roots to a fake root.
    graph[FAKE_ROOT] = {'children': roots}

    # Spellings of the target that mean "keep the whole graph".
    everything = ('None', 'none', 'default', '.', 'all', '')
    if str(target_package).strip() in everything:
        return graph

    # Mark the target subtree, then keep only the marked nodes.
    mark_subtree(graph, FAKE_ROOT, target_package, [])
    subtree = remove_unwanted_nodes(graph)

    # Give the subtree its own fake root.
    subtree[FAKE_ROOT] = {'children': [target_package]}
    return subtree
def calculate_progress(graph):
    """Return how many packages of `graph` are already bazelized.

    The synthetic fake root node is not a package and is left out of the
    totals, in line with the other functions that skip it.

    Returns:
        Tuple `(bazel_n, total, ratio)`: the number of nodes whose 'bazelized'
        value is True, the number of package nodes, and `bazel_n / total`
        (0.0 when there are no package nodes).
    """
    packages = [info for name, info in graph.items() if name != FAKE_ROOT]
    total = len(packages)
    bazel_n = sum(1 for info in packages if info.get('bazelized') is True)
    # Guard the division: a graph holding nothing but the fake root has no packages.
    ratio = bazel_n / total if total else 0.0
    return (bazel_n, total, ratio)
def add_height(graph, current, _heights=None):
    """Compute the height of `current` over not-yet-bazelized nodes and store it.

    A node that is not bazelized gets height 0 when none of its children
    contributes (no children, or all of them bazelized or unknown); otherwise
    one more than the tallest child. The value is written to the node under
    the key 'height', except for the fake root.

    Args:
        graph: dict mapping node name to node dict.
        current: name of the node to start from.
        _heights: internal cache shared by the recursive calls, so every node
            is computed once instead of once per path leading to it.

    Returns:
        The height of `current`, or -1 if it is bazelized or not in `graph`.
    """
    if _heights is None:
        _heights = {}
    info = graph.get(current)
    # Skip packages with Bazel (and names that are not nodes of the graph).
    if info is None or info.get('bazelized', False):
        return -1
    if current in _heights:
        return _heights[current]
    height = -1
    for child in info.get('children', []):
        height = max(height, add_height(graph, child, _heights))
    result = height + 1
    _heights[current] = result
    # Skip fake root node.
    if current != FAKE_ROOT:
        info['height'] = result
    return result
def add_parent_count(graph):
    """Store on every node, under 'parent_count', how many nodes list it as a child.

    The fake root is neither counted as a parent nor given a count.
    """
    real_nodes = [name for name in graph if name != FAKE_ROOT]

    # Tally the incoming edges of every child.
    parents = {}
    for name in real_nodes:
        for child in graph[name].get('children', []):
            parents[child] = parents.get(child, 0) + 1

    # Attach the tally; nodes nobody points at get 0.
    for name in real_nodes:
        graph[name]['parent_count'] = parents.get(name, 0)
def interpolate_rgb(rgb_lo, rgb_hi, param):
    """Blend two RGB colors linearly and return the result as `#RRGGBB`.

    Args:
        rgb_lo: color returned for `param == 0`, as a sequence (r, g, b).
        rgb_hi: color returned for `param == 1`, as a sequence (r, g, b).
        param: blend factor; values outside [0, 1] extrapolate.

    Returns:
        The color as an upper-case hex string. Every channel is truncated to
        an int and clamped to 0..255, so the string is always a well-formed
        `#RRGGBB` even when `param` is out of range.
    """
    def channel(index):
        value = int(rgb_lo[index] * (1 - param) + rgb_hi[index] * param)
        return min(255, max(0, value))

    r = channel(0)
    g = channel(1)
    b = channel(2)
    return f'#{r:02X}{g:02X}{b:02X}'
def add_height_color(graph, color_lo, color_hi):
    """Give every node that has a 'height' a 'color' on the lo-to-hi gradient.

    Height 0 maps to `color_lo` and the largest height in the graph maps to
    `color_hi`. Nodes without a 'height' are left alone; nothing happens when
    no node has a height (including an empty graph).

    Args:
        graph: dict mapping node name to node dict; modified in place.
        color_lo: RGB sequence for the lowest height.
        color_hi: RGB sequence for the highest height.
    """
    max_height = max((info.get('height', -1) for info in graph.values()), default=-1)
    if max_height < 0:
        return
    for info in graph.values():
        height = info.get('height')
        if height is None:
            continue
        # When every node has height 0 there is no range to divide by; such
        # nodes get `color_lo`, the same color height 0 gets in any other graph.
        param = height / max_height if max_height > 0 else 0.0
        info['color'] = interpolate_rgb(color_lo, color_hi, param)
def to_graphviz(graph):
    """Convert `graph` into a `graphviz.Digraph`.

    Every node except the fake root becomes a filled Graphviz node labelled
    with its name and, when present, its height, parent count and bazel
    status. Bazelized nodes are green, nodes with a 'color' use that color,
    all others are grey. Every child relation becomes an edge.

    Also prints how many nodes and edges are plotted; the fake root and its
    edges are not plotted and therefore not counted.

    Returns:
        The populated `graphviz.Digraph`.
    """
    packages = {name: info for name, info in graph.items() if name != FAKE_ROOT}
    nodes_n = len(packages)
    edges_n = sum(len(info.get('children', [])) for info in packages.values())
    print(f'Plotting {nodes_n} nodes with {edges_n} edges...')

    dot = graphviz.Digraph()

    # Create nodes.
    for package_name, info in packages.items():
        label = [f'{package_name}']
        fillcolor = 'grey'  # default

        # Display height.
        height = info.get('height')
        if height is not None:
            label.append(f'height:{height}')

        # Display parent count.
        parents = info.get('parent_count')
        if parents is not None:
            label.append(f'parents:{parents}')

        # Display bazel status and color.
        if info.get('bazelized'):
            label.append('bazel:yes')
            fillcolor = 'green'

        # Display not converted node color; it wins over the defaults above.
        color = info.get('color')
        if color is not None:
            fillcolor = color

        dot.node(package_name, '\n'.join(label), style='filled', fillcolor=fillcolor)

    # Create edges.
    for package_name, info in packages.items():
        for child in info.get('children', []):
            dot.edge(package_name, child)
    return dot
def write_csv(graph, path):
    """Write one CSV row per package of `graph` to `path`.

    Columns: name, bazel, height, parents, missing bin, missing lib,
    missing bench, forced. Rows are ordered by height (ascending, nodes
    without height last), then forced ('no' before 'yes'), then parents
    (descending), then name (ascending). The fake root is skipped.

    The header is written even when there are no rows, and the parent
    directory of `path` is created when it does not exist yet.

    Args:
        graph: dict mapping node name to node dict.
        path: path of the CSV file to (over)write.
    """
    columns = [
        'name',
        'bazel',
        'height',
        'parents',
        'missing bin',
        'missing lib',
        'missing bench',
        'forced',
    ]

    # Generate table, skipping the fake root node.
    rows = [
        {
            'name': package_name,
            'bazel': 'yes' if info.get('bazelized') else 'no',
            'height': info.get('height'),
            'parents': info.get('parent_count'),
            'missing bin': info.get('missing bin'),
            'missing lib': info.get('missing lib'),
            'missing bench': info.get('missing bench'),
            'forced': 'yes' if info.get('force_migrated') else 'no',
        }
        for package_name, info in graph.items()
        if package_name != FAKE_ROOT
    ]

    # A single composite key gives the same order as successive stable sorts
    # by name, parents (desc), forced and height.
    MAX_HEIGHT = 1000*1000*1000  # Sorts rows without height to the bottom.
    rows.sort(key=lambda row: (
        row['height'] if row['height'] is not None else MAX_HEIGHT,
        row['forced'],
        -(row['parents'] if row['parents'] is not None else 0),
        row['name'],
    ))

    # Write to file.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, columns)
        writer.writeheader()
        writer.writerows(rows)
def str2bool(v):
    """Parse a command-line flag value into a bool.

    Booleans are returned unchanged. Strings are compared case-insensitively
    against the accepted spellings of true and false.

    Raises:
        argparse.ArgumentTypeError: if the text is not a known spelling.
    """
    if isinstance(v, bool):
        return v
    text = v.lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
def main():
    """Command-line entry point.

    Parses the arguments, builds the dependency graph of the packages under
    the source directory, narrows it to the requested root package, prints the
    migration progress, and writes the CSV table and the Graphviz output.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-sd', '--source_dir', help='source directory', default='../ic/rs/')
    parser.add_argument('-rp', '--root_package',
                        help='root package', default=None)
    parser.add_argument(
        '-gp', '--graphviz_path', help='graphviz output files', default='./output/graph.gv')
    parser.add_argument(
        '-gv', '--graphviz_view', help='graphviz view', type=str2bool, default=False)
    parser.add_argument(
        '-csv', '--csv_path', help='CSV output file', default='./output/packages.csv')
    parser.add_argument(
        '-s3p', '--skip_3rd_party', help='skip 3rd party package dependencies', type=str2bool, default=True)
    parser.add_argument(
        '-dev', '--dev_dependencies', help='show dev-dependencies', type=str2bool, default=True)
    parser.add_argument(
        '-mis', '--count_missing', help='count missing Cargo attributes in Bazel files', type=str2bool, default=False)
    parser.add_argument(
        '-f', '--force_migrated_file', help='input file with a list of packages, considered migrated', default='./force_migrated.txt')
    args = parser.parse_args()

    # Print header.
    dev = '-dev' if args.dev_dependencies else ''
    print('')
    print(f'Root package: {args.root_package}{dev}')

    # Read list of packages that are considered migrated: one name per line.
    # Lines are stripped and blank ones dropped, so Windows line endings or
    # stray spaces do not produce names that can never match a package.
    force_migrated = [
        line.strip()
        for line in read(args.force_migrated_file).splitlines()
        if line.strip()
    ]

    # Generate graph of package dependencies.
    graph = build_graph(
        args.source_dir, skip_3rd_party=args.skip_3rd_party, dev_dependencies=args.dev_dependencies,
        count_missing=args.count_missing, force_migrated=force_migrated)
    subtree = extract_subtree(graph, args.root_package)
    bazel_n, total, ratio = calculate_progress(subtree)
    print(
        f'Packages with bazel / no bazel / total / progress: {bazel_n} / {total-bazel_n} / {total} / {100*ratio:>5.01f}%')

    # Calculate attributes (height, parents, color).
    add_height(subtree, FAKE_ROOT)
    add_height_color(subtree, RED, YELLOW)
    add_parent_count(subtree)

    # Write CSV output.
    write_csv(subtree, args.csv_path)

    # Generate Graphviz.
    dot = to_graphviz(subtree)
    dot.render(args.graphviz_path, view=args.graphviz_view)
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()