-
Notifications
You must be signed in to change notification settings - Fork 0
/
partial-psa-flow.py
105 lines (93 loc) · 3.78 KB
/
partial-psa-flow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from design_flow import *
import sys
import pprint
import copy
# Shared pretty-printer for ad-hoc inspection of reports; it is not referenced
# anywhere else in the visible part of this script.
pp = pprint.PrettyPrinter(indent=2)
def informed_branch_decision(ast, data):
    """Choose design-flow branch indices from the collected analysis reports.

    The decision is driven by the outermost ``for`` loop of the hotspot
    function (``data['hotspot_fn_name']``):

    * outer loop not parallel, intensity > 0.5  -> ``[2]``    ("CPU+FPGA")
    * outer loop not parallel, otherwise        -> ``[0]``    ("SINGLE THREAD CPU")
    * outer loop parallel, intensity not > 0.5  -> ``[1]``    ("MULTI-THREAD CPU")
    * parallel, intensive, single-entry tripcount report
                                                -> ``[2, 3]`` ("CPU+FPGA, CPU+GPU")
    * parallel, intensive, some other loop with a falsy ``'fixed'`` flag
                                                -> ``[3]``    ("CPU+GPU")
    * parallel, intensive, every other loop ``'fixed'``
                                                -> ``[2]``    ("CPU+FPGA")

    Args:
        ast: object exposing ``query(select=..., where=...)``; the first
            result's ``.l`` attribute is taken as the outer loop.
        data: mapping providing ``'hotspot_fn_name'``, ``'loop_dep_report'``,
            ``'arith_intensity_report'`` and (for parallel loops)
            ``'tripcount_report'``.

    Returns:
        A list of integer branch indices. The chosen label is also printed.

    Raises:
        IndexError: if the query yields no matching loop.
    """
    # NOTE(review): the lambda parameter names mirror the `fn` / `l` bindings
    # in the select string and the `.l` result attribute -- presumably the
    # query engine matches them by name, so they are kept as-is.
    loops = ast.query(
        select="fn{FunctionDecl}=>l{ForStmt}",
        where=lambda fn, l: fn.name == data['hotspot_fn_name'] and l.is_outermost(),
    )
    hotspot_loop = loops[0].l

    if not data['loop_dep_report'][hotspot_loop.tag]['parallel']:
        # Outer loop carries dependences: either offload it or leave it alone.
        if data['arith_intensity_report']['intensity'] > 0.5:
            label, branches = "CPU+FPGA", [2]
        else:
            label, branches = "SINGLE THREAD CPU", [0]
        print(label)
        return branches

    # Outer loop is parallel from here on.
    trip_report = data['tripcount_report']

    if not (data['arith_intensity_report']['intensity'] > 0.5):
        print("MULTI-THREAD CPU")
        return [1]

    if len(trip_report) <= 1:
        # The report holds at most the outer loop, i.e. no inner loops.
        print("CPU+FPGA, CPU+GPU")
        return [2, 3]

    # Loops other than the outer one whose 'fixed' flag is falsy.
    non_fixed_inner = []
    for tag in trip_report:
        if tag != hotspot_loop.tag and not trip_report[tag]['fixed']:
            non_fixed_inner.append(tag)

    if non_fixed_inner:
        print("CPU+GPU")
        return [3]

    print("CPU+FPGA")
    return [2]
def branch_decision(ast, data):
    """Return the branch indices to follow at the main flow's branchpoint.

    When ``data`` contains a ``'target'`` entry, that value is returned
    unchanged (presumably the list supplied through ``args`` to
    ``design_flow.run`` -- confirm against the framework). Otherwise the
    choice is delegated to ``informed_branch_decision``.

    Args:
        ast: forwarded to ``informed_branch_decision`` when no target is set.
        data: mapping that may carry an explicit ``'target'`` entry.

    Returns:
        ``data['target']`` if present, else the analysis-driven selection.
    """
    if 'target' not in data:
        return informed_branch_decision(ast, data)
    return data['target']
# Branch flows, listed here in the same order they are handed to
# add_branchpoint at the bottom of this section. The integer lists returned by
# branch_decision (0..3) presumably index into that list -- confirm against
# DesignFlow.add_branchpoint.

# Branch with no patterns added; sits at position 0 of the branch list.
no_flow = DesignFlow('none')
# construct OpenMP multi-thread CPU design-flow branch
omp_flow = DesignFlow('omp')
omp_flow.add_pattern(multithread_parallel_loops)
# construct oneAPI CPU+FPGA design-flow branch
oneapi_flow = DesignFlow('oneapi', 'oneapi_fpga')
oneapi_flow.add_pattern(generate_oneapi_design)
oneapi_flow.add_pattern(employ_sp_fp_literals)
oneapi_flow.add_pattern(employ_sp_math_fns)
oneapi_flow.add_pattern(unroll_small_fixed_bound_loops)
# construct HIP CPU+GPU design-flow branch
hip_flow = DesignFlow('hip', 'hip_gpu')
hip_flow.add_pattern(generate_hip_design)
hip_flow.add_pattern(employ_sp_fp_literals)
hip_flow.add_pattern(employ_sp_math_fns)
hip_flow.add_pattern(employ_reciprocal_math_fns)
hip_flow.add_pattern(employ_hip_pinned_memory)
hip_flow.add_pattern(introduce_shared_mem_buffers)
# construct target independent design-flow branch
design_flow = DesignFlow('main')
# Hotspot extraction is registered first; the analyses below are registered
# after it. NOTE(review): patterns presumably execute in registration order,
# and the reports read by informed_branch_decision ('tripcount_report',
# 'arith_intensity_report', 'loop_dep_report') presumably come from the
# analysis patterns added here -- confirm against the design_flow module.
design_flow.add_pattern(extract_hotspot, {'filter_fn': parallel_filter,'fn_name': 'kernel___','threshold': 0.4})
design_flow.add_pattern(remove_compound_assignment_deps)
# NOTE(review): 'exec_rule': 'orig' is passed to these three analyses only;
# its meaning is defined by the framework -- TODO confirm.
design_flow.add_pattern(data_inout_analysis,{'exec_rule':'orig'})
design_flow.add_pattern(loop_tripcount_analysis,{'exec_rule':'orig'})
design_flow.add_pattern(arithmetic_intensity_analysis,{'exec_rule':'orig'})
design_flow.add_pattern(pointer_analysis)
design_flow.add_pattern(loop_dependence_analysis)
# Branch list order: 0 = no_flow, 1 = omp_flow, 2 = oneapi_flow, 3 = hip_flow.
design_flow.add_branchpoint(branch_decision, [no_flow, omp_flow, oneapi_flow, hip_flow])
## run the PSA-flow
# Command line: artisan partial-psa-flow.py app_name [target]
#   app_name  selects cpp_apps/<app_name>/main.cpp as the input source.
#   target    optionally forces the branch(es) taken at the branchpoint; when
#             omitted, the selection is left to the branch decision function.
usage = ("Usage:\n artisan partial-psa-flow.py app_name <target>\n"
         "app_name = adpredictor | nbody-sim | bezier-surface | rush-larsen | kmeans\n"
         "target = all | cpu | gpu | fpga ")

# Application names accepted as the first argument.
_SUPPORTED_APPS = ('adpredictor', 'nbody-sim', 'bezier-surface', 'rush-larsen', 'kmeans')

# Target keyword -> (branch indices to force, label for the progress message).
_TARGETS = {
    'all': ([1, 2, 3], "CPU, FPGA, and GPU"),
    'cpu': ([1], "CPU"),
    'fpga': ([2], "FPGA"),
    'gpu': ([3], "GPU"),
}

# Missing or unknown application: report usage and stop with a failure status.
# sys.exit is used instead of the site-provided exit() helper, which is not
# guaranteed to exist in every interpreter setup.
if len(sys.argv) < 2 or sys.argv[1] not in _SUPPORTED_APPS:
    print(usage)
    sys.exit(1)
app = sys.argv[1]

args = {}
target = "Auto-Selected Target"
if len(sys.argv) > 2:
    # An unrecognised target used to be ignored silently, so a typo such as
    # "gpuu" ran the auto-selected flow. Treat it as a usage error instead.
    if sys.argv[2] not in _TARGETS:
        print(usage)
        sys.exit(1)
    forced_branches, target = _TARGETS[sys.argv[2]]
    # Copy so the lookup table is never aliased by whatever consumes args.
    args['target'] = list(forced_branches)

src = f'cpp_apps/{app}/main.cpp'
dest = f'gen/{app}'
print(f"Running the partial PSA-flow on {app} for {target}...")
final_ast = design_flow.run(src, dest, args=args)