-
Notifications
You must be signed in to change notification settings - Fork 0
/
metaprograms.py
277 lines (257 loc) · 14.3 KB
/
metaprograms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/env artisan
from meta_cl import *
from profilers import *
from util import *
import numpy as np
def identify_hotspot_loops(ast, threshold, filter_fn=outermost_filter, debug=False):
profiler = LoopTimeProfiler(ast)
profiler.run(debug=debug,filter_fn=filter_fn)
print(profiler.data)
app_time = profiler.data['main_fn']
profiler.data.pop('main_fn')
hotspot_candidates = [(loop,time) for (loop,time) in profiler.data.items() if time > app_time*threshold]
hotspot_candidates.sort(key=lambda l: l[1], reverse=True)
return hotspot_candidates
def inline_functions_with_pointer_args(ast, fn):
to_inline = []
calls = fn.query('c{CallExpr}')
for row in calls:
if "operator=" in row.c.name:
continue # operator calls are caught here, ignore
for arg in row.c.args:
if arg.query('r{DeclRefExpr}', where=lambda r: pointer_type(r.type)):
to_inline.append(row.c) # if call has any pointer args, inline
break
for call in to_inline:
inline_fn(ast, fn, call)
def analyse_tripcounts(ast, fn_name, debug=False, exec_rule=''):
fn = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == fn_name)[0].fn
loops = fn.query(select="(loop{ForStmt})")
# statically check if any tripcounts are fixed
fixed_bound_loops = []
for row in loops:
info = get_loop_info(row.loop)
# TODO: currently assumes simple loop incremement condition (idx++/idx--)
if info['start'].isdigit() and info['end'].isdigit():
fixed_bound_loops.append(row.loop.tag)
# dynamically analyse tripcounts
profiler = LoopTripCountProfiler(ast, fn_name)
profiler.run(debug=debug, exec_rule=exec_rule)
# add static info to profiler results
for tag in profiler.data:
profiler.data[tag]['fixed'] = False
if tag in fixed_bound_loops:
profiler.data[tag]['fixed'] = True
return profiler.data
def run_data_inout_analysis(ast, fn_name, debug=False, exec_rule=''):
profiler = DataInOutProfiler(ast)
profiler.run(fn_name,debug=debug, exec_rule=exec_rule)
return profiler.data
def memory_footprint_analysis(ast, fn_name, debug=False, exec_rule=''):
profiler = MemoryFootprintProfiler(ast)
profiler.run(fn_name, debug=debug, exec_rule=exec_rule)
return profiler.data
math_fn_to_flops = {'sin': 2, 'cos': 2, 'tan': 2, 'sincos': 2, 'log': 2, 'log2': 2, 'log10': 2, 'exp': 2, 'exp2': 2, 'exp10': 2, 'pow': 2, 'sqrt': 2, 'rsqrt' : 2, 'fabs': 2, 'sinf': 2, 'cosf': 2, 'tanf': 2, 'sincosf': 2, 'logf': 2, 'log2f': 2, 'log10f': 2, 'expf': 2, 'exp2f': 2, 'exp10f': 2, 'powf': 2, 'sqrtf': 2, 'rsqrtf' : 2, 'fabsf': 2, 'floor': 2}
def count_flops_basecase(scope, tripcounts, fn_flop_map):
# count basic fp ops (+, -, /, *, ...)
ops = scope.query("(uop{UnaryOperator}|bop{BinaryOperator}|cop{CompoundAssignmentOperator})")
flop_count = 0
for row in ops:
op = row.uop if row.uop else row.bop if row.bop else row.cop
if len(op.children) < 2 or op.symbol == '=' or op.type.spelling == 'bool':
continue
if op.type.spelling == 'float' or op.type.spelling == 'double':
flop_count += 1
# count ops based on calls to math fns
math_fn_calls = scope.query('c{CallExpr}', where=lambda c: c.name in math_fn_to_flops)
for row in math_fn_calls:
flop_count += math_fn_to_flops[row.c.name]
# count ops in any called fns (recursively check called fns, build map for quick lookup)
other_fn_calls = scope.query('c{CallExpr}', where=lambda c: c.name not in math_fn_to_flops and c.name != 'operator=')
for row in other_fn_calls:
fn_name = row.c.name
if not fn_name:
continue
if fn_name not in fn_flop_map:
fn = scope.ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == fn_name)
if fn:
fn_flop_map[fn_name] = count_flops(fn[0].fn.body, 0, tripcounts, fn_flop_map)
else:
print("Can't count flops for %s, can't find function decl." % fn_name)
continue
flop_count += fn_flop_map[fn_name]
return flop_count
def count_flops(scope, count, tripcounts, fn_flop_map):
for child in scope.children:
if child.isentity('ForStmt') or child.isentity('WhileStmt'):
if child.tag in tripcounts:
trip_count = tripcounts[child.tag]['average']
count += trip_count * count_flops(child.body, 0, tripcounts, fn_flop_map)
else:
print("Can't determine trip count of loop %s, skipping." % child.location)
else:
count += count_flops_basecase(child, tripcounts, fn_flop_map)
return count
def calculate_arithmetic_intensity(ast, fn_name, tripcounts=None, memory_footprint=None, exec_rule=''):
if not tripcounts:
tripcounts = loop_tripcount_analysis(ast, fn_name, exec_rule=exec_rule)
# recursively count flops
results = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == fn_name)
if not results:
return
flops = count_flops(results[0].fn.body, 0, tripcounts, {})
# get memory footprint
if not memory_footprint:
memory_footprint = memory_footprint_analysis(ast, fn_name, exec_rule=exec_rule)
bytes_accessed = memory_footprint['__TOTAL__']['bytes_R'] + memory_footprint['__TOTAL__']['bytes_W']
# calculate and return flops/B
return {'flops': flops, 'bytes': bytes_accessed, 'intensity': float(flops/bytes_accessed)}
def analyse_loop_dependencies(ast, fn_name):
loops = ast.query(select="fn{FunctionDecl}=>l{ForStmt}", where=lambda fn: fn.name == fn_name)
loop_deps = {}
for row in loops:
reads, writes, schedule = build_polyhedral_model(row.l)
if not schedule:
return
raw_deps, war_deps, waw_deps = analyse_loop_deps(reads, writes, schedule)
loop_deps[row.l.tag] = {'raw': raw_deps, 'war': war_deps, 'waw': waw_deps}
loop_deps[row.l.tag]['parallel'] = is_loop_parallel(raw_deps, war_deps, waw_deps,get_loop_info(row.l)['idx'])
if not writes or not reads:
loop_deps[row.l.tag]['parallel'] = True
return loop_deps
def pointer_alias_analysis(ast, fn_name, debug=False):
# TODO: handle memcpy accesses
# find the function and associated pointer parameters
fn = ast.query("fn{FunctionDecl}", where=lambda fn: fn.name == fn_name)
if not fn or len(fn) > 1:
exit(1)
fn = fn[0].fn
pointer_params = [p[1] for p in fn.signature.params if pointer_type(p[0])]
# instrument and execute code to trace all pointers in specified function
profiler = PointerRangeProfiler(ast, fn.name)
profiler.run(debug=debug)
points_to_map = build_points_to_map(profiler.data)
# check if any pointer parameters may alias according to the points-to map
pairs = [(p1,p2) for p1 in pointer_params for p2 in pointer_params if p1 != p2]
aliasing_pairs = [is_reachable(p[0],p[1],points_to_map) for p in pairs]
alias_pairs = [pairs[idx] for idx in [i for i, x in enumerate(aliasing_pairs) if x == True]]
# return pairs of parameters that may alias
return alias_pairs
def remove_loop_arr_deps(ast, kernel_fn, arr_var):
loops = [row.loop for row in ast.query("f{FunctionDecl} => loop{ForStmt}", where=lambda f: f.name == kernel_fn)]
loops.sort(key=lambda l: l.depth, reverse=True)
depth = loops[0].depth
refs = ast.query("f{FunctionDecl} => loop{ForStmt} => arr{ArraySubscriptExpr}", where=lambda f, arr, loop: f.name == kernel_fn and arr.children[0].name == arr_var and loop.depth == depth)
while refs:
remove_loop_arr_dep_with_scalar(refs[0].loop, refs[0].arr)
ast.commit()
refs = ast.query("f{FunctionDecl} => loop{ForStmt} => arr{ArraySubscriptExpr}", where=lambda f, arr, loop: f.name == kernel_fn and arr.children[0].name == arr_var and loop.depth == depth)
#TODO: currently assumes only one reference in loop (check if var decl already exists)
def remove_loop_arr_dep_with_scalar(loop, arr_ref):
if arr_ref.parent.isentity('MemberRefExpr'):
arr_ref = arr_ref.parent
new_var = "_"+arr_ref.unparse().replace("]","").replace("[","").replace(".","")+"_"
# initialise new scalar variable
initialise_var = f"{arr_ref.type.spelling} {new_var} = {arr_ref.unparse()};\n"
loop.instrument(Action.before, code=initialise_var)
# replace references to var in loop
arr_ref.instrument(Action.replace, code=new_var)
# assign value back to array variable
assign_value = f"\n{arr_ref.unparse()} = {new_var};"
loop.instrument(Action.after, code=assign_value)
def use_sp_math_functions(ast, fns):
math_funcs = ['sin', 'cos', 'tan', 'sincos', 'log', 'log2', 'log10', 'exp', 'exp2', 'exp10', 'pow', 'sqrt', 'rsqrt', 'fabs']
calls = ast.query('fn{FunctionDecl} => c{CallExpr}', where=lambda c,fn: c.name in math_funcs and fn.name in fns)
while calls:
call_descendents = [[des.id for des in row.c.descendants] for row in calls]
call_descendents = [item for sublist in call_descendents for item in sublist]
for call in calls:
if call.c.id in call_descendents:
continue
arg_string = ",".join([arg.unparse() for arg in call.c.args])
call.c.instrument(Action.replace, code=f"{call.c.name}f({arg_string})")
ast.commit()
calls = ast.query('fn{FunctionDecl} => c{CallExpr}', where=lambda c,fn: c.name in math_funcs and fn.name in fns)
def use_sp_fp_literals(ast, fns):
funcs = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name in fns)
for row in funcs:
func = row.fn
fp_literals = func.body.query('l{FloatingLiteral}', where=lambda l: l.unparse().strip()[-1] != 'f')
for row in fp_literals:
row.l.instrument(Action.replace, code=row.l.unparse().strip()+'f')
int_literals = func.body.query("il{IntegerLiteral}", where=lambda il: il.parent.type.spelling == 'float' or il.parent.type.spelling == 'double')
for row in int_literals:
row.il.instrument(Action.replace, code=f"{row.il.unparse()}.0f")
def add_parameter(params, ref):
params.append({'name': ref.name, 'type': ref.type.spelling})
def derive_parameter_list(loop):
var_refs = get_all_referenced_vars(loop)
loop_local_vars = [v.name for v in get_local_var_list(loop)]
global_vars = [v.name for v in get_global_var_refs([var for var in var_refs if var.name not in loop_local_vars])]
params = []
for ref in var_refs:
if ref.name not in loop_local_vars and ref.name not in global_vars and ref.name not in [p['name'] for p in params]:
add_parameter(params, ref)
return params
def refine_parameter_list(loop, fn, params):
var_decls_tocopy = ""
for param in params:
# find references to the parameter variable within the current function
decl_refs = fn.query("ref{DeclRefExpr}", where=lambda ref: ref.name == param['name'] and fn.body.encloses(ref))
decl = decl_refs[0].ref.decl
# check if parameter variable is declared inside the function and not used outside the loop
declared_in_fn = fn.body.encloses(decl)
init = None
if declared_in_fn:
init = [row.ref.parent for row in decl_refs if (not loop.encloses(row.ref) and row.ref.parent.isentity('BinaryOperator') and row.ref.parent.symbol == '=')]
if init:
init = init[0]
used_outside_loop = any([not row.ref.parent == init and not loop.encloses(row.ref) for row in decl_refs])
if declared_in_fn and not used_outside_loop:
# copy variable declaration to new function
var_decls_tocopy += decl.unparse() + ";\n" + init.unparse() + ";\n"
# check if any variables referenced in declaration need to be added as parameters
refs_in_decl = decl.query("ref{DeclRefExpr}", where=lambda ref: ref.name not in [p['name'] for p in params] and ref.decl.is_local)
refs_in_init = init.query("ref{DeclRefExpr}", where=lambda ref: ref.name not in [p['name'] for p in params] and ref.decl.is_local)
for row in refs_in_decl:
add_parameter(params, row.ref)
for row in refs_in_init:
add_parameter(params, row.ref)
# remove existing decl
decl.instrument(Action.replace, code="")
decl.instrument(Action.remove_semicolon)
init.instrument(Action.replace, code="")
init.instrument(Action.remove_semicolon)
# remove from param list
params.remove(param)
return var_decls_tocopy, params
def extract_loop_to_function(ast, ltag, new_fn_name='__loop_fn__'):
# query the ast for the relevant loop and containing function
results = ast.query('fn{FunctionDecl} => l{ForStmt}', where=lambda l: l.tag == ltag)
if not results or len(results) > 1:
return
loop = results[0].l
fn = results[0].fn
# instrument app to trace memory allocations so we can pass pointer sizes to new function
trace_memory(ast, scopes_to_ignore=[loop.body])
# determine parameters for the new function: any referenced variables
# that are (1) defined outside of the loop and (2) not global
params = derive_parameter_list(loop)
# refine parameters: identify any variables defined locally within the
# function scope and only used within the loop, move these decls into
# the new function and remove them from the initial parameter list
var_decls_tocopy, params = refine_parameter_list(loop, fn, params)
# add a parameter p_size for every pointer parameter p
pointer_size_params = []
pointer_size_args = []
for p in [i for i in params if '*' in i['type']]:
pointer_size_params.append("int %s_size_" % p['name'])
pointer_size_args.append("Mem::size(%s)" % p['name'])
# instrument to insert new function
param_string = ", ".join([p['type'] + " " + p['name'] for p in params] + pointer_size_params)
new_func_def = "void %s(%s){\n%s%s\n}" % (new_fn_name,param_string,var_decls_tocopy,loop.unparse())
fn.instrument(Action.before, code=new_func_def)
# instrument to replace original loop with call to new function
call_args_string = ", ".join([p['name'] for p in params] + pointer_size_args)
new_func_call = "%s(%s);" % (new_fn_name,call_args_string)
loop.instrument(Action.replace, code=new_func_call)