-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdesign_flow_patterns.py
110 lines (88 loc) · 5 KB
/
design_flow_patterns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env artisan
from util import *
from oneapi import *
from hip import *
from openmp import *
from meta_cl import *
from metaprograms import *
## Every design-flow pattern exposes the same interface: pattern_name(ast, data, { optional named params }),
## so that any pattern can be added to a design_flow object (see design_flow.py).
def extract_hotspot(ast, data, filter_fn=outermost_filter, threshold=0.5, fn_name='__kernel__'):
    """Extract the first hotspot candidate into a function and record it in ``data``.

    The candidates come from ``identify_hotspot_loops(ast, threshold,
    filter_fn=filter_fn)``; element ``[0][0]`` of that result is handed to
    ``extract_loop_to_function`` with ``fn_name`` as the new function name.
    The extracted function is then passed through
    ``inline_functions_with_pointer_args``.

    Writes to ``data``:
        'hotspot_fn_name': ``fn_name``.
        'device_fns': ``fn_name`` followed by the first element of every
            entry returned by ``get_called_fns`` for the committed function.
    """
    hotspots = identify_hotspot_loops(ast, threshold, filter_fn=filter_fn)
    data['hotspot_fn_name'] = fn_name

    # presumably [0][0] is the loop node of the first candidate -- confirm
    # against identify_hotspot_loops
    first_candidate = hotspots[0][0]
    extract_loop_to_function(ast, first_candidate, new_fn_name=fn_name)
    ast.sync(commit=True)

    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == fn_name)
    extracted_fn = matches[0].fn
    inline_functions_with_pointer_args(ast, extracted_fn)

    # Exactly one node is tracked, so exactly one is expected back.
    (committed_fn,) = ast.commit(track=[extracted_fn])
    callee_names = [callee[0] for callee in get_called_fns(ast, committed_fn)]
    data['device_fns'] = [fn_name] + callee_names
def loop_tripcount_analysis(ast, data, debug=False, exec_rule=''):
    """Run ``analyse_tripcounts`` on the hotspot function.

    Reads ``data['hotspot_fn_name']`` and stores the result under
    ``data['tripcount_report']``. ``debug`` and ``exec_rule`` are forwarded
    unchanged to ``analyse_tripcounts``.
    """
    data['tripcount_report'] = analyse_tripcounts(
        ast,
        data['hotspot_fn_name'],
        debug=debug,
        exec_rule=exec_rule,
    )
def arithmetic_intensity_analysis(ast, data, exec_rule=''):
    """Run ``calculate_arithmetic_intensity`` on the hotspot function.

    Passes ``data['tripcount_report']`` when that key is present, otherwise
    ``None``. The result is stored under ``data['arith_intensity_report']``.
    """
    known_tripcounts = data['tripcount_report'] if 'tripcount_report' in data else None
    data['arith_intensity_report'] = calculate_arithmetic_intensity(
        ast,
        data['hotspot_fn_name'],
        known_tripcounts,
        exec_rule=exec_rule,
    )
def pointer_analysis(ast, data, *args):
    """Run ``pointer_alias_analysis`` on the hotspot function.

    Stores ``data['pointer_alias_report']`` as a dict with:
        'alias_pairs': the value returned by ``pointer_alias_analysis``.
        'restrict': ``True`` when that value has length zero.

    Extra positional arguments are accepted and ignored.
    """
    aliases = pointer_alias_analysis(ast, data['hotspot_fn_name'])
    no_aliases_found = len(aliases) == 0
    data['pointer_alias_report'] = {
        'alias_pairs': aliases,
        'restrict': no_aliases_found,
    }
def data_inout_analysis(ast, data, debug=False, exec_rule=''):
    """Run ``run_data_inout_analysis`` on the hotspot function.

    Reads ``data['hotspot_fn_name']`` and stores the result under
    ``data['data_inout_report']``. ``debug`` and ``exec_rule`` are forwarded
    unchanged.
    """
    data['data_inout_report'] = run_data_inout_analysis(
        ast,
        data['hotspot_fn_name'],
        debug=debug,
        exec_rule=exec_rule,
    )
def loop_dependence_analysis(ast, data, *args):
    """Run ``analyse_loop_dependencies`` on the hotspot function.

    The result is stored under ``data['loop_dep_report']``. Extra positional
    arguments are accepted and ignored.
    """
    hotspot_name = data['hotspot_fn_name']
    data['loop_dep_report'] = analyse_loop_dependencies(ast, hotspot_name)
def generate_hip_design(ast, data, *args):
    """Apply ``map_to_hip_gpu`` to the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']``,
    passes ``data['pointer_alias_report']['restrict']`` as the ``restrict``
    keyword, then calls ``ast.sync(commit=True)``. Extra positional arguments
    are accepted and ignored.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    restrict_flag = data['pointer_alias_report']['restrict']
    map_to_hip_gpu(ast, hotspot_fn, restrict=restrict_flag)
    ast.sync(commit=True)
def employ_sp_fp_literals(ast, data, *args):
    """Apply ``use_sp_fp_literals`` to the functions listed in ``data['device_fns']``.

    Calls ``ast.sync(commit=True)`` afterwards. Extra positional arguments are
    accepted and ignored.
    """
    device_fn_names = data['device_fns']
    use_sp_fp_literals(ast, device_fn_names)
    ast.sync(commit=True)
def employ_sp_math_fns(ast, data, *args):
    """Apply ``use_sp_math_functions`` to the functions listed in ``data['device_fns']``.

    Calls ``ast.sync(commit=True)`` afterwards. Extra positional arguments are
    accepted and ignored.
    """
    device_fn_names = data['device_fns']
    use_sp_math_functions(ast, device_fn_names)
    ast.sync(commit=True)
def employ_reciprocal_math_fns(ast, data, *args):
    """Apply ``use_reciprocal_math_functions`` to the functions in ``data['device_fns']``.

    Calls ``ast.sync(commit=True)`` afterwards. Extra positional arguments are
    accepted and ignored.
    """
    device_fn_names = data['device_fns']
    use_reciprocal_math_functions(ast, device_fn_names)
    ast.sync(commit=True)
def employ_hip_pinned_memory(ast, data, *args):
    """Apply ``use_pinned_memory`` to the AST, then call ``ast.sync(commit=True)``.

    ``data`` and any extra positional arguments are accepted only to satisfy
    the uniform pattern interface; this pattern does not read them.
    """
    use_pinned_memory(ast)
    ast.sync(commit=True)
def hip_blocksize_timing_DSE(ast, data, device=None):
    """Run ``time_kernel_bs_DSE`` for the function named ``data['hotspot_fn_name']``.

    ``device`` is forwarded unchanged as a keyword argument. This pattern
    itself does not sync or commit the AST.
    """
    hotspot_name = data['hotspot_fn_name']
    time_kernel_bs_DSE(ast, hotspot_name, device=device)
def multithread_parallel_loops(ast, data, *args):
    """Apply ``openmp_multithread_loops`` to the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']`` and
    calls ``ast.sync(commit=True)`` afterwards. Extra positional arguments are
    accepted and ignored.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    openmp_multithread_loops(ast, hotspot_fn)
    ast.sync(commit=True)
def remove_compound_assignment_deps(ast, data, *args):
    """Call ``remove_loop_arr_deps`` for arrays used in compound assignments.

    Queries the hotspot function (``data['hotspot_fn_name']``) for array
    subscript expressions that sit inside a compound assignment within a
    ``for`` loop and are enclosed by that assignment's first child. For each
    distinct array name found, ``remove_loop_arr_deps`` is invoked on the
    hotspot function.

    The function name used in the query is taken from ``data`` rather than
    being hard-coded, so the pattern follows whatever name ``extract_hotspot``
    recorded. Extra positional arguments are accepted and ignored.

    Raises:
        KeyError: if ``data`` has no 'hotspot_fn_name' entry.
    """
    hotspot_name = data['hotspot_fn_name']
    dep_refs = ast.query(
        'fn{FunctionDecl} => l{ForStmt} => pe{CompoundAssignmentOperator} => arr{ArraySubscriptExpr}',
        where=lambda fn, pe, arr: fn.name == hotspot_name and pe.children[0].encloses(arr),
    )

    # De-duplicate while keeping first-seen order so arrays are always
    # processed in the same order (a plain set() would make it hash-dependent).
    dep_vars = list(dict.fromkeys(row.arr.children[0].name for row in dep_refs))

    for var in dep_vars:
        remove_loop_arr_deps(ast, hotspot_name, var)
        # NOTE(review): committing after every removal so the next call does
        # not operate on a stale tree -- confirm this matches the intended
        # placement of the sync.
        ast.sync(commit=True)
def omp_nthreads_DSE(ast, data, max_threads=32):
    """Run ``run_openmp_num_threads_DSE`` on the AST with ``max_threads``.

    ``data`` is accepted only to satisfy the uniform pattern interface; this
    pattern does not read or write it.
    """
    thread_limit = max_threads
    run_openmp_num_threads_DSE(ast, thread_limit)
def generate_oneapi_design(ast, data, *args):
    """Apply ``map_to_oneapi_basic`` to the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']`` and
    passes it, ``data['pointer_alias_report']['restrict']`` and the function's
    own name positionally. Calls ``ast.sync(commit=True)`` afterwards. Extra
    positional arguments are accepted and ignored.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    restrict_flag = data['pointer_alias_report']['restrict']
    map_to_oneapi_basic(ast, hotspot_fn, restrict_flag, hotspot_fn.name)
    ast.sync(commit=True)
def use_oneapi_zerocopy_memory(ast, data, *args):
    """Apply ``basic_kernel_to_zerocopy`` to the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']`` and
    calls ``ast.sync(commit=True)`` afterwards. Extra positional arguments are
    accepted and ignored.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    basic_kernel_to_zerocopy(ast, hotspot_fn)
    ast.sync(commit=True)
def unroll_small_fixed_bound_loops(ast, data, max_iters=20):
    """Apply ``unroll_fixed_oneapi_loops`` to the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']``,
    forwards ``max_iters`` as a keyword argument, then calls
    ``ast.sync(commit=True)``.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    unroll_fixed_oneapi_loops(ast, hotspot_fn, max_iters=max_iters)
    ast.sync(commit=True)
def introduce_shared_mem_buffers(ast, data, param=None, max_size=10000):
    """Apply ``introduce_shared_mem`` to the hotspot function and its wrapper.

    Looks up two function declarations: the one named
    ``data['hotspot_fn_name']`` and the one whose name is that function's name
    followed by ``_wrapper_``. Both are passed to ``introduce_shared_mem``
    together with ``data['data_inout_report']``, ``data['struct_map']`` and
    ``max_size``.

    ``param`` is not used by this pattern. This pattern itself does not call
    ``ast.sync``.
    """
    # NOTE(review): 'struct_map' is not written by any pattern visible in this
    # file -- confirm which step populates it.
    kernel_matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    kernel_fn = kernel_matches[0].fn

    wrapper_matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == f"{kernel_fn.name}_wrapper_")
    wrapper_fn = wrapper_matches[0].fn

    introduce_shared_mem(
        ast,
        kernel_fn,
        wrapper_fn,
        data['data_inout_report'],
        data['struct_map'],
        max_size=max_size,
    )
def unroll_until_fpga_overmap_DSE(ast, data, target='a10'):
    """Run ``unroll_until_overmap`` on the hotspot function.

    Looks up the function declaration named ``data['hotspot_fn_name']`` and
    forwards ``target`` as a keyword argument. This pattern itself does not
    call ``ast.sync``.
    """
    matches = ast.query('fn{FunctionDecl}', where=lambda fn: fn.name == data['hotspot_fn_name'])
    hotspot_fn = matches[0].fn
    unroll_until_overmap(ast, hotspot_fn, target=target)