• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013      Ecole Normale Superieure
3  * Copyright 2015      Sven Verdoolaege
4  *
5  * Use of this software is governed by the MIT license
6  *
7  * Written by Sven Verdoolaege,
8  * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
9  */
10 
11 #include <string.h>
12 
13 #include <isl/val.h>
14 #include <isl/space.h>
15 #include <isl/union_set.h>
16 #include <isl/schedule_node.h>
17 
18 #include "hybrid.h"
19 #include "gpu_hybrid.h"
20 #include "gpu_tree.h"
21 #include "schedule.h"
22 #include "util.h"
23 
24 /* Have all domain elements been filtered out before reaching
25  * the "node" position in the schedule tree?
26  */
has_empty_domain(__isl_keep isl_schedule_node * node)27 static isl_bool has_empty_domain(__isl_keep isl_schedule_node *node)
28 {
29 	isl_union_set *domain;
30 	isl_bool empty;
31 
32 	domain = isl_schedule_node_get_domain(node);
33 	empty = isl_union_set_is_empty(domain);
34 	isl_union_set_free(domain);
35 
36 	return empty;
37 }
38 
39 /* Given a pointer to a phase in the result of hybrid tiling,
40  * map the phase to the device, provided the phase is non-empty.
41  * Empty phases can occur if the input schedule domain can be
42  * covered by a small number of hexagons that all belong to the same phase.
43  *
44  * The input has the following form:
45  *
46  *	M - CT - P - C - ...
47  *
48  * with M the phase marker, CT the space tiling, P the original
49  * parent band and C the original child band.
50  * The (outer dimensions of the) C band need to be mapped to threads.
51  * The (outer dimension of the) CT band needs to be mapped to blocks.
52  * The mapping to shared memory needs to be computed between the CT and
53  * the P band.
54  *
55  * The C band is first shifted to start at zero.
56  * Then the appropriate markers are introduced and a kernel is
57  * created for the tree rooted at CT.
58  * If the "unroll_gpu_tile" option is set, then the AST generator
59  * is instructed to unroll the P and C bands.
60  */
update_phase(__isl_take isl_schedule_node * node,void * user)61 static __isl_give isl_schedule_node *update_phase(
62 	__isl_take isl_schedule_node *node, void *user)
63 {
64 	struct gpu_gen *gen = user;
65 	int depth0, depth;
66 	isl_ctx *ctx;
67 	isl_id *id;
68 	isl_bool empty_domain;
69 	ppcg_ht_phase *phase;
70 
71 	empty_domain = has_empty_domain(node);
72 	if (empty_domain < 0)
73 		return isl_schedule_node_free(node);
74 	if (empty_domain)
75 		return node;
76 
77 	if (!node)
78 		return NULL;
79 	ctx = isl_schedule_node_get_ctx(node);
80 
81 	phase = ppcg_ht_phase_extract_from_mark(node);
82 
83 	depth0 = isl_schedule_node_get_tree_depth(node);
84 
85 	node = isl_schedule_node_child(node, 0);
86 
87 	node = isl_schedule_node_child(node, 0);
88 	node = isl_schedule_node_child(node, 0);
89 	node = ppcg_ht_phase_shift_space_point(phase, node);
90 	if (gen->options->unroll_gpu_tile)
91 		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
92 	id = isl_id_alloc(ctx, "thread", NULL);
93 	node = isl_schedule_node_insert_mark(node, id);
94 	node = isl_schedule_node_parent(node);
95 	if (gen->options->unroll_gpu_tile)
96 		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
97 	id = isl_id_alloc(ctx, "shared", NULL);
98 	node = isl_schedule_node_insert_mark(node, id);
99 	node = isl_schedule_node_parent(node);
100 
101 	node = gpu_create_kernel(gen, node, 0, NULL);
102 
103 	depth = isl_schedule_node_get_tree_depth(node);
104 	node = isl_schedule_node_ancestor(node, depth - depth0);
105 
106 	return node;
107 }
108 
109 /* Apply hybrid tiling on "node" and its parent based on the (valid)
110  * bounds on the relative dependence distances "bounds" and
111  * the tile sizes in "tile_sizes".
112  * The number of elements in "tile_sizes" is at least as large
113  * as the sum of the dimensions of the parent and the child node.
114  *
115  * Convert the tile_sizes to an isl_multi_val in the right space,
116  * insert the hybrid tiling and then create a kernel inside each phase.
117  * Finally, remove the phase marks.
118  */
gpu_hybrid_tile(struct gpu_gen * gen,__isl_take isl_schedule_node * node,__isl_take ppcg_ht_bounds * bounds,int * tile_sizes)119 __isl_give isl_schedule_node *gpu_hybrid_tile(struct gpu_gen *gen,
120 	__isl_take isl_schedule_node *node, __isl_take ppcg_ht_bounds *bounds,
121 	int *tile_sizes)
122 {
123 	isl_multi_val *mv;
124 	isl_space *space, *space2;
125 
126 	if (!node || !bounds)
127 		goto error;
128 
129 	space2 = isl_schedule_node_band_get_space(node);
130 	node = isl_schedule_node_parent(node);
131 	space = isl_schedule_node_band_get_space(node);
132 	space = isl_space_product(space, space2);
133 	mv = ppcg_multi_val_from_int_list(space, tile_sizes);
134 
135 	node = ppcg_ht_bounds_insert_tiling(bounds, mv, node, gen->options);
136 
137 	node = hybrid_tile_foreach_phase(node, &update_phase, gen);
138 
139 	node = hybrid_tile_drop_phase_marks(node);
140 
141 	return node;
142 error:
143 	isl_schedule_node_free(node);
144 	ppcg_ht_bounds_free(bounds);
145 	return NULL;
146 }
147