1 #ifndef _GPU_H 2 #define _GPU_H 3 4 #include <isl/ast.h> 5 #include <isl/id.h> 6 #include <isl/id_to_ast_expr.h> 7 8 #include <pet.h> 9 10 #include "ppcg.h" 11 #include "ppcg_options.h" 12 13 /* An access to an outer array element or an iterator. 14 * Accesses to iterators have an access relation that maps to an unnamed space. 15 * An access may be both read and write. 16 * If the access relation is empty, then the output dimension may 17 * not be equal to the dimension of the corresponding array. 18 */ 19 struct gpu_stmt_access { 20 /* Access reads elements */ 21 int read; 22 /* Access writes elements */ 23 int write; 24 /* All writes are definite writes. */ 25 int exact_write; 26 /* Is a single, fixed element being accessed? */ 27 isl_bool fixed_element; 28 /* The number of index expressions specified in the access. */ 29 int n_index; 30 31 /* May access relation */ 32 isl_map *access; 33 /* May access relation with as domain a mapping from iteration domain 34 * to a reference identifier. 35 */ 36 isl_map *tagged_access; 37 /* The reference id of the corresponding pet_expr. */ 38 isl_id *ref_id; 39 40 struct gpu_stmt_access *next; 41 }; 42 43 /* A representation of a user statement. 44 * "stmt" points to the corresponding pet statement. 45 * "id" is the identifier of the instance set of the statement. 46 * "accesses" is a linked list of accesses performed by the statement. 47 * If the statement has been killed, i.e., if it will not be scheduled, 48 * then this linked list may be empty even if the actual statement does 49 * perform accesses. 50 */ 51 struct gpu_stmt { 52 isl_id *id; 53 struct pet_stmt *stmt; 54 55 struct gpu_stmt_access *accesses; 56 }; 57 58 /* Represents an outer array possibly accessed by a gpu_prog. 59 */ 60 struct gpu_array_info { 61 /* The array data space. */ 62 isl_space *space; 63 /* Element type. */ 64 char *type; 65 /* Element size. */ 66 int size; 67 /* Name of the array. */ 68 char *name; 69 /* Declared extent of original array. */ 70 isl_set *declared_extent; 71 /* AST expression for declared size of original array. */ 72 isl_ast_expr *declared_size; 73 /* Extent of the array that needs to be copied. */ 74 isl_set *extent; 75 /* Number of indices. */ 76 unsigned n_index; 77 /* For each index, a bound on "extent" in that direction. */ 78 isl_multi_pw_aff *bound; 79 /* The corresponding access AST expression, if the array needs 80 * to be allocated on the device. 81 */ 82 isl_ast_expr *bound_expr; 83 84 /* All references to this array; point to elements of a linked list. */ 85 int n_ref; 86 struct gpu_stmt_access **refs; 87 88 /* Is this array accessed at all by the program? */ 89 int accessed; 90 91 /* Is this a scalar that is read-only within the entire program? */ 92 int read_only_scalar; 93 94 /* Are the elements of the array structures? */ 95 int has_compound_element; 96 97 /* Are the elements only accessed through constant index expressions? */ 98 int only_fixed_element; 99 100 /* Is the array local to the scop? */ 101 int local; 102 /* Is the array local and should it be declared on the host? */ 103 int declare_local; 104 105 /* Is the corresponding global device memory accessed in any way? */ 106 int global; 107 108 /* Should the array be linearized? */ 109 int linearize; 110 111 /* Order dependences on this array. 112 * Only used if live_range_reordering option is set. 113 * It is set to NULL otherwise. 114 */ 115 isl_union_map *dep_order; 116 117 void *user; 118 }; 119 120 /* Represents an outer array accessed by a ppcg_kernel, localized 121 * to the context of this kernel. 122 * 123 * "array" points to the corresponding array in the gpu_prog. 124 * The "n_group" "groups" are the reference groups associated to the array. 125 * If "force_private" is set, then the array (in practice a scalar) 126 * must be mapped to a register. 127 * "global" is set if the global device memory corresponding 128 * to this array is accessed by the kernel. 129 * "bound" is equal to array->bound specialized to the current kernel. 130 * "bound_expr" is the corresponding access AST expression. 131 */ 132 struct gpu_local_array_info { 133 struct gpu_array_info *array; 134 135 int n_group; 136 struct gpu_array_ref_group **groups; 137 138 int force_private; 139 int global; 140 141 unsigned n_index; 142 isl_multi_pw_aff *bound; 143 isl_ast_expr *bound_expr; 144 }; 145 146 __isl_give isl_ast_expr *gpu_local_array_info_linearize_index( 147 struct gpu_local_array_info *array, __isl_take isl_ast_expr *expr); 148 149 /* A sequence of "n" names of types. 150 */ 151 struct gpu_types { 152 int n; 153 char **name; 154 }; 155 156 /* "read" and "write" contain the original access relations, possibly 157 * involving member accesses. 158 * 159 * The elements of "array", as well as the ranges of "copy_in" and "copy_out" 160 * only refer to the outer arrays of any possible member accesses. 161 */ 162 struct gpu_prog { 163 isl_ctx *ctx; 164 165 struct ppcg_scop *scop; 166 167 /* Set of parameter values */ 168 isl_set *context; 169 170 /* All potential read accesses in the entire program */ 171 isl_union_map *read; 172 173 /* All potential write accesses in the entire program */ 174 isl_union_map *may_write; 175 /* All definite write accesses in the entire program */ 176 isl_union_map *must_write; 177 /* All tagged definite kills in the entire program */ 178 isl_union_map *tagged_must_kill; 179 180 /* The set of inner array elements that may be preserved. */ 181 isl_union_set *may_persist; 182 183 /* A mapping from all innermost arrays to their outer arrays. */ 184 isl_union_map *to_outer; 185 /* A mapping from the outer arrays to all corresponding inner arrays. */ 186 isl_union_map *to_inner; 187 /* A mapping from all intermediate arrays to their outer arrays, 188 * including an identity mapping from the anonymous 1D space to itself. 189 */ 190 isl_union_map *any_to_outer; 191 192 /* Order dependences on non-scalars. */ 193 isl_union_map *array_order; 194 195 /* Array of statements */ 196 int n_stmts; 197 struct gpu_stmt *stmts; 198 199 int n_array; 200 struct gpu_array_info *array; 201 }; 202 203 struct gpu_gen { 204 isl_ctx *ctx; 205 struct ppcg_options *options; 206 207 /* Callback for printing of AST in appropriate format. */ 208 __isl_give isl_printer *(*print)(__isl_take isl_printer *p, 209 struct gpu_prog *prog, __isl_keep isl_ast_node *tree, 210 struct gpu_types *types, void *user); 211 void *print_user; 212 213 isl_id_to_ast_expr *(*build_ast_expr)(void *stmt, 214 isl_ast_build *build, 215 isl_multi_pw_aff *(*fn_index)( 216 __isl_take isl_multi_pw_aff *mpa, isl_id *id, 217 void *user), 218 void *user_index, 219 isl_ast_expr *(*fn_expr)(isl_ast_expr *expr, 220 isl_id *id, void *user), 221 void *user_expr); 222 223 struct gpu_prog *prog; 224 /* The generated AST. */ 225 isl_ast_node *tree; 226 227 /* The sequence of types for which a definition has been printed. */ 228 struct gpu_types types; 229 230 /* User specified tile, grid and block sizes for each kernel */ 231 isl_union_map *sizes; 232 233 /* Effectively used tile, grid and block sizes for each kernel */ 234 isl_union_map *used_sizes; 235 236 /* Identifier of the next kernel. */ 237 int kernel_id; 238 }; 239 240 enum ppcg_group_access_type { 241 ppcg_access_global, 242 ppcg_access_shared, 243 ppcg_access_private 244 }; 245 246 enum ppcg_kernel_stmt_type { 247 ppcg_kernel_copy, 248 ppcg_kernel_domain, 249 ppcg_kernel_sync 250 }; 251 252 /* Representation of special statements, in particular copy statements 253 * and __syncthreads statements, inside a kernel. 254 * 255 * type represents the kind of statement 256 * 257 * 258 * for ppcg_kernel_copy statements we have 259 * 260 * read is set if the statement should copy data from global memory 261 * to shared memory or registers. 262 * 263 * index expresses an access to the array element that needs to be copied 264 * local_index expresses the corresponding element in the tile 265 * 266 * array refers to the original array being copied 267 * local_array is a pointer to the appropriate element in the "array" 268 * array of the ppcg_kernel to which this copy access belongs 269 * 270 * 271 * for ppcg_kernel_domain statements we have 272 * 273 * stmt is the corresponding input statement 274 * 275 * n_access is the number of accesses in stmt 276 * access is an array of local information about the accesses 277 */ 278 struct ppcg_kernel_stmt { 279 enum ppcg_kernel_stmt_type type; 280 281 union { 282 struct { 283 int read; 284 isl_ast_expr *index; 285 isl_ast_expr *local_index; 286 struct gpu_array_info *array; 287 struct gpu_local_array_info *local_array; 288 } c; 289 struct { 290 struct gpu_stmt *stmt; 291 isl_id_to_ast_expr *ref2expr; 292 } d; 293 } u; 294 }; 295 296 /* Representation of a local variable in a kernel. 297 */ 298 struct ppcg_kernel_var { 299 struct gpu_array_info *array; 300 enum ppcg_group_access_type type; 301 char *name; 302 isl_vec *size; 303 }; 304 305 /* Representation of a kernel. 306 * 307 * prog describes the original code from which the kernel is extracted. 308 * 309 * id is the sequence number of the kernel. 310 * 311 * block_ids contains the list of block identifiers for this kernel. 312 * thread_ids contains the list of thread identifiers for this kernel. 313 * 314 * the first n_grid elements of grid_dim represent the specified size 315 * of the grid. 316 * the first n_block elements of block_dim represent the specified or 317 * effective size of the block. 318 * Note that in the input file, the sizes of the grid and the blocks 319 * are specified in the order x, y, z, but internally, the sizes 320 * are stored in reverse order, so that the last element always 321 * refers to the x dimension. 322 * 323 * grid_size reflects the effective grid size. 324 * grid_size_expr contains a corresponding access AST expression, built within 325 * the context where the launch appears. 326 * 327 * context contains the values of the parameters and outer schedule dimensions 328 * for which any statement instance in this kernel needs to be executed. 329 * 330 * n_sync is the number of synchronization operations that have 331 * been introduced in the schedule tree corresponding to this kernel (so far). 332 * 333 * core contains the spaces of the statement domains that form 334 * the core computation of the kernel. It is used to navigate 335 * the tree during the construction of the device part of the schedule 336 * tree in gpu_create_kernel. 337 * 338 * expanded_domain contains the original statement instances, 339 * i.e., those that appear in the domains of access relations, 340 * that are involved in the kernel. 341 * contraction maps those original statement instances to 342 * the statement instances that are active at the point 343 * in the schedule tree where the kernel is created. 344 * 345 * arrays is the set of possibly accessed outer array elements. 346 * 347 * space is the schedule space of the AST context. That is, it represents 348 * the loops of the generated host code containing the kernel launch. 349 * 350 * n_array is the total number of arrays in the input program and also 351 * the number of element in the array array. 352 * array contains information about each array that is local 353 * to the current kernel. If an array is not used in a kernel, 354 * then the corresponding entry does not contain any information. 355 * 356 * any_force_private is set if any array in the kernel is marked force_private 357 * 358 * block_filter contains constraints on the domain elements in the kernel 359 * that encode the mapping to block identifiers, where the block identifiers 360 * are represented by "n_grid" parameters with as names the elements 361 * of "block_ids". 362 * 363 * thread_filter contains constraints on the domain elements in the kernel 364 * that encode the mapping to thread identifiers, where the thread identifiers 365 * are represented by "n_block" parameters with as names the elements 366 * of "thread_ids". 367 * 368 * copy_schedule corresponds to the schedule dimensions of 369 * the (tiled) schedule for this kernel that have been taken into account 370 * for computing private/shared memory tiles. 371 * The domain corresponds to the original statement instances, i.e., 372 * those that appear in the leaves of the schedule tree. 373 * copy_schedule_dim is the dimension of this schedule. 374 * 375 * sync_writes contains write references that require synchronization. 376 * Each reference is represented by a universe set in a space [S[i,j] -> R[]] 377 * with S[i,j] the statement instance space and R[] the array reference. 378 */ 379 struct ppcg_kernel { 380 isl_ctx *ctx; 381 struct ppcg_options *options; 382 383 struct gpu_prog *prog; 384 385 int id; 386 387 isl_id_list *block_ids; 388 isl_id_list *thread_ids; 389 390 int n_grid; 391 int n_block; 392 int grid_dim[2]; 393 int block_dim[3]; 394 395 isl_multi_pw_aff *grid_size; 396 isl_ast_expr *grid_size_expr; 397 isl_set *context; 398 399 int n_sync; 400 isl_union_set *core; 401 isl_union_set *arrays; 402 403 isl_union_pw_multi_aff *contraction; 404 isl_union_set *expanded_domain; 405 406 isl_space *space; 407 408 int n_array; 409 struct gpu_local_array_info *array; 410 411 int n_var; 412 struct ppcg_kernel_var *var; 413 414 int any_force_private; 415 416 isl_union_set *block_filter; 417 isl_union_set *thread_filter; 418 isl_union_pw_multi_aff *copy_schedule; 419 int copy_schedule_dim; 420 421 isl_union_set *sync_writes; 422 423 isl_ast_node *tree; 424 }; 425 426 int gpu_array_is_scalar(struct gpu_array_info *array); 427 int gpu_array_is_read_only_scalar(struct gpu_array_info *array); 428 int gpu_array_requires_device_allocation(struct gpu_array_info *array); 429 __isl_give isl_set *gpu_array_positive_size_guard(struct gpu_array_info *array); 430 isl_bool gpu_array_can_be_private(struct gpu_array_info *array); 431 432 struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop); 433 void *gpu_prog_free(struct gpu_prog *prog); 434 435 int ppcg_kernel_requires_array_argument(struct ppcg_kernel *kernel, int i); 436 437 int generate_gpu(isl_ctx *ctx, const char *input, FILE *out, 438 struct ppcg_options *options, 439 __isl_give isl_printer *(*print)(__isl_take isl_printer *p, 440 struct gpu_prog *prog, __isl_keep isl_ast_node *tree, 441 struct gpu_types *types, void *user), void *user); 442 443 __isl_give isl_schedule_node *gpu_create_kernel(struct gpu_gen *gen, 444 __isl_take isl_schedule_node *node, int scale, 445 __isl_keep isl_multi_val *sizes); 446 447 __isl_give isl_schedule *get_schedule(struct gpu_gen *gen); 448 int has_any_permutable_node(__isl_keep isl_schedule *schedule); 449 __isl_give isl_schedule *map_to_device(struct gpu_gen *gen, 450 __isl_take isl_schedule *schedule, 451 int to_from_device); 452 __isl_give isl_ast_node *generate_code(struct gpu_gen *gen, 453 __isl_take isl_schedule *schedule); 454 455 __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog); 456 void collect_references(struct gpu_prog *prog, struct gpu_array_info *array); 457 void collect_order_dependences(struct gpu_prog *prog); 458 isl_bool only_fixed_element_accessed(struct gpu_array_info *array); 459 #endif 460