1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can
5 * be found in the LICENSE file.
6 *
7 */
8
9 //
10 //
11 //
12
13 // get rid of these
14 #include <stdio.h>
15 #include <stdlib.h>
16
17 //
18 //
19 //
20
21 #include "hs/cl/hs_cl.h"
22
23 #include "common/cl/assert_cl.h"
24
25 #include "context.h"
26 #include "grid.h"
27 #include "raster.h"
28 #include "extent_ring.h"
29 #include "raster_builder.h"
30
31 #include "tile.h"
32
33 #include "config_cl.h"
34 #include "runtime_cl_12.h"
35 #include "extent_cl_12.h"
36 #include "raster_builder_cl_12.h"
37
38 //
39 // RASTERIZATION SUB-PIPELINE
40 // --------------------------
41 //
42 // Phase 1: expand commands
43 //
44 // Phase 2: rasterize
45 //
46 // Phase 3: sort & segment || release paths
47 //
48 // Phase 4: prefix
49 //
50 // Phase 5: release rasters
51 //
52 // RASTER COHORT
53 // ==============
54 //
55 // BUILDER RASTERIZER POST PROCESSING
56 // <-----------------------------------------------> <------------> <--------------------------------------------------------------------->
57 //
58 // fill cmds transforms raster clips path release rasterize cmds cohort map raster release TTSB TTSK cohort atomics context atomics
59 // --------- ---------- ------------ ------------ -------------- ---------- -------------- ---- ---- -------------- ---------------
60 // 1,2 1,2 1,2 1,2 2 1-4 1,2,3,4 2-4 2-4 2-4 global
61 //
62 //
63 // NOTES: FINE-GRAINED SVM
64 // -----------------------
65 //
66 // 1) In a fine-grained system we know the exact number of
67 // rasterize cmds per segment type before phase 1
68 //
69 // 2) A raster that's "under construction" shouldn't be rasterized
70 // until it is complete. This implies that a raster is not part
71 // of a cohort until it is complete. The raster builder must
72 // handle raster promises being "forced" to completion -- this is
73 // likely the result of composition construction and subsequent
74 // rendering to a surface.
75 //
76 // 3) The raster cohort rasterizer state retains the fill cmd,
77 // transform, raster clip and path release "ring" extents.
78 //
79 // 4) The rasterize cmd extent sizes (line, quad, cubic, rational
80 // quad, rational cubic) are known ahead of time.
81 //
82 // 5) The raster cohort post processor is standalone and retains the
83 // raster_map, cohort atomics, TTSK_RYX extent, and raster
84 // references until complete.
85 //
86
87 //
88 // Notes:
89 //
90 // - Could have a pipeline stage before expansion count the exact
91 // number of line/quad/cubic commands but the command buffers are
92 // relatively small (64-bit commands * # of path segments).
93 //
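//   e.g. (rough arithmetic only, not a measured figure): one million
//   path segments implies on the order of one million 64-bit rasterize
//   commands, i.e. roughly 8 MB of command buffer
//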
94
95 // raster
96 // cohort atomics path_ids raster_ids transforms clips cmds_fill cmds_l/q/c ttsk_ryx
97 //
98 //
99 // BEGIN ^
100 // |
101 // EXPAND |
102 // |
103 // RASTERIZE |
104 // |
105 // SORT || RELEASE PATHS |
106 // |
107 // PREFIX |
108 // |
109 // RELEASE RASTERS |
110 // |
111 // END v
112 //
113 //
114 // BEGIN
115 //
116 // EXPAND -- PRODUCES: one or more extents of rasterization commands
117 //
118 // RASTERIZE -- DEPENDENCY: requires size of command extents before launching
119 // -- PRODUCES: an extent of ttsk_ryx keys
120 //
121 // SORT || RELEASE PATHS -- DEPENDENCY: requires size of key extent before launching
122 // -- PRODUCES: sorted array of keys
123 //
124 // PREFIX -- DEPENDENCY: none -- can execute after SORT because grid size is number of rasters
125 //
126 // RELEASE RASTERS -- DEPENDENCY: none -- can execute after prefix
127 //
128 // END
129 //
130
131 // ------------------------
132 //
133 // DEPENDENCY is cleanly implemented with a host callback or device kernel launcher
134 //
135 // Can this hide resource acquisition? Yes. But there are two cases:
136 //
137 // 1. acquisition of resources occurs on the host thread and lack of
138 // resources drains the host command queue until resources are
139 // available (OpenCL 2.x)
140 //
141 // 2. the host commands lazily acquire resources (OpenCL 1.2)
142 //
143 // ------------------------
144 //
145 // How to express?
146 //
147 // Each substage launches its successors. This supports both dependency models.
148 //
149 // If OpenCL 1.2 then the substage can't be launched until the prior
150 // stage's event is complete. So this requires registering a callback
151 // to invoke the substage.
152 //
153 // ------------------------
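//
// For reference, a minimal sketch of the OpenCL 1.2 hand-off used by
// the completion callbacks later in this file. This is illustrative
// only: "cohort" and "grid" are assumed to be in scope and
// skc_example_next_substage_cb is a placeholder name.
//
#if 0
// the prior substage's final command produces an event...
cl_event complete;

skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

// ...whose completion callback schedules the next substage on the
// context scheduler (see the skc_raster_cohort_*_cb callbacks below)
cl(SetEventCallback(complete,CL_COMPLETE,skc_example_next_substage_cb,grid));
cl(ReleaseEvent(complete));

// flush so the enqueued work actually starts
cl(Flush(cohort->cq));
#endif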
154
155 //
156 // BUILD
157 //
158
159 struct skc_raster_builder_impl
160 {
161 struct skc_raster_builder * raster_builder;
162 struct skc_runtime * runtime;
163
164 skc_grid_t cohort;
165
166 // these are all durable/perm extents
167 struct skc_extent_phrwg_thr1s path_ids; // read/write by host
168 struct skc_extent_phw1g_tdrNs transforms; // write once by host + read by device
169 struct skc_extent_phw1g_tdrNs clips; // write once by host + read by device
170 struct skc_extent_phw1g_tdrNs fill_cmds; // write once by host + read by device
171 struct skc_extent_phrwg_tdrNs raster_ids; // read/write by host + read by device
172
173 struct {
174 cl_kernel fills_expand;
175 cl_kernel rasterize_all;
176 cl_kernel segment;
177 cl_kernel rasters_alloc;
178 cl_kernel prefix;
179 } kernels;
180 };
181
182 //
183 // RASTER COHORT
184 //
185 // This sub-pipeline snapshots the raster builder and then acquires
186 // and releases host and device resources as necessary (as late as
187 // possible).
188 //
189 // Note that the cohort extents are ephemeral and are only used by one
190 // or more stages of the rasterization sub-pipeline.
191 //
192 // The pipeline implementation may vary between compute platforms.
193 //
194
195 struct skc_raster_cohort
196 {
197 struct skc_raster_builder_impl * impl;
198
199 struct skc_extent_phrwg_thr1s_snap path_ids; // read/write by host
200 struct skc_extent_phw1g_tdrNs_snap transforms; // write once by host + read by device
201 struct skc_extent_phw1g_tdrNs_snap clips; // write once by host + read by device
202 struct skc_extent_phw1g_tdrNs_snap fill_cmds; // write once by host + read by device
203 struct skc_extent_phrwg_tdrNs_snap raster_ids; // read/write by host + read by device
204
205 cl_command_queue cq;
206
207 // sub-pipeline atomics
208 struct skc_extent_thr_tdrw atomics;
209
210 // path primitives are expanded into line/quad/cubic/rational cmds
211 struct skc_extent_tdrw cmds;
212
213 // rasterization output
214 struct skc_extent_tdrw keys;
215 // struct skc_extent_thrw_tdrw keys;
216
217 // post-sort extent with metadata for each raster
218 struct skc_extent_tdrw metas;
219 // struct skc_extent_thrw_tdrw metas;
220
221 // subbuf id
222 skc_subbuf_id_t id;
223
224 //
225 // pipeline also uses the following global resources:
226 //
227 // - command queue from global factory
228 // - global block pool and its atomics
229 // - global path and raster host id map
230 // - temporary host and device allocations
231 //
232 };
233
234 //
235 // TTRK (64-BIT COMPARE)
236 //
237 // 0 63
238 // | TTSB ID | X | Y | COHORT ID |
239 // +---------+------+------+-----------+
240 // | 27 | 12 | 12 | 13 |
241 //
242 //
243 // TTRK (32-BIT COMPARE)
244 //
245 // 0 63
246 // | TTSB ID | N/A | X | Y | COHORT ID |
247 // +---------+-----+------+------+-----------+
248 // | 27 | 5 | 12 | 12 | 8 |
249 //
250
251 //
252 // TTRK is a sortable intermediate key format for TTSK
253 //
254 // We're going to use the 32-bit comparison version for now
255 //
256
257 union skc_ttrk
258 {
259 skc_ulong u64;
260 skc_uint2 u32v2;
261
262 struct {
263 skc_uint block : SKC_TTXK_LO_BITS_ID;
264 skc_uint na0 : SKC_TTRK_LO_BITS_NA;
265 skc_uint x : SKC_TTXK_HI_BITS_X;
266 skc_uint y : SKC_TTXK_HI_BITS_Y;
267 skc_uint cohort : SKC_TTRK_HI_BITS_COHORT;
268 };
269
270 struct {
271 skc_uint na1;
272 skc_uint yx : SKC_TTXK_HI_BITS_YX;
273 skc_uint na2 : SKC_TTRK_HI_BITS_COHORT;
274 };
275
276 struct {
277 skc_uint na3;
278 skc_uint na4 : SKC_TTXK_HI_BITS_X;
279 skc_uint cohort_y : SKC_TTRK_HI_BITS_COHORT_Y;
280 };
281 };
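
//
// Illustrative only -- packing a TTRK key through the bit-field views
// above; block_id, tile_x, tile_y and cohort_id are placeholder values:
//
#if 0
union skc_ttrk ttrk;

ttrk.u64    = 0UL;
ttrk.block  = block_id;  // TTSB block id (SKC_TTXK_LO_BITS_ID bits)
ttrk.x      = tile_x;    // tile x        (SKC_TTXK_HI_BITS_X bits)
ttrk.y      = tile_y;    // tile y        (SKC_TTXK_HI_BITS_Y bits)
ttrk.cohort = cohort_id; // cohort id     (SKC_TTRK_HI_BITS_COHORT bits)

// the anonymous .yx member aliases the combined { x, y } bits of the
// hi word and .u32v2 exposes the lo/hi words for 32-bit compares
#endif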
282
283 //
284 //
285 //
286
287 static
288 void
289 skc_raster_builder_pfn_release(struct skc_raster_builder_impl * const impl)
290 {
291 // decrement reference count
292 if (--impl->raster_builder->refcount != 0)
293 return;
294
295 //
296 // otherwise, dispose of the raster builder and its impl
297 //
298 struct skc_runtime * const runtime = impl->runtime;
299
300 // free the raster builder
301 skc_runtime_host_perm_free(runtime,impl->raster_builder);
302
303 // free durable/perm extents
304 skc_extent_phrwg_thr1s_free(runtime,&impl->path_ids);
305 skc_extent_phw1g_tdrNs_free(runtime,&impl->transforms);
306 skc_extent_phw1g_tdrNs_free(runtime,&impl->clips);
307 skc_extent_phw1g_tdrNs_free(runtime,&impl->fill_cmds);
308 skc_extent_phrwg_tdrNs_free(runtime,&impl->raster_ids);
309
310 // release kernels
311 cl(ReleaseKernel(impl->kernels.fills_expand));
312 cl(ReleaseKernel(impl->kernels.rasterize_all));
313
314 #if 0
315 cl(ReleaseKernel(impl->kernels.rasterize_lines));
316 cl(ReleaseKernel(impl->kernels.rasterize_quads));
317 cl(ReleaseKernel(impl->kernels.rasterize_cubics));
318 #endif
319
320 cl(ReleaseKernel(impl->kernels.segment));
321 cl(ReleaseKernel(impl->kernels.rasters_alloc));
322 cl(ReleaseKernel(impl->kernels.prefix));
323
324 // free the impl
325 skc_runtime_host_perm_free(runtime,impl);
326 }
327
328 //
329 //
330 //
331
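//
// Release a span of raster handles captured from the raster id ring.
// The span may wrap past the end of the ring: e.g. with size=8, from=6
// and to=2 (illustrative values) the entries released are { 6, 7 }
// followed by { 0, 1 }.
//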
332 static
333 void
334 skc_raster_builder_rasters_release(struct skc_runtime * const runtime,
335 skc_raster_t const * const rasters,
336 skc_uint const size,
337 skc_uint const from,
338 skc_uint const to)
339 {
340 if (from <= to) // no wrap
341 {
342 skc_raster_t const * rasters_from = rasters + from;
343 skc_uint count_from = to - from;
344
345 skc_grid_deps_unmap(runtime->deps,rasters_from,count_from);
346 skc_runtime_raster_device_release(runtime,rasters_from,count_from);
347 }
348 else // from > to implies wrap
349 {
350 skc_raster_t const * rasters_lo = rasters + from;
351 skc_uint count_lo = size - from;
352
353 skc_grid_deps_unmap(runtime->deps,rasters_lo,count_lo);
354 skc_runtime_raster_device_release(runtime,rasters_lo,count_lo);
355
356 skc_grid_deps_unmap(runtime->deps,rasters,to);
357 skc_runtime_raster_device_release(runtime,rasters,to);
358 }
359 }
360
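//
// Release the path handles captured by the host ring snapshot -- the
// snapshot exposes the (possibly wrapped) ring as lo and hi spans, so
// both spans are released (the hi span only when it is non-empty).
//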
361 static
362 void
363 skc_raster_builder_paths_release(struct skc_runtime * const runtime,
364 struct skc_extent_phrwg_thr1s_snap * const snap)
365 {
366 // release lo
367 skc_runtime_path_device_release(runtime,snap->hr1.lo,snap->count.lo);
368
369 // release hi
370 if (snap->count.hi)
371 skc_runtime_path_device_release(runtime,snap->hr1.hi,snap->count.hi);
372 }
373
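//
// Grid dispose: release the path and raster handles captured by the
// cohort's snapshots, free the cohort's remaining host and device
// resources, and drop the cohort's reference on the raster builder.
//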
374 static
375 void
376 skc_raster_builder_cohort_grid_pfn_dispose(skc_grid_t const grid)
377 {
378 //
379 // ALLOCATED RESOURCES
380 //
381 // path_ids -
382 // raster_ids a
383 // transforms -
384 // clips -
385 // fill_cmds -
386 // cq a
387 // cohort atomics a
388 // cmds -
389 // keys a
390 // meta a
391 //
392
393 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
394 struct skc_raster_builder_impl * const impl = cohort->impl;
395 struct skc_runtime * const runtime = impl->runtime;
396
397 //
398 // release paths -- FIXME -- Note that releasing paths can be
399 // performed after rasterization is complete
400 //
401
402 // snap alloc the paths -- this host snap simply sets up pointers
403 skc_extent_phrwg_thr1s_snap_alloc(runtime,&impl->path_ids,&cohort->path_ids);
404
405 // release the paths captured by the snapshot
406 skc_raster_builder_paths_release(runtime,&cohort->path_ids);
407
408 // free the path ids snapshot
409 skc_extent_phrwg_thr1s_snap_free(runtime,&cohort->path_ids);
410
411 //
412 // release rasters
413 //
414 skc_uint const size = cohort->raster_ids.snap->ring->size.pow2;
415 skc_uint const from = skc_extent_ring_snap_from(cohort->raster_ids.snap);
416 skc_uint const to = skc_extent_ring_snap_to(cohort->raster_ids.snap);
417
418 // unmap and release raster ids
419 skc_raster_builder_rasters_release(runtime,impl->raster_ids.hrw,size,from,to);
420
421 // release cohort's remaining allocated resources
422 skc_extent_phrwg_tdrNs_snap_free(runtime,&cohort->raster_ids);
423 skc_runtime_release_cq_in_order(runtime,cohort->cq);
424 skc_extent_thr_tdrw_free(runtime,&cohort->atomics);
425 skc_extent_tdrw_free(runtime,&cohort->keys);
426 skc_extent_tdrw_free(runtime,&cohort->metas);
427 // skc_extent_thrw_tdrw_free(runtime,&cohort->keys);
428 // skc_extent_thrw_tdrw_free(runtime,&cohort->metas);
429 skc_runtime_host_temp_free(runtime,cohort,cohort->id);
430
431 // release the raster builder
432 skc_raster_builder_pfn_release(impl);
433
434 //
435 // ALLOCATED RESOURCES
436 //
437 // path_ids -
438 // raster_ids -
439 // transforms -
440 // clips -
441 // fill_cmds -
442 // cq -
443 // cohort atomics -
444 // cmds -
445 // keys -
446 // meta -
447 //
448 }
449
450 //
451 //
452 //
453
454 static
455 void
456 skc_raster_cohort_prefix_release(skc_grid_t const grid)
457 {
458 // FIXME -- note that pfn_dispose can be accomplished here
459
460 // release the grid
461 skc_grid_complete(grid);
462 }
463
464 static
465 void
466 skc_raster_cohort_prefix_cb(cl_event event, cl_int status, skc_grid_t const grid)
467 {
468 SKC_CL_CB(status);
469
470 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
471 struct skc_scheduler * const scheduler = cohort->impl->runtime->scheduler;
472
473 // as quickly as possible, enqueue next stage in pipeline to context command scheduler
474 SKC_SCHEDULER_SCHEDULE(scheduler,skc_raster_cohort_prefix_release,grid);
475 }
476
477 //
478 //
479 //
480
481 #if 0
482 static
483 int cmp64(const void * ptr_a, const void * ptr_b)
484 {
485 skc_ulong const a = *(const skc_ulong *)ptr_a;
486 skc_ulong const b = *(const skc_ulong *)ptr_b;
487
488 if (a < b) return -1;
489 if (a > b) return +1;
490 else return 0;
491 }
492 #endif
493
494 //
495 //
496 //
497
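//
// SORT / RASTERS_ALLOC / PREFIX stages: sort the TTRK keys, segment
// them per raster, allocate and initialize the raster handles, then
// launch PREFIX -- its completion callback marks the cohort grid
// complete, which triggers the release/dispose stage.
//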
498 static
499 void
500 skc_raster_cohort_sort_prefix(skc_grid_t const grid)
501 {
502 //
503 // ALLOCATED RESOURCES
504 //
505 // path_ids i
506 // raster_ids i
507 // transforms a
508 // clips a
509 // fill_cmds -
510 // cq a
511 // cohort atomics a
512 // cmds a
513 // keys a
514 // meta -
515 //
516
517 // use the backpointers
518 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
519 struct skc_raster_builder_impl * const impl = cohort->impl;
520 struct skc_runtime * const runtime = impl->runtime;
521
522 // release transforms
523 skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->transforms);
524
525 // release clips
526 skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->clips);
527
528 // release expanded cmds
529 skc_extent_tdrw_free(runtime,&cohort->cmds);
530
531 // alloc the snapshot -- could be zero-sized
532 skc_extent_phrwg_tdrNs_snap_alloc(runtime,
533 &impl->raster_ids,
534 &cohort->raster_ids,
535 cohort->cq,NULL);
536
537 // will never be zero
538 skc_uint const rasters = skc_extent_ring_snap_count(cohort->raster_ids.snap);
539
540 // acquire fixed-size device-side extent
541 skc_extent_tdrw_alloc(runtime,
542 &cohort->metas,
543 sizeof(struct skc_raster_cohort_meta));
544
545 // skc_extent_thrw_tdrw_alloc(runtime,
546 // &cohort->metas,
547 // sizeof(struct skc_raster_cohort_meta));
548
549 // zero the metas
550 skc_extent_tdrw_zero(&cohort->metas,cohort->cq,NULL);
551
552 // get the read-only host copy of the device atomics
553 struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;
554
555 //
556 // SORT
557 //
558 if (atomics->keys > 0)
559 {
560 #ifndef NDEBUG
561 fprintf(stderr,"raster cohort sort: %u\n",atomics->keys);
562 #endif
563
564 //
565 //
566 //
567 uint32_t keys_padded_in, keys_padded_out;
568
569 hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);
570
571 hs_cl_sort(runtime->hs,
572 cohort->cq,
573 0,NULL,NULL,
574 cohort->keys.drw,
575 NULL,
576 atomics->keys,
577 keys_padded_in,
578 keys_padded_out,
579 false);
580
581 cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(cohort->keys.drw)));
582 cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(cohort->metas.drw)));
583
584 #ifndef NDEBUG
585 fprintf(stderr,"post-sort\n");
586 #endif
587
588 // find start of each tile
589 skc_device_enqueue_kernel(runtime->device,
590 SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK,
591 cohort->cq,
592 impl->kernels.segment,
593 atomics->keys,
594 0,NULL,NULL);
595
596 #ifndef NDEBUG
597 fprintf(stderr,"post-segment\n");
598 #endif
599
600 //
601 // DELETE ALL THIS WHEN READY
602 //
603
604 #if 0
605 //
606 //
607 //
608 cl(Finish(cohort->cq));
609
610 // map keys to host
611 union skc_ttrk * const keys = skc_extent_thrw_tdrw_map(&cohort->keys,
612 cohort->cq,
613 NULL);
614 // map meta to host
615 struct skc_raster_cohort_meta * const metas = skc_extent_thrw_tdrw_map(&cohort->metas,
616 cohort->cq,
617 NULL);
618 // block until done
619 cl(Finish(cohort->cq));
620
621 // sort keys
622 qsort(keys,atomics->keys,sizeof(*keys),cmp64);
623
624 // mask to determine if rk id is a new block
625 skc_uint const subblock_mask = runtime->config->block.subblocks - 1;
626
627 //
628 // some counters
629 //
630 union skc_raster_cohort_meta_in meta_in = {
631 .blocks = 0,
632 .offset = 0,
633 .pk = 0,
634 .rk = 0
635 };
636
637 // get first key
638 union skc_ttrk curr = keys[0];
639
640 skc_uint ii=0, jj=0;
641
642 // for all TTRK keys
643 while (true)
644 {
645 // increment ttrk count
646 meta_in.rk += 1;
647
648 // was this a new block?
649 if ((curr.u32v2.lo & subblock_mask) == 0)
650 meta_in.blocks += 1;
651
652 // break if we're out of keys
653 if (++ii >= atomics->keys)
654 break;
655
656 // otherwise, process next key
657 union skc_ttrk const next = keys[ii];
658
659 // if new cohort then save curr meta and init next meta
660 if (next.cohort != curr.cohort)
661 {
662 fprintf(stderr,"[ %u, %u, %u, %u ]\n",
663 meta_in.blocks,
664 meta_in.offset,
665 meta_in.pk,
666 meta_in.rk);
667
668 // store back to buffer
669 metas->inout[curr.cohort].in = meta_in;
670
671 // update meta_in
672 meta_in.blocks = 0;
673 meta_in.offset = ii;
674 meta_in.pk = 0;
675 meta_in.rk = 0;
676 }
677 // otherwise, if same y but new x then increment TTPK count
678 else if ((next.y == curr.y) && (next.x != curr.x))
679 {
680 meta_in.pk += 1;
681
682 #if 0
683 fprintf(stderr,"%3u : %3u : ( %3u, %3u ) -> ( %3u )\n",
684 jj++,curr.cohort,curr.y,curr.x,next.x);
685 #endif
686 }
687
688 #if 0
689 fprintf(stderr,"( %3u, %3u )\n",next.y,next.x);
690 #endif
691
692 curr = next;
693 }
694
695 fprintf(stderr,"[ %u, %u, %u, %u ]\n",
696 meta_in.blocks,
697 meta_in.offset,
698 meta_in.pk,
699 meta_in.rk);
700
701 // store back to buffer
702 metas->inout[curr.cohort].in = meta_in;
703
704
705 // unmap
706 skc_extent_thrw_tdrw_unmap(&cohort->keys,
707 keys,
708 cohort->cq,
709 NULL);
710
711 // unmap
712 skc_extent_thrw_tdrw_unmap(&cohort->metas,
713 metas,
714 cohort->cq,
715 NULL);
716 #endif
717 }
718
719 #ifndef NDEBUG
720 fprintf(stderr,"rasters_alloc: %u\n",rasters);
721 #endif
722
723 //
724 // RASTER ALLOC/INIT
725 //
726 cl(SetKernelArg(impl->kernels.rasters_alloc,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
727 cl(SetKernelArg(impl->kernels.rasters_alloc,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
728 cl(SetKernelArg(impl->kernels.rasters_alloc,2,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
729 cl(SetKernelArg(impl->kernels.rasters_alloc,3,SKC_CL_ARG(runtime->handle_pool.map.drw)));
730 cl(SetKernelArg(impl->kernels.rasters_alloc,4,SKC_CL_ARG(cohort->metas.drw)));
731 cl(SetKernelArg(impl->kernels.rasters_alloc,5,SKC_CL_ARG(cohort->raster_ids.drN)));
732 cl(SetKernelArg(impl->kernels.rasters_alloc,6,SKC_CL_ARG(rasters)));
733
734 skc_device_enqueue_kernel(runtime->device,
735 SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC,
736 cohort->cq,
737 impl->kernels.rasters_alloc,
738 rasters,
739 0,NULL,NULL);
740
741 #ifndef NDEBUG
742 fprintf(stderr,"post-alloc\n");
743 #endif
744
745 //
746 // PREFIX
747 //
748 cl(SetKernelArg(impl->kernels.prefix,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
749 cl(SetKernelArg(impl->kernels.prefix,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
750 cl(SetKernelArg(impl->kernels.prefix,2,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
751 cl(SetKernelArg(impl->kernels.prefix,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
752
753 cl(SetKernelArg(impl->kernels.prefix,4,SKC_CL_ARG(cohort->keys.drw)));
754 cl(SetKernelArg(impl->kernels.prefix,5,SKC_CL_ARG(runtime->handle_pool.map.drw)));
755
756 cl(SetKernelArg(impl->kernels.prefix,6,SKC_CL_ARG(cohort->metas.drw)));
757 cl(SetKernelArg(impl->kernels.prefix,7,SKC_CL_ARG(rasters)));
758
759 cl_event complete;
760
761 skc_device_enqueue_kernel(runtime->device,
762 SKC_DEVICE_KERNEL_ID_PREFIX,
763 cohort->cq,
764 impl->kernels.prefix,
765 rasters,
766 0,NULL,
767 &complete);
768
769 cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_prefix_cb,grid));
770 cl(ReleaseEvent(complete));
771
772 #ifndef NDEBUG
773 fprintf(stderr,"post-prefix\n");
774 #endif
775
776 // flush command queue
777 cl(Flush(cohort->cq));
778
779 //
780 // ALLOCATED RESOURCES
781 //
782 // path_ids a
783 // raster_ids a
784 // transforms -
785 // clips -
786 // fill_cmds -
787 // cq a
788 // cohort atomics a
789 // cmds -
790 // keys a
791 // meta a
792 //
793 }
794
795 static
796 void
797 skc_raster_cohort_rasterize_cb(cl_event event, cl_int status, skc_grid_t const grid)
798 {
799 SKC_CL_CB(status);
800
801 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
802
803 // as quickly as possible, enqueue next stage in pipeline to context command scheduler
804 SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_sort_prefix,grid);
805 }
806
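//
// RASTERIZE stage: snapshot the transforms and clips, allocate the
// TTRK key extent, launch the all-in-one rasterize kernel over the
// expanded commands, then copy the cohort atomics back to the host and
// schedule the sort/prefix stage from the copy-back callback.
//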
807 static
808 void
809 skc_raster_cohort_rasterize(skc_grid_t const grid)
810 {
811 //
812 // ALLOCATED RESOURCES
813 //
814 // path_ids i
815 // raster_ids i
816 // transforms i
817 // clips i
818 // fill_cmds s
819 // cq a
820 // cohort atomics a
821 // cmds a
822 // cmds_quad a
823 // cmds_cubic a
824 // keys -
825 // meta -
826
827 // use the backpointers
828 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
829 struct skc_raster_builder_impl * const impl = cohort->impl;
830 struct skc_runtime * const runtime = impl->runtime;
831
832 //
833 // RELEASED RESOURCES
834 //
835 // cmds snap
836 //
837
838 // release the fill cmds snap since it's only used by the expand stage
839 skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->fill_cmds);
840
841 //
842 // NEW ALLOCATED RESOURCES
843 //
844 // transforms snap
845 // clips snap
846 // ttrk keys
847 //
848 skc_extent_phw1g_tdrNs_snap_alloc(runtime,
849 &impl->transforms,
850 &cohort->transforms,
851 cohort->cq,NULL);
852
853 skc_extent_phw1g_tdrNs_snap_alloc(runtime,
854 &impl->clips,
855 &cohort->clips,
856 cohort->cq,NULL);
857
858 // acquire device-side extent
859 skc_extent_tdrw_alloc(runtime,
860 &cohort->keys,
861 sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);
862
863 // skc_extent_thrw_tdrw_alloc(runtime,
864 // &cohort->keys,
865 // sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);
866
867 //
868 // acquire out-of-order command queue
869 //
870 // and launch up to 3 kernels
871 //
872 // for each kernel:
873 //
874 // set runtime "global" kernel args:
875 //
876 // - block pool atomics
877 // - block pool extent
878 //
879 // set cohort "local" kernel args:
880 //
881 // - atomics
882 // - cmds
883 //
884 // enqueue barrier
885 // enqueue copy back of atomics on the command queue
886 // set callback on copy back event
887 // release command queue
888 //
889 struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;
890
891 if (atomics->cmds > 0)
892 {
893 cl(SetKernelArg(impl->kernels.rasterize_all,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
894 cl(SetKernelArg(impl->kernels.rasterize_all,1,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
895 cl(SetKernelArg(impl->kernels.rasterize_all,2,SKC_CL_ARG(runtime->block_pool.ids.drw)));
896 cl(SetKernelArg(impl->kernels.rasterize_all,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
897
898 cl(SetKernelArg(impl->kernels.rasterize_all,4,SKC_CL_ARG(cohort->atomics.drw)));
899 cl(SetKernelArg(impl->kernels.rasterize_all,5,SKC_CL_ARG(cohort->keys.drw)));
900
901 cl(SetKernelArg(impl->kernels.rasterize_all,6,SKC_CL_ARG(cohort->transforms.drN)));
902 cl(SetKernelArg(impl->kernels.rasterize_all,7,SKC_CL_ARG(cohort->clips.drN)));
903 cl(SetKernelArg(impl->kernels.rasterize_all,8,SKC_CL_ARG(cohort->cmds.drw)));
904 cl(SetKernelArg(impl->kernels.rasterize_all,9,SKC_CL_ARG(atomics->cmds)));
905
906 skc_device_enqueue_kernel(runtime->device,
907 SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL,
908 cohort->cq,
909 impl->kernels.rasterize_all,
910 atomics->cmds,
911 0,NULL,NULL);
912 }
913
914 //
915 // copyback number of TTSK keys
916 //
917 cl_event complete;
918
919 skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);
920
921 cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
922 cl(ReleaseEvent(complete));
923
924 // flush command queue
925 cl(Flush(cohort->cq));
926
927 //
928 // ALLOCATED RESOURCES
929 //
930 // path_ids i
931 // raster_ids i
932 // transforms a
933 // clips a
934 // fill_cmds -
935 // cq a
936 // cohort atomics a
937 // cmds a
938 // keys a
939 // meta -
940 }
941
942 static
943 void
944 skc_raster_cohort_fills_expand_cb(cl_event event, cl_int status, skc_grid_t const grid)
945 {
946 SKC_CL_CB(status);
947
948 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
949
950 // as quickly as possible, enqueue next stage in pipeline to context command scheduler
951 SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_rasterize,grid);
952 }
953
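//
// Grid execute (EXPAND stage): acquire an in-order cq, snapshot the
// fill cmds, allocate and zero the cohort atomics, allocate the
// rasterize cmd extent, launch FILLS_EXPAND, then copy the atomics
// back and schedule the rasterize stage from the copy-back callback.
//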
954 static
955 void
956 skc_raster_builder_cohort_grid_pfn_execute(skc_grid_t const grid)
957 {
958 //
959 // ALLOCATED RESOURCES
960 //
961 // path_ids i
962 // raster_ids i
963 // transforms i
964 // clips i
965 // fill_cmds i
966 // cq -
967 // cohort atomics -
968 // cmds -
969 // keys -
970 // meta -
971 //
972
973 // allocate the cohort
974 struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
975
976 // get impl
977 struct skc_raster_builder_impl * const impl = cohort->impl;
978 struct skc_runtime * const runtime = impl->runtime;
979
980 // acquire in-order cq
981 cohort->cq = skc_runtime_acquire_cq_in_order(runtime);
982
983 // alloc the snapshot -- could be zero-sized
984 skc_extent_phw1g_tdrNs_snap_alloc(runtime,
985 &impl->fill_cmds,
986 &cohort->fill_cmds,
987 cohort->cq,NULL);
988
989 // flush the cq to get the fill running
990 // cl(Flush(cohort->cq));
991
992 // create split atomics
993 skc_extent_thr_tdrw_alloc(runtime,&cohort->atomics,sizeof(struct skc_raster_cohort_atomic));
994
995 // zero the atomics
996 skc_extent_thr_tdrw_zero(&cohort->atomics,cohort->cq,NULL);
997
998 // get config
999 struct skc_config const * const config = runtime->config;
1000
1001 // acquire device-side extents
1002 skc_extent_tdrw_alloc(runtime,
1003 &cohort->cmds,
1004 sizeof(union skc_cmd_rasterize) * config->raster_cohort.expand.cmds);
1005
1006 //
1007 // FILLS EXPAND
1008 //
1009 // need result of cmd counts before launching RASTERIZE grids
1010 //
1011 // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
1012 // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
1013 // - or launch a device-wide grid that feeds itself but that's unsatisfying
1014 //
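// The code below takes the OpenCL 1.2 route: enqueue the expansion
// kernel, copy the atomic counters back to the host, and schedule the
// rasterize stage from the copy-back event's completion callback.
//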
1015
1016 // how many commands? could be zero
1017 skc_uint const work_size = skc_extent_ring_snap_count(cohort->fill_cmds.snap);
1018
1019 if (work_size > 0)
1020 {
1021 cl(SetKernelArg(impl->kernels.fills_expand,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
1022 cl(SetKernelArg(impl->kernels.fills_expand,1,SKC_CL_ARG(cohort->atomics.drw)));
1023 cl(SetKernelArg(impl->kernels.fills_expand,2,SKC_CL_ARG(runtime->handle_pool.map.drw)));
1024 cl(SetKernelArg(impl->kernels.fills_expand,3,SKC_CL_ARG(cohort->fill_cmds.drN)));
1025 cl(SetKernelArg(impl->kernels.fills_expand,4,SKC_CL_ARG(cohort->cmds.drw)));
1026
1027 skc_device_enqueue_kernel(runtime->device,
1028 SKC_DEVICE_KERNEL_ID_FILLS_EXPAND,
1029 cohort->cq,
1030 impl->kernels.fills_expand,
1031 work_size,
1032 0,NULL,NULL);
1033 }
1034
1035 //
1036 // copyback number of rasterization commands
1037 //
1038 cl_event complete;
1039
1040 skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);
1041
1042 cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_fills_expand_cb,grid));
1043 cl(ReleaseEvent(complete));
1044
1045 // flush command queue
1046 cl(Flush(cohort->cq));
1047
1048 //
1049 // ALLOCATED RESOURCES
1050 //
1051 // path_ids i
1052 // raster_ids i
1053 // transforms i
1054 // clips i
1055 // fill_cmds s
1056 // cq a
1057 // cohort atomics a
1058 // cmds a
1059 // keys -
1060 // meta -
1061 //
1062 }
1063
1064 //
1065 // move grid into waiting state
1066 //
1067 // this entails allocating a cohort from the temporary extent
1068 //
1069
1070 static
1071 void
1072 skc_raster_builder_cohort_grid_pfn_waiting(skc_grid_t const grid)
1073 {
1074 // get the impl
1075 struct skc_raster_builder_impl * const impl = skc_grid_get_data(grid);
1076 struct skc_runtime * const runtime = impl->runtime;
1077
1078 // retain the raster builder
1079 impl->raster_builder->refcount += 1;
1080
1081 // allocate the ephemeral/temp cohort
1082 skc_subbuf_id_t id;
1083
1084 struct skc_raster_cohort * const cohort =
1085 skc_runtime_host_temp_alloc(runtime,
1086 SKC_MEM_FLAGS_READ_WRITE,
1087 sizeof(*cohort),
1088 &id,
1089 NULL);
1090
1091 // save the id and backpointer
1092 cohort->id = id;
1093 cohort->impl = impl;
1094
1095 // set grid data -- replaces impl
1096 skc_grid_set_data(grid,cohort);
1097
1098 //
1099 // ACQUIRE RESOURCES FOR THE COHORT
1100 //
1101
1102 struct skc_raster_builder * const raster_builder = impl->raster_builder;
1103
1104 // immediately take snapshots of all rings -- these are very inexpensive operations
1105 skc_extent_phrwg_thr1s_snap_init(runtime,&raster_builder->path_ids .ring,&cohort->path_ids);
1106 skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->transforms.ring,&cohort->transforms);
1107 skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->clips .ring,&cohort->clips);
1108 skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->fill_cmds .ring,&cohort->fill_cmds);
1109 skc_extent_phrwg_tdrNs_snap_init(runtime,&raster_builder->raster_ids.ring,&cohort->raster_ids);
1110
1111 //
1112 // ALLOCATED RESOURCES
1113 //
1114 // path_ids i
1115 // raster_ids i
1116 // transforms i
1117 // clips i
1118 // fill_cmds i
1119 // cq -
1120 // cohort atomics -
1121 // cmds -
1122 // keys -
1123 // meta -
1124 //
1125 }
1126
1127 //
1128 //
1129 //
1130
1131 static
1132 void
1133 skc_raster_builder_cohort_create(struct skc_raster_builder_impl * const impl)
1134 {
1135 // attach a grid
1136 impl->cohort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
1137 &impl->cohort,
1138 impl,
1139 skc_raster_builder_cohort_grid_pfn_waiting,
1140 skc_raster_builder_cohort_grid_pfn_execute,
1141 skc_raster_builder_cohort_grid_pfn_dispose);
1142 }
1143
1144 //
1145 //
1146 //
1147
1148 static
1149 skc_err
1150 skc_raster_builder_pfn_add(struct skc_raster_builder_impl * const impl,
1151 skc_path_t const * paths,
1152 skc_uint count)
1153 {
1154 // validate and retain the path
1155 skc_err err;
1156
1157 err = skc_runtime_handle_device_validate_retain(impl->runtime,
1158 SKC_TYPED_HANDLE_TYPE_IS_PATH,
1159 paths,
1160 count);
1161
1162 if (err)
1163 return err;
1164
1165 skc_runtime_handle_device_retain(impl->runtime,paths,count);
1166
1167 // make sure there is a grid
1168 if (impl->cohort == NULL) {
1169 skc_raster_builder_cohort_create(impl);
1170 }
1171
1172 // declare rasterization grid happens after path
1173 while (count-- > 0)
1174 skc_grid_happens_after_handle(impl->cohort,SKC_TYPED_HANDLE_TO_HANDLE(*paths++));
1175
1176 return SKC_ERR_SUCCESS;
1177 }
1178
1179 //
1180 //
1181 //
1182
1183 static
1184 void
1185 skc_raster_builder_pfn_end(struct skc_raster_builder_impl * const impl, skc_raster_t * const raster)
1186 {
1187 //
1188 // acquire a host-managed raster handle and bump its reference count
1189 // to 2 -- handles will be released (reduced to 1) once the rasters
1190 // are completely rasterized
1191 //
1192 *raster = skc_runtime_handle_device_acquire(impl->runtime);
1193
1194 // make sure there is a grid
1195 if (impl->cohort == NULL) {
1196 skc_raster_builder_cohort_create(impl);
1197 }
1198
1199 // map a handle to a grid
1200 skc_grid_map(impl->cohort,*raster);
1201 }
1202
1203 //
1204 // snapshot the ring and lazily start the grid
1205 //
1206 // FIXME -- might want to revisit this and settle on an even more
1207 // opaque implementation. Some options:
1208 //
1209 // - never let the SKC API expose a forced grid start
1210 // - make snapshots kick off a forced grid start
1211 // - be lazy all the time everywhere
1212 //
1213
1214 static
1215 void
1216 skc_raster_builder_pfn_start(struct skc_raster_builder_impl * const impl)
1217 {
1218 skc_grid_t const cohort = impl->cohort;
1219
1220 if (cohort != NULL) {
1221 skc_grid_start(cohort);
1222 }
1223 }
1224
1225 //
1226 // NOTE: THIS MIGHT BE REMOVED
1227 //
1228
1229 static
1230 void
1231 skc_raster_builder_pfn_force(struct skc_raster_builder_impl * const impl)
1232 {
1233 skc_grid_t const cohort = impl->cohort;
1234
1235 if (cohort != NULL) {
1236 skc_grid_force(cohort);
1237 }
1238 }
1239
1240 //
1241 //
1242 //
1243
1244 skc_err
1245 skc_raster_builder_cl_12_create(struct skc_context * const context,
1246 struct skc_raster_builder * * const raster_builder)
1247 {
1248 struct skc_runtime * const runtime = context->runtime;
1249
1250 // allocate raster builder
1251 (*raster_builder) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**raster_builder));
1252
1253 // refcount
1254 (*raster_builder)->refcount = 1;
1255
1256 // state
1257 SKC_ASSERT_STATE_INIT((*raster_builder),SKC_RASTER_BUILDER_STATE_READY);
1258
1259 // allocate runtime raster builder
1260 struct skc_raster_builder_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));
1261
1262 // save the impl
1263 (*raster_builder)->impl = impl;
1264
1265 // initialize impl
1266 impl->raster_builder = (*raster_builder);
1267 impl->runtime = runtime;
1268 impl->cohort = NULL;
1269
1270 // get config
1271 struct skc_config const * const config = runtime->config;
1272
1273 skc_extent_phrwg_thr1s_alloc(runtime,&impl->path_ids ,sizeof(skc_path_t ) * config->raster_cohort.path_ids .elem_count);
1274 skc_extent_phw1g_tdrNs_alloc(runtime,&impl->transforms,sizeof(union skc_transform) * config->raster_cohort.transforms.elem_count);
1275 skc_extent_phw1g_tdrNs_alloc(runtime,&impl->clips ,sizeof(union skc_path_clip) * config->raster_cohort.clips .elem_count);
1276 skc_extent_phw1g_tdrNs_alloc(runtime,&impl->fill_cmds ,sizeof(union skc_cmd_fill ) * config->raster_cohort.fill .elem_count);
1277 skc_extent_phrwg_tdrNs_alloc(runtime,&impl->raster_ids,sizeof(skc_raster_t ) * config->raster_cohort.raster_ids.elem_count);
1278
1279 // retain the context
1280 //skc_context_retain(context);
1281
1282 (*raster_builder)->context = context;
1283
1284 (*raster_builder)->add = skc_raster_builder_pfn_add;
1285 (*raster_builder)->end = skc_raster_builder_pfn_end;
1286 (*raster_builder)->start = skc_raster_builder_pfn_start;
1287 (*raster_builder)->force = skc_raster_builder_pfn_force;
1288 (*raster_builder)->release = skc_raster_builder_pfn_release;
1289
1290 // initialize raster builder with host-writable buffers
1291 (*raster_builder)->path_ids .extent = impl->path_ids.hrw;
1292 (*raster_builder)->transforms.extent = impl->transforms.hw1;
1293 (*raster_builder)->clips .extent = impl->clips.hw1;
1294 (*raster_builder)->fill_cmds .extent = impl->fill_cmds.hw1;
1295 (*raster_builder)->raster_ids.extent = impl->raster_ids.hrw;
1296
1297 //
1298 // the rings perform bookkeeping on the extents
1299 //
1300 // the ring snapshotting and checkpointing are necessary because
1301 // another part of the API can _force_ the raster cohort to flush
1302 // its work-in-progress commands but only up to a checkpointed
1303 // boundary
1304 //
1305 skc_extent_ring_init(&(*raster_builder)->path_ids.ring,
1306 config->raster_cohort.path_ids.elem_count,
1307 config->raster_cohort.path_ids.snap_count,
1308 sizeof(skc_path_t));
1309
1310 skc_extent_ring_init(&(*raster_builder)->transforms.ring,
1311 config->raster_cohort.transforms.elem_count,
1312 config->raster_cohort.transforms.snap_count,
1313 sizeof(union skc_transform));
1314
1315 skc_extent_ring_init(&(*raster_builder)->clips.ring,
1316 config->raster_cohort.clips.elem_count,
1317 config->raster_cohort.clips.snap_count,
1318 sizeof(union skc_path_clip));
1319
1320 skc_extent_ring_init(&(*raster_builder)->fill_cmds.ring,
1321 config->raster_cohort.fill.elem_count,
1322 config->raster_cohort.fill.snap_count,
1323 sizeof(union skc_cmd_fill));
1324
1325 skc_extent_ring_init(&(*raster_builder)->raster_ids.ring,
1326 config->raster_cohort.raster_ids.elem_count,
1327 config->raster_cohort.raster_ids.snap_count,
1328 sizeof(skc_raster_t));
1329
1330 //
1331 // acquire kernels
1332 //
1333 impl->kernels.fills_expand = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_FILLS_EXPAND);
1334 impl->kernels.rasterize_all = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL);
1335
1336 #if 0
1337 impl->kernels.rasterize_lines = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_LINES);
1338 impl->kernels.rasterize_quads = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_QUADS);
1339 impl->kernels.rasterize_cubics = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_CUBICS);
1340 #endif
1341
1342 impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK);
1343 impl->kernels.rasters_alloc = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC);
1344 impl->kernels.prefix = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PREFIX);
1345
1346 return SKC_ERR_SUCCESS;
1347 }
1348
1349 //
1350 //
1351 //
1352