/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

//
//
//

#include "context.h"
#include "block.h"
#include "grid.h"
#include "common/cl/assert_cl.h"
#include "config_cl.h"
#include "runtime_cl_12.h"
#include "export_cl_12.h"

//
//
//
34 static
35 void
skc_block_pool_create(struct skc_runtime * const runtime,cl_command_queue cq)36 skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
37 {
38   // save size
39   runtime->block_pool.size = &runtime->config->block_pool;
40 
41   // create block extent
42   skc_extent_pdrw_alloc(runtime,
43                         &runtime->block_pool.blocks,
44                         runtime->block_pool.size->pool_size *
45                         runtime->config->block.bytes);
46 
47   // allocate block pool ids
48   skc_extent_pdrw_alloc(runtime,
49                         &runtime->block_pool.ids,
50                         runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));
51 
52   // allocate block pool atomics
53   skc_extent_phr_pdrw_alloc(runtime,
54                             &runtime->block_pool.atomics,
55                             sizeof(union skc_block_pool_atomic));
56 
57   // acquire pool id and atomic initialization kernels
58   cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
59   cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);
60 
61   // init ids
62   cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
63   cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
64 
65   // the kernel grid is shaped by the target device -- always 2 for atomics
66   skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
67                             cq,k0,runtime->block_pool.size->pool_size,
68                             0,NULL,NULL);
69 
70   // init atomics
71   cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
72   cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
73 
74   // the kernel grid is shaped by the target device
75   skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
76                             cq,k1,2,
77                             0,NULL,NULL);
78 
79   // kickstart kernel execution
80   cl(Flush(cq));
81 
82   // release kernels
83   cl(ReleaseKernel(k0));
84   cl(ReleaseKernel(k1));
85 }
//
// Free the block pool extents -- released in reverse order of their
// allocation in skc_block_pool_create().
//
static
void
skc_block_pool_dispose(struct skc_runtime * const runtime)
{
  skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
  skc_extent_pdrw_free    (runtime,&runtime->block_pool.ids);
  skc_extent_pdrw_free    (runtime,&runtime->block_pool.blocks);
}

//
//
//

//
// Context yield pfn -- forwards to the runtime's scheduler.
// NOTE(review): the bool return is whatever skc_scheduler_yield()
// reports -- presumably "work remains"; confirm in the scheduler.
//
static
bool
skc_runtime_yield(struct skc_runtime * const runtime)
{
  return skc_scheduler_yield(runtime->scheduler);
}
106 
//
// Context wait pfn -- blocks in the runtime's scheduler until it
// releases the caller.
//
static
void
skc_runtime_wait(struct skc_runtime * const runtime)
{
  skc_scheduler_wait(runtime->scheduler);
}

//
//
//

118 skc_err
skc_runtime_cl_12_create(struct skc_context * const context,cl_context context_cl,cl_device_id device_id_cl)119 skc_runtime_cl_12_create(struct skc_context * const context,
120                          cl_context                 context_cl,
121                          cl_device_id               device_id_cl)
122 {
123   // allocate the runtime
124   struct skc_runtime * const runtime = malloc(sizeof(*runtime));
125 
126   // save off CL objects
127   runtime->cl.context   = context_cl;
128   runtime->cl.device_id = device_id_cl;
129 
130   // query device alignment
131   cl_uint align_bits;
132 
133   cl(GetDeviceInfo(device_id_cl,
134                    CL_DEVICE_MEM_BASE_ADDR_ALIGN,
135                    sizeof(align_bits),
136                    &align_bits,
137                    NULL));
138 
139   runtime->cl.align_bytes = align_bits / 8;
140 
141   // create device
142   skc_device_create(runtime);
143 
144   // create the host and device allocators
145   skc_allocator_host_create(runtime);
146   skc_allocator_device_create(runtime);
147 
148   // how many slots in the scheduler?
149   runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);
150 
151   // allocate deps structure
152   runtime->deps      = skc_grid_deps_create(runtime,
153                                             runtime->scheduler,
154                                             runtime->config->block_pool.pool_size);
155 
156   // initialize cq pool
157   skc_cq_pool_create(runtime,
158                      &runtime->cq_pool,
159                      runtime->config->cq_pool.cq_props,
160                      runtime->config->cq_pool.size);
161 
162   // acquire in-order cq
163   cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);
164 
165   // initialize block pool
166   skc_block_pool_create(runtime,cq);
167 
168   // intialize handle pool
169   skc_handle_pool_create(runtime,
170                          &runtime->handle_pool,
171                          runtime->config->handle_pool.size,
172                          runtime->config->handle_pool.width,
173                          runtime->config->handle_pool.recs);
174 
175   //
176   // initialize pfns
177   //
178   // FIXME -- at this point we will have identified which device we've
179   // targeted and will load a DLL (or select from a built-in library)
180   // that contains all the pfns.
181   //
182   context->runtime        = runtime;
183 
184   context->yield          = skc_runtime_yield;
185   context->wait           = skc_runtime_wait;
186 
187   context->path_builder   = skc_path_builder_cl_12_create;
188   context->path_retain    = skc_runtime_path_host_retain;
189   context->path_release   = skc_runtime_path_host_release;
190   context->path_flush     = skc_runtime_path_host_flush;
191 
192   context->raster_builder = skc_raster_builder_cl_12_create;
193   context->raster_retain  = skc_runtime_raster_host_retain;
194   context->raster_release = skc_runtime_raster_host_release;
195   context->raster_flush   = skc_runtime_raster_host_flush;
196 
197   context->composition    = skc_composition_cl_12_create;
198   context->styling        = skc_styling_cl_12_create;
199 
200   context->surface        = skc_surface_cl_12_create;
201 
202   // block on pool creation
203   cl(Finish(cq));
204 
205   // dispose of in-order cq
206   skc_runtime_release_cq_in_order(runtime,cq);
207 
208   return SKC_ERR_SUCCESS;
209 };

//
//
//

215 skc_err
skc_runtime_cl_12_dispose(struct skc_context * const context)216 skc_runtime_cl_12_dispose(struct skc_context * const context)
217 {
218   //
219   // FIXME -- incomplete
220   //
221   fprintf(stderr,"%s incomplete!\n",__func__);
222 
223   struct skc_runtime * runtime = context->runtime;
224 
225   skc_allocator_device_dispose(runtime);
226   skc_allocator_host_dispose(runtime);
227 
228   skc_scheduler_dispose(context->runtime,context->runtime->scheduler);
229 
230   skc_grid_deps_dispose(context->runtime->deps);
231 
232   skc_cq_pool_dispose(runtime,&runtime->cq_pool);
233 
234   skc_block_pool_dispose(context->runtime);
235 
236   // skc_handle_pool_dispose(context->runtime);
237 
238   return SKC_ERR_SUCCESS;
239 }

//
// REPORT BLOCK POOL ALLOCATION
//

void
skc_runtime_cl_12_debug(struct skc_context * const context)
{
  struct skc_runtime * const runtime = context->runtime;

  // acquire an in-order cq
  // (previous comment said "out-of-order" -- the code acquires in-order)
  cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

  // copy the block pool atomics to the host-readable mirror
  skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);

  // block until the read completes
  cl(Finish(cq));

  // return the in-order cq to the pool
  skc_runtime_release_cq_in_order(runtime,cq);

  union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;

  // blocks still available = total writes minus total reads;
  // in-use is the remainder of the configured pool
  skc_uint const available = bp_atomic->writes - bp_atomic->reads;
  skc_uint const inuse     = runtime->config->block_pool.pool_size - available;

  fprintf(stderr,
          "writes/reads/avail/alloc: %9u / %9u / %9u = %6.2f MB / %9u = %6.2f MB\n",
          bp_atomic->writes,
          bp_atomic->reads,
          available,
          (available * runtime->config->block.bytes) / (1024.0*1024.0),
          inuse,
          (inuse     * runtime->config->block.bytes) / (1024.0*1024.0));
}

//
//
//