1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <math.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11
12 #include <xnnpack.h>
13 #include <xnnpack/allocator.h>
14 #include <xnnpack/log.h>
15 #include <xnnpack/math.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/params.h>
18 #include <xnnpack/subgraph.h>
19
20
// Creates a runtime for `subgraph` with default settings.
//
// Convenience wrapper around xnn_create_runtime_v2: no threadpool is supplied
// and no flags are set. The resulting status is passed back unchanged.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  const uint32_t default_flags = 0;
  return xnn_create_runtime_v2(
    subgraph,
    NULL /* threadpool */,
    default_flags,
    runtime_out);
}
27
xnn_create_runtime_v2(xnn_subgraph_t subgraph,pthreadpool_t threadpool,uint32_t flags,xnn_runtime_t * runtime_out)28 enum xnn_status xnn_create_runtime_v2(
29 xnn_subgraph_t subgraph,
30 pthreadpool_t threadpool,
31 uint32_t flags,
32 xnn_runtime_t* runtime_out)
33 {
34 struct xnn_runtime* runtime = NULL;
35 enum xnn_status status = xnn_status_uninitialized;
36
37 if (!xnn_params.initialized) {
38 xnn_log_error("failed to create runtime: XNNPACK is not initialized");
39 goto error;
40 }
41
42 status = xnn_status_out_of_memory;
43
44 runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
45 if (runtime == NULL) {
46 xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
47 goto error;
48 }
49
50 runtime->ops = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
51 if (runtime->ops == NULL) {
52 xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
53 sizeof(struct xnn_operator_data) * subgraph->num_nodes);
54 goto error;
55 }
56 runtime->num_ops = subgraph->num_nodes;
57
58 struct xnn_value* values = subgraph->values;
59 for (size_t i = 0; i < subgraph->num_nodes; i++) {
60 const struct xnn_node* node = subgraph->nodes + i;
61 switch (node->type) {
62 case xnn_node_type_add2:
63 status = xnn_create_add_nd_f32(
64 node->activation.output_min,
65 node->activation.output_max,
66 node->flags,
67 &runtime->ops[i].op);
68 if (status != xnn_status_success) {
69 goto error;
70 }
71 runtime->ops[i].shape1.num_dims = values[node->inputs.raw[0]].shape.num_dims;
72 runtime->ops[i].shape2.num_dims = values[node->inputs.raw[1]].shape.num_dims;
73 memcpy(runtime->ops[i].shape1.dim, values[node->inputs.raw[0]].shape.dim, values[node->inputs.raw[0]].shape.num_dims * sizeof(size_t));
74 memcpy(runtime->ops[i].shape2.dim, values[node->inputs.raw[1]].shape.dim, values[node->inputs.raw[1]].shape.num_dims * sizeof(size_t));
75 runtime->ops[i].inputs[0] = node->inputs.raw[0];
76 runtime->ops[i].inputs[1] = node->inputs.raw[1];
77 runtime->ops[i].outputs[0] = node->outputs.raw[0];
78 break;
79 case xnn_node_type_convolution_2d:
80 status = xnn_create_convolution2d_nhwc_f32(
81 node->params.convolution_2d.input_padding_top,
82 node->params.convolution_2d.input_padding_right,
83 node->params.convolution_2d.input_padding_bottom,
84 node->params.convolution_2d.input_padding_left,
85 node->params.convolution_2d.kernel_height,
86 node->params.convolution_2d.kernel_width,
87 node->params.convolution_2d.subsampling_height,
88 node->params.convolution_2d.subsampling_width,
89 node->params.convolution_2d.dilation_height,
90 node->params.convolution_2d.dilation_width,
91 node->params.convolution_2d.groups,
92 node->params.convolution_2d.group_input_channels,
93 node->params.convolution_2d.group_output_channels,
94 node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */,
95 node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */,
96 values[node->inputs.convolution_2d.filter].data,
97 values[node->inputs.convolution_2d.bias].data,
98 node->activation.output_min,
99 node->activation.output_max,
100 node->flags,
101 &runtime->ops[i].op);
102 if (status != xnn_status_success) {
103 goto error;
104 }
105 runtime->ops[i].batch_size = values[node->inputs.raw[0]].shape.dim[0];
106 runtime->ops[i].input_height = values[node->inputs.raw[0]].shape.dim[1];
107 runtime->ops[i].input_width = values[node->inputs.raw[0]].shape.dim[2];
108 runtime->ops[i].inputs[0] = node->inputs.raw[0];
109 runtime->ops[i].outputs[0] = node->outputs.raw[0];
110 break;
111 case xnn_node_type_clamp:
112 status = xnn_create_clamp_nc_f32(
113 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
114 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
115 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
116 node->activation.output_min,
117 node->activation.output_max,
118 node->flags,
119 &runtime->ops[i].op);
120 if (status != xnn_status_success) {
121 goto error;
122 }
123 runtime->ops[i].batch_size = 1;
124 for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
125 runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
126 }
127 runtime->ops[i].inputs[0] = node->inputs.raw[0];
128 runtime->ops[i].outputs[0] = node->outputs.raw[0];
129 break;
130 case xnn_node_type_depthwise_convolution_2d:
131 status = xnn_create_convolution2d_nhwc_f32(
132 node->params.depthwise_convolution_2d.input_padding_top,
133 node->params.depthwise_convolution_2d.input_padding_right,
134 node->params.depthwise_convolution_2d.input_padding_bottom,
135 node->params.depthwise_convolution_2d.input_padding_left,
136 node->params.depthwise_convolution_2d.kernel_height,
137 node->params.depthwise_convolution_2d.kernel_width,
138 node->params.depthwise_convolution_2d.subsampling_height,
139 node->params.depthwise_convolution_2d.subsampling_width,
140 node->params.depthwise_convolution_2d.dilation_height,
141 node->params.depthwise_convolution_2d.dilation_width,
142 node->params.depthwise_convolution_2d.input_channels /* groups */,
143 1 /* group_input_channels */,
144 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
145 node->params.depthwise_convolution_2d.input_channels /* input_pixel_stride */,
146 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_pixel_stride */,
147 values[node->inputs.convolution_2d.filter].data,
148 values[node->inputs.convolution_2d.bias].data,
149 node->activation.output_min,
150 node->activation.output_max,
151 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
152 &runtime->ops[i].op);
153 if (status != xnn_status_success) {
154 goto error;
155 }
156 runtime->ops[i].batch_size = values[node->inputs.raw[0]].shape.dim[0];
157 runtime->ops[i].input_height = values[node->inputs.raw[0]].shape.dim[1];
158 runtime->ops[i].input_width = values[node->inputs.raw[0]].shape.dim[2];
159 runtime->ops[i].inputs[0] = node->inputs.raw[0];
160 runtime->ops[i].outputs[0] = node->outputs.raw[0];
161 break;
162 case xnn_node_type_hardswish:
163 status = xnn_create_hardswish_nc_f32(
164 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
165 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
166 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
167 node->flags,
168 &runtime->ops[i].op);
169 if (status != xnn_status_success) {
170 goto error;
171 }
172 runtime->ops[i].batch_size = 1;
173 for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
174 runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
175 }
176 runtime->ops[i].inputs[0] = node->inputs.raw[0];
177 runtime->ops[i].outputs[0] = node->outputs.raw[0];
178 break;
179 case xnn_node_type_multiply2:
180 status = xnn_create_multiply_nd_f32(
181 node->activation.output_min,
182 node->activation.output_max,
183 node->flags,
184 &runtime->ops[i].op);
185 if (status != xnn_status_success) {
186 goto error;
187 }
188 runtime->ops[i].shape1.num_dims = values[node->inputs.raw[0]].shape.num_dims;
189 runtime->ops[i].shape2.num_dims = values[node->inputs.raw[1]].shape.num_dims;
190 memcpy(runtime->ops[i].shape1.dim, values[node->inputs.raw[0]].shape.dim, values[node->inputs.raw[0]].shape.num_dims * sizeof(size_t));
191 memcpy(runtime->ops[i].shape2.dim, values[node->inputs.raw[1]].shape.dim, values[node->inputs.raw[1]].shape.num_dims * sizeof(size_t));
192 runtime->ops[i].inputs[0] = node->inputs.raw[0];
193 runtime->ops[i].inputs[1] = node->inputs.raw[1];
194 runtime->ops[i].outputs[0] = node->outputs.raw[0];
195 break;
196 case xnn_node_type_prelu:
197 status = xnn_create_prelu_nc_f32(
198 values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* channels */,
199 values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* input stride */,
200 values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* output stride */,
201 values[node->inputs.raw[1]].data /* negative slope */,
202 -INFINITY,
203 +INFINITY,
204 node->flags,
205 &runtime->ops[i].op);
206 if (status != xnn_status_success) {
207 goto error;
208 }
209 runtime->ops[i].batch_size = 1;
210 for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
211 runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
212 }
213 runtime->ops[i].inputs[0] = node->inputs.raw[0];
214 runtime->ops[i].outputs[0] = node->outputs.raw[0];
215 break;
216 case xnn_node_type_sigmoid:
217 status = xnn_create_sigmoid_nc_f32(
218 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
219 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
220 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
221 node->flags,
222 &runtime->ops[i].op);
223 if (status != xnn_status_success) {
224 goto error;
225 }
226 runtime->ops[i].batch_size = 1;
227 for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
228 runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
229 }
230 runtime->ops[i].inputs[0] = node->inputs.raw[0];
231 runtime->ops[i].outputs[0] = node->outputs.raw[0];
232 break;
233 case xnn_node_type_softmax:
234 status = xnn_create_softmax_nc_f32(
235 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
236 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
237 values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
238 node->flags,
239 &runtime->ops[i].op);
240 if (status != xnn_status_success) {
241 goto error;
242 }
243 runtime->ops[i].batch_size = 1;
244 for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
245 runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
246 }
247 runtime->ops[i].inputs[0] = node->inputs.raw[0];
248 runtime->ops[i].outputs[0] = node->outputs.raw[0];
249 break;
250 case xnn_node_type_invalid:
251 xnn_log_fatal("unexpected node type %d in node #%zu", node->type, i);
252 XNN_UNREACHABLE;
253 break;
254 }
255 }
256
257 runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
258 if (runtime->blobs == NULL) {
259 xnn_log_error("failed to allocate %zu bytes for blob descriptors",
260 sizeof(struct xnn_blob) * subgraph->num_values);
261 goto error;
262 }
263 runtime->num_blobs = subgraph->num_values;
264
265 size_t buffer_size = 0;
266 for (size_t i = 0; i < subgraph->num_values; i++) {
267 const struct xnn_value* value = &subgraph->values[i];
268 struct xnn_blob* blob = &runtime->blobs[i];
269 if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
270 blob->size = xnn_tensor_get_size(subgraph, i);
271 blob->data = (void*) value->data;
272 if (blob->data == NULL) {
273 if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
274 // Value is purely internal to the runtime, and must be allocated in its workspace.
275 buffer_size = round_up_po2(buffer_size + blob->size, XNN_EXTRA_BYTES);
276 } else {
277 // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
278 blob->external = true;
279 }
280 }
281 }
282 }
283
284 runtime->workspace = xnn_allocate_simd_memory(buffer_size);
285 if (runtime->workspace == NULL) {
286 xnn_log_error("failed to allocate %zu bytes to runtime workspace", buffer_size);
287 goto error;
288 }
289
290 size_t buffer_offset = 0;
291 for (size_t i = 0; i < subgraph->num_values; i++) {
292 const struct xnn_value* value = &subgraph->values[i];
293 struct xnn_blob* blob = &runtime->blobs[i];
294 if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
295 if (value->data == NULL && !blob->external) {
296 // Value is purely internal to the runtime, allocate it in the workspace.
297 blob->data = (void*) ((uintptr_t) runtime->workspace + buffer_offset);
298 buffer_offset = round_up_po2(buffer_offset + blob->size, XNN_EXTRA_BYTES);
299 }
300 }
301 }
302
303 runtime->threadpool = threadpool;
304
305 *runtime_out = runtime;
306 return xnn_status_success;
307
308 error:
309 xnn_delete_runtime(runtime);
310 return status;
311 }
312
// Binds external values to a runtime and sets up every operator in it.
//
// The external values are first checked in full (ID within range, target blob
// marked external) before any blob is modified, so a rejected call leaves the
// blob table untouched. The data pointers are then stored and each operator
// is set up against the blob pointers of its inputs and outputs.
//
// Returns xnn_status_invalid_parameter when an external value is rejected,
// the failing operator's status when an operator setup fails, and
// xnn_status_success otherwise.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Validation pass: reject bad input before mutating any runtime state.
  for (size_t n = 0; n < num_external_values; n++) {
    const uint32_t id = external_values[n].id;
    if (id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        id, n);
      return xnn_status_invalid_parameter;
    }

    if (!runtime->blobs[id].external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", id);
      return xnn_status_invalid_parameter;
    }
  }

  // Commit pass: attach the caller-provided buffers to their blobs.
  for (size_t n = 0; n < num_external_values; n++) {
    const uint32_t id = external_values[n].id;
    runtime->blobs[id].data = external_values[n].data;
  }

  // Operator pass: wire every operator to the data pointers of its blobs.
  for (size_t n = 0; n < runtime->num_ops; n++) {
    const struct xnn_operator_data* opdata = &runtime->ops[n];
    struct xnn_blob* blobs = runtime->blobs;
    enum xnn_status status = xnn_status_success;
    switch (opdata->op->type) {
      case xnn_operator_type_add_nd_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->inputs[1]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_add_nd_f32(
          opdata->op,
          opdata->shape1.num_dims,
          opdata->shape1.dim,
          opdata->shape2.num_dims,
          opdata->shape2.dim,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->inputs[1]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_convolution_nhwc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_convolution2d_nhwc_f32(
          opdata->op,
          opdata->batch_size,
          opdata->input_height,
          opdata->input_width,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_clamp_nc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_clamp_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_hardswish_nc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_hardswish_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_multiply_nd_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->inputs[1]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_multiply_nd_f32(
          opdata->op,
          opdata->shape1.num_dims,
          opdata->shape1.dim,
          opdata->shape2.num_dims,
          opdata->shape2.dim,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->inputs[1]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_prelu_nc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_prelu_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_sigmoid_nc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_sigmoid_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      case xnn_operator_type_softmax_nc_f32:
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_softmax_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      default:
        xnn_log_fatal("unexpected operator type %d in operator #%zu", opdata->op->type, n);
        XNN_UNREACHABLE;
    }
    if (status == xnn_status_success) {
      continue;
    }
    xnn_log_error("failed to setup runtime: error in operator #%zu", n);
    return status;
  }

  return xnn_status_success;
}
452
// Runs every operator of the runtime in order on the runtime's threadpool.
//
// Execution stops at the first operator that does not report
// xnn_status_success, and that operator's status is returned. If all
// operators succeed (or there are none), xnn_status_success is returned.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  enum xnn_status status = xnn_status_success;
  for (size_t op_index = 0; op_index < runtime->num_ops && status == xnn_status_success; op_index++) {
    status = xnn_run_operator(runtime->ops[op_index].op, runtime->threadpool);
  }
  return status;
}
464
// Destroys a runtime and releases the memory it holds.
//
// Deletes every operator slot, then frees the operator table, the blob table,
// the workspace and finally the runtime descriptor itself. Accepts NULL and a
// partially constructed runtime (as produced on the error path of runtime
// creation). The threadpool referenced by the runtime is not touched.
//
// Always returns xnn_status_success.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime == NULL) {
    return xnn_status_success;
  }

  // Blobs and workspace are only ever allocated after the operator table, so
  // there is nothing else to release when the table is missing.
  if (runtime->ops != NULL) {
    for (size_t i = 0; i < runtime->num_ops; i++) {
      xnn_delete_operator(runtime->ops[i].op);
    }
    xnn_release_memory(runtime->ops);

    xnn_release_memory(runtime->blobs);
    // The workspace is obtained from xnn_allocate_simd_memory, so it has to be
    // returned through the matching SIMD release function.
    xnn_release_simd_memory(runtime->workspace);
  }
  xnn_release_memory(runtime);
  return xnn_status_success;
}
482