• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <math.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 
12 #include <xnnpack.h>
13 #include <xnnpack/allocator.h>
14 #include <xnnpack/log.h>
15 #include <xnnpack/math.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/params.h>
18 #include <xnnpack/subgraph.h>
19 
20 
// Convenience entry point: builds a runtime for `subgraph` by forwarding to
// xnn_create_runtime_v2 with a NULL thread pool and no flags.
//
// The status returned by xnn_create_runtime_v2 is passed through unchanged.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  const uint32_t no_flags = 0;
  const enum xnn_status status =
    xnn_create_runtime_v2(subgraph, NULL /* threadpool */, no_flags, runtime_out);
  return status;
}
27 
xnn_create_runtime_v2(xnn_subgraph_t subgraph,pthreadpool_t threadpool,uint32_t flags,xnn_runtime_t * runtime_out)28 enum xnn_status xnn_create_runtime_v2(
29   xnn_subgraph_t subgraph,
30   pthreadpool_t threadpool,
31   uint32_t flags,
32   xnn_runtime_t* runtime_out)
33 {
34   struct xnn_runtime* runtime = NULL;
35   enum xnn_status status = xnn_status_uninitialized;
36 
37   if (!xnn_params.initialized) {
38     xnn_log_error("failed to create runtime: XNNPACK is not initialized");
39     goto error;
40   }
41 
42   status = xnn_status_out_of_memory;
43 
44   runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
45   if (runtime == NULL) {
46     xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
47     goto error;
48   }
49 
50   runtime->ops = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
51   if (runtime->ops == NULL) {
52     xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
53       sizeof(struct xnn_operator_data) * subgraph->num_nodes);
54     goto error;
55   }
56   runtime->num_ops = subgraph->num_nodes;
57 
58   struct xnn_value* values = subgraph->values;
59   for (size_t i = 0; i < subgraph->num_nodes; i++) {
60     const struct xnn_node* node = subgraph->nodes + i;
61     switch (node->type) {
62       case xnn_node_type_add2:
63         status = xnn_create_add_nd_f32(
64           node->activation.output_min,
65           node->activation.output_max,
66           node->flags,
67           &runtime->ops[i].op);
68         if (status != xnn_status_success) {
69           goto error;
70         }
71         runtime->ops[i].shape1.num_dims = values[node->inputs.raw[0]].shape.num_dims;
72         runtime->ops[i].shape2.num_dims = values[node->inputs.raw[1]].shape.num_dims;
73         memcpy(runtime->ops[i].shape1.dim, values[node->inputs.raw[0]].shape.dim, values[node->inputs.raw[0]].shape.num_dims * sizeof(size_t));
74         memcpy(runtime->ops[i].shape2.dim, values[node->inputs.raw[1]].shape.dim, values[node->inputs.raw[1]].shape.num_dims * sizeof(size_t));
75         runtime->ops[i].inputs[0] = node->inputs.raw[0];
76         runtime->ops[i].inputs[1] = node->inputs.raw[1];
77         runtime->ops[i].outputs[0] = node->outputs.raw[0];
78         break;
79       case xnn_node_type_convolution_2d:
80         status = xnn_create_convolution2d_nhwc_f32(
81           node->params.convolution_2d.input_padding_top,
82           node->params.convolution_2d.input_padding_right,
83           node->params.convolution_2d.input_padding_bottom,
84           node->params.convolution_2d.input_padding_left,
85           node->params.convolution_2d.kernel_height,
86           node->params.convolution_2d.kernel_width,
87           node->params.convolution_2d.subsampling_height,
88           node->params.convolution_2d.subsampling_width,
89           node->params.convolution_2d.dilation_height,
90           node->params.convolution_2d.dilation_width,
91           node->params.convolution_2d.groups,
92           node->params.convolution_2d.group_input_channels,
93           node->params.convolution_2d.group_output_channels,
94           node->params.convolution_2d.group_input_channels * node->params.convolution_2d.groups /* input_pixel_stride */,
95           node->params.convolution_2d.group_output_channels * node->params.convolution_2d.groups /* output_pixel_stride */,
96           values[node->inputs.convolution_2d.filter].data,
97           values[node->inputs.convolution_2d.bias].data,
98           node->activation.output_min,
99           node->activation.output_max,
100           node->flags,
101           &runtime->ops[i].op);
102         if (status != xnn_status_success) {
103           goto error;
104         }
105         runtime->ops[i].batch_size = values[node->inputs.raw[0]].shape.dim[0];
106         runtime->ops[i].input_height = values[node->inputs.raw[0]].shape.dim[1];
107         runtime->ops[i].input_width = values[node->inputs.raw[0]].shape.dim[2];
108         runtime->ops[i].inputs[0] = node->inputs.raw[0];
109         runtime->ops[i].outputs[0] = node->outputs.raw[0];
110         break;
111       case xnn_node_type_clamp:
112         status = xnn_create_clamp_nc_f32(
113           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
114           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
115           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
116           node->activation.output_min,
117           node->activation.output_max,
118           node->flags,
119           &runtime->ops[i].op);
120         if (status != xnn_status_success) {
121           goto error;
122         }
123         runtime->ops[i].batch_size = 1;
124         for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
125           runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
126         }
127         runtime->ops[i].inputs[0] = node->inputs.raw[0];
128         runtime->ops[i].outputs[0] = node->outputs.raw[0];
129         break;
130       case xnn_node_type_depthwise_convolution_2d:
131         status = xnn_create_convolution2d_nhwc_f32(
132           node->params.depthwise_convolution_2d.input_padding_top,
133           node->params.depthwise_convolution_2d.input_padding_right,
134           node->params.depthwise_convolution_2d.input_padding_bottom,
135           node->params.depthwise_convolution_2d.input_padding_left,
136           node->params.depthwise_convolution_2d.kernel_height,
137           node->params.depthwise_convolution_2d.kernel_width,
138           node->params.depthwise_convolution_2d.subsampling_height,
139           node->params.depthwise_convolution_2d.subsampling_width,
140           node->params.depthwise_convolution_2d.dilation_height,
141           node->params.depthwise_convolution_2d.dilation_width,
142           node->params.depthwise_convolution_2d.input_channels /* groups */,
143           1 /* group_input_channels */,
144           node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
145           node->params.depthwise_convolution_2d.input_channels /* input_pixel_stride */,
146           node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_pixel_stride */,
147           values[node->inputs.convolution_2d.filter].data,
148           values[node->inputs.convolution_2d.bias].data,
149           node->activation.output_min,
150           node->activation.output_max,
151           node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
152           &runtime->ops[i].op);
153         if (status != xnn_status_success) {
154           goto error;
155         }
156         runtime->ops[i].batch_size = values[node->inputs.raw[0]].shape.dim[0];
157         runtime->ops[i].input_height = values[node->inputs.raw[0]].shape.dim[1];
158         runtime->ops[i].input_width = values[node->inputs.raw[0]].shape.dim[2];
159         runtime->ops[i].inputs[0] = node->inputs.raw[0];
160         runtime->ops[i].outputs[0] = node->outputs.raw[0];
161         break;
162       case xnn_node_type_hardswish:
163         status = xnn_create_hardswish_nc_f32(
164           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
165           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
166           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
167           node->flags,
168           &runtime->ops[i].op);
169         if (status != xnn_status_success) {
170           goto error;
171         }
172         runtime->ops[i].batch_size = 1;
173         for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
174           runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
175         }
176         runtime->ops[i].inputs[0] = node->inputs.raw[0];
177         runtime->ops[i].outputs[0] = node->outputs.raw[0];
178         break;
179       case xnn_node_type_multiply2:
180         status = xnn_create_multiply_nd_f32(
181           node->activation.output_min,
182           node->activation.output_max,
183           node->flags,
184           &runtime->ops[i].op);
185         if (status != xnn_status_success) {
186           goto error;
187         }
188         runtime->ops[i].shape1.num_dims = values[node->inputs.raw[0]].shape.num_dims;
189         runtime->ops[i].shape2.num_dims = values[node->inputs.raw[1]].shape.num_dims;
190         memcpy(runtime->ops[i].shape1.dim, values[node->inputs.raw[0]].shape.dim, values[node->inputs.raw[0]].shape.num_dims * sizeof(size_t));
191         memcpy(runtime->ops[i].shape2.dim, values[node->inputs.raw[1]].shape.dim, values[node->inputs.raw[1]].shape.num_dims * sizeof(size_t));
192         runtime->ops[i].inputs[0] = node->inputs.raw[0];
193         runtime->ops[i].inputs[1] = node->inputs.raw[1];
194         runtime->ops[i].outputs[0] = node->outputs.raw[0];
195         break;
196       case xnn_node_type_prelu:
197         status = xnn_create_prelu_nc_f32(
198           values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* channels */,
199           values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* input stride */,
200           values[node->inputs.raw[1]].shape.dim[values[node->inputs.raw[1]].shape.num_dims - 1] /* output stride */,
201           values[node->inputs.raw[1]].data /* negative slope */,
202           -INFINITY,
203           +INFINITY,
204           node->flags,
205           &runtime->ops[i].op);
206         if (status != xnn_status_success) {
207           goto error;
208         }
209         runtime->ops[i].batch_size = 1;
210         for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
211           runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
212         }
213         runtime->ops[i].inputs[0] = node->inputs.raw[0];
214         runtime->ops[i].outputs[0] = node->outputs.raw[0];
215         break;
216       case xnn_node_type_sigmoid:
217         status = xnn_create_sigmoid_nc_f32(
218           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
219           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
220           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
221           node->flags,
222           &runtime->ops[i].op);
223         if (status != xnn_status_success) {
224           goto error;
225         }
226         runtime->ops[i].batch_size = 1;
227         for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
228           runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
229         }
230         runtime->ops[i].inputs[0] = node->inputs.raw[0];
231         runtime->ops[i].outputs[0] = node->outputs.raw[0];
232         break;
233       case xnn_node_type_softmax:
234         status = xnn_create_softmax_nc_f32(
235           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* channels */,
236           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* input stride */,
237           values[node->inputs.raw[0]].shape.dim[values[node->inputs.raw[0]].shape.num_dims - 1] /* output stride */,
238           node->flags,
239           &runtime->ops[i].op);
240         if (status != xnn_status_success) {
241           goto error;
242         }
243         runtime->ops[i].batch_size = 1;
244         for (size_t i = 0; i + 1 < values[node->inputs.raw[0]].shape.num_dims; i++) {
245           runtime->ops[i].batch_size *= values[node->inputs.raw[0]].shape.dim[i];
246         }
247         runtime->ops[i].inputs[0] = node->inputs.raw[0];
248         runtime->ops[i].outputs[0] = node->outputs.raw[0];
249         break;
250       case xnn_node_type_invalid:
251         xnn_log_fatal("unexpected node type %d in node #%zu", node->type, i);
252         XNN_UNREACHABLE;
253         break;
254     }
255   }
256 
257   runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
258   if (runtime->blobs == NULL) {
259     xnn_log_error("failed to allocate %zu bytes for blob descriptors",
260       sizeof(struct xnn_blob) * subgraph->num_values);
261     goto error;
262   }
263   runtime->num_blobs = subgraph->num_values;
264 
265   size_t buffer_size = 0;
266   for (size_t i = 0; i < subgraph->num_values; i++) {
267     const struct xnn_value* value = &subgraph->values[i];
268     struct xnn_blob* blob = &runtime->blobs[i];
269     if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
270       blob->size = xnn_tensor_get_size(subgraph, i);
271       blob->data = (void*) value->data;
272       if (blob->data == NULL) {
273         if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
274           // Value is purely internal to the runtime, and must be allocated in its workspace.
275           buffer_size = round_up_po2(buffer_size + blob->size, XNN_EXTRA_BYTES);
276         } else {
277           // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
278           blob->external = true;
279         }
280       }
281     }
282   }
283 
284   runtime->workspace = xnn_allocate_simd_memory(buffer_size);
285   if (runtime->workspace == NULL) {
286     xnn_log_error("failed to allocate %zu bytes to runtime workspace", buffer_size);
287     goto error;
288   }
289 
290   size_t buffer_offset = 0;
291   for (size_t i = 0; i < subgraph->num_values; i++) {
292     const struct xnn_value* value = &subgraph->values[i];
293     struct xnn_blob* blob = &runtime->blobs[i];
294     if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
295       if (value->data == NULL && !blob->external) {
296         // Value is purely internal to the runtime, allocate it in the workspace.
297         blob->data = (void*) ((uintptr_t) runtime->workspace + buffer_offset);
298         buffer_offset = round_up_po2(buffer_offset + blob->size, XNN_EXTRA_BYTES);
299       }
300     }
301   }
302 
303   runtime->threadpool = threadpool;
304 
305   *runtime_out = runtime;
306   return xnn_status_success;
307 
308 error:
309   xnn_delete_runtime(runtime);
310   return status;
311 }
312 
// Binds caller-provided buffers to the runtime's external blobs and sets up
// every operator with the resulting data pointers.
//
// Parameters:
//   runtime             - runtime whose blobs and operators are configured.
//   num_external_values - number of entries in `external_values`.
//   external_values     - (blob ID, data pointer) pairs for external blobs.
//
// Returns:
//   xnn_status_invalid_parameter if any ID is out of range or names a blob that
//   is not marked external (no blob is modified in that case); the failing
//   operator's status if an operator setup call fails; xnn_status_success
//   otherwise.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  struct xnn_blob* blobs = runtime->blobs;

  // Pass 1: validate every external value before touching any blob, so a
  // rejected request leaves the runtime exactly as it was.
  for (size_t n = 0; n < num_external_values; n++) {
    const uint32_t value_id = external_values[n].id;
    if (value_id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        value_id, n);
      return xnn_status_invalid_parameter;
    }

    if (!blobs[value_id].external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
      return xnn_status_invalid_parameter;
    }
  }

  // Pass 2: all IDs are known to be valid; record the supplied data pointers.
  for (size_t n = 0; n < num_external_values; n++) {
    blobs[external_values[n].id].data = external_values[n].data;
  }

  // Pass 3: set up each operator against the (now complete) blob table.
  for (size_t op_index = 0; op_index < runtime->num_ops; op_index++) {
    const struct xnn_operator_data* opdata = &runtime->ops[op_index];
    enum xnn_status status = xnn_status_success;
    switch (opdata->op->type) {
      case xnn_operator_type_add_nd_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->inputs[1]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_add_nd_f32(
          opdata->op,
          opdata->shape1.num_dims, opdata->shape1.dim,
          opdata->shape2.num_dims, opdata->shape2.dim,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->inputs[1]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_convolution_nhwc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_convolution2d_nhwc_f32(
          opdata->op,
          opdata->batch_size, opdata->input_height, opdata->input_width,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_clamp_nc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_clamp_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_hardswish_nc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_hardswish_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_multiply_nd_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->inputs[1]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_multiply_nd_f32(
          opdata->op,
          opdata->shape1.num_dims, opdata->shape1.dim,
          opdata->shape2.num_dims, opdata->shape2.dim,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->inputs[1]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_prelu_nc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_prelu_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_sigmoid_nc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_sigmoid_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      case xnn_operator_type_softmax_nc_f32:
      {
        assert(blobs[opdata->inputs[0]].data != NULL);
        assert(blobs[opdata->outputs[0]].data != NULL);
        status = xnn_setup_softmax_nc_f32(
          opdata->op,
          opdata->batch_size,
          blobs[opdata->inputs[0]].data,
          blobs[opdata->outputs[0]].data,
          runtime->threadpool);
        break;
      }
      default:
        // Only operator types created by xnn_create_runtime_v2 are expected.
        xnn_log_fatal("unexpected operator type %d in operator #%zu", opdata->op->type, op_index);
        XNN_UNREACHABLE;
    }
    if (status != xnn_status_success) {
      xnn_log_error("failed to setup runtime: error in operator #%zu", op_index);
      return status;
    }
  }

  return xnn_status_success;
}
452 
// Runs the runtime's operators in order on the runtime's thread pool.
//
// Execution stops at the first operator that does not return
// xnn_status_success, and that status is returned. If every operator succeeds
// (or there are no operators), xnn_status_success is returned.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  enum xnn_status status = xnn_status_success;
  for (size_t op_index = 0;
       status == xnn_status_success && op_index < runtime->num_ops;
       op_index++)
  {
    status = xnn_run_operator(runtime->ops[op_index].op, runtime->threadpool);
  }
  return status;
}
464 
// Destroys a runtime and releases everything it owns: its operators, the
// operator descriptor array, the blob descriptor array, the workspace buffer,
// and the runtime descriptor itself.
//
// `runtime` may be NULL or partially constructed (as on the error path of
// xnn_create_runtime_v2). Always returns xnn_status_success.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime == NULL) {
    return xnn_status_success;
  }

  // Blobs and workspace are only ever allocated after `ops`, so a NULL `ops`
  // means there is nothing else to release besides the descriptor.
  if (runtime->ops != NULL) {
    for (size_t i = 0; i < runtime->num_ops; i++) {
      xnn_delete_operator(runtime->ops[i].op);
    }
    xnn_release_memory(runtime->ops);

    xnn_release_memory(runtime->blobs);
    // The workspace comes from xnn_allocate_simd_memory, so it must be released
    // with the matching SIMD deallocator rather than xnn_release_memory.
    // NOTE(review): assumes xnn_release_simd_memory is declared in
    // <xnnpack/allocator.h> next to xnn_allocate_simd_memory -- confirm.
    xnn_release_simd_memory(runtime->workspace);
  }
  xnn_release_memory(runtime);
  return xnn_status_success;
}
482