• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/memory-planner.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>
20 
21 
// Creates a runtime with default settings: single-threaded (no thread pool)
// and no behavior flags. Thin convenience wrapper over xnn_create_runtime_v2.
enum xnn_status xnn_create_runtime(xnn_subgraph_t subgraph, xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v2(subgraph, /*threadpool=*/NULL, /*flags=*/0, runtime_out);
}
28 
// Creates a runtime from a subgraph: optimizes the subgraph, instantiates one
// operator per surviving node, sizes a blob descriptor per value, and carves
// purely-internal tensors out of a single planned workspace arena.
//
// On any failure, jumps to the shared cleanup path, which deletes the
// partially-constructed runtime and returns the current status code.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  struct xnn_runtime* runtime = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create runtime: XNNPACK is not initialized");
    goto error;
  }

  // Fuse/eliminate nodes before any operators are created. Sparse inference
  // is opt-in, forwarded via the XNN_FLAG_SPARSE_INFERENCE bit.
  xnn_subgraph_optimize(subgraph, flags & XNN_FLAG_SPARSE_INFERENCE);

  // All failures below this point are allocation failures until node->create
  // overwrites the status.
  status = xnn_status_out_of_memory;

  runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
  if (runtime == NULL) {
    xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
    goto error;
  }

  // One opdata slot per subgraph node; slots for fused-away (invalid) nodes
  // stay zeroed and are skipped at setup/invoke time.
  runtime->opdata = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
  if (runtime->opdata == NULL) {
    xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
      sizeof(struct xnn_operator_data) * subgraph->num_nodes);
    goto error;
  }
  runtime->num_ops = subgraph->num_nodes;

  if (flags & XNN_FLAG_YIELD_WORKERS) {
    // Tag only the last valid node, so workers are yielded once, at the end
    // of an inference pass.
    struct xnn_node* last_valid_node = NULL;
    for (size_t i = 0; i < subgraph->num_nodes; i++) {
      struct xnn_node* node = subgraph->nodes + i;
      if (node->type != xnn_node_type_invalid) {
        last_valid_node = node;
      }
    }
    if (last_valid_node != NULL) {
      last_valid_node->flags |= XNN_FLAG_YIELD_WORKERS;
    }
  }

  // Instantiate an operator for every surviving node. opdata is indexed by
  // node index, keeping the subgraph ordering.
  struct xnn_value* values = subgraph->values;
  for (size_t i = 0; i < subgraph->num_nodes; i++) {
    const struct xnn_node* node = subgraph->nodes + i;

    // Ignore fused nodes
    if (node->type != xnn_node_type_invalid) {
      assert(node->create != NULL);
      status = node->create(node, values, subgraph->num_values, runtime->opdata + i);
      if (status != xnn_status_success) {
        goto error;
      }
      runtime->opdata[i].setup = node->setup;
    }
  }

  // One blob descriptor per subgraph value, indexed by value ID.
  runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
  if (runtime->blobs == NULL) {
    xnn_log_error("failed to allocate %zu bytes for blob descriptors",
      sizeof(struct xnn_blob) * subgraph->num_values);
    goto error;
  }
  runtime->num_blobs = subgraph->num_values;

  struct xnn_value_allocation_tracker mem_alloc_tracker;
  xnn_init_value_allocation_tracker(&mem_alloc_tracker, subgraph);

  // Classify every dense tensor value: static data is referenced in place,
  // internal values are registered with the planner, and everything else is
  // marked external (caller-provided at setup time).
  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      blob->size = xnn_tensor_get_size(subgraph, i);
      blob->data = (void*) (uintptr_t) value->data;
      if (blob->data == NULL) {
        if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
          // Value is purely internal to the runtime, and must be allocated in its workspace.
          xnn_add_value_allocation_tracker(&mem_alloc_tracker, i, round_up_po2(blob->size, XNN_EXTRA_BYTES));
        } else {
          // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
          blob->external = true;
        }
      }
    }
  }
  xnn_plan_value_allocation_tracker(&mem_alloc_tracker);

  if (mem_alloc_tracker.mem_arena_size != 0) {
    // XNN_EXTRA_BYTES ensures that out-of-bound reads of intermediate values don't segfault.
    const size_t mem_arena_size = mem_alloc_tracker.mem_arena_size + XNN_EXTRA_BYTES;
    runtime->workspace = xnn_allocate_simd_memory(mem_arena_size);
    if (runtime->workspace == NULL) {
      xnn_log_error("failed to allocate %zu bytes for runtime workspace", mem_arena_size);
      // The tracker is released manually here because this path jumps past
      // the normal release below.
      xnn_release_value_allocation_tracker(&mem_alloc_tracker);
      goto error;
    }
    // Second pass: resolve each planned internal value to its offset inside
    // the freshly allocated workspace.
    for (size_t i = 0; i < subgraph->num_values; i++) {
      const struct xnn_value* value = &subgraph->values[i];
      struct xnn_blob* blob = &runtime->blobs[i];
      if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
        if (value->data == NULL && !blob->external) {
          // Value is purely internal to the runtime, allocate it in the workspace.
          blob->data = (void*) ((uintptr_t) runtime->workspace + mem_alloc_tracker.usage[i].alloc_offset);
        }
      }
    }
  }
  xnn_release_value_allocation_tracker(&mem_alloc_tracker);

  runtime->threadpool = threadpool;

  *runtime_out = runtime;
  return xnn_status_success;

error:
  // Safe for a partially-built (or NULL) runtime: xnn_delete_runtime guards
  // each member before releasing it.
  xnn_delete_runtime(runtime);
  return status;
}
150 
// Binds caller-provided data pointers to the runtime's external values, then
// re-runs per-operator setup so every operator observes the new pointers.
// External values are validated in full before any state is mutated, so a
// rejected input leaves the runtime exactly as it was.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Phase 1: validate every external value ID without touching the runtime.
  for (size_t n = 0; n < num_external_values; n++) {
    const uint32_t value_id = external_values[n].id;
    if (value_id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        value_id, n);
      return xnn_status_invalid_parameter;
    }
    if (!runtime->blobs[value_id].external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
      return xnn_status_invalid_parameter;
    }
  }

  // Phase 2: everything checked out — commit the data pointers.
  for (size_t n = 0; n < num_external_values; n++) {
    runtime->blobs[external_values[n].id].data = external_values[n].data;
  }

  // Phase 3: re-run setup on each live operator with the updated blob table.
  for (size_t op = 0; op < runtime->num_ops; op++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[op];
    if (opdata->operator_object == NULL) {
      // Operator was removed during optimization; its slot stays empty.
      continue;
    }

    assert(opdata->setup != NULL);
    const enum xnn_status status = opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
    if (status != xnn_status_success) {
      xnn_log_error("failed to setup runtime: error in operator #%zu", op);
      return status;
    }
  }

  return xnn_status_success;
}
199 
// Executes every live operator in the runtime, in node order, stopping at the
// first failure and propagating its status code.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  for (size_t op = 0; op < runtime->num_ops; op++) {
    xnn_operator_t operator_object = runtime->opdata[op].operator_object;
    // Operators removed after fusion leave a NULL object; skip their slots.
    if (operator_object == NULL) {
      continue;
    }

    const enum xnn_status status = xnn_run_operator(operator_object, runtime->threadpool);
    if (status != xnn_status_success) {
      return status;
    }
  }
  return xnn_status_success;
}
216 
// Destroys a runtime and everything it owns: per-node operators, the opdata
// array, the blob descriptors, and the SIMD-aligned workspace arena.
// A NULL runtime is accepted and treated as a no-op.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime == NULL) {
    return xnn_status_success;
  }

  if (runtime->opdata != NULL) {
    // Delete each operator; fused-away nodes leave a NULL operator object in
    // their slot, which is passed through to xnn_delete_operator as-is.
    for (size_t op = 0; op < runtime->num_ops; op++) {
      xnn_delete_operator(runtime->opdata[op].operator_object);
    }
    xnn_release_memory(runtime->opdata);

    // blobs and workspace are allocated only after opdata succeeds, so they
    // can only be non-NULL under this guard.
    xnn_release_memory(runtime->blobs);
    xnn_release_simd_memory(runtime->workspace);
  }
  xnn_release_memory(runtime);
  return xnn_status_success;
}
234