1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <math.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11
12 #include <xnnpack.h>
13 #include <xnnpack/allocator.h>
14 #include <xnnpack/log.h>
15 #include <xnnpack/math.h>
16 #include <xnnpack/memory-planner.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/params.h>
19 #include <xnnpack/subgraph.h>
20
21
// Convenience entry point: creates a runtime for `subgraph` by forwarding to
// xnn_create_runtime_v2 with a NULL thread pool and no flags set.
// The status returned by xnn_create_runtime_v2 is passed through unchanged.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  const uint32_t default_flags = 0;
  return xnn_create_runtime_v2(subgraph, NULL /* threadpool */, default_flags, runtime_out);
}
28
// Creates a runtime object from `subgraph`.
//
// Steps, in order: optimize the subgraph, allocate the runtime descriptor and one operator-data slot per node,
// create an operator for every non-fused node, allocate one blob descriptor per value, plan and allocate a workspace
// for values that are neither static nor external, and finally record the thread pool in the runtime.
//
// Parameters:
//   subgraph    - graph to instantiate. When XNN_FLAG_YIELD_WORKERS is set in `flags`, that flag is also recorded on
//                 the last valid node of the subgraph.
//   threadpool  - stored in the runtime; may be NULL (xnn_create_runtime passes NULL).
//   flags       - the XNN_FLAG_SPARSE_INFERENCE bit is passed to xnn_subgraph_optimize; XNN_FLAG_YIELD_WORKERS is
//                 handled as described above.
//   runtime_out - receives the new runtime on success; it is not written on failure.
//
// Returns xnn_status_success on success, xnn_status_uninitialized when XNNPACK has not been initialized,
// xnn_status_out_of_memory when an allocation made here fails, or the status reported by a node's create callback.
// On any failure the partially constructed runtime is destroyed with xnn_delete_runtime.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  struct xnn_runtime* runtime = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create runtime: XNNPACK is not initialized");
    goto error;
  }

  xnn_subgraph_optimize(subgraph, flags & XNN_FLAG_SPARSE_INFERENCE);

  // From here until operator creation, every failure is an allocation failure.
  status = xnn_status_out_of_memory;

  runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
  if (runtime == NULL) {
    xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
    goto error;
  }

  runtime->opdata = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
  if (runtime->opdata == NULL) {
    xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
      sizeof(struct xnn_operator_data) * subgraph->num_nodes);
    goto error;
  }
  runtime->num_ops = subgraph->num_nodes;

  if (flags & XNN_FLAG_YIELD_WORKERS) {
    // Tag only the last non-fused node with the yield flag.
    struct xnn_node* last_valid_node = NULL;
    for (size_t i = 0; i < subgraph->num_nodes; i++) {
      struct xnn_node* node = subgraph->nodes + i;
      if (node->type != xnn_node_type_invalid) {
        last_valid_node = node;
      }
    }
    if (last_valid_node != NULL) {
      last_valid_node->flags |= XNN_FLAG_YIELD_WORKERS;
    }
  }

  struct xnn_value* values = subgraph->values;
  for (size_t i = 0; i < subgraph->num_nodes; i++) {
    const struct xnn_node* node = subgraph->nodes + i;

    // Ignore fused nodes
    if (node->type != xnn_node_type_invalid) {
      assert(node->create != NULL);
      status = node->create(node, values, subgraph->num_values, runtime->opdata + i);
      if (status != xnn_status_success) {
        goto error;
      }
      runtime->opdata[i].setup = node->setup;
    }
  }

  // node->create() left `status` equal to xnn_status_success. Reset it so that the allocation failures below are
  // reported as out-of-memory instead of being returned as success with *runtime_out never written.
  status = xnn_status_out_of_memory;

  runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
  if (runtime->blobs == NULL) {
    xnn_log_error("failed to allocate %zu bytes for blob descriptors",
      sizeof(struct xnn_blob) * subgraph->num_values);
    goto error;
  }
  runtime->num_blobs = subgraph->num_values;

  struct xnn_value_allocation_tracker mem_alloc_tracker;
  xnn_init_value_allocation_tracker(&mem_alloc_tracker, subgraph);

  // First pass over values: record sizes, keep static data pointers, and classify values without data as either
  // workspace-allocated (internal) or external.
  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      blob->size = xnn_tensor_get_size(subgraph, i);
      blob->data = (void*) (uintptr_t) value->data;
      if (blob->data == NULL) {
        if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
          // Value is purely internal to the runtime, and must be allocated in its workspace.
          xnn_add_value_allocation_tracker(&mem_alloc_tracker, i, round_up_po2(blob->size, XNN_EXTRA_BYTES));
        } else {
          // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
          blob->external = true;
        }
      }
    }
  }
  xnn_plan_value_allocation_tracker(&mem_alloc_tracker);

  if (mem_alloc_tracker.mem_arena_size != 0) {
    // XNN_EXTRA_BYTES ensures that out-of-bound reads of intermediate values don't segfault.
    const size_t mem_arena_size = mem_alloc_tracker.mem_arena_size + XNN_EXTRA_BYTES;
    runtime->workspace = xnn_allocate_simd_memory(mem_arena_size);
    if (runtime->workspace == NULL) {
      xnn_log_error("failed to allocate %zu bytes for runtime workspace", mem_arena_size);
      // The tracker is a local, so it must be released here before jumping to the shared error path.
      xnn_release_value_allocation_tracker(&mem_alloc_tracker);
      goto error;
    }
    // Second pass over values: point each internal blob at its planned offset inside the workspace.
    for (size_t i = 0; i < subgraph->num_values; i++) {
      const struct xnn_value* value = &subgraph->values[i];
      struct xnn_blob* blob = &runtime->blobs[i];
      if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
        if (value->data == NULL && !blob->external) {
          // Value is purely internal to the runtime, allocate it in the workspace.
          blob->data = (void*) ((uintptr_t) runtime->workspace + mem_alloc_tracker.usage[i].alloc_offset);
        }
      }
    }
  }
  xnn_release_value_allocation_tracker(&mem_alloc_tracker);

  runtime->threadpool = threadpool;

  *runtime_out = runtime;
  return xnn_status_success;

error:
  // xnn_delete_runtime accepts NULL, so this is safe even when the runtime descriptor was never allocated.
  xnn_delete_runtime(runtime);
  return status;
}
150
// Binds caller-provided buffers to the runtime's external values and then runs the setup callback of every
// operator in the runtime.
//
// Parameters:
//   runtime             - runtime whose blobs and operators are configured.
//   num_external_values - number of entries in `external_values`.
//   external_values     - (id, data) pairs; each id must be below runtime->num_blobs and refer to an external blob.
//
// Returns xnn_status_invalid_parameter if any entry fails validation (no blob is modified in that case), the
// failing status of an operator's setup callback, or xnn_status_success.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Pass 1: check every entry up front, so a rejected call leaves the blobs exactly as they were.
  for (size_t n = 0; n < num_external_values; n++) {
    const uint32_t id = external_values[n].id;
    if (id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        id, n);
      return xnn_status_invalid_parameter;
    }

    if (!runtime->blobs[id].external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", id);
      return xnn_status_invalid_parameter;
    }
  }

  // Pass 2: all entries are valid, so record the caller's data pointers.
  for (size_t n = 0; n < num_external_values; n++) {
    runtime->blobs[external_values[n].id].data = external_values[n].data;
  }

  // Pass 3: set up each operator; slots without an operator object are skipped.
  for (size_t op = 0; op < runtime->num_ops; op++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[op];
    if (opdata->operator_object != NULL) {
      assert(opdata->setup != NULL);
      const enum xnn_status setup_status =
        opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
      if (setup_status != xnn_status_success) {
        xnn_log_error("failed to setup runtime: error in operator #%zu", op);
        return setup_status;
      }
    }
  }

  return xnn_status_success;
}
199
// Runs every operator of `runtime` in index order using the runtime's thread pool.
// Slots without an operator object are skipped. Execution stops at the first operator that does not report
// xnn_status_success, and that status is returned; otherwise xnn_status_success is returned.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  for (size_t op = 0; op < runtime->num_ops; op++) {
    if (runtime->opdata[op].operator_object != NULL) {
      const enum xnn_status run_status =
        xnn_run_operator(runtime->opdata[op].operator_object, runtime->threadpool);
      if (run_status != xnn_status_success) {
        return run_status;
      }
    }
  }
  return xnn_status_success;
}
216
// Destroys `runtime` and the resources it holds: its operators, the operator-data array, the blob descriptors,
// the workspace, and the runtime descriptor itself.
// A NULL `runtime` is accepted and ignored. Always returns xnn_status_success.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime == NULL) {
    return xnn_status_success;
  }

  // Blobs and workspace are released only when the operator-data array exists.
  if (runtime->opdata != NULL) {
    size_t op = 0;
    while (op < runtime->num_ops) {
      xnn_delete_operator(runtime->opdata[op].operator_object);
      op++;
    }
    xnn_release_memory(runtime->opdata);

    xnn_release_memory(runtime->blobs);
    xnn_release_simd_memory(runtime->workspace);
  }

  xnn_release_memory(runtime);
  return xnn_status_success;
}
234