// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#ifndef __MACH__
#define _POSIX_C_SOURCE 199309L
#endif

#include <assert.h>
#include <inttypes.h> // For PRIu32.
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h> // For snprintf.
#include <stdlib.h>
#include <string.h> // For memcpy, memset, and strlen.

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/cache.h>
#include <xnnpack/common.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/memory-planner.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>

#if defined(__EMSCRIPTEN__)
#include <emscripten/emscripten.h>
#elif XNN_PLATFORM_WINDOWS
#include <windows.h>
#else
#include <errno.h>
#include <time.h>
#endif

#ifndef XNN_ENABLE_JIT
  #error "XNN_ENABLE_JIT is not defined"
#endif

enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create workspace: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  struct xnn_workspace* workspace = NULL;
  workspace = xnn_allocate_zero_memory(sizeof(struct xnn_workspace));
  if (workspace == NULL) {
    xnn_log_error("failed to allocate %zu bytes for workspace descriptor", sizeof(struct xnn_workspace));
    return xnn_status_out_of_memory;
  }
  workspace->ref_count = 1;
  *workspace_out = workspace;
  return xnn_status_success;
}

static inline void xnn_retain_workspace(xnn_workspace_t workspace)
{
  workspace->ref_count++;
}

enum xnn_status xnn_release_workspace(xnn_workspace_t workspace)
{
  assert(workspace->ref_count != 0);
  if (--workspace->ref_count == 0) {
    xnn_release_simd_memory(workspace->data);
    xnn_release_memory(workspace);
  }
  return xnn_status_success;
}
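
// Illustrative usage sketch (not part of this file): the workspace is reference-counted.
// The creator holds one reference, and every runtime that adopts the workspace retains another,
// so the backing memory is only freed when the last reference is released. Error handling is elided.
//
//   xnn_workspace_t workspace = NULL;
//   xnn_create_workspace(&workspace);              // ref_count == 1
//   /* ... pass the workspace to one or more runtimes via xnn_create_runtime_v4 ... */
//   xnn_release_workspace(workspace);              // drops the caller's reference; freed once ref_count reaches 0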

enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out)
{
  struct xnn_weights_cache* weights_cache = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create weights cache: XNNPACK is not initialized");
    goto error;
  }

  weights_cache = xnn_allocate_zero_memory(sizeof(struct xnn_weights_cache));
  if (weights_cache == NULL) {
    xnn_log_error("failed to allocate %zu bytes for weights cache descriptor", sizeof(struct xnn_weights_cache));
    status = xnn_status_out_of_memory;
    goto error;
  }

  status = xnn_init_weights_cache_with_size(weights_cache, size);
  if (status != xnn_status_success) {
    goto error;
  }
  *weights_cache_out = weights_cache;
  return xnn_status_success;

error:
  xnn_release_weights_cache(weights_cache);
  return status;
}

enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out)
{
  return xnn_create_weights_cache_with_size(XNN_DEFAULT_WEIGHTS_BUFFER_SIZE, weights_cache_out);
}

enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache)
{
  enum xnn_status status = xnn_release_weights_cache(weights_cache);
  if (status != xnn_status_success) {
    return status;
  }
  xnn_release_memory(weights_cache);
  return xnn_status_success;
}
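
// Illustrative usage sketch (not part of this file, and assuming the public
// xnn_finalize_weights_cache API from xnnpack.h): a weights cache is created before the
// runtimes that share it, finalized once all runtimes exist (xnn_setup_runtime below checks
// for finalization), and deleted after the runtimes are gone. Error handling is elided.
//
//   xnn_weights_cache_t weights_cache = NULL;
//   xnn_create_weights_cache(&weights_cache);      // or xnn_create_weights_cache_with_size
//   /* ... create runtimes that reference weights_cache ... */
//   xnn_finalize_weights_cache(weights_cache, xnn_weights_cache_finalization_kind_hard);
//   /* ... set up and invoke the runtimes ... */
//   xnn_delete_weights_cache(weights_cache);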

enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v2(subgraph, NULL /* threadpool */, 0 /* flags */, runtime_out);
}

enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v3(subgraph, /* weights_cache */ NULL, threadpool, flags, runtime_out);
}

enum xnn_status xnn_create_runtime_v3(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  xnn_workspace_t workspace;
  enum xnn_status status = xnn_create_workspace(&workspace);
  if (status != xnn_status_success) {
    return status;
  }
  status = xnn_create_runtime_v4(subgraph, weights_cache, workspace, threadpool, flags, runtime_out);
  // Release workspace regardless of return status of creating runtime.
  xnn_release_workspace(workspace);
  return status;
}
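
// Illustrative usage sketch (not part of this file): the v1/v2/v3 entry points delegate to
// xnn_create_runtime_v4, filling in a default threadpool, weights cache, and workspace, so a
// caller only supplies the pieces it cares about. Error handling is elided.
//
//   xnn_runtime_t runtime = NULL;
//   xnn_create_runtime_v3(subgraph, weights_cache, threadpool, XNN_FLAG_BASIC_PROFILING, &runtime);
//   /* the workspace created inside v3 is released there; the runtime keeps its own reference */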

static enum xnn_status initialize_workspace_blobs(
    xnn_subgraph_t subgraph,
    xnn_runtime_t runtime,
    struct xnn_value_allocation_tracker* mem_alloc_tracker)
{
  assert(runtime->workspace != NULL);

  size_t mem_arena_size = mem_alloc_tracker->mem_arena_size;
  if (mem_arena_size == 0) {
    return xnn_status_success;
  }
  // Sparse microkernels can read up to 2 * XNN_EXTRA_BYTES beyond array bounds.
  mem_arena_size += 2 * XNN_EXTRA_BYTES;

  // Records how much the workspace data has moved due to allocating a larger workspace.
  ptrdiff_t workspace_data_delta = 0;
  // Allocates a larger workspace here if needed.
  if (runtime->workspace->size < mem_arena_size) {
    void* old_workspace_data = runtime->workspace->data;
    if (runtime->workspace->size != 0) {
      // Free up the workspace's current data. Free first then allocate to keep peak memory usage low.
      xnn_release_simd_memory(runtime->workspace->data);
    }
    void* new_workspace_data = xnn_allocate_simd_memory(mem_arena_size);
    if (new_workspace_data == NULL) {
      xnn_log_error("failed to allocate %zu bytes for runtime workspace", mem_arena_size);
      return xnn_status_out_of_memory;
    }
    runtime->workspace->data = new_workspace_data;
    runtime->workspace->size = mem_arena_size;
    // Keep track of how much the workspace data moved.
    if (old_workspace_data != NULL) {
      workspace_data_delta = (uintptr_t) new_workspace_data - (uintptr_t) old_workspace_data;
    }
  }

  assert(runtime->workspace->size >= mem_arena_size);

  // Initialize current runtime's blob pointers.
  for (size_t i = 0; i < subgraph->num_values; i++) {
    const struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      if (blob->allocation_type == xnn_allocation_type_workspace) {
        // Value is purely internal to the runtime, allocate it in the workspace.
        blob->data = (void*) ((uintptr_t) runtime->workspace->data + mem_alloc_tracker->usage[i].alloc_offset);
      }
    }
  }

  // Adjust the blob pointers of all runtimes that share this workspace.
  if (workspace_data_delta != 0) {
    for (struct xnn_runtime* rt = runtime->workspace->first_user; rt != NULL; rt = rt->next_workspace_user) {
      // The current runtime already has the correct offset.
      if (rt == runtime) {
        continue;
      }
      for (size_t i = 0; i < rt->num_blobs; i++) {
        struct xnn_blob* blob = &rt->blobs[i];
        if (blob->allocation_type == xnn_allocation_type_workspace) {
          assert(blob->data != NULL);
          blob->data = (void*) ((uintptr_t) blob->data + workspace_data_delta);
        }
      }
    }
  }

  return xnn_status_success;
}
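
// Illustrative example of the pointer rebasing above (hypothetical addresses): if a shared
// workspace grows and its data moves from 0x10000 to 0x24000, workspace_data_delta is 0x14000,
// and a blob in another runtime that pointed at 0x10040 (offset 0x40 into the old workspace)
// is shifted to 0x24040, so every sharing runtime keeps addressing the same offsets.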

enum xnn_status xnn_create_runtime_v4(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  xnn_workspace_t workspace,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  struct xnn_runtime* runtime = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create runtime: XNNPACK is not initialized");
    goto error;
  }

  if (workspace == NULL) {
    xnn_log_error("failed to create runtime: workspace is NULL");
    status = xnn_status_invalid_parameter;
    goto error;
  }

  const uint32_t optimization_flags = XNN_FLAG_SPARSE_INFERENCE | XNN_FLAG_HINT_FP16_INFERENCE |
    XNN_FLAG_FORCE_FP16_INFERENCE | XNN_FLAG_NO_OPERATOR_FUSION;
  status = xnn_subgraph_optimize(subgraph, flags & optimization_flags);
  if (status != xnn_status_success) {
    xnn_log_error("failed to optimize subgraph");
    goto error;
  }

  status = xnn_status_out_of_memory;

  runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
  if (runtime == NULL) {
    xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
    goto error;
  }

  runtime->opdata = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
  if (runtime->opdata == NULL) {
    xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
      sizeof(struct xnn_operator_data) * (size_t) subgraph->num_nodes);
    goto error;
  }
  runtime->num_ops = subgraph->num_nodes;

  if (flags & XNN_FLAG_YIELD_WORKERS) {
    struct xnn_node* last_valid_node = NULL;
    for (size_t i = 0; i < subgraph->num_nodes; i++) {
      struct xnn_node* node = subgraph->nodes + i;
      if (node->type != xnn_node_type_invalid) {
        last_valid_node = node;
      }
    }
    if (last_valid_node != NULL) {
      last_valid_node->flags |= XNN_FLAG_YIELD_WORKERS;
    }
  }

  struct xnn_code_cache* code_cache = NULL;
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  code_cache = &runtime->code_cache;
  status = xnn_init_code_cache(code_cache);
  if (status != xnn_status_success) {
    goto error;
  }
#endif
  const struct xnn_caches caches = {
    .code_cache = code_cache,
    .weights_cache = weights_cache,
  };

  struct xnn_value* values = subgraph->values;
  for (size_t i = 0; i < subgraph->num_nodes; i++) {
    const struct xnn_node* node = subgraph->nodes + i;

    // Ignore fused nodes
    if (node->type != xnn_node_type_invalid) {
      assert(node->create != NULL);
      status = node->create(node, values, subgraph->num_values, runtime->opdata + i, &caches);
      if (status != xnn_status_success) {
        goto error;
      }
      runtime->opdata[i].setup = node->setup;
    }
  }

#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  xnn_finalize_code_memory(&code_cache->cache.code);
#endif

  runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
  if (runtime->blobs == NULL) {
    xnn_log_error("failed to allocate %zu bytes for blob descriptors",
      sizeof(struct xnn_blob) * (size_t) subgraph->num_values);
    goto error;
  }
  runtime->num_blobs = subgraph->num_values;

  struct xnn_value_allocation_tracker mem_alloc_tracker;
  xnn_init_value_allocation_tracker(&mem_alloc_tracker, subgraph);

  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      blob->size = xnn_tensor_get_size(subgraph, i);
      blob->data = (void*) (uintptr_t) value->data;
      if (blob->data == NULL) {
        if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) == 0) {
          // Value is purely internal to the runtime, and must be allocated in its workspace.
          xnn_add_value_allocation_tracker(&mem_alloc_tracker, i, round_up_po2(blob->size, XNN_EXTRA_BYTES));
          blob->allocation_type = xnn_allocation_type_workspace;
        } else {
          // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
          blob->allocation_type = xnn_allocation_type_external;
        }
      } else {
        blob->allocation_type = xnn_allocation_type_static;
      }
    }
  }
  xnn_plan_value_allocation_tracker(&mem_alloc_tracker);

  xnn_retain_workspace(workspace);
  runtime->workspace = workspace;
  runtime->next_workspace_user = runtime->workspace->first_user;
  runtime->workspace->first_user = runtime;

  status = initialize_workspace_blobs(subgraph, runtime, &mem_alloc_tracker);
  if (status != xnn_status_success) {
    xnn_release_value_allocation_tracker(&mem_alloc_tracker);
    goto error;
  }

  if (flags & XNN_FLAG_BASIC_PROFILING) {
    runtime->profiling = true;
  }

  xnn_release_value_allocation_tracker(&mem_alloc_tracker);

  runtime->threadpool = threadpool;

  *runtime_out = runtime;
  return xnn_status_success;

error:
  xnn_delete_runtime(runtime);
  return status;
}
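
// Illustrative usage sketch (not part of this file): xnn_create_runtime_v4 lets several
// runtimes share one workspace. Each new runtime is prepended to the workspace's user list
// and retains it, and the workspace grows to the largest requirement among its users.
// subgraph_a/subgraph_b are hypothetical subgraphs; error handling is elided.
//
//   xnn_workspace_t workspace = NULL;
//   xnn_create_workspace(&workspace);
//   xnn_runtime_t runtime_a = NULL;
//   xnn_runtime_t runtime_b = NULL;
//   xnn_create_runtime_v4(subgraph_a, NULL, workspace, threadpool, 0, &runtime_a);
//   xnn_create_runtime_v4(subgraph_b, NULL, workspace, threadpool, 0, &runtime_b);
//   xnn_release_workspace(workspace);  // the runtimes still hold their own references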

enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Validate inputs without changing internal state.
  // This ensures that the runtime stays in a consistent state if validation fails midway.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    if (value_id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        value_id, i);
      return xnn_status_invalid_parameter;
    }

    const struct xnn_blob* blob = &runtime->blobs[value_id];
    if (blob->allocation_type != xnn_allocation_type_external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
      return xnn_status_invalid_parameter;
    }
  }

  // Apply runtime state changes.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    struct xnn_blob* blob = &runtime->blobs[value_id];
    blob->data = external_value->data;
  }

  for (size_t i = 0; i < runtime->num_ops; i++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[i];
    if (opdata->operator_objects[0] == NULL) {
      // Operator was removed during optimization
      continue;
    }

    // Ensure that the weights cache is finalized.
    struct xnn_weights_cache* weights_cache = opdata->operator_objects[0]->weights_cache;
    if (weights_cache != NULL && !xnn_weights_cache_is_finalized(weights_cache)) {
      xnn_log_error("weights cache needs to be finalized before setup/infer");
      return xnn_status_invalid_state;
    }

    assert(opdata->setup != NULL);
    const enum xnn_status status = opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
    if (status != xnn_status_success) {
      xnn_log_error("failed to setup runtime: error in operator #%zu", i);
      return status;
    }
  }

  return xnn_status_success;
}
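
// Illustrative usage sketch (not part of this file): external values are bound by value ID
// before inference. input_id/output_id and the buffers are hypothetical, obtained when the
// subgraph was defined; error handling is elided.
//
//   const struct xnn_external_value external_values[] = {
//     {.id = input_id, .data = input_buffer},
//     {.id = output_id, .data = output_buffer},
//   };
//   xnn_setup_runtime(runtime, 2, external_values);
//   xnn_invoke_runtime(runtime);  // defined below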

static xnn_timestamp xnn_read_timer() {
  xnn_timestamp timestamp;
#ifdef __MACH__
  timestamp = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
  if (timestamp == 0) {
    xnn_log_warning("clock_gettime failed: error code %d", errno);
  }
#elif __EMSCRIPTEN__
  timestamp = emscripten_get_now();
#elif XNN_PLATFORM_WINDOWS
  BOOL res = QueryPerformanceCounter(&timestamp);
  if (!res) {
    xnn_log_error("QueryPerformanceCounter failed: error code %u", GetLastError());
    memset(&timestamp, 0, sizeof(timestamp));
  }
#else
  int res = clock_gettime(CLOCK_MONOTONIC, &timestamp);
  if (res != 0) {
    xnn_log_error("clock_gettime failed: error code %d", errno);
    memset(&timestamp, 0, sizeof(timestamp));
  }
#endif
  return timestamp;
}

static inline uint64_t xnn_get_elapsed_time(const xnn_timestamp* start, const xnn_timestamp* end) {
#ifdef __MACH__
  const uint64_t kMicrosInNanos = 1000;
  return (*end - *start) / kMicrosInNanos;
#elif __EMSCRIPTEN__
  const double kMillisInMicros = 1.0e3;
  return (uint64_t) ((*end - *start) * kMillisInMicros);
#elif XNN_PLATFORM_WINDOWS
  const uint64_t kMicrosInSec = 1000 * 1000;
  LARGE_INTEGER frequency;
  BOOL res = QueryPerformanceFrequency(&frequency);
  if (!res) {
    xnn_log_error("QueryPerformanceFrequency failed: error code %u", GetLastError());
    return 0;
  }
  return ((end->QuadPart - start->QuadPart) * kMicrosInSec) / frequency.QuadPart;
#else
  const uint64_t kNanosInMicro = UINT64_C(1000);
  const uint64_t kNanosInSec = UINT64_C(1000000000);
  const uint64_t secs = (end->tv_sec - start->tv_sec) * kNanosInSec;
  const uint64_t ns_secs = (end->tv_nsec - start->tv_nsec);
  return (secs + ns_secs) / kNanosInMicro;
#endif
}

enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime,
                                               enum xnn_profile_info param_name,
                                               size_t param_value_size,
                                               void* param_value,
                                               size_t* param_value_size_ret)
{
  if (!runtime->profiling) {
    return xnn_status_invalid_state;
  }
  enum xnn_status status = xnn_status_success;
  size_t required_size = 0;
  const struct xnn_operator_data* opdata = runtime->opdata;
  switch (param_name) {
    case xnn_profile_info_num_operators:
      required_size = sizeof(size_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        size_t num_valid_ops = 0;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            num_valid_ops += 1;
          }
        }
        memcpy(param_value, &num_valid_ops, required_size);
      }
      break;
    case xnn_profile_info_operator_name:
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
          size_t op_name_len = strlen(op_name) + 1;
          if (opdata[i].operator_objects[0]->ukernel.type != xnn_ukernel_type_default) {
            op_name_len += strlen(xnn_ukernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type)) + 1;
          }
          required_size += op_name_len;
        }
      }
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        char* name_out = (char*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
            size_t op_name_len = strlen(op_name) + 1;
            if (opdata[i].operator_objects[0]->ukernel.type != xnn_ukernel_type_default) {
              const char* ukernel_type = xnn_ukernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type);
              op_name_len += strlen(ukernel_type) + 1;
              snprintf(name_out, op_name_len, "%s %s", op_name, ukernel_type);
            } else {
              snprintf(name_out, op_name_len, "%s", op_name);
            }
            name_out += op_name_len;
          }
        }
      }
      break;
    case xnn_profile_info_operator_timing:
    {
      size_t num_valid_ops = 0;
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          num_valid_ops += 1;
        }
      }
      required_size = num_valid_ops * sizeof(uint64_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        xnn_timestamp previous_ts = runtime->start_ts;
        uint64_t* data = (uint64_t*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            uint64_t op_time = 0;
            for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
              if (opdata[i].operator_objects[j] != NULL) {
                op_time += xnn_get_elapsed_time(&previous_ts, &opdata[i].end_ts[j]);
                previous_ts = opdata[i].end_ts[j];
              }
            }
            *data++ = op_time;
          }
        }
      }
      break;
    }
    default:
      status = xnn_status_invalid_parameter;
  }
  return status;
}
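
// Illustrative usage sketch (not part of this file): profiling queries follow a two-call
// pattern. The first call is expected to fail with xnn_status_out_of_memory and report the
// required size; the second call retrieves the data. The runtime must have been created with
// XNN_FLAG_BASIC_PROFILING and invoked at least once so that timestamps exist.
//
//   size_t required = 0;
//   if (xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing,
//                                      0, NULL, &required) == xnn_status_out_of_memory) {
//     uint64_t* timings = malloc(required);
//     xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing,
//                                    required, timings, &required);
//     /* ... consume per-operator timings (in microseconds) ... */
//     free(timings);
//   }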

enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  if (runtime->profiling) {
    runtime->start_ts = xnn_read_timer();
  }
  for (size_t i = 0; i < runtime->num_ops; i++) {
    for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
      if (runtime->opdata[i].operator_objects[j] == NULL) {
        // Operator was removed after fusion
        continue;
      }

      const enum xnn_status status = xnn_run_operator(runtime->opdata[i].operator_objects[j], runtime->threadpool);
      if (status != xnn_status_success) {
        return status;
      }
      if (runtime->profiling) {
        runtime->opdata[i].end_ts[j] = xnn_read_timer();
      }
    }
  }
  return xnn_status_success;
}

enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime != NULL) {
    if (runtime->opdata != NULL) {
      for (size_t i = 0; i < runtime->num_ops; i++) {
        for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
          xnn_delete_operator(runtime->opdata[i].operator_objects[j]);
        }
      }
      xnn_release_memory(runtime->opdata);

      xnn_release_memory(runtime->blobs);
      if (runtime->workspace != NULL) {
        // Remove this runtime from the list of users of the workspace.
        assert(runtime->workspace->first_user != NULL);
        if (runtime->workspace->first_user == runtime) {
          runtime->workspace->first_user = runtime->next_workspace_user;
        } else {
          xnn_runtime_t prev = runtime->workspace->first_user;
          xnn_runtime_t curr = prev->next_workspace_user;
          while (curr != runtime) {
            prev = curr;
            curr = curr->next_workspace_user;
          }
          assert(curr == runtime);
          prev->next_workspace_user = curr->next_workspace_user;
        }
        xnn_release_workspace(runtime->workspace);
      }
    }
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
    xnn_release_code_cache(&runtime->code_cache);
#endif
    xnn_release_memory(runtime);
  }
  return xnn_status_success;
}