• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <xnnpack.h>
12 #include <xnnpack/common.h>
13 #include <xnnpack/cache.h>
14 #include <xnnpack/node-type.h>
15 
16 #if defined(EMSCRIPTEN)
17 #include <emscripten/emscripten.h>
18 #elif XNN_PLATFORM_WINDOWS
19 #include <windows.h>
20 #else
21 #include <time.h>
22 #endif
23 
// Maximum number of input/output Value IDs a single subgraph Node can reference.
#define XNN_MAX_INPUTS 4
#define XNN_MAX_OUTPUTS 4

// Maximum number of input/output blob indices a single runtime operator can reference.
#define XNN_MAX_RUNTIME_INPUTS 4
#define XNN_MAX_RUNTIME_OUTPUTS 4

// Sentinel Node ID used where no valid Node exists (e.g. the producer of an external input).
#define XNN_INVALID_NODE_ID UINT32_MAX

// Maximum number of xnn_operator_t objects backing one xnn_operator_data entry.
#define XNN_MAX_OPERATOR_OBJECTS 4

/// Disable fusion of nodes in subgraph. Fusion is enabled by default, set this flag to turn it off.
#define XNN_FLAG_NO_OPERATOR_FUSION 0x80000000
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
/// Shape of a dense tensor: number of dimensions and the extent of each one.
struct xnn_shape {
  /// Number of valid entries in dim; at most XNN_MAX_TENSOR_DIMS.
  size_t num_dims;
  /// Extent of each dimension; entries at index >= num_dims are unused.
  size_t dim[XNN_MAX_TENSOR_DIMS];
};
45 
/// Kind of collection a Value represents.
enum xnn_value_type {
  /// Uninitialized / cleared Value slot.
  xnn_value_type_invalid = 0,
  /// Dense tensor (currently the only supported kind).
  xnn_value_type_dense_tensor = 1,
};
50 
/// Memory layout of a 4D tensor Value.
enum xnn_layout_type {
  /// Channels-last layout (batch, height, width, channels).
  xnn_layout_type_nhwc = 0,
  /// Channels-first layout (batch, channels, height, width).
  xnn_layout_type_nchw = 1,
};
55 
56 /// Abstraction for a collections of elements produced and consumed by nodes.
57 struct xnn_value {
58   /// Unique ID for the value.
59   uint32_t id;
60   /// Type of the collection of elements.
61   ///
62   /// Currently only dense tensors are supported.
63   /// Other types (e.g. sparse tensors) might be supported in the future.
64   enum xnn_value_type type;
65   /// Type of elements in the collection.
66   enum xnn_datatype datatype;
67   /// Per-value quantization parameters.
68   struct {
69     /// Offset from zero of the quantized elements.
70     int32_t zero_point;
71     union {
72       /// Multiplication factor to convert quantized elements to real representation.
73       float scale;
74       struct {
75         /// Per-channel multiplication factor to convert quantized elements to real representation.
76         const float* channelwise_scale;
77         /// Index of the channel dimension with per-channel quantization parameters.
78         size_t channel_dimension;
79       };
80     };
81   } quantization;
82   /// Tensor shape.
83   struct xnn_shape shape;
84   /// Binary features of the tensor. Supported values are any combination of:
85   /// - XNN_VALUE_FLAG_EXTERNAL_INPUT
86   /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
87   uint32_t flags;
88   /// Static initialization data. Must be null for non-static values.
89   const void* data;
90   /// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID is the Value is an external input.
91   uint32_t producer;
92   /// Index of the first Node that consume the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
93   /// graph (e.g. Value is an external output).
94   uint32_t first_consumer;
95   /// Number of Nodes that consume the value.
96   /// If multiple inputs in a Node refer to this Value as input, the Node is counted as consumer multiple times.
97   /// If the Value is an external output, it counts as having an extra consumer.
98   uint32_t num_consumers;
99   uint32_t num_nchw_compatible_consumers;
100   enum xnn_layout_type layout;
101   /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
102   /// Indicates that this value should be converted to FP16.
103   bool fp16_compatible;
104   /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
105   /// Indicates Value ID of the FP16 variant of this Value.
106   uint32_t fp16_id;
107   /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
108   /// Indicates Value ID of the FP32 variant of this Value.
109   uint32_t fp32_id;
110 };
111 
112 
xnn_value_is_external(const struct xnn_value * value)113 XNN_INLINE bool xnn_value_is_external(const struct xnn_value* value) {
114   return (value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0;
115 }
116 
xnn_value_is_external_output(const struct xnn_value * value)117 XNN_INLINE bool xnn_value_is_external_output(const struct xnn_value* value) {
118   return (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) != 0;
119 }
120 
xnn_value_is_external_input(const struct xnn_value * value)121 XNN_INLINE bool xnn_value_is_external_input(const struct xnn_value* value) {
122   return (value->flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) != 0;
123 }
124 
/// Who owns the memory backing a blob, and for how long.
enum xnn_allocation_type {
  xnn_allocation_type_invalid = 0,
  /// Static data that is provided by caller, needs to outlive the xnn_runtime.
  xnn_allocation_type_static,
  /// Lives in XNNPACK-managed internal workspace.
  xnn_allocation_type_workspace,
  /// Non-static data that is external to the runtime, provided by caller, specified in xnn_setup_runtime.
  xnn_allocation_type_external,
};
134 
/// Memory region backing a runtime Value.
struct xnn_blob {
  /// Size in bytes.
  size_t size;
  /// Data pointer.
  void* data;
  /// Ownership/lifetime category of the data pointer (see enum xnn_allocation_type).
  enum xnn_allocation_type allocation_type;
};
142 
// Forward declarations for the function-pointer typedefs below.
struct xnn_node;
struct xnn_operator_data;

/// Factory function type: creates the operator object(s) in opdata from a subgraph node.
/// values/num_values describe all Values of the subgraph; caches may be used for code/weights reuse.
typedef enum xnn_status (*xnn_create_operator_fn)(
  const struct xnn_node* node,
  const struct xnn_value* values,
  size_t num_values,
  struct xnn_operator_data* opdata,
  const struct xnn_caches* caches);
152 
/// Setup function type: binds the operator in opdata to the blob memory and threadpool before execution.
typedef enum xnn_status (*xnn_setup_operator_fn)(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool);
158 
/// Numeric precision a Node computes in, or the conversion pair for convert-style nodes.
enum xnn_compute_type {
  xnn_compute_type_invalid = 0,
  // Single-precision computation.
  xnn_compute_type_fp32,
  // Half-precision computation.
  xnn_compute_type_fp16,
  // Quantized computation: per-channel int8, per-tensor int8, per-tensor uint8.
  xnn_compute_type_qc8,
  xnn_compute_type_qs8,
  xnn_compute_type_qu8,
  // Conversions out of FP32.
  xnn_compute_type_fp32_to_fp16,
  xnn_compute_type_fp32_to_qs8,
  xnn_compute_type_fp32_to_qu8,
  // Conversions into FP32.
  xnn_compute_type_fp16_to_fp32,
  xnn_compute_type_qs8_to_fp32,
  xnn_compute_type_qu8_to_fp32,
};
173 
/// Subgraph Node: one operator invocation connecting input Values to output Values.
struct xnn_node {
  /// Operator type of this node.
  enum xnn_node_type type;
  /// Unique ID of the node within the subgraph.
  uint32_t id;
  /// Numeric precision (or conversion pair) the node computes in.
  enum xnn_compute_type compute_type;
  /// Static parameters of the operator node.
  union {
    /// Parameters for 2D convolution nodes.
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } convolution_2d;
    /// Parameters for 2D deconvolution (transposed convolution) nodes.
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t adjustment_height;
      uint32_t adjustment_width;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t upsampling_height;
      uint32_t upsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } deconvolution_2d;
    /// Parameters for 2D depthwise convolution nodes.
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t depth_multiplier;
      size_t input_channels;
    } depthwise_convolution_2d;
    /// Parameters for depth-to-space nodes.
    struct {
      uint32_t block_size;
    } depth_to_space;
    /// Parameters for 2D pooling nodes.
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t pooling_height;
      uint32_t pooling_width;
      uint32_t stride_height;
      uint32_t stride_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
    } pooling_2d;
    /// Parameters for ELU nodes.
    struct {
      float alpha;
    } elu;
    /// Parameters for Leaky ReLU nodes.
    struct {
      float negative_slope;
    } leaky_relu;
    /// Parameters for static constant-pad nodes.
    struct {
      size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
      size_t post_paddings[XNN_MAX_TENSOR_DIMS];
      uint32_t padding_value;
    } static_pad;
    /// Parameters for static reshape nodes.
    struct {
      struct xnn_shape new_shape;
    } static_reshape;
    /// Parameters for static resize nodes.
    struct {
      size_t new_height;
      size_t new_width;
    } static_resize;
    /// Parameters for concatenation nodes.
    struct {
      size_t axis;
    } concatenate;
    /// Parameters for even-split nodes.
    struct {
      size_t axis;
    } even_split;
    /// Parameters for transpose nodes.
    struct {
      size_t perm[XNN_MAX_TENSOR_DIMS];
      size_t num_dims;
    } transpose;
  } params;
  /// Fused activation clamping range applied to the node output.
  struct {
    float output_min;
    float output_max;
  } activation;
  /// Value IDs for node inputs.
  uint32_t inputs[XNN_MAX_INPUTS];
  uint32_t num_inputs;
  /// Value IDs for node outputs.
  uint32_t outputs[XNN_MAX_OUTPUTS];
  uint32_t num_outputs;
  /// Binary features of the node (XNN_FLAG_* values passed at node creation).
  uint32_t flags;
  /// Layout-analysis flags (set during xnn_subgraph_rewrite_for_nchw).
  uint32_t layout_flags;
  /// ID of the representative node of the sparse-inference cluster this node belongs to.
  uint32_t cluster_leader;
  // Number of filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_params;
  // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_zeroes;
  // Factory function to create an operator object from the node.
  xnn_create_operator_fn create;
  // Function to setup an operator using opdata.
  xnn_setup_operator_fn setup;
};
294 
// Platform-specific timestamp type used for profiling (see start_ts/end_ts fields).
#ifdef __MACH__
typedef uint64_t xnn_timestamp;
#elif __EMSCRIPTEN__
typedef double xnn_timestamp;
#elif XNN_PLATFORM_WINDOWS
typedef LARGE_INTEGER xnn_timestamp;
#else
typedef struct timespec xnn_timestamp;
#endif
304 
/// Execution-plan entry: the operator object(s) created from one Node, plus the
/// shape/padding/IO arguments cached from the node for use during setup and profiling.
struct xnn_operator_data {
  /// Operator objects created by the node's xnn_create_operator_fn.
  xnn_operator_t operator_objects[XNN_MAX_OPERATOR_OBJECTS];
  /// Function used to bind the operator objects to blob memory before execution.
  xnn_setup_operator_fn setup;
  size_t batch_size;
  size_t input_height;
  size_t input_width;
  size_t output_height;
  size_t output_width;
  struct xnn_shape shape1;
  struct xnn_shape shape2;
  size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
  size_t post_paddings[XNN_MAX_TENSOR_DIMS];
  uint32_t adjustment_height;
  uint32_t adjustment_width;
  /// Blob indices for operator inputs/outputs.
  uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
  uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
  /// Per-operator-object completion timestamps, recorded when profiling is enabled.
  xnn_timestamp end_ts[XNN_MAX_OPERATOR_OBJECTS];
};
323 
/// Graph of Nodes connected by Values, built by the caller and compiled into an xnn_runtime.
struct xnn_subgraph {
  /// Number of Value IDs reserved for communication with external graph representation.
  /// Values created during subgraph transformation avoid using IDs in [0, external_value_ids-1] range.
  uint32_t external_value_ids;

  /// Capacity of the values array; num_values of them are in use.
  uint32_t num_reserved_values;
  uint32_t num_values;
  struct xnn_value* values;

  /// Capacity of the nodes array; num_nodes of them are in use.
  uint32_t num_reserved_nodes;
  uint32_t num_nodes;
  struct xnn_node* nodes;
};
337 
338 /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
struct xnn_runtime {
  /// Number of external Values the caller supplies at setup time.
  uint32_t num_external_values;

  /// List of operators in the execution plan, in execution order.
  struct xnn_operator_data* opdata;
  /// Number of operators in the execution plan.
  size_t num_ops;

  /// Memory regions backing the runtime Values.
  struct xnn_blob* blobs;
  size_t num_blobs;

  /// Workspace providing memory for workspace-allocated blobs; may be shared between runtimes.
  struct xnn_workspace* workspace;
  /// Next runtime sharing the same workspace (intrusive singly-linked list).
  struct xnn_runtime* next_workspace_user;

#if XNN_PLATFORM_JIT
  struct xnn_code_cache code_cache;
#endif // XNN_PLATFORM_JIT

  /// Threadpool used to execute the operators.
  pthreadpool_t threadpool;

  /// Whether per-operator timing is recorded during execution.
  bool profiling;
  // The start timestamp of the first operator in the subgraph. This is set when profiling is true.
  xnn_timestamp start_ts;
};
363 
// Allocates a new internal (non-external) Value in the subgraph and returns a pointer to it.
struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);

// Allocates a new Node at the end of the subgraph and returns a pointer to it.
struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);

// Grows the subgraph's node array by num_nodes entries.
void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes);

// Size in bytes of the tensor identified by value_id.
size_t xnn_tensor_get_size(
  xnn_subgraph_t subgraph,
  uint32_t value_id);

// Product of all shape dimensions
size_t xnn_shape_multiply_all_dims(
  const struct xnn_shape shape[1]);

// Product of all shape dimensions, except for the specified number of the last dimensions
size_t xnn_shape_multiply_batch_dims(
  const struct xnn_shape shape[1], size_t num_nonbatch_dims);

// Product of all shape dimensions, except for the last (channel) one
size_t xnn_shape_multiply_non_channel_dims(
  const struct xnn_shape shape[1]);

// Runs graph-level optimizations (e.g. fusion, unless XNN_FLAG_NO_OPERATOR_FUSION is set in flags).
enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);

// Rewrites subgraph to use NCHW layout where beneficial (sparse inference analysis).
void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
// Rewrites subgraph for FP16, returns true if success, false if rewrite failed.
bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph);

// Resets a Node / Value to its empty (invalid) state.
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);

// Copies all fields of src_value into dst_value.
void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);

// Initializes node as a Convert node from input_id to output_id with the given compute type.
void xnn_init_convert_node(
  struct xnn_node* node,
  enum xnn_compute_type compute_type,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
403 
/// Reference-counted memory arena shared by one or more runtimes for workspace-allocated blobs.
struct xnn_workspace {
  /// Arena memory and its size in bytes.
  void* data;
  size_t size;
  /// Head of the intrusive list of runtimes using this workspace (linked via next_workspace_user).
  struct xnn_runtime* first_user;
  // Workspace will be destroyed in xnn_delete_runtime or xnn_delete_workspace if ref_count reaches 0.
  size_t ref_count;
};
411 
412 #ifdef __cplusplus
413 }  // extern "C"
414 #endif
415