// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>

#define XNN_MAX_INPUTS 3
#define XNN_MAX_OUTPUTS 2

#define XNN_MAX_RUNTIME_INPUTS 2
#define XNN_MAX_RUNTIME_OUTPUTS 2

#define XNN_INVALID_NODE_ID UINT32_MAX

#ifdef __cplusplus
extern "C" {
#endif

struct xnn_shape {
  size_t num_dims;
  size_t dim[XNN_MAX_TENSOR_DIMS];
};
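
// Illustrative example (not part of the API): a 4-dimensional NHWC tensor of
// size 1x224x224x3 could be described as
//   struct xnn_shape shape = { .num_dims = 4, .dim = { 1, 224, 224, 3 } };
// with the trailing entries of dim[] left unused.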

enum xnn_value_type {
  xnn_value_type_invalid = 0,
  xnn_value_type_dense_tensor = 1,
};

enum xnn_layout_type {
  xnn_layout_type_nhwc = 0,
  xnn_layout_type_nchw = 1,
};

/// Abstraction for a collection of elements produced and consumed by nodes.
struct xnn_value {
  /// Unique ID for the value.
  uint32_t id;
  /// Type of the collection of elements.
  ///
  /// Currently only dense tensors are supported.
  /// Other types (e.g. sparse tensors) might be supported in the future.
  enum xnn_value_type type;
  /// Type of elements in the collection.
  enum xnn_datatype datatype;
  /// Per-value quantization parameters.
  struct {
    /// Offset from zero of the quantized elements.
    int32_t zero_point;
    union {
      /// Multiplication factor to convert quantized elements to real representation.
      float scale;
      struct {
        /// Per-channel multiplication factor to convert quantized elements to real representation.
        const float* channelwise_scale;
        /// Index of the channel dimension with per-channel quantization parameters.
        size_t channel_dimension;
      };
    };
  } quantization;
  /// Tensor shape.
  struct xnn_shape shape;
  /// Binary features of the tensor. Supported values are any combination of:
  /// - XNN_VALUE_FLAG_EXTERNAL_INPUT
  /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
  uint32_t flags;
  /// Static initialization data. Must be null for non-static values.
  const void* data;
  /// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID if the Value is an external input.
  uint32_t producer;
  /// Index of the first Node that consumes the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
  /// graph (e.g. the Value is an external output).
  uint32_t first_consumer;
  /// Number of Nodes that consume the value.
  /// If multiple inputs in a Node refer to this Value, the Node is counted as a consumer multiple times.
  /// If the Value is an external output, it counts as having an extra consumer.
  uint32_t num_consumers;
  /// Number of consumers of this Value that can operate on it in NCHW layout.
  uint32_t num_nchw_compatible_consumers;
  /// Data layout of the Value (NHWC or NCHW).
  enum xnn_layout_type layout;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates that this value should be converted to FP16.
  bool fp16_compatible;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates Value ID of the FP16 variant of this Value.
  uint32_t fp16_id;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates Value ID of the FP32 variant of this Value.
  uint32_t fp32_id;
};
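
// Illustrative sketch of how the quantization parameters above map quantized
// elements to real values, assuming the usual affine quantization convention
// (this helper is hypothetical and not declared anywhere in XNNPACK):
//   static inline float example_dequantize(int8_t q, const struct xnn_value* v) {
//     return v->quantization.scale * (float) ((int32_t) q - v->quantization.zero_point);
//   }
// For channelwise-quantized values, quantization.channelwise_scale[c] takes the
// place of quantization.scale for elements in channel c along the dimension
// given by quantization.channel_dimension.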

struct xnn_blob {
  /// Size in bytes.
  size_t size;
  /// Data pointer.
  void* data;
  /// Whether the blob corresponds to an external input or output Value.
  bool external;
};

struct xnn_node;
struct xnn_operator_data;

/// Factory function to create an operator object from a Node.
typedef enum xnn_status (*xnn_create_operator_fn)(
  const struct xnn_node* node,
  const struct xnn_value* values,
  size_t num_values,
  struct xnn_operator_data* opdata);

/// Function to set up a previously created operator using opdata and the Runtime blobs.
typedef enum xnn_status (*xnn_setup_operator_fn)(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool);
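
// Hypothetical sketch of a factory function matching xnn_create_operator_fn
// (name and body are illustrative only; real implementations live in the
// per-operator source files):
//   static enum xnn_status create_example_operator(
//     const struct xnn_node* node,
//     const struct xnn_value* values,
//     size_t num_values,
//     struct xnn_operator_data* opdata)
//   {
//     // Read static parameters from node->params and shapes from the Values
//     // referenced by node->inputs, create the operator through the public
//     // operator API (e.g. xnn_create_convolution2d_nhwc_f32), and store the
//     // result in opdata->operator_object.
//     return xnn_status_success;
//   }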

enum xnn_compute_type {
  xnn_compute_type_invalid = 0,
  xnn_compute_type_fp32,
  xnn_compute_type_fp16,
  xnn_compute_type_qc8,
  xnn_compute_type_qs8,
  xnn_compute_type_qu8,
  xnn_compute_type_fp32_to_fp16,
  xnn_compute_type_fp32_to_qs8,
  xnn_compute_type_fp32_to_qu8,
  xnn_compute_type_fp16_to_fp32,
  xnn_compute_type_qs8_to_fp32,
  xnn_compute_type_qu8_to_fp32,
};

enum xnn_node_type {
  xnn_node_type_invalid = 0,
  xnn_node_type_abs,
  xnn_node_type_add2,
  xnn_node_type_argmax_pooling_2d,
  xnn_node_type_average_pooling_2d,
  xnn_node_type_bankers_rounding,
  xnn_node_type_ceiling,
  xnn_node_type_clamp,
  xnn_node_type_convert,
  xnn_node_type_convolution_2d,
  xnn_node_type_deconvolution_2d,
  xnn_node_type_depthwise_convolution_2d,
  xnn_node_type_depth_to_space,
  xnn_node_type_divide,
  xnn_node_type_elu,
  xnn_node_type_fully_connected,
  xnn_node_type_floor,
  xnn_node_type_global_average_pooling_2d,
  xnn_node_type_hardswish,
  xnn_node_type_leaky_relu,
  xnn_node_type_max_pooling_2d,
  xnn_node_type_maximum2,
  xnn_node_type_minimum2,
  xnn_node_type_multiply2,
  xnn_node_type_negate,
  xnn_node_type_prelu,
  xnn_node_type_sigmoid,
  xnn_node_type_softmax,
  xnn_node_type_static_constant_pad,
  xnn_node_type_static_reshape,
  xnn_node_type_static_resize_bilinear_2d,
  xnn_node_type_square,
  xnn_node_type_square_root,
  xnn_node_type_squared_difference,
  xnn_node_type_subtract,
  xnn_node_type_unpooling_2d,
};

struct xnn_node {
  enum xnn_node_type type;
  uint32_t id;
  enum xnn_compute_type compute_type;
  /// Static parameters of the operator node.
  union {
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } convolution_2d;
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t adjustment_height;
      uint32_t adjustment_width;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t upsampling_height;
      uint32_t upsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } deconvolution_2d;
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t depth_multiplier;
      size_t input_channels;
    } depthwise_convolution_2d;
    struct {
      uint32_t block_size;
    } depth_to_space;
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t pooling_height;
      uint32_t pooling_width;
      uint32_t stride_height;
      uint32_t stride_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
    } pooling_2d;
    struct {
      float alpha;
    } elu;
    struct {
      float negative_slope;
    } leaky_relu;
    struct {
      size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
      size_t post_paddings[XNN_MAX_TENSOR_DIMS];
      uint32_t padding_value;
    } static_pad;
    struct {
      struct xnn_shape new_shape;
    } static_reshape;
    struct {
      size_t new_height;
      size_t new_width;
    } static_resize;
  } params;
  struct {
    float output_min;
    float output_max;
  } activation;
  /// Value IDs for node inputs.
  uint32_t inputs[XNN_MAX_INPUTS];
  uint32_t num_inputs;
  /// Value IDs for node outputs.
  uint32_t outputs[XNN_MAX_OUTPUTS];
  uint32_t num_outputs;
  uint32_t flags;
  uint32_t layout_flags;
  uint32_t cluster_leader;
  // Number of filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_params;
  // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_zeroes;
  // Factory function to create an operator object from the node.
  xnn_create_operator_fn create;
  // Function to set up an operator using opdata.
  xnn_setup_operator_fn setup;
};
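
// Illustrative example (informal): a 2D Convolution node would typically have
// type xnn_node_type_convolution_2d, keep its static hyperparameters in
// params.convolution_2d and its clamping range in activation, reference the
// input, filter, and bias Values through inputs[] (num_inputs == 3), and
// reference its result Value through outputs[] (num_outputs == 1).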

struct xnn_operator_data {
  xnn_operator_t operator_object;
  xnn_setup_operator_fn setup;
  size_t batch_size;
  size_t input_height;
  size_t input_width;
  size_t output_height;
  size_t output_width;
  struct xnn_shape shape1;
  struct xnn_shape shape2;
  size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
  size_t post_paddings[XNN_MAX_TENSOR_DIMS];
  uint32_t adjustment_height;
  uint32_t adjustment_width;
  uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
  uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
};

struct xnn_subgraph {
  /// Number of Value IDs reserved for communication with the external graph representation.
  /// Values created during subgraph transformations avoid using IDs in the [0, external_value_ids-1] range.
  uint32_t external_value_ids;

  uint32_t num_reserved_values;
  uint32_t num_values;
  struct xnn_value* values;

  uint32_t num_reserved_nodes;
  uint32_t num_nodes;
  struct xnn_node* nodes;
};

/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
struct xnn_runtime {
  uint32_t num_external_values;

  /// List of operators in the execution plan, in execution order.
  struct xnn_operator_data* opdata;
  /// Number of operators in the execution plan.
  size_t num_ops;

  struct xnn_blob* blobs;
  size_t num_blobs;

  /// Memory block backing the data pointers of non-external blobs.
  void* workspace;

  pthreadpool_t threadpool;
};
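
// Illustrative sketch of the intended lifecycle, using the public entry points
// declared in <xnnpack.h> (error handling omitted; subgraph, threadpool, and
// the external_values array are assumed to have been set up by the caller):
//   xnn_runtime_t runtime = NULL;
//   xnn_create_runtime_v2(subgraph, threadpool, /*flags=*/0, &runtime);
//   xnn_setup_runtime(runtime, num_external_values, external_values);
//   xnn_invoke_runtime(runtime);
//   xnn_delete_runtime(runtime);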

// Allocates and returns a new internal Value in the Subgraph.
struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);

// Allocates and returns a new Node in the Subgraph.
struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);

// Grows the Subgraph by num_nodes additional Nodes.
void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes);

// Size, in bytes, of the tensor data for the given Value.
size_t xnn_tensor_get_size(
  xnn_subgraph_t subgraph,
  uint32_t value_id);

// Product of all shape dimensions.
size_t xnn_shape_multiply_all_dims(
  const struct xnn_shape shape[1]);

// Product of all shape dimensions, except for the last (channel) one.
size_t xnn_shape_multiply_non_channel_dims(
  const struct xnn_shape shape[1]);
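
// Illustrative example: for a shape with num_dims = 3 and dim = { 2, 3, 4 },
// xnn_shape_multiply_all_dims returns 2*3*4 = 24, while
// xnn_shape_multiply_non_channel_dims returns 2*3 = 6.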

// Applies graph-level optimizations to the Subgraph; behavior is controlled by flags.
enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);

// Rewrites eligible parts of the Subgraph to operate in NCHW layout.
void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);

// Resets a Node / Value to an empty state.
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);

void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);

// Initializes node as a Convert Node with the given compute type, input, and output.
void xnn_init_convert_node(
  struct xnn_node* node,
  enum xnn_compute_type compute_type,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

#ifdef __cplusplus
}  // extern "C"
#endif