// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>

#define XNN_MAX_INPUTS 3
#define XNN_MAX_OUTPUTS 2

#define XNN_MAX_RUNTIME_INPUTS 2
#define XNN_MAX_RUNTIME_OUTPUTS 2

#define XNN_INVALID_NODE_ID UINT32_MAX

#ifdef __cplusplus
extern "C" {
#endif

struct xnn_shape {
  size_t num_dims;
  size_t dim[XNN_MAX_TENSOR_DIMS];
};

enum xnn_value_type {
  xnn_value_type_invalid = 0,
  xnn_value_type_dense_tensor = 1,
};

enum xnn_layout_type {
  xnn_layout_type_nhwc = 0,
  xnn_layout_type_nchw = 1,
};

/// Abstraction for a collection of elements produced and consumed by nodes.
struct xnn_value {
  /// Unique ID for the value.
  uint32_t id;
  /// Type of the collection of elements.
  ///
  /// Currently only dense tensors are supported.
  /// Other types (e.g. sparse tensors) might be supported in the future.
  enum xnn_value_type type;
  /// Type of elements in the collection.
  enum xnn_datatype datatype;
  /// Per-value quantization parameters.
  struct {
    /// Offset from zero of the quantized elements.
    int32_t zero_point;
    union {
      /// Multiplication factor to convert quantized elements to real representation.
      float scale;
      struct {
        /// Per-channel multiplication factor to convert quantized elements to real representation.
        const float* channelwise_scale;
        /// Index of the channel dimension with per-channel quantization parameters.
        size_t channel_dimension;
      };
    };
  } quantization;
  /// Tensor shape.
  struct xnn_shape shape;
  /// Binary features of the tensor. Supported values are any combination of:
  /// - XNN_VALUE_FLAG_EXTERNAL_INPUT
  /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
  uint32_t flags;
  /// Static initialization data. Must be null for non-static values.
  const void* data;
  /// Index of the Subgraph Node that produced the value, or XNN_INVALID_NODE_ID if the Value is an external input.
  uint32_t producer;
  /// Index of the first Node that consumes the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
  /// graph (e.g. Value is an external output).
  uint32_t first_consumer;
  /// Number of Nodes that consume the value.
  /// If multiple inputs in a Node refer to this Value as input, the Node is counted as a consumer multiple times.
  /// If the Value is an external output, it counts as having an extra consumer.
  uint32_t num_consumers;
  /// Number of consuming Nodes that can operate on this Value in NCHW layout.
  uint32_t num_nchw_compatible_consumers;
  /// Tensor layout, assigned during xnn_subgraph_rewrite_for_nchw.
  enum xnn_layout_type layout;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates that this value should be converted to FP16.
  bool fp16_compatible;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates Value ID of the FP16 variant of this Value.
  uint32_t fp16_id;
  /// Set during analysis in xnn_subgraph_rewrite_for_fp16.
  /// Indicates Value ID of the FP32 variant of this Value.
  uint32_t fp32_id;
};
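
// The sketch below is illustrative only and not part of the XNNPACK API: it shows how the
// per-value quantization parameters above map a quantized element back to its real
// representation, i.e. real = scale * (quantized - zero_point). The function name is an
// assumption made for this example, and it assumes per-tensor (not channelwise) quantization.
static inline float xnn_example_dequantize_element(
  const struct xnn_value* value,
  int32_t quantized_element)
{
  // Subtract the zero point first, then apply the per-tensor scale.
  return value->quantization.scale * (float) (quantized_element - value->quantization.zero_point);
}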

struct xnn_blob {
  /// Size in bytes.
  size_t size;
  /// Data pointer.
  void* data;
  /// True if the blob is backed by an externally provided buffer (external input/output)
  /// rather than runtime-managed memory.
  bool external;
};
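
// Illustrative sketch, not part of the XNNPACK API: one way a runtime could size a shared
// workspace from the blobs above, by summing the sizes of all non-external blobs. External
// blobs are backed by caller-provided buffers instead of runtime-managed memory (see
// struct xnn_runtime below). The function name is an assumption made for this example, and
// alignment requirements are ignored for brevity.
static inline size_t xnn_example_internal_blobs_size(
  const struct xnn_blob* blobs,
  size_t num_blobs)
{
  size_t total_size = 0;
  for (size_t i = 0; i < num_blobs; i++) {
    if (!blobs[i].external) {
      // Only internally managed blobs need runtime-owned storage.
      total_size += blobs[i].size;
    }
  }
  return total_size;
}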

struct xnn_node;
struct xnn_operator_data;

typedef enum xnn_status (*xnn_create_operator_fn)(
  const struct xnn_node* node,
  const struct xnn_value* values,
  size_t num_values,
  struct xnn_operator_data* opdata);

typedef enum xnn_status (*xnn_setup_operator_fn)(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool);

enum xnn_compute_type {
  xnn_compute_type_invalid = 0,
  xnn_compute_type_fp32,
  xnn_compute_type_fp16,
  xnn_compute_type_qc8,
  xnn_compute_type_qs8,
  xnn_compute_type_qu8,
  xnn_compute_type_fp32_to_fp16,
  xnn_compute_type_fp32_to_qs8,
  xnn_compute_type_fp32_to_qu8,
  xnn_compute_type_fp16_to_fp32,
  xnn_compute_type_qs8_to_fp32,
  xnn_compute_type_qu8_to_fp32,
};

enum xnn_node_type {
  xnn_node_type_invalid = 0,
  xnn_node_type_abs,
  xnn_node_type_add2,
  xnn_node_type_argmax_pooling_2d,
  xnn_node_type_average_pooling_2d,
  xnn_node_type_bankers_rounding,
  xnn_node_type_ceiling,
  xnn_node_type_clamp,
  xnn_node_type_convert,
  xnn_node_type_convolution_2d,
  xnn_node_type_deconvolution_2d,
  xnn_node_type_depthwise_convolution_2d,
  xnn_node_type_depth_to_space,
  xnn_node_type_divide,
  xnn_node_type_elu,
  xnn_node_type_fully_connected,
  xnn_node_type_floor,
  xnn_node_type_global_average_pooling_2d,
  xnn_node_type_hardswish,
  xnn_node_type_leaky_relu,
  xnn_node_type_max_pooling_2d,
  xnn_node_type_maximum2,
  xnn_node_type_minimum2,
  xnn_node_type_multiply2,
  xnn_node_type_negate,
  xnn_node_type_prelu,
  xnn_node_type_sigmoid,
  xnn_node_type_softmax,
  xnn_node_type_static_constant_pad,
  xnn_node_type_static_reshape,
  xnn_node_type_static_resize_bilinear_2d,
  xnn_node_type_square,
  xnn_node_type_square_root,
  xnn_node_type_squared_difference,
  xnn_node_type_subtract,
  xnn_node_type_unpooling_2d,
};

struct xnn_node {
  enum xnn_node_type type;
  uint32_t id;
  enum xnn_compute_type compute_type;
  /// Static parameters of the operator node.
  union {
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } convolution_2d;
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t adjustment_height;
      uint32_t adjustment_width;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t upsampling_height;
      uint32_t upsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t groups;
      size_t group_input_channels;
      size_t group_output_channels;
    } deconvolution_2d;
    struct {
      uint32_t input_padding_top;
      uint32_t input_padding_right;
      uint32_t input_padding_bottom;
      uint32_t input_padding_left;
      uint32_t kernel_height;
      uint32_t kernel_width;
      uint32_t subsampling_height;
      uint32_t subsampling_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
      uint32_t depth_multiplier;
      size_t input_channels;
    } depthwise_convolution_2d;
    struct {
      uint32_t block_size;
    } depth_to_space;
    struct {
      uint32_t padding_top;
      uint32_t padding_right;
      uint32_t padding_bottom;
      uint32_t padding_left;
      uint32_t pooling_height;
      uint32_t pooling_width;
      uint32_t stride_height;
      uint32_t stride_width;
      uint32_t dilation_height;
      uint32_t dilation_width;
    } pooling_2d;
    struct {
      float alpha;
    } elu;
    struct {
      float negative_slope;
    } leaky_relu;
    struct {
      size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
      size_t post_paddings[XNN_MAX_TENSOR_DIMS];
      uint32_t padding_value;
    } static_pad;
    struct {
      struct xnn_shape new_shape;
    } static_reshape;
    struct {
      size_t new_height;
      size_t new_width;
    } static_resize;
  } params;
  struct {
    float output_min;
    float output_max;
  } activation;
  /// Value IDs for node inputs.
  uint32_t inputs[XNN_MAX_INPUTS];
  uint32_t num_inputs;
  /// Value IDs for node outputs.
  uint32_t outputs[XNN_MAX_OUTPUTS];
  uint32_t num_outputs;
  uint32_t flags;
  uint32_t layout_flags;
  uint32_t cluster_leader;
  // Number of filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_params;
  // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster.
  // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
  size_t num_zeroes;
  // Factory function to create an operator object from the node.
  xnn_create_operator_fn create;
  // Function to set up an operator using opdata.
  xnn_setup_operator_fn setup;
};
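
// Illustrative sketch, not part of the XNNPACK API: the two-phase lowering implied by the
// create/setup callbacks stored in struct xnn_node. A Node is first turned into a concrete
// operator object via its create callback, and later bound to actual memory via its setup
// callback. The function name and control flow are assumptions made for this example.
static inline enum xnn_status xnn_example_lower_and_setup_node(
  const struct xnn_node* node,
  const struct xnn_value* values,
  size_t num_values,
  struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool)
{
  // Phase 1: create the operator object and record static parameters in opdata.
  enum xnn_status status = node->create(node, values, num_values, opdata);
  if (status != xnn_status_success) {
    return status;
  }
  // Phase 2: point the operator at the blobs backing its input/output Values.
  return node->setup(opdata, blobs, num_blobs, threadpool);
}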

struct xnn_operator_data {
  xnn_operator_t operator_object;
  xnn_setup_operator_fn setup;
  size_t batch_size;
  size_t input_height;
  size_t input_width;
  size_t output_height;
  size_t output_width;
  struct xnn_shape shape1;
  struct xnn_shape shape2;
  size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
  size_t post_paddings[XNN_MAX_TENSOR_DIMS];
  uint32_t adjustment_height;
  uint32_t adjustment_width;
  uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
  uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
};

struct xnn_subgraph {
  /// Number of Value IDs reserved for communication with external graph representation.
  /// Values created during subgraph transformation avoid using IDs in the [0, external_value_ids - 1] range.
  uint32_t external_value_ids;

  uint32_t num_reserved_values;
  uint32_t num_values;
  struct xnn_value* values;

  uint32_t num_reserved_nodes;
  uint32_t num_nodes;
  struct xnn_node* nodes;
};

/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
struct xnn_runtime {
  uint32_t num_external_values;

  /// List of operators in the execution plan, in execution order.
  struct xnn_operator_data* opdata;
  /// Number of operators in the execution plan.
  size_t num_ops;

  struct xnn_blob* blobs;
  size_t num_blobs;

  void* workspace;

  pthreadpool_t threadpool;
};

struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);

struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);

void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes);

size_t xnn_tensor_get_size(
  xnn_subgraph_t subgraph,
  uint32_t value_id);

// Product of all shape dimensions
size_t xnn_shape_multiply_all_dims(
  const struct xnn_shape shape[1]);

// Product of all shape dimensions, except for the last (channel) one
size_t xnn_shape_multiply_non_channel_dims(
  const struct xnn_shape shape[1]);

enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);

void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);

void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);

void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);

void xnn_init_convert_node(
  struct xnn_node* node,
  enum xnn_compute_type compute_type,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

#ifdef __cplusplus
}  // extern "C"
#endif
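
// Illustrative sketch, not part of the XNNPACK API: how the execution plan in struct
// xnn_runtime is typically driven, i.e. every operator in opdata[] is set up against the
// runtime's blobs before execution. The function name, the NULL check for operators that
// were optimized away, and the early-return error handling are assumptions made for this
// example.
static inline enum xnn_status xnn_example_setup_execution_plan(struct xnn_runtime* runtime)
{
  for (size_t i = 0; i < runtime->num_ops; i++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[i];
    if (opdata->operator_object == NULL) {
      // Assumed: an entry without an operator object corresponds to a Node that was
      // eliminated or fused during optimization, so there is nothing to set up.
      continue;
    }
    const enum xnn_status status =
      opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
    if (status != xnn_status_success) {
      return status;
    }
  }
  return xnn_status_success;
}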