1 // Copyright 2020 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <algorithm> 9 #include <cassert> 10 #include <cmath> 11 #include <cstddef> 12 #include <cstdlib> 13 #include <unordered_map> 14 #include <numeric> 15 #include <random> 16 #include <vector> 17 #include <type_traits> 18 19 #include <xnnpack.h> 20 #include <xnnpack/subgraph.h> 21 22 #include <gtest/gtest.h> 23 24 namespace xnnpack { 25 26 enum class TensorType { 27 kDense, 28 kSparse, 29 }; 30 31 struct Padding { 32 uint32_t top; 33 uint32_t right; 34 uint32_t bottom; 35 uint32_t left; 36 }; 37 38 struct HeightWidth { 39 uint32_t height; 40 uint32_t width; 41 }; 42 43 using Kernel = HeightWidth; 44 using Subsampling = HeightWidth; 45 using Dilation = HeightWidth; 46 using Upsampling = HeightWidth; 47 using Adjustment = HeightWidth; 48 49 struct ConvolutionParams { 50 Padding padding; 51 Kernel kernel; 52 Subsampling subsampling; 53 Dilation dilation; 54 uint32_t groups; 55 uint32_t group_input_channels; 56 uint32_t group_output_channels; 57 }; 58 59 struct DeconvolutionParams { 60 Padding padding; 61 Adjustment adjustment; 62 Kernel kernel; 63 Upsampling upsampling; 64 Dilation dilation; 65 uint32_t groups; 66 uint32_t group_input_channels; 67 uint32_t group_output_channels; 68 }; 69 70 struct DepthwiseConvolutionParams { 71 Padding padding; 72 Kernel kernel; 73 Subsampling subsampling; 74 Dilation dilation; 75 uint32_t depth_multiplier; 76 uint32_t input_channels; 77 }; 78 79 class SubgraphTester { 80 public: SubgraphTester(uint32_t external_value_ids)81 explicit SubgraphTester(uint32_t external_value_ids) { 82 xnn_status status = xnn_initialize(nullptr); 83 EXPECT_EQ(status, xnn_status_success); 84 85 xnn_subgraph_t subgraph_ptr = nullptr; 86 status = xnn_create_subgraph(external_value_ids, 0 /* flags */, &subgraph_ptr); 87 EXPECT_EQ(status, xnn_status_success); 88 subgraph_.reset(subgraph_ptr); 89 90 std::random_device random_device; 91 rng_ = std::mt19937(random_device()); 92 } 93 94 inline SubgraphTester& AddDynamicTensorF32(const std::vector<size_t>& dims, 95 uint32_t external_id, 96 uint32_t flags = 0) { 97 uint32_t id_out = 0; 98 const xnn_status status = 99 xnn_define_tensor_value(subgraph_.get(), xnn_datatype_fp32, dims.size(), 100 dims.data(), nullptr, external_id, flags, &id_out); 101 EXPECT_EQ(status, xnn_status_success); 102 EXPECT_EQ(id_out, external_id); 103 104 return *this; 105 } 106 107 inline SubgraphTester& AddStaticTensorF32(const std::vector<size_t>& dims, 108 TensorType tensor_type, 109 uint32_t external_id, 110 uint32_t flags = 0) { 111 const size_t num_elements = NumElements(dims); 112 static_data_.emplace_back(num_elements * sizeof(float)); 113 float* data = reinterpret_cast<float*>(static_data_.back().data()); 114 115 if (tensor_type == TensorType::kDense) { 116 std::generate(data, data + num_elements, [&]() { return f32dist(rng_); }); 117 } else { 118 // Create tensor with 90% sparsity in two steps: 119 // 1. Generate non-zero elements in the beginning of the vector 120 // 2. Randomize positions of non-zero elements 121 const size_t num_nonzero_elements = num_elements / 10; 122 std::generate(data, data + num_nonzero_elements, [&]() { return f32dist(rng_); }); 123 std::shuffle(data, data + num_elements, rng_); 124 } 125 uint32_t id_out; 126 const xnn_status status = 127 xnn_define_tensor_value(subgraph_.get(), xnn_datatype_fp32, dims.size(), 128 dims.data(), data, external_id, flags, &id_out); 129 EXPECT_EQ(status, xnn_status_success); 130 EXPECT_EQ(id_out, external_id); 131 return *this; 132 } 133 134 AddInputTensorF32(const std::vector<size_t> & dims,uint32_t external_id)135 inline SubgraphTester& AddInputTensorF32(const std::vector<size_t>& dims, uint32_t external_id) { 136 AddDynamicTensorF32(dims, external_id, XNN_VALUE_FLAG_EXTERNAL_INPUT); 137 size_t num_elements = NumElements(dims); 138 auto input = std::vector<char>(num_elements * sizeof(float) + XNN_EXTRA_BYTES * sizeof(char)); 139 float* data = reinterpret_cast<float*>(input.data()); 140 std::generate(data, data + num_elements, [&]() { return f32dist(rng_); }); 141 auto it = external_tensors_.insert({external_id, input}); 142 EXPECT_TRUE(it.second); 143 return *this; 144 } 145 AddOutputTensorF32(const std::vector<size_t> & dims,uint32_t external_id)146 inline SubgraphTester& AddOutputTensorF32(const std::vector<size_t>& dims, uint32_t external_id) { 147 output_id_ = external_id; 148 AddDynamicTensorF32(dims, external_id, XNN_VALUE_FLAG_EXTERNAL_OUTPUT); 149 size_t num_elements = NumElements(dims); 150 auto output = std::vector<char>(num_elements * sizeof(float)); 151 float* data = reinterpret_cast<float*>(output.data()); 152 std::fill(data, data + num_elements, std::nanf("")); 153 auto it = external_tensors_.insert({external_id, output}); 154 EXPECT_TRUE(it.second); 155 return *this; 156 } 157 AddConstantPad(const size_t * pre_paddings,const size_t * post_paddings,float padding_value,uint32_t input_id,uint32_t output_id)158 inline SubgraphTester& AddConstantPad( 159 const size_t *pre_paddings, const size_t *post_paddings, 160 float padding_value, uint32_t input_id, uint32_t output_id) { 161 const xnn_status status = xnn_define_static_constant_pad( 162 subgraph_.get(), pre_paddings, post_paddings, padding_value, input_id, 163 output_id, 0 /* flags */); 164 EXPECT_EQ(status, xnn_status_success); 165 return *this; 166 } 167 AddConvolution2D(ConvolutionParams params,uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id)168 inline SubgraphTester& AddConvolution2D( 169 ConvolutionParams params, 170 uint32_t input_id, uint32_t filter_id, uint32_t bias_id, 171 uint32_t output_id) { 172 const xnn_status status = xnn_define_convolution_2d( 173 subgraph_.get(), params.padding.top, params.padding.right, 174 params.padding.bottom, params.padding.left, params.kernel.height, params.kernel.width, 175 params.subsampling.height, params.subsampling.width, params.dilation.height, params.dilation.width, 176 params.groups, params.group_input_channels, params.group_output_channels, 177 -std::numeric_limits<float>::infinity(), 178 std::numeric_limits<float>::infinity(), input_id, filter_id, bias_id, 179 output_id, 0 /* flags */); 180 EXPECT_EQ(status, xnn_status_success); 181 182 return *this; 183 } 184 AddDepthwiseConvolution2D(DepthwiseConvolutionParams params,uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id)185 inline SubgraphTester& AddDepthwiseConvolution2D( 186 DepthwiseConvolutionParams params, 187 uint32_t input_id, uint32_t filter_id, uint32_t bias_id, uint32_t output_id) { 188 const xnn_status status = xnn_define_depthwise_convolution_2d( 189 subgraph_.get(), params.padding.top, params.padding.right, 190 params.padding.bottom, params.padding.left, params.kernel.height, params.kernel.width, 191 params.subsampling.height, params.subsampling.width, params.dilation.height, params.dilation.width, 192 params.depth_multiplier, params.input_channels, 193 -std::numeric_limits<float>::infinity(), 194 std::numeric_limits<float>::infinity(), input_id, filter_id, bias_id, 195 output_id, 0 /* flags */); 196 EXPECT_EQ(status, xnn_status_success); 197 198 return *this; 199 } 200 AddAddition(uint32_t input_id1,uint32_t input_id2,uint32_t output_id)201 inline SubgraphTester& AddAddition(uint32_t input_id1, uint32_t input_id2, uint32_t output_id) { 202 const xnn_status status = 203 xnn_define_add2(subgraph_.get(), -std::numeric_limits<float>::infinity(), 204 std::numeric_limits<float>::infinity(), input_id1, 205 input_id2, output_id, 0 /* flags */); 206 EXPECT_EQ(status, xnn_status_success); 207 208 return *this; 209 } 210 AddAveragePooling2D(uint32_t input_padding_top,uint32_t input_padding_right,uint32_t input_padding_bottom,uint32_t input_padding_left,uint32_t pooling_height,uint32_t pooling_width,uint32_t stride_height,uint32_t stride_width,uint32_t input_id,uint32_t output_id)211 inline SubgraphTester& AddAveragePooling2D( 212 uint32_t input_padding_top, uint32_t input_padding_right, 213 uint32_t input_padding_bottom, uint32_t input_padding_left, 214 uint32_t pooling_height, uint32_t pooling_width, uint32_t stride_height, 215 uint32_t stride_width, uint32_t input_id, uint32_t output_id) { 216 const xnn_status status = xnn_define_average_pooling_2d( 217 subgraph_.get(), input_padding_top, input_padding_right, 218 input_padding_bottom, input_padding_left, pooling_height, pooling_width, 219 stride_height, stride_width, -std::numeric_limits<float>::infinity(), 220 std::numeric_limits<float>::infinity(), input_id, output_id, 221 0 /* flags */); 222 EXPECT_EQ(status, xnn_status_success); 223 224 return *this; 225 } 226 AddClamp(float output_min,float output_max,uint32_t input_id,uint32_t output_id)227 inline SubgraphTester& AddClamp(float output_min, float output_max, uint32_t input_id, uint32_t output_id) { 228 const xnn_status status = 229 xnn_define_clamp(subgraph_.get(), output_min, output_max, input_id, output_id, 0 /* flags */); 230 EXPECT_EQ(status, xnn_status_success); 231 232 return *this; 233 } 234 AddDeconvolution2D(uint32_t input_padding_top,uint32_t input_padding_right,uint32_t input_padding_bottom,uint32_t input_padding_left,uint32_t adjustment_height,uint32_t adjustment_width,uint32_t kernel_height,uint32_t kernel_width,uint32_t upsampling_height,uint32_t upsampling_width,uint32_t dilation_height,uint32_t dilation_width,uint32_t groups,size_t group_input_channels,size_t group_output_channels,uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id)235 inline SubgraphTester& AddDeconvolution2D( 236 uint32_t input_padding_top, uint32_t input_padding_right, 237 uint32_t input_padding_bottom, uint32_t input_padding_left, 238 uint32_t adjustment_height, uint32_t adjustment_width, 239 uint32_t kernel_height, uint32_t kernel_width, 240 uint32_t upsampling_height, uint32_t upsampling_width, 241 uint32_t dilation_height, uint32_t dilation_width, uint32_t groups, 242 size_t group_input_channels, size_t group_output_channels, 243 uint32_t input_id, uint32_t filter_id, uint32_t bias_id, 244 uint32_t output_id) { 245 const xnn_status status = xnn_define_deconvolution_2d( 246 subgraph_.get(), input_padding_top, input_padding_right, 247 input_padding_bottom, input_padding_left, adjustment_height, 248 adjustment_width, kernel_height, kernel_width, upsampling_height, 249 upsampling_width, dilation_height, dilation_width, groups, 250 group_input_channels, group_output_channels, 251 -std::numeric_limits<float>::infinity(), 252 std::numeric_limits<float>::infinity(), input_id, filter_id, bias_id, 253 output_id, 0 /* flags */); 254 EXPECT_EQ(status, xnn_status_success); 255 256 return *this; 257 } 258 AddDeconvolution2D(DeconvolutionParams params,uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id)259 inline SubgraphTester& AddDeconvolution2D( 260 DeconvolutionParams params, 261 uint32_t input_id, uint32_t filter_id, uint32_t bias_id, 262 uint32_t output_id) { 263 const xnn_status status = xnn_define_deconvolution_2d( 264 subgraph_.get(), params.padding.top, params.padding.right, 265 params.padding.bottom, params.padding.left, params.adjustment.height, 266 params.adjustment.width, params.kernel.height, params.kernel.width, params.upsampling.height, 267 params.upsampling.width, params.dilation.height, params.dilation.width, params.groups, 268 params.group_input_channels, params.group_output_channels, 269 -std::numeric_limits<float>::infinity(), 270 std::numeric_limits<float>::infinity(), input_id, filter_id, bias_id, 271 output_id, 0 /* flags */); 272 EXPECT_EQ(status, xnn_status_success); 273 274 return *this; 275 } 276 AddDivide(uint32_t input_id1,uint32_t input_id2,uint32_t output_id)277 inline SubgraphTester& AddDivide(uint32_t input_id1, uint32_t input_id2, uint32_t output_id) { 278 const xnn_status status = 279 xnn_define_divide(subgraph_.get(), -std::numeric_limits<float>::infinity(), 280 std::numeric_limits<float>::infinity(), input_id1, 281 input_id2, output_id, 0 /* flags */); 282 EXPECT_EQ(status, xnn_status_success); 283 284 return *this; 285 } 286 AddFullyConnected(uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id)287 inline SubgraphTester& AddFullyConnected( 288 uint32_t input_id, uint32_t filter_id, uint32_t bias_id, uint32_t output_id) { 289 const xnn_status status = xnn_define_fully_connected( 290 subgraph_.get(), 291 -std::numeric_limits<float>::infinity(), 292 std::numeric_limits<float>::infinity(), input_id, filter_id, bias_id, 293 output_id, 0 /* flags */); 294 EXPECT_EQ(status, xnn_status_success); 295 296 return *this; 297 } 298 299 AddGlobalAveragePooling(uint32_t input_id,uint32_t output_id)300 inline SubgraphTester& AddGlobalAveragePooling(uint32_t input_id, uint32_t output_id) { 301 const xnn_status status = xnn_define_global_average_pooling_2d( 302 subgraph_.get(), -std::numeric_limits<float>::infinity(), 303 std::numeric_limits<float>::infinity(), input_id, output_id, 0 /* flags */); 304 EXPECT_EQ(status, xnn_status_success); 305 306 return *this; 307 } 308 AddMaxPooling2D(uint32_t input_padding_top,uint32_t input_padding_right,uint32_t input_padding_bottom,uint32_t input_padding_left,uint32_t pooling_height,uint32_t pooling_width,uint32_t stride_height,uint32_t stride_width,uint32_t dilation_height,uint32_t dilation_width,uint32_t input_id,uint32_t output_id)309 inline SubgraphTester& AddMaxPooling2D( 310 uint32_t input_padding_top, uint32_t input_padding_right, 311 uint32_t input_padding_bottom, uint32_t input_padding_left, 312 uint32_t pooling_height, uint32_t pooling_width, uint32_t stride_height, 313 uint32_t stride_width, uint32_t dilation_height, uint32_t dilation_width, uint32_t input_id, uint32_t output_id) { 314 const xnn_status status = xnn_define_max_pooling_2d( 315 subgraph_.get(), input_padding_top, input_padding_right, 316 input_padding_bottom, input_padding_left, pooling_height, pooling_width, 317 stride_height, stride_width, dilation_height, dilation_width, -std::numeric_limits<float>::infinity(), 318 std::numeric_limits<float>::infinity(), input_id, output_id, 319 0 /* flags */); 320 EXPECT_EQ(status, xnn_status_success); 321 322 return *this; 323 } 324 AddMultiply(uint32_t input_id1,uint32_t input_id2,uint32_t output_id)325 inline SubgraphTester& AddMultiply(uint32_t input_id1, uint32_t input_id2, uint32_t output_id) { 326 const xnn_status status = 327 xnn_define_multiply2(subgraph_.get(), -std::numeric_limits<float>::infinity(), 328 std::numeric_limits<float>::infinity(), input_id1, 329 input_id2, output_id, 0 /* flags */); 330 EXPECT_EQ(status, xnn_status_success); 331 332 return *this; 333 } 334 AddSubtract(uint32_t input_id1,uint32_t input_id2,uint32_t output_id)335 inline SubgraphTester& AddSubtract(uint32_t input_id1, uint32_t input_id2, uint32_t output_id) { 336 const xnn_status status = 337 xnn_define_subtract(subgraph_.get(), -std::numeric_limits<float>::infinity(), 338 std::numeric_limits<float>::infinity(), input_id1, 339 input_id2, output_id, 0 /* flags */); 340 EXPECT_EQ(status, xnn_status_success); 341 342 return *this; 343 } 344 Optimize()345 inline SubgraphTester& Optimize() { 346 const xnn_status status = xnn_subgraph_optimize(subgraph_.get(), 0 /* flags */); 347 EXPECT_EQ(status, xnn_status_success); 348 349 return *this; 350 } 351 RewriteForNchw()352 inline SubgraphTester& RewriteForNchw() { 353 xnn_subgraph_rewrite_for_nchw(subgraph_.get()); 354 355 return *this; 356 } 357 RewriteForFp16()358 inline SubgraphTester& RewriteForFp16() { 359 EXPECT_TRUE(xnn_subgraph_rewrite_for_fp16(subgraph_.get())); 360 361 return *this; 362 } 363 GetLayout(uint32_t value_id)364 inline xnn_layout_type GetLayout(uint32_t value_id) const { 365 return subgraph_->values[value_id].layout; 366 } 367 Value(uint32_t value_id)368 inline const xnn_value* const Value(uint32_t value_id) const { 369 return &subgraph_->values[value_id]; 370 } 371 Node(uint32_t node_id)372 inline const xnn_node* const Node(uint32_t node_id) const { 373 return &subgraph_->nodes[node_id]; 374 } 375 NumNodes()376 inline size_t NumNodes() const { 377 return subgraph_->num_nodes; 378 } 379 380 protected: 381 std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph_{nullptr, xnn_delete_subgraph}; 382 std::unordered_map<uint32_t, std::vector<char>> external_tensors_; 383 uint32_t output_id_; 384 385 private: NumElements(const std::vector<size_t> & dims)386 static inline size_t NumElements(const std::vector<size_t>& dims) { 387 return std::accumulate(std::begin(dims), std::end(dims), size_t(1), std::multiplies<size_t>()); 388 } 389 390 std::vector<std::vector<char>> static_data_; 391 std::mt19937 rng_; 392 std::uniform_real_distribution<float> f32dist = std::uniform_real_distribution<float>(-1.0f, +1.0f); 393 394 }; 395 396 } // namespace xnnpack 397