• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
14 
15 
// Creates a 32-bit depth-to-space (NHWC layout) operator.
//
// Validates parameters, allocates the operator descriptor, and stores the
// configuration for a later setup/run. On any failure the partially-created
// operator is deleted and an error status is returned; on success the new
// operator is written to *depth_to_space_op_out.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      block_size);
    goto error;
  }

  // input_channels = output_channels * block_size^2. Guard against size_t
  // overflow: a wrapped product would let an undersized input_channel_stride
  // pass the validation below and corrupt later stride arithmetic.
  // block_size >= 2 here, so the divisions are safe.
  const size_t block_pixels = (size_t) block_size * (size_t) block_size;
  if (block_pixels / (size_t) block_size != (size_t) block_size ||
      output_channels > SIZE_MAX / block_pixels) {
    xnn_log_error(
      "failed to create %s operator with %zu output channels and block size %" PRIu32 ": "
      "number of input channels overflows size_t",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      output_channels, block_size);
    goto error;
  }

  const size_t input_channels = output_channels * block_pixels;
  if (input_channel_stride < input_channels) {
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      input_channel_stride, block_size, block_size, input_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    goto error;
  }

  // Record the configuration; strides are applied at setup time.
  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = xnn_operator_type_depth_to_space_nhwc_x32;
  depth_to_space_op->flags = flags;

  // Not runnable until xnn_setup_depth_to_space_nhwc_x32 is called.
  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  xnn_delete_operator(depth_to_space_op);
  return status;
}
94 
// Binds input/output tensors and shapes to a previously-created depth-to-space
// operator and prepares its parallelization plan. Must be called before the
// operator is run; returns an error status without touching the plan if
// validation fails. The threadpool argument is accepted for API uniformity
// but not used here.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // Refuse to operate on a descriptor of any other operator type.
  if (depth_to_space_op->type != xnn_operator_type_depth_to_space_nhwc_x32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      xnn_operator_type_to_string(depth_to_space_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate first; only a fully successful setup marks the operator ready.
  depth_to_space_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    // An empty batch is a valid no-op: mark it runnable-as-skip.
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t bs = depth_to_space_op->block_size;
  const size_t channels = depth_to_space_op->channels;
  const size_t in_pixel_stride = depth_to_space_op->input_pixel_stride;
  const size_t out_pixel_stride = depth_to_space_op->output_pixel_stride;

  // Element/stride bookkeeping is in bytes; x32 elements are 4 bytes wide.
  depth_to_space_op->context.depthtospace2d_hwc = (struct depthtospace2d_hwc_context) {
    .elements = channels * sizeof(float),
    .input_width = input_width,
    .block_size = (size_t) bs,
    .input = input,
    .output = output,
    .input_height_stride = input_width * in_pixel_stride * sizeof(float),
    .input_width_stride = in_pixel_stride * sizeof(float),
    .output_height_stride = input_width * bs * out_pixel_stride * sizeof(float),
    .output_width_stride = out_pixel_stride * sizeof(float),
    .ukernel = xnn_params.xx.copy,
  };

  const size_t total_input_rows = batch_size * input_height;
  if (out_pixel_stride == channels) {
    // Transpose (N, Hi, Wi, Hb, Wb, Cout) -> (N, Hi, Hb, Wi, Wb, Cout) with Wb, Cout contiguous in memory.
    // Optimization: copy Wb * Cout pixels at once
    depth_to_space_op->context.depthtospace2d_hwc.elements *= bs;

    depth_to_space_op->compute.type = xnn_parallelization_type_3d;
    depth_to_space_op->compute.task_3d = (pthreadpool_task_3d_t) xnn_compute_depthtospace2d_hwc_contiguous;
    depth_to_space_op->compute.range[0] = total_input_rows;
    depth_to_space_op->compute.range[1] = input_width;
    depth_to_space_op->compute.range[2] = bs;
  } else {
    // Padded output rows: copy one Cout slice per (block row, block column).
    depth_to_space_op->compute.type = xnn_parallelization_type_4d;
    depth_to_space_op->compute.task_4d = (pthreadpool_task_4d_t) xnn_compute_depthtospace2d_hwc_strided;
    depth_to_space_op->compute.range[0] = total_input_rows;
    depth_to_space_op->compute.range[1] = input_width;
    depth_to_space_op->compute.range[2] = bs;
    depth_to_space_op->compute.range[3] = bs;
  }
  depth_to_space_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
167