1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <math.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdlib.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/log.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/params-init.h>
19 #include <xnnpack/params.h>
20
21
// Creates a Clamp operator descriptor for uint8_t elements.
//
// Checks run in the order listed; the first one that fails determines the
// returned status:
//   - xnn_status_uninitialized      xnn_params.initialized is not set
//   - xnn_status_invalid_parameter  channels is zero, either stride is smaller
//                                   than channels, or output_min is not
//                                   strictly below output_max
//   - xnn_status_out_of_memory      the descriptor could not be allocated
//
// On success the new operator is stored in *clamp_op_out and
// xnn_status_success is returned. On failure *clamp_op_out is not written.
// The flags argument is not read by this function.
enum xnn_status xnn_create_clamp_nc_u8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status result;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Clamp operator: XNNPACK is not initialized");
    result = xnn_status_uninitialized;
  } else if (channels == 0) {
    xnn_log_error(
      "failed to create Clamp operator with %zu channels: number of channels must be non-zero", channels);
    result = xnn_status_invalid_parameter;
  } else if (input_stride < channels) {
    xnn_log_error(
      "failed to create Clamp operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    result = xnn_status_invalid_parameter;
  } else if (output_stride < channels) {
    xnn_log_error(
      "failed to create Clamp operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    result = xnn_status_invalid_parameter;
  } else if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Clamp operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      output_min, output_max);
    result = xnn_status_invalid_parameter;
  } else {
    op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
    if (op != NULL) {
      op->type = xnn_operator_type_clamp_nc_u8;
      op->ukernel.type = xnn_ukernel_type_clamp;

      op->channels = channels;
      op->input_pixel_stride = input_stride;
      op->output_pixel_stride = output_stride;
      op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);

      // The operator cannot be run until a setup call marks it ready.
      op->state = xnn_run_state_invalid;

      *clamp_op_out = op;
      return xnn_status_success;
    }
    xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
    result = xnn_status_out_of_memory;
  }

  // Every failure path arrives here with op still NULL; the delete call is
  // kept so the failure path makes the same calls as before.
  xnn_delete_operator(op);
  return result;
}
95
// Creates a Clamp operator descriptor for float elements.
//
// Checks run in the order listed; the first one that fails determines the
// returned status:
//   - xnn_status_uninitialized      xnn_params.initialized is not set
//   - xnn_status_invalid_parameter  channels is zero, either stride is smaller
//                                   than channels, either bound is NaN, or
//                                   output_min is not strictly below output_max
//   - xnn_status_out_of_memory      the descriptor could not be allocated
//
// On success the new operator is stored in *clamp_op_out and
// xnn_status_success is returned. On failure *clamp_op_out is not written.
// The flags argument is not read by this function.
enum xnn_status xnn_create_clamp_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status result;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Clamp operator: XNNPACK is not initialized");
    result = xnn_status_uninitialized;
  } else if (channels == 0) {
    xnn_log_error(
      "failed to create Clamp operator with %zu channels: number of channels must be non-zero", channels);
    result = xnn_status_invalid_parameter;
  } else if (input_stride < channels) {
    xnn_log_error(
      "failed to create Clamp operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    result = xnn_status_invalid_parameter;
  } else if (output_stride < channels) {
    xnn_log_error(
      "failed to create Clamp operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    result = xnn_status_invalid_parameter;
  } else if (isnan(output_min)) {
    // NaN bounds are rejected explicitly, before the range comparison below.
    xnn_log_error(
      "failed to create Clamp operator with NaN output lower bound: lower bound must be non-NaN");
    result = xnn_status_invalid_parameter;
  } else if (isnan(output_max)) {
    xnn_log_error(
      "failed to create Clamp operator with NaN output upper bound: upper bound must be non-NaN");
    result = xnn_status_invalid_parameter;
  } else if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Clamp operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      output_min, output_max);
    result = xnn_status_invalid_parameter;
  } else {
    op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
    if (op != NULL) {
      op->type = xnn_operator_type_clamp_nc_f32;
      op->ukernel.type = xnn_ukernel_type_clamp;

      op->channels = channels;
      op->input_pixel_stride = input_stride;
      op->output_pixel_stride = output_stride;
      op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);

      // The operator cannot be run until a setup call marks it ready.
      op->state = xnn_run_state_invalid;

      *clamp_op_out = op;
      return xnn_status_success;
    }
    xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
    result = xnn_status_out_of_memory;
  }

  // Every failure path arrives here with op still NULL; the delete call is
  // kept so the failure path makes the same calls as before.
  xnn_delete_operator(op);
  return result;
}
181
// Configures a Clamp (NC, U8) operator for a batch of batch_size rows.
//
// Returns xnn_status_invalid_parameter when clamp_op is not of type
// xnn_operator_type_clamp_nc_u8 (the operator is left unmodified in that
// case), and xnn_status_uninitialized when xnn_params.initialized is not set.
// A zero batch_size puts the operator in the skip state and succeeds without
// touching the compute description. Otherwise the input/output pointers are
// recorded in the operator's context (they are not dereferenced here), the
// operator is marked ready, and xnn_status_success is returned.
// The threadpool argument is not read by this function.
enum xnn_status xnn_setup_clamp_nc_u8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (clamp_op->type != xnn_operator_type_clamp_nc_u8) {
    xnn_log_error("failed to setup Clamp (NC, U8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Any early exit below leaves the operator non-runnable.
  clamp_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Clamp operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    clamp_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t element_size = sizeof(*input);
  const size_t num_channels = clamp_op->channels;
  const size_t x_stride = clamp_op->input_pixel_stride;
  const size_t y_stride = clamp_op->output_pixel_stride;

  // The per-row path is needed only when there is more than one row and at
  // least one of the strides differs from the channel count.
  if (batch_size != 1 && (x_stride != num_channels || y_stride != num_channels)) {
    clamp_op->context.univector_strided = (struct univector_strided_context) {
      .n = num_channels * element_size,
      .x = input,
      .x_stride = x_stride * element_size,
      .y = output,
      .y_stride = y_stride * element_size,
      .ukernel = xnn_params.u8.clamp,
      .params.u8_output = clamp_op->u8_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    // One work item per row.
    clamp_op->compute.range[0] = batch_size;
    clamp_op->compute.tile[0] = 1;
  } else {
    const size_t tile_size = 4096;
    clamp_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = x_stride * element_size,
      .y = output,
      .y_stride = y_stride * element_size,
      .ukernel = xnn_params.u8.clamp,
      .params.u8_output = clamp_op->u8_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    // The range is the total byte count of the batch; it is tiled in chunks
    // of tile_size.
    clamp_op->compute.range[0] = batch_size * num_channels * element_size;
    clamp_op->compute.tile[0] = tile_size;
  }

  // Both paths use the same parallelization type.
  clamp_op->compute.type = xnn_parallelization_type_1d_tile_1d;
  clamp_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
241
// Configures a Clamp (NC, F32) operator for a batch of batch_size rows.
//
// Returns xnn_status_invalid_parameter when clamp_op is not of type
// xnn_operator_type_clamp_nc_f32 (the operator is left unmodified in that
// case), and xnn_status_uninitialized when xnn_params.initialized is not set.
// A zero batch_size puts the operator in the skip state and succeeds without
// touching the compute description. Otherwise the input/output pointers are
// recorded in the operator's context (they are not dereferenced here), the
// operator is marked ready, and xnn_status_success is returned.
// The threadpool argument is not read by this function.
enum xnn_status xnn_setup_clamp_nc_f32(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (clamp_op->type != xnn_operator_type_clamp_nc_f32) {
    xnn_log_error("failed to setup Clamp (NC, F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Any early exit below leaves the operator non-runnable.
  clamp_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Clamp operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    clamp_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t element_size = sizeof(*input);
  const size_t num_channels = clamp_op->channels;
  const size_t x_stride = clamp_op->input_pixel_stride;
  const size_t y_stride = clamp_op->output_pixel_stride;

  // The per-row path is needed only when there is more than one row and at
  // least one of the strides differs from the channel count.
  if (batch_size != 1 && (x_stride != num_channels || y_stride != num_channels)) {
    clamp_op->context.univector_strided = (struct univector_strided_context) {
      .n = num_channels * element_size,
      .x = input,
      .x_stride = x_stride * element_size,
      .y = output,
      .y_stride = y_stride * element_size,
      .ukernel = xnn_params.f32.clamp,
      .params.f32_output = clamp_op->f32_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    // One work item per row.
    clamp_op->compute.range[0] = batch_size;
    clamp_op->compute.tile[0] = 1;
  } else {
    const size_t tile_size = 4096;
    clamp_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = x_stride * element_size,
      .y = output,
      .y_stride = y_stride * element_size,
      .ukernel = xnn_params.f32.clamp,
      .params.f32_output = clamp_op->f32_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    // The range is the total byte count of the batch; it is tiled in chunks
    // of tile_size.
    clamp_op->compute.range[0] = batch_size * num_channels * element_size;
    clamp_op->compute.tile[0] = tile_size;
  }

  // Both paths use the same parallelization type.
  clamp_op->compute.type = xnn_parallelization_type_1d_tile_1d;
  clamp_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
301