• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <math.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdlib.h>
13 
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/log.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/params-init.h>
19 #include <xnnpack/params.h>
20 
21 
// Creates a Clamp operator descriptor for uint8_t data.
//
// Parameters:
//   channels      - number of channels; must be non-zero.
//   input_stride  - input element stride; must be >= channels.
//   output_stride - output element stride; must be >= channels.
//   output_min    - lower clamping bound; must be strictly below output_max.
//   output_max    - upper clamping bound.
//   flags         - not read by this function.
//   clamp_op_out  - receives the new operator on success; left unwritten on
//                   failure.
//
// Returns:
//   xnn_status_success            on success,
//   xnn_status_uninitialized      if xnn_params.initialized is false,
//   xnn_status_invalid_parameter  if any argument check fails,
//   xnn_status_out_of_memory      if the descriptor cannot be allocated.
enum xnn_status xnn_create_clamp_nc_u8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  enum xnn_status result;
  xnn_operator_t op = NULL;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Clamp operator: XNNPACK is not initialized");
    result = xnn_status_uninitialized;
    goto fail;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Clamp operator with %zu channels: number of channels must be non-zero", channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > input_stride) {
    xnn_log_error(
      "failed to create Clamp operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > output_stride) {
    xnn_log_error(
      "failed to create Clamp operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  // An empty or inverted range is rejected: min must be strictly below max.
  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create Clamp operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      output_min, output_max);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
    result = xnn_status_out_of_memory;
    goto fail;
  }

  // Identify the operator, then record its shape and clamping parameters.
  op->type = xnn_operator_type_clamp_nc_u8;
  op->ukernel.type = xnn_ukernel_type_clamp;
  op->state = xnn_run_state_invalid;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;
  op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);

  *clamp_op_out = op;
  return xnn_status_success;

fail:
  // Every jump here happens while op is still NULL; the call is kept so all
  // failure paths go through the same cleanup routine.
  xnn_delete_operator(op);
  return result;
}
95 
// Creates a Clamp operator descriptor for float data.
//
// Parameters:
//   channels      - number of channels; must be non-zero.
//   input_stride  - input element stride; must be >= channels.
//   output_stride - output element stride; must be >= channels.
//   output_min    - lower clamping bound; must be non-NaN and strictly below
//                   output_max.
//   output_max    - upper clamping bound; must be non-NaN.
//   flags         - not read by this function.
//   clamp_op_out  - receives the new operator on success; left unwritten on
//                   failure.
//
// Returns:
//   xnn_status_success            on success,
//   xnn_status_uninitialized      if xnn_params.initialized is false,
//   xnn_status_invalid_parameter  if any argument check fails,
//   xnn_status_out_of_memory      if the descriptor cannot be allocated.
enum xnn_status xnn_create_clamp_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  enum xnn_status result;
  xnn_operator_t op = NULL;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Clamp operator: XNNPACK is not initialized");
    result = xnn_status_uninitialized;
    goto fail;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Clamp operator with %zu channels: number of channels must be non-zero", channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > input_stride) {
    xnn_log_error(
      "failed to create Clamp operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > output_stride) {
    xnn_log_error(
      "failed to create Clamp operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  // NaN bounds are reported separately, before the ordering check below.
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create Clamp operator with NaN output lower bound: lower bound must be non-NaN");
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create Clamp operator with NaN output upper bound: upper bound must be non-NaN");
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  // An empty or inverted range is rejected: min must be strictly below max.
  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create Clamp operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      output_min, output_max);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
    result = xnn_status_out_of_memory;
    goto fail;
  }

  // Identify the operator, then record its shape and clamping parameters.
  op->type = xnn_operator_type_clamp_nc_f32;
  op->ukernel.type = xnn_ukernel_type_clamp;
  op->state = xnn_run_state_invalid;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;
  op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);

  *clamp_op_out = op;
  return xnn_status_success;

fail:
  // Every jump here happens while op is still NULL; the call is kept so all
  // failure paths go through the same cleanup routine.
  xnn_delete_operator(op);
  return result;
}
181 
// Prepares a uint8_t Clamp operator for execution on the given buffers.
//
// Parameters:
//   clamp_op   - operator whose type must be xnn_operator_type_clamp_nc_u8.
//   batch_size - number of batch elements; zero marks the operator as
//                "skip" and succeeds immediately.
//   input      - input buffer; stored in the operator context, not
//                dereferenced here.
//   output     - output buffer; stored in the operator context, not
//                dereferenced here.
//   threadpool - not referenced by this function.
//
// Returns:
//   xnn_status_success            when the operator is ready (or skipped),
//   xnn_status_invalid_parameter  on operator type mismatch,
//   xnn_status_uninitialized      if xnn_params.initialized is false.
enum xnn_status xnn_setup_clamp_nc_u8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (clamp_op->type != xnn_operator_type_clamp_nc_u8) {
    xnn_log_error("failed to setup Clamp (NC, U8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Until setup finishes, the operator must not be considered runnable.
  clamp_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Clamp operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    clamp_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = clamp_op->channels;
  const size_t x_stride_bytes = clamp_op->input_pixel_stride * sizeof(uint8_t);
  const size_t y_stride_bytes = clamp_op->output_pixel_stride * sizeof(uint8_t);

  // The strided path is needed only when there is more than one batch element
  // and at least one of the strides differs from the channel count.
  const int strides_match_channels =
    clamp_op->input_pixel_stride == channels && clamp_op->output_pixel_stride == channels;

  clamp_op->compute.type = xnn_parallelization_type_1d_tile_1d;
  if (batch_size != 1 && !strides_match_channels) {
    // One task per batch element, each covering `channels` bytes.
    clamp_op->context.univector_strided = (struct univector_strided_context) {
      .n = channels * sizeof(uint8_t),
      .x = input,
      .x_stride = x_stride_bytes,
      .y = output,
      .y_stride = y_stride_bytes,
      .ukernel = xnn_params.u8.clamp,
      .params.u8_output = clamp_op->u8_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    clamp_op->compute.range[0] = batch_size;
    clamp_op->compute.tile[0] = 1;
  } else {
    // Whole buffer treated as one flat byte range, split into fixed-size tiles.
    const size_t tile_bytes = 4096;
    clamp_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = x_stride_bytes,
      .y = output,
      .y_stride = y_stride_bytes,
      .ukernel = xnn_params.u8.clamp,
      .params.u8_output = clamp_op->u8_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    clamp_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    clamp_op->compute.tile[0] = tile_bytes;
  }

  clamp_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
241 
// Prepares a float Clamp operator for execution on the given buffers.
//
// Parameters:
//   clamp_op   - operator whose type must be xnn_operator_type_clamp_nc_f32.
//   batch_size - number of batch elements; zero marks the operator as
//                "skip" and succeeds immediately.
//   input      - input buffer; stored in the operator context, not
//                dereferenced here.
//   output     - output buffer; stored in the operator context, not
//                dereferenced here.
//   threadpool - not referenced by this function.
//
// Returns:
//   xnn_status_success            when the operator is ready (or skipped),
//   xnn_status_invalid_parameter  on operator type mismatch,
//   xnn_status_uninitialized      if xnn_params.initialized is false.
enum xnn_status xnn_setup_clamp_nc_f32(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (clamp_op->type != xnn_operator_type_clamp_nc_f32) {
    xnn_log_error("failed to setup Clamp (NC, F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Until setup finishes, the operator must not be considered runnable.
  clamp_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Clamp operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    clamp_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = clamp_op->channels;
  const size_t x_stride_bytes = clamp_op->input_pixel_stride * sizeof(float);
  const size_t y_stride_bytes = clamp_op->output_pixel_stride * sizeof(float);

  // The strided path is needed only when there is more than one batch element
  // and at least one of the strides differs from the channel count.
  const int strides_match_channels =
    clamp_op->input_pixel_stride == channels && clamp_op->output_pixel_stride == channels;

  clamp_op->compute.type = xnn_parallelization_type_1d_tile_1d;
  if (batch_size != 1 && !strides_match_channels) {
    // One task per batch element, each covering `channels` floats (in bytes).
    clamp_op->context.univector_strided = (struct univector_strided_context) {
      .n = channels * sizeof(float),
      .x = input,
      .x_stride = x_stride_bytes,
      .y = output,
      .y_stride = y_stride_bytes,
      .ukernel = xnn_params.f32.clamp,
      .params.f32_output = clamp_op->f32_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    clamp_op->compute.range[0] = batch_size;
    clamp_op->compute.tile[0] = 1;
  } else {
    // Whole buffer treated as one flat byte range, split into fixed-size tiles.
    const size_t tile_bytes = 4096;
    clamp_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = x_stride_bytes,
      .y = output,
      .y_stride = y_stride_bytes,
      .ukernel = xnn_params.f32.clamp,
      .params.f32_output = clamp_op->f32_output_params,
    };
    clamp_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    clamp_op->compute.range[0] = batch_size * channels * sizeof(float);
    clamp_op->compute.tile[0] = tile_bytes;
  }

  clamp_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
301