1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19
20
// Creates a Leaky ReLU operator for quantized 8-bit (Q8) data.
//
// The operator is realized as a 256-entry lookup table that maps each possible
// input byte to its output byte. The table is computed here, once, from the
// quantization parameters, the negative slope, and the output clamping range.
//
// Parameters:
//   channels           - number of channels; must be non-zero.
//   input_stride       - input element stride; must be >= channels.
//   output_stride      - output element stride; must be >= channels.
//   negative_slope     - multiplier applied to negative inputs; must be a
//                        normal, positive float not exceeding 1.0.
//   input_zero_point   - quantized value that represents zero in the input.
//   input_scale        - input quantization scale; normal and positive.
//   output_zero_point  - quantized value that represents zero in the output.
//   output_scale       - output quantization scale; normal and positive.
//   output_min/max     - clamping range for output bytes; min must be < max.
//   flags              - not read by this function.
//   leaky_relu_op_out  - receives the new operator on success; left untouched
//                        on failure.
//
// Returns:
//   xnn_status_success                on success,
//   xnn_status_uninitialized          if XNNPACK is not initialized,
//   xnn_status_invalid_parameter      if an argument fails validation,
//   xnn_status_unsupported_parameter  if input_scale / output_scale is
//                                     outside [2**-8, 2**8),
//   xnn_status_out_of_memory          if an allocation fails.
enum xnn_status xnn_create_leaky_relu_nc_q8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Leaky ReLU operator: XNNPACK is not initialized");
    status = xnn_status_uninitialized;
    goto fail;
  }

  // --- Argument validation: every failure here is an invalid parameter. ---

  if (channels == 0) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %zu channels: number of channels must be non-zero", channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > input_stride) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (channels > output_stride) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (negative_slope <= 0.0f || !isnormal(negative_slope)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g negative slope: slope must be finite, normalized, and positive",
      negative_slope);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (negative_slope > 1.0f) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g negative slope: slope must not exceed 1.0", negative_slope);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g input scale: scale must be finite, normalized, and positive",
      input_scale);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g output scale: scale must be finite, normalized, and positive",
      output_scale);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  // --- Supported-range check on the requantization ratio. ---

  const float scale_ratio = input_scale / output_scale;
  if (scale_ratio < 0x1.0p-8f || scale_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g input-to-output scale ratio: "
      "scale ratio must be in [2**-8, 2**8) range",
      scale_ratio);
    status = xnn_status_unsupported_parameter;
    goto fail;
  }

  // --- Allocations: operator descriptor (zeroed) and the lookup table. ---

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Leaky ReLU operator descriptor", sizeof(struct xnn_operator));
    status = xnn_status_out_of_memory;
    goto fail;
  }

  op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (op->lookup_table == NULL) {
    xnn_log_error("failed to allocate 256 bytes for Leaky ReLU lookup table");
    status = xnn_status_out_of_memory;
    goto fail;
  }

  // --- Fill the table: one output byte for each of the 256 input bytes. ---

  uint8_t* table = op->lookup_table;
  // Clamping bounds expressed relative to the output zero point, so they can
  // be compared against the value before the zero point is added back.
  const float lower_bound = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const float upper_bound = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  for (uint32_t code = 0; code < 256; code++) {
    const int32_t centered = (int32_t) code - (int32_t) (uint32_t) input_zero_point;
    const float rescaled = scale_ratio * (float) centered;

    // Leaky ReLU: negative values are scaled by the slope, others pass through.
    float result = rescaled;
    if (rescaled < 0.0f) {
      result = rescaled * negative_slope;
    }

    result = result < lower_bound ? lower_bound : result;
    result = result > upper_bound ? upper_bound : result;

    table[code] = (uint8_t) (lrintf(result) + (long) output_zero_point);
  }

  // --- Populate the descriptor and hand it to the caller. ---

  op->type = xnn_operator_type_leaky_relu_nc_q8;
  op->ukernel.type = xnn_ukernel_type_lut;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator cannot run until it has been set up.
  op->state = xnn_run_state_invalid;

  *leaky_relu_op_out = op;
  return xnn_status_success;

fail:
  // Invoked on every failure path, including those where op is still NULL.
  xnn_delete_operator(op);
  return status;
}
158
// Binds input/output buffers and a batch size to a Leaky ReLU (NC, Q8)
// operator and prepares its compute descriptor.
//
// Parameters:
//   leaky_relu_op - operator whose type must be
//                   xnn_operator_type_leaky_relu_nc_q8.
//   batch_size    - number of batch elements; zero marks the operator as
//                   "skip" and succeeds immediately.
//   input, output - buffers recorded in the operator's context; they are not
//                   dereferenced by this function.
//   threadpool    - not read by this function.
//
// Returns:
//   xnn_status_success            on success,
//   xnn_status_invalid_parameter  if the operator type does not match,
//   xnn_status_uninitialized      if XNNPACK is not initialized.
enum xnn_status xnn_setup_leaky_relu_nc_q8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_q8) {
    xnn_log_error("failed to setup Leaky ReLU (NC, Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Mark the operator as not runnable until setup finishes successfully.
  leaky_relu_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Leaky ReLU operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute.
    leaky_relu_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = leaky_relu_op->channels;
  const size_t in_stride = leaky_relu_op->input_pixel_stride;
  const size_t out_stride = leaky_relu_op->output_pixel_stride;

  if (batch_size != 1 && (in_stride != channels || out_stride != channels)) {
    // Several batch elements with a gap between them: one task per batch
    // element, each covering `channels` entries.
    const struct lut_strided_context strided = {
      .n = channels,
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->context.lut_strided = strided;
    leaky_relu_op->compute.type = xnn_parallelization_type_1d;
    leaky_relu_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    leaky_relu_op->compute.range[0] = batch_size;
    leaky_relu_op->compute.tile[0] = 0;
  } else {
    // Both strides equal the channel count, or there is a single batch
    // element: the whole batch_size * channels byte range is split into
    // fixed-size tiles.
    const size_t tile_bytes = 1024;
    const struct lut_contiguous_context contiguous = {
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->context.lut_contiguous = contiguous;
    leaky_relu_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    leaky_relu_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    leaky_relu_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    leaky_relu_op->compute.tile[0] = tile_bytes;
  }

  leaky_relu_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
218