// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>


enum xnn_status xnn_create_leaky_relu_nc_q8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  xnn_operator_t leaky_relu_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Leaky ReLU operator: XNNPACK is not initialized");
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %zu channels: number of channels must be non-zero", channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    goto error;
  }

  if (negative_slope <= 0.0f || !isnormal(negative_slope)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g negative slope: slope must be finite, normalized, and positive",
      negative_slope);
    goto error;
  }

  if (negative_slope > 1.0f) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g negative slope: slope must not exceed 1.0", negative_slope);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g input scale: scale must be finite, normalized, and positive",
      input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g output scale: scale must be finite, normalized, and positive",
      output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

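  // The next check validates that the requantization ratio (input scale / output scale)
  // falls within the [2**-8, 2**8) range supported by this implementation.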
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create Leaky ReLU operator with %.7g input-to-output scale ratio: "
      "scale ratio must be in [2**-8, 2**8) range",
      input_output_scale);
    goto error;
  }

  status = xnn_status_out_of_memory;

  leaky_relu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (leaky_relu_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Leaky ReLU operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  leaky_relu_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (leaky_relu_op->lookup_table == NULL) {
    xnn_log_error("failed to allocate 256 bytes for Leaky ReLU lookup table");
    goto error;
  }

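  // Precompute a 256-entry lookup table that maps every quantized input byte to its
  // quantized Leaky ReLU output: dequantize relative to the output scale, apply the
  // negative slope to values below zero, clamp to the output range, and requantize.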
  uint8_t* lookup_table = leaky_relu_op->lookup_table;
  const float scaled_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const float scaled_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  for (int32_t i = 0; i < 256; i++) {
    const float x = input_output_scale * (float) (i - (int32_t) (uint32_t) input_zero_point);
    float y = x < 0.0f ? x * negative_slope : x;
    if (y < scaled_min_less_zero_point) {
      y = scaled_min_less_zero_point;
    }
    if (y > scaled_max_less_zero_point) {
      y = scaled_max_less_zero_point;
    }
    lookup_table[(uint32_t) i] = (uint8_t) (lrintf(y) + (long) output_zero_point);
  }

  leaky_relu_op->channels = channels;
  leaky_relu_op->input_pixel_stride = input_stride;
  leaky_relu_op->output_pixel_stride = output_stride;

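  // The operator is evaluated with the shared 8-bit lookup-table (LUT) microkernel.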
  leaky_relu_op->type = xnn_operator_type_leaky_relu_nc_q8;
  leaky_relu_op->ukernel.type = xnn_ukernel_type_lut;

  leaky_relu_op->state = xnn_run_state_invalid;

  *leaky_relu_op_out = leaky_relu_op;
  return xnn_status_success;

error:
  xnn_delete_operator(leaky_relu_op);
  return status;
}

enum xnn_status xnn_setup_leaky_relu_nc_q8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_q8) {
    xnn_log_error("failed to setup Leaky ReLU (NC, Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  leaky_relu_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Leaky ReLU operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    leaky_relu_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = leaky_relu_op->channels;
  const size_t input_stride = leaky_relu_op->input_pixel_stride;
  const size_t output_stride = leaky_relu_op->output_pixel_stride;
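  // If both strides equal the channel count (the XOR/OR expression below is zero exactly
  // in that case) or there is only one row, the data is effectively contiguous and can be
  // processed as a single byte stream in fixed-size blocks; otherwise each row is handled
  // as a separate strided task.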
  if ((((input_stride ^ channels) | (output_stride ^ channels)) == 0) || batch_size == 1) {
    const size_t block_size = 1024;
    leaky_relu_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    leaky_relu_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    leaky_relu_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    leaky_relu_op->compute.tile[0] = block_size;
  } else {
    leaky_relu_op->context.lut_strided = (struct lut_strided_context) {
      .n = channels,
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d;
    leaky_relu_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    leaky_relu_op->compute.range[0] = batch_size;
    leaky_relu_op->compute.tile[0] = 0;
  }
  leaky_relu_op->state = xnn_run_state_ready;

  return xnn_status_success;
}