// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cmath>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE

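// Benchmarks the XNNPACK single-precision (F32) Sigmoid operator. The
// operator is created with 1 channel and unit strides, so the batch size
// (state.range(0)) is the total number of elements processed per run.
// Inputs are drawn uniformly from [-10, 10], which covers both the
// saturated tails and the central region of the sigmoid curve; outputs are
// pre-filled with NaN so stale results cannot masquerade as valid ones.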
static void xnnpack_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

  std::vector<float> input(batch_size + XNN_EXTRA_BYTES / sizeof(float));
  std::vector<float> output(batch_size);
  std::generate(input.begin(), input.end(), std::ref(f32rng));
  std::fill(output.begin(), output.end(), std::nanf(""));

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t sigmoid_op = nullptr;
  status = xnn_create_sigmoid_nc_f32(
    1 /* channels */, 1 /* input stride */, 1 /* output stride */,
    0 /* flags */, &sigmoid_op);
  if (status != xnn_status_success || sigmoid_op == nullptr) {
    state.SkipWithError("failed to create Sigmoid operator");
    return;
  }

  status = xnn_setup_sigmoid_nc_f32(
    sigmoid_op, batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Sigmoid operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Sigmoid operator");
      return;
    }
  }

  status = xnn_delete_operator(sigmoid_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Sigmoid operator");
    return;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

#ifndef XNN_NO_QS8_OPERATORS
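// Benchmarks the XNNPACK signed 8-bit quantized (QS8) Sigmoid operator.
// The output uses zero point -128 and scale 1/256, which maps the sigmoid's
// (0, 1) range onto the full int8 range [-128, 127].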
static void xnnpack_sigmoid_qs8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto i8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
    std::ref(rng));

  std::vector<int8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(int8_t));
  std::vector<int8_t> output(batch_size);
  std::generate(input.begin(), input.end(), std::ref(i8rng));
  std::fill(output.begin(), output.end(), INT8_C(0xA5));

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t sigmoid_op = nullptr;
  status = xnn_create_sigmoid_nc_qs8(
    1 /* channels */, 1 /* input stride */, 1 /* output stride */,
    1 /* input zero point */, 1.0f /* input scale */,
    -128 /* output zero point */, 1.0f / 256.0f /* output scale */,
    std::numeric_limits<int8_t>::min() /* output min */, std::numeric_limits<int8_t>::max() /* output max */,
    0 /* flags */, &sigmoid_op);
  if (status != xnn_status_success || sigmoid_op == nullptr) {
    state.SkipWithError("failed to create Sigmoid operator");
    return;
  }

  status = xnn_setup_sigmoid_nc_qs8(
    sigmoid_op, batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Sigmoid operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Sigmoid operator");
      return;
    }
  }

  status = xnn_delete_operator(sigmoid_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Sigmoid operator");
    return;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}
#endif  // XNN_NO_QS8_OPERATORS

#ifndef XNN_NO_QU8_OPERATORS
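// Benchmarks the XNNPACK unsigned 8-bit quantized (QU8) Sigmoid operator.
// The output uses zero point 0 and scale 1/256, the uint8 analogue of the
// QS8 quantization above.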
static void xnnpack_sigmoid_qu8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto u8rng = std::bind(
    std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

  std::vector<uint8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(uint8_t));
  std::vector<uint8_t> output(batch_size);
  std::generate(input.begin(), input.end(), std::ref(u8rng));
  std::fill(output.begin(), output.end(), UINT8_C(0xA5));

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t sigmoid_op = nullptr;
  status = xnn_create_sigmoid_nc_qu8(
    1 /* channels */, 1 /* input stride */, 1 /* output stride */,
    128 /* input zero point */, 1.0f /* input scale */,
    0 /* output zero point */, 1.0f / 256.0f /* output scale */,
    std::numeric_limits<uint8_t>::min() /* output min */, std::numeric_limits<uint8_t>::max() /* output max */,
    0 /* flags */, &sigmoid_op);
  if (status != xnn_status_success || sigmoid_op == nullptr) {
    state.SkipWithError("failed to create Sigmoid operator");
    return;
  }

  status = xnn_setup_sigmoid_nc_qu8(
    sigmoid_op, batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Sigmoid operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Sigmoid operator");
      return;
    }
  }

  status = xnn_delete_operator(sigmoid_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Sigmoid operator");
    return;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}
#endif  // XNN_NO_QU8_OPERATORS

#ifdef BENCHMARK_TENSORFLOW_LITE
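// TFLite reference benchmark: builds an in-memory FlatBuffer model containing
// a single LOGISTIC (sigmoid) operator on FLOAT32 tensors, then times
// single-threaded interpreter invocations over the same element counts as the
// XNNPACK benchmarks above.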
static void tflite_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size,
    std::ref(f32rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}

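// TFLite reference benchmark for the INT8 LOGISTIC operator, using the same
// quantization parameters as the XNNPACK QS8 benchmark (input: zero point 1,
// scale 1.0; output: zero point -128, scale 1/256).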
static void tflite_sigmoid_qs8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto i8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
    std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({1 /* zero point */}))),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
                           builder.CreateVector<int64_t>({-128 /* zero point */}))),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<int8_t>(0),
    interpreter->typed_tensor<int8_t>(0) + batch_size,
    std::ref(i8rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}

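// TFLite reference benchmark for the UINT8 LOGISTIC operator, mirroring the
// XNNPACK QU8 benchmark (input: zero point 128, scale 1.0; output: zero
// point 0, scale 1/256).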
static void tflite_sigmoid_qu8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto u8rng = std::bind(
    std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()),
    std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({128 /* zero point */}))),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
                           builder.CreateVector<int64_t>({0 /* zero point */}))),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<uint8_t>(0),
    interpreter->typed_tensor<uint8_t>(0) + batch_size,
    std::ref(u8rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

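// Register each benchmark over the standard unary elementwise batch-size
// sweep. UseRealTime() reports wall-clock time rather than per-thread CPU
// time, which is the fairer basis for comparing these elementwise kernels.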
BENCHMARK(xnnpack_sigmoid_f32)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
#ifndef XNN_NO_QS8_OPERATORS
BENCHMARK(xnnpack_sigmoid_qs8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
  ->UseRealTime();
#endif  // XNN_NO_QS8_OPERATORS
#ifndef XNN_NO_QU8_OPERATORS
BENCHMARK(xnnpack_sigmoid_qu8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
  ->UseRealTime();
#endif  // XNN_NO_QU8_OPERATORS

#ifdef BENCHMARK_TENSORFLOW_LITE
BENCHMARK(tflite_sigmoid_f32)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK(tflite_sigmoid_qs8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
  ->UseRealTime();
BENCHMARK(tflite_sigmoid_qu8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
  ->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif