• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <algorithm>
10 #include <array>
11 #include <cmath>
12 #include <functional>
13 #include <limits>
14 #include <random>
15 #include <vector>
16 
17 #include <xnnpack.h>
18 
19 #include <benchmark/benchmark.h>
20 #include "bench/utils.h"
21 #ifdef BENCHMARK_TENSORFLOW_LITE
22 #include "flatbuffers/include/flatbuffers/flatbuffers.h"
23 #include "tensorflow/lite/interpreter.h"
24 #include "tensorflow/lite/kernels/register.h"
25 #include "tensorflow/lite/model.h"
26 #include "tensorflow/lite/schema/schema_generated.h"
27 #include "tensorflow/lite/version.h"
28 #endif  // BENCHMARK_TENSORFLOW_LITE
29 
30 
xnnpack_sigmoid_f32(benchmark::State & state)31 static void xnnpack_sigmoid_f32(benchmark::State& state) {
32   const size_t batch_size = state.range(0);
33 
34   std::random_device random_device;
35   auto rng = std::mt19937(random_device());
36   auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
37 
38   std::vector<float> input(batch_size + XNN_EXTRA_BYTES / sizeof(float));
39   std::vector<float> output(batch_size);
40   std::generate(input.begin(), input.end(), std::ref(f32rng));
41   std::fill(output.begin(), output.end(), std::nanf(""));
42 
43   xnn_status status = xnn_initialize(nullptr /* allocator */);
44   if (status != xnn_status_success) {
45     state.SkipWithError("failed to initialize XNNPACK");
46     return;
47   }
48 
49   xnn_operator_t sigmoid_op = nullptr;
50   status = xnn_create_sigmoid_nc_f32(
51     1 /* channels */, 1 /* input stride */, 1 /* output stride */,
52     0 /* flags */, &sigmoid_op);
53   if (status != xnn_status_success || sigmoid_op == nullptr) {
54     state.SkipWithError("failed to create Sigmoid operator");
55     return;
56   }
57 
58   status = xnn_setup_sigmoid_nc_f32(
59     sigmoid_op, batch_size,
60     input.data(), output.data(),
61     nullptr /* thread pool */);
62   if (status != xnn_status_success) {
63     state.SkipWithError("failed to setup Sigmoid operator");
64     return;
65   }
66 
67   for (auto _ : state) {
68     status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
69     if (status != xnn_status_success) {
70       state.SkipWithError("failed to run Sigmoid operator");
71       return;
72     }
73   }
74 
75   status = xnn_delete_operator(sigmoid_op);
76   if (status != xnn_status_success) {
77     state.SkipWithError("failed to delete Sigmoid operator");
78     return;
79   }
80 
81   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
82   if (cpu_frequency != 0) {
83     state.counters["cpufreq"] = cpu_frequency;
84   }
85 
86   state.counters["elements"] =
87     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
88 
89   const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
90   state.counters["bytes"] =
91     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
92 }
93 
94 #ifndef XNN_NO_QS8_OPERATORS
xnnpack_sigmoid_qs8(benchmark::State & state)95 static void xnnpack_sigmoid_qs8(benchmark::State& state) {
96   const size_t batch_size = state.range(0);
97 
98   std::random_device random_device;
99   auto rng = std::mt19937(random_device());
100   auto i8rng = std::bind(
101     std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
102     std::ref(rng));
103 
104   std::vector<int8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(int8_t));
105   std::vector<int8_t> output(batch_size);
106   std::generate(input.begin(), input.end(), std::ref(i8rng));
107   std::fill(output.begin(), output.end(), INT8_C(0xA5));
108 
109   xnn_status status = xnn_initialize(nullptr /* allocator */);
110   if (status != xnn_status_success) {
111     state.SkipWithError("failed to initialize XNNPACK");
112     return;
113   }
114 
115   xnn_operator_t sigmoid_op = nullptr;
116   status = xnn_create_sigmoid_nc_qs8(
117     1 /* channels */, 1 /* input stride */, 1 /* output stride */,
118     1 /* input zero point */, 1.0f /* input scale */,
119     -128 /* output zero point */, 1.0f / 256.0f /* output scale */,
120     std::numeric_limits<int8_t>::min() /* output min */, std::numeric_limits<int8_t>::max() /* output max */,
121     0 /* flags */, &sigmoid_op);
122   if (status != xnn_status_success || sigmoid_op == nullptr) {
123     state.SkipWithError("failed to create Sigmoid operator");
124     return;
125   }
126 
127   status = xnn_setup_sigmoid_nc_qs8(
128     sigmoid_op, batch_size,
129     input.data(), output.data(),
130     nullptr /* thread pool */);
131   if (status != xnn_status_success) {
132     state.SkipWithError("failed to setup Sigmoid operator");
133     return;
134   }
135 
136   for (auto _ : state) {
137     status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
138     if (status != xnn_status_success) {
139       state.SkipWithError("failed to run Sigmoid operator");
140       return;
141     }
142   }
143 
144   status = xnn_delete_operator(sigmoid_op);
145   if (status != xnn_status_success) {
146     state.SkipWithError("failed to delete Sigmoid operator");
147     return;
148   }
149 
150   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
151   if (cpu_frequency != 0) {
152     state.counters["cpufreq"] = cpu_frequency;
153   }
154 
155   state.counters["elements"] =
156     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
157 
158   const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
159   state.counters["bytes"] =
160     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
161 }
162 #endif  // XNN_NO_QS8_OPERATORS
163 
164 #ifndef XNN_NO_QU8_OPERATORS
xnnpack_sigmoid_qu8(benchmark::State & state)165 static void xnnpack_sigmoid_qu8(benchmark::State& state) {
166   const size_t batch_size = state.range(0);
167 
168   std::random_device random_device;
169   auto rng = std::mt19937(random_device());
170   auto u8rng = std::bind(
171     std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
172 
173   std::vector<uint8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(uint8_t));
174   std::vector<uint8_t> output(batch_size);
175   std::generate(input.begin(), input.end(), std::ref(u8rng));
176   std::fill(output.begin(), output.end(), UINT8_C(0xA5));
177 
178   xnn_status status = xnn_initialize(nullptr /* allocator */);
179   if (status != xnn_status_success) {
180     state.SkipWithError("failed to initialize XNNPACK");
181     return;
182   }
183 
184   xnn_operator_t sigmoid_op = nullptr;
185   status = xnn_create_sigmoid_nc_qu8(
186     1 /* channels */, 1 /* input stride */, 1 /* output stride */,
187     128 /* input zero point */, 1.0f /* input scale */,
188     0 /* output zero point */, 1.0f / 256.0f /* output scale */,
189     std::numeric_limits<uint8_t>::min() /* output min */, std::numeric_limits<uint8_t>::max() /* output max */,
190     0 /* flags */, &sigmoid_op);
191   if (status != xnn_status_success || sigmoid_op == nullptr) {
192     state.SkipWithError("failed to create Sigmoid operator");
193     return;
194   }
195 
196   status = xnn_setup_sigmoid_nc_qu8(
197     sigmoid_op, batch_size,
198     input.data(), output.data(),
199     nullptr /* thread pool */);
200   if (status != xnn_status_success) {
201     state.SkipWithError("failed to setup Sigmoid operator");
202     return;
203   }
204 
205   for (auto _ : state) {
206     status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
207     if (status != xnn_status_success) {
208       state.SkipWithError("failed to run Sigmoid operator");
209       return;
210     }
211   }
212 
213   status = xnn_delete_operator(sigmoid_op);
214   if (status != xnn_status_success) {
215     state.SkipWithError("failed to delete Sigmoid operator");
216     return;
217   }
218 
219   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
220   if (cpu_frequency != 0) {
221     state.counters["cpufreq"] = cpu_frequency;
222   }
223 
224   state.counters["elements"] =
225     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
226 
227   const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
228   state.counters["bytes"] =
229     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
230 }
231 #endif  // XNN_NO_QU8_OPERATORS
232 
233 #ifdef BENCHMARK_TENSORFLOW_LITE
tflite_sigmoid_f32(benchmark::State & state)234 static void tflite_sigmoid_f32(benchmark::State& state) {
235   const size_t batch_size = state.range(0);
236 
237   std::random_device random_device;
238   auto rng = std::mt19937(random_device());
239   auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
240 
241   flatbuffers::FlatBufferBuilder builder;
242   const flatbuffers::Offset<tflite::OperatorCode> operator_code =
243       CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
244 
245   const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
246     tflite::CreateBuffer(builder, builder.CreateVector({})),
247   }};
248 
249   const std::array<int32_t, 1> shape{{
250     static_cast<int32_t>(batch_size)
251   }};
252 
253   const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
254     tflite::CreateTensor(builder,
255                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
256                          tflite::TensorType_FLOAT32),
257     tflite::CreateTensor(builder,
258                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
259                          tflite::TensorType_FLOAT32),
260   }};
261 
262   const std::array<int32_t, 1> op_inputs{{ 0 }};
263   const std::array<int32_t, 1> op_outputs{{ 1 }};
264   flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
265       builder,
266       0 /* opcode_index */,
267       builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
268       builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
269 
270   const std::array<int32_t, 1> graph_inputs{{ 0 }};
271   const std::array<int32_t, 1> graph_outputs{{ 1 }};
272   const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
273       builder,
274       builder.CreateVector(tensors.data(), tensors.size()),
275       builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
276       builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
277       builder.CreateVector(&op, 1));
278 
279   const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
280       TFLITE_SCHEMA_VERSION,
281       builder.CreateVector(&operator_code, 1),
282       builder.CreateVector(&subgraph, 1),
283       builder.CreateString("Sigmoid model"),
284       builder.CreateVector(buffers.data(), buffers.size()));
285 
286   builder.Finish(model_buffer);
287 
288   const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
289   tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
290   tflite::InterpreterBuilder interpreterBuilder(model, resolver);
291   std::unique_ptr<tflite::Interpreter> interpreter;
292   if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
293     state.SkipWithError("failed to create TFLite interpreter");
294     return;
295   }
296   interpreter->SetNumThreads(1);
297 
298   if (interpreter->AllocateTensors() != kTfLiteOk) {
299     state.SkipWithError("failed to allocate tensors");
300     return;
301   }
302 
303   std::generate(
304     interpreter->typed_tensor<float>(0),
305     interpreter->typed_tensor<float>(0) + batch_size,
306     std::ref(f32rng));
307 
308   for (auto _ : state) {
309     if (interpreter->Invoke() != kTfLiteOk) {
310       state.SkipWithError("failed to invoke TFLite interpreter");
311       return;
312     }
313   }
314 
315   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
316   if (cpu_frequency != 0) {
317     state.counters["cpufreq"] = cpu_frequency;
318   }
319 
320   state.counters["elements"] =
321     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
322 
323   const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
324   state.counters["bytes"] =
325     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
326 
327   interpreter.reset();
328 }
329 
tflite_sigmoid_qs8(benchmark::State & state)330 static void tflite_sigmoid_qs8(benchmark::State& state) {
331   const size_t batch_size = state.range(0);
332 
333   std::random_device random_device;
334   auto rng = std::mt19937(random_device());
335   auto i8rng = std::bind(
336     std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
337     std::ref(rng));
338 
339   flatbuffers::FlatBufferBuilder builder;
340   const flatbuffers::Offset<tflite::OperatorCode> operator_code =
341       CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
342 
343   const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
344     tflite::CreateBuffer(builder, builder.CreateVector({})),
345   }};
346 
347   const std::array<int32_t, 1> shape{{
348     static_cast<int32_t>(batch_size)
349   }};
350 
351   const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
352     tflite::CreateTensor(builder,
353                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
354                          tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
355                          tflite::CreateQuantizationParameters(builder,
356                            0 /*min*/, 0 /*max*/,
357                            builder.CreateVector<float>({1.0f /* scale */}),
358                            builder.CreateVector<int64_t>({1 /* zero point */}))),
359     tflite::CreateTensor(builder,
360                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
361                          tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
362                          tflite::CreateQuantizationParameters(builder,
363                            0 /*min*/, 0 /*max*/,
364                            builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
365                            builder.CreateVector<int64_t>({-128 /* zero point */}))),
366   }};
367 
368   const std::array<int32_t, 1> op_inputs{{ 0 }};
369   const std::array<int32_t, 1> op_outputs{{ 1 }};
370   flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
371       builder,
372       0 /* opcode_index */,
373       builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
374       builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
375 
376   const std::array<int32_t, 1> graph_inputs{{ 0 }};
377   const std::array<int32_t, 1> graph_outputs{{ 1 }};
378   const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
379       builder,
380       builder.CreateVector(tensors.data(), tensors.size()),
381       builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
382       builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
383       builder.CreateVector(&op, 1));
384 
385   const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
386       TFLITE_SCHEMA_VERSION,
387       builder.CreateVector(&operator_code, 1),
388       builder.CreateVector(&subgraph, 1),
389       builder.CreateString("Sigmoid model"),
390       builder.CreateVector(buffers.data(), buffers.size()));
391 
392   builder.Finish(model_buffer);
393 
394   const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
395   tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
396   tflite::InterpreterBuilder interpreterBuilder(model, resolver);
397   std::unique_ptr<tflite::Interpreter> interpreter;
398   if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
399     state.SkipWithError("failed to create TFLite interpreter");
400     return;
401   }
402   interpreter->SetNumThreads(1);
403 
404   if (interpreter->AllocateTensors() != kTfLiteOk) {
405     state.SkipWithError("failed to allocate tensors");
406     return;
407   }
408 
409   std::generate(
410     interpreter->typed_tensor<int8_t>(0),
411     interpreter->typed_tensor<int8_t>(0) + batch_size,
412     std::ref(i8rng));
413 
414   for (auto _ : state) {
415     if (interpreter->Invoke() != kTfLiteOk) {
416       state.SkipWithError("failed to invoke TFLite interpreter");
417       return;
418     }
419   }
420 
421   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
422   if (cpu_frequency != 0) {
423     state.counters["cpufreq"] = cpu_frequency;
424   }
425 
426   state.counters["elements"] =
427     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
428 
429   const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
430   state.counters["bytes"] =
431     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
432 
433   interpreter.reset();
434 }
435 
tflite_sigmoid_qu8(benchmark::State & state)436 static void tflite_sigmoid_qu8(benchmark::State& state) {
437   const size_t batch_size = state.range(0);
438 
439   std::random_device random_device;
440   auto rng = std::mt19937(random_device());
441   auto u8rng = std::bind(
442     std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()),
443     std::ref(rng));
444 
445   flatbuffers::FlatBufferBuilder builder;
446   const flatbuffers::Offset<tflite::OperatorCode> operator_code =
447       CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);
448 
449   const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
450     tflite::CreateBuffer(builder, builder.CreateVector({})),
451   }};
452 
453   const std::array<int32_t, 1> shape{{
454     static_cast<int32_t>(batch_size)
455   }};
456 
457   const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
458     tflite::CreateTensor(builder,
459                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
460                          tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
461                          tflite::CreateQuantizationParameters(builder,
462                            0 /*min*/, 0 /*max*/,
463                            builder.CreateVector<float>({1.0f /* scale */}),
464                            builder.CreateVector<int64_t>({128 /* zero point */}))),
465     tflite::CreateTensor(builder,
466                          builder.CreateVector<int32_t>(shape.data(), shape.size()),
467                          tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
468                          tflite::CreateQuantizationParameters(builder,
469                            0 /*min*/, 0 /*max*/,
470                            builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
471                            builder.CreateVector<int64_t>({0 /* zero point */}))),
472   }};
473 
474   const std::array<int32_t, 1> op_inputs{{ 0 }};
475   const std::array<int32_t, 1> op_outputs{{ 1 }};
476   flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
477       builder,
478       0 /* opcode_index */,
479       builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
480       builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));
481 
482   const std::array<int32_t, 1> graph_inputs{{ 0 }};
483   const std::array<int32_t, 1> graph_outputs{{ 1 }};
484   const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
485       builder,
486       builder.CreateVector(tensors.data(), tensors.size()),
487       builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
488       builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
489       builder.CreateVector(&op, 1));
490 
491   const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
492       TFLITE_SCHEMA_VERSION,
493       builder.CreateVector(&operator_code, 1),
494       builder.CreateVector(&subgraph, 1),
495       builder.CreateString("Sigmoid model"),
496       builder.CreateVector(buffers.data(), buffers.size()));
497 
498   builder.Finish(model_buffer);
499 
500   const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
501   tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
502   tflite::InterpreterBuilder interpreterBuilder(model, resolver);
503   std::unique_ptr<tflite::Interpreter> interpreter;
504   if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
505     state.SkipWithError("failed to create TFLite interpreter");
506     return;
507   }
508   interpreter->SetNumThreads(1);
509 
510   if (interpreter->AllocateTensors() != kTfLiteOk) {
511     state.SkipWithError("failed to allocate tensors");
512     return;
513   }
514 
515   std::generate(
516     interpreter->typed_tensor<uint8_t>(0),
517     interpreter->typed_tensor<uint8_t>(0) + batch_size,
518     std::ref(u8rng));
519 
520   for (auto _ : state) {
521     if (interpreter->Invoke() != kTfLiteOk) {
522       state.SkipWithError("failed to invoke TFLite interpreter");
523       return;
524     }
525   }
526 
527   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
528   if (cpu_frequency != 0) {
529     state.counters["cpufreq"] = cpu_frequency;
530   }
531 
532   state.counters["elements"] =
533     benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
534 
535   const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
536   state.counters["bytes"] =
537     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
538 
539   interpreter.reset();
540 }
541 #endif  // BENCHMARK_TENSORFLOW_LITE
542 
543 BENCHMARK(xnnpack_sigmoid_f32)
544   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
545   ->UseRealTime();
546 #ifndef XNN_NO_QS8_OPERATORS
547   BENCHMARK(xnnpack_sigmoid_qs8)
548     ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
549     ->UseRealTime();
550 #endif  // XNN_NO_QS8_OPERATORS
551 #ifndef XNN_NO_QU8_OPERATORS
552   BENCHMARK(xnnpack_sigmoid_qu8)
553     ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
554     ->UseRealTime();
555 #endif  // XNN_NO_QU8_OPERATORS
556 
557 #ifdef BENCHMARK_TENSORFLOW_LITE
558   BENCHMARK(tflite_sigmoid_f32)
559     ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
560     ->UseRealTime();
561   BENCHMARK(tflite_sigmoid_qs8)
562     ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
563     ->UseRealTime();
564   BENCHMARK(tflite_sigmoid_qu8)
565     ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
566     ->UseRealTime();
567 #endif  // BENCHMARK_TENSORFLOW_LITE
568 
569 #ifndef XNNPACK_BENCHMARK_NO_MAIN
570 BENCHMARK_MAIN();
571 #endif
572