1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Operations"
18
#include "Multinomial.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "CpuExecutor.h"
#include "Tracing.h"

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#include <tensorflow/lite/kernels/internal/tensor_utils.h>

#include <unsupported/Eigen/CXX11/Tensor>

#include "CpuOperationUtils.h"
#include "guarded_philox_random.h"
#include "philox_random.h"
#include "simple_philox.h"
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION
38
39 namespace android {
40 namespace nn {
41
42 namespace {
43
44 template <typename T>
GetBuffer(RunTimeOperandInfo * operand)45 inline T* GetBuffer(RunTimeOperandInfo* operand) {
46 return reinterpret_cast<T*>(operand->buffer);
47 }
48
49 template <typename T>
GetBuffer(const RunTimeOperandInfo * operand)50 inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
51 return reinterpret_cast<const T*>(operand->buffer);
52 }
53
54 } // namespace
55
Multinomial(const Operation & operation,RunTimeOperandInfo * operands)56 Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
57 NNTRACE_TRANS("Multinomial::Multinomial");
58 input_ = GetInput(operation, operands, kInputTensor);
59 sample_count_ = getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
60 random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);
61
62 output_ = GetOutput(operation, operands, kOutputTensor);
63 }
64
Prepare(const Operation & operation,RunTimeOperandInfo * operands,Shape * outputShape)65 bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
66 Shape* outputShape) {
67 NNTRACE_TRANS("Multinomial::Prepare");
68 NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
69 NN_CHECK_EQ(NumOutputs(operation), 1);
70
71 const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
72 const Shape& inputShape = input->shape();
73
74 const uint32_t batch_size = SizeOfDimension(input, 0);
75 const uint32_t sample_count =
76 getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
77
78 outputShape->type = OperandType::TENSOR_INT32;
79 outputShape->dimensions = {batch_size, sample_count};
80 outputShape->offset = inputShape.offset;
81 outputShape->scale = inputShape.scale;
82
83 return true;
84 }
85
Eval()86 bool Multinomial::Eval() {
87 NNTRACE_COMP("Multinomial::Eval");
88 switch (input_->type) {
89 case OperandType::TENSOR_FLOAT16: {
90 std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
91 convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
92 EvalFloat32(inputDataFloat32.data());
93 break;
94 }
95 case OperandType::TENSOR_FLOAT32: {
96 EvalFloat32(GetBuffer<float>(input_));
97 break;
98 }
99 default: {
100 LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
101 return false;
102 }
103 }
104 return true;
105 }
106
EvalFloat32(const float * inputData)107 void Multinomial::EvalFloat32(const float* inputData) {
108 const int batch_size = SizeOfDimension(input_, 0);
109 const int class_size = SizeOfDimension(input_, 1);
110
111 tensorflow::GuardedPhiloxRandom random_generator;
112 int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
113 random_generator.Init(seeds[0], seeds[1]);
114
115 // PhiloxRandom produces results as 4 32-bit integers.
116 int sample_count_aligned = (sample_count_ + 3) / 4 * 4;
117 // The CPU operation uses 64-bit double values, so two results per sample.
118 sample_count_aligned *= 2;
119 auto random_generator_reserved =
120 random_generator.ReserveRandomOutputs(batch_size * sample_count_aligned, 256);
121 tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);
122
123 for (uint64_t b = 0; b < batch_size; ++b) {
124 const float* input_ptr_batch = inputData + b * class_size;
125 float max = std::numeric_limits<float>::lowest();
126 for (uint64_t j = 0; j < class_size; ++j) {
127 if (Eigen::numext::isfinite(input_ptr_batch[j])) {
128 max = std::max(max, input_ptr_batch[j]);
129 }
130 }
131 const double batch_max = static_cast<double>(max);
132 double total = 0;
133 std::vector<double> cdf;
134 cdf.resize(class_size);
135 for (uint64_t j = 0; j < class_size; ++j) {
136 if (Eigen::numext::isfinite(static_cast<float>(input_ptr_batch[j]))) {
137 total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
138 }
139 cdf[j] = total;
140 }
141
142 auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
143 for (uint64_t j = 0; j < sample_count_; ++j) {
144 const double target = simple_philox.RandDouble() * total;
145 auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
146 output_ptr_batch[j] = std::distance(cdf.begin(), found_iter);
147 }
148 }
149 }
150
151 } // namespace nn
152 } // namespace android
153