1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "SVDF.h"

#include <vector>

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"
#include "Tracing.h"
24
25 namespace android {
26 namespace nn {
27
SVDF(const Operation & operation,std::vector<RunTimeOperandInfo> & operands)28 SVDF::SVDF(const Operation& operation,
29 std::vector<RunTimeOperandInfo>& operands) {
30 NNTRACE_TRANS("SVDF::SVDF");
31 input_ = GetInput(operation, operands, kInputTensor);
32 weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
33 weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
34 bias_ = GetInput(operation, operands, kBiasTensor);
35 state_in_ = GetInput(operation, operands, kStateInTensor);
36
37 params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam));
38 params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarData<int>(
39 *GetInput(operation, operands, kActivationParam)));
40
41 state_out_ = GetOutput(operation, operands, kStateOutTensor);
42 output_ = GetOutput(operation, operands, kOutputTensor);
43 }
44
Prepare(const Operation & operation,std::vector<RunTimeOperandInfo> & operands,Shape * stateShape,Shape * outputShape)45 bool SVDF::Prepare(const Operation &operation,
46 std::vector<RunTimeOperandInfo> &operands,
47 Shape *stateShape,
48 Shape *outputShape) {
49 NNTRACE_TRANS("SVDF::Prepare");
50 // Check we have all the inputs and outputs we need.
51 const int num_inputs = NumInputsWithValues(operation, operands);
52
53 NN_CHECK(num_inputs == 6 || num_inputs == 7);
54 NN_CHECK_EQ(NumOutputs(operation), 2);
55
56 const RunTimeOperandInfo *input =
57 GetInput(operation, operands, SVDF::kInputTensor);
58 const RunTimeOperandInfo *weights_feature =
59 GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
60 const RunTimeOperandInfo *weights_time =
61 GetInput(operation, operands, SVDF::kWeightsTimeTensor);
62
63 // Check all the parameters of tensor match within themselves and match the
64 // input configuration.
65 const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
66 const uint32_t batch_size = SizeOfDimension(input, 0);
67 const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
68 NN_CHECK_EQ(num_filters % rank, 0);
69 const uint32_t num_units = num_filters / rank;
70 const uint32_t memory_size = SizeOfDimension(weights_time, 1);
71 NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
72 NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);
73
74 const RunTimeOperandInfo *bias =
75 GetInput(operation, operands, kBiasTensor);
76 if (!IsNullInput(bias)) {
77 NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
78 }
79
80 // Resize state.
81 const Shape &inputShape = input->shape();
82 stateShape->type = inputShape.type;
83 stateShape->dimensions = { batch_size, memory_size * num_filters };
84 stateShape->offset = inputShape.offset;
85 stateShape->scale = inputShape.scale;
86
87 // Resize output.
88 outputShape->type = inputShape.type;
89 outputShape->dimensions = { batch_size, num_units };
90 outputShape->offset = inputShape.offset;
91 outputShape->scale = inputShape.scale;
92
93 return true;
94 }
95
Eval()96 bool SVDF::Eval() {
97 NNTRACE_TRANS("SVDF::Eval");
98 switch (input_->type) {
99 case OperandType::TENSOR_FLOAT16: {
100 std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
101 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
102 std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
103 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
104 &inputStateDataFloat32);
105 std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
106 if (!IsNullInput(bias_)) {
107 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
108 &biasDataFloat32);
109 }
110 std::vector<float> weightsFeatureDataFloat32(
111 getNumberOfElements(weights_feature_->shape()));
112 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
113 &weightsFeatureDataFloat32);
114 std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
115 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
116 &weightsTimeDataFloat32);
117 std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
118 std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));
119
120 EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
121 biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
122 weightsTimeDataFloat32.data(), outputDataFloat32.data(),
123 outputStateDataFloat32.data());
124 convertFloat32ToFloat16(outputDataFloat32,
125 reinterpret_cast<_Float16*>(output_->buffer));
126 convertFloat32ToFloat16(outputStateDataFloat32,
127 reinterpret_cast<_Float16*>(state_out_->buffer));
128 break;
129 }
130 case OperandType::TENSOR_FLOAT32: {
131 EvalFloat32(reinterpret_cast<float*>(input_->buffer),
132 reinterpret_cast<float*>(state_in_->buffer),
133 reinterpret_cast<float*>(bias_->buffer),
134 reinterpret_cast<float*>(weights_feature_->buffer),
135 reinterpret_cast<float*>(weights_time_->buffer),
136 reinterpret_cast<float*>(output_->buffer),
137 reinterpret_cast<float*>(state_out_->buffer));
138 break;
139 }
140 default: {
141 LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
142 return false;
143 }
144 }
145 return true;
146 }
147
EvalFloat32(const float * inputData,const float * inputStateData,const float * biasData,const float * weightsFeatureData,const float * weightsTimeData,float * outputData,float * outputStateData)148 void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
149 const float* weightsFeatureData, const float* weightsTimeData,
150 float* outputData, float* outputStateData) {
151 NNTRACE_COMP("SVDF::EvalFloat32");
152
153 const int rank = params_.rank_;
154 const int batch_size = SizeOfDimension(input_, 0);
155 const int input_size = SizeOfDimension(input_, 1);
156 const int num_filters = SizeOfDimension(weights_feature_, 0);
157 const int num_units = num_filters / rank;
158 const int memory_size = SizeOfDimension(weights_time_, 1);
159
160 memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
161 // Compute conv1d(inputs, weights_feature).
162 for (int b = 0; b < batch_size; b++) {
163 float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
164 for (int c = 0; c < num_filters; c++) {
165 float* state_ptr = state_ptr_batch + c * memory_size;
166 state_ptr[memory_size - 1] = 0.0;
167 }
168 }
169 // The state left most column is used to save current cycle activation. This
170 // is achieved by starting at state->data.f[memory_size - 1] and having the
171 // stride equal to memory_size.
172 tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
173 weightsFeatureData, num_filters, input_size, inputData, batch_size,
174 &outputStateData[memory_size - 1], memory_size);
175
176 // Compute matmul(state, weights_time).
177 // The right most column is used to save temporary output (with the size of
178 // num_filters). This is achieved by starting at state->data.f and having the
179 // stride equal to memory_size.
180 float scratch[batch_size * num_filters];
181 for (int b = 0; b < batch_size; b++) {
182 float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
183 float* scratch_ptr_batch = scratch + b * num_filters;
184 tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
185 weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch,
186 /*result_stride=*/1);
187 }
188
189 // Initialize output with bias if provided.
190 if (!IsNullInput(bias_)) {
191 tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
192 } else {
193 tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units);
194 }
195
196 // Reduction sum
197 for (int b = 0; b < batch_size; b++) {
198 float* output_ptr_batch = outputData + b * num_units;
199 float* scratch_ptr_batch = scratch + b * num_filters;
200 tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
201 rank);
202 }
203
204 // Apply activation.
205 for (int b = 0; b < batch_size; b++) {
206 float* output_ptr_batch = outputData + b * num_units;
207 tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
208 params_.activation_, output_ptr_batch);
209 }
210
211 // Right shift the state.
212 for (int b = 0; b < batch_size; b++) {
213 float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
214 for (int f = 0; f < num_filters; f++) {
215 tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
216 /*shift_value=*/0.0);
217 state_out_ptr_batch += memory_size;
218 }
219 }
220 }
221
222 } // namespace nn
223 } // namespace android
224