/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <HalInterfaces.h>
#include <SampleDriver.h>
#include <ValidateHal.h>
#include <android-base/logging.h>
#include <gtest/gtest.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <map>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "HalUtils.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TestNeuralNetworksWrapper.h"

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// Uncomment the following line to generate some debugging output that
// may be useful to determine test coverage for support of dynamic
// temporaries (http://b/132458982):
//
// #define TRACE_DYNTEMP TRACE_DYNTEMP

// We randomly generate tests (model + input data) at runtime, and verify
// that we get the same results whether we do partitioned compilation/execution
// or non-partitioned compilation/execution. We perform a test as follows:
//
// (1) Randomly generate a model (graph and weights), randomly generate input
//     data, randomly assign inputs and outputs to CPU memory or to shared
//     memory.
//
//     Randomly leave dimensions unset for intermediate operands.
//
// (2) Randomly generate drivers based on the sample driver, each of which
//     executes models on the CPU. They differ according to which operations
//     they support.
//
// (3) Compile and execute without partitioning, saving off the results.
//
// (4) Compile and execute with partitioning.
//
// (5) Verify that the saved results from (3) match the results from (4).
//
// For simplicity, all data (model inputs, model outputs, weights,
// temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
// dimensions are fixed throughout a particular test case (and
// randomly determined). This prevents us from having to find a
// mechanism to "resize" data (e.g., if ADD#a operates on data of size
// 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
// and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b?). In the few
// cases where an operand cannot be of this type, it is a constant
// (e.g., activation functions and RNN bias).
//
// Each operation we generate has a signature (described in more
// detail later). The randomly generated drivers decide which
// operations they can execute by checking operation signatures. Once
// we have built the model and know the set of signatures, we randomly
// assign each signature to a driver. No signature is supported by
// multiple drivers -- we're not testing the logic that the
// partitioning algorithm uses to select the best driver for an
// operation.
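//
// For example (illustrative only): if a generated model yields the three
// signatures {ADD, NONE}, {ADD, RELU}, and {MUL, NONE}, we might assign
// {ADD, NONE} and {MUL, NONE} to driver #0 and {ADD, RELU} to driver #1,
// so the partitioning algorithm must split the model between the two
// drivers.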

namespace android {

namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
using CompilationBuilder = nn::CompilationBuilder;
using DeviceManager = nn::DeviceManager;
using Device = nn::Device;
using SharedDevice = nn::SharedDevice;
using ExecutionPlan = nn::ExecutionPlan;
using ExecutionStep = nn::ExecutionStep;
using HalCacheToken = nn::HalCacheToken;
using HalVersion = nn::HalVersion;
using HidlModel = V1_3::Model;
using LogicalStep = nn::LogicalStep;
using ModelBuilder = nn::ModelBuilder;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using WrapperCompilation = nn::test_wrapper::Compilation;
using WrapperExecution = nn::test_wrapper::Execution;
using WrapperMemory = nn::test_wrapper::Memory;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;

namespace {

/// Configure test size //////////////////////////////////////////////////////////

// We may exceed this in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 bytes to by-reference,
// so we want to ensure that we can pick both weights that exceed and weights that
// do not exceed this size.
static const unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
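// For example (illustrative only): ADD fused with RELU has the signature
// {ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_RELU}, while TANH, which has
// no activation function input, has the signature {ANEURALNETWORKS_TANH, -1}.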
typedef std::pair<ANeuralNetworksOperationType, int> Signature;

// This class adds some simple utilities on top of WrapperModel. For example,
// it provides access to certain features from ModelBuilder that are not exposed
// by the base class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
   public:
    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    uint32_t operationCount() const { return mOperations.size(); }

    uint32_t inputCount() const { return builder()->inputCount(); }
    uint32_t outputCount() const { return builder()->outputCount(); }

    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        CHECK(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }

    void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
        const size_t length = value.size() * sizeof(int32_t);

        CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, value.data(), length);
    }

    void setOperandValue(uint32_t index, int32_t value) {
        CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

   private:
    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};

// This class adds some simple utilities on top of WrapperCompilation in order
// to provide access to certain features from CompilationBuilder that are not
// exposed by the base class.
class TestCompilation : public WrapperCompilation {
   public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    using WrapperCompilation::finish;

    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
    }

    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
};

// This class is used to manage a collection of memory regions,
// disjoint windows onto a set of Memory instances, each of which is
// associated with a single shared memory region. Each region and
// Memory instance is assigned a number. The usage pattern is as
// follows:
// - Call addMemory() and addRegion() as many times as needed to
//   declare (but not define) Memory instances and declare region
//   instances.
// - Call layout() to define the Memory instances.
// - Call getRegion() as many times as needed to get the details
//   of memory regions (such as address, or Memory/offset/length).
// The Memory instances created by layout() are owned by the
// TestMemories instance, and are destroyed when the TestMemories
// instance is destroyed.
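//
// A minimal usage sketch (illustrative only; region size chosen arbitrarily):
//
//     TestMemories memories;
//     const unsigned memoryIndex = memories.addMemory();
//     const unsigned regionIndex = memories.addRegion(memoryIndex, 16 * sizeof(float));
//     memories.layout();
//     float* data = static_cast<float*>(memories.getRegion(regionIndex));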
class TestMemories {
   public:
    TestMemories() = default;

    TestMemories(const TestMemories&) = delete;
    TestMemories& operator=(const TestMemories&) = delete;

    unsigned addMemory() {
        CHECK(!mLayoutDone);
        mMemorySizes.push_back(0);
        return memoryCount() - 1;
    }
    unsigned memoryCount() const { return mMemorySizes.size(); }

    unsigned addRegion(unsigned memoryIndex, uint32_t length) {
        CHECK(!mLayoutDone);
        CHECK(memoryIndex < memoryCount());
        uint32_t& memorySize = mMemorySizes[memoryIndex];
        auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
        mRegions.push_back(desc);
        memorySize += length;
        return regionCount() - 1;
    }
    unsigned regionCount() const { return mRegions.size(); }

    void layout();

    void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
                    uint32_t* pLength) {
        CHECK(mLayoutDone);
        CHECK(regionIndex < regionCount());
        const auto& regionDescriptor = mRegions[regionIndex];
        const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
        uint32_t offset = std::get<1>(regionDescriptor);
        uint32_t length = std::get<2>(regionDescriptor);

        uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
        CHECK(buffer != nullptr);

        if (pMemory) *pMemory = memory;
        if (pOffset) *pOffset = offset;
        if (pLength) *pLength = length;

        return buffer + offset;
    }

    void* getRegion(unsigned regionIndex) {
        return getRegion(regionIndex, nullptr, nullptr, nullptr);
    }

   private:
    // Index is the memory index; value is the size of the memory
    // (aggregate size of all regions in the memory).
    std::vector<uint32_t> mMemorySizes;

    // Index is the memory index.
    std::vector<WrapperMemory> mMemories;

    // Index is the region index; tuple represents memory index,
    // region offset within memory, region length.
    std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;

    // For validity checking.
    bool mLayoutDone = false;
};

void TestMemories::layout() {
    CHECK(!mLayoutDone);
    for (uint32_t memorySize : mMemorySizes) {
        auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
        CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
        CHECK(ashmem != nullptr);

        ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
        mMemories.emplace_back(memory);
    }
    mLayoutDone = true;
}

class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
   public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    static Signature getSignature(const HidlModel& model, const V1_3::Operation& operation);

   protected:
    static SharedDevice makeTestDriver(HalVersion version, const char* name,
                                       std::set<Signature> signatures);

    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand). Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };
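
    // A special-input maker is invoked through this pointer to member
    // function, e.g. (illustrative only, with hypothetical locals):
    //
    //     const int operandIndex =
    //             (this->*(pattern.mMakeSpecialInput))(problemSize, &model, inputIndex);
    //
    // where a result >= 0 is the index of the newly created special operand.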

    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an ELU input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
                            unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is alpha, a scalar
        const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
        return int(model->addConstantOperand(&alphaType, 1.0f));
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    class ModelStats {
       public:
        ModelStats(const ModelBuilder* model) : mBuilder(model) {}
        ModelStats(const WrapperModel* model)
            : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
                << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
                << ")";
            return out;
        }

       private:
        const ModelBuilder* mBuilder;
    };

    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    std::mt19937 mRandNumEng;

   private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};

const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};
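
// For example, the first entry above says that ADD first appears in HAL
// version 1.0, takes 3 inputs (two data tensors plus an activation function
// at input index 2) and produces 1 output, and needs no special-input maker.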

HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
    static const auto kOperationToVersion = [] {
        std::map<ANeuralNetworksOperationType, HalVersion> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mMinHalVersion;
        }
        return result;
    }();

    return kOperationToVersion.at(type);
}

Signature RandomPartitioningTest::getSignature(const HidlModel& model,
                                               const V1_3::Operation& operation) {
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    const V1_3::Operand& operand =
            model.main.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == V1_3::OperandType::INT32);
    int32_t value;
    memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
    return Signature(operationType, value);
}

std::string RandomPartitioningTest::to_string(HalVersion version) {
    switch (version) {
        case HalVersion::V1_0:
            return "V1_0";
        case HalVersion::V1_1:
            return "V1_1";
        case HalVersion::V1_2:
            return "V1_2";
        case HalVersion::V1_3:
            return "V1_3";
        default:
            return "V_UNKNOWN";
    }
}

class TestDriver : public SampleDriver {
   public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures)
        : SampleDriver(name), mSignatures(std::move(signatures)) {}

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const V1_0::PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        V1_3::Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
                .ifPerformance = kPerf,
                .whilePerformance = kPerf};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.main.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.main.operations[i])) != 0);
            }
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return hardware::Void();
    }

    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

   private:
    const std::set<Signature> mSignatures;
};

SharedDevice RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
                                                    std::set<Signature> signatures) {
    switch (version) {
        case HalVersion::V1_0:
            return V1_0::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_1:
            return V1_1::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_2:
            return V1_2::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_3:
            return V1_3::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        default:
            ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
            return nullptr;
    }
}

INSTANTIATE_TEST_SUITE_P(Seed, RandomPartitioningTest,
                         ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
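
// Note: the suite instantiation above runs one test per pseudorandom seed in
// the range [kFirstSeed, kFirstSeed + kNumTestCases).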

TEST_P(RandomPartitioningTest, Test) {
    LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();

#ifdef VERBOSE
    std::cout << std::setprecision(2) << std::fixed << std::setw(4);
#endif

    const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
    const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
    const WrapperOperandType unknownDimensionsTypes[] = {
            {WrapperType::TENSOR_FLOAT32, {}},
            {WrapperType::TENSOR_FLOAT32, {0, 0}},
            {WrapperType::TENSOR_FLOAT32, {0, problemSize}},
            {WrapperType::TENSOR_FLOAT32, {problemSize, 0}},
    };
    const unsigned kUnknownDimensionsTypesCount =
            sizeof(unknownDimensionsTypes) / sizeof(unknownDimensionsTypes[0]);

    static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});

    const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
    const bool allowDeadOperations = (randFrac() < 0.2);
    const bool allowUnknownDimensions = (randFrac() < 0.25);

    // TODO: The current algorithm builds the graph in a forward
    // direction (i.e., later-generated operations consume outputs
    // from earlier-generated operations). In order to get more
    // variation in graph topology, perhaps we should also create an
    // algorithm to build the graph in a backward direction (i.e.,
    // later-generated operations produce outputs to be consumed by
    // earlier-generated operations).
    [[maybe_unused]] const bool buildForward = randBool();

    // TODO: Add a form of forced connectivity that operates by
    // joining disjoint subgraphs rather than by forcing a root.
    const bool forceCommonRoot = (randFrac() < 0.75);

    auto computeMode = WrapperExecution::getComputeMode();
    // We check randFrac() independent of compute mode, because we don't want
    // the random number sequence to change depending on compute mode: Compute
    // mode should only affect how we perform the inference, not how we build the
    // Model, the Compilation, or the Execution.
    if (randFrac() < 0.5 && computeMode == WrapperExecution::ComputeMode::ASYNC) {
        computeMode = WrapperExecution::ComputeMode::FENCED;
    }

    TestModel model;
    std::vector<uint32_t> modelInputs;
    std::vector<uint32_t> modelOutputs;

    std::set<uint32_t> operandsWithUnknownDimensions;

    // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    TestMemories weights;

    // Keep track of all normal (i.e., not activation function and not
    // "special") operands that are values (from setOperandValue*()).
    // .first: operand index
    // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    //          otherwise, the operand has yet to be defined, and this is the corresponding
    //          region index in "weights"
    std::vector<std::pair<uint32_t, unsigned>> valueOperands;

    // An operand is "dead" if it is not consumed by another operation
    // and is not a model output. Key is operand index; value is
    // operation index.
    std::map<uint32_t, uint32_t> deadOperands;

    // An operation is "dead" if all of its outputs are dead.
    std::set<uint32_t> deadOperations;

    // Collect the signatures of operations in this model.
    std::set<Signature> signatures;

    // For reporting purposes, keep track of the number of root
    // operations (those that do not consume results produced by other
    // operations).
    unsigned rootOperationCount = 0;

    // Track whether we added operands with unknown dimensions. In this case,
    // partitioned compilation will fail if such an operand is read in a
    // different partition than it is written, and the partition that does the
    // writing is scheduled on a pre-HAL 1.2 (pre-Android Q) device.
    bool hasUnknownDimensions = false;

    // Generate operations.
    for (unsigned i = 0; i < numOperations; i++) {
        const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
        const auto& operationPattern = kOperationPatterns[operationPatternIndex];

        // INPUTS //////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);

        // First, process activation function and special inputs, and
        // keep track of which inputs remain.
        std::vector<uint32_t> normalOperationInputIndexes;
        int32_t activationFunction = -1;
        for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
             operationInputIndex++) {
            if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
                const uint32_t operandIndex = model.addOperand(&activationFunctionType);
                activationFunction = randUInt(4);
                if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
                    // workaround for http://b/69011131
                    activationFunction = ANEURALNETWORKS_FUSED_NONE;
                }
                model.setOperandValue(operandIndex, activationFunction);
                operationInputs[operationInputIndex] = operandIndex;
                continue;
            }
            if (operationPattern.mMakeSpecialInput != nullptr) {
                const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
                        problemSize, &model, operationInputIndex);
                if (operandIndex >= 0) {
                    operationInputs[operationInputIndex] = operandIndex;
                    continue;
                }
            }
            normalOperationInputIndexes.push_back(operationInputIndex);
        }
        CHECK(!normalOperationInputIndexes.empty());
        signatures.insert(Signature(operationPattern.mOperationType, activationFunction));

        // A (normal) operation input can be one of:
        // - a new or existing model input
        // - an output of an existing operation
        // - an OperandValue
        // - an OperandValueFromMemory
        // Some guidelines:
        // - We generally don't want all of an operation's inputs to be values (constants)
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        // How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        // How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation will be; we don't
        // actually pick input operand indexes at this time, because we might override this
        // decision later.
        enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        std::generate(
                normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                [this, &model, numOperations, normalOperationInputCount,
                 &normalOperationInputConstantCount,
                 &normalOperationInputModelInputCount]() -> InputKind {
                    // Constant? Becomes less likely the more
                    // constants we already have as inputs to
                    // this operation.
                    if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                        normalOperationInputCount)) {
                        normalOperationInputConstantCount++;
                        return IK_VALUE;
                    }

                    // Model input? Becomes less likely the
                    // more model inputs we already have as
                    // inputs to this operation, and the further
                    // along we are in generating this model
                    // (i.e., the more operations we have
                    // generated).
                    if ((model.operationCount() == 0) ||
                        (randFrac() < 0.5 *
                                              (1 - double(normalOperationInputModelInputCount) /
                                                           normalOperationInputCount) *
                                              std::min(0.3, (1 - double(model.operationCount()) /
                                                                         numOperations)))) {
                        normalOperationInputModelInputCount++;
                        return IK_SUBGRAPH_INPUT;
                    }

                    // Else output of an existing operation.
                    return IK_OPERATION_OUTPUT;
                });

        // Now force common root or model input, if necessary. (A
        // model must have at least one input.)
        auto force = [this, &normalOperationInputKinds,
                      normalOperationInputCount](InputKind forceKind) {
            if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                             [forceKind](InputKind kind) { return kind == forceKind; })) {
                normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
            }
        };
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        if (modelInputs.empty()) {
            CHECK(model.operationCount() == 0);
            force(IK_SUBGRAPH_INPUT);
        }

        // Finally create the normal inputs.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_SUBGRAPH_INPUT: {
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                                existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        deadOperandI = deadOperands.find(operandIndex);
                        CHECK(deadOperandI == deadOperands.end() ||
                              deadOperandI->second == existingOperationIndex);
                    }
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(),
                                          [this] { return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }

        // OUTPUTS /////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(
                operationOutputs.begin(), operationOutputs.end(),
                [&operandsWithUnknownDimensions, &model, &problemType, &unknownDimensionsTypes,
                 &hasUnknownDimensions, allowUnknownDimensions, this] {
                    // Before the fix for http://b/132458982, 3% unknowns caused
                    // ~35% of partitionings to fail. After the fix, 3%
                    // unknowns cause ~3% of partitionings to fail. (This is
                    // determined by removing the fallback code and noting the
                    // number of failures.)
                    if (allowUnknownDimensions && randFrac() < 0.03) {
                        hasUnknownDimensions = true;
                        uint32_t opndIdx = model.addOperand(
                                &unknownDimensionsTypes[randUInt(kUnknownDimensionsTypesCount)]);
                        operandsWithUnknownDimensions.insert(opndIdx);
                        return opndIdx;
                    } else {
                        return model.addOperand(&problemType);
                    }
                });

        // OPERATION ///////////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
                                                           operationInputs, operationOutputs);
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }

    // Now finalize the weights.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        if (regionIndex == ~0U) {
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        CHECK(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }

    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead. The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                const double probabilityOfModelOutput =
                        0.50 * [](double x) { return x * x; }(double(operationIdx + 1) /
                                                              operationCount);
                modelOutput = (randFrac() < probabilityOfModelOutput);
            } else {
                // This is consumed within the model, so we'll rarely
                // make it an output of the model.
                modelOutput = (randFrac() < 0.05);
            }
            if (!modelOutput) {
                continue;
            }
            modelOutputs.push_back(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                deadOperands.erase(deadOperandI);
                const auto deadOperationI = deadOperations.find(operationIdx);
                if (deadOperationI != deadOperations.end()) {
                    deadOperations.erase(deadOperationI);
                }
            }
        }
    }
    if (!allowDeadOperations) {
        // For each dead operation, pick a random output to become a model output.
        for (uint32_t deadOperationIndex : deadOperations) {
            const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
            const uint32_t deadOperandIndex =
                    deadOperationOutputs[randUInt(deadOperationOutputs.size())];
            modelOutputs.push_back(deadOperandIndex);
        }
    }
    // A model must have at least one output.
    if (modelOutputs.empty()) {
        const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
        modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    }
    if (computeMode == WrapperExecution::ComputeMode::FENCED) {
        if (std::any_of(modelOutputs.begin(), modelOutputs.end(),
                        [&operandsWithUnknownDimensions](uint32_t opndIdx) {
                            return operandsWithUnknownDimensions.count(opndIdx) != 0;
                        })) {
            // Workaround for http://b/162980246: Fenced execution is documented
            // as requiring model outputs to have fully specified dimensions,
            // either from Model or from Execution, but its implementation
            // requires this to come from Model. This test only guarantees that
            // they have fully specified dimensions from Execution. So in the
            // case of a Model where some output does not have fully specified
            // dimensions, perform asynchronous execution instead.
            computeMode = WrapperExecution::ComputeMode::ASYNC;
        }
    }

    model.identifyInputsAndOutputs(modelInputs, modelOutputs);
#ifdef VERBOSE
    {
        std::cout << "Original model: " << ModelStats(&model) << std::endl;
        std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
        if (allowDeadOperations) {
            std::cout << deadOperations.size();
        } else {
            std::cout << "forbidden (converted " << deadOperations.size() << ")";
        }
        std::cout << std::endl;
    }
#endif
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Non-partitioned compilation.
    TestCompilation c(&model);
    ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(c.finish(), Result::NO_ERROR);

    // Create some drivers for partitioned compilation.
    CHECK(!signatures.empty());
    std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    // First assign each signature to a random driver (a driver is
    // just represented as an entry in the signaturesForDriver
    // vector).
    for (Signature signature : signatures) {
        signaturesForDriver[randUInt(signatures.size())].insert(signature);
    }
    // Now remove each entry that has no signatures.
    auto firstExtra =
            std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
                           [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    if (firstExtra != signaturesForDriver.end()) {
        signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    }
    // Now actually create the drivers.
    std::vector<std::shared_ptr<Device>> devices;
    for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
        const auto& signaturesForThisDriver = signaturesForDriver[i];
        // The minimum HAL version for this driver is the highest minimum HAL
        // version of any operation supported by this driver.
        const HalVersion minHalVersion = getMinHalVersion(
                std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
                                 [](const Signature& a, const Signature& b) {
                                     return getMinHalVersion(a.first) < getMinHalVersion(b.first);
                                 })
                        ->first);
        const HalVersion actualHalVersion =
                static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
                                        randUInt(static_cast<int32_t>(HalVersion::LATEST) -
                                                 static_cast<int32_t>(minHalVersion) + 1));
        const std::string name =
                "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
#ifdef VERBOSE
        std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
                             to_string(minHalVersion)
                  << std::endl;
#endif
        auto device = DeviceManager::forTest_makeDriverDevice(
                makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
        devices.push_back(device);
    }
    // CPU fallback device
    devices.push_back(DeviceManager::getCpuDevice());

    // Partitioned compilation.
    //
    // If a test case has both (a) unknown intermediate operand sizes and
    // (b) partitions scheduled on pre-HAL 1.2 (pre-Android Q) devices, fallback
    // is needed if the non-fallback partitioning fails.
    //
    // The issue is that prior to HAL 1.2, an output operand must have a known
    // size provided either in the Model or in the Request; and in the case of
    // partitioning, an intermediate operand of the original model that becomes
    // an output operand of a partition won't have a known size provided in the
    // Request.
    //
    // If a test case has a step model with no inputs or no outputs, fallback is needed.
    // This is because our HAL specification requires a model to have at least one
    // input and one output.
    //
    // If a fallback is needed, we retry the compilation with a fallback and require
    // the fallback to succeed. Otherwise, we require the partitioning to succeed
    // without CPU fallback.
    TestCompilation cNoFallback(&model, devices);
    TestCompilation cWithFallback(&model, devices);
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish();
    const bool fallbackNeededForDynamicTemporaries =
            compilationResult == Result::OP_FAILED && hasUnknownDimensions &&
            cNoFallback.getExecutionPlan().hasDynamicTemporaries() &&
            std::any_of(devices.begin(), devices.end(), [](const std::shared_ptr<Device>& device) {
                return !isCompliantVersion(nn::kHalVersionV1_2ToApi.canonical,
                                           device->getFeatureLevel());
            });
    const bool fallbackNeededForStepModelWithNoInputsOrNoOutputs =
            cNoFallback.getExecutionPlan().forTest_hasStepModelWithNoInputsOrNoOutputs();
    const bool fallbackNeeded = fallbackNeededForDynamicTemporaries ||
                                fallbackNeededForStepModelWithNoInputsOrNoOutputs;
    if (fallbackNeeded) {
        ASSERT_EQ(compilationResult, Result::OP_FAILED);

        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        compilationResult = cWithFallback.finish();
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);

        const ExecutionPlan& plan = cNoFallback.getExecutionPlan();
        if (signaturesForDriver.size() == 1) {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_TRUE(plan.forTest_simpleGetDevice() != DeviceManager::getCpuDevice());
        } else {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
            auto stepToDeviceId = [](const std::shared_ptr<LogicalStep>& step) {
                return step->executionStep()->getDevice();
            };
            std::set<decltype(stepToDeviceId(plan.forTest_compoundGetSteps()[0]))> deviceSet;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                deviceSet.insert(stepToDeviceId(step));
            }
            // TODO(b/178517567): Figure out why we sometimes have 1 more
            // signature than we have devices -- this means that we've scheduled
            // one or more operations onto the CPU fallback device, which is not
            // something we ever expect to do.
            ASSERT_TRUE(deviceSet.size() == signaturesForDriver.size() ||
                        deviceSet.size() == signaturesForDriver.size() + 1);
        }
    }
    TestCompilation& c2 = (fallbackNeeded ? cWithFallback : cNoFallback);
#ifdef TRACE_DYNTEMP
    {
        const ExecutionPlan& plan = c2.getExecutionPlan();
        const size_t dynamicTemporaryCount = plan.forTest_flatGetDynamicTemporaries().size();
        std::cout << "TRACE_DYNTEMP: dynamic temporary count = " << dynamicTemporaryCount
                  << std::endl;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            size_t stepsWithModelOutputsThatAreDownstreamInputs = 0;
            size_t countOfModelOutputsThatAreDownstreamInputs = 0;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                if (const size_t count = step->executionStep()
                                                 ->getModelOutputsThatAreDownstreamInputs()
                                                 .size()) {
                    ++stepsWithModelOutputsThatAreDownstreamInputs;
                    countOfModelOutputsThatAreDownstreamInputs += count;
                }
            }
            if (countOfModelOutputsThatAreDownstreamInputs != 0) {
                std::cout << "TRACE_DYNTEMP: model outputs that are downstream inputs: "
                          << countOfModelOutputsThatAreDownstreamInputs << " / "
                          << modelOutputs.size() << ", over "
                          << stepsWithModelOutputsThatAreDownstreamInputs << " / "
                          << plan.forTest_compoundGetSteps().size() << " steps" << std::endl;
                EXPECT_LE(countOfModelOutputsThatAreDownstreamInputs, modelOutputs.size());
            }
        } else {
            EXPECT_EQ(dynamicTemporaryCount, size_t(0))
                    << "Only COMPOUND plan should have dynamic temporaries";
        }
    }
#endif

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
                  << std::endl;
        // TODO: When dumping steps, include non-ExecutionSteps.
        const ExecutionPlan& plan = c2.getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    if (const auto* executionStep = step->tryExecutionStep()) {
                        devicesInPlan.insert(executionStep->getDevice().get());
                    }
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    if (const auto executionStep = steps[i]->tryExecutionStep()) {
                        std::cout << "Step " << i << ": "
                                  << ModelStats(executionStep->getStepModel())
                                  << ", device = " << executionStep->getDevice()->getName()
                                  << std::endl;
                    }
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                          << static_cast<unsigned>(plan.forTest_getKind());
                break;
        }
    }
#endif

    // For execution:
    // - create golden inputs (one long vector) and golden output value
    //   - golden inputs will be copied to actual inputs before each
    //     of the two executions
    //   - golden output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs. Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution. Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> goldenInputs(problemSize * problemSize * model.inputCount());
    std::generate(goldenInputs.begin(), goldenInputs.end(), [this] { return randFrac(); });
#ifdef VERBOSE
    {
        std::cout << "flat inputs = ";
        dump(goldenInputs.begin(), goldenInputs.end());
    }
#endif
    const float goldenOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
                                                         : InputOutputDescriptor::OUTPUT);
    }
    // We randomly interleave inputs and outputs in creation
    // order, because when we create memory regions in a
    // TestMemories instance, the order in which regions are
    // created within a single Memory is the order they'll be laid
    // out in that memory; and when we have inputs and outputs
    // within the same Memory, we want the possibility that
    // they'll be interleaved.
    std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
    TestMemories ioMemories;
    for (auto& desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: commonize this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();

    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).
    auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &goldenInputs, &goldenOutput,
                                problemSize, &problemType](WrapperExecution* e) {
        uint32_t inputIndex = 0, outputIndex = 0;
        for (auto& desc : ioDescriptors) {
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              desc.mVector.begin());
                    e->setInput(inputIndex++, desc.mVector.data(),
                                desc.mVector.size() * sizeof(float));
                } else {
                    std::fill(desc.mVector.begin(),
                              desc.mVector.begin() + problemSize * problemSize, goldenOutput);
                    e->setOutput(outputIndex++, desc.mVector.data(),
                                 desc.mVector.size() * sizeof(float), &problemType.operandType);
                }
            } else {
                const WrapperMemory* memory;
                uint32_t offset, length;
                float* region = static_cast<float*>(
                        ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
                CHECK(length == problemSize * problemSize * sizeof(float));
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              region);
                    e->setInputFromMemory(inputIndex++, memory, offset, length);
                } else {
                    std::fill(region, region + problemSize * problemSize, goldenOutput);
                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                           &problemType.operandType);
                }
            }
        }
        CHECK(inputIndex == model.inputCount());
        CHECK(outputIndex == model.outputCount());
    };

    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(computeMode), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(), desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region, region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }

    // Partitioned execution.
    WrapperExecution e2(&c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(computeMode), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << " partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "part output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}

}  // namespace
}  // namespace android