/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <HalInterfaces.h>
#include <SampleDriver.h>
#include <ValidateHal.h>
#include <android-base/logging.h>
#include <gtest/gtest.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "HalUtils.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TestNeuralNetworksWrapper.h"

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// Uncomment the following line to generate some debugging output that
// may be useful to determine test coverage for support of dynamic
// temporaries (http://b/132458982):
//
// #define TRACE_DYNTEMP TRACE_DYNTEMP

// We randomly generate tests (model + input data) at runtime, and verify
// that we get the same results whether we do partitioned compilation/execution
// or non-partitioned compilation/execution.  We perform a test as follows:
//
// (1) Randomly generate a model (graph and weights), randomly generate input
//     data, randomly assign inputs and outputs to CPU memory or to shared
//     memory.
//
//     Randomly leave dimensions unset for intermediate operands.
//
// (2) Randomly generate drivers based on the sample driver, each of which
//     executes models on the CPU.  They differ according to which operations
//     they support.
//
// (3) Compile and execute without partitioning, saving off the results.
//
// (4) Compile and execute with partitioning.
//
// (5) Verify that the saved results from (3) match the results from (4).
//
// For simplicity, all data (model inputs, model outputs, weights,
// temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
// dimensions are fixed throughout a particular test case (and
// randomly determined).  This prevents us from having to find a
// mechanism to "resize" data (e.g., if ADD#a operates on data of size
// 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
// and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b?).  In the few
// cases where an operand cannot be of this type, it is a constant
// (e.g., activation functions and RNN bias).
//
// Each operation we generate has a signature (described in more
// detail later).  The randomly generated drivers decide which
// operations they can execute by checking operation signatures.  Once
// we have built the model and know the set of signatures, we randomly
// assign each signature to a driver.  No signature is supported by
// multiple drivers -- we're not testing the logic that the
// partitioning algorithm uses to select the best driver for an
// operation.

namespace android {

namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
using CompilationBuilder = nn::CompilationBuilder;
using DeviceManager = nn::DeviceManager;
using Device = nn::Device;
using SharedDevice = nn::SharedDevice;
using ExecutionPlan = nn::ExecutionPlan;
using ExecutionStep = nn::ExecutionStep;
using HalCacheToken = nn::HalCacheToken;
using HalVersion = nn::HalVersion;
using HidlModel = V1_3::Model;
using LogicalStep = nn::LogicalStep;
using ModelBuilder = nn::ModelBuilder;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using WrapperCompilation = nn::test_wrapper::Compilation;
using WrapperExecution = nn::test_wrapper::Execution;
using WrapperMemory = nn::test_wrapper::Memory;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;

namespace {

/// Configure test size //////////////////////////////////////////////////////////

// We may exceed this in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 bytes to by-reference,
// so we want to ensure that we can pick both sizes that exceed and sizes that do
// not exceed this threshold.
static const unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
typedef std::pair<ANeuralNetworksOperationType, int> Signature;
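
// As an illustrative sketch (not part of the test logic): ADD fused with
// RELU and ADD fused with NONE have distinct signatures, and so may be
// assigned to different drivers:
//
//     const Signature addRelu(ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_RELU);
//     const Signature addNone(ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_NONE);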

// This class adds some simple utilities on top of WrapperModel.  For example,
// it provides access to certain features from ModelBuilder that are not exposed
// by the base class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
   public:
    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    uint32_t operationCount() const { return mOperations.size(); }

    uint32_t inputCount() const { return builder()->inputCount(); }
    uint32_t outputCount() const { return builder()->outputCount(); }

    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        CHECK(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }

    void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
        const size_t length = value.size() * sizeof(int32_t);

        CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, value.data(), length);
    }

    void setOperandValue(uint32_t index, int32_t value) {
        CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

   private:
    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};

// This class adds some simple utilities on top of WrapperCompilation in order
// to provide access to certain features from CompilationBuilder that are not
// exposed by the base class.
class TestCompilation : public WrapperCompilation {
   public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    using WrapperCompilation::finish;

    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
    }

    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
};

// This class is used to manage a collection of memory regions,
// disjoint windows onto a set of Memory instances, each of which is
// associated with a single shared memory region.  Each region and
// Memory instance is assigned a number.  The usage pattern is as
// follows:
// - Call addMemory() and addRegion() as many times as needed to
//   declare (but not define) Memory instances and declare region
//   instances.
// - Call layout() to define the Memory instances.
// - Call getRegion() as many times as needed to get the details
//   of memory regions (such as address, or Memory/offset/length).
// The Memory instances created by layout() are owned by the
// TestMemories instance, and are destroyed when the TestMemories
// instance is destroyed.
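//
// A minimal usage sketch of that pattern (illustrative only; the memory
// count and region size here are arbitrary):
//
//     TestMemories memories;
//     const unsigned memoryIndex = memories.addMemory();
//     const unsigned regionIndex = memories.addRegion(memoryIndex, 64 * sizeof(float));
//     memories.layout();
//     float* data = static_cast<float*>(memories.getRegion(regionIndex));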
class TestMemories {
   public:
    TestMemories() = default;

    TestMemories(const TestMemories&) = delete;
    TestMemories& operator=(const TestMemories&) = delete;

    unsigned addMemory() {
        CHECK(!mLayoutDone);
        mMemorySizes.push_back(0);
        return memoryCount() - 1;
    }
    unsigned memoryCount() const { return mMemorySizes.size(); }

    unsigned addRegion(unsigned memoryIndex, uint32_t length) {
        CHECK(!mLayoutDone);
        CHECK(memoryIndex < memoryCount());
        uint32_t& memorySize = mMemorySizes[memoryIndex];
        auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
        mRegions.push_back(desc);
        memorySize += length;
        return regionCount() - 1;
    }
    unsigned regionCount() const { return mRegions.size(); }

    void layout();

    void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
                    uint32_t* pLength) {
        CHECK(mLayoutDone);
        CHECK(regionIndex < regionCount());
        const auto& regionDescriptor = mRegions[regionIndex];
        const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
        uint32_t offset = std::get<1>(regionDescriptor);
        uint32_t length = std::get<2>(regionDescriptor);

        uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
        CHECK(buffer != nullptr);

        if (pMemory) *pMemory = memory;
        if (pOffset) *pOffset = offset;
        if (pLength) *pLength = length;

        return buffer + offset;
    }

    void* getRegion(unsigned regionIndex) {
        return getRegion(regionIndex, nullptr, nullptr, nullptr);
    }

   private:
    // Index is the memory index; value is the size of the memory
    // (aggregate size of all regions in the memory).
    std::vector<uint32_t> mMemorySizes;

    // Index is the memory index.
    std::vector<WrapperMemory> mMemories;

    // Index is the region index; tuple represents memory index,
    // region offset within memory, region length.
    std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;

    // For validity checking.
    bool mLayoutDone = false;
};

void TestMemories::layout() {
    CHECK(!mLayoutDone);
    for (uint32_t memorySize : mMemorySizes) {
        auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
        CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
        CHECK(ashmem != nullptr);

        ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
        mMemories.emplace_back(memory);
    }
    mLayoutDone = true;
}

class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
   public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    static Signature getSignature(const HidlModel& model, const V1_3::Operation& operation);

   protected:
    static SharedDevice makeTestDriver(HalVersion version, const char* name,
                                       std::set<Signature> signatures);

    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

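    // For illustration only: invoking a pattern's special-input maker through
    // the pointer to member function looks like this (mirroring the generation
    // loop in the test body below), where "pattern" is an OperationPattern
    // whose mMakeSpecialInput is non-null:
    //
    //     const int operandIndex =
    //             (this->*(pattern.mMakeSpecialInput))(problemSize, &model, inputIndex);
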
    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an ELU input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
                            unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is alpha, a scalar
        const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
        return int(model->addConstantOperand(&alphaType, 1.0f));
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    class ModelStats {
       public:
        ModelStats(const ModelBuilder* model) : mBuilder(model) {}
        ModelStats(const WrapperModel* model)
            : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
                << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
                << ")";
            return out;
        }

       private:
        const ModelBuilder* mBuilder;
    };

    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    std::mt19937 mRandNumEng;

   private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};

const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};

HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
    static const auto kOperationToVersion = [] {
        std::map<ANeuralNetworksOperationType, HalVersion> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mMinHalVersion;
        }
        return result;
    }();

    return kOperationToVersion.at(type);
}

Signature RandomPartitioningTest::getSignature(const HidlModel& model,
                                               const V1_3::Operation& operation) {
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    const V1_3::Operand& operand =
            model.main.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == V1_3::OperandType::INT32);
    int32_t value;
    memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
    return Signature(operationType, value);
}

std::string RandomPartitioningTest::to_string(HalVersion version) {
    switch (version) {
        case HalVersion::V1_0:
            return "V1_0";
        case HalVersion::V1_1:
            return "V1_1";
        case HalVersion::V1_2:
            return "V1_2";
        case HalVersion::V1_3:
            return "V1_3";
        default:
            return "V_UNKNOWN";
    }
}

class TestDriver : public SampleDriver {
   public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures)
        : SampleDriver(name), mSignatures(std::move(signatures)) {}

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const V1_0::PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        V1_3::Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
                .ifPerformance = kPerf,
                .whilePerformance = kPerf};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.main.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.main.operations[i])) != 0);
            }
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return hardware::Void();
    }

    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

   private:
    const std::set<Signature> mSignatures;
};

SharedDevice RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
                                                    std::set<Signature> signatures) {
    switch (version) {
        case HalVersion::V1_0:
            return V1_0::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_1:
            return V1_1::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_2:
            return V1_2::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        case HalVersion::V1_3:
            return V1_3::utils::Device::create(name, new TestDriver(name, std::move(signatures)))
                    .value();
        default:
            ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
            return nullptr;
    }
}

INSTANTIATE_TEST_SUITE_P(Seed, RandomPartitioningTest,
                         ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));

TEST_P(RandomPartitioningTest, Test) {
    LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();

#ifdef VERBOSE
    std::cout << std::setprecision(2) << std::fixed << std::setw(4);
#endif

    const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
    const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
    const WrapperOperandType unknownDimensionsTypes[] = {
            {WrapperType::TENSOR_FLOAT32, {}},
            {WrapperType::TENSOR_FLOAT32, {0, 0}},
            {WrapperType::TENSOR_FLOAT32, {0, problemSize}},
            {WrapperType::TENSOR_FLOAT32, {problemSize, 0}},
    };
    const unsigned kUnknownDimensionsTypesCount =
            sizeof(unknownDimensionsTypes) / sizeof(unknownDimensionsTypes[0]);

    static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});

    const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
    const bool allowDeadOperations = (randFrac() < 0.2);
    const bool allowUnknownDimensions = (randFrac() < 0.25);

    // TODO: The current algorithm builds the graph in a forward
    // direction (i.e., later-generated operations consume outputs
    // from earlier-generated operations).  In order to get more
    // variation in graph topology, perhaps we should also create an
    // algorithm to build the graph in a backward direction (i.e.,
    // later-generated operations produce outputs to be consumed by
    // earlier-generated operations).
    [[maybe_unused]] const bool buildForward = randBool();

    // TODO: Add a form of forced connectivity that operates by
    // joining disjoint subgraphs rather than by forcing a root.
    const bool forceCommonRoot = (randFrac() < 0.75);

    auto computeMode = WrapperExecution::getComputeMode();
    // We check randFrac() independent of compute mode, because we don't want
    // the random number sequence to change depending on compute mode: Compute
    // mode should only affect how we perform the inference, not how we build the
    // Model, the Compilation, or the Execution.
    if (randFrac() < 0.5 && computeMode == WrapperExecution::ComputeMode::ASYNC) {
        computeMode = WrapperExecution::ComputeMode::FENCED;
    }

    TestModel model;
    std::vector<uint32_t> modelInputs;
    std::vector<uint32_t> modelOutputs;

    std::set<uint32_t> operandsWithUnknownDimensions;

    // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    TestMemories weights;

    // Keep track of all normal (i.e., not activation function and not
    // "special") operands that are values (from setOperandValue*()).
    // .first: operand index
    // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    //          otherwise, the operand has yet to be defined, and this is the corresponding
    //          region index in "weights"
    std::vector<std::pair<uint32_t, unsigned>> valueOperands;

    // An operand is "dead" if it is not consumed by another operation
    // and is not a model output.  Key is operand index; value is
    // operation index.
    std::map<uint32_t, uint32_t> deadOperands;

    // An operation is "dead" if all of its outputs are dead.
    std::set<uint32_t> deadOperations;

    // Collect the signatures of operations in this model.
    std::set<Signature> signatures;

    // For reporting purposes, keep track of the number of root
    // operations (those that do not consume results produced by other
    // operations).
    unsigned rootOperationCount = 0;

    // Track whether we added operands with unknown dimensions. In this case,
    // partitioned compilation will fail if such an operand is read in a
    // different partition than it is written, and the partition that does the
    // writing is scheduled on a pre-HAL 1.2 (pre-Android Q) device.
    bool hasUnknownDimensions = false;

    // Generate operations.
    for (unsigned i = 0; i < numOperations; i++) {
        const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
        const auto& operationPattern = kOperationPatterns[operationPatternIndex];

        // INPUTS //////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);

        // First, process activation function and special inputs, and
        // keep track of which inputs remain.
        std::vector<uint32_t> normalOperationInputIndexes;
        int32_t activationFunction = -1;
        for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
             operationInputIndex++) {
            if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
                const uint32_t operandIndex = model.addOperand(&activationFunctionType);
                activationFunction = randUInt(4);
                if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
                    // workaround for http://b/69011131
                    activationFunction = ANEURALNETWORKS_FUSED_NONE;
                }
                model.setOperandValue(operandIndex, activationFunction);
                operationInputs[operationInputIndex] = operandIndex;
                continue;
            }
            if (operationPattern.mMakeSpecialInput != nullptr) {
                const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
                        problemSize, &model, operationInputIndex);
                if (operandIndex >= 0) {
                    operationInputs[operationInputIndex] = operandIndex;
                    continue;
                }
            }
            normalOperationInputIndexes.push_back(operationInputIndex);
        }
        CHECK(!normalOperationInputIndexes.empty());
        signatures.insert(Signature(operationPattern.mOperationType, activationFunction));

        // A (normal) operation input can be one of:
        // - a new or existing model input
        // - an output of an existing operation
        // - an OperandValue
        // - an OperandValueFromMemory
        // Some guidelines:
        // - We generally don't want all of an operation's inputs to be values (constants)
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        //     How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        //     How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation will be; we don't
        // actually pick input operand indexes at this time, because we might override this
        // decision later.
        enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        std::generate(
                normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                [this, &model, numOperations, normalOperationInputCount,
                 &normalOperationInputConstantCount,
                 &normalOperationInputModelInputCount]() -> InputKind {
                    // Constant?  Becomes less likely the more
                    // constants we already have as inputs to
                    // this operation.
                    if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                        normalOperationInputCount)) {
                        normalOperationInputConstantCount++;
                        return IK_VALUE;
                    }

                    // Model input?  Becomes less likely the
                    // more model inputs we already have as
                    // inputs to this operation, and the further
                    // along we are in generating this model
                    // (i.e., the more operations we have
                    // generated).
                    if ((model.operationCount() == 0) ||
                        (randFrac() < 0.5 *
                                              (1 - double(normalOperationInputModelInputCount) /
                                                           normalOperationInputCount) *
                                              std::min(0.3, (1 - double(model.operationCount()) /
                                                                         numOperations)))) {
                        normalOperationInputModelInputCount++;
                        return IK_SUBGRAPH_INPUT;
                    }

                    // Else output of an existing operation.
                    return IK_OPERATION_OUTPUT;
                });

        // Now force common root or model input, if necessary.  (A
        // model must have at least one input.)
        auto force = [this, &normalOperationInputKinds,
                      normalOperationInputCount](InputKind forceKind) {
            if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                             [forceKind](InputKind kind) { return kind == forceKind; })) {
                normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
            }
        };
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        if (modelInputs.empty()) {
            CHECK(model.operationCount() == 0);
            force(IK_SUBGRAPH_INPUT);
        }

        // Finally create the normal inputs.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_SUBGRAPH_INPUT: {
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                                existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        deadOperandI = deadOperands.find(operandIndex);
                        CHECK(deadOperandI == deadOperands.end() ||
                              deadOperandI->second == existingOperationIndex);
                    }
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(),
                                          [this] { return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }

        // OUTPUTS /////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(
                operationOutputs.begin(), operationOutputs.end(),
                [&operandsWithUnknownDimensions, &model, &problemType, &unknownDimensionsTypes,
                 &hasUnknownDimensions, allowUnknownDimensions, this] {
                    // Before the fix for http://b/132458982, 3% unknowns causes
                    // ~35% of partitionings to fail.  After the fix, 3%
                    // unknowns causes ~3% of partitionings to fail.  (This is
                    // determined by removing the fallback code and noting the
                    // number of failures.)
                    if (allowUnknownDimensions && randFrac() < 0.03) {
                        hasUnknownDimensions = true;
                        uint32_t opndIdx = model.addOperand(
                                &unknownDimensionsTypes[randUInt(kUnknownDimensionsTypesCount)]);
                        operandsWithUnknownDimensions.insert(opndIdx);
                        return opndIdx;
                    } else {
                        return model.addOperand(&problemType);
                    }
                });

        // OPERATION ///////////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
                                                           operationInputs, operationOutputs);
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }

    // Now finalize the weights.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        if (regionIndex == ~0U) {
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        CHECK(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }

    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead.  The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                const double probabilityOfModelOutput =
                        0.50 * [](double x) { return x * x; }((operationIdx + 1.0) / operationCount);
982                 modelOutput = (randFrac() < probabilityOfModelOutput);
983             } else {
984                 // This is consumed within the model, so we'll rarely
985                 // make it an output of the model.
986                 modelOutput = (randFrac() < 0.05);
987             }
988             if (!modelOutput) {
989                 continue;
990             }
991             modelOutputs.push_back(operandIndex);
992             if (deadOperandI != deadOperands.end()) {
993                 deadOperands.erase(deadOperandI);
994                 const auto deadOperationI = deadOperations.find(operationIdx);
995                 if (deadOperationI != deadOperations.end()) {
996                     deadOperations.erase(deadOperationI);
997                 }
998             }
999         }
1000     }
1001     if (!allowDeadOperations) {
1002         // For each dead operation, pick a random output to become a model output.
1003         for (uint32_t deadOperationIndex : deadOperations) {
1004             const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
1005             const uint32_t deadOperandIndex =
1006                     deadOperationOutputs[randUInt(deadOperationOutputs.size())];
1007             modelOutputs.push_back(deadOperandIndex);
1008         }
1009     }
1010     // A model must have at least one output.
1011     if (modelOutputs.empty()) {
1012         const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
1013         modelOutputs.push_back(outputs[randUInt(outputs.size())]);
1014     }
1015     if (computeMode == WrapperExecution::ComputeMode::FENCED) {
1016         if (std::any_of(modelOutputs.begin(), modelOutputs.end(),
1017                         [&operandsWithUnknownDimensions](uint32_t opndIdx) {
1018                             return operandsWithUnknownDimensions.count(opndIdx) != 0;
1019                         })) {
1020             // Workaround for http://b/162980246: Fenced execution is documented
1021             // as requiring model outputs to have fully specified dimensions,
1022             // either from Model or from Execution, but its implementation
1023             // requires this to come from Model.  This test only guarantees that
1024             // they have fully specified dimensions from Execution.  So in the
1025             // case of a Model where some output does not have fully specified
1026             // dimensions, perform asynchronous execution instead.
1027             computeMode = WrapperExecution::ComputeMode::ASYNC;
1028         }
1029     }
1030 
1031     model.identifyInputsAndOutputs(modelInputs, modelOutputs);
1032 #ifdef VERBOSE
1033     {
1034         std::cout << "Original model: " << ModelStats(&model) << std::endl;
1035         std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
1036         if (allowDeadOperations) {
1037             std::cout << deadOperations.size();
1038         } else {
1039             std::cout << "forbidden (converted " << deadOperations.size() << ")";
1040         }
1041         std::cout << std::endl;
1042     }
1043 #endif
1044     ASSERT_EQ(model.finish(), Result::NO_ERROR);
1045 
1046     // Non-partitioned compilation.
1047     TestCompilation c(&model);
1048     ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
1049     ASSERT_EQ(c.finish(), Result::NO_ERROR);
1050 
1051     // Create some drivers for partitioned compilation.
1052     CHECK(!signatures.empty());
1053     std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
1054     //     First assign each signature to a random driver (a driver is
1055     //     just represented as an entry in the signaturesForDriver
1056     //     vector).
1057     for (Signature signature : signatures) {
1058         signaturesForDriver[randUInt(signatures.size())].insert(signature);
1059     }
1060     //     Now remove each entry that has no signatures.
1061     auto firstExtra =
1062             std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
1063                            [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
1064     if (firstExtra != signaturesForDriver.end()) {
1065         signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
1066     }
1067     //     Now actually create the drivers.
1068     std::vector<std::shared_ptr<Device>> devices;
1069     for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
1070         const auto& signaturesForThisDriver = signaturesForDriver[i];
1071         // Minimum HAL version for this driver is highest minimum HAL version of
1072         // any operation supported by this driver.
1073         const HalVersion minHalVersion = getMinHalVersion(
1074                 std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
1075                                  [](const Signature& a, const Signature& b) {
1076                                      return getMinHalVersion(a.first) < getMinHalVersion(b.first);
1077                                  })
1078                         ->first);
1079         const HalVersion actualHalVersion =
1080                 static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
1081                                         randUInt(static_cast<int32_t>(HalVersion::LATEST) -
1082                                                  static_cast<int32_t>(minHalVersion) + 1));
1083         const std::string name =
1084                 "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
1085 #ifdef VERBOSE
1086         std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
1087                              to_string(minHalVersion)
1088                   << std::endl;
1089 #endif
1090         auto device = DeviceManager::forTest_makeDriverDevice(
1091                 makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
1092         devices.push_back(device);
1093     }
1094     // CPU fallback device
1095     devices.push_back(DeviceManager::getCpuDevice());

    // Partitioned compilation.
    //
    // If a test case has both (a) unknown intermediate operand sizes and
    // (b) partitions scheduled on pre-HAL 1.2 (pre-Android Q) devices, fallback
    // is needed if the non-fallback partitioning fails.
    //
    // The issue is that prior to HAL 1.2, an output operand must have a known
    // size provided either in the Model or in the Request; and in the case of
    // partitioning, an intermediate operand of the original model that becomes
    // an output operand of a partition won't have a known size provided in the
    // Request.
    //
    // If a test case has a step model with no inputs or no outputs, fallback is needed.
    // This is because our HAL specification requires a model to have at least one
    // input and one output.
    //
    // If a fallback is needed, we retry the compilation with a fallback and require
    // the fallback to succeed. Otherwise, we require the partitioning to succeed
    // without CPU fallback.
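    // In other words: first attempt kPartitioningWithoutFallback; if (and
    // only if) that fails for one of the reasons above, retry with
    // kPartitioningWithFallback and require a SIMPLE plan on the CPU device.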
    TestCompilation cNoFallback(&model, devices);
    TestCompilation cWithFallback(&model, devices);
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish();
    const bool fallbackNeededForDynamicTemporaries =
            compilationResult == Result::OP_FAILED && hasUnknownDimensions &&
            cNoFallback.getExecutionPlan().hasDynamicTemporaries() &&
            std::any_of(devices.begin(), devices.end(), [](const std::shared_ptr<Device>& device) {
                return !isCompliantVersion(nn::kHalVersionV1_2ToApi.canonical,
                                           device->getFeatureLevel());
            });
    const bool fallbackNeededForStepModelWithNoInputsOrNoOutputs =
            cNoFallback.getExecutionPlan().forTest_hasStepModelWithNoInputsOrNoOutputs();
    const bool fallbackNeeded = fallbackNeededForDynamicTemporaries ||
                                fallbackNeededForStepModelWithNoInputsOrNoOutputs;
    if (fallbackNeeded) {
        ASSERT_EQ(compilationResult, Result::OP_FAILED);

        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        compilationResult = cWithFallback.finish();
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);

        const ExecutionPlan& plan = cNoFallback.getExecutionPlan();
        if (signaturesForDriver.size() == 1) {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_TRUE(plan.forTest_simpleGetDevice() != DeviceManager::getCpuDevice());
        } else {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
            auto stepToDeviceId = [](const std::shared_ptr<LogicalStep>& step) {
                return step->executionStep()->getDevice();
            };
            std::set<decltype(stepToDeviceId(plan.forTest_compoundGetSteps()[0]))> deviceSet;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                deviceSet.insert(stepToDeviceId(step));
            }
            // TODO(b/178517567): Figure out why the plan sometimes uses 1 more
            // device than we have test drivers -- this means that we've scheduled
            // one or more operations onto the CPU fallback device, which is not
            // something we ever expect to do.
            ASSERT_TRUE(deviceSet.size() == signaturesForDriver.size() ||
                        deviceSet.size() == signaturesForDriver.size() + 1);
        }
    }
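    // From this point on, exercise whichever compilation succeeded.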
    TestCompilation& c2 = (fallbackNeeded ? cWithFallback : cNoFallback);
#ifdef TRACE_DYNTEMP
    {
        const ExecutionPlan& plan = c2.getExecutionPlan();
        const size_t dynamicTemporaryCount = plan.forTest_flatGetDynamicTemporaries().size();
        std::cout << "TRACE_DYNTEMP: dynamic temporary count = " << dynamicTemporaryCount
                  << std::endl;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            size_t stepsWithModelOutputsThatAreDownstreamInputs = 0;
            size_t countOfModelOutputsThatAreDownstreamInputs = 0;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                if (const size_t count = step->executionStep()
                                                 ->getModelOutputsThatAreDownstreamInputs()
                                                 .size()) {
                    ++stepsWithModelOutputsThatAreDownstreamInputs;
                    countOfModelOutputsThatAreDownstreamInputs += count;
                }
            }
            if (countOfModelOutputsThatAreDownstreamInputs != 0) {
                std::cout << "TRACE_DYNTEMP: model outputs that are downstream inputs: "
                          << countOfModelOutputsThatAreDownstreamInputs << " / "
                          << modelOutputs.size() << ", over "
                          << stepsWithModelOutputsThatAreDownstreamInputs << " / "
                          << plan.forTest_compoundGetSteps().size() << " steps" << std::endl;
                EXPECT_LE(countOfModelOutputsThatAreDownstreamInputs, modelOutputs.size());
            }
        } else {
            EXPECT_EQ(dynamicTemporaryCount, size_t(0))
                    << "Only COMPOUND plan should have dynamic temporaries";
        }
    }
#endif

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
                  << std::endl;
        // TODO: When dumping steps, include non-ExecutionSteps.
        const ExecutionPlan& plan = c2.getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    if (const auto* executionStep = step->tryExecutionStep()) {
                        devicesInPlan.insert(executionStep->getDevice().get());
                    }
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    if (const auto executionStep = steps[i]->tryExecutionStep()) {
                        std::cout << "Step " << i << ": "
                                  << ModelStats(executionStep->getStepModel())
                                  << ", device = " << executionStep->getDevice()->getName()
                                  << std::endl;
                    }
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                          << static_cast<unsigned>(plan.forTest_getKind()) << std::endl;
                break;
        }
    }
#endif

    // For execution:
    // - create golden inputs (one long vector) and golden output value
    //   - golden inputs will be copied to actual inputs before each
    //     of the two executions
    //   - golden output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs.  Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution.  Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> goldenInputs(problemSize * problemSize * model.inputCount());
    std::generate(goldenInputs.begin(), goldenInputs.end(), [this] { return randFrac(); });
#ifdef VERBOSE
    {
        std::cout << "flat inputs = ";
        dump(goldenInputs.begin(), goldenInputs.end());
    }
#endif
    const float goldenOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
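    // Note that getLocation() infers REGION from an empty mVector; a
    // vector-backed descriptor therefore must have its mVector resized
    // (as done below) before the descriptor is used.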
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
                                                         : InputOutputDescriptor::OUTPUT);
    }
    //     We randomly interleave inputs and outputs in creation
    //     order, because when we create memory regions in a
    //     TestMemories instance, the order in which regions are
    //     created within a single Memory is the order they'll be laid
    //     out in that memory; and when we have inputs and outputs
    //     within the same Memory, we want the possibility that
    //     they'll be interleaved.
    std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
    TestMemories ioMemories;
    for (auto& desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: common this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();
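    // All regions have been created, so fix their placement within their
    // memories now (in creation order, per the interleaving comment above).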

    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).
    auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &goldenInputs, &goldenOutput,
                                problemSize, &problemType](WrapperExecution* e) {
        uint32_t inputIndex = 0, outputIndex = 0;
        for (auto& desc : ioDescriptors) {
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              desc.mVector.begin());
                    e->setInput(inputIndex++, desc.mVector.data(),
                                desc.mVector.size() * sizeof(float));
                } else {
                    std::fill(desc.mVector.begin(),
                              desc.mVector.begin() + problemSize * problemSize, goldenOutput);
                    e->setOutput(outputIndex++, desc.mVector.data(),
                                 desc.mVector.size() * sizeof(float), &problemType.operandType);
                }
            } else {
                const WrapperMemory* memory;
                uint32_t offset, length;
                float* region = static_cast<float*>(
                        ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
                CHECK(length == problemSize * problemSize * sizeof(float));
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              region);
                    e->setInputFromMemory(inputIndex++, memory, offset, length);
                } else {
                    std::fill(region, region + problemSize * problemSize, goldenOutput);
                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                           &problemType.operandType);
                }
            }
        }
        CHECK(inputIndex == model.inputCount());
        CHECK(outputIndex == model.outputCount());
    };
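    // prepareForExecution() is invoked once per execution below, so both the
    // non-partitioned and partitioned runs start from identical input and
    // output contents.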

    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(computeMode), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(), desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region, region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }

    // Partitioned execution.
    WrapperExecution e2(&c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(computeMode), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << "   partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "   partitioned output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}

}  // namespace
}  // namespace android