• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <HalInterfaces.h>
18 #include <SampleDriver.h>
19 #include <ValidateHal.h>
20 #include <android-base/logging.h>
21 #include <android/hardware/neuralnetworks/1.0/ADevice.h>
22 #include <android/hardware/neuralnetworks/1.1/ADevice.h>
23 #include <android/hardware/neuralnetworks/1.2/ADevice.h>
24 #include <gtest/gtest.h>
25 #include <unistd.h>
26 
27 #include <algorithm>
28 #include <cassert>
29 #include <cstdio>
30 #include <iterator>
31 #include <map>
32 #include <memory>
33 #include <random>
34 #include <set>
35 #include <string>
36 #include <tuple>
37 #include <utility>
38 #include <vector>
39 
40 #include "CompilationBuilder.h"
41 #include "HalUtils.h"
42 #include "Manager.h"
43 #include "ModelBuilder.h"
44 #include "NeuralNetworks.h"
45 #include "TestNeuralNetworksWrapper.h"
46 
47 // Uncomment the following line to generate some debugging output that
48 // may be useful when analyzing failures:
49 //
50 // #define VERBOSE VERBOSE
51 
52 // Uncomment the following line to generate some debugging output that
53 // may be useful to determine test coverage for support of dynamic
54 // temporaries (http://b/132458982):
55 //
56 // #define TRACE_DYNTEMP TRACE_DYNTEMP
57 
58 // We randomly generate tests (model + input data) at runtime, and verify
59 // that we get the same results whether we do partitioned compilation/execution
60 // or non partitioned compilation/execution.  We perform a test as follows:
61 //
62 // (1) Randomly generate a model (graph and weights), randomly generate input
63 //     data, randomly assign inputs and outputs to CPU memory or to shared
64 //     memory.
65 //
66 //     Randomly leaves dimensions unset for intermediate operands.
67 //
68 // (2) Randomly generate drivers based on the sample driver, each of which
69 //     executes models on the CPU.  They differ according to which operations
70 //     they support.
71 //
72 // (3) Compile and execute without partitioning, saving off the results.
73 //
74 // (4) Compile and execute with partitioning.
75 //
76 // (5) Verify that the saved results from (3) match the results from (4).
77 //
78 // For simplicity, all data (model inputs, model outputs, weights,
79 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
80 // dimensions are fixed throughout a particular test case (and
81 // randomly determined).  This prevents us from having to find a
82 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
83 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
84 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
86 // from size 2x2 to size 3x3 in order to match ADD#b).  In the few
87 // cases where an operand cannot be of this type, it is a constant
88 // (e.g., activation functions and RNN bias).
89 //
90 // Each operation we generate has a signature (described in more
91 // detail later).  The randomly generated drivers decide which
92 // operations they can execute by checking operation signatures.  Once
93 // we have built the model and know the set of signatures, we randomly
94 // assign each signature to a driver.  No signature is supported by
95 // multiple drivers -- we're not testing the logic that the
96 // partitioning algorithm uses to select the best driver for an
97 // operation.
98 
99 namespace android {
100 
// Shorthand aliases for the versioned NN HAL namespaces used throughout this test.
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
// Runtime-internal types from the NNAPI runtime implementation.
using CompilationBuilder = nn::CompilationBuilder;
using DeviceManager = nn::DeviceManager;
using Device = nn::Device;
using SharedDevice = nn::SharedDevice;
using ExecutionPlan = nn::ExecutionPlan;
using ExecutionStep = nn::ExecutionStep;
using HalCacheToken = nn::HalCacheToken;
using HalVersion = nn::HalVersion;
using HidlModel = V1_3::Model;
using LogicalStep = nn::LogicalStep;
using ModelBuilder = nn::ModelBuilder;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
// Thin C++ test wrappers over the public NNAPI C interface.
using WrapperCompilation = nn::test_wrapper::Compilation;
using WrapperExecution = nn::test_wrapper::Execution;
using WrapperMemory = nn::test_wrapper::Memory;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;
124 
125 namespace {
126 
127 /// Configure test size //////////////////////////////////////////////////////////
128 
129 // We may exceed this in order to connect otherwise disjoint subgraphs.
130 static const unsigned kMaxNumOperations = 100;
131 
132 // We build models to process 2-D square tensors up to this size in each dimension;
133 // note that the API promotes by-value weights larger than 128 to by-reference,
134 // so we want to ensure that we can pick both types that exceed and types that do
135 // not exceed this size.
136 static const unsigned kMaxProblemSize = 8;
137 
138 // First seed for pseudorandom test generation.
139 static const unsigned kFirstSeed = 0;
140 
141 // Number of test cases.
142 static const unsigned kNumTestCases = 225;
143 
144 // Force all graph weights into a single pool (as we recommend to users)
145 // or allow them to be distributed across multiple pools (more stress
146 // on the partitioning algorithm and the rest of the runtime)?
147 // Forcing all graph weights into a single pool may be necessary to
148 // prevent large graphs from running up against http://b/70302693
149 // "NNAPI overuses (?) fds".
150 static const bool kAllWeightsInOnePool = false;
151 
152 //////////////////////////////////////////////////////////////////////////////////
153 
154 // The signature of an operation consists of the operation type (e.g.,
155 // ADD) and the activation function (use -1 in the case of an
156 // operation type for which the activation function is inapplicable).
157 typedef std::pair<ANeuralNetworksOperationType, int> Signature;
158 
// This class adds some simple utilities on top of WrapperModel.  For example,
// it provides access to certain features from ModelBuilder that are not exposed
// by the base class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
   public:
    // Adds an operation, remembers its output operand indexes, and returns the
    // index of the new operation (operations are numbered in creation order).
    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    // Number of operations added so far via addOperation().
    uint32_t operationCount() const { return mOperations.size(); }

    // Model input/output counts, fetched from the underlying ModelBuilder
    // because WrapperModel does not expose them.
    uint32_t inputCount() const { return builder()->inputCount(); }
    uint32_t outputCount() const { return builder()->outputCount(); }

    // Returns the output operand indexes of the index'th operation added.
    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        CHECK(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            // Small enough for NNAPI to copy the bytes itself.
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            // NNAPI keeps only a pointer to large values, so keep the data
            // alive here for the lifetime of this TestModel.  (Reallocating
            // mOperandValues moves the inner vectors, which preserves their
            // heap buffers, so previously registered pointers stay valid.)
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }

    // Int32 vector values must be small enough for NNAPI's immediate copy.
    void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
        const size_t length = value.size() * sizeof(int32_t);

        CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, value.data(), length);
    }

    // Scalar int32 values are always small enough for an immediate copy.
    void setOperandValue(uint32_t index, int32_t value) {
        CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

   private:
    // The wrapper's opaque handle is the runtime's ModelBuilder.
    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};
220 
// This class adds some simple utilities on top of WrapperCompilation in order
// to provide access to certain features from CompilationBuilder that are not
// exposed by the base class.
class TestCompilation : public WrapperCompilation {
   public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    // Creates a compilation against an explicit list of devices (bypassing
    // normal device discovery) via ModelBuilder::createCompilation().
    TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    using WrapperCompilation::finish;

    // Test-only hook: force the partitioning mode used by this compilation.
    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
    }

    // Test-only hook: inspect the plan produced by partitioned compilation.
    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    // The wrapper's opaque handle is the runtime's CompilationBuilder.
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
};
250 
251 // This class is used to manage a collection of memory regions,
252 // disjoint windows onto a set of Memory instances, each of which is
253 // associated with a single shared memory region.  Each region and
254 // Memory instance is assigned a number.  The usage pattern is as
255 // follows:
256 // - Call addMemory() and addRegion() as many times as needed to
257 //   declare (but not define) Memory instances and declare region
258 //   instances.
259 // - Call layout() to define the Memory instances.
260 // - Call getRegion() as many times as needed to get the details
261 //   of memory regions (such as address, or Memory/offset/length).
262 // The Memory instances created by layout() are owned by the
263 // TestMemories instance, and are destroyed when the TestMemories
264 // instance is destroyed.
265 class TestMemories {
266    public:
267     TestMemories() = default;
268 
269     TestMemories(const TestMemories&) = delete;
270     TestMemories& operator=(const TestMemories&) = delete;
271 
addMemory()272     unsigned addMemory() {
273         CHECK(!mLayoutDone);
274         mMemorySizes.push_back(0);
275         return memoryCount() - 1;
276     }
memoryCount() const277     unsigned memoryCount() const { return mMemorySizes.size(); }
278 
addRegion(unsigned memoryIndex,uint32_t length)279     unsigned addRegion(unsigned memoryIndex, uint32_t length) {
280         CHECK(!mLayoutDone);
281         CHECK(memoryIndex < memoryCount());
282         uint32_t& memorySize = mMemorySizes[memoryIndex];
283         auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
284         mRegions.push_back(desc);
285         memorySize += length;
286         return regionCount() - 1;
287     }
regionCount() const288     unsigned regionCount() const { return mRegions.size(); }
289 
290     void layout();
291 
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)292     void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
293                     uint32_t* pLength) {
294         CHECK(mLayoutDone);
295         CHECK(regionIndex < regionCount());
296         const auto& regionDescriptor = mRegions[regionIndex];
297         const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
298         uint32_t offset = std::get<1>(regionDescriptor);
299         uint32_t length = std::get<2>(regionDescriptor);
300 
301         uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
302         CHECK(buffer != nullptr);
303 
304         if (pMemory) *pMemory = memory;
305         if (pOffset) *pOffset = offset;
306         if (pLength) *pLength = length;
307 
308         return buffer + offset;
309     }
310 
getRegion(unsigned regionIndex)311     void* getRegion(unsigned regionIndex) {
312         return getRegion(regionIndex, nullptr, nullptr, nullptr);
313     }
314 
315    private:
316     // Index is the memory index; value is the size of the memory
317     // (aggregate size of all regions in the memory).
318     std::vector<uint32_t> mMemorySizes;
319 
320     // Index is the memory index.
321     std::vector<WrapperMemory> mMemories;
322 
323     // Index is the region index; tuple represents memory index,
324     // region offset within memory, region length.
325     std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
326 
327     // For validity checking.
328     bool mLayoutDone = false;
329 };
330 
// Defines (allocates) the Memory instances declared via addMemory(), each
// sized to the aggregate length of the regions assigned to it.
void TestMemories::layout() {
    CHECK(!mLayoutDone);
    for (uint32_t memorySize : mMemorySizes) {
        auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
        CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
        CHECK(ashmem != nullptr);

        // Ownership of the ashmem object is transferred to the WrapperMemory
        // stored in mMemories, so it lives as long as this TestMemories.
        ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
        mMemories.emplace_back(memory);
    }
    mLayoutDone = true;
}
343 
// Test fixture; the parameter is the seed for pseudorandom test generation,
// so each test case is reproducible from its parameter alone.
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
   public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Computes the (operation type, activation function) signature of an
    // operation; see the file-level comment for how signatures are used.
    static Signature getSignature(const HidlModel& model, const V1_3::Operation& operation);

   protected:
    // Manufactures a driver that supports exactly the given signatures,
    // presented at the given HAL version.
    static SharedDevice makeTestDriver(HalVersion version, const char* name,
                                       std::set<Signature> signatures);

    // Lowest HAL version that supports the operation type (per kOperationPatterns).
    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    // Fair coin flip derived from the seeded engine.
    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //    activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an ELU input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
                            unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is alpha, a scalar
        const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
        return int(model->addConstantOperand(&alphaType, 1.0f));
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Debug helper: prints summary statistics (operation/operand/input/output
    // counts) for a model.
    class ModelStats {
       public:
        ModelStats(const ModelBuilder* model) : mBuilder(model) {}
        ModelStats(const WrapperModel* model)
            : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
                << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
                << ")";
            return out;
        }

       private:
        const ModelBuilder* mBuilder;
    };

    // Debug helper: prints a half-open iterator range as "{ a b c }".
    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    // Seeded engine; protected so tests can draw from it directly.
    std::mt19937 mRandNumEng;

   private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
479 
// Table of operations the random graph generator can emit, grouped by the HAL
// version that introduced them.  Fields: {min HAL version, operation type,
// #inputs, #outputs, activation-function input index (<0 if none),
// special-input maker (nullptr if all non-activation inputs are normal)}.
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};
500 
getMinHalVersion(ANeuralNetworksOperationType type)501 HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
502     static const auto kOperationToVersion = [] {
503         std::map<ANeuralNetworksOperationType, HalVersion> result;
504         for (const auto& pattern : kOperationPatterns) {
505             result[pattern.mOperationType] = pattern.mMinHalVersion;
506         }
507         return result;
508     }();
509 
510     return kOperationToVersion.at(type);
511 }
512 
// Computes the signature (operation type, activation function value) of an
// operation.  Operations with no activation input get an activation of -1;
// otherwise the activation value is read back out of the model's constant
// operand pool.
Signature RandomPartitioningTest::getSignature(const HidlModel& model,
                                               const V1_3::Operation& operation) {
    // Build, once, a map from operation type to the index of its
    // activation-function input (<0 if none), from kOperationPatterns.
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    // The activation operand is expected to be a CONSTANT_COPY INT32 scalar
    // (enforced below), so its value lives in the model's operandValues pool.
    const V1_3::Operand& operand =
            model.main.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == V1_3::OperandType::INT32);
    int32_t value;
    memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
    return Signature(operationType, value);
}
538 
to_string(HalVersion version)539 std::string RandomPartitioningTest::to_string(HalVersion version) {
540     switch (version) {
541         case HalVersion::V1_0:
542             return "V1_0";
543         case HalVersion::V1_1:
544             return "V1_1";
545         case HalVersion::V1_2:
546             return "V1_2";
547         case HalVersion::V1_3:
548             return "V1_3";
549         default:
550             return "V_UNKNOWN";
551     }
552 };
553 
// A driver that behaves like SampleDriver except that it only supports the
// operations whose signatures were supplied at construction time.  This is how
// the test carves the randomly generated model up among several "devices".
class TestDriver : public SampleDriver {
   public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures)
        : SampleDriver(name), mSignatures(std::move(signatures)) {}

    // Reports the same performance numbers (0.75) for everything, so driver
    // selection is not influenced by performance differences between drivers.
    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const V1_0::PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        V1_3::Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
                .ifPerformance = kPerf,
                .whilePerformance = kPerf};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return hardware::Void();
    }

    // An operation is supported iff its signature is in mSignatures.
    hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.main.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.main.operations[i])) != 0);
            }
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return hardware::Void();
    }

    // Delegates to SampleDriver only if every operation in the model is
    // supported; otherwise reports INVALID_ARGUMENT through the callback.
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

   private:
    const std::set<Signature> mSignatures;
};
620 
// Presents a TestDriver through the V1_2 HAL interface (via the adapter).
class TestDriverV1_2 : public V1_2::ADevice {
   public:
    TestDriverV1_2(const char* name, std::set<Signature> signatures)
        : V1_2::ADevice(new TestDriver(name, std::move(signatures))) {}
};
626 
// Presents a TestDriver through the V1_1 HAL interface (via the adapter).
class TestDriverV1_1 : public V1_1::ADevice {
   public:
    TestDriverV1_1(const char* name, std::set<Signature> signatures)
        : V1_1::ADevice(new TestDriver(name, std::move(signatures))) {}
};
632 
// Presents a TestDriver through the V1_0 HAL interface (via the adapter).
class TestDriverV1_0 : public V1_0::ADevice {
   public:
    TestDriverV1_0(const char* name, std::set<Signature> signatures)
        : V1_0::ADevice(new TestDriver(name, std::move(signatures))) {}
};
638 
makeTestDriver(HalVersion version,const char * name,std::set<Signature> signatures)639 SharedDevice RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
640                                                     std::set<Signature> signatures) {
641     switch (version) {
642         case HalVersion::V1_0:
643             return nn::makeSharedDevice(name, new TestDriverV1_0(name, std::move(signatures)));
644         case HalVersion::V1_1:
645             return nn::makeSharedDevice(name, new TestDriverV1_1(name, std::move(signatures)));
646         case HalVersion::V1_2:
647             return nn::makeSharedDevice(name, new TestDriverV1_2(name, std::move(signatures)));
648         case HalVersion::V1_3:
649             return nn::makeSharedDevice(name, new TestDriver(name, std::move(signatures)));
650         default:
651             ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
652             return nullptr;
653     }
654 }
655 
// Run the test once per seed in [kFirstSeed, kFirstSeed + kNumTestCases).
INSTANTIATE_TEST_SUITE_P(Seed, RandomPartitioningTest,
                         ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
658 
TEST_P(RandomPartitioningTest,Test)659 TEST_P(RandomPartitioningTest, Test) {
660     LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
661 
662 #ifdef VERBOSE
663     std::cout << std::setprecision(2) << std::fixed << std::setw(4);
664 #endif
665 
666     const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
667     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
668     const WrapperOperandType unknownDimensionsTypes[] = {
669             {WrapperType::TENSOR_FLOAT32, {}},
670             {WrapperType::TENSOR_FLOAT32, {0, 0}},
671             {WrapperType::TENSOR_FLOAT32, {0, problemSize}},
672             {WrapperType::TENSOR_FLOAT32, {problemSize, 0}},
673     };
674     const unsigned kUnknownDimensionsTypesCount =
675             sizeof(unknownDimensionsTypes) / sizeof(unknownDimensionsTypes[0]);
676 
677     static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});
678 
679     const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
680     const bool allowDeadOperations = (randFrac() < 0.2);
681     const bool allowUnknownDimensions = (randFrac() < 0.25);
682 
683     // TODO: The current algorithm builds the graph in a forward
684     // direction (i.e., later-generated operations consume outputs
685     // from earlier-generated operations).  In order to get more
686     // variation in graph topology, perhaps we should also create an
687     // algorithm to build the graph in a backward direction (i.e.,
688     // later-generated operations produce outputs to be consumed by
689     // earlier-generated operations).
690     [[maybe_unused]] const bool buildForward = randBool();
691 
692     // TODO: Add a form of forced connectivity that operates by
693     // joining disjoint subgraphs rather than by forcing a root.
694     const bool forceCommonRoot = (randFrac() < 0.75);
695 
696     auto computeMode = WrapperExecution::getComputeMode();
697     // We check randFrac() independent of compute mode, because we don't want
698     // the random number sequence to change depending on compute mode: Compute
699     // mode should only affect how we perform the inference, not how we build the
700     // Model, the Compilation, or the Execution.
701     if (randFrac() < 0.5 && computeMode == WrapperExecution::ComputeMode::ASYNC) {
702         computeMode = WrapperExecution::ComputeMode::FENCED;
703     }
704 
705     TestModel model;
706     std::vector<uint32_t> modelInputs;
707     std::vector<uint32_t> modelOutputs;
708 
709     std::set<uint32_t> operandsWithUnknownDimensions;
710 
711     // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
712     TestMemories weights;
713 
714     // Keep track of all normal (i.e., not activation function and not
715     // "special") operands that are values (from setOperandValue*()).
716     // .first: operand index
717     // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
718     //          otherwise, the operand has yet to be defined, and this is the corresponding
719     //          region index in "weights"
720     std::vector<std::pair<uint32_t, unsigned>> valueOperands;
721 
722     // An operand is "dead" if it is not consumed by another operation
723     // and is not a model output.  Key is operand index; value is
724     // operation index.
725     std::map<uint32_t, uint32_t> deadOperands;
726 
727     // An operation is "dead" if all of its outputs are dead.
728     std::set<uint32_t> deadOperations;
729 
730     // Collect the signatures of operations in this model.
731     std::set<Signature> signatures;
732 
733     // For reporting purposes, keep track of the number of root
734     // operations (those that do not consume results produced by other
735     // operations).
736     unsigned rootOperationCount = 0;
737 
738     // Track whether we added operands with unknown dimensions. In this case,
739     // partitioned compilation will fail if such an operand is read in a
740     // different partition than it is written, and the partition that does the
741     // writing is scheduled on a pre-HAL 1.2 (pre-Android Q) device.
742     bool hasUnknownDimensions = false;
743 
744     // Generate operations.
745     for (unsigned i = 0; i < numOperations; i++) {
746         const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
747         const auto& operationPattern = kOperationPatterns[operationPatternIndex];
748 
749         // INPUTS //////////////////////////////////////////////////////////////////////////////////
750 
751         std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
752 
753         // First, process activation function and special inputs, and
754         // keep track of which inputs remain.
755         std::vector<uint32_t> normalOperationInputIndexes;
756         int32_t activationFunction = -1;
757         for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
758              operationInputIndex++) {
759             if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
760                 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
761                 activationFunction = randUInt(4);
762                 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
763                     // workaround for http://b/69011131
764                     activationFunction = ANEURALNETWORKS_FUSED_NONE;
765                 }
766                 model.setOperandValue(operandIndex, activationFunction);
767                 operationInputs[operationInputIndex] = operandIndex;
768                 continue;
769             }
770             if (operationPattern.mMakeSpecialInput != nullptr) {
771                 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
772                         problemSize, &model, operationInputIndex);
773                 if (operandIndex >= 0) {
774                     operationInputs[operationInputIndex] = operandIndex;
775                     continue;
776                 }
777             }
778             normalOperationInputIndexes.push_back(operationInputIndex);
779         }
780         CHECK(!normalOperationInputIndexes.empty());
781         signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
782 
783         // A (normal) operation input can be one of:
784         // - a new or existing model input
785         // - an output of an existing operation
786         // - an OperandValue
787         // - an OperandValueFromMemory
788         // Some guidelines:
789         // - We generally don't want all of an operation's inputs to be values (constants)
790         const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
791         //     How many of this operation's inputs are constants?
792         unsigned normalOperationInputConstantCount = 0;
793         //     How many of this operation's inputs are model inputs?
794         unsigned normalOperationInputModelInputCount = 0;
795         // We begin by deciding what kind of input each (normal) operation will be; we don't
796         // actually pick input operand indexes at this time, because we might override this
797         // decision later.
798         enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
799         std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
800         std::generate(
801                 normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
802                 [this, &model, numOperations, normalOperationInputCount,
803                  &normalOperationInputConstantCount,
804                  &normalOperationInputModelInputCount]() -> InputKind {
805                     // Constant?  Becomes less likely the more
806                     // constants we already have as inputs to
807                     // this operation.
808                     if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
809                                                         normalOperationInputCount)) {
810                         normalOperationInputConstantCount++;
811                         return IK_VALUE;
812                     }
813 
814                     // Model input?  Becomes less likely the
815                     // more model inputs we already have as
816                     // inputs to this operation, and the further
817                     // along we are in generating this model
818                     // (i.e., the more operations we have
819                     // generated).
820                     if ((model.operationCount() == 0) ||
821                         (randFrac() < 0.5 *
822                                               (1 - double(normalOperationInputModelInputCount) /
823                                                            normalOperationInputCount) *
824                                               std::min(0.3, (1 - double(model.operationCount()) /
825                                                                          numOperations)))) {
826                         normalOperationInputModelInputCount++;
827                         return IK_SUBGRAPH_INPUT;
828                     }
829 
830                     // Else output of an existing operation.
831                     return IK_OPERATION_OUTPUT;
832                 });
833 
834         // Now force common root or model input, if necessary.  (A
835         // model must have at least one input.)
836         auto force = [this, &normalOperationInputKinds,
837                       normalOperationInputCount](InputKind forceKind) {
838             if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
839                              [forceKind](InputKind kind) { return kind == forceKind; })) {
840                 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
841             }
842         };
843         if (forceCommonRoot && (model.operationCount() != 0)) {
844             force(IK_OPERATION_OUTPUT);
845         }
846         if (modelInputs.empty()) {
847             CHECK(model.operationCount() == 0);
848             force(IK_SUBGRAPH_INPUT);
849         }
850 
851         // Finally create the normal inputs.
852         bool isRootOperation = true;
853         for (unsigned i = 0; i < normalOperationInputCount; i++) {
854             uint32_t operandIndex = ~0U;
855             switch (normalOperationInputKinds[i]) {
856                 case IK_SUBGRAPH_INPUT: {
857                     if (!modelInputs.empty() && (randFrac() < 0.5)) {
858                         operandIndex = modelInputs[randUInt(modelInputs.size())];
859                     } else {
860                         operandIndex = model.addOperand(&problemType);
861                         modelInputs.push_back(operandIndex);
862                     }
863                     break;
864                 }
865                 case IK_OPERATION_OUTPUT: {
866                     decltype(deadOperands.begin()) deadOperandI;
867                     if (!deadOperands.empty() && (randFrac() < 0.5)) {
868                         deadOperandI = deadOperands.begin();
869                         std::advance(deadOperandI, randUInt(deadOperands.size()));
870                         operandIndex = deadOperandI->first;
871                     } else {
872                         const uint32_t existingOperationIndex = randUInt(model.operationCount());
873                         const auto& existingOperationOutputs =
874                                 model.getOperationOutputs(existingOperationIndex);
875                         operandIndex =
876                                 existingOperationOutputs[randUInt(existingOperationOutputs.size())];
877                         deadOperandI = deadOperands.find(operandIndex);
878                         CHECK(deadOperandI == deadOperands.end() ||
879                               deadOperandI->second == existingOperationIndex);
880                     }
881                     if (deadOperandI != deadOperands.end()) {
882                         const uint32_t correspondingOperation = deadOperandI->second;
883                         deadOperands.erase(deadOperandI);
884 
885                         auto deadOperationI = deadOperations.find(correspondingOperation);
886                         if (deadOperationI != deadOperations.end()) {
887                             deadOperations.erase(deadOperationI);
888                         }
889                     }
890                     isRootOperation = false;
891                     break;
892                 }
893                 case IK_VALUE: {
894                     if (!valueOperands.empty() && (randFrac() < 0.25)) {
895                         operandIndex = valueOperands[randUInt(valueOperands.size())].first;
896                     } else {
897                         operandIndex = model.addOperand(&problemType);
898                         if (randFrac() < 0.5) {
899                             std::vector<float> value(problemSize * problemSize);
900                             std::generate(value.begin(), value.end(),
901                                           [this] { return randFrac(); });
902                             model.setOperandValue(operandIndex, value);
903                             valueOperands.push_back(std::make_pair(operandIndex, ~0U));
904                         } else {
905                             unsigned memoryIndex = ~0U;
906                             if ((weights.memoryCount() != 0) &&
907                                 (kAllWeightsInOnePool || (randFrac() < 0.5))) {
908                                 memoryIndex = randUInt(weights.memoryCount());
909                             } else {
910                                 memoryIndex = weights.addMemory();
911                             }
912                             const size_t length = problemSize * problemSize * sizeof(float);
913                             const unsigned regionIndex = weights.addRegion(memoryIndex, length);
914                             valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
915                         }
916                     }
917                     break;
918                 }
919                 default:
920                     FAIL();
921             }
922             operationInputs[normalOperationInputIndexes[i]] = operandIndex;
923         }
924         if (isRootOperation) {
925             rootOperationCount++;
926         }
927 
928         // OUTPUTS /////////////////////////////////////////////////////////////////////////////////
929 
930         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
931         std::generate(
932                 operationOutputs.begin(), operationOutputs.end(),
933                 [&operandsWithUnknownDimensions, &model, &problemType, &unknownDimensionsTypes,
934                  &hasUnknownDimensions, allowUnknownDimensions, this] {
935                     // Before the fix for http://b/132458982, 3% unknowns causes
936                     // ~35% of partitionings to fail.  After the fix, 3%
937                     // unknowns causes ~3% of partitionings to fail.  (This is
938                     // determined by removing the fallback code and noting the
939                     // number of failures.)
940                     if (allowUnknownDimensions && randFrac() < 0.03) {
941                         hasUnknownDimensions = true;
942                         uint32_t opndIdx = model.addOperand(
943                                 &unknownDimensionsTypes[randUInt(kUnknownDimensionsTypesCount)]);
944                         operandsWithUnknownDimensions.insert(opndIdx);
945                         return opndIdx;
946                     } else {
947                         return model.addOperand(&problemType);
948                     }
949                 });
950 
951         // OPERATION ///////////////////////////////////////////////////////////////////////////////
952 
953         const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
954                                                            operationInputs, operationOutputs);
955         deadOperations.insert(operationIndex);
956         std::for_each(operationOutputs.begin(), operationOutputs.end(),
957                       [&deadOperands, operationIndex](uint32_t operandIndex) {
958                           deadOperands.insert(std::make_pair(operandIndex, operationIndex));
959                       });
960     }
961 
962     // Now finalize the weights.
963     weights.layout();
964     for (const auto& valueOperand : valueOperands) {
965         const uint32_t operandIndex = valueOperand.first;
966         const unsigned regionIndex = valueOperand.second;
967 
968         if (regionIndex == ~0U) {
969             continue;
970         }
971 
972         const WrapperMemory* memory;
973         uint32_t offset, length;
974         float* region =
975                 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
976         CHECK(length == problemSize * problemSize * sizeof(float));
977         std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
978         model.setOperandValueFromMemory(operandIndex, memory, offset, length);
979     }
980 
981     // Now select model outputs.
982     for (uint32_t operationIdx = 0, operationCount = model.operationCount();
983          operationIdx < operationCount; operationIdx++) {
984         const auto& outputs = model.getOperationOutputs(operationIdx);
985         for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
986              outputIdx++) {
987             bool modelOutput = false;
988             const uint32_t operandIndex = outputs[outputIdx];
989             const auto deadOperandI = deadOperands.find(operandIndex);
990             if (deadOperandI != deadOperands.end()) {
991                 // This is not consumed within the model, so unless we
992                 // make it an output of the model, it's dead.  The
993                 // further along we are in generating this model
994                 // (i.e., the more operations we have generated), the
995                 // more likely we are to classify this operation
996                 // output as a model output.
997                 const double probabilityOfModelOutput =
998                         0.50 * [](double x) { return x * x; }((operationIdx + 1) / operationCount);
999                 modelOutput = (randFrac() < probabilityOfModelOutput);
1000             } else {
1001                 // This is consumed within the model, so we'll rarely
1002                 // make it an output of the model.
1003                 modelOutput = (randFrac() < 0.05);
1004             }
1005             if (!modelOutput) {
1006                 continue;
1007             }
1008             modelOutputs.push_back(operandIndex);
1009             if (deadOperandI != deadOperands.end()) {
1010                 deadOperands.erase(deadOperandI);
1011                 const auto deadOperationI = deadOperations.find(operationIdx);
1012                 if (deadOperationI != deadOperations.end()) {
1013                     deadOperations.erase(deadOperationI);
1014                 }
1015             }
1016         }
1017     }
1018     if (!allowDeadOperations) {
1019         // For each dead operation, pick a random output to become a model output.
1020         for (uint32_t deadOperationIndex : deadOperations) {
1021             const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
1022             const uint32_t deadOperandIndex =
1023                     deadOperationOutputs[randUInt(deadOperationOutputs.size())];
1024             modelOutputs.push_back(deadOperandIndex);
1025         }
1026     }
1027     // A model must have at least one output.
1028     if (modelOutputs.empty()) {
1029         const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
1030         modelOutputs.push_back(outputs[randUInt(outputs.size())]);
1031     }
1032     if (computeMode == WrapperExecution::ComputeMode::FENCED) {
1033         if (std::any_of(modelOutputs.begin(), modelOutputs.end(),
1034                         [&operandsWithUnknownDimensions](uint32_t opndIdx) {
1035                             return operandsWithUnknownDimensions.count(opndIdx) != 0;
1036                         })) {
1037             // Workaround for http://b/162980246: Fenced execution is documented
1038             // as requiring model outputs to have fully specified dimensions,
1039             // either from Model or from Execution, but its implementation
1040             // requires this to come from Model.  This test only guarantees that
1041             // they have fully specified dimensions from Execution.  So in the
1042             // case of a Model where some output does not have fully specified
1043             // dimensions, perform asynchronous execution instead.
1044             computeMode = WrapperExecution::ComputeMode::ASYNC;
1045         }
1046     }
1047 
1048     model.identifyInputsAndOutputs(modelInputs, modelOutputs);
1049 #ifdef VERBOSE
1050     {
1051         std::cout << "Original model: " << ModelStats(&model) << std::endl;
1052         std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
1053         if (allowDeadOperations) {
1054             std::cout << deadOperations.size();
1055         } else {
1056             std::cout << "forbidden (converted " << deadOperations.size() << ")";
1057         }
1058         std::cout << std::endl;
1059     }
1060 #endif
1061     ASSERT_EQ(model.finish(), Result::NO_ERROR);
1062 
1063     // Non-partitioned compilation.
1064     TestCompilation c(&model);
1065     ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
1066     ASSERT_EQ(c.finish(), Result::NO_ERROR);
1067 
1068     // Create some drivers for partitioned compilation.
1069     CHECK(!signatures.empty());
1070     std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
1071     //     First assign each signature to a random driver (a driver is
1072     //     just represented as an entry in the signaturesForDriver
1073     //     vector).
1074     for (Signature signature : signatures) {
1075         signaturesForDriver[randUInt(signatures.size())].insert(signature);
1076     }
1077     //     Now remove each entry that has no signatures.
1078     auto firstExtra =
1079             std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
1080                            [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
1081     if (firstExtra != signaturesForDriver.end()) {
1082         signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
1083     }
1084     //     Now actually create the drivers.
1085     std::vector<std::shared_ptr<Device>> devices;
1086     for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
1087         const auto& signaturesForThisDriver = signaturesForDriver[i];
1088         // Minimum HAL version for this driver is highest minimum HAL version of
1089         // any operation supported by this driver.
1090         const HalVersion minHalVersion = getMinHalVersion(
1091                 std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
1092                                  [](const Signature& a, const Signature& b) {
1093                                      return getMinHalVersion(a.first) < getMinHalVersion(b.first);
1094                                  })
1095                         ->first);
1096         const HalVersion actualHalVersion =
1097                 static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
1098                                         randUInt(static_cast<int32_t>(HalVersion::LATEST) -
1099                                                  static_cast<int32_t>(minHalVersion) + 1));
1100         const std::string name =
1101                 "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
1102 #ifdef VERBOSE
1103         std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
1104                              to_string(minHalVersion)
1105                   << std::endl;
1106 #endif
1107         auto device = DeviceManager::forTest_makeDriverDevice(
1108                 makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
1109         devices.push_back(device);
1110     }
1111     // CPU fallback device
1112     devices.push_back(DeviceManager::getCpuDevice());
1113 
1114     // Partitioned compilation.
1115     //
1116     // If a test case has both (a) unknown intermediate operand sizes and
1117     // (b) partitions scheduled on pre-HAL 1.2 (pre-Android Q) devices, fallback
1118     // is needed if the non-fallback partitioning fails.
1119     //
1120     // The issue is that prior to HAL 1.2, an output operand must have a known
1121     // size provided either in the Model or in the Request; and in the case of
1122     // partitioning, an intermediate operand of the original model that becomes
1123     // an output operand of a partition won't have a known size provided in the
1124     // Request.
1125     //
1126     // If a test case has a step model with no inputs or no outputs, fallback is needed.
1127     // This is because our HAL specification requires a model to have at least one
1128     // input and one output.
1129     //
1130     // If a fallback is needed, we retry the compilation with a fallback and require
1131     // the fallback to succeed. Otherwise, we require the partitioning to succeed
1132     // without CPU fallback.
1133     TestCompilation cNoFallback(&model, devices);
1134     TestCompilation cWithFallback(&model, devices);
1135     ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
1136               Result::NO_ERROR);
1137     auto compilationResult = cNoFallback.finish();
1138     const bool fallbackNeededForDynamicTemporaries =
1139             compilationResult == Result::OP_FAILED && hasUnknownDimensions &&
1140             cNoFallback.getExecutionPlan().hasDynamicTemporaries() &&
1141             std::any_of(devices.begin(), devices.end(), [](const std::shared_ptr<Device>& device) {
1142                 return device->getFeatureLevel() < nn::kHalVersionV1_2ToApi.featureLevel;
1143             });
1144     const bool fallbackNeededForStepModelWithNoInputsOrNoOutputs =
1145             cNoFallback.getExecutionPlan().forTest_hasStepModelWithNoInputsOrNoOutputs();
1146     const bool fallbackNeeded = fallbackNeededForDynamicTemporaries ||
1147                                 fallbackNeededForStepModelWithNoInputsOrNoOutputs;
1148     if (fallbackNeeded) {
1149         ASSERT_EQ(compilationResult, Result::OP_FAILED);
1150 
1151         ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
1152                   Result::NO_ERROR);
1153         compilationResult = cWithFallback.finish();
1154         ASSERT_EQ(compilationResult, Result::NO_ERROR);
1155         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1156         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
1157                   DeviceManager::getCpuDevice());
1158     } else {
1159         ASSERT_EQ(compilationResult, Result::NO_ERROR);
1160 
1161         const ExecutionPlan& plan = cNoFallback.getExecutionPlan();
1162         if (signaturesForDriver.size() == 1) {
1163             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1164             ASSERT_TRUE(plan.forTest_simpleGetDevice() != DeviceManager::getCpuDevice());
1165         } else {
1166             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
1167             auto stepToDeviceId = [](const std::shared_ptr<LogicalStep>& step) {
1168                 return step->executionStep()->getDevice();
1169             };
1170             std::set<decltype(stepToDeviceId(plan.forTest_compoundGetSteps()[0]))> deviceSet;
1171             for (const auto& step : plan.forTest_compoundGetSteps()) {
1172                 deviceSet.insert(stepToDeviceId(step));
1173             }
1174             // TODO(b/178517567): Figure out why we sometimes have 1 more
1175             // signature than we have devices -- this means that we've scheduled
1176             // one or more operations onto the CPU fallback device, which is not
1177             // something we ever expect to do.
1178             ASSERT_TRUE(deviceSet.size() == signaturesForDriver.size() ||
1179                         deviceSet.size() == signaturesForDriver.size() + 1);
1180         }
1181     }
1182     TestCompilation& c2 = (fallbackNeeded ? cWithFallback : cNoFallback);
1183 #ifdef TRACE_DYNTEMP
1184     {
1185         const ExecutionPlan& plan = c2.getExecutionPlan();
1186         const size_t dynamicTemporaryCount = plan.forTest_flatGetDynamicTemporaries().size();
1187         std::cout << "TRACE_DYNTEMP: dynamic temporary count = " << dynamicTemporaryCount
1188                   << std::endl;
1189         if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
1190             size_t stepsWithModelOutputsThatAreDownstreamInputs = 0;
1191             size_t countOfModelOutputsThatAreDownstreamInputs = 0;
1192             for (const auto& step : plan.forTest_compoundGetSteps()) {
1193                 if (const size_t count = step->executionStep()
1194                                                  ->getModelOutputsThatAreDownstreamInputs()
1195                                                  .size()) {
1196                     ++stepsWithModelOutputsThatAreDownstreamInputs;
1197                     countOfModelOutputsThatAreDownstreamInputs += count;
1198                 }
1199             }
1200             if (countOfModelOutputsThatAreDownstreamInputs != 0) {
1201                 std::cout << "TRACE_DYNTEMP: model outputs that are downstream inputs: "
1202                           << countOfModelOutputsThatAreDownstreamInputs << " / "
1203                           << modelOutputs.size() << ", over "
1204                           << stepsWithModelOutputsThatAreDownstreamInputs << " / "
1205                           << plan.forTest_compoundGetSteps().size() << " steps" << std::endl;
1206                 EXPECT_LE(countOfModelOutputsThatAreDownstreamInputs, modelOutputs.size());
1207             }
1208         } else {
1209             EXPECT_EQ(dynamicTemporaryCount, size_t(0))
1210                     << "Only COMPOUND plan should have dynamic temporaries";
1211         }
1212     }
1213 #endif
1214 
1215 #ifdef VERBOSE
1216     {
1217         std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
1218                   << std::endl;
1219         // TODO: When dumping steps, include non-ExecutionSteps.
1220         const ExecutionPlan& plan = c2.getExecutionPlan();
1221         switch (plan.forTest_getKind()) {
1222             case ExecutionPlan::Kind::SIMPLE:
1223                 std::cout << "plan: simple" << std::endl;
1224                 break;
1225             case ExecutionPlan::Kind::COMPOUND: {
1226                 const auto& steps = plan.forTest_compoundGetSteps();
1227                 std::set<const Device*> devicesInPlan;
1228                 for (const auto& step : steps) {
1229                     if (const auto* executionStep = step->tryExecutionStep()) {
1230                         devicesInPlan.insert(executionStep->getDevice().get());
1231                     }
1232                 }
1233                 std::cout << "plan: compound, " << steps.size() << " steps over "
1234                           << devicesInPlan.size() << " devices" << std::endl;
1235                 for (unsigned i = 0; i < steps.size(); i++) {
1236                     if (const auto executionStep = steps[i]->tryExecutionStep()) {
1237                         std::cout << "Step " << i << ": "
1238                                   << ModelStats(executionStep->getStepModel())
1239                                   << ", device = " << executionStep->getDevice()->getName()
1240                                   << std::endl;
1241                     }
1242                 }
1243                 break;
1244             }
1245             default:
1246                 std::cout << "Unexpected plan kind: "
1247                           << static_cast<unsigned>(plan.forTest_getKind());
1248                 break;
1249         }
1250     }
1251 #endif
1252 
1253     // For execution:
1254     // - create golden inputs (one long vector) and golden output value
1255     //   - golden inputs will be copied to actual inputs before each
1256     //     of the two executions
1257     //   - golden output will be used to fill actual outputs before each
1258     //     of the two executions
1259     // - create actual inputs and outputs
1260     // - first execution (non-partitioned)
1261     //   - initialize inputs and (to avoid unrelated oddities) outputs
1262     //   - execute
1263     //   - copy outputs to a save area (one long vector)
1264     // - second execution (partitioned)
1265     //   - (to avoid unrelated oddities) initialize inputs and outputs
1266     //   - execute
1267     //   - compare outputs to save area
1268 
1269     // If the runtime and drivers are working properly, execution
1270     // should not change the inputs.  Nonetheless, we reinitialize the
1271     // inputs for each execution, so as to avoid unrelated problems
1272     // appearing to be problems related to unpartitioned execution
1273     // versus partitioned execution.  Similarly, execution behavior
1274     // should not be dependent on the outputs; but we'll initialize the
1275     // outputs anyway.
1276     std::vector<float> goldenInputs(problemSize * problemSize * model.inputCount());
1277     std::generate(goldenInputs.begin(), goldenInputs.end(), [this] { return randFrac(); });
1278 #ifdef VERBOSE
1279     {
1280         std::cout << "flat inputs = ";
1281         dump(goldenInputs.begin(), goldenInputs.end());
1282     }
1283 #endif
1284     const float goldenOutput = randFrac();
1285 
1286     // Create the memory for the actual inputs and outputs.
1287     struct InputOutputDescriptor {
1288         enum Kind { INPUT, OUTPUT };
1289         Kind mKind;
1290 
1291         // The input or output either resides in a local buffer
1292         // (mVector, in which case mMemoryRegion is ignored); or in a
1293         // shared memory region within a TestMemories instance
1294         // (mMemoryRegion, in which case mVector is ignored).
1295         enum Location { VECTOR, REGION };
1296         Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }
1297 
1298         std::vector<float> mVector;
1299         unsigned mMemoryRegion;
1300     };
1301     std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
1302     for (unsigned i = 0; i < ioDescriptors.size(); i++) {
1303         ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
1304                                                          : InputOutputDescriptor::OUTPUT);
1305     }
1306     //     We randomly interleave inputs and outputs in creation
    //     order, because when we create memory regions in a
1308     //     TestMemories instance, the order in which regions are
1309     //     created within a single Memory is the order they'll be laid
1310     //     out in that memory; and when we have inputs and outputs
1311     //     within the same Memory, we want the possibility that
1312     //     they'll be interleaved.
1313     std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
1314     TestMemories ioMemories;
1315     for (auto& desc : ioDescriptors) {
1316         if (randFrac() < 0.5) {
1317             desc.mVector.resize(problemSize * problemSize);
1318         } else {
1319             // TODO: common this with the way we create IK_VALUE inputs?
1320             unsigned memoryIndex = ~0U;
1321             if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
1322                 memoryIndex = randUInt(ioMemories.memoryCount());
1323             } else {
1324                 memoryIndex = ioMemories.addMemory();
1325             }
1326             const size_t length = problemSize * problemSize * sizeof(float);
1327             desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
1328         }
1329     }
1330     ioMemories.layout();
1331 
1332     // Function to set up actual inputs and outputs (initializing them
1333     // and telling the WrapperExecution about them).
1334     auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &goldenInputs, &goldenOutput,
1335                                 problemSize, &problemType](WrapperExecution* e) {
1336         uint32_t inputIndex = 0, outputIndex = 0;
1337         for (auto& desc : ioDescriptors) {
1338             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1339                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1340                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1341                     std::copy(goldenInputs.begin() + inputOffset,
1342                               goldenInputs.begin() + inputOffset + problemSize * problemSize,
1343                               desc.mVector.begin());
1344                     e->setInput(inputIndex++, desc.mVector.data(),
1345                                 desc.mVector.size() * sizeof(float));
1346                 } else {
1347                     std::fill(desc.mVector.begin(),
1348                               desc.mVector.begin() + problemSize * problemSize, goldenOutput);
1349                     e->setOutput(outputIndex++, desc.mVector.data(),
1350                                  desc.mVector.size() * sizeof(float), &problemType.operandType);
1351                 }
1352             } else {
1353                 const WrapperMemory* memory;
1354                 uint32_t offset, length;
1355                 float* region = static_cast<float*>(
1356                         ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
1357                 CHECK(length == problemSize * problemSize * sizeof(float));
1358                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1359                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1360                     std::copy(goldenInputs.begin() + inputOffset,
1361                               goldenInputs.begin() + inputOffset + problemSize * problemSize,
1362                               region);
1363                     e->setInputFromMemory(inputIndex++, memory, offset, length);
1364                 } else {
1365                     std::fill(region, region + problemSize * problemSize, goldenOutput);
1366                     e->setOutputFromMemory(outputIndex++, memory, offset, length,
1367                                            &problemType.operandType);
1368                 }
1369             }
1370         };
1371         CHECK(inputIndex == model.inputCount());
1372         CHECK(outputIndex == model.outputCount());
1373     };
1374 
1375     // Non-partitioned execution.
1376     WrapperExecution e(&c);
1377     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
1378     ASSERT_EQ(e.compute(computeMode), Result::NO_ERROR);
1379 
1380     // Copy the outputs of the non-partitioned execution to a save area.
1381     std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
1382     {
1383         uint32_t outputIndex = 0;
1384         for (const auto& desc : ioDescriptors) {
1385             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1386                 continue;
1387             }
1388             const size_t outputOffset = outputIndex * problemSize * problemSize;
1389             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1390                 std::copy(desc.mVector.begin(), desc.mVector.end(),
1391                           nonPartitionedOutputs.begin() + outputOffset);
1392             } else {
1393                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1394                 std::copy(region, region + problemSize * problemSize,
1395                           nonPartitionedOutputs.begin() + outputOffset);
1396             }
1397 #ifdef VERBOSE
1398             {
1399                 std::cout << "nonpartitioned output[" << outputIndex << "] = ";
1400                 dump(nonPartitionedOutputs.begin() + outputOffset,
1401                      nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
1402             }
1403 #endif
1404             outputIndex++;
1405         }
1406     }
1407 
1408     // Partitioned execution.
1409     WrapperExecution e2(&c2);
1410     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
1411     ASSERT_EQ(e2.compute(computeMode), Result::NO_ERROR);
1412 
1413     // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
1415     {
1416         uint32_t outputIndex = 0;
1417         for (const auto& desc : ioDescriptors) {
1418             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1419                 continue;
1420             }
1421             SCOPED_TRACE(outputIndex);
1422             const size_t outputOffset = outputIndex * problemSize * problemSize;
1423             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1424 #ifdef VERBOSE
1425                 std::cout << "   partitioned output[" << outputIndex << "] = ";
1426                 dump(desc.mVector.begin(), desc.mVector.end());
1427 #endif
1428                 ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
1429                                        nonPartitionedOutputs.begin() + outputOffset));
1430             } else {
1431                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1432 #ifdef VERBOSE
1433                 std::cout << "part output[" << outputIndex << "] = ";
1434                 dump(region, region + problemSize * problemSize);
1435 #endif
1436                 ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
1437                                        nonPartitionedOutputs.begin() + outputOffset));
1438             }
1439             outputIndex++;
1440         }
1441     }
1442 }
1443 
1444 }  // namespace
1445 }  // namespace android
1446