• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Burst.h"
18 
19 #include <android-base/logging.h>
20 #include <nnapi/IBurst.h>
21 #include <nnapi/Result.h>
22 #include <nnapi/TypeUtils.h>
23 #include <nnapi/Types.h>
24 #include <nnapi/Validation.h>
25 #include <nnapi/hal/1.0/Conversions.h>
26 #include <nnapi/hal/1.0/HandleError.h>
27 #include <nnapi/hal/1.0/ProtectCallback.h>
28 #include <nnapi/hal/1.2/BurstUtils.h>
29 #include <nnapi/hal/1.2/Conversions.h>
30 #include <nnapi/hal/TransferValue.h>
31 
32 #include <algorithm>
33 #include <cstring>
34 #include <limits>
35 #include <map>
36 #include <memory>
37 #include <tuple>
38 #include <utility>
39 #include <vector>
40 
41 #include "Tracing.h"
42 
43 namespace android::hardware::neuralnetworks::adapter {
44 namespace {
45 
46 constexpr V1_2::Timing kTiming = {std::numeric_limits<uint64_t>::max(),
47                                   std::numeric_limits<uint64_t>::max()};
48 
getMemoriesCallback(V1_0::ErrorStatus status,const hidl_vec<hidl_memory> & memories)49 nn::GeneralResult<std::vector<nn::SharedMemory>> getMemoriesCallback(
50         V1_0::ErrorStatus status, const hidl_vec<hidl_memory>& memories) {
51     HANDLE_STATUS_HIDL(status) << "getting burst memories failed with " << toString(status);
52     std::vector<nn::SharedMemory> canonicalMemories;
53     canonicalMemories.reserve(memories.size());
54     for (const auto& memory : memories) {
55         canonicalMemories.push_back(NN_TRY(nn::convert(memory)));
56     }
57     return canonicalMemories;
58 }
59 
60 }  // anonymous namespace
61 
MemoryCache(nn::SharedBurst burstExecutor,sp<V1_2::IBurstCallback> burstCallback)62 Burst::MemoryCache::MemoryCache(nn::SharedBurst burstExecutor,
63                                 sp<V1_2::IBurstCallback> burstCallback)
64     : kBurstExecutor(std::move(burstExecutor)), kBurstCallback(std::move(burstCallback)) {
65     CHECK(kBurstExecutor != nullptr);
66     CHECK(kBurstCallback != nullptr);
67 }
68 
69 nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>>
getCacheEntries(const std::vector<int32_t> & slots)70 Burst::MemoryCache::getCacheEntries(const std::vector<int32_t>& slots) {
71     std::lock_guard guard(mMutex);
72     NN_TRY(ensureCacheEntriesArePresentLocked(slots));
73 
74     std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> results;
75     results.reserve(slots.size());
76     for (int32_t slot : slots) {
77         results.push_back(NN_TRY(getCacheEntryLocked(slot)));
78     }
79 
80     return results;
81 }
82 
ensureCacheEntriesArePresentLocked(const std::vector<int32_t> & slots)83 nn::GeneralResult<void> Burst::MemoryCache::ensureCacheEntriesArePresentLocked(
84         const std::vector<int32_t>& slots) {
85     const auto slotIsKnown = [this](int32_t slot)
86                                      REQUIRES(mMutex) { return mCache.count(slot) > 0; };
87 
88     // find unique unknown slots
89     std::vector<int32_t> unknownSlots = slots;
90     std::sort(unknownSlots.begin(), unknownSlots.end());
91     auto unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlots.end());
92     unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown);
93     unknownSlots.erase(unknownSlotsEnd, unknownSlots.end());
94 
95     // quick-exit if all slots are known
96     if (unknownSlots.empty()) {
97         return {};
98     }
99 
100     auto cb = neuralnetworks::utils::CallbackValue(getMemoriesCallback);
101 
102     const auto ret = kBurstCallback->getMemories(unknownSlots, cb);
103     HANDLE_TRANSPORT_FAILURE(ret);
104 
105     auto returnedMemories = NN_TRY(cb.take());
106 
107     if (returnedMemories.size() != unknownSlots.size()) {
108         return NN_ERROR() << "Burst::MemoryCache::ensureCacheEntriesArePresentLocked: Error "
109                              "retrieving memories -- count mismatch between requested memories ("
110                           << unknownSlots.size() << ") and returned memories ("
111                           << returnedMemories.size() << ")";
112     }
113 
114     // add memories to unknown slots
115     for (size_t i = 0; i < unknownSlots.size(); ++i) {
116         addCacheEntryLocked(unknownSlots[i], std::move(returnedMemories[i]));
117     }
118 
119     return {};
120 }
121 
122 nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>
getCacheEntryLocked(int32_t slot)123 Burst::MemoryCache::getCacheEntryLocked(int32_t slot) {
124     if (const auto iter = mCache.find(slot); iter != mCache.end()) {
125         return iter->second;
126     }
127     return NN_ERROR() << "Burst::MemoryCache::getCacheEntryLocked failed because slot " << slot
128                       << " is not present in the cache";
129 }
130 
addCacheEntryLocked(int32_t slot,nn::SharedMemory memory)131 void Burst::MemoryCache::addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) {
132     auto hold = kBurstExecutor->cacheMemory(memory);
133     mCache.emplace(slot, std::make_pair(std::move(memory), std::move(hold)));
134 }
135 
removeCacheEntry(int32_t slot)136 void Burst::MemoryCache::removeCacheEntry(int32_t slot) {
137     std::lock_guard guard(mMutex);
138     mCache.erase(slot);
139 }
140 
141 // Burst methods
142 
create(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,nn::SharedBurst burstExecutor,std::chrono::microseconds pollingTimeWindow)143 nn::GeneralResult<sp<Burst>> Burst::create(
144         const sp<V1_2::IBurstCallback>& callback,
145         const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
146         const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel, nn::SharedBurst burstExecutor,
147         std::chrono::microseconds pollingTimeWindow) {
148     // check inputs
149     if (callback == nullptr || burstExecutor == nullptr) {
150         return NN_ERROR() << "Burst::create passed a nullptr";
151     }
152 
153     // create FMQ objects
154     auto requestChannelReceiver =
155             NN_TRY(V1_2::utils::RequestChannelReceiver::create(requestChannel, pollingTimeWindow));
156     auto resultChannelSender = NN_TRY(V1_2::utils::ResultChannelSender::create(resultChannel));
157 
158     // check FMQ objects
159     CHECK(requestChannelReceiver != nullptr);
160     CHECK(resultChannelSender != nullptr);
161 
162     // make and return context
163     return sp<Burst>::make(PrivateConstructorTag{}, callback, std::move(requestChannelReceiver),
164                            std::move(resultChannelSender), std::move(burstExecutor));
165 }
166 
Burst(PrivateConstructorTag,const sp<V1_2::IBurstCallback> & callback,std::unique_ptr<V1_2::utils::RequestChannelReceiver> requestChannel,std::unique_ptr<V1_2::utils::ResultChannelSender> resultChannel,nn::SharedBurst burstExecutor)167 Burst::Burst(PrivateConstructorTag /*tag*/, const sp<V1_2::IBurstCallback>& callback,
168              std::unique_ptr<V1_2::utils::RequestChannelReceiver> requestChannel,
169              std::unique_ptr<V1_2::utils::ResultChannelSender> resultChannel,
170              nn::SharedBurst burstExecutor)
171     : mCallback(callback),
172       mRequestChannelReceiver(std::move(requestChannel)),
173       mResultChannelSender(std::move(resultChannel)),
174       mBurstExecutor(std::move(burstExecutor)),
175       mMemoryCache(mBurstExecutor, mCallback) {
176     // TODO: highly document the threading behavior of this class
177     mWorker = std::thread([this] { task(); });
178 }
179 
~Burst()180 Burst::~Burst() {
181     // set teardown flag
182     mTeardown = true;
183     mRequestChannelReceiver->invalidate();
184 
185     // wait for task thread to end
186     mWorker.join();
187 }
188 
freeMemory(int32_t slot)189 Return<void> Burst::freeMemory(int32_t slot) {
190     mMemoryCache.removeCacheEntry(slot);
191     return Void();
192 }
193 
task()194 void Burst::task() {
195     // loop until the burst object is being destroyed
196     while (!mTeardown) {
197         // receive request
198         auto arguments = mRequestChannelReceiver->getBlocking();
199 
200         // if the request packet was not properly received, return a generic error and skip the
201         // execution
202         //
203         // if the burst is being torn down, skip the execution so the "task" function can end
204         if (!arguments.has_value()) {
205             if (!mTeardown) {
206                 mResultChannelSender->send(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kTiming);
207             }
208             continue;
209         }
210 
211         // unpack the arguments; types are Request, std::vector<int32_t>, and V1_2::MeasureTiming,
212         // respectively
213         const auto [requestWithoutPools, slotsOfPools, measure] = std::move(arguments).value();
214 
215         auto result = execute(requestWithoutPools, slotsOfPools, measure);
216 
217         // return result
218         if (result.has_value()) {
219             const auto& [outputShapes, timing] = result.value();
220             mResultChannelSender->send(V1_0::ErrorStatus::NONE, outputShapes, timing);
221         } else {
222             const auto& [message, code, outputShapes] = result.error();
223             LOG(ERROR) << "IBurst::execute failed with " << code << ": " << message;
224             mResultChannelSender->send(V1_2::utils::convert(code).value(),
225                                        V1_2::utils::convert(outputShapes).value(), kTiming);
226         }
227     }
228 }
229 
execute(const V1_0::Request & requestWithoutPools,const std::vector<int32_t> & slotsOfPools,V1_2::MeasureTiming measure)230 nn::ExecutionResult<std::pair<hidl_vec<V1_2::OutputShape>, V1_2::Timing>> Burst::execute(
231         const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools,
232         V1_2::MeasureTiming measure) {
233     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
234                  "Burst getting memory, executing, and returning results");
235 
236     // ensure executor with cache has required memory
237     const auto cacheEntries = NN_TRY(mMemoryCache.getCacheEntries(slotsOfPools));
238 
239     // convert request, populating its pools
240     // This code performs an unvalidated convert because the request object without its pools is
241     // invalid because it is incomplete. Instead, the validation is performed after the memory pools
242     // have been added to the request.
243     auto canonicalRequest = NN_TRY(nn::unvalidatedConvert(requestWithoutPools));
244     CHECK(canonicalRequest.pools.empty());
245     std::transform(cacheEntries.begin(), cacheEntries.end(),
246                    std::back_inserter(canonicalRequest.pools),
247                    [](const auto& cacheEntry) { return cacheEntry.first; });
248     NN_TRY(validate(canonicalRequest));
249 
250     nn::MeasureTiming canonicalMeasure = NN_TRY(nn::convert(measure));
251 
252     const auto [outputShapes, timing] =
253             NN_TRY(mBurstExecutor->execute(canonicalRequest, canonicalMeasure, {}, {}, {}, {}));
254 
255     return std::make_pair(NN_TRY(V1_2::utils::convert(outputShapes)),
256                           NN_TRY(V1_2::utils::convert(timing)));
257 }
258 
259 }  // namespace android::hardware::neuralnetworks::adapter
260