1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 18 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 19 20 #include <android-base/thread_annotations.h> 21 22 #include <array> 23 #include <condition_variable> 24 #include <limits> 25 #include <map> 26 #include <mutex> 27 #include <queue> 28 #include <string> 29 #include <thread> 30 #include <utility> 31 #include <vector> 32 33 #include "Telemetry.h" 34 35 namespace android::nn::telemetry { 36 37 using ModelArchHash = std::array<uint8_t, BYTE_SIZE_OF_MODEL_ARCH_HASH>; 38 39 constexpr int64_t kSumTimeDefault = 0; 40 constexpr int64_t kMinTimeDefault = std::numeric_limits<int64_t>::max(); 41 constexpr int64_t kMaxTimeDefault = std::numeric_limits<int64_t>::min(); 42 43 // For CompilationCompleted: isExecution = false, executionMode = SYNC, errorCode = 0 44 // For CompilationFailed: isExecution = false, executionMode = SYNC, errorCode != 0 45 // For ExecutionCompleted: isExecution = true, errorCode = 0, fallbackToCpuFromError = false 46 // For ExecutionFailed: isExecution = true, errorCode != 0, fallbackToCpuFromError = false 47 struct AtomKey { 48 bool isExecution; 49 ModelArchHash modelArchHash; 50 std::string deviceId; 51 ExecutionMode executionMode; 52 int32_t errorCode; 53 DataClass inputDataClass; 54 DataClass outputDataClass; 55 bool fallbackToCpuFromError; 56 bool introspectionEnabled; 57 bool cacheEnabled; 58 bool hasControlFlow; 59 bool hasDynamicTemporaries; 60 }; 61 62 bool operator==(const AtomKey& lhs, const AtomKey& rhs); 63 bool operator<(const AtomKey& lhs, const AtomKey& rhs); 64 65 // For CompilationCompleted, all timings except compilationTimeMillis omitted 66 // For CompilationFailed, all timings omitted 67 // For ExecutionCompleted, compilationTimeMillis timing omitted 68 // For ExecutionFailed, all timings omitted 69 struct AtomValue { 70 int32_t count = 0; 71 72 // AccumulatedTiming stores all the information needed to calculate the average, min, max, and 73 // standard deviation of all the accumulated timings. When count == 0, AccumulatedTiming is 74 // ignored. When count > 0: 75 // * average = sumTime / count 76 // * minimum = minTime 77 // * maximum = maxTime 78 // * variance = sumSquaredTime / count - average * average 79 // * standard deviation = sqrt(variance) 80 // * sample standard deviation = sqrt(variance * count / (count - 1)) 81 struct AccumulatedTiming { 82 int64_t sumTime = kSumTimeDefault; 83 int64_t minTime = kMinTimeDefault; 84 int64_t maxTime = kMaxTimeDefault; 85 // Sum of each squared timing, e.g.: t1^2 + t2^2 + ... + tn^2 86 int64_t sumSquaredTime = kSumTimeDefault; 87 int32_t count = 0; 88 }; 89 AccumulatedTiming compilationTimeMillis; 90 AccumulatedTiming durationRuntimeMicros; 91 AccumulatedTiming durationDriverMicros; 92 AccumulatedTiming durationHardwareMicros; 93 }; 94 95 void combineAtomValues(AtomValue* acculatedValue, const AtomValue& value); 96 97 // Atom type to be sent to Statsd Telemetry 98 using Atom = std::pair<AtomKey, AtomValue>; 99 100 // Helper class to locally aggregate and retrieve telemetry atoms. 101 class AtomAggregator { 102 public: 103 bool empty() const; 104 105 void push(Atom&& atom); 106 107 // Precondition: !empty() 108 Atom pop(); 109 110 private: 111 std::map<AtomKey, AtomValue> mAggregate; 112 // Pointer to keys of mAggregate to ensure atoms are logged in a fair order. Using pointers into 113 // a std::map is guaranteed to work because references to elements are guaranteed to be valid 114 // until that element is erased. 115 std::queue<const AtomKey*> mOrder; 116 }; 117 118 using LoggerFn = std::function<void(Atom&&)>; 119 120 // AsyncLogger minimizes the call to `write`, so that the calling thread which handles the 121 // compilation or execution is not slowed down by writing to statsd. Instead, AsyncLogger 122 // contains a dedicated thread that will handle logging to statsd in the background. 123 // This class is thread-safe. 124 class AsyncLogger { 125 public: 126 AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration); 127 AsyncLogger(const AsyncLogger&) = delete; 128 AsyncLogger(AsyncLogger&&) = delete; 129 AsyncLogger& operator=(const AsyncLogger&) = delete; 130 AsyncLogger& operator=(AsyncLogger&&) = delete; 131 ~AsyncLogger(); 132 133 void write(Atom&& atom); 134 135 private: 136 enum class Result { 137 SUCCESS, 138 TEARDOWN, 139 }; 140 141 // Precondition: output != nullptr 142 // Precondition: output->empty() 143 Result takeAll(std::vector<Atom>* output, bool blockUntilDataIsAvailable); 144 145 Result sleepFor(Duration duration); 146 147 mutable std::mutex mMutex; 148 mutable std::condition_variable mNotEmptyOrTeardown; 149 mutable std::vector<Atom> mChannel GUARDED_BY(mMutex); 150 mutable bool mTeardown GUARDED_BY(mMutex) = false; 151 std::thread mThread; 152 }; 153 154 // Create an Atom from a diagnostic info object. 155 Atom createAtomFrom(const DiagnosticCompilationInfo* info); 156 Atom createAtomFrom(const DiagnosticExecutionInfo* info); 157 158 // Log an Atom to statsd from a diagnostic info object. 159 void logCompilationToStatsd(const DiagnosticCompilationInfo* info); 160 void logExecutionToStatsd(const DiagnosticExecutionInfo* info); 161 162 } // namespace android::nn::telemetry 163 164 #endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 165