1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef METRIC_PRODUCER_H 18 #define METRIC_PRODUCER_H 19 20 #include <shared_mutex> 21 22 #include <frameworks/base/cmds/statsd/src/active_config_list.pb.h> 23 #include "HashableDimensionKey.h" 24 #include "anomaly/AnomalyTracker.h" 25 #include "condition/ConditionWizard.h" 26 #include "config/ConfigKey.h" 27 #include "matchers/matcher_util.h" 28 #include "packages/PackageInfoListener.h" 29 30 #include <log/logprint.h> 31 #include <utils/RefBase.h> 32 #include <unordered_map> 33 34 namespace android { 35 namespace os { 36 namespace statsd { 37 38 // Keep this in sync with DumpReportReason enum in stats_log.proto 39 enum DumpReportReason { 40 DEVICE_SHUTDOWN = 1, 41 CONFIG_UPDATED = 2, 42 CONFIG_REMOVED = 3, 43 GET_DATA_CALLED = 4, 44 ADB_DUMP = 5, 45 CONFIG_RESET = 6, 46 STATSCOMPANION_DIED = 7, 47 TERMINATION_SIGNAL_RECEIVED = 8 48 }; 49 50 // If the metric has no activation requirement, it will be active once the metric producer is 51 // created. 52 // If the metric needs to be activated by atoms, the metric producer will start 53 // with kNotActive state, turn to kActive or kActiveOnBoot when the activation event arrives, become 54 // kNotActive when it reaches the duration limit (timebomb). If the activation event arrives again 55 // before or after it expires, the event producer will be re-activated and ttl will be reset. 56 enum ActivationState { 57 kNotActive = 0, 58 kActive = 1, 59 kActiveOnBoot = 2, 60 }; 61 62 enum DumpLatency { 63 // In some cases, we only have a short time range to do the dump, e.g. statsd is being killed. 64 // We might be able to return all the data in this mode. For instance, pull metrics might need 65 // to be pulled when the current bucket is requested. 66 FAST = 1, 67 // In other cases, it is fine for a dump to take more than a few milliseconds, e.g. config 68 // updates. 69 NO_TIME_CONSTRAINTS = 2 70 }; 71 72 // A MetricProducer is responsible for compute one single metrics, creating stats log report, and 73 // writing the report to dropbox. MetricProducers should respond to package changes as required in 74 // PackageInfoListener, but if none of the metrics are slicing by package name, then the update can 75 // be a no-op. 76 class MetricProducer : public virtual PackageInfoListener { 77 public: MetricProducer(const int64_t & metricId,const ConfigKey & key,const int64_t timeBaseNs,const int conditionIndex,const sp<ConditionWizard> & wizard)78 MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs, 79 const int conditionIndex, const sp<ConditionWizard>& wizard) 80 : mMetricId(metricId), 81 mConfigKey(key), 82 mTimeBaseNs(timeBaseNs), 83 mCurrentBucketStartTimeNs(timeBaseNs), 84 mCurrentBucketNum(0), 85 mCondition(initialCondition(conditionIndex)), 86 mConditionSliced(false), 87 mWizard(wizard), 88 mConditionTrackerIndex(conditionIndex), 89 mContainANYPositionInDimensionsInWhat(false), 90 mSliceByPositionALL(false), 91 mSameConditionDimensionsInTracker(false), 92 mHasLinksToAllConditionDimensionsInTracker(false), 93 mIsActive(true) { 94 } 95 ~MetricProducer()96 virtual ~MetricProducer(){}; 97 initialCondition(const int conditionIndex)98 ConditionState initialCondition(const int conditionIndex) const { 99 return conditionIndex >= 0 ? ConditionState::kUnknown : ConditionState::kTrue; 100 } 101 102 /** 103 * Forces this metric to split into a partial bucket right now. If we're past a full bucket, we 104 * first call the standard flushing code to flush up to the latest full bucket. Then we call 105 * the flush again when the end timestamp is forced to be now, and then after flushing, update 106 * the start timestamp to be now. 107 */ notifyAppUpgrade(const int64_t & eventTimeNs,const string & apk,const int uid,const int64_t version)108 void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid, 109 const int64_t version) override { 110 std::lock_guard<std::mutex> lock(mMutex); 111 112 if (eventTimeNs > getCurrentBucketEndTimeNs()) { 113 // Flush full buckets on the normal path up to the latest bucket boundary. 114 flushIfNeededLocked(eventTimeNs); 115 } 116 // Now flush a partial bucket. 117 flushCurrentBucketLocked(eventTimeNs, eventTimeNs); 118 // Don't update the current bucket number so that the anomaly tracker knows this bucket 119 // is a partial bucket and can merge it with the previous bucket. 120 }; 121 notifyAppRemoved(const int64_t & eventTimeNs,const string & apk,const int uid)122 void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override{ 123 // Force buckets to split on removal also. 124 notifyAppUpgrade(eventTimeNs, apk, uid, 0); 125 }; 126 onUidMapReceived(const int64_t & eventTimeNs)127 void onUidMapReceived(const int64_t& eventTimeNs) override{ 128 // Purposefully don't flush partial buckets on a new snapshot. 129 // This occurs if a new user is added/removed or statsd crashes. 130 }; 131 132 // Consume the parsed stats log entry that already matched the "what" of the metric. onMatchedLogEvent(const size_t matcherIndex,const LogEvent & event)133 void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) { 134 std::lock_guard<std::mutex> lock(mMutex); 135 onMatchedLogEventLocked(matcherIndex, event); 136 } 137 onConditionChanged(const bool condition,const int64_t eventTime)138 void onConditionChanged(const bool condition, const int64_t eventTime) { 139 std::lock_guard<std::mutex> lock(mMutex); 140 onConditionChangedLocked(condition, eventTime); 141 } 142 onSlicedConditionMayChange(bool overallCondition,const int64_t eventTime)143 void onSlicedConditionMayChange(bool overallCondition, const int64_t eventTime) { 144 std::lock_guard<std::mutex> lock(mMutex); 145 onSlicedConditionMayChangeLocked(overallCondition, eventTime); 146 } 147 isConditionSliced()148 bool isConditionSliced() const { 149 std::lock_guard<std::mutex> lock(mMutex); 150 return mConditionSliced; 151 }; 152 153 // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp. 154 // This method clears all the past buckets. onDumpReport(const int64_t dumpTimeNs,const bool include_current_partial_bucket,const bool erase_data,const DumpLatency dumpLatency,std::set<string> * str_set,android::util::ProtoOutputStream * protoOutput)155 void onDumpReport(const int64_t dumpTimeNs, 156 const bool include_current_partial_bucket, 157 const bool erase_data, 158 const DumpLatency dumpLatency, 159 std::set<string> *str_set, 160 android::util::ProtoOutputStream* protoOutput) { 161 std::lock_guard<std::mutex> lock(mMutex); 162 return onDumpReportLocked(dumpTimeNs, include_current_partial_bucket, erase_data, 163 dumpLatency, str_set, protoOutput); 164 } 165 clearPastBuckets(const int64_t dumpTimeNs)166 void clearPastBuckets(const int64_t dumpTimeNs) { 167 std::lock_guard<std::mutex> lock(mMutex); 168 return clearPastBucketsLocked(dumpTimeNs); 169 } 170 dumpStates(FILE * out,bool verbose)171 void dumpStates(FILE* out, bool verbose) const { 172 std::lock_guard<std::mutex> lock(mMutex); 173 dumpStatesLocked(out, verbose); 174 } 175 176 // Returns the memory in bytes currently used to store this metric's data. Does not change 177 // state. byteSize()178 size_t byteSize() const { 179 std::lock_guard<std::mutex> lock(mMutex); 180 return byteSizeLocked(); 181 } 182 183 /* If alert is valid, adds an AnomalyTracker and returns it. If invalid, returns nullptr. */ addAnomalyTracker(const Alert & alert,const sp<AlarmMonitor> & anomalyAlarmMonitor)184 virtual sp<AnomalyTracker> addAnomalyTracker(const Alert &alert, 185 const sp<AlarmMonitor>& anomalyAlarmMonitor) { 186 std::lock_guard<std::mutex> lock(mMutex); 187 sp<AnomalyTracker> anomalyTracker = new AnomalyTracker(alert, mConfigKey); 188 if (anomalyTracker != nullptr) { 189 mAnomalyTrackers.push_back(anomalyTracker); 190 } 191 return anomalyTracker; 192 } 193 getBuckeSizeInNs()194 int64_t getBuckeSizeInNs() const { 195 std::lock_guard<std::mutex> lock(mMutex); 196 return mBucketSizeNs; 197 } 198 199 // Only needed for unit-testing to override guardrail. setBucketSize(int64_t bucketSize)200 void setBucketSize(int64_t bucketSize) { 201 mBucketSizeNs = bucketSize; 202 } 203 getMetricId()204 inline const int64_t& getMetricId() const { 205 return mMetricId; 206 } 207 loadActiveMetric(const ActiveMetric & activeMetric,int64_t currentTimeNs)208 void loadActiveMetric(const ActiveMetric& activeMetric, int64_t currentTimeNs) { 209 std::lock_guard<std::mutex> lock(mMutex); 210 loadActiveMetricLocked(activeMetric, currentTimeNs); 211 } 212 213 // Let MetricProducer drop in-memory data to save memory. 214 // We still need to keep future data valid and anomaly tracking work, which means we will 215 // have to flush old data, informing anomaly trackers then safely drop old data. 216 // We still keep current bucket data for future metrics' validity. dropData(const int64_t dropTimeNs)217 void dropData(const int64_t dropTimeNs) { 218 std::lock_guard<std::mutex> lock(mMutex); 219 dropDataLocked(dropTimeNs); 220 } 221 222 // For test only. getCurrentBucketNum()223 inline int64_t getCurrentBucketNum() const { 224 return mCurrentBucketNum; 225 } 226 activate(int activationTrackerIndex,int64_t elapsedTimestampNs)227 void activate(int activationTrackerIndex, int64_t elapsedTimestampNs) { 228 std::lock_guard<std::mutex> lock(mMutex); 229 activateLocked(activationTrackerIndex, elapsedTimestampNs); 230 } 231 cancelEventActivation(int deactivationTrackerIndex)232 void cancelEventActivation(int deactivationTrackerIndex) { 233 std::lock_guard<std::mutex> lock(mMutex); 234 cancelEventActivationLocked(deactivationTrackerIndex); 235 } 236 isActive()237 bool isActive() const { 238 std::lock_guard<std::mutex> lock(mMutex); 239 return isActiveLocked(); 240 } 241 242 void addActivation(int activationTrackerIndex, const ActivationType& activationType, 243 int64_t ttl_seconds, int deactivationTrackerIndex = -1); 244 prepareFirstBucket()245 void prepareFirstBucket() { 246 std::lock_guard<std::mutex> lock(mMutex); 247 prepareFirstBucketLocked(); 248 } 249 250 void flushIfExpire(int64_t elapsedTimestampNs); 251 252 void writeActiveMetricToProtoOutputStream( 253 int64_t currentTimeNs, const DumpReportReason reason, ProtoOutputStream* proto); 254 protected: 255 virtual void onConditionChangedLocked(const bool condition, const int64_t eventTime) = 0; 256 virtual void onSlicedConditionMayChangeLocked(bool overallCondition, 257 const int64_t eventTime) = 0; 258 virtual void onDumpReportLocked(const int64_t dumpTimeNs, 259 const bool include_current_partial_bucket, 260 const bool erase_data, 261 const DumpLatency dumpLatency, 262 std::set<string> *str_set, 263 android::util::ProtoOutputStream* protoOutput) = 0; 264 virtual void clearPastBucketsLocked(const int64_t dumpTimeNs) = 0; 265 virtual size_t byteSizeLocked() const = 0; 266 virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0; 267 268 bool evaluateActiveStateLocked(int64_t elapsedTimestampNs); 269 270 void activateLocked(int activationTrackerIndex, int64_t elapsedTimestampNs); 271 void cancelEventActivationLocked(int deactivationTrackerIndex); 272 isActiveLocked()273 inline bool isActiveLocked() const { 274 return mIsActive; 275 } 276 277 void loadActiveMetricLocked(const ActiveMetric& activeMetric, int64_t currentTimeNs); 278 prepareFirstBucketLocked()279 virtual void prepareFirstBucketLocked() {}; 280 /** 281 * Flushes the current bucket if the eventTime is after the current bucket's end time. This will 282 also flush the current partial bucket in memory. 283 */ flushIfNeededLocked(const int64_t & eventTime)284 virtual void flushIfNeededLocked(const int64_t& eventTime){}; 285 286 /** 287 * Flushes all the data including the current partial bucket. 288 */ flushLocked(const int64_t & eventTimeNs)289 virtual void flushLocked(const int64_t& eventTimeNs) { 290 flushIfNeededLocked(eventTimeNs); 291 flushCurrentBucketLocked(eventTimeNs, eventTimeNs); 292 }; 293 294 /** 295 * For metrics that aggregate (ie, every metric producer except for EventMetricProducer), 296 * we need to be able to flush the current buckets on demand (ie, end the current bucket and 297 * start new bucket). If this function is called when eventTimeNs is greater than the current 298 * bucket's end timestamp, than we flush up to the end of the latest full bucket; otherwise, 299 * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket 300 * number are not changed by this function. This method should only be called by 301 * flushIfNeededLocked or flushLocked or the app upgrade handler; the caller MUST update the 302 * bucket timestamp and bucket number as needed. 303 */ flushCurrentBucketLocked(const int64_t & eventTimeNs,const int64_t & nextBucketStartTimeNs)304 virtual void flushCurrentBucketLocked(const int64_t& eventTimeNs, 305 const int64_t& nextBucketStartTimeNs) {}; 306 onActiveStateChangedLocked(const int64_t & eventTimeNs)307 virtual void onActiveStateChangedLocked(const int64_t& eventTimeNs) { 308 if (!mIsActive) { 309 flushLocked(eventTimeNs); 310 } 311 } 312 313 // Convenience to compute the current bucket's end time, which is always aligned with the 314 // start time of the metric. getCurrentBucketEndTimeNs()315 int64_t getCurrentBucketEndTimeNs() const { 316 return mTimeBaseNs + (mCurrentBucketNum + 1) * mBucketSizeNs; 317 } 318 getBucketNumFromEndTimeNs(const int64_t endNs)319 int64_t getBucketNumFromEndTimeNs(const int64_t endNs) { 320 return (endNs - mTimeBaseNs) / mBucketSizeNs - 1; 321 } 322 323 virtual void dropDataLocked(const int64_t dropTimeNs) = 0; 324 325 const int64_t mMetricId; 326 327 const ConfigKey mConfigKey; 328 329 // The time when this metric producer was first created. The end time for the current bucket 330 // can be computed from this based on mCurrentBucketNum. 331 int64_t mTimeBaseNs; 332 333 // Start time may not be aligned with the start of statsd if there is an app upgrade in the 334 // middle of a bucket. 335 int64_t mCurrentBucketStartTimeNs; 336 337 // Used by anomaly detector to track which bucket we are in. This is not sent with the produced 338 // report. 339 int64_t mCurrentBucketNum; 340 341 int64_t mBucketSizeNs; 342 343 ConditionState mCondition; 344 345 bool mConditionSliced; 346 347 sp<ConditionWizard> mWizard; 348 349 int mConditionTrackerIndex; 350 351 vector<Matcher> mDimensionsInWhat; // The dimensions_in_what defined in statsd_config 352 vector<Matcher> mDimensionsInCondition; // The dimensions_in_condition defined in statsd_config 353 354 bool mContainANYPositionInDimensionsInWhat; 355 bool mSliceByPositionALL; 356 357 // True iff the condition dimensions equal to the sliced dimensions in the simple condition 358 // tracker. This field is always false for combinational condition trackers. 359 bool mSameConditionDimensionsInTracker; 360 361 // True iff the metric to condition links cover all dimension fields in the condition tracker. 362 // This field is always false for combinational condition trackers. 363 bool mHasLinksToAllConditionDimensionsInTracker; 364 365 std::vector<Metric2Condition> mMetric2ConditionLinks; 366 367 std::vector<sp<AnomalyTracker>> mAnomalyTrackers; 368 369 /* 370 * Individual metrics can implement their own business logic here. All pre-processing is done. 371 * 372 * [matcherIndex]: the index of the matcher which matched this event. This is interesting to 373 * DurationMetric, because it has start/stop/stop_all 3 matchers. 374 * [eventKey]: the extracted dimension key for the final output. if the metric doesn't have 375 * dimensions, it will be DEFAULT_DIMENSION_KEY 376 * [conditionKey]: the keys of conditions which should be used to query the condition for this 377 * target event (from MetricConditionLink). This is passed to individual metrics 378 * because DurationMetric needs it to be cached. 379 * [condition]: whether condition is met. If condition is sliced, this is the result coming from 380 * query with ConditionWizard; If condition is not sliced, this is the 381 * nonSlicedCondition. 382 * [event]: the log event, just in case the metric needs its data, e.g., EventMetric. 383 */ 384 virtual void onMatchedLogEventInternalLocked( 385 const size_t matcherIndex, const MetricDimensionKey& eventKey, 386 const ConditionKey& conditionKey, bool condition, 387 const LogEvent& event) = 0; 388 389 // Consume the parsed stats log entry that already matched the "what" of the metric. 390 virtual void onMatchedLogEventLocked(const size_t matcherIndex, const LogEvent& event); 391 392 mutable std::mutex mMutex; 393 394 struct Activation { ActivationActivation395 Activation(const ActivationType& activationType, const int64_t ttlNs) 396 : ttl_ns(ttlNs), 397 start_ns(0), 398 state(ActivationState::kNotActive), 399 activationType(activationType) {} 400 401 const int64_t ttl_ns; 402 int64_t start_ns; 403 ActivationState state; 404 const ActivationType activationType; 405 }; 406 // When the metric producer has multiple activations, these activations are ORed to determine 407 // whether the metric producer is ready to generate metrics. 408 std::unordered_map<int, std::shared_ptr<Activation>> mEventActivationMap; 409 410 // Maps index of atom matcher for deactivation to a list of Activation structs. 411 std::unordered_map<int, std::vector<std::shared_ptr<Activation>>> mEventDeactivationMap; 412 413 bool mIsActive; 414 415 FRIEND_TEST(DurationMetricE2eTest, TestOneBucket); 416 FRIEND_TEST(DurationMetricE2eTest, TestTwoBuckets); 417 FRIEND_TEST(DurationMetricE2eTest, TestWithActivation); 418 FRIEND_TEST(DurationMetricE2eTest, TestWithCondition); 419 FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedCondition); 420 FRIEND_TEST(DurationMetricE2eTest, TestWithActivationAndSlicedCondition); 421 422 FRIEND_TEST(MetricActivationE2eTest, TestCountMetric); 423 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithOneDeactivation); 424 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoDeactivations); 425 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithSameDeactivation); 426 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoMetricsTwoDeactivations); 427 428 FRIEND_TEST(StatsLogProcessorTest, TestActiveConfigMetricDiskWriteRead); 429 FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBoot); 430 FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBootMultipleActivations); 431 FRIEND_TEST(StatsLogProcessorTest, 432 TestActivationOnBootMultipleActivationsDifferentActivationTypes); 433 FRIEND_TEST(StatsLogProcessorTest, TestActivationsPersistAcrossSystemServerRestart); 434 }; 435 436 } // namespace statsd 437 } // namespace os 438 } // namespace android 439 #endif // METRIC_PRODUCER_H 440