/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define STATSD_DEBUG false
#include "Log.h"
#include "OringDurationTracker.h"
#include "guardrail/StatsdStats.h"

namespace android {
namespace os {
namespace statsd {

using std::pair;

OringDurationTracker::OringDurationTracker(
        const ConfigKey& key, const int64_t& id, const MetricDimensionKey& eventKey,
        sp<ConditionWizard> wizard, int conditionIndex, bool nesting, int64_t currentBucketStartNs,
        int64_t currentBucketNum, int64_t startTimeNs, int64_t bucketSizeNs, bool conditionSliced,
        bool fullLink, const vector<sp<AnomalyTracker>>& anomalyTrackers)
    : DurationTracker(key, id, eventKey, wizard, conditionIndex, nesting, currentBucketStartNs,
                      currentBucketNum, startTimeNs, bucketSizeNs, conditionSliced, fullLink,
                      anomalyTrackers),
      mStarted(),
      mPaused() {
    mLastStartTime = 0;
}

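// Guardrail against runaway dimension cardinality: once the soft limit is crossed the tuple count
// is reported to StatsdStats, and once the hard limit is crossed new dimension keys are dropped.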
bool OringDurationTracker::hitGuardRail(const HashableDimensionKey& newKey) {
    // ===========GuardRail==============
    // 1. Report the tuple count if the tuple count > soft limit
    if (mConditionKeyMap.find(newKey) != mConditionKeyMap.end()) {
        return false;
    }
    if (mConditionKeyMap.size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
        size_t newTupleCount = mConditionKeyMap.size() + 1;
        StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mTrackerId, newTupleCount);
        // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
        if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
            if (!mHasHitGuardrail) {
                ALOGE("OringDurTracker %lld dropping data for dimension key %s",
                      (long long)mTrackerId, newKey.toString().c_str());
                mHasHitGuardrail = true;
            }
            return true;
        }
    }
    return false;
}

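// Records a start event for |key|. While the condition is true the key is counted in mStarted;
// otherwise it is parked in mPaused until the (sliced) condition becomes true.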
void OringDurationTracker::noteStart(const HashableDimensionKey& key, bool condition,
                                     const int64_t eventTime, const ConditionKey& conditionKey) {
    if (hitGuardRail(key)) {
        return;
    }
    if (condition) {
        if (mStarted.size() == 0) {
            mLastStartTime = eventTime;
            VLOG("record first start....");
            startAnomalyAlarm(eventTime);
        }
        mStarted[key]++;
    } else {
        mPaused[key]++;
    }

    if (mConditionSliced && mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
        mConditionKeyMap[key] = conditionKey;
    }
    VLOG("Oring: %s start, condition %d", key.toString().c_str(), condition);
}

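// Records a stop for |key|; when the last started key stops, the time elapsed since mLastStartTime
// is accumulated for the current state key and anomaly detection runs on the new total.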
void OringDurationTracker::noteStop(const HashableDimensionKey& key, const int64_t timestamp,
                                    const bool stopAll) {
    VLOG("Oring: %s stop", key.toString().c_str());
    auto it = mStarted.find(key);
    if (it != mStarted.end()) {
        (it->second)--;
        if (stopAll || !mNested || it->second <= 0) {
            mStarted.erase(it);
            mConditionKeyMap.erase(key);
        }
        if (mStarted.empty()) {
            mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                    (timestamp - mLastStartTime);
            detectAndDeclareAnomaly(
                    timestamp, mCurrentBucketNum,
                    getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
            VLOG("record duration %lld, total duration %lld for state key %s",
                 (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
                 mEventKey.getStateValuesKey().toString().c_str());
        }
    }

    auto pausedIt = mPaused.find(key);
    if (pausedIt != mPaused.end()) {
        (pausedIt->second)--;
        if (stopAll || !mNested || pausedIt->second <= 0) {
            mPaused.erase(pausedIt);
            mConditionKeyMap.erase(key);
        }
    }
    if (mStarted.empty()) {
        stopAnomalyAlarm(timestamp);
    }
}

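// Stops every ongoing duration: accumulates the elapsed time for the current state key, runs
// anomaly detection, then clears all started/paused keys and their cached condition keys.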
void OringDurationTracker::noteStopAll(const int64_t timestamp) {
    if (!mStarted.empty()) {
        mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                (timestamp - mLastStartTime);
        VLOG("Oring Stop all: record duration %lld, total duration %lld for state key %s",
             (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
             mEventKey.getStateValuesKey().toString().c_str());
        detectAndDeclareAnomaly(
                timestamp, mCurrentBucketNum,
                getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
    }

    stopAnomalyAlarm(timestamp);
    mStarted.clear();
    mPaused.clear();
    mConditionKeyMap.clear();
}

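// Closes out the current bucket at |eventTimeNs|, emitting one DurationBucket per state key that
// passes the upload threshold, plus filler buckets when the event lands several buckets ahead.
// Returns true when the tracker has no remaining state and is safe for the owner to remove.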
bool OringDurationTracker::flushCurrentBucket(
        const int64_t& eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
        const int64_t globalConditionTrueNs,
        std::unordered_map<MetricDimensionKey, std::vector<DurationBucket>>* output) {
    VLOG("OringDurationTracker Flushing.............");

    // Note that we have to mimic the bucket time changes we do in the
    // MetricProducer#notifyAppUpgrade.

    int numBucketsForward = 0;
    int64_t fullBucketEnd = getCurrentBucketEndTimeNs();
    int64_t currentBucketEndTimeNs;

    bool isFullBucket = eventTimeNs >= fullBucketEnd;
    if (isFullBucket) {
        numBucketsForward = 1 + (eventTimeNs - fullBucketEnd) / mBucketSizeNs;
        currentBucketEndTimeNs = fullBucketEnd;
    } else {
        // This must be a partial bucket.
        currentBucketEndTimeNs = eventTimeNs;
    }
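    // Illustrative example: with mBucketSizeNs = 1000, fullBucketEnd = 10000 and an event at
    // eventTimeNs = 12500, numBucketsForward = 1 + (12500 - 10000) / 1000 = 3, so the current
    // bucket closes at its normal end time and two full filler buckets are appended below.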

    // Process the current bucket.
    if (mStarted.size() > 0) {
        // Calculate the duration for the current state key.
        mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                (currentBucketEndTimeNs - mLastStartTime);
    }
    // Store DurationBucket info for each whatKey, stateKey pair.
    // Note: The whatKey stored in mEventKey is constant for each DurationTracker, while the
    // stateKey stored in mEventKey is only the current stateKey. mStateKeyDurationMap is used to
    // store durations for each stateKey, so we need to flush the bucket by creating a
    // DurationBucket for each stateKey.
    for (auto& durationIt : mStateKeyDurationMap) {
        if (durationPassesThreshold(uploadThreshold, durationIt.second.mDuration)) {
            DurationBucket current_info;
            current_info.mBucketStartNs = mCurrentBucketStartTimeNs;
            current_info.mBucketEndNs = currentBucketEndTimeNs;
            current_info.mDuration = durationIt.second.mDuration;
            current_info.mConditionTrueNs = globalConditionTrueNs;
            (*output)[MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first)]
                    .push_back(current_info);

            durationIt.second.mDurationFullBucket += durationIt.second.mDuration;
            VLOG(" duration: %lld", (long long)current_info.mDuration);
        } else {
            VLOG(" duration: %lld does not pass set threshold",
                 (long long)durationIt.second.mDuration);
        }

        if (isFullBucket) {
            // End of full bucket, can send to anomaly tracker now.
            addPastBucketToAnomalyTrackers(
                    MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first),
                    getCurrentStateKeyFullBucketDuration(), mCurrentBucketNum);
            durationIt.second.mDurationFullBucket = 0;
        }
        durationIt.second.mDuration = 0;
    }

    if (mStarted.size() > 0) {
        for (int i = 1; i < numBucketsForward; i++) {
            DurationBucket info;
            info.mBucketStartNs = fullBucketEnd + mBucketSizeNs * (i - 1);
            info.mBucketEndNs = info.mBucketStartNs + mBucketSizeNs;
            info.mDuration = mBucketSizeNs;
            // Full duration buckets are attributed to the current stateKey.
            (*output)[mEventKey].push_back(info);
            // Safe to send these buckets to anomaly tracker since they must be full buckets.
            // If it's a partial bucket, numBucketsForward would be 0.
            addPastBucketToAnomalyTrackers(mEventKey, info.mDuration, mCurrentBucketNum + i);
            VLOG(" add filling bucket with duration %lld", (long long)info.mDuration);
        }
    } else {
        if (numBucketsForward >= 2) {
            addPastBucketToAnomalyTrackers(mEventKey, 0, mCurrentBucketNum + numBucketsForward - 1);
        }
    }

    if (numBucketsForward > 0) {
        mCurrentBucketStartTimeNs = fullBucketEnd + (numBucketsForward - 1) * mBucketSizeNs;
        mCurrentBucketNum += numBucketsForward;
    } else {  // We must be forming a partial bucket.
        mCurrentBucketStartTimeNs = eventTimeNs;
    }
    mLastStartTime = mCurrentBucketStartTimeNs;
    // Reset mHasHitGuardrail boolean since bucket was reset
    mHasHitGuardrail = false;

    // If all stopped, then tell owner it's safe to remove this tracker on a full bucket.
    // On a partial bucket, only clear if no anomaly trackers, as full bucket duration is used
    // for anomaly detection.
    // Note: Anomaly trackers can be added on config updates, in which case mAnomalyTrackers > 0
    // and the full bucket duration could be used, but this is very rare so it is okay to clear.
    return mStarted.empty() && mPaused.empty() && (isFullBucket || mAnomalyTrackers.size() == 0);
}

bool OringDurationTracker::flushIfNeeded(
        int64_t eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
        unordered_map<MetricDimensionKey, vector<DurationBucket>>* output) {
    if (eventTimeNs < getCurrentBucketEndTimeNs()) {
        return false;
    }
    return flushCurrentBucket(eventTimeNs, uploadThreshold, /*globalConditionTrueNs=*/0, output);
}

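// Re-queries the sliced condition for every tracked key and moves keys between mStarted and
// mPaused accordingly, closing out the accumulated duration when the last started key pauses.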
void OringDurationTracker::onSlicedConditionMayChange(const int64_t timestamp) {
    vector<pair<HashableDimensionKey, int>> startedToPaused;
    vector<pair<HashableDimensionKey, int>> pausedToStarted;
    if (!mStarted.empty()) {
        for (auto it = mStarted.begin(); it != mStarted.end();) {
            const auto& key = it->first;
            const auto& condIt = mConditionKeyMap.find(key);
            if (condIt == mConditionKeyMap.end()) {
                VLOG("Key %s doesn't have a condition key", key.toString().c_str());
                ++it;
                continue;
            }
            ConditionState conditionState =
                    mWizard->query(mConditionTrackerIndex, condIt->second,
                                   !mHasLinksToAllConditionDimensionsInTracker);
            if (conditionState != ConditionState::kTrue) {
                startedToPaused.push_back(*it);
                it = mStarted.erase(it);
                VLOG("Key %s started -> paused", key.toString().c_str());
            } else {
                ++it;
            }
        }

        if (mStarted.empty()) {
            mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                    (timestamp - mLastStartTime);
            VLOG("record duration %lld, total duration %lld for state key %s",
                 (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
                 mEventKey.getStateValuesKey().toString().c_str());
            detectAndDeclareAnomaly(
                    timestamp, mCurrentBucketNum,
                    getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
        }
    }

    if (!mPaused.empty()) {
        for (auto it = mPaused.begin(); it != mPaused.end();) {
            const auto& key = it->first;
            if (mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
                VLOG("Key %s doesn't have a condition key", key.toString().c_str());
                ++it;
                continue;
            }
            ConditionState conditionState =
                    mWizard->query(mConditionTrackerIndex, mConditionKeyMap[key],
                                   !mHasLinksToAllConditionDimensionsInTracker);
            if (conditionState == ConditionState::kTrue) {
                pausedToStarted.push_back(*it);
                it = mPaused.erase(it);
                VLOG("Key %s paused -> started", key.toString().c_str());
            } else {
                ++it;
            }
        }

        if (mStarted.empty() && pausedToStarted.size() > 0) {
            mLastStartTime = timestamp;
        }
    }

    if (mStarted.empty() && !pausedToStarted.empty()) {
        startAnomalyAlarm(timestamp);
    }
    mStarted.insert(pausedToStarted.begin(), pausedToStarted.end());
    mPaused.insert(startedToPaused.begin(), startedToPaused.end());

    if (mStarted.empty()) {
        stopAnomalyAlarm(timestamp);
    }
}

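// Handles a flip of the (unsliced) condition: when it turns true all paused keys resume counting,
// and when it turns false all started keys are paused and the elapsed duration is accumulated.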
void OringDurationTracker::onConditionChanged(bool condition, const int64_t timestamp) {
    if (condition) {
        if (!mPaused.empty()) {
            VLOG("Condition true, all started");
            if (mStarted.empty()) {
                mLastStartTime = timestamp;
            }
            if (mStarted.empty() && !mPaused.empty()) {
                startAnomalyAlarm(timestamp);
            }
            mStarted.insert(mPaused.begin(), mPaused.end());
            mPaused.clear();
        }
    } else {
        if (!mStarted.empty()) {
            VLOG("Condition false, all paused");
            mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                    (timestamp - mLastStartTime);
            mPaused.insert(mStarted.begin(), mStarted.end());
            mStarted.clear();
            detectAndDeclareAnomaly(
                    timestamp, mCurrentBucketNum,
                    getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
        }
    }
    if (mStarted.empty()) {
        stopAnomalyAlarm(timestamp);
    }
}

void OringDurationTracker::onStateChanged(const int64_t timestamp, const int32_t atomId,
                                          const FieldValue& newState) {
    // Nothing needs to be done on a state change if we have not seen a start
    // event, the metric is currently not active, or condition is false.
    // For these cases, no keys are being tracked in mStarted, so update
    // the current state key and return.
    if (mStarted.empty()) {
        updateCurrentStateKey(atomId, newState);
        return;
    }
    // Add the current duration length to the previous state key and then update
    // the last start time and current state key.
    mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration += (timestamp - mLastStartTime);
    mLastStartTime = timestamp;
    updateCurrentStateKey(atomId, newState);
}

bool OringDurationTracker::hasAccumulatingDuration() {
    return !mStarted.empty();
}

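// Predicts, from the durations accumulated so far, the earliest future timestamp at which
// |anomalyTracker|'s threshold could be crossed outside of its refractory period.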
int64_t OringDurationTracker::predictAnomalyTimestampNs(const AnomalyTracker& anomalyTracker,
                                                         const int64_t eventTimestampNs) const {
    // The anomaly threshold.
    const int64_t thresholdNs = anomalyTracker.getAnomalyThreshold();

    // The timestamp of the current bucket end.
    const int64_t currentBucketEndNs = getCurrentBucketEndTimeNs();

    // The past duration ns for the current bucket of the current stateKey.
    int64_t currentStateBucketPastNs =
            getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration();

    // As we move into the future, old buckets get overwritten (so their old data is erased).
    // Sum of past durations. Will change as we overwrite old buckets.
    int64_t pastNs = currentStateBucketPastNs + anomalyTracker.getSumOverPastBuckets(mEventKey);

    // The refractory period end timestamp for dimension mEventKey.
    const int64_t refractoryPeriodEndNs =
            anomalyTracker.getRefractoryPeriodEndsSec(mEventKey) * NS_PER_SEC;

    // The anomaly should happen when accumulated wakelock duration is above the threshold and
    // not within the refractory period.
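    // For example (illustrative numbers): with thresholdNs = 10s of accumulated duration,
    // pastNs = 4s already accumulated, and the refractory period already over, the anomaly
    // would be predicted at eventTimestampNs + 6s = max(eventTimestampNs + 10s - 4s,
    // refractoryPeriodEndNs).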
    int64_t anomalyTimestampNs =
            std::max(eventTimestampNs + thresholdNs - pastNs, refractoryPeriodEndNs);
    // If the predicted anomaly timestamp is within the current bucket, return it directly.
    if (anomalyTimestampNs <= currentBucketEndNs) {
        return std::max(eventTimestampNs, anomalyTimestampNs);
    }

    // Remove the old bucket.
    if (anomalyTracker.getNumOfPastBuckets() > 0) {
        pastNs -= anomalyTracker.getPastBucketValue(
                mEventKey, mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets());
        // Add the remainder of the current bucket to the accumulated wakelock duration.
        pastNs += (currentBucketEndNs - eventTimestampNs);
    } else {
        // The anomaly depends on only one bucket.
        pastNs = 0;
    }

    // The anomaly will not happen in the current bucket. We need to iterate over the future
    // buckets to predict the accumulated wakelock duration and determine the anomaly timestamp
    // accordingly.
    for (int futureBucketIdx = 1; futureBucketIdx <= anomalyTracker.getNumOfPastBuckets() + 1;
         futureBucketIdx++) {
        // The alarm candidate timestamp should meet three requirements:
        // 1. the accumulated wakelock duration is above the threshold.
        // 2. it is not within the refractory period.
        // 3. the alarm timestamp falls in this bucket. Otherwise we need to flush the past
        //    buckets, find the new alarm candidate timestamp and check these requirements again.
        const int64_t bucketEndNs = currentBucketEndNs + futureBucketIdx * mBucketSizeNs;
        int64_t anomalyTimestampNs =
                std::max(bucketEndNs - mBucketSizeNs + thresholdNs - pastNs, refractoryPeriodEndNs);
        if (anomalyTimestampNs <= bucketEndNs) {
            return anomalyTimestampNs;
        }
        if (anomalyTracker.getNumOfPastBuckets() <= 0) {
            continue;
        }

        // No valid alarm timestamp is found in this bucket. The clock moves to the end of the
        // bucket. Update pastNs:
        pastNs += mBucketSizeNs;
        // 1. If the oldest past bucket is still in the past bucket window, we can fetch that past
        //    bucket and erase it from pastNs.
        // 2. If the oldest past bucket is the current bucket, we should compute the wakelock
        //    duration in the current bucket and erase it from pastNs.
        // 3. Otherwise all other past buckets are ancient.
        if (futureBucketIdx < anomalyTracker.getNumOfPastBuckets()) {
            pastNs -= anomalyTracker.getPastBucketValue(
                    mEventKey,
                    mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets() + futureBucketIdx);
        } else if (futureBucketIdx == anomalyTracker.getNumOfPastBuckets()) {
            pastNs -= (currentStateBucketPastNs + (currentBucketEndNs - eventTimestampNs));
        }
    }

    return std::max(eventTimestampNs + thresholdNs, refractoryPeriodEndNs);
}

void OringDurationTracker::dumpStates(FILE* out, bool verbose) const {
    fprintf(out, "\t\t started count %lu\n", (unsigned long)mStarted.size());
    fprintf(out, "\t\t paused count %lu\n", (unsigned long)mPaused.size());
    fprintf(out, "\t\t current duration %lld\n", (long long)getCurrentStateKeyDuration());
}

int64_t OringDurationTracker::getCurrentStateKeyDuration() const {
    auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
    if (it == mStateKeyDurationMap.end()) {
        return 0;
    } else {
        return it->second.mDuration;
    }
}

int64_t OringDurationTracker::getCurrentStateKeyFullBucketDuration() const {
    auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
    if (it == mStateKeyDurationMap.end()) {
        return 0;
    } else {
        return it->second.mDurationFullBucket;
    }
}

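// Rewrites the tracker's state values key in place: any value whose field tag matches |atomId| is
// replaced with |newState|'s value, so subsequent durations are attributed to the new state.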
void OringDurationTracker::updateCurrentStateKey(const int32_t atomId,
                                                 const FieldValue& newState) {
    HashableDimensionKey* stateValuesKey = mEventKey.getMutableStateValuesKey();
    for (size_t i = 0; i < stateValuesKey->getValues().size(); i++) {
        if (stateValuesKey->getValues()[i].mField.getTag() == atomId) {
            stateValuesKey->mutableValue(i)->mValue = newState.mValue;
        }
    }
}

}  // namespace statsd
}  // namespace os
}  // namespace android