1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #define DEBUG false
17 #include "Log.h"
18 #include "OringDurationTracker.h"
19 #include "guardrail/StatsdStats.h"
20
21 namespace android {
22 namespace os {
23 namespace statsd {
24
25 using std::pair;
26
// Constructs an OR-ing duration tracker: duration accrues while at least one
// dimension key is "started", so overlapping starts are merged (OR semantics)
// rather than summed. All bookkeeping state is forwarded to the DurationTracker
// base; mStarted/mPaused begin empty.
OringDurationTracker::OringDurationTracker(
        const ConfigKey& key, const int64_t& id, const MetricDimensionKey& eventKey,
        sp<ConditionWizard> wizard, int conditionIndex, bool nesting, int64_t currentBucketStartNs,
        int64_t currentBucketNum, int64_t startTimeNs, int64_t bucketSizeNs, bool conditionSliced,
        bool fullLink, const vector<sp<AnomalyTracker>>& anomalyTrackers)
    : DurationTracker(key, id, eventKey, wizard, conditionIndex, nesting, currentBucketStartNs,
                      currentBucketNum, startTimeNs, bucketSizeNs, conditionSliced, fullLink,
                      anomalyTrackers),
      mStarted(),
      mPaused() {
    // mLastStartTime lives in the base class, so it is assigned here rather
    // than in the member-initializer list. No key has started yet.
    mLastStartTime = 0;
}
39
hitGuardRail(const HashableDimensionKey & newKey)40 bool OringDurationTracker::hitGuardRail(const HashableDimensionKey& newKey) {
41 // ===========GuardRail==============
42 // 1. Report the tuple count if the tuple count > soft limit
43 if (mConditionKeyMap.find(newKey) != mConditionKeyMap.end()) {
44 return false;
45 }
46 if (mConditionKeyMap.size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
47 size_t newTupleCount = mConditionKeyMap.size() + 1;
48 StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mTrackerId, newTupleCount);
49 // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
50 if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
51 ALOGE("OringDurTracker %lld dropping data for dimension key %s",
52 (long long)mTrackerId, newKey.toString().c_str());
53 return true;
54 }
55 }
56 return false;
57 }
58
noteStart(const HashableDimensionKey & key,bool condition,const int64_t eventTime,const ConditionKey & conditionKey)59 void OringDurationTracker::noteStart(const HashableDimensionKey& key, bool condition,
60 const int64_t eventTime, const ConditionKey& conditionKey) {
61 if (hitGuardRail(key)) {
62 return;
63 }
64 if (condition) {
65 if (mStarted.size() == 0) {
66 mLastStartTime = eventTime;
67 VLOG("record first start....");
68 startAnomalyAlarm(eventTime);
69 }
70 mStarted[key]++;
71 } else {
72 mPaused[key]++;
73 }
74
75 if (mConditionSliced && mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
76 mConditionKeyMap[key] = conditionKey;
77 }
78 VLOG("Oring: %s start, condition %d", key.toString().c_str(), condition);
79 }
80
// Handles a stop event for |key|, in either the started or paused set. When
// the last active key stops, the elapsed interval since mLastStartTime is
// credited to the current state key, anomaly detection runs on the new total,
// and the anomaly alarm is cancelled.
void OringDurationTracker::noteStop(const HashableDimensionKey& key, const int64_t timestamp,
                                    const bool stopAll) {
    VLOG("Oring: %s stop", key.toString().c_str());
    auto it = mStarted.find(key);
    if (it != mStarted.end()) {
        (it->second)--;
        // Drop the key when stopping all, when nesting is disabled, or when
        // its nested start count has drained to zero.
        if (stopAll || !mNested || it->second <= 0) {
            mStarted.erase(it);
            mConditionKeyMap.erase(key);
        }
        if (mStarted.empty()) {
            // Last active key just stopped: bank the in-flight duration under
            // the current state key and check the accumulated total.
            mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                    (timestamp - mLastStartTime);
            detectAndDeclareAnomaly(
                    timestamp, mCurrentBucketNum,
                    getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
            VLOG("record duration %lld, total duration %lld for state key %s",
                 (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
                 mEventKey.getStateValuesKey().toString().c_str());
        }
    }

    // The stop may instead (or also) apply to a key waiting in the paused set.
    auto pausedIt = mPaused.find(key);
    if (pausedIt != mPaused.end()) {
        (pausedIt->second)--;
        if (stopAll || !mNested || pausedIt->second <= 0) {
            mPaused.erase(pausedIt);
            mConditionKeyMap.erase(key);
        }
    }
    // Nothing active anymore: no anomaly can fire until the next start.
    if (mStarted.empty()) {
        stopAnomalyAlarm(timestamp);
    }
}
115
noteStopAll(const int64_t timestamp)116 void OringDurationTracker::noteStopAll(const int64_t timestamp) {
117 if (!mStarted.empty()) {
118 mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
119 (timestamp - mLastStartTime);
120 VLOG("Oring Stop all: record duration %lld, total duration %lld for state key %s",
121 (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
122 mEventKey.getStateValuesKey().toString().c_str());
123 detectAndDeclareAnomaly(
124 timestamp, mCurrentBucketNum,
125 getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
126 }
127
128 stopAnomalyAlarm(timestamp);
129 mStarted.clear();
130 mPaused.clear();
131 mConditionKeyMap.clear();
132 }
133
flushCurrentBucket(const int64_t & eventTimeNs,const optional<UploadThreshold> & uploadThreshold,std::unordered_map<MetricDimensionKey,std::vector<DurationBucket>> * output)134 bool OringDurationTracker::flushCurrentBucket(
135 const int64_t& eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
136 std::unordered_map<MetricDimensionKey, std::vector<DurationBucket>>* output) {
137 VLOG("OringDurationTracker Flushing.............");
138
139 // Note that we have to mimic the bucket time changes we do in the
140 // MetricProducer#notifyAppUpgrade.
141
142 int numBucketsForward = 0;
143 int64_t fullBucketEnd = getCurrentBucketEndTimeNs();
144 int64_t currentBucketEndTimeNs;
145
146 bool isFullBucket = eventTimeNs >= fullBucketEnd;
147 if (isFullBucket) {
148 numBucketsForward = 1 + (eventTimeNs - fullBucketEnd) / mBucketSizeNs;
149 currentBucketEndTimeNs = fullBucketEnd;
150 } else {
151 // This must be a partial bucket.
152 currentBucketEndTimeNs = eventTimeNs;
153 }
154
155 // Process the current bucket.
156 if (mStarted.size() > 0) {
157 // Calculate the duration for the current state key.
158 mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
159 (currentBucketEndTimeNs - mLastStartTime);
160 }
161 // Store DurationBucket info for each whatKey, stateKey pair.
162 // Note: The whatKey stored in mEventKey is constant for each DurationTracker, while the
163 // stateKey stored in mEventKey is only the current stateKey. mStateKeyDurationMap is used to
164 // store durations for each stateKey, so we need to flush the bucket by creating a
165 // DurationBucket for each stateKey.
166 for (auto& durationIt : mStateKeyDurationMap) {
167 if (durationPassesThreshold(uploadThreshold, durationIt.second.mDuration)) {
168 DurationBucket current_info;
169 current_info.mBucketStartNs = mCurrentBucketStartTimeNs;
170 current_info.mBucketEndNs = currentBucketEndTimeNs;
171 current_info.mDuration = durationIt.second.mDuration;
172 (*output)[MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first)]
173 .push_back(current_info);
174
175 durationIt.second.mDurationFullBucket += durationIt.second.mDuration;
176 VLOG(" duration: %lld", (long long)current_info.mDuration);
177 } else {
178 VLOG(" duration: %lld does not pass set threshold", (long long)mDuration);
179 }
180
181 if (isFullBucket) {
182 // End of full bucket, can send to anomaly tracker now.
183 addPastBucketToAnomalyTrackers(
184 MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first),
185 getCurrentStateKeyFullBucketDuration(), mCurrentBucketNum);
186 durationIt.second.mDurationFullBucket = 0;
187 }
188 durationIt.second.mDuration = 0;
189 }
190
191 if (mStarted.size() > 0) {
192 for (int i = 1; i < numBucketsForward; i++) {
193 DurationBucket info;
194 info.mBucketStartNs = fullBucketEnd + mBucketSizeNs * (i - 1);
195 info.mBucketEndNs = info.mBucketStartNs + mBucketSizeNs;
196 info.mDuration = mBucketSizeNs;
197 // Full duration buckets are attributed to the current stateKey.
198 (*output)[mEventKey].push_back(info);
199 // Safe to send these buckets to anomaly tracker since they must be full buckets.
200 // If it's a partial bucket, numBucketsForward would be 0.
201 addPastBucketToAnomalyTrackers(mEventKey, info.mDuration, mCurrentBucketNum + i);
202 VLOG(" add filling bucket with duration %lld", (long long)info.mDuration);
203 }
204 } else {
205 if (numBucketsForward >= 2) {
206 addPastBucketToAnomalyTrackers(mEventKey, 0, mCurrentBucketNum + numBucketsForward - 1);
207 }
208 }
209
210 if (numBucketsForward > 0) {
211 mCurrentBucketStartTimeNs = fullBucketEnd + (numBucketsForward - 1) * mBucketSizeNs;
212 mCurrentBucketNum += numBucketsForward;
213 } else { // We must be forming a partial bucket.
214 mCurrentBucketStartTimeNs = eventTimeNs;
215 }
216 mLastStartTime = mCurrentBucketStartTimeNs;
217
218 // If all stopped, then tell owner it's safe to remove this tracker on a full bucket.
219 // On a partial bucket, only clear if no anomaly trackers, as full bucket duration is used
220 // for anomaly detection.
221 // Note: Anomaly trackers can be added on config updates, in which case mAnomalyTrackers > 0 and
222 // the full bucket duration could be used, but this is very rare so it is okay to clear.
223 return mStarted.empty() && mPaused.empty() && (isFullBucket || mAnomalyTrackers.size() == 0);
224 }
225
flushIfNeeded(int64_t eventTimeNs,const optional<UploadThreshold> & uploadThreshold,unordered_map<MetricDimensionKey,vector<DurationBucket>> * output)226 bool OringDurationTracker::flushIfNeeded(
227 int64_t eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
228 unordered_map<MetricDimensionKey, vector<DurationBucket>>* output) {
229 if (eventTimeNs < getCurrentBucketEndTimeNs()) {
230 return false;
231 }
232 return flushCurrentBucket(eventTimeNs, uploadThreshold, output);
233 }
234
// Re-queries the sliced condition for every tracked key and migrates keys
// between the started and paused sets accordingly. Duration is banked when
// the started set empties, and the timer restarts when keys resume.
// NOTE(review): overallCondition is unused here; each key is queried
// individually through the wizard.
void OringDurationTracker::onSlicedConditionMayChange(bool overallCondition,
                                                      const int64_t timestamp) {
    // Keys are collected here first and inserted after both scans, so the
    // maps are never mutated by insertion while being iterated.
    vector<pair<HashableDimensionKey, int>> startedToPaused;
    vector<pair<HashableDimensionKey, int>> pausedToStarted;
    if (!mStarted.empty()) {
        for (auto it = mStarted.begin(); it != mStarted.end();) {
            const auto& key = it->first;
            const auto& condIt = mConditionKeyMap.find(key);
            if (condIt == mConditionKeyMap.end()) {
                // No condition key recorded for this key; leave it as-is.
                VLOG("Key %s dont have condition key", key.toString().c_str());
                ++it;
                continue;
            }
            ConditionState conditionState =
                mWizard->query(mConditionTrackerIndex, condIt->second,
                               !mHasLinksToAllConditionDimensionsInTracker);
            if (conditionState != ConditionState::kTrue) {
                // Condition no longer true: demote started -> paused.
                startedToPaused.push_back(*it);
                it = mStarted.erase(it);
                VLOG("Key %s started -> paused", key.toString().c_str());
            } else {
                ++it;
            }
        }

        if (mStarted.empty()) {
            // Everything paused: bank the elapsed interval and check anomalies.
            mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
                    (timestamp - mLastStartTime);
            VLOG("record duration %lld, total duration %lld for state key %s",
                 (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
                 mEventKey.getStateValuesKey().toString().c_str());
            detectAndDeclareAnomaly(
                    timestamp, mCurrentBucketNum,
                    getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
        }
    }

    if (!mPaused.empty()) {
        for (auto it = mPaused.begin(); it != mPaused.end();) {
            const auto& key = it->first;
            if (mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
                VLOG("Key %s dont have condition key", key.toString().c_str());
                ++it;
                continue;
            }
            ConditionState conditionState =
                mWizard->query(mConditionTrackerIndex, mConditionKeyMap[key],
                               !mHasLinksToAllConditionDimensionsInTracker);
            if (conditionState == ConditionState::kTrue) {
                // Condition became true: promote paused -> started.
                pausedToStarted.push_back(*it);
                it = mPaused.erase(it);
                VLOG("Key %s paused -> started", key.toString().c_str());
            } else {
                ++it;
            }
        }

        // Timer restarts only if nothing was running and keys are resuming.
        if (mStarted.empty() && pausedToStarted.size() > 0) {
            mLastStartTime = timestamp;
        }
    }

    // Arm the alarm before inserting: mStarted.empty() here means the
    // promoted keys are the first active ones.
    if (mStarted.empty() && !pausedToStarted.empty()) {
        startAnomalyAlarm(timestamp);
    }
    mStarted.insert(pausedToStarted.begin(), pausedToStarted.end());
    mPaused.insert(startedToPaused.begin(), startedToPaused.end());

    if (mStarted.empty()) {
        stopAnomalyAlarm(timestamp);
    }
}
307
onConditionChanged(bool condition,const int64_t timestamp)308 void OringDurationTracker::onConditionChanged(bool condition, const int64_t timestamp) {
309 if (condition) {
310 if (!mPaused.empty()) {
311 VLOG("Condition true, all started");
312 if (mStarted.empty()) {
313 mLastStartTime = timestamp;
314 }
315 if (mStarted.empty() && !mPaused.empty()) {
316 startAnomalyAlarm(timestamp);
317 }
318 mStarted.insert(mPaused.begin(), mPaused.end());
319 mPaused.clear();
320 }
321 } else {
322 if (!mStarted.empty()) {
323 VLOG("Condition false, all paused");
324 mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
325 (timestamp - mLastStartTime);
326 mPaused.insert(mStarted.begin(), mStarted.end());
327 mStarted.clear();
328 detectAndDeclareAnomaly(
329 timestamp, mCurrentBucketNum,
330 getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
331 }
332 }
333 if (mStarted.empty()) {
334 stopAnomalyAlarm(timestamp);
335 }
336 }
337
onStateChanged(const int64_t timestamp,const int32_t atomId,const FieldValue & newState)338 void OringDurationTracker::onStateChanged(const int64_t timestamp, const int32_t atomId,
339 const FieldValue& newState) {
340 // Nothing needs to be done on a state change if we have not seen a start
341 // event, the metric is currently not active, or condition is false.
342 // For these cases, no keys are being tracked in mStarted, so update
343 // the current state key and return.
344 if (mStarted.empty()) {
345 updateCurrentStateKey(atomId, newState);
346 return;
347 }
348 // Add the current duration length to the previous state key and then update
349 // the last start time and current state key.
350 mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration += (timestamp - mLastStartTime);
351 mLastStartTime = timestamp;
352 updateCurrentStateKey(atomId, newState);
353 }
354
hasAccumulatingDuration()355 bool OringDurationTracker::hasAccumulatingDuration() {
356 return !mStarted.empty();
357 }
// Predicts when |anomalyTracker|'s threshold would be crossed if the
// currently-running duration continues uninterrupted from |eventTimestampNs|.
// Walks forward bucket by bucket, simulating how the tracker's sliding window
// of past buckets evolves (old buckets fall out as new ones complete).
int64_t OringDurationTracker::predictAnomalyTimestampNs(const AnomalyTracker& anomalyTracker,
                                                        const int64_t eventTimestampNs) const {
    // The anomaly threshold.
    const int64_t thresholdNs = anomalyTracker.getAnomalyThreshold();

    // The timestamp of the current bucket end.
    const int64_t currentBucketEndNs = getCurrentBucketEndTimeNs();

    // The past duration ns for the current bucket of the current stateKey.
    int64_t currentStateBucketPastNs =
            getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration();

    // As we move into the future, old buckets get overwritten (so their old data is erased).
    // Sum of past durations. Will change as we overwrite old buckets.
    int64_t pastNs = currentStateBucketPastNs + anomalyTracker.getSumOverPastBuckets(mEventKey);

    // The refractory period end timestamp for dimension mEventKey.
    const int64_t refractoryPeriodEndNs =
            anomalyTracker.getRefractoryPeriodEndsSec(mEventKey) * NS_PER_SEC;

    // The anomaly should happen when accumulated wakelock duration is above the threshold and
    // not within the refractory period.
    int64_t anomalyTimestampNs =
        std::max(eventTimestampNs + thresholdNs - pastNs, refractoryPeriodEndNs);
    // If the predicted the anomaly timestamp is within the current bucket, return it directly.
    if (anomalyTimestampNs <= currentBucketEndNs) {
        return std::max(eventTimestampNs, anomalyTimestampNs);
    }

    // Remove the old bucket.
    if (anomalyTracker.getNumOfPastBuckets() > 0) {
        pastNs -= anomalyTracker.getPastBucketValue(
                          mEventKey,
                          mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets());
        // Add the remaining of the current bucket to the accumulated wakelock duration.
        pastNs += (currentBucketEndNs - eventTimestampNs);
    } else {
        // The anomaly depends on only one bucket.
        pastNs = 0;
    }

    // The anomaly will not happen in the current bucket. We need to iterate over the future buckets
    // to predict the accumulated wakelock duration and determine the anomaly timestamp accordingly.
    for (int futureBucketIdx = 1; futureBucketIdx <= anomalyTracker.getNumOfPastBuckets() + 1;
         futureBucketIdx++) {
        // The alarm candidate timestamp should meet two requirements:
        // 1. the accumulated wakelock duration is above the threshold.
        // 2. it is not within the refractory period.
        // 3. the alarm timestamp falls in this bucket. Otherwise we need to flush the past buckets,
        //    find the new alarm candidate timestamp and check these requirements again.
        const int64_t bucketEndNs = currentBucketEndNs + futureBucketIdx * mBucketSizeNs;
        // NOTE: intentionally shadows the outer anomalyTimestampNs — this
        // candidate is scoped to the current future bucket only.
        int64_t anomalyTimestampNs =
            std::max(bucketEndNs - mBucketSizeNs + thresholdNs - pastNs, refractoryPeriodEndNs);
        if (anomalyTimestampNs <= bucketEndNs) {
            return anomalyTimestampNs;
        }
        if (anomalyTracker.getNumOfPastBuckets() <= 0) {
            continue;
        }

        // No valid alarm timestamp is found in this bucket. The clock moves to the end of the
        // bucket. Update the pastNs.
        pastNs += mBucketSizeNs;
        // 1. If the oldest past bucket is still in the past bucket window, we could fetch the past
        // bucket and erase it from pastNs.
        // 2. If the oldest past bucket is the current bucket, we should compute the
        //    wakelock duration in the current bucket and erase it from pastNs.
        // 3. Otherwise all othe past buckets are ancient.
        if (futureBucketIdx < anomalyTracker.getNumOfPastBuckets()) {
            pastNs -= anomalyTracker.getPastBucketValue(
                              mEventKey,
                              mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets() + futureBucketIdx);
        } else if (futureBucketIdx == anomalyTracker.getNumOfPastBuckets()) {
            pastNs -= (currentStateBucketPastNs + (currentBucketEndNs - eventTimestampNs));
        }
    }

    // No candidate found within the window; fall back to the latest possible
    // prediction with no past-duration credit.
    return std::max(eventTimestampNs + thresholdNs, refractoryPeriodEndNs);
}
437
dumpStates(FILE * out,bool verbose) const438 void OringDurationTracker::dumpStates(FILE* out, bool verbose) const {
439 fprintf(out, "\t\t started count %lu\n", (unsigned long)mStarted.size());
440 fprintf(out, "\t\t paused count %lu\n", (unsigned long)mPaused.size());
441 fprintf(out, "\t\t current duration %lld\n", (long long)getCurrentStateKeyDuration());
442 }
443
getCurrentStateKeyDuration() const444 int64_t OringDurationTracker::getCurrentStateKeyDuration() const {
445 auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
446 if (it == mStateKeyDurationMap.end()) {
447 return 0;
448 } else {
449 return it->second.mDuration;
450 }
451 }
452
getCurrentStateKeyFullBucketDuration() const453 int64_t OringDurationTracker::getCurrentStateKeyFullBucketDuration() const {
454 auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
455 if (it == mStateKeyDurationMap.end()) {
456 return 0;
457 } else {
458 return it->second.mDurationFullBucket;
459 }
460 }
461
updateCurrentStateKey(const int32_t atomId,const FieldValue & newState)462 void OringDurationTracker::updateCurrentStateKey(const int32_t atomId, const FieldValue& newState) {
463 HashableDimensionKey* stateValuesKey = mEventKey.getMutableStateValuesKey();
464 for (size_t i = 0; i < stateValuesKey->getValues().size(); i++) {
465 if (stateValuesKey->getValues()[i].mField.getTag() == atomId) {
466 stateValuesKey->mutableValue(i)->mValue = newState.mValue;
467 }
468 }
469 }
470
471 } // namespace statsd
472 } // namespace os
473 } // namespace android
474