/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <gtest/gtest_prod.h>
#include <stdlib.h>
#include <utils/RefBase.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "AlarmMonitor.h"
#include "config/ConfigKey.h"
#include "guardrail/StatsdStats.h"
#include "hash.h"
#include "src/statsd_config.pb.h"    // Alert
#include "src/statsd_metadata.pb.h"  // AlertMetadata
#include "stats_util.h"              // HashableDimensionKey and DimToValMap

31 namespace android {
32 namespace os {
33 namespace statsd {
34 
35 using std::optional;
36 using std::shared_ptr;
37 using std::unordered_map;
38 
39 // Does NOT allow negative values.
40 class AnomalyTracker : public virtual RefBase {
41 public:
42     AnomalyTracker(const Alert& alert, const ConfigKey& configKey);
43 
44     virtual ~AnomalyTracker();
45 
46     // Reset appropriate state on a config update. Clear subscriptions so they can be reset.
47     void onConfigUpdated();
48 
49     // Add subscriptions that depend on this alert.
addSubscription(const Subscription & subscription)50     void addSubscription(const Subscription& subscription) {
51         mSubscriptions.push_back(subscription);
52     }
53 
54     // Adds a bucket for the given bucketNum (index starting at 0).
55     // If a bucket for bucketNum already exists, it will be replaced.
56     // Also, advances to bucketNum (if not in the past), effectively filling any intervening
57     // buckets with 0s.
58     void addPastBucket(std::shared_ptr<DimToValMap> bucket, const int64_t& bucketNum);
59 
60     // Inserts (or replaces) the bucket entry for the given bucketNum at the given key to be the
61     // given bucketValue. If the bucket does not exist, it will be created.
62     // Also, advances to bucketNum (if not in the past), effectively filling any intervening
63     // buckets with 0s.
64     void addPastBucket(const MetricDimensionKey& key, const int64_t& bucketValue,
65                        const int64_t& bucketNum);
66 
67     // Returns true if, based on past buckets plus the new currentBucketValue (which generally
68     // represents the partially-filled current bucket), an anomaly has happened.
69     // Also advances to currBucketNum-1.
70     bool detectAnomaly(const int64_t& currBucketNum, const MetricDimensionKey& key,
71                        const int64_t& currentBucketValue);
72 
73     // Informs incidentd about the detected alert.
74     void declareAnomaly(const int64_t& timestampNs, int64_t metricId, const MetricDimensionKey& key,
75                         int64_t metricValue);
76 
77     // Detects if, based on past buckets plus the new currentBucketValue (which generally
78     // represents the partially-filled current bucket), an anomaly has happened, and if so,
79     // declares an anomaly and informs relevant subscribers.
80     // Also advances to currBucketNum-1.
81     void detectAndDeclareAnomaly(const int64_t& timestampNs, const int64_t& currBucketNum,
82                                  int64_t metricId, const MetricDimensionKey& key,
83                                  const int64_t& currentBucketValue);
84 
85     // Init the AlarmMonitor which is shared across anomaly trackers.
setAlarmMonitor(const sp<AlarmMonitor> & alarmMonitor)86     virtual void setAlarmMonitor(const sp<AlarmMonitor>& alarmMonitor) {
87         return; // Base AnomalyTracker class has no need for the AlarmMonitor.
88     }
89 
90     // Returns the sum of all past bucket values for the given dimension key.
91     int64_t getSumOverPastBuckets(const MetricDimensionKey& key) const;
92 
93     // Returns the value for a past bucket, or 0 if that bucket doesn't exist.
94     int64_t getPastBucketValue(const MetricDimensionKey& key, const int64_t& bucketNum) const;
95 
96     // Returns the anomaly threshold set in the configuration.
getAnomalyThreshold()97     inline int64_t getAnomalyThreshold() const {
98         return mAlert.trigger_if_sum_gt();
99     }
100 
101     // Returns the refractory period ending timestamp (in seconds) for the given key.
102     // Before this moment, any detected anomaly will be ignored.
103     // If there is no stored refractory period ending timestamp, returns 0.
getRefractoryPeriodEndsSec(const MetricDimensionKey & key)104     uint32_t getRefractoryPeriodEndsSec(const MetricDimensionKey& key) const {
105         const auto& it = mRefractoryPeriodEndsSec.find(key);
106         return it != mRefractoryPeriodEndsSec.end() ? it->second : 0;
107     }
108 
109     // Returns the (constant) number of past buckets this anomaly tracker can store.
getNumOfPastBuckets()110     inline int getNumOfPastBuckets() const {
111         return mNumOfPastBuckets;
112     }
113 
114     std::pair<optional<InvalidConfigReason>, uint64_t> getProtoHash() const;
115 
116     // Sets an alarm for the given timestamp.
117     // Replaces previous alarm if one already exists.
startAlarm(const MetricDimensionKey & dimensionKey,const int64_t & eventTime)118     virtual void startAlarm(const MetricDimensionKey& dimensionKey, const int64_t& eventTime) {
119         return;  // The base AnomalyTracker class doesn't have alarms.
120     }
121 
122     // Stops the alarm.
123     // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed),
124     // declare the anomaly now.
stopAlarm(const MetricDimensionKey & dimensionKey,const int64_t & timestampNs)125     virtual void stopAlarm(const MetricDimensionKey& dimensionKey, const int64_t& timestampNs) {
126         return;  // The base AnomalyTracker class doesn't have alarms.
127     }
128 
129     // Stop all the alarms owned by this tracker. Does not declare any anomalies.
cancelAllAlarms()130     virtual void cancelAllAlarms() {
131         return;  // The base AnomalyTracker class doesn't have alarms.
132     }
133 
134     // Declares an anomaly for each alarm in firedAlarms that belongs to this AnomalyTracker,
135     // and removes it from firedAlarms. Does NOT remove the alarm from the AlarmMonitor.
informAlarmsFired(const int64_t & timestampNs,unordered_set<sp<const InternalAlarm>,SpHash<InternalAlarm>> & firedAlarms)136     virtual void informAlarmsFired(const int64_t& timestampNs,
137             unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& firedAlarms) {
138         return; // The base AnomalyTracker class doesn't have alarms.
139     }
140 
141     // Writes metadata of the alert (refractory_period_end_sec) to AlertMetadata.
142     // Returns true if at least one element is written to alertMetadata.
143     bool writeAlertMetadataToProto(
144             int64_t currentWallClockTimeNs,
145             int64_t systemElapsedTimeNs, metadata::AlertMetadata* alertMetadata);
146 
147     void loadAlertMetadata(
148             const metadata::AlertMetadata& alertMetadata,
149             int64_t currentWallClockTimeNs,
150             int64_t systemElapsedTimeNs);
151 
152 protected:
153     // For testing only.
154     // Returns the alarm timestamp in seconds for the query dimension if it exists. Otherwise
155     // returns 0.
getAlarmTimestampSec(const MetricDimensionKey & dimensionKey)156     virtual uint32_t getAlarmTimestampSec(const MetricDimensionKey& dimensionKey) const {
157         return 0;   // The base AnomalyTracker class doesn't have alarms.
158     }
159 
160     // statsd_config.proto Alert message that defines this tracker.
161     const Alert mAlert;
162 
163     // The subscriptions that depend on this alert.
164     std::vector<Subscription> mSubscriptions;
165 
166     // A reference to the Alert's config key.
167     const ConfigKey mConfigKey;
168 
169     // Number of past buckets. One less than the total number of buckets needed
170     // for the anomaly detection (since the current bucket is not in the past).
171     const int mNumOfPastBuckets;
172 
173     // Values for each of the past mNumOfPastBuckets buckets. Always of size mNumOfPastBuckets.
174     // mPastBuckets[i] can be null, meaning that no data is present in that bucket.
175     std::vector<shared_ptr<DimToValMap>> mPastBuckets;
176 
177     // Cached sum over all existing buckets in mPastBuckets.
178     // Its buckets never contain entries of 0.
179     DimToValMap mSumOverPastBuckets;
180 
181     // The bucket number of the last added bucket.
182     int64_t mMostRecentBucketNum = -1;
183 
184     // Map from each dimension to the timestamp that its refractory period (if this anomaly was
185     // declared for that dimension) ends, in seconds. From this moment and onwards, anomalies
186     // can be declared again.
187     // Entries may be, but are not guaranteed to be, removed after the period is finished.
188     unordered_map<MetricDimensionKey, uint32_t> mRefractoryPeriodEndsSec;
189 
190     // Advances mMostRecentBucketNum to bucketNum, deleting any data that is now too old.
191     // Specifically, since it is now too old, removes the data for
192     //   [mMostRecentBucketNum - mNumOfPastBuckets + 1, bucketNum - mNumOfPastBuckets].
193     void advanceMostRecentBucketTo(const int64_t& bucketNum);
194 
195     // Add the information in the given bucket to mSumOverPastBuckets.
196     void addBucketToSum(const shared_ptr<DimToValMap>& bucket);
197 
198     // Subtract the information in the given bucket from mSumOverPastBuckets
199     // and remove any items with value 0.
200     void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket);
201 
202     // From mSumOverPastBuckets[key], subtracts bucketValue, removing it if it is now 0.
203     void subtractValueFromSum(const MetricDimensionKey& key, const int64_t& bucketValue);
204 
205     // Returns true if in the refractory period, else false.
206     bool isInRefractoryPeriod(const int64_t& timestampNs, const MetricDimensionKey& key) const;
207 
208     // Calculates the corresponding bucket index within the circular array.
209     // Requires bucketNum >= 0.
210     size_t index(int64_t bucketNum) const;
211 
212     // Resets all bucket data. For use when all the data gets stale.
213     virtual void resetStorage();
214 
215     // Informs the subscribers (incidentd, perfetto, broadcasts, etc) that an anomaly has occurred.
216     void informSubscribers(const MetricDimensionKey& key, int64_t metricId, int64_t metricValue);
217 
218     FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets);
219     FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets);
220     FRIEND_TEST(GaugeMetricProducerTest, TestAnomalyDetection);
221     FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced);
222     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_single_bucket);
223     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_partial_bucket);
224     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_multiple_buckets);
225     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_long_refractory_period);
226 
227     FRIEND_TEST(ConfigUpdateTest, TestUpdateAlerts);
228 };
229 
230 }  // namespace statsd
231 }  // namespace os
232 }  // namespace android
233