1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <gtest/gtest_prod.h> 20 #include <stdlib.h> 21 #include <utils/RefBase.h> 22 23 #include "AlarmMonitor.h" 24 #include "config/ConfigKey.h" 25 #include "guardrail/StatsdStats.h" 26 #include "hash.h" 27 #include "src/statsd_config.pb.h" // Alert 28 #include "src/statsd_metadata.pb.h" // AlertMetadata 29 #include "stats_util.h" // HashableDimensionKey and DimToValMap 30 31 namespace android { 32 namespace os { 33 namespace statsd { 34 35 using std::optional; 36 using std::shared_ptr; 37 using std::unordered_map; 38 39 // Does NOT allow negative values. 40 class AnomalyTracker : public virtual RefBase { 41 public: 42 AnomalyTracker(const Alert& alert, const ConfigKey& configKey); 43 44 virtual ~AnomalyTracker(); 45 46 // Reset appropriate state on a config update. Clear subscriptions so they can be reset. 47 void onConfigUpdated(); 48 49 // Add subscriptions that depend on this alert. addSubscription(const Subscription & subscription)50 void addSubscription(const Subscription& subscription) { 51 mSubscriptions.push_back(subscription); 52 } 53 54 // Adds a bucket for the given bucketNum (index starting at 0). 55 // If a bucket for bucketNum already exists, it will be replaced. 56 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 57 // buckets with 0s. 58 void addPastBucket(std::shared_ptr<DimToValMap> bucket, const int64_t& bucketNum); 59 60 // Inserts (or replaces) the bucket entry for the given bucketNum at the given key to be the 61 // given bucketValue. If the bucket does not exist, it will be created. 62 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 63 // buckets with 0s. 64 void addPastBucket(const MetricDimensionKey& key, const int64_t& bucketValue, 65 const int64_t& bucketNum); 66 67 // Returns true if, based on past buckets plus the new currentBucketValue (which generally 68 // represents the partially-filled current bucket), an anomaly has happened. 69 // Also advances to currBucketNum-1. 70 bool detectAnomaly(const int64_t& currBucketNum, const MetricDimensionKey& key, 71 const int64_t& currentBucketValue); 72 73 // Informs incidentd about the detected alert. 74 void declareAnomaly(const int64_t& timestampNs, int64_t metricId, const MetricDimensionKey& key, 75 int64_t metricValue); 76 77 // Detects if, based on past buckets plus the new currentBucketValue (which generally 78 // represents the partially-filled current bucket), an anomaly has happened, and if so, 79 // declares an anomaly and informs relevant subscribers. 80 // Also advances to currBucketNum-1. 81 void detectAndDeclareAnomaly(const int64_t& timestampNs, const int64_t& currBucketNum, 82 int64_t metricId, const MetricDimensionKey& key, 83 const int64_t& currentBucketValue); 84 85 // Init the AlarmMonitor which is shared across anomaly trackers. setAlarmMonitor(const sp<AlarmMonitor> & alarmMonitor)86 virtual void setAlarmMonitor(const sp<AlarmMonitor>& alarmMonitor) { 87 return; // Base AnomalyTracker class has no need for the AlarmMonitor. 88 } 89 90 // Returns the sum of all past bucket values for the given dimension key. 91 int64_t getSumOverPastBuckets(const MetricDimensionKey& key) const; 92 93 // Returns the value for a past bucket, or 0 if that bucket doesn't exist. 94 int64_t getPastBucketValue(const MetricDimensionKey& key, const int64_t& bucketNum) const; 95 96 // Returns the anomaly threshold set in the configuration. getAnomalyThreshold()97 inline int64_t getAnomalyThreshold() const { 98 return mAlert.trigger_if_sum_gt(); 99 } 100 101 // Returns the refractory period ending timestamp (in seconds) for the given key. 102 // Before this moment, any detected anomaly will be ignored. 103 // If there is no stored refractory period ending timestamp, returns 0. getRefractoryPeriodEndsSec(const MetricDimensionKey & key)104 uint32_t getRefractoryPeriodEndsSec(const MetricDimensionKey& key) const { 105 const auto& it = mRefractoryPeriodEndsSec.find(key); 106 return it != mRefractoryPeriodEndsSec.end() ? it->second : 0; 107 } 108 109 // Returns the (constant) number of past buckets this anomaly tracker can store. getNumOfPastBuckets()110 inline int getNumOfPastBuckets() const { 111 return mNumOfPastBuckets; 112 } 113 114 std::pair<optional<InvalidConfigReason>, uint64_t> getProtoHash() const; 115 116 // Sets an alarm for the given timestamp. 117 // Replaces previous alarm if one already exists. startAlarm(const MetricDimensionKey & dimensionKey,const int64_t & eventTime)118 virtual void startAlarm(const MetricDimensionKey& dimensionKey, const int64_t& eventTime) { 119 return; // The base AnomalyTracker class doesn't have alarms. 120 } 121 122 // Stops the alarm. 123 // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed), 124 // declare the anomaly now. stopAlarm(const MetricDimensionKey & dimensionKey,const int64_t & timestampNs)125 virtual void stopAlarm(const MetricDimensionKey& dimensionKey, const int64_t& timestampNs) { 126 return; // The base AnomalyTracker class doesn't have alarms. 127 } 128 129 // Stop all the alarms owned by this tracker. Does not declare any anomalies. cancelAllAlarms()130 virtual void cancelAllAlarms() { 131 return; // The base AnomalyTracker class doesn't have alarms. 132 } 133 134 // Declares an anomaly for each alarm in firedAlarms that belongs to this AnomalyTracker, 135 // and removes it from firedAlarms. Does NOT remove the alarm from the AlarmMonitor. informAlarmsFired(const int64_t & timestampNs,unordered_set<sp<const InternalAlarm>,SpHash<InternalAlarm>> & firedAlarms)136 virtual void informAlarmsFired(const int64_t& timestampNs, 137 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& firedAlarms) { 138 return; // The base AnomalyTracker class doesn't have alarms. 139 } 140 141 // Writes metadata of the alert (refractory_period_end_sec) to AlertMetadata. 142 // Returns true if at least one element is written to alertMetadata. 143 bool writeAlertMetadataToProto( 144 int64_t currentWallClockTimeNs, 145 int64_t systemElapsedTimeNs, metadata::AlertMetadata* alertMetadata); 146 147 void loadAlertMetadata( 148 const metadata::AlertMetadata& alertMetadata, 149 int64_t currentWallClockTimeNs, 150 int64_t systemElapsedTimeNs); 151 152 protected: 153 // For testing only. 154 // Returns the alarm timestamp in seconds for the query dimension if it exists. Otherwise 155 // returns 0. getAlarmTimestampSec(const MetricDimensionKey & dimensionKey)156 virtual uint32_t getAlarmTimestampSec(const MetricDimensionKey& dimensionKey) const { 157 return 0; // The base AnomalyTracker class doesn't have alarms. 158 } 159 160 // statsd_config.proto Alert message that defines this tracker. 161 const Alert mAlert; 162 163 // The subscriptions that depend on this alert. 164 std::vector<Subscription> mSubscriptions; 165 166 // A reference to the Alert's config key. 167 const ConfigKey mConfigKey; 168 169 // Number of past buckets. One less than the total number of buckets needed 170 // for the anomaly detection (since the current bucket is not in the past). 171 const int mNumOfPastBuckets; 172 173 // Values for each of the past mNumOfPastBuckets buckets. Always of size mNumOfPastBuckets. 174 // mPastBuckets[i] can be null, meaning that no data is present in that bucket. 175 std::vector<shared_ptr<DimToValMap>> mPastBuckets; 176 177 // Cached sum over all existing buckets in mPastBuckets. 178 // Its buckets never contain entries of 0. 179 DimToValMap mSumOverPastBuckets; 180 181 // The bucket number of the last added bucket. 182 int64_t mMostRecentBucketNum = -1; 183 184 // Map from each dimension to the timestamp that its refractory period (if this anomaly was 185 // declared for that dimension) ends, in seconds. From this moment and onwards, anomalies 186 // can be declared again. 187 // Entries may be, but are not guaranteed to be, removed after the period is finished. 188 unordered_map<MetricDimensionKey, uint32_t> mRefractoryPeriodEndsSec; 189 190 // Advances mMostRecentBucketNum to bucketNum, deleting any data that is now too old. 191 // Specifically, since it is now too old, removes the data for 192 // [mMostRecentBucketNum - mNumOfPastBuckets + 1, bucketNum - mNumOfPastBuckets]. 193 void advanceMostRecentBucketTo(const int64_t& bucketNum); 194 195 // Add the information in the given bucket to mSumOverPastBuckets. 196 void addBucketToSum(const shared_ptr<DimToValMap>& bucket); 197 198 // Subtract the information in the given bucket from mSumOverPastBuckets 199 // and remove any items with value 0. 200 void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket); 201 202 // From mSumOverPastBuckets[key], subtracts bucketValue, removing it if it is now 0. 203 void subtractValueFromSum(const MetricDimensionKey& key, const int64_t& bucketValue); 204 205 // Returns true if in the refractory period, else false. 206 bool isInRefractoryPeriod(const int64_t& timestampNs, const MetricDimensionKey& key) const; 207 208 // Calculates the corresponding bucket index within the circular array. 209 // Requires bucketNum >= 0. 210 size_t index(int64_t bucketNum) const; 211 212 // Resets all bucket data. For use when all the data gets stale. 213 virtual void resetStorage(); 214 215 // Informs the subscribers (incidentd, perfetto, broadcasts, etc) that an anomaly has occurred. 216 void informSubscribers(const MetricDimensionKey& key, int64_t metricId, int64_t metricValue); 217 218 FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets); 219 FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets); 220 FRIEND_TEST(GaugeMetricProducerTest, TestAnomalyDetection); 221 FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced); 222 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_single_bucket); 223 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_partial_bucket); 224 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_multiple_buckets); 225 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_long_refractory_period); 226 227 FRIEND_TEST(ConfigUpdateTest, TestUpdateAlerts); 228 }; 229 230 } // namespace statsd 231 } // namespace os 232 } // namespace android 233