1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <gtest/gtest_prod.h> 20 #include <stdlib.h> 21 #include <utils/RefBase.h> 22 23 #include "AlarmMonitor.h" 24 #include "config/ConfigKey.h" 25 #include "src/statsd_config.pb.h" // Alert 26 #include "src/statsd_metadata.pb.h" // AlertMetadata 27 #include "hash.h" 28 #include "stats_util.h" // HashableDimensionKey and DimToValMap 29 30 namespace android { 31 namespace os { 32 namespace statsd { 33 34 using std::shared_ptr; 35 using std::unordered_map; 36 37 // Does NOT allow negative values. 38 class AnomalyTracker : public virtual RefBase { 39 public: 40 AnomalyTracker(const Alert& alert, const ConfigKey& configKey); 41 42 virtual ~AnomalyTracker(); 43 44 // Reset appropriate state on a config update. Clear subscriptions so they can be reset. 45 void onConfigUpdated(); 46 47 // Add subscriptions that depend on this alert. addSubscription(const Subscription & subscription)48 void addSubscription(const Subscription& subscription) { 49 mSubscriptions.push_back(subscription); 50 } 51 52 // Adds a bucket for the given bucketNum (index starting at 0). 53 // If a bucket for bucketNum already exists, it will be replaced. 54 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 55 // buckets with 0s. 56 void addPastBucket(std::shared_ptr<DimToValMap> bucket, const int64_t& bucketNum); 57 58 // Inserts (or replaces) the bucket entry for the given bucketNum at the given key to be the 59 // given bucketValue. If the bucket does not exist, it will be created. 60 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 61 // buckets with 0s. 62 void addPastBucket(const MetricDimensionKey& key, const int64_t& bucketValue, 63 const int64_t& bucketNum); 64 65 // Returns true if, based on past buckets plus the new currentBucketValue (which generally 66 // represents the partially-filled current bucket), an anomaly has happened. 67 // Also advances to currBucketNum-1. 68 bool detectAnomaly(const int64_t& currBucketNum, const MetricDimensionKey& key, 69 const int64_t& currentBucketValue); 70 71 // Informs incidentd about the detected alert. 72 void declareAnomaly(const int64_t& timestampNs, int64_t metricId, const MetricDimensionKey& key, 73 int64_t metricValue); 74 75 // Detects if, based on past buckets plus the new currentBucketValue (which generally 76 // represents the partially-filled current bucket), an anomaly has happened, and if so, 77 // declares an anomaly and informs relevant subscribers. 78 // Also advances to currBucketNum-1. 79 void detectAndDeclareAnomaly(const int64_t& timestampNs, const int64_t& currBucketNum, 80 int64_t metricId, const MetricDimensionKey& key, 81 const int64_t& currentBucketValue); 82 83 // Init the AlarmMonitor which is shared across anomaly trackers. setAlarmMonitor(const sp<AlarmMonitor> & alarmMonitor)84 virtual void setAlarmMonitor(const sp<AlarmMonitor>& alarmMonitor) { 85 return; // Base AnomalyTracker class has no need for the AlarmMonitor. 86 } 87 88 // Returns the sum of all past bucket values for the given dimension key. 89 int64_t getSumOverPastBuckets(const MetricDimensionKey& key) const; 90 91 // Returns the value for a past bucket, or 0 if that bucket doesn't exist. 92 int64_t getPastBucketValue(const MetricDimensionKey& key, const int64_t& bucketNum) const; 93 94 // Returns the anomaly threshold set in the configuration. getAnomalyThreshold()95 inline int64_t getAnomalyThreshold() const { 96 return mAlert.trigger_if_sum_gt(); 97 } 98 99 // Returns the refractory period ending timestamp (in seconds) for the given key. 100 // Before this moment, any detected anomaly will be ignored. 101 // If there is no stored refractory period ending timestamp, returns 0. getRefractoryPeriodEndsSec(const MetricDimensionKey & key)102 uint32_t getRefractoryPeriodEndsSec(const MetricDimensionKey& key) const { 103 const auto& it = mRefractoryPeriodEndsSec.find(key); 104 return it != mRefractoryPeriodEndsSec.end() ? it->second : 0; 105 } 106 107 // Returns the (constant) number of past buckets this anomaly tracker can store. getNumOfPastBuckets()108 inline int getNumOfPastBuckets() const { 109 return mNumOfPastBuckets; 110 } 111 112 std::pair<bool, uint64_t> getProtoHash() const; 113 114 // Sets an alarm for the given timestamp. 115 // Replaces previous alarm if one already exists. startAlarm(const MetricDimensionKey & dimensionKey,const int64_t & eventTime)116 virtual void startAlarm(const MetricDimensionKey& dimensionKey, const int64_t& eventTime) { 117 return; // The base AnomalyTracker class doesn't have alarms. 118 } 119 120 // Stops the alarm. 121 // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed), 122 // declare the anomaly now. stopAlarm(const MetricDimensionKey & dimensionKey,const int64_t & timestampNs)123 virtual void stopAlarm(const MetricDimensionKey& dimensionKey, const int64_t& timestampNs) { 124 return; // The base AnomalyTracker class doesn't have alarms. 125 } 126 127 // Stop all the alarms owned by this tracker. Does not declare any anomalies. cancelAllAlarms()128 virtual void cancelAllAlarms() { 129 return; // The base AnomalyTracker class doesn't have alarms. 130 } 131 132 // Declares an anomaly for each alarm in firedAlarms that belongs to this AnomalyTracker, 133 // and removes it from firedAlarms. Does NOT remove the alarm from the AlarmMonitor. informAlarmsFired(const int64_t & timestampNs,unordered_set<sp<const InternalAlarm>,SpHash<InternalAlarm>> & firedAlarms)134 virtual void informAlarmsFired(const int64_t& timestampNs, 135 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& firedAlarms) { 136 return; // The base AnomalyTracker class doesn't have alarms. 137 } 138 139 // Writes metadata of the alert (refractory_period_end_sec) to AlertMetadata. 140 // Returns true if at least one element is written to alertMetadata. 141 bool writeAlertMetadataToProto( 142 int64_t currentWallClockTimeNs, 143 int64_t systemElapsedTimeNs, metadata::AlertMetadata* alertMetadata); 144 145 void loadAlertMetadata( 146 const metadata::AlertMetadata& alertMetadata, 147 int64_t currentWallClockTimeNs, 148 int64_t systemElapsedTimeNs); 149 150 protected: 151 // For testing only. 152 // Returns the alarm timestamp in seconds for the query dimension if it exists. Otherwise 153 // returns 0. getAlarmTimestampSec(const MetricDimensionKey & dimensionKey)154 virtual uint32_t getAlarmTimestampSec(const MetricDimensionKey& dimensionKey) const { 155 return 0; // The base AnomalyTracker class doesn't have alarms. 156 } 157 158 // statsd_config.proto Alert message that defines this tracker. 159 const Alert mAlert; 160 161 // The subscriptions that depend on this alert. 162 std::vector<Subscription> mSubscriptions; 163 164 // A reference to the Alert's config key. 165 const ConfigKey mConfigKey; 166 167 // Number of past buckets. One less than the total number of buckets needed 168 // for the anomaly detection (since the current bucket is not in the past). 169 const int mNumOfPastBuckets; 170 171 // Values for each of the past mNumOfPastBuckets buckets. Always of size mNumOfPastBuckets. 172 // mPastBuckets[i] can be null, meaning that no data is present in that bucket. 173 std::vector<shared_ptr<DimToValMap>> mPastBuckets; 174 175 // Cached sum over all existing buckets in mPastBuckets. 176 // Its buckets never contain entries of 0. 177 DimToValMap mSumOverPastBuckets; 178 179 // The bucket number of the last added bucket. 180 int64_t mMostRecentBucketNum = -1; 181 182 // Map from each dimension to the timestamp that its refractory period (if this anomaly was 183 // declared for that dimension) ends, in seconds. From this moment and onwards, anomalies 184 // can be declared again. 185 // Entries may be, but are not guaranteed to be, removed after the period is finished. 186 unordered_map<MetricDimensionKey, uint32_t> mRefractoryPeriodEndsSec; 187 188 // Advances mMostRecentBucketNum to bucketNum, deleting any data that is now too old. 189 // Specifically, since it is now too old, removes the data for 190 // [mMostRecentBucketNum - mNumOfPastBuckets + 1, bucketNum - mNumOfPastBuckets]. 191 void advanceMostRecentBucketTo(const int64_t& bucketNum); 192 193 // Add the information in the given bucket to mSumOverPastBuckets. 194 void addBucketToSum(const shared_ptr<DimToValMap>& bucket); 195 196 // Subtract the information in the given bucket from mSumOverPastBuckets 197 // and remove any items with value 0. 198 void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket); 199 200 // From mSumOverPastBuckets[key], subtracts bucketValue, removing it if it is now 0. 201 void subtractValueFromSum(const MetricDimensionKey& key, const int64_t& bucketValue); 202 203 // Returns true if in the refractory period, else false. 204 bool isInRefractoryPeriod(const int64_t& timestampNs, const MetricDimensionKey& key) const; 205 206 // Calculates the corresponding bucket index within the circular array. 207 // Requires bucketNum >= 0. 208 size_t index(int64_t bucketNum) const; 209 210 // Resets all bucket data. For use when all the data gets stale. 211 virtual void resetStorage(); 212 213 // Informs the subscribers (incidentd, perfetto, broadcasts, etc) that an anomaly has occurred. 214 void informSubscribers(const MetricDimensionKey& key, int64_t metricId, int64_t metricValue); 215 216 FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets); 217 FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets); 218 FRIEND_TEST(GaugeMetricProducerTest, TestAnomalyDetection); 219 FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced); 220 FRIEND_TEST(AnomalyDetectionE2eTest, TestDurationMetric_SUM_single_bucket); 221 FRIEND_TEST(AnomalyDetectionE2eTest, TestDurationMetric_SUM_partial_bucket); 222 FRIEND_TEST(AnomalyDetectionE2eTest, TestDurationMetric_SUM_multiple_buckets); 223 FRIEND_TEST(AnomalyDetectionE2eTest, TestDurationMetric_SUM_long_refractory_period); 224 225 FRIEND_TEST(ConfigUpdateTest, TestUpdateAlerts); 226 }; 227 228 } // namespace statsd 229 } // namespace os 230 } // namespace android 231