• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
#include <gtest/gtest_prod.h>
#include <log/log_time.h>
#include <src/guardrail/invalid_config_reason_enum.pb.h>

#include <climits>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "config/ConfigKey.h"
29 
30 namespace android {
31 namespace os {
32 namespace statsd {
33 
34 struct InvalidConfigReason {
35     InvalidConfigReasonEnum reason;
36     std::optional<int64_t> metricId;
37     std::optional<int64_t> stateId;
38     std::optional<int64_t> alertId;
39     std::optional<int64_t> alarmId;
40     std::optional<int64_t> subscriptionId;
41     std::vector<int64_t> matcherIds;
42     std::vector<int64_t> conditionIds;
InvalidConfigReasonInvalidConfigReason43     InvalidConfigReason(){};
InvalidConfigReasonInvalidConfigReason44     InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){};
InvalidConfigReasonInvalidConfigReason45     InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId)
46         : reason(reason), metricId(metricId){};
47     bool operator==(const InvalidConfigReason& other) const {
48         return (this->reason == other.reason) && (this->metricId == other.metricId) &&
49                (this->stateId == other.stateId) && (this->alertId == other.alertId) &&
50                (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) &&
51                (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds);
52     }
53 };
54 
// Reason codes attached to a failed restricted-metric query.
// The numeric values are spelled out explicitly because they must stay in sync with the
// InvalidQueryReason enum in stats_log.proto.
enum InvalidQueryReason {
    UNKNOWN_REASON                     = 0,
    FLAG_DISABLED                      = 1,
    UNSUPPORTED_SQLITE_VERSION         = 2,
    AMBIGUOUS_CONFIG_KEY               = 3,
    CONFIG_KEY_NOT_FOUND               = 4,
    CONFIG_KEY_WITH_UNMATCHED_DELEGATE = 5,
    QUERY_FAILURE                      = 6,
    INCONSISTENT_ROW_SIZE              = 7,
    NULL_CALLBACK                      = 8,
};
67 
// Statistics kept for one restricted metric; every counter starts at zero and the latency list
// starts empty.
//
// Declared as a named struct instead of `typedef struct { ... } RestrictedMetricStats;`: an
// unnamed class that receives its name from a typedef may not have default member initializers
// (C++20 / P1766R1; clang reports -Wnon-c-typedef-for-linkage), and a named struct can also be
// forward-declared. Existing uses of the name are unaffected.
struct RestrictedMetricStats {
    int64_t insertError = 0;
    int64_t tableCreationError = 0;
    int64_t tableDeletionError = 0;
    // Recorded flush latencies, in nanoseconds.
    std::list<int64_t> flushLatencyNs;
    int64_t categoryChangedCount = 0;
};
75 
76 struct ConfigStats {
77     int32_t uid;
78     int64_t id;
79     int32_t creation_time_sec;
80     int32_t deletion_time_sec = 0;
81     int32_t reset_time_sec = 0;
82     int32_t metric_count;
83     int32_t condition_count;
84     int32_t matcher_count;
85     int32_t alert_count;
86     bool is_valid;
87     bool device_info_table_creation_failed = false;
88     int32_t db_corrupted_count = 0;
89 
90     // Stores reasons for why config is valid or not
91     std::optional<InvalidConfigReason> reason;
92 
93     std::list<int32_t> broadcast_sent_time_sec;
94 
95     // Times at which this config is activated.
96     std::list<int32_t> activation_time_sec;
97 
98     // Times at which this config is deactivated.
99     std::list<int32_t> deactivation_time_sec;
100 
101     std::list<int32_t> data_drop_time_sec;
102     // Number of bytes dropped at corresponding time.
103     std::list<int64_t> data_drop_bytes;
104     std::list<std::pair<int32_t, int64_t>> dump_report_stats;
105 
106     // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount.
107     std::map<const int64_t, int> matcher_stats;
108 
109     // Stores the number of output tuple of condition trackers when it's bigger than
110     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
111     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
112     std::map<const int64_t, int> condition_stats;
113 
114     // Stores the number of output tuple of metric producers when it's bigger than
115     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
116     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
117     std::map<const int64_t, int> metric_stats;
118 
119     // Stores the max number of output tuple of dimensions in condition across dimensions in what
120     // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is
121     // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by
122     // kMaxConfigCount.
123     std::map<const int64_t, int> metric_dimension_in_condition_stats;
124 
125     // Stores the number of times an anomaly detection alert has been declared.
126     // The map size is capped by kMaxConfigCount.
127     std::map<const int64_t, int> alert_stats;
128 
129     // Stores the config ID for each sub-config used.
130     std::list<std::pair<const int64_t, const int32_t>> annotations;
131 
132     // Maps metric ID of restricted metric to its stats.
133     std::map<int64_t, RestrictedMetricStats> restricted_metric_stats;
134 
135     std::list<int64_t> total_flush_latency_ns;
136 
137     // Stores the last 20 timestamps for computing sqlite db size.
138     std::list<int64_t> total_db_size_timestamps;
139 
140     // Stores the last 20 sizes of the sqlite db.
141     std::list<int64_t> total_db_sizes;
142 };
143 
// Counters kept about the uid map; all four start at zero.
struct UidMapStats {
    int32_t changes{0};
    int32_t bytes_used{0};
    int32_t dropped_changes{0};
    int32_t deleted_apps{0};
};
150 
151 // Keeps track of stats of statsd.
152 // Single instance shared across the process. All public methods are thread safe.
153 class StatsdStats {
154 public:
155     static StatsdStats& getInstance();
~StatsdStats()156     ~StatsdStats(){};
157 
158     const static int kDimensionKeySizeSoftLimit = 500;
159     static constexpr int kDimensionKeySizeHardLimit = 800;
160 
161     // Per atom dimension key size limit
162     static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap;
163 
164     const static int kMaxConfigCountPerUid = 20;
165     const static int kMaxAlertCountPerConfig = 200;
166     const static int kMaxConditionCountPerConfig = 500;
167     const static int kMaxMetricCountPerConfig = 2000;
168     const static int kMaxMatcherCountPerConfig = 2500;
169 
170     // The max number of old config stats we keep.
171     const static int kMaxIceBoxSize = 20;
172 
173     const static int kMaxLoggerErrors = 20;
174 
175     const static int kMaxSystemServerRestarts = 20;
176 
177     const static int kMaxTimestampCount = 20;
178 
179     const static int kMaxLogSourceCount = 150;
180 
181     const static int kMaxPullAtomPackages = 100;
182 
183     const static int kMaxRestrictedMetricQueryCount = 20;
184 
185     const static int kMaxRestrictedMetricFlushLatencyCount = 20;
186 
187     const static int kMaxRestrictedConfigFlushLatencyCount = 20;
188 
189     const static int kMaxRestrictedConfigDbSizeCount = 20;
190 
191     // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd
192     // drops the metrics data in memory.
193     static const size_t kMaxMetricsBytesPerConfig = 2 * 1024 * 1024;
194 
195     // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the
196     // data subscriber that it's time to call getData.
197     static const size_t kBytesPerConfigTriggerGetData = 192 * 1024;
198 
199     // Soft memory limit per restricted configuration. Once this limit is exceeded,
200     // we begin flush in-memory restricted metrics to database.
201     static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024;
202 
203     // Cap the UID map's memory usage to this. This should be fairly high since the UID information
204     // is critical for understanding the metrics.
205     const static size_t kMaxBytesUsedUidMap = 50 * 1024;
206 
207     // The number of deleted apps that are stored in the uid map.
208     const static int kMaxDeletedAppsInUidMap = 100;
209 
210     /* Minimum period between two broadcasts in nanoseconds. */
211     static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC;
212 
213     /* Min period between two checks of byte size per config key in nanoseconds. */
214     static const int64_t kMinByteSizeCheckPeriodNs = 60 * NS_PER_SEC;
215 
216     /* Min period between two checks of restricted metrics TTLs. */
217     static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC;
218 
219     /* Min period between two flush operations of restricted metrics. */
220     static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC;
221 
222     /* Min period between two db guardrail check operations of restricted metrics. */
223     static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC;
224 
225     /* Minimum period between two activation broadcasts in nanoseconds. */
226     static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC;
227 
228     // Maximum age (30 days) that files on disk can exist in seconds.
229     static const int kMaxAgeSecond = 60 * 60 * 24 * 30;
230 
231     // Maximum age (2 days) that local history files on disk can exist in seconds.
232     static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2;
233 
234     // Maximum number of files (1000) that can be in stats directory on disk.
235     static const int kMaxFileNumber = 1000;
236 
237     // Maximum size of all files that can be written to stats directory on disk.
238     static const int kMaxFileSize = 50 * 1024 * 1024;
239 
240     // How long to try to clear puller cache from last time
241     static const long kPullerCacheClearIntervalSec = 1;
242 
243     // Max time to do a pull.
244     static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC;
245 
246     // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId.
247     static const int kMaxNonPlatformPushedAtoms = 600;
248 
249     // Maximum number of pushed atoms error statsd stats will track.
250     static const int kMaxPushedAtomErrorStatsSize = 100;
251 
252     // Maximum atom id value that we consider a platform pushed atom.
253     // This should be updated once highest pushed atom id in atoms.proto approaches this value.
254     static const int kMaxPushedAtomId = 900;
255 
256     // Atom id that is the start of the pulled atoms.
257     static const int kPullAtomStartTag = 10000;
258 
259     // Atom id that is the start of vendor atoms.
260     static const int kVendorAtomStartTag = 100000;
261 
262     // Vendor pulled atom start id.
263     static const int32_t kVendorPulledAtomStartTag = 150000;
264 
265     // Beginning of range for timestamp truncation.
266     static const int32_t kTimestampTruncationStartTag = 300000;
267 
268     // End of range for timestamp truncation.
269     static const int32_t kTimestampTruncationEndTag = 304999;
270 
271     // Max accepted atom id.
272     static const int32_t kMaxAtomTag = 200000;
273 
274     static const int64_t kInt64Max = 0x7fffffffffffffffLL;
275 
276     static const int32_t kMaxLoggedBucketDropEvents = 10;
277 
278     /**
279      * Report a new config has been received and report the static stats about the config.
280      *
281      * The static stats include: the count of metrics, conditions, matchers, and alerts.
282      * If the config is not valid, this config stats will be put into icebox immediately.
283      */
284     void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount,
285                             int matchersCount, int alertCount,
286                             const std::list<std::pair<const int64_t, const int32_t>>& annotations,
287                             const std::optional<InvalidConfigReason>& reason);
288     /**
289      * Report a config has been removed.
290      */
291     void noteConfigRemoved(const ConfigKey& key);
292     /**
293      * Report a config has been reset when ttl expires.
294      */
295     void noteConfigReset(const ConfigKey& key);
296 
297     /**
298      * Report a broadcast has been sent to a config owner to collect the data.
299      */
300     void noteBroadcastSent(const ConfigKey& key);
301 
302     /**
303      * Report that a config has become activated or deactivated.
304      * This can be different from whether or not a broadcast is sent if the
305      * guardrail prevented the broadcast from being sent.
306      */
307     void noteActiveStatusChanged(const ConfigKey& key, bool activate);
308 
309     /**
310      * Report a config's metrics data has been dropped.
311      */
312     void noteDataDropped(const ConfigKey& key, const size_t totalBytes);
313 
314     /**
315      * Report metrics data report has been sent.
316      *
317      * The report may be requested via StatsManager API, or through adb cmd.
318      */
319     void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes);
320 
321     /**
322      * Report failure in creating the device info metadata table for restricted configs.
323      */
324     void noteDeviceInfoTableCreationFailed(const ConfigKey& key);
325 
326     /**
327      * Report db corruption for restricted configs.
328      */
329     void noteDbCorrupted(const ConfigKey& key);
330 
331     /**
332      * Report the size of output tuple of a condition.
333      *
334      * Note: only report when the condition has an output dimension, and the tuple
335      * count > kDimensionKeySizeSoftLimit.
336      *
337      * [key]: The config key that this condition belongs to.
338      * [id]: The id of the condition.
339      * [size]: The output tuple size.
340      */
341     void noteConditionDimensionSize(const ConfigKey& key, const int64_t& id, int size);
342 
343     /**
344      * Report the size of output tuple of a metric.
345      *
346      * Note: only report when the metric has an output dimension, and the tuple
347      * count > kDimensionKeySizeSoftLimit.
348      *
349      * [key]: The config key that this metric belongs to.
350      * [id]: The id of the metric.
351      * [size]: The output tuple size.
352      */
353     void noteMetricDimensionSize(const ConfigKey& key, const int64_t& id, int size);
354 
355     /**
356      * Report the max size of output tuple of dimension in condition across dimensions in what.
357      *
358      * Note: only report when the metric has an output dimension in condition, and the max tuple
359      * count > kDimensionKeySizeSoftLimit.
360      *
361      * [key]: The config key that this metric belongs to.
362      * [id]: The id of the metric.
363      * [size]: The output tuple size.
364      */
365     void noteMetricDimensionInConditionSize(const ConfigKey& key, const int64_t& id, int size);
366 
367     /**
368      * Report a matcher has been matched.
369      *
370      * [key]: The config key that this matcher belongs to.
371      * [id]: The id of the matcher.
372      */
373     void noteMatcherMatched(const ConfigKey& key, const int64_t& id);
374 
375     /**
376      * Report that an anomaly detection alert has been declared.
377      *
378      * [key]: The config key that this alert belongs to.
379      * [id]: The id of the alert.
380      */
381     void noteAnomalyDeclared(const ConfigKey& key, const int64_t& id);
382 
383     /**
384      * Report an atom event has been logged.
385      */
386     void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped);
387 
388     /**
389      * Report that statsd modified the anomaly alarm registered with StatsCompanionService.
390      */
391     void noteRegisteredAnomalyAlarmChanged();
392 
393     /**
394      * Report that statsd modified the periodic alarm registered with StatsCompanionService.
395      */
396     void noteRegisteredPeriodicAlarmChanged();
397 
398     /**
399      * Records the number of delta entries that are being dropped from the uid map.
400      */
401     void noteUidMapDropped(int deltas);
402 
403     /**
404      * Records that an app was deleted (from statsd's map).
405      */
406     void noteUidMapAppDeletionDropped();
407 
408     /**
409      * Updates the number of changes currently stored in the uid map.
410      */
411     void setUidMapChanges(int changes);
412     void setCurrentUidMapMemory(int bytes);
413 
414     /*
     * Updates the minimum interval between pulls for a pulled atom.
416      */
417     void updateMinPullIntervalSec(int pullAtomId, long intervalSec);
418 
419     /*
420      * Notes an atom is pulled.
421      */
422     void notePull(int pullAtomId);
423 
424     /*
425      * Notes an atom is served from puller cache.
426      */
427     void notePullFromCache(int pullAtomId);
428 
429     /*
430      * Notify data error for pulled atom.
431      */
432     void notePullDataError(int pullAtomId);
433 
434     /*
435      * Records time for actual pulling, not including those served from cache and not including
436      * statsd processing delays.
437      */
438     void notePullTime(int pullAtomId, int64_t pullTimeNs);
439 
440     /*
441      * Records pull delay for a pulled atom, including those served from cache and including statsd
442      * processing delays.
443      */
444     void notePullDelay(int pullAtomId, int64_t pullDelayNs);
445 
446     /*
447      * Records pull exceeds timeout for the puller.
448      */
449     void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
450 
451     /*
452      * Records pull exceeds max delay for a metric.
453      */
454     void notePullExceedMaxDelay(int pullAtomId);
455 
456     /*
457      * Records when system server restarts.
458      */
459     void noteSystemServerRestart(int32_t timeSec);
460 
461     /**
462      * Records statsd skipped an event.
463      */
464     void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError,
465                      int32_t lastAtomTag, int32_t uid, int32_t pid);
466 
467     /**
468      * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if
469      * the pull timed out, or if the outgoing binder call failed.
470      * This count will only increment if the puller was actually invoked.
471      *
472      * It does not include a pull not occurring due to not finding the appropriate
473      * puller. These cases are covered in other counts.
474      */
475     void notePullFailed(int atomId);
476 
477     /**
478      * Records that the pull of an atom has failed due to not having a uid provider.
479      */
480     void notePullUidProviderNotFound(int atomId);
481 
482     /**
     * Records that the pull of an atom has failed due to not finding a puller registered by a
484      * trusted uid.
485      */
486     void notePullerNotFound(int atomId);
487 
488     /**
489      * Records that the pull has failed due to the outgoing binder call failing.
490      */
491     void notePullBinderCallFailed(int atomId);
492 
493     /**
494      * A pull with no data occurred
495      */
496     void noteEmptyData(int atomId);
497 
498     /**
499      * Records that a puller callback for the given atomId was registered or unregistered.
500      *
501      * @param registered True if the callback was registered, false if was unregistered.
502      */
503     void notePullerCallbackRegistrationChanged(int atomId, bool registered);
504 
505     /**
506      * Hard limit was reached in the cardinality of an atom
507      */
508     void noteHardDimensionLimitReached(int64_t metricId);
509 
510     /**
511      * A log event was too late, arrived in the wrong bucket and was skipped
512      */
513     void noteLateLogEventSkipped(int64_t metricId);
514 
515     /**
516      * Buckets were skipped as time elapsed without any data for them
517      */
518     void noteSkippedForwardBuckets(int64_t metricId);
519 
520     /**
521      * An unsupported value type was received
522      */
523     void noteBadValueType(int64_t metricId);
524 
525     /**
526      * Buckets were dropped due to reclaim memory.
527      */
528     void noteBucketDropped(int64_t metricId);
529 
530     /**
531      * A condition change was too late, arrived in the wrong bucket and was skipped
532      */
533     void noteConditionChangeInNextBucket(int64_t metricId);
534 
535     /**
536      * A bucket has been tagged as invalid.
537      */
538     void noteInvalidatedBucket(int64_t metricId);
539 
540     /**
541      * Tracks the total number of buckets (include skipped/invalid buckets).
542      */
543     void noteBucketCount(int64_t metricId);
544 
545     /**
546      * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and
547      * the end of the bucket.
548      */
549     void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs);
550 
551     /**
552      * Number of buckets with unknown condition.
553      */
554     void noteBucketUnknownCondition(int64_t metricId);
555 
556     /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp
557      * in the queue */
558     void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped);
559 
560     /**
561      * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast
562      * should have been sent, but instead was skipped due to hitting the guardrail.
563      */
564     void noteActivationBroadcastGuardrailHit(const int uid);
565 
566     /**
567      * Reports that an atom is erroneous or cannot be parsed successfully by
568      * statsd. An atom tag of 0 indicates that the client did not supply the
569      * atom id within the encoding.
570      *
571      * For pushed atoms only, this call should be preceded by a call to
572      * noteAtomLogged.
573      */
574     void noteAtomError(int atomTag, bool pull = false);
575 
576     /** Report query of restricted metric succeed **/
577     void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage,
578                                           const std::optional<int32_t> configUid,
579                                           const int32_t callingUid, const int64_t queryLatencyNs);
580 
581     /** Report query of restricted metric failed **/
582     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
583                                          const std::optional<int32_t> configUid,
584                                          const int32_t callingUid, const InvalidQueryReason reason);
585 
586     /** Report query of restricted metric failed along with an error string **/
587     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
588                                          const std::optional<int32_t> configUid,
589                                          const int32_t callingUid, const InvalidQueryReason reason,
590                                          const string& error);
591 
592     // Reports that a restricted metric fails to be inserted to database.
593     void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId);
594 
595     // Reports that a restricted metric fails to create table in database.
596     void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, const int64_t metricId);
597 
598     // Reports that a restricted metric fails to delete table in database.
599     void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, const int64_t metricId);
600 
601     // Reports the time it takes for a restricted metric to flush the data to the database.
602     void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, const int64_t metricId,
603                                           const int64_t flushLatencyNs);
604 
605     // Reports that a restricted metric had a category change.
606     void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, const int64_t metricId);
607 
    // Reports the time it takes to flush a restricted config to the database.
609     void noteRestrictedConfigFlushLatency(const ConfigKey& configKey,
610                                           const int64_t totalFlushLatencyNs);
611 
612     // Reports the size of the internal sqlite db.
613     void noteRestrictedConfigDbSize(const ConfigKey& configKey, const int64_t elapsedTimeNs,
614                                     const int64_t dbSize);
615 
616     /**
617      * Reset the historical stats. Including all stats in icebox, and the tracked stats about
618      * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
619      * to collect stats after reset() has been called.
620      */
621     void reset();
622 
623     /**
624      * Output the stats in protobuf binary format to [buffer].
625      *
626      * [reset]: whether to clear the historical stats after the call.
627      */
628     void dumpStats(std::vector<uint8_t>* buffer, bool reset);
629 
630     /**
631      * Output statsd stats in human readable format to [out] file descriptor.
632      */
633     void dumpStats(int outFd) const;
634 
635     /**
636      * Return soft and hard atom key dimension size limits as an std::pair.
637      */
638     static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(const int atomId = -1);
639 
640     typedef struct PullTimeoutMetadata {
641         int64_t pullTimeoutUptimeMillis;
642         int64_t pullTimeoutElapsedMillis;
PullTimeoutMetadataPullTimeoutMetadata643         PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis)
644             : pullTimeoutUptimeMillis(uptimeMillis),
645               pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */
646         }
647     } PullTimeoutMetadata;
648 
649     typedef struct {
650         long totalPull = 0;
651         long totalPullFromCache = 0;
652         long minPullIntervalSec = LONG_MAX;
653         int64_t avgPullTimeNs = 0;
654         int64_t maxPullTimeNs = 0;
655         long numPullTime = 0;
656         int64_t avgPullDelayNs = 0;
657         int64_t maxPullDelayNs = 0;
658         long numPullDelay = 0;
659         long dataError = 0;
660         long pullTimeout = 0;
661         long pullExceedMaxDelay = 0;
662         long pullFailed = 0;
663         long pullUidProviderNotFound = 0;
664         long pullerNotFound = 0;
665         long emptyData = 0;
666         long registeredCount = 0;
667         long unregisteredCount = 0;
668         int32_t atomErrorCount = 0;
669         long binderCallFailCount = 0;
670         std::list<PullTimeoutMetadata> pullTimeoutMetadata;
671     } PulledAtomStats;
672 
673     typedef struct {
674         long hardDimensionLimitReached = 0;
675         long lateLogEventSkipped = 0;
676         long skippedForwardBuckets = 0;
677         long badValueType = 0;
678         long conditionChangeInNextBucket = 0;
679         long invalidatedBucket = 0;
680         long bucketDropped = 0;
681         int64_t minBucketBoundaryDelayNs = 0;
682         int64_t maxBucketBoundaryDelayNs = 0;
683         long bucketUnknownCondition = 0;
684         long bucketCount = 0;
685     } AtomMetricStats;
686 
687 private:
688     StatsdStats();
689 
690     mutable std::mutex mLock;
691 
692     int32_t mStartTimeSec;
693 
694     // Track the number of dropped entries used by the uid map.
695     UidMapStats mUidMapStats;
696 
697     // The stats about the configs that are still in use.
698     // The map size is capped by kMaxConfigCount.
699     std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats;
700 
701     // Stores the stats for the configs that are no longer in use.
    // The size of the list is capped by kMaxIceBoxSize.
703     std::list<const std::shared_ptr<ConfigStats>> mIceBox;
704 
705     // Stores the number of times a pushed atom is logged and skipped (if skipped).
706     // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms
707     // out of that range will be put in mNonPlatformPushedAtomStats.
708     // This is a vector, not a map because it will be accessed A LOT -- for each stats log.
709     struct PushedAtomStats {
710         int logCount = 0;
711         int skipCount = 0;
712     };
713 
714     std::vector<PushedAtomStats> mPushedAtomStats;
715 
716     // Stores the number of times a pushed atom is logged and skipped for atom ids above
717     // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms.
718     std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats;
719 
720     // Stores the number of times a pushed atom is dropped due to queue overflow event.
721     // We do not expect it will happen too often so the map is preferable vs pre-allocated vector
722     // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms.
723     std::unordered_map<int, int> mPushedAtomDropsStats;
724 
725     // Maps PullAtomId to its stats. The size is capped by the puller atom counts.
726     std::map<int, PulledAtomStats> mPulledAtomStats;
727 
728     // Stores the number of times a pushed atom was logged erroneously. The
729     // corresponding counts for pulled atoms are stored in PulledAtomStats.
730     // The max size of this map is kMaxPushedAtomErrorStatsSize.
731     std::map<int, int> mPushedAtomErrorStats;
732 
733     // Maps metric ID to its stats. The size is capped by the number of metrics.
734     std::map<int64_t, AtomMetricStats> mAtomMetricStats;
735 
736     // Maps uids to times when the activation changed broadcast not sent due to hitting the
737     // guardrail. The size is capped by the number of configs, and up to 20 times per uid.
738     std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats;
739 
740     struct LogLossStats {
LogLossStatsLogLossStats741         LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid,
742                      int32_t pid)
743             : mWallClockSec(sec),
744               mCount(count),
745               mLastError(error),
746               mLastTag(tag),
747               mUid(uid),
748               mPid(pid) {
749         }
750         int32_t mWallClockSec;
751         int32_t mCount;
752         // error code defined in linux/errno.h
753         int32_t mLastError;
754         int32_t mLastTag;
755         int32_t mUid;
756         int32_t mPid;
757     };
758 
759     // Max of {(now - oldestEventTimestamp) when overflow happens}.
760     // This number is helpful to understand how SLOW statsd can be.
761     int64_t mMaxQueueHistoryNs = 0;
762 
763     // Min of {(now - oldestEventTimestamp) when overflow happens}.
    // This number is helpful to understand how FAST events flood into statsd.
765     int64_t mMinQueueHistoryNs = kInt64Max;
766 
767     // Total number of events that are lost due to queue overflow.
768     int32_t mOverflowCount = 0;
769 
770     // Timestamps when we detect log loss, and the number of logs lost.
771     std::list<LogLossStats> mLogLossStats;
772 
773     std::list<int32_t> mSystemServerRestartSec;
774 
775     struct RestrictedMetricQueryStats {
RestrictedMetricQueryStatsRestrictedMetricQueryStats776         RestrictedMetricQueryStats(int32_t callingUid, int64_t configId,
777                                    const string& configPackage, std::optional<int32_t> configUid,
778                                    int64_t queryTimeNs,
779                                    std::optional<InvalidQueryReason> invalidQueryReason,
780                                    const string& error, std::optional<int64_t> queryLatencyNs)
781             : mCallingUid(callingUid),
782               mConfigId(configId),
783               mConfigPackage(configPackage),
784               mConfigUid(configUid),
785               mQueryWallTimeNs(queryTimeNs),
786               mInvalidQueryReason(invalidQueryReason),
787               mError(error),
788               mQueryLatencyNs(queryLatencyNs) {
789             mHasError = invalidQueryReason.has_value();
790         }
791         int32_t mCallingUid;
792         int64_t mConfigId;
793         string mConfigPackage;
794         std::optional<int32_t> mConfigUid;
795         int64_t mQueryWallTimeNs;
796         std::optional<InvalidQueryReason> mInvalidQueryReason;
797         bool mHasError;
798         string mError;
799         std::optional<int64_t> mQueryLatencyNs;
800     };
801     std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats;  // Stored query records.
802 
        // Declaration only; the definition is outside this header. The parameters line up with
        // the like-named constructor arguments of RestrictedMetricQueryStats.
        // NOTE(review): presumably appends a failed-query record to mRestrictedMetricQueryStats,
        // and the `Locked` suffix presumably means the caller already holds the lock -- confirm
        // both against the definition.
803     void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage,
804                                                const std::optional<int32_t> configUid,
805                                                const int32_t callingUid,
806                                                const InvalidQueryReason reason,
807                                                const string& error);
808 
809     // Stores the number of times statsd modified the anomaly alarm registered with
810     // StatsCompanionService.
811     int mAnomalyAlarmRegisteredStats = 0;
812 
813     // Stores the number of times statsd registered a periodic alarm change.
814     int mPeriodicAlarmRegisteredStats = 0;
815 
        // ---- Internal helpers. Declarations only; the definitions are outside this header. ----
        // NOTE(review): the `Locked` suffix on several of these presumably means the caller must
        // already hold the lock guarding this object's state -- confirm against the definitions.
816     void noteConfigResetInternalLocked(const ConfigKey& key);
817 
818     void noteConfigRemovedInternalLocked(const ConfigKey& key);
819 
820     void resetInternalLocked();
821 
822     void noteAtomLoggedLocked(int atomId, bool isSkipped);
823 
824     void noteAtomDroppedLocked(int atomId);
825 
        // The following five take the event time as an explicit `timeSec` argument.
        // NOTE(review): presumably so callers and tests can supply the timestamp -- confirm.
826     void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec);
827 
828     void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes, int32_t timeSec);
829 
830     void noteBroadcastSent(const ConfigKey& key, int32_t timeSec);
831 
832     void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec);
833 
834     void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec);
835 
        // Takes the shared_ptr by non-const reference.
        // NOTE(review): whether the callee resets or moves from `stats` is not visible here.
836     void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats);
837 
        // Const lookups keyed by atom id, each returning an int.
        // NOTE(review): presumably the error and drop counts for that atom -- confirm.
838     int getPushedAtomErrorsLocked(int atomId) const;
839 
840     int getPushedAtomDropsLocked(int atomId) const;
841 
842     /**
843      * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference
844      * will live as long as `this`.
845      */
846     StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId);
847 
        // Test access: FRIEND_TEST (from gtest_prod.h) declares the named gtest test as a friend
        // of this class, so each test listed below can reach the non-public members above.
848     FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd);
849     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd);
850     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId);
851     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId);
852     FRIEND_TEST(StatsdStatsTest, TestConfigRemove);
853     FRIEND_TEST(StatsdStatsTest, TestSubStats);
854     FRIEND_TEST(StatsdStatsTest, TestAtomLog);
855     FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog);
856     FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
857     FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
858     FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
859     FRIEND_TEST(StatsdStatsTest, TestPullAtomStats);
860     FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats);
861     FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit);
862     FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats);
863     FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats);
864     FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats);
865     FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats);
866     FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats);
867     FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats);
868     FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats);
869     FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider);
870 
        // The only friend from a different test suite (StatsLogProcessorTest).
871     FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved);
872 };
873 
    // Free-function factories that return an InvalidConfigReason by value. Declarations only;
    // the definitions are outside this header.
    // NOTE(review): presumably each one sets `reason` and stores the given ids in the like-named
    // InvalidConfigReason fields (metricId, stateId, alertId, alarmId, subscriptionId,
    // matcherIds, conditionIds) -- confirm against the definitions.

    // Matcher variants: without and with a metric id.
874 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
875                                                          const int64_t matcherId);
876 
877 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
878                                                          const int64_t metricId,
879                                                          const int64_t matcherId);
880 
    // Predicate variants take a `conditionId`: without and with a metric id.
881 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
882                                                            const int64_t conditionId);
883 
884 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
885                                                            const int64_t metricId,
886                                                            const int64_t conditionId);
887 
    // State variant: always takes a metric id.
888 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason,
889                                                        const int64_t metricId,
890                                                        const int64_t stateId);
891 
    // Alert variants: without and with a metric id.
892 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
893                                                        const int64_t alertId);
894 
895 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
896                                                        const int64_t metricId,
897                                                        const int64_t alertId);
898 
    // Alarm and subscription variants, including the two subscription+alarm/alert combinations.
899 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason,
900                                                        const int64_t alarmId);
901 
902 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason,
903                                                               const int64_t subscriptionId);
904 
905 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm(
906         const InvalidConfigReasonEnum reason, const int64_t subscriptionId, const int64_t alarmId);
907 
908 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert(
909         const InvalidConfigReasonEnum reason, const int64_t subscriptionId, const int64_t alertId);
910 
911 }  // namespace statsd
912 }  // namespace os
913 }  // namespace android
914