1 /* 2 * Copyright 2017, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include <gtest/gtest_prod.h> 19 #include <log/log_time.h> 20 #include <src/guardrail/invalid_config_reason_enum.pb.h> 21 22 #include <list> 23 #include <mutex> 24 #include <string> 25 #include <unordered_map> 26 #include <vector> 27 28 #include "config/ConfigKey.h" 29 30 namespace android { 31 namespace os { 32 namespace statsd { 33 34 struct InvalidConfigReason { 35 InvalidConfigReasonEnum reason; 36 std::optional<int64_t> metricId; 37 std::optional<int64_t> stateId; 38 std::optional<int64_t> alertId; 39 std::optional<int64_t> alarmId; 40 std::optional<int64_t> subscriptionId; 41 std::vector<int64_t> matcherIds; 42 std::vector<int64_t> conditionIds; InvalidConfigReasonInvalidConfigReason43 InvalidConfigReason(){}; InvalidConfigReasonInvalidConfigReason44 InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){}; InvalidConfigReasonInvalidConfigReason45 InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId) 46 : reason(reason), metricId(metricId){}; 47 bool operator==(const InvalidConfigReason& other) const { 48 return (this->reason == other.reason) && (this->metricId == other.metricId) && 49 (this->stateId == other.stateId) && (this->alertId == other.alertId) && 50 (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) && 51 (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds); 52 } 53 }; 54 55 // Keep this in sync with InvalidQueryReason enum in stats_log.proto 56 enum InvalidQueryReason { 57 UNKNOWN_REASON = 0, 58 FLAG_DISABLED = 1, 59 UNSUPPORTED_SQLITE_VERSION = 2, 60 AMBIGUOUS_CONFIG_KEY = 3, 61 CONFIG_KEY_NOT_FOUND = 4, 62 CONFIG_KEY_WITH_UNMATCHED_DELEGATE = 5, 63 QUERY_FAILURE = 6, 64 INCONSISTENT_ROW_SIZE = 7, 65 NULL_CALLBACK = 8 66 }; 67 68 typedef struct { 69 int64_t insertError = 0; 70 int64_t tableCreationError = 0; 71 int64_t tableDeletionError = 0; 72 std::list<int64_t> flushLatencyNs; 73 int64_t categoryChangedCount = 0; 74 } RestrictedMetricStats; 75 76 struct ConfigStats { 77 int32_t uid; 78 int64_t id; 79 int32_t creation_time_sec; 80 int32_t deletion_time_sec = 0; 81 int32_t reset_time_sec = 0; 82 int32_t metric_count; 83 int32_t condition_count; 84 int32_t matcher_count; 85 int32_t alert_count; 86 bool is_valid; 87 bool device_info_table_creation_failed = false; 88 int32_t db_corrupted_count = 0; 89 90 // Stores reasons for why config is valid or not 91 std::optional<InvalidConfigReason> reason; 92 93 std::list<int32_t> broadcast_sent_time_sec; 94 95 // Times at which this config is activated. 96 std::list<int32_t> activation_time_sec; 97 98 // Times at which this config is deactivated. 99 std::list<int32_t> deactivation_time_sec; 100 101 std::list<int32_t> data_drop_time_sec; 102 // Number of bytes dropped at corresponding time. 103 std::list<int64_t> data_drop_bytes; 104 std::list<std::pair<int32_t, int64_t>> dump_report_stats; 105 106 // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount. 107 std::map<const int64_t, int> matcher_stats; 108 109 // Stores the number of output tuple of condition trackers when it's bigger than 110 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 111 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 112 std::map<const int64_t, int> condition_stats; 113 114 // Stores the number of output tuple of metric producers when it's bigger than 115 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 116 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 117 std::map<const int64_t, int> metric_stats; 118 119 // Stores the max number of output tuple of dimensions in condition across dimensions in what 120 // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is 121 // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by 122 // kMaxConfigCount. 123 std::map<const int64_t, int> metric_dimension_in_condition_stats; 124 125 // Stores the number of times an anomaly detection alert has been declared. 126 // The map size is capped by kMaxConfigCount. 127 std::map<const int64_t, int> alert_stats; 128 129 // Stores the config ID for each sub-config used. 130 std::list<std::pair<const int64_t, const int32_t>> annotations; 131 132 // Maps metric ID of restricted metric to its stats. 133 std::map<int64_t, RestrictedMetricStats> restricted_metric_stats; 134 135 std::list<int64_t> total_flush_latency_ns; 136 137 // Stores the last 20 timestamps for computing sqlite db size. 138 std::list<int64_t> total_db_size_timestamps; 139 140 // Stores the last 20 sizes of the sqlite db. 141 std::list<int64_t> total_db_sizes; 142 }; 143 144 struct UidMapStats { 145 int32_t changes = 0; 146 int32_t bytes_used = 0; 147 int32_t dropped_changes = 0; 148 int32_t deleted_apps = 0; 149 }; 150 151 // Keeps track of stats of statsd. 152 // Single instance shared across the process. All public methods are thread safe. 153 class StatsdStats { 154 public: 155 static StatsdStats& getInstance(); ~StatsdStats()156 ~StatsdStats(){}; 157 158 const static int kDimensionKeySizeSoftLimit = 500; 159 static constexpr int kDimensionKeySizeHardLimit = 800; 160 161 // Per atom dimension key size limit 162 static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap; 163 164 const static int kMaxConfigCountPerUid = 20; 165 const static int kMaxAlertCountPerConfig = 200; 166 const static int kMaxConditionCountPerConfig = 500; 167 const static int kMaxMetricCountPerConfig = 2000; 168 const static int kMaxMatcherCountPerConfig = 2500; 169 170 // The max number of old config stats we keep. 171 const static int kMaxIceBoxSize = 20; 172 173 const static int kMaxLoggerErrors = 20; 174 175 const static int kMaxSystemServerRestarts = 20; 176 177 const static int kMaxTimestampCount = 20; 178 179 const static int kMaxLogSourceCount = 150; 180 181 const static int kMaxPullAtomPackages = 100; 182 183 const static int kMaxRestrictedMetricQueryCount = 20; 184 185 const static int kMaxRestrictedMetricFlushLatencyCount = 20; 186 187 const static int kMaxRestrictedConfigFlushLatencyCount = 20; 188 189 const static int kMaxRestrictedConfigDbSizeCount = 20; 190 191 // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd 192 // drops the metrics data in memory. 193 static const size_t kMaxMetricsBytesPerConfig = 2 * 1024 * 1024; 194 195 // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the 196 // data subscriber that it's time to call getData. 197 static const size_t kBytesPerConfigTriggerGetData = 192 * 1024; 198 199 // Soft memory limit per restricted configuration. Once this limit is exceeded, 200 // we begin flush in-memory restricted metrics to database. 201 static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024; 202 203 // Cap the UID map's memory usage to this. This should be fairly high since the UID information 204 // is critical for understanding the metrics. 205 const static size_t kMaxBytesUsedUidMap = 50 * 1024; 206 207 // The number of deleted apps that are stored in the uid map. 208 const static int kMaxDeletedAppsInUidMap = 100; 209 210 /* Minimum period between two broadcasts in nanoseconds. */ 211 static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC; 212 213 /* Min period between two checks of byte size per config key in nanoseconds. */ 214 static const int64_t kMinByteSizeCheckPeriodNs = 60 * NS_PER_SEC; 215 216 /* Min period between two checks of restricted metrics TTLs. */ 217 static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC; 218 219 /* Min period between two flush operations of restricted metrics. */ 220 static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC; 221 222 /* Min period between two db guardrail check operations of restricted metrics. */ 223 static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC; 224 225 /* Minimum period between two activation broadcasts in nanoseconds. */ 226 static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC; 227 228 // Maximum age (30 days) that files on disk can exist in seconds. 229 static const int kMaxAgeSecond = 60 * 60 * 24 * 30; 230 231 // Maximum age (2 days) that local history files on disk can exist in seconds. 232 static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2; 233 234 // Maximum number of files (1000) that can be in stats directory on disk. 235 static const int kMaxFileNumber = 1000; 236 237 // Maximum size of all files that can be written to stats directory on disk. 238 static const int kMaxFileSize = 50 * 1024 * 1024; 239 240 // How long to try to clear puller cache from last time 241 static const long kPullerCacheClearIntervalSec = 1; 242 243 // Max time to do a pull. 244 static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC; 245 246 // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId. 247 static const int kMaxNonPlatformPushedAtoms = 600; 248 249 // Maximum number of pushed atoms error statsd stats will track. 250 static const int kMaxPushedAtomErrorStatsSize = 100; 251 252 // Maximum atom id value that we consider a platform pushed atom. 253 // This should be updated once highest pushed atom id in atoms.proto approaches this value. 254 static const int kMaxPushedAtomId = 900; 255 256 // Atom id that is the start of the pulled atoms. 257 static const int kPullAtomStartTag = 10000; 258 259 // Atom id that is the start of vendor atoms. 260 static const int kVendorAtomStartTag = 100000; 261 262 // Vendor pulled atom start id. 263 static const int32_t kVendorPulledAtomStartTag = 150000; 264 265 // Beginning of range for timestamp truncation. 266 static const int32_t kTimestampTruncationStartTag = 300000; 267 268 // End of range for timestamp truncation. 269 static const int32_t kTimestampTruncationEndTag = 304999; 270 271 // Max accepted atom id. 272 static const int32_t kMaxAtomTag = 200000; 273 274 static const int64_t kInt64Max = 0x7fffffffffffffffLL; 275 276 static const int32_t kMaxLoggedBucketDropEvents = 10; 277 278 /** 279 * Report a new config has been received and report the static stats about the config. 280 * 281 * The static stats include: the count of metrics, conditions, matchers, and alerts. 282 * If the config is not valid, this config stats will be put into icebox immediately. 283 */ 284 void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount, 285 int matchersCount, int alertCount, 286 const std::list<std::pair<const int64_t, const int32_t>>& annotations, 287 const std::optional<InvalidConfigReason>& reason); 288 /** 289 * Report a config has been removed. 290 */ 291 void noteConfigRemoved(const ConfigKey& key); 292 /** 293 * Report a config has been reset when ttl expires. 294 */ 295 void noteConfigReset(const ConfigKey& key); 296 297 /** 298 * Report a broadcast has been sent to a config owner to collect the data. 299 */ 300 void noteBroadcastSent(const ConfigKey& key); 301 302 /** 303 * Report that a config has become activated or deactivated. 304 * This can be different from whether or not a broadcast is sent if the 305 * guardrail prevented the broadcast from being sent. 306 */ 307 void noteActiveStatusChanged(const ConfigKey& key, bool activate); 308 309 /** 310 * Report a config's metrics data has been dropped. 311 */ 312 void noteDataDropped(const ConfigKey& key, const size_t totalBytes); 313 314 /** 315 * Report metrics data report has been sent. 316 * 317 * The report may be requested via StatsManager API, or through adb cmd. 318 */ 319 void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes); 320 321 /** 322 * Report failure in creating the device info metadata table for restricted configs. 323 */ 324 void noteDeviceInfoTableCreationFailed(const ConfigKey& key); 325 326 /** 327 * Report db corruption for restricted configs. 328 */ 329 void noteDbCorrupted(const ConfigKey& key); 330 331 /** 332 * Report the size of output tuple of a condition. 333 * 334 * Note: only report when the condition has an output dimension, and the tuple 335 * count > kDimensionKeySizeSoftLimit. 336 * 337 * [key]: The config key that this condition belongs to. 338 * [id]: The id of the condition. 339 * [size]: The output tuple size. 340 */ 341 void noteConditionDimensionSize(const ConfigKey& key, const int64_t& id, int size); 342 343 /** 344 * Report the size of output tuple of a metric. 345 * 346 * Note: only report when the metric has an output dimension, and the tuple 347 * count > kDimensionKeySizeSoftLimit. 348 * 349 * [key]: The config key that this metric belongs to. 350 * [id]: The id of the metric. 351 * [size]: The output tuple size. 352 */ 353 void noteMetricDimensionSize(const ConfigKey& key, const int64_t& id, int size); 354 355 /** 356 * Report the max size of output tuple of dimension in condition across dimensions in what. 357 * 358 * Note: only report when the metric has an output dimension in condition, and the max tuple 359 * count > kDimensionKeySizeSoftLimit. 360 * 361 * [key]: The config key that this metric belongs to. 362 * [id]: The id of the metric. 363 * [size]: The output tuple size. 364 */ 365 void noteMetricDimensionInConditionSize(const ConfigKey& key, const int64_t& id, int size); 366 367 /** 368 * Report a matcher has been matched. 369 * 370 * [key]: The config key that this matcher belongs to. 371 * [id]: The id of the matcher. 372 */ 373 void noteMatcherMatched(const ConfigKey& key, const int64_t& id); 374 375 /** 376 * Report that an anomaly detection alert has been declared. 377 * 378 * [key]: The config key that this alert belongs to. 379 * [id]: The id of the alert. 380 */ 381 void noteAnomalyDeclared(const ConfigKey& key, const int64_t& id); 382 383 /** 384 * Report an atom event has been logged. 385 */ 386 void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped); 387 388 /** 389 * Report that statsd modified the anomaly alarm registered with StatsCompanionService. 390 */ 391 void noteRegisteredAnomalyAlarmChanged(); 392 393 /** 394 * Report that statsd modified the periodic alarm registered with StatsCompanionService. 395 */ 396 void noteRegisteredPeriodicAlarmChanged(); 397 398 /** 399 * Records the number of delta entries that are being dropped from the uid map. 400 */ 401 void noteUidMapDropped(int deltas); 402 403 /** 404 * Records that an app was deleted (from statsd's map). 405 */ 406 void noteUidMapAppDeletionDropped(); 407 408 /** 409 * Updates the number of changes currently stored in the uid map. 410 */ 411 void setUidMapChanges(int changes); 412 void setCurrentUidMapMemory(int bytes); 413 414 /* 415 * Updates minimum interval between pulls for an pulled atom. 416 */ 417 void updateMinPullIntervalSec(int pullAtomId, long intervalSec); 418 419 /* 420 * Notes an atom is pulled. 421 */ 422 void notePull(int pullAtomId); 423 424 /* 425 * Notes an atom is served from puller cache. 426 */ 427 void notePullFromCache(int pullAtomId); 428 429 /* 430 * Notify data error for pulled atom. 431 */ 432 void notePullDataError(int pullAtomId); 433 434 /* 435 * Records time for actual pulling, not including those served from cache and not including 436 * statsd processing delays. 437 */ 438 void notePullTime(int pullAtomId, int64_t pullTimeNs); 439 440 /* 441 * Records pull delay for a pulled atom, including those served from cache and including statsd 442 * processing delays. 443 */ 444 void notePullDelay(int pullAtomId, int64_t pullDelayNs); 445 446 /* 447 * Records pull exceeds timeout for the puller. 448 */ 449 void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis); 450 451 /* 452 * Records pull exceeds max delay for a metric. 453 */ 454 void notePullExceedMaxDelay(int pullAtomId); 455 456 /* 457 * Records when system server restarts. 458 */ 459 void noteSystemServerRestart(int32_t timeSec); 460 461 /** 462 * Records statsd skipped an event. 463 */ 464 void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError, 465 int32_t lastAtomTag, int32_t uid, int32_t pid); 466 467 /** 468 * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if 469 * the pull timed out, or if the outgoing binder call failed. 470 * This count will only increment if the puller was actually invoked. 471 * 472 * It does not include a pull not occurring due to not finding the appropriate 473 * puller. These cases are covered in other counts. 474 */ 475 void notePullFailed(int atomId); 476 477 /** 478 * Records that the pull of an atom has failed due to not having a uid provider. 479 */ 480 void notePullUidProviderNotFound(int atomId); 481 482 /** 483 * Records that the pull of an atom has failed due not finding a puller registered by a 484 * trusted uid. 485 */ 486 void notePullerNotFound(int atomId); 487 488 /** 489 * Records that the pull has failed due to the outgoing binder call failing. 490 */ 491 void notePullBinderCallFailed(int atomId); 492 493 /** 494 * A pull with no data occurred 495 */ 496 void noteEmptyData(int atomId); 497 498 /** 499 * Records that a puller callback for the given atomId was registered or unregistered. 500 * 501 * @param registered True if the callback was registered, false if was unregistered. 502 */ 503 void notePullerCallbackRegistrationChanged(int atomId, bool registered); 504 505 /** 506 * Hard limit was reached in the cardinality of an atom 507 */ 508 void noteHardDimensionLimitReached(int64_t metricId); 509 510 /** 511 * A log event was too late, arrived in the wrong bucket and was skipped 512 */ 513 void noteLateLogEventSkipped(int64_t metricId); 514 515 /** 516 * Buckets were skipped as time elapsed without any data for them 517 */ 518 void noteSkippedForwardBuckets(int64_t metricId); 519 520 /** 521 * An unsupported value type was received 522 */ 523 void noteBadValueType(int64_t metricId); 524 525 /** 526 * Buckets were dropped due to reclaim memory. 527 */ 528 void noteBucketDropped(int64_t metricId); 529 530 /** 531 * A condition change was too late, arrived in the wrong bucket and was skipped 532 */ 533 void noteConditionChangeInNextBucket(int64_t metricId); 534 535 /** 536 * A bucket has been tagged as invalid. 537 */ 538 void noteInvalidatedBucket(int64_t metricId); 539 540 /** 541 * Tracks the total number of buckets (include skipped/invalid buckets). 542 */ 543 void noteBucketCount(int64_t metricId); 544 545 /** 546 * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and 547 * the end of the bucket. 548 */ 549 void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs); 550 551 /** 552 * Number of buckets with unknown condition. 553 */ 554 void noteBucketUnknownCondition(int64_t metricId); 555 556 /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp 557 * in the queue */ 558 void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped); 559 560 /** 561 * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast 562 * should have been sent, but instead was skipped due to hitting the guardrail. 563 */ 564 void noteActivationBroadcastGuardrailHit(const int uid); 565 566 /** 567 * Reports that an atom is erroneous or cannot be parsed successfully by 568 * statsd. An atom tag of 0 indicates that the client did not supply the 569 * atom id within the encoding. 570 * 571 * For pushed atoms only, this call should be preceded by a call to 572 * noteAtomLogged. 573 */ 574 void noteAtomError(int atomTag, bool pull = false); 575 576 /** Report query of restricted metric succeed **/ 577 void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage, 578 const std::optional<int32_t> configUid, 579 const int32_t callingUid, const int64_t queryLatencyNs); 580 581 /** Report query of restricted metric failed **/ 582 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 583 const std::optional<int32_t> configUid, 584 const int32_t callingUid, const InvalidQueryReason reason); 585 586 /** Report query of restricted metric failed along with an error string **/ 587 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 588 const std::optional<int32_t> configUid, 589 const int32_t callingUid, const InvalidQueryReason reason, 590 const string& error); 591 592 // Reports that a restricted metric fails to be inserted to database. 593 void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId); 594 595 // Reports that a restricted metric fails to create table in database. 596 void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, const int64_t metricId); 597 598 // Reports that a restricted metric fails to delete table in database. 599 void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, const int64_t metricId); 600 601 // Reports the time it takes for a restricted metric to flush the data to the database. 602 void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, const int64_t metricId, 603 const int64_t flushLatencyNs); 604 605 // Reports that a restricted metric had a category change. 606 void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, const int64_t metricId); 607 608 // Reports the time is takes to flush a restricted config to the database. 609 void noteRestrictedConfigFlushLatency(const ConfigKey& configKey, 610 const int64_t totalFlushLatencyNs); 611 612 // Reports the size of the internal sqlite db. 613 void noteRestrictedConfigDbSize(const ConfigKey& configKey, const int64_t elapsedTimeNs, 614 const int64_t dbSize); 615 616 /** 617 * Reset the historical stats. Including all stats in icebox, and the tracked stats about 618 * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue 619 * to collect stats after reset() has been called. 620 */ 621 void reset(); 622 623 /** 624 * Output the stats in protobuf binary format to [buffer]. 625 * 626 * [reset]: whether to clear the historical stats after the call. 627 */ 628 void dumpStats(std::vector<uint8_t>* buffer, bool reset); 629 630 /** 631 * Output statsd stats in human readable format to [out] file descriptor. 632 */ 633 void dumpStats(int outFd) const; 634 635 /** 636 * Return soft and hard atom key dimension size limits as an std::pair. 637 */ 638 static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(const int atomId = -1); 639 640 typedef struct PullTimeoutMetadata { 641 int64_t pullTimeoutUptimeMillis; 642 int64_t pullTimeoutElapsedMillis; PullTimeoutMetadataPullTimeoutMetadata643 PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) 644 : pullTimeoutUptimeMillis(uptimeMillis), 645 pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */ 646 } 647 } PullTimeoutMetadata; 648 649 typedef struct { 650 long totalPull = 0; 651 long totalPullFromCache = 0; 652 long minPullIntervalSec = LONG_MAX; 653 int64_t avgPullTimeNs = 0; 654 int64_t maxPullTimeNs = 0; 655 long numPullTime = 0; 656 int64_t avgPullDelayNs = 0; 657 int64_t maxPullDelayNs = 0; 658 long numPullDelay = 0; 659 long dataError = 0; 660 long pullTimeout = 0; 661 long pullExceedMaxDelay = 0; 662 long pullFailed = 0; 663 long pullUidProviderNotFound = 0; 664 long pullerNotFound = 0; 665 long emptyData = 0; 666 long registeredCount = 0; 667 long unregisteredCount = 0; 668 int32_t atomErrorCount = 0; 669 long binderCallFailCount = 0; 670 std::list<PullTimeoutMetadata> pullTimeoutMetadata; 671 } PulledAtomStats; 672 673 typedef struct { 674 long hardDimensionLimitReached = 0; 675 long lateLogEventSkipped = 0; 676 long skippedForwardBuckets = 0; 677 long badValueType = 0; 678 long conditionChangeInNextBucket = 0; 679 long invalidatedBucket = 0; 680 long bucketDropped = 0; 681 int64_t minBucketBoundaryDelayNs = 0; 682 int64_t maxBucketBoundaryDelayNs = 0; 683 long bucketUnknownCondition = 0; 684 long bucketCount = 0; 685 } AtomMetricStats; 686 687 private: 688 StatsdStats(); 689 690 mutable std::mutex mLock; 691 692 int32_t mStartTimeSec; 693 694 // Track the number of dropped entries used by the uid map. 695 UidMapStats mUidMapStats; 696 697 // The stats about the configs that are still in use. 698 // The map size is capped by kMaxConfigCount. 699 std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats; 700 701 // Stores the stats for the configs that are no longer in use. 702 // The size of the vector is capped by kMaxIceBoxSize. 703 std::list<const std::shared_ptr<ConfigStats>> mIceBox; 704 705 // Stores the number of times a pushed atom is logged and skipped (if skipped). 706 // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms 707 // out of that range will be put in mNonPlatformPushedAtomStats. 708 // This is a vector, not a map because it will be accessed A LOT -- for each stats log. 709 struct PushedAtomStats { 710 int logCount = 0; 711 int skipCount = 0; 712 }; 713 714 std::vector<PushedAtomStats> mPushedAtomStats; 715 716 // Stores the number of times a pushed atom is logged and skipped for atom ids above 717 // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms. 718 std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats; 719 720 // Stores the number of times a pushed atom is dropped due to queue overflow event. 721 // We do not expect it will happen too often so the map is preferable vs pre-allocated vector 722 // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms. 723 std::unordered_map<int, int> mPushedAtomDropsStats; 724 725 // Maps PullAtomId to its stats. The size is capped by the puller atom counts. 726 std::map<int, PulledAtomStats> mPulledAtomStats; 727 728 // Stores the number of times a pushed atom was logged erroneously. The 729 // corresponding counts for pulled atoms are stored in PulledAtomStats. 730 // The max size of this map is kMaxPushedAtomErrorStatsSize. 731 std::map<int, int> mPushedAtomErrorStats; 732 733 // Maps metric ID to its stats. The size is capped by the number of metrics. 734 std::map<int64_t, AtomMetricStats> mAtomMetricStats; 735 736 // Maps uids to times when the activation changed broadcast not sent due to hitting the 737 // guardrail. The size is capped by the number of configs, and up to 20 times per uid. 738 std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats; 739 740 struct LogLossStats { LogLossStatsLogLossStats741 LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid, 742 int32_t pid) 743 : mWallClockSec(sec), 744 mCount(count), 745 mLastError(error), 746 mLastTag(tag), 747 mUid(uid), 748 mPid(pid) { 749 } 750 int32_t mWallClockSec; 751 int32_t mCount; 752 // error code defined in linux/errno.h 753 int32_t mLastError; 754 int32_t mLastTag; 755 int32_t mUid; 756 int32_t mPid; 757 }; 758 759 // Max of {(now - oldestEventTimestamp) when overflow happens}. 760 // This number is helpful to understand how SLOW statsd can be. 761 int64_t mMaxQueueHistoryNs = 0; 762 763 // Min of {(now - oldestEventTimestamp) when overflow happens}. 764 // This number is helpful to understand how FAST the events floods to statsd. 765 int64_t mMinQueueHistoryNs = kInt64Max; 766 767 // Total number of events that are lost due to queue overflow. 768 int32_t mOverflowCount = 0; 769 770 // Timestamps when we detect log loss, and the number of logs lost. 771 std::list<LogLossStats> mLogLossStats; 772 773 std::list<int32_t> mSystemServerRestartSec; 774 775 struct RestrictedMetricQueryStats { RestrictedMetricQueryStatsRestrictedMetricQueryStats776 RestrictedMetricQueryStats(int32_t callingUid, int64_t configId, 777 const string& configPackage, std::optional<int32_t> configUid, 778 int64_t queryTimeNs, 779 std::optional<InvalidQueryReason> invalidQueryReason, 780 const string& error, std::optional<int64_t> queryLatencyNs) 781 : mCallingUid(callingUid), 782 mConfigId(configId), 783 mConfigPackage(configPackage), 784 mConfigUid(configUid), 785 mQueryWallTimeNs(queryTimeNs), 786 mInvalidQueryReason(invalidQueryReason), 787 mError(error), 788 mQueryLatencyNs(queryLatencyNs) { 789 mHasError = invalidQueryReason.has_value(); 790 } 791 int32_t mCallingUid; 792 int64_t mConfigId; 793 string mConfigPackage; 794 std::optional<int32_t> mConfigUid; 795 int64_t mQueryWallTimeNs; 796 std::optional<InvalidQueryReason> mInvalidQueryReason; 797 bool mHasError; 798 string mError; 799 std::optional<int64_t> mQueryLatencyNs; 800 }; 801 std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats; 802 803 void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage, 804 const std::optional<int32_t> configUid, 805 const int32_t callingUid, 806 const InvalidQueryReason reason, 807 const string& error); 808 809 // Stores the number of times statsd modified the anomaly alarm registered with 810 // StatsCompanionService. 811 int mAnomalyAlarmRegisteredStats = 0; 812 813 // Stores the number of times statsd registers the periodic alarm changes 814 int mPeriodicAlarmRegisteredStats = 0; 815 816 void noteConfigResetInternalLocked(const ConfigKey& key); 817 818 void noteConfigRemovedInternalLocked(const ConfigKey& key); 819 820 void resetInternalLocked(); 821 822 void noteAtomLoggedLocked(int atomId, bool isSkipped); 823 824 void noteAtomDroppedLocked(int atomId); 825 826 void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec); 827 828 void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes, int32_t timeSec); 829 830 void noteBroadcastSent(const ConfigKey& key, int32_t timeSec); 831 832 void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec); 833 834 void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec); 835 836 void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats); 837 838 int getPushedAtomErrorsLocked(int atomId) const; 839 840 int getPushedAtomDropsLocked(int atomId) const; 841 842 /** 843 * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference 844 * will live as long as `this`. 845 */ 846 StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId); 847 848 FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd); 849 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd); 850 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId); 851 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId); 852 FRIEND_TEST(StatsdStatsTest, TestConfigRemove); 853 FRIEND_TEST(StatsdStatsTest, TestSubStats); 854 FRIEND_TEST(StatsdStatsTest, TestAtomLog); 855 FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog); 856 FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold); 857 FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor); 858 FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash); 859 FRIEND_TEST(StatsdStatsTest, TestPullAtomStats); 860 FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats); 861 FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit); 862 FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats); 863 FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats); 864 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats); 865 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats); 866 FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats); 867 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats); 868 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats); 869 FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider); 870 871 FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved); 872 }; 873 874 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 875 const int64_t matcherId); 876 877 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 878 const int64_t metricId, 879 const int64_t matcherId); 880 881 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 882 const int64_t conditionId); 883 884 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 885 const int64_t metricId, 886 const int64_t conditionId); 887 888 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason, 889 const int64_t metricId, 890 const int64_t stateId); 891 892 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 893 const int64_t alertId); 894 895 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 896 const int64_t metricId, 897 const int64_t alertId); 898 899 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason, 900 const int64_t alarmId); 901 902 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason, 903 const int64_t subscriptionId); 904 905 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm( 906 const InvalidConfigReasonEnum reason, const int64_t subscriptionId, const int64_t alarmId); 907 908 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert( 909 const InvalidConfigReasonEnum reason, const int64_t subscriptionId, const int64_t alertId); 910 911 } // namespace statsd 912 } // namespace os 913 } // namespace android 914