1 /* 2 * Copyright 2017, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include <gtest/gtest_prod.h> 19 #include <log/log_time.h> 20 #include <src/guardrail/stats_log_enums.pb.h> 21 22 #include <limits> 23 #include <list> 24 #include <mutex> 25 #include <string> 26 #include <unordered_map> 27 #include <vector> 28 29 #include "LoggingRate.h" 30 #include "config/ConfigKey.h" 31 #include "logd/logevent_util.h" 32 33 namespace android { 34 namespace os { 35 namespace statsd { 36 37 struct InvalidConfigReason { 38 InvalidConfigReasonEnum reason; 39 std::optional<int64_t> metricId; 40 std::optional<int64_t> stateId; 41 std::optional<int64_t> alertId; 42 std::optional<int64_t> alarmId; 43 std::optional<int64_t> subscriptionId; 44 std::vector<int64_t> matcherIds; 45 std::vector<int64_t> conditionIds; InvalidConfigReasonInvalidConfigReason46 InvalidConfigReason(){}; InvalidConfigReasonInvalidConfigReason47 InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){}; InvalidConfigReasonInvalidConfigReason48 InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId) 49 : reason(reason), metricId(metricId){}; 50 bool operator==(const InvalidConfigReason& other) const { 51 return (this->reason == other.reason) && (this->metricId == other.metricId) && 52 (this->stateId == other.stateId) && (this->alertId == other.alertId) && 53 (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) && 54 (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds); 55 } 56 57 // For better failure messages in statsd_test 58 friend void PrintTo(const InvalidConfigReason& obj, std::ostream* os); 59 }; 60 61 typedef struct { 62 int64_t insertError = 0; 63 int64_t tableCreationError = 0; 64 int64_t tableDeletionError = 0; 65 std::list<int64_t> flushLatencyNs; 66 int64_t categoryChangedCount = 0; 67 } RestrictedMetricStats; 68 69 struct DumpReportStats { DumpReportStatsDumpReportStats70 DumpReportStats(int32_t dumpReportSec, int32_t dumpReportSize, int32_t reportNumber) 71 : mDumpReportTimeSec(dumpReportSec), 72 mDumpReportSizeBytes(dumpReportSize), 73 mDumpReportNumber(reportNumber) { 74 } 75 int32_t mDumpReportTimeSec = 0; 76 int32_t mDumpReportSizeBytes = 0; 77 int32_t mDumpReportNumber = 0; 78 }; 79 80 struct ConfigStats { 81 int32_t uid; 82 int64_t id; 83 int32_t creation_time_sec; 84 int32_t deletion_time_sec = 0; 85 int32_t reset_time_sec = 0; 86 int32_t metric_count; 87 int32_t condition_count; 88 int32_t matcher_count; 89 int32_t alert_count; 90 bool is_valid; 91 bool device_info_table_creation_failed = false; 92 int32_t db_corrupted_count = 0; 93 int32_t db_deletion_stat_failed = 0; 94 int32_t db_deletion_size_exceeded_limit = 0; 95 int32_t db_deletion_config_invalid = 0; 96 int32_t db_deletion_too_old = 0; 97 int32_t db_deletion_config_removed = 0; 98 int32_t db_deletion_config_updated = 0; 99 // Stores the number of ConfigMetadataProvider promotion failures 100 int32_t config_metadata_provider_promote_failure = 0; 101 102 // Stores reasons for why config is valid or not 103 std::optional<InvalidConfigReason> reason; 104 105 std::list<int32_t> broadcast_sent_time_sec; 106 107 // Times at which this config is activated. 108 std::list<int32_t> activation_time_sec; 109 110 // Times at which this config is deactivated. 111 std::list<int32_t> deactivation_time_sec; 112 113 std::list<int32_t> data_drop_time_sec; 114 // Number of bytes dropped at corresponding time. 115 std::list<int64_t> data_drop_bytes; 116 117 std::list<DumpReportStats> dump_report_stats; 118 119 // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount. 120 std::unordered_map<int64_t, int> matcher_stats; 121 122 // Stores the number of output tuple of condition trackers when it's bigger than 123 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 124 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 125 std::map<const int64_t, int> condition_stats; 126 127 // Stores the number of output tuple of metric producers when it's bigger than 128 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 129 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 130 std::map<const int64_t, int> metric_stats; 131 132 // Stores the max number of output tuple of dimensions in condition across dimensions in what 133 // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is 134 // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by 135 // kMaxConfigCount. 136 std::map<const int64_t, int> metric_dimension_in_condition_stats; 137 138 // Stores the number of times an anomaly detection alert has been declared. 139 // The map size is capped by kMaxConfigCount. 140 std::map<const int64_t, int> alert_stats; 141 142 // Stores the config ID for each sub-config used. 143 std::list<std::pair<const int64_t, const int32_t>> annotations; 144 145 // Maps metric ID of restricted metric to its stats. 146 std::map<int64_t, RestrictedMetricStats> restricted_metric_stats; 147 148 std::list<int64_t> total_flush_latency_ns; 149 150 // Stores the last 20 timestamps for computing sqlite db size. 151 std::list<int64_t> total_db_size_timestamps; 152 153 // Stores the last 20 sizes of the sqlite db. 154 std::list<int64_t> total_db_sizes; 155 }; 156 157 struct UidMapStats { 158 int32_t changes = 0; 159 int32_t bytes_used = 0; 160 int32_t dropped_changes = 0; 161 int32_t deleted_apps = 0; 162 }; 163 164 struct SubscriptionStats { 165 int32_t pushed_atom_count = 0; 166 int32_t pulled_atom_count = 0; 167 int32_t start_time_sec = 0; 168 int32_t end_time_sec = 0; 169 int32_t flush_count = 0; 170 }; 171 172 // Keeps track of stats of statsd. 173 // Single instance shared across the process. All public methods are thread safe. 174 class StatsdStats { 175 public: 176 static StatsdStats& getInstance(); ~StatsdStats()177 ~StatsdStats(){}; 178 179 const static int kDimensionKeySizeSoftLimit = 500; 180 static constexpr int kDimensionKeySizeHardLimit = 800; 181 static constexpr int kDimensionKeySizeHardLimitMin = 800; 182 static constexpr int kDimensionKeySizeHardLimitMax = 3000; 183 184 // Per atom dimension key size limit 185 static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap; 186 187 const static int kMaxConfigCountPerUid = 20; 188 const static int kMaxAlertCountPerConfig = 200; 189 const static int kMaxConditionCountPerConfig = 500; 190 const static int kMaxMetricCountPerConfig = 3000; 191 const static int kMaxMatcherCountPerConfig = 3500; 192 193 // The max number of old config stats we keep. 194 const static int kMaxIceBoxSize = 20; 195 196 const static int kMaxLoggerErrors = 20; 197 198 const static int kMaxSystemServerRestarts = 20; 199 200 const static int kMaxTimestampCount = 20; 201 202 const static int kMaxLogSourceCount = 150; 203 204 const static int kMaxPullAtomPackages = 100; 205 206 const static int kMaxRestrictedMetricQueryCount = 20; 207 208 const static int kMaxRestrictedMetricFlushLatencyCount = 20; 209 210 const static int kMaxRestrictedConfigFlushLatencyCount = 20; 211 212 const static int kMaxRestrictedConfigDbSizeCount = 20; 213 214 // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd 215 // drops the metrics data in memory. 216 static const size_t kDefaultMaxMetricsBytesPerConfig = 2 * 1024 * 1024; 217 218 // Hard limit for custom memory allowed for storing metrics per configuration. 219 static const size_t kHardMaxMetricsBytesPerConfig = 20 * 1024 * 1024; 220 221 // Max memory allowed for storing metrics per configuration before triggering a intent to fetch 222 // data. 223 static const size_t kHardMaxTriggerGetDataBytes = 10 * 1024 * 1024; 224 225 // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the 226 // data subscriber that it's time to call getData. 227 static const size_t kDefaultBytesPerConfigTriggerGetData = 192 * 1024; 228 229 // Soft memory limit per restricted configuration. Once this limit is exceeded, 230 // we begin flush in-memory restricted metrics to database. 231 static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024; 232 233 // Cap the UID map's memory usage to this. This should be fairly high since the UID information 234 // is critical for understanding the metrics. 235 const static size_t kMaxBytesUsedUidMap = 50 * 1024; 236 237 // The number of deleted apps that are stored in the uid map. 238 const static int kMaxDeletedAppsInUidMap = 100; 239 240 /* Minimum period between two broadcasts in nanoseconds. */ 241 static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC; 242 243 /* Min period between two checks of byte size per config key in nanoseconds. */ 244 static const int64_t kMinByteSizeCheckPeriodNs = 1 * 60 * NS_PER_SEC; 245 246 // Min period between two checks of byte size per config key in nanoseconds for V2 memory 247 // calculations. 248 static const int64_t kMinByteSizeV2CheckPeriodNs = 5 * 60 * NS_PER_SEC; 249 250 /* Min period between two checks of restricted metrics TTLs. */ 251 static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC; 252 253 /* Min period between two flush operations of restricted metrics. */ 254 static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC; 255 256 /* Min period between two db guardrail check operations of restricted metrics. */ 257 static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC; 258 259 /* Minimum period between two activation broadcasts in nanoseconds. */ 260 static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC; 261 262 // Maximum age (30 days) that files on disk can exist in seconds. 263 static const int kMaxAgeSecond = 60 * 60 * 24 * 30; 264 265 // Maximum age (2 days) that local history files on disk can exist in seconds. 266 static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2; 267 268 // Maximum number of files (1000) that can be in stats directory on disk. 269 static const int kMaxFileNumber = 1000; 270 271 // Maximum size of all files that can be written to stats directory on disk. 272 static const int kMaxFileSize = 50 * 1024 * 1024; 273 274 // How long to try to clear puller cache from last time 275 static const long kPullerCacheClearIntervalSec = 1; 276 277 // Max time to do a pull. 278 static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC; 279 280 // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId. 281 static const int kMaxNonPlatformPushedAtoms = 600; 282 283 // Maximum number of pushed atoms error statsd stats will track. 284 static const int kMaxPushedAtomErrorStatsSize = 100; 285 286 // Maximum number of socket loss stats to track. 287 static const int kMaxSocketLossStatsSize = 50; 288 289 // Maximum atom id value that we consider a platform pushed atom. 290 // This should be updated once highest pushed atom id in atoms.proto approaches this value. 291 static const int32_t kMaxPushedAtomId = 1500; 292 293 // Atom id that is the start of the pulled atoms. 294 static const int32_t kPullAtomStartTag = 10000; 295 296 // Atom id that is the start of vendor atoms. 297 static const int32_t kVendorAtomStartTag = 100000; 298 299 // Vendor pulled atom start id. 300 static const int32_t kVendorPulledAtomStartTag = 150000; 301 302 // Beginning of range for timestamp truncation. 303 static const int32_t kTimestampTruncationStartTag = 300000; 304 305 // End of range for timestamp truncation. 306 static const int32_t kTimestampTruncationEndTag = 304999; 307 308 // Max accepted atom id. 309 static const int32_t kMaxAtomTag = 200000; 310 311 static const int32_t kMaxLoggedBucketDropEvents = 10; 312 313 static const int32_t kNumBinsInSocketBatchReadHistogram = 30; 314 static const int32_t kLargeBatchReadThreshold = 1000; 315 static const int32_t kMaxLargeBatchReadSize = 20; 316 static const int32_t kMaxLargeBatchReadAtomThreshold = 50; 317 318 static const int32_t kMaxLoggingRateStatsToReport = 50; 319 320 /** 321 * Report a new config has been received and report the static stats about the config. 322 * 323 * The static stats include: the count of metrics, conditions, matchers, and alerts. 324 * If the config is not valid, this config stats will be put into icebox immediately. 325 */ 326 void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount, 327 int matchersCount, int alertCount, 328 const std::list<std::pair<const int64_t, const int32_t>>& annotations, 329 const std::optional<InvalidConfigReason>& reason); 330 /** 331 * Report a config has been removed. 332 */ 333 void noteConfigRemoved(const ConfigKey& key); 334 /** 335 * Report a config has been reset when ttl expires. 336 */ 337 void noteConfigReset(const ConfigKey& key); 338 339 /** 340 * Report a broadcast has been sent to a config owner to collect the data. 341 */ 342 void noteBroadcastSent(const ConfigKey& key); 343 344 /** 345 * Report that a config has become activated or deactivated. 346 * This can be different from whether or not a broadcast is sent if the 347 * guardrail prevented the broadcast from being sent. 348 */ 349 void noteActiveStatusChanged(const ConfigKey& key, bool activate); 350 351 /** 352 * Report a config's metrics data has been dropped. 353 */ 354 void noteDataDropped(const ConfigKey& key, const size_t totalBytes); 355 356 /** 357 * Report metrics data report has been sent. 358 * 359 * The report may be requested via StatsManager API, or through adb cmd. 360 */ 361 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, 362 const int32_t reportNumber); 363 364 /** 365 * Report failure in creating the device info metadata table for restricted configs. 366 */ 367 void noteDeviceInfoTableCreationFailed(const ConfigKey& key); 368 369 /** 370 * Report db corruption for restricted configs. 371 */ 372 void noteDbCorrupted(const ConfigKey& key); 373 374 /** 375 * Report db exceeded the size limit for restricted configs. 376 */ 377 void noteDbSizeExceeded(const ConfigKey& key); 378 379 /** 380 * Report db size check with stat for restricted configs failed. 381 */ 382 void noteDbStatFailed(const ConfigKey& key); 383 384 /** 385 * Report restricted config is invalid. 386 */ 387 void noteDbConfigInvalid(const ConfigKey& key); 388 389 /** 390 * Report db is too old for restricted configs. 391 */ 392 void noteDbTooOld(const ConfigKey& key); 393 394 /** 395 * Report db was deleted due to config removal. 396 */ 397 void noteDbDeletionConfigRemoved(const ConfigKey& key); 398 399 /** 400 * Report db was deleted due to config update. 401 */ 402 void noteDbDeletionConfigUpdated(const ConfigKey& key); 403 404 /** 405 * Reports that the promotion for ConfigMetadataProvider failed. 406 */ 407 void noteConfigMetadataProviderPromotionFailed(const ConfigKey& key); 408 409 /** 410 * Report the size of output tuple of a condition. 411 * 412 * Note: only report when the condition has an output dimension, and the tuple 413 * count > kDimensionKeySizeSoftLimit. 414 * 415 * [key]: The config key that this condition belongs to. 416 * [id]: The id of the condition. 417 * [size]: The output tuple size. 418 */ 419 void noteConditionDimensionSize(const ConfigKey& key, int64_t id, int size); 420 421 /** 422 * Report the size of output tuple of a metric. 423 * 424 * Note: only report when the metric has an output dimension, and the tuple 425 * count > kDimensionKeySizeSoftLimit. 426 * 427 * [key]: The config key that this metric belongs to. 428 * [id]: The id of the metric. 429 * [size]: The output tuple size. 430 */ 431 void noteMetricDimensionSize(const ConfigKey& key, int64_t id, int size); 432 433 /** 434 * Report the max size of output tuple of dimension in condition across dimensions in what. 435 * 436 * Note: only report when the metric has an output dimension in condition, and the max tuple 437 * count > kDimensionKeySizeSoftLimit. 438 * 439 * [key]: The config key that this metric belongs to. 440 * [id]: The id of the metric. 441 * [size]: The output tuple size. 442 */ 443 void noteMetricDimensionInConditionSize(const ConfigKey& key, int64_t id, int size); 444 445 /** 446 * Report a matcher has been matched. 447 * 448 * [key]: The config key that this matcher belongs to. 449 * [id]: The id of the matcher. 450 */ 451 void noteMatcherMatched(const ConfigKey& key, int64_t id); 452 453 /** 454 * Report that an anomaly detection alert has been declared. 455 * 456 * [key]: The config key that this alert belongs to. 457 * [id]: The id of the alert. 458 */ 459 void noteAnomalyDeclared(const ConfigKey& key, int64_t id); 460 461 /** 462 * Report an atom event has been logged. 463 */ 464 void noteAtomLogged(int atomId, int64_t eventTimestampNs, bool isSkipped); 465 466 /** 467 * Report that statsd modified the anomaly alarm registered with StatsCompanionService. 468 */ 469 void noteRegisteredAnomalyAlarmChanged(); 470 471 /** 472 * Report that statsd modified the periodic alarm registered with StatsCompanionService. 473 */ 474 void noteRegisteredPeriodicAlarmChanged(); 475 476 /** 477 * Records the number of delta entries that are being dropped from the uid map. 478 */ 479 void noteUidMapDropped(int deltas); 480 481 /** 482 * Records that an app was deleted (from statsd's map). 483 */ 484 void noteUidMapAppDeletionDropped(); 485 486 /** 487 * Updates the number of changes currently stored in the uid map. 488 */ 489 void setUidMapChanges(int changes); 490 void setCurrentUidMapMemory(int bytes); 491 492 /* 493 * Updates minimum interval between pulls for an pulled atom. 494 */ 495 void updateMinPullIntervalSec(int pullAtomId, long intervalSec); 496 497 /* 498 * Notes an atom is pulled. 499 */ 500 void notePull(int pullAtomId); 501 502 /* 503 * Notes an atom is served from puller cache. 504 */ 505 void notePullFromCache(int pullAtomId); 506 507 /* 508 * Notify data error for pulled atom. 509 */ 510 void notePullDataError(int pullAtomId); 511 512 /* 513 * Records time for actual pulling, not including those served from cache and not including 514 * statsd processing delays. 515 */ 516 void notePullTime(int pullAtomId, int64_t pullTimeNs); 517 518 /* 519 * Records pull delay for a pulled atom, including those served from cache and including statsd 520 * processing delays. 521 */ 522 void notePullDelay(int pullAtomId, int64_t pullDelayNs); 523 524 /* 525 * Records pull exceeds timeout for the puller. 526 */ 527 void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis); 528 529 /* 530 * Records pull exceeds max delay for a metric. 531 */ 532 void notePullExceedMaxDelay(int pullAtomId); 533 534 /* 535 * Records when system server restarts. 536 */ 537 void noteSystemServerRestart(int32_t timeSec); 538 539 /** 540 * Records statsd skipped an event. 541 */ 542 void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError, 543 int32_t lastAtomTag, int32_t uid, int32_t pid); 544 545 /** 546 * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if 547 * the pull timed out, or if the outgoing binder call failed. 548 * This count will only increment if the puller was actually invoked. 549 * 550 * It does not include a pull not occurring due to not finding the appropriate 551 * puller. These cases are covered in other counts. 552 */ 553 void notePullFailed(int atomId); 554 555 /** 556 * Records that the pull of an atom has failed due to not having a uid provider. 557 */ 558 void notePullUidProviderNotFound(int atomId); 559 560 /** 561 * Records that the pull of an atom has failed due not finding a puller registered by a 562 * trusted uid. 563 */ 564 void notePullerNotFound(int atomId); 565 566 /** 567 * Records that the pull has failed due to the outgoing binder call failing. 568 */ 569 void notePullBinderCallFailed(int atomId); 570 571 /** 572 * A pull with no data occurred 573 */ 574 void noteEmptyData(int atomId); 575 576 /** 577 * Records that a puller callback for the given atomId was registered or unregistered. 578 * 579 * @param registered True if the callback was registered, false if was unregistered. 580 */ 581 void notePullerCallbackRegistrationChanged(int atomId, bool registered); 582 583 /** 584 * Hard limit was reached in the cardinality of an atom 585 */ 586 void noteHardDimensionLimitReached(int64_t metricId); 587 588 /** 589 * A log event was too late, arrived in the wrong bucket and was skipped 590 */ 591 void noteLateLogEventSkipped(int64_t metricId); 592 593 /** 594 * Buckets were skipped as time elapsed without any data for them 595 */ 596 void noteSkippedForwardBuckets(int64_t metricId); 597 598 /** 599 * An unsupported value type was received 600 */ 601 void noteBadValueType(int64_t metricId); 602 603 /** 604 * Buckets were dropped due to reclaim memory. 605 */ 606 void noteBucketDropped(int64_t metricId); 607 608 /** 609 * A condition change was too late, arrived in the wrong bucket and was skipped 610 */ 611 void noteConditionChangeInNextBucket(int64_t metricId); 612 613 /** 614 * A bucket has been tagged as invalid. 615 */ 616 void noteInvalidatedBucket(int64_t metricId); 617 618 /** 619 * Tracks the total number of buckets (include skipped/invalid buckets). 620 */ 621 void noteBucketCount(int64_t metricId); 622 623 /** 624 * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and 625 * the end of the bucket. 626 */ 627 void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs); 628 629 /** 630 * Number of buckets with unknown condition. 631 */ 632 void noteBucketUnknownCondition(int64_t metricId); 633 634 /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp 635 * in the queue. There is an expectation that noteAtomLogged() is called for the same 636 * atomId 637 */ 638 void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId); 639 640 /* Notes queue max size seen so far and associated timestamp */ 641 void noteEventQueueSize(int32_t size, int64_t eventTimestampNs); 642 643 /** 644 * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast 645 * should have been sent, but instead was skipped due to hitting the guardrail. 646 */ 647 void noteActivationBroadcastGuardrailHit(const int uid); 648 649 /** 650 * Reports that an atom is erroneous or cannot be parsed successfully by 651 * statsd. An atom tag of 0 indicates that the client did not supply the 652 * atom id within the encoding. 653 * 654 * For pushed atoms only, this call should be preceded by a call to 655 * noteAtomLogged. 656 */ 657 void noteAtomError(int atomTag, bool pull = false); 658 659 /** 660 * Increases counter associated with a CounterType. 661 */ 662 void noteIllegalState(CounterType error); 663 664 /** Report query of restricted metric succeed **/ 665 void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage, 666 const std::optional<int32_t> configUid, 667 const int32_t callingUid, int64_t queryLatencyNs); 668 669 /** Report query of restricted metric failed **/ 670 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 671 const std::optional<int32_t> configUid, 672 const int32_t callingUid, const InvalidQueryReason reason); 673 674 /** Report query of restricted metric failed along with an error string **/ 675 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 676 const std::optional<int32_t> configUid, 677 const int32_t callingUid, const InvalidQueryReason reason, 678 const string& error); 679 680 // Reports that a restricted metric fails to be inserted to database. 681 void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId); 682 683 // Reports that a restricted metric fails to create table in database. 684 void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, int64_t metricId); 685 686 // Reports that a restricted metric fails to delete table in database. 687 void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, int64_t metricId); 688 689 // Reports the time it takes for a restricted metric to flush the data to the database. 690 void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, int64_t metricId, 691 const int64_t flushLatencyNs); 692 693 // Reports that a restricted metric had a category change. 694 void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, int64_t metricId); 695 696 // Reports the time is takes to flush a restricted config to the database. 697 void noteRestrictedConfigFlushLatency(const ConfigKey& configKey, 698 const int64_t totalFlushLatencyNs); 699 700 // Reports the size of the internal sqlite db. 701 void noteRestrictedConfigDbSize(const ConfigKey& configKey, int64_t elapsedTimeNs, 702 const int64_t dbSize); 703 704 /** 705 * Records libstatssocket was not able to write into socket. 706 */ 707 void noteAtomSocketLoss(const SocketLossInfo& lossInfo); 708 709 /** 710 * Report a new subscription has started and report the static stats about the subscription 711 * config. 712 * 713 * The static stats include: the count of pushed atoms and pulled atoms. 714 */ 715 void noteSubscriptionStarted(int subId, int32_t pushedAtomCount, int32_t pulledAtomCount); 716 717 /** 718 * Report an existing subscription has ended. 719 */ 720 void noteSubscriptionEnded(int subId); 721 722 /** 723 * Report an existing subscription was flushed. 724 */ 725 void noteSubscriptionFlushed(int subId); 726 727 /** 728 * Report an atom was pulled for a subscription. 729 */ 730 void noteSubscriptionAtomPulled(int atomId); 731 732 /** 733 * Report subscriber pull thread wakeup. 734 */ 735 void noteSubscriptionPullThreadWakeup(); 736 737 void noteBatchSocketRead(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 738 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 739 const std::unordered_map<int32_t, int32_t>& atomCounts); 740 741 /** 742 * Reset the historical stats. Including all stats in icebox, and the tracked stats about 743 * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue 744 * to collect stats after reset() has been called. 745 */ 746 void reset(); 747 748 /** 749 * Output the stats in protobuf binary format to [buffer]. 750 * 751 * [reset]: whether to clear the historical stats after the call. 752 */ 753 void dumpStats(std::vector<uint8_t>* buffer, bool reset); 754 755 /** 756 * Output statsd stats in human readable format to [out] file descriptor. 757 */ 758 void dumpStats(int outFd) const; 759 760 /** 761 * Returns true if dimension guardrail has been hit since boot for given metric. 762 */ 763 bool hasHitDimensionGuardrail(int64_t metricId) const; 764 765 /** 766 * Return soft and hard atom key dimension size limits as an std::pair. 767 */ 768 static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(int atomId, 769 size_t defaultHardLimit); 770 clampDimensionKeySizeLimit(int dimLimit)771 inline static int clampDimensionKeySizeLimit(int dimLimit) { 772 return std::clamp(dimLimit, kDimensionKeySizeHardLimitMin, kDimensionKeySizeHardLimitMax); 773 } 774 775 /** 776 * Return the unique identifier for the statsd stats report. This id is 777 * reset on boot. 778 */ getStatsdStatsId()779 inline int32_t getStatsdStatsId() const { 780 return mStatsdStatsId; 781 } 782 783 /** 784 * Returns true if there is recorded event queue overflow 785 */ 786 bool hasEventQueueOverflow() const; 787 788 typedef std::unordered_map<int32_t, int32_t> QueueOverflowAtomsStatsMap; 789 QueueOverflowAtomsStatsMap getQueueOverflowAtomsStats() const; 790 791 /** 792 * Returns true if there is recorded socket loss 793 */ 794 bool hasSocketLoss() const; 795 796 typedef struct PullTimeoutMetadata { 797 int64_t pullTimeoutUptimeMillis; 798 int64_t pullTimeoutElapsedMillis; PullTimeoutMetadataPullTimeoutMetadata799 PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) 800 : pullTimeoutUptimeMillis(uptimeMillis), 801 pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */ 802 } 803 } PullTimeoutMetadata; 804 805 typedef struct { 806 long totalPull = 0; 807 long totalPullFromCache = 0; 808 long minPullIntervalSec = LONG_MAX; 809 int64_t avgPullTimeNs = 0; 810 int64_t maxPullTimeNs = 0; 811 long numPullTime = 0; 812 int64_t avgPullDelayNs = 0; 813 int64_t maxPullDelayNs = 0; 814 long numPullDelay = 0; 815 long dataError = 0; 816 long pullTimeout = 0; 817 long pullExceedMaxDelay = 0; 818 long pullFailed = 0; 819 long pullUidProviderNotFound = 0; 820 long pullerNotFound = 0; 821 long emptyData = 0; 822 long registeredCount = 0; 823 long unregisteredCount = 0; 824 int32_t atomErrorCount = 0; 825 long binderCallFailCount = 0; 826 std::list<PullTimeoutMetadata> pullTimeoutMetadata; 827 int32_t subscriptionPullCount = 0; 828 } PulledAtomStats; 829 830 typedef struct { 831 long hardDimensionLimitReached = 0; 832 long lateLogEventSkipped = 0; 833 long skippedForwardBuckets = 0; 834 long badValueType = 0; 835 long conditionChangeInNextBucket = 0; 836 long invalidatedBucket = 0; 837 long bucketDropped = 0; 838 int64_t minBucketBoundaryDelayNs = 0; 839 int64_t maxBucketBoundaryDelayNs = 0; 840 long bucketUnknownCondition = 0; 841 long bucketCount = 0; 842 } AtomMetricStats; 843 844 private: 845 StatsdStats(); 846 847 mutable std::mutex mLock; 848 849 int32_t mStartTimeSec; 850 851 // Random id set using rand() during the initialization. Used to uniquely 852 // identify a session. This is more reliable than mStartTimeSec due to the 853 // unreliable nature of wall clock times. 854 const int32_t mStatsdStatsId; 855 856 // Track the number of dropped entries used by the uid map. 857 UidMapStats mUidMapStats; 858 859 // The stats about the configs that are still in use. 860 // The map size is capped by kMaxConfigCount. 861 std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats; 862 863 // Stores the stats for the configs that are no longer in use. 864 // The size of the vector is capped by kMaxIceBoxSize. 865 std::list<std::shared_ptr<ConfigStats>> mIceBox; 866 867 // Stores the number of times a pushed atom is logged and skipped (if skipped). 868 // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms 869 // out of that range will be put in mNonPlatformPushedAtomStats. 870 // This is a vector, not a map because it will be accessed A LOT -- for each stats log. 871 struct PushedAtomStats { 872 int logCount = 0; 873 int skipCount = 0; 874 }; 875 876 std::vector<PushedAtomStats> mPushedAtomStats; 877 878 // Stores the number of times a pushed atom is logged and skipped for atom ids above 879 // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms. 880 std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats; 881 882 // Stores the number of times a pushed atom is dropped due to queue overflow event. 883 // We do not expect it will happen too often so the map is preferable vs pre-allocated vector 884 // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms. 885 QueueOverflowAtomsStatsMap mPushedAtomDropsStats; 886 887 // Maps PullAtomId to its stats. The size is capped by the puller atom counts. 888 std::map<int, PulledAtomStats> mPulledAtomStats; 889 890 // Tracks counter associated with CounterType to represent errors. Max capacity == CounterType 891 std::map<CounterType, int32_t> mErrorStats; 892 893 // Stores the number of times a pushed atom was logged erroneously. The 894 // corresponding counts for pulled atoms are stored in PulledAtomStats. 895 // The max size of this map is kMaxPushedAtomErrorStatsSize. 896 std::map<int, int> mPushedAtomErrorStats; 897 898 // Stores the number of times a pushed atom was lost due to socket error. 899 // Represents counter per uid per tag per error with indication when the loss event was observed 900 // first & last time. 901 struct SocketLossStats { SocketLossStatsSocketLossStats902 SocketLossStats(int32_t uid, int64_t firstLossTsNanos, int64_t lastLossTsNanos) 903 : mUid(uid), mFirstLossTsNanos(firstLossTsNanos), mLastLossTsNanos(lastLossTsNanos) { 904 } 905 906 int32_t mUid; 907 int64_t mFirstLossTsNanos; 908 int64_t mLastLossTsNanos; 909 // atom loss count per error, atom id 910 struct AtomLossInfo { AtomLossInfoSocketLossStats::AtomLossInfo911 AtomLossInfo(int32_t atomId, int32_t error, int32_t count) 912 : mAtomId(atomId), mError(error), mCount(count) { 913 } 914 int mAtomId; 915 int mError; 916 int mCount; 917 }; 918 std::vector<AtomLossInfo> mLossCountPerErrorAtomId; 919 }; 920 // The max size of this list is kMaxSocketLossStatsSize. 921 std::list<SocketLossStats> mSocketLossStats; 922 923 // Stores the number of times a pushed atom loss info was dropped from the stats 924 // on libstatssocket side due to guardrail hit. 925 // Represents counter per uid. 926 // The max size of this map is kMaxSocketLossStatsSize. 927 std::map<int32_t, int32_t> mSocketLossStatsOverflowCounters; 928 929 // Maps metric ID to its stats. The size is capped by the number of metrics. 930 std::map<int64_t, AtomMetricStats> mAtomMetricStats; 931 932 // Maps uids to times when the activation changed broadcast not sent due to hitting the 933 // guardrail. The size is capped by the number of configs, and up to 20 times per uid. 934 std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats; 935 936 struct LogLossStats { LogLossStatsLogLossStats937 LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid, 938 int32_t pid) 939 : mWallClockSec(sec), 940 mCount(count), 941 mLastError(error), 942 mLastTag(tag), 943 mUid(uid), 944 mPid(pid) { 945 } 946 int32_t mWallClockSec; 947 int32_t mCount; 948 // error code defined in linux/errno.h 949 int32_t mLastError; 950 int32_t mLastTag; 951 int32_t mUid; 952 int32_t mPid; 953 }; 954 955 // Max of {(now - oldestEventTimestamp) when overflow happens}. 956 // This number is helpful to understand how SLOW statsd can be. 957 int64_t mMaxQueueHistoryNs = 0; 958 959 // Min of {(now - oldestEventTimestamp) when overflow happens}. 960 // This number is helpful to understand how FAST the events floods to statsd. 961 int64_t mMinQueueHistoryNs = std::numeric_limits<int64_t>::max(); 962 963 // Total number of events that are lost due to queue overflow. 964 int32_t mOverflowCount = 0; 965 966 // Max number of events stored into the queue seen so far. 967 int32_t mEventQueueMaxSizeObserved = 0; 968 969 // Event timestamp for associated max size hit. 970 int64_t mEventQueueMaxSizeObservedElapsedNanos = 0; 971 972 // Timestamps when we detect log loss, and the number of logs lost. 973 std::list<LogLossStats> mLogLossStats; 974 975 LoggingRate mLoggingRateStats; 976 977 std::list<int32_t> mSystemServerRestartSec; 978 979 std::vector<int64_t> mSocketBatchReadHistogram; 980 981 // Stores stats about large socket batch reads 982 struct LargeBatchSocketReadStats { LargeBatchSocketReadStatsLargeBatchSocketReadStats983 LargeBatchSocketReadStats(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 984 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 985 const std::unordered_map<int32_t, int32_t>& atomCounts) 986 : mSize(size), 987 mLastReadTimeNs(lastReadTimeNs), 988 mCurrReadTimeNs(currReadTimeNs), 989 mMinAtomReadTimeNs(minAtomReadTimeNs), 990 mMaxAtomReadTimeNs(maxAtomReadTimeNs), 991 mCommonAtomCounts(atomCounts) { 992 } 993 994 int32_t mSize; 995 // The elapsed time of the previous and current read times. 996 int64_t mLastReadTimeNs; 997 int64_t mCurrReadTimeNs; 998 // The min and max times of the LogEvents processed in the batch 999 int64_t mMinAtomReadTimeNs; 1000 int64_t mMaxAtomReadTimeNs; 1001 // Map of atom id to count for atoms logged more than kMaxLargeBatchReadAtomThreshold times. 1002 std::unordered_map<int32_t, int32_t> mCommonAtomCounts; 1003 }; 1004 // The max size of this list is kMaxSocketLossStatsSize. 1005 std::list<LargeBatchSocketReadStats> mLargeBatchSocketReadStats; 1006 1007 struct RestrictedMetricQueryStats { RestrictedMetricQueryStatsRestrictedMetricQueryStats1008 RestrictedMetricQueryStats(int32_t callingUid, int64_t configId, 1009 const string& configPackage, std::optional<int32_t> configUid, 1010 int64_t queryTimeNs, 1011 std::optional<InvalidQueryReason> invalidQueryReason, 1012 const string& error, std::optional<int64_t> queryLatencyNs) 1013 : mCallingUid(callingUid), 1014 mConfigId(configId), 1015 mConfigPackage(configPackage), 1016 mConfigUid(configUid), 1017 mQueryWallTimeNs(queryTimeNs), 1018 mInvalidQueryReason(invalidQueryReason), 1019 mError(error), 1020 mQueryLatencyNs(queryLatencyNs) { 1021 mHasError = invalidQueryReason.has_value(); 1022 } 1023 int32_t mCallingUid; 1024 int64_t mConfigId; 1025 string mConfigPackage; 1026 std::optional<int32_t> mConfigUid; 1027 int64_t mQueryWallTimeNs; 1028 std::optional<InvalidQueryReason> mInvalidQueryReason; 1029 bool mHasError; 1030 string mError; 1031 std::optional<int64_t> mQueryLatencyNs; 1032 }; 1033 std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats; 1034 1035 void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage, 1036 const std::optional<int32_t> configUid, 1037 const int32_t callingUid, 1038 const InvalidQueryReason reason, 1039 const string& error); 1040 1041 int32_t mSubscriptionPullThreadWakeupCount = 0; 1042 1043 // Maps Subscription ID to the corresponding SubscriptionStats struct object. 1044 // Size of this map is capped by ShellSubscriber::kMaxSubscriptions. 1045 std::map<int32_t, SubscriptionStats> mSubscriptionStats; 1046 1047 // Stores the number of times statsd modified the anomaly alarm registered with 1048 // StatsCompanionService. 1049 int mAnomalyAlarmRegisteredStats = 0; 1050 1051 // Stores the number of times statsd registers the periodic alarm changes 1052 int mPeriodicAlarmRegisteredStats = 0; 1053 1054 void noteConfigResetInternalLocked(const ConfigKey& key); 1055 1056 void noteConfigRemovedInternalLocked(const ConfigKey& key); 1057 1058 void resetInternalLocked(); 1059 1060 void noteAtomLoggedLocked(int atomId, int64_t eventTimestampNs, bool isSkipped); 1061 1062 void noteAtomDroppedLocked(int atomId); 1063 1064 void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec); 1065 1066 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, int32_t timeSec, 1067 const int32_t reportNumber); 1068 1069 void noteBroadcastSent(const ConfigKey& key, int32_t timeSec); 1070 1071 void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec); 1072 1073 void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec); 1074 1075 void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats); 1076 1077 int getPushedAtomErrorsLocked(int atomId) const; 1078 1079 int getPushedAtomDropsLocked(int atomId) const; 1080 1081 int getLoggingRateLocked(int atomId) const; 1082 1083 bool hasRestrictedConfigErrors(const std::shared_ptr<ConfigStats>& configStats) const; 1084 1085 /** 1086 * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference 1087 * will live as long as `this`. 1088 */ 1089 StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId); 1090 1091 FRIEND_TEST(LogEventQueue_test, TestQueueMaxSize); 1092 FRIEND_TEST(SocketParseMessageTest, TestProcessMessage); 1093 FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved); 1094 FRIEND_TEST(StatsPullerManagerTest, TestOnAlarmFiredNoPullerForUidNotesPullerNotFound); 1095 FRIEND_TEST(StatsPullerManagerTest, TestOnAlarmFiredNoUidProviderUpdatesNextPullTime); 1096 FRIEND_TEST(StatsPullerManagerTest, TestOnAlarmFiredUidsNotRegisteredInPullAtomCallback); 1097 FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit); 1098 FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor); 1099 FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats); 1100 FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats); 1101 FRIEND_TEST(StatsdStatsTest, TestAtomLog); 1102 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats); 1103 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats); 1104 FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats); 1105 FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats); 1106 FRIEND_TEST(StatsdStatsTest, TestConfigMetadataProviderPromotionFailed); 1107 FRIEND_TEST(StatsdStatsTest, TestConfigRemove); 1108 FRIEND_TEST(StatsdStatsTest, TestHasHitDimensionGuardrail); 1109 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd); 1110 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId); 1111 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId); 1112 FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog); 1113 FRIEND_TEST(StatsdStatsTest, TestPullAtomStats); 1114 FRIEND_TEST(StatsdStatsTest, TestQueueStats); 1115 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats); 1116 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats); 1117 FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider); 1118 FRIEND_TEST(StatsdStatsTest, TestSocketLossStats); 1119 FRIEND_TEST(StatsdStatsTest, TestSocketLossStatsOverflowCounter); 1120 FRIEND_TEST(StatsdStatsTest, TestSubStats); 1121 FRIEND_TEST(StatsdStatsTest, TestSubscriptionAtomPulled); 1122 FRIEND_TEST(StatsdStatsTest, TestSubscriptionEnded); 1123 FRIEND_TEST(StatsdStatsTest, TestSubscriptionFlushed); 1124 FRIEND_TEST(StatsdStatsTest, TestSubscriptionPullThreadWakeup); 1125 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStarted); 1126 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedMaxActiveSubscriptions); 1127 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedRemoveFinishedSubscription); 1128 FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash); 1129 FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold); 1130 FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd); 1131 FRIEND_TEST(StatsdStatsTest, TestSocketBatchReadStats); 1132 FRIEND_TEST(StatsdStatsTest, TestErrorStatsReport); 1133 FRIEND_TEST(StatsdStatsTest, TestErrorStatsReportReset); 1134 FRIEND_TEST(StatsdStatsTest, TestLoggingRateReport); 1135 FRIEND_TEST(StatsdStatsTest, TestLoggingRateReportOnlyTopN); 1136 FRIEND_TEST(StatsdStatsTest, TestLoggingRateReportReset); 1137 }; 1138 1139 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1140 const int64_t matcherId); 1141 1142 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1143 const int64_t metricId, 1144 const int64_t matcherId); 1145 1146 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1147 const int64_t conditionId); 1148 1149 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1150 const int64_t metricId, 1151 const int64_t conditionId); 1152 1153 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason, 1154 const int64_t metricId, 1155 const int64_t stateId); 1156 1157 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1158 const int64_t alertId); 1159 1160 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1161 const int64_t metricId, 1162 const int64_t alertId); 1163 1164 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason, 1165 const int64_t alarmId); 1166 1167 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason, 1168 const int64_t subscriptionId); 1169 1170 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm( 1171 const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alarmId); 1172 1173 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert( 1174 const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alertId); 1175 1176 } // namespace statsd 1177 } // namespace os 1178 } // namespace android 1179