// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

import "cobalt/proto/window_size.proto";

option java_multiple_files = true;
option java_package = "com.google.cobalt";

////////////////////////////////////////////////////////////////////////////////
// NOTE: This file is used by the Cobalt client and the Cobalt servers.
// The source-of-truth of this file is located in Cobalt's open source code
// repository, and the file is copied to Android where it is used by the Cobalt
// client. Do not edit the copy of this file in this Android repo as those edits
// will be overwritten when the file is next copied.
////////////////////////////////////////////////////////////////////////////////

// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// Fuchsia device where Cobalt analyzes the logged Events in order to form
// Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all Fuchsia devices in order to generate
// a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
message ReportDefinition {
  reserved 4, 5, 6, 7, 8, 11, 14, 15, 16, 12, 101, 102, 31, 21;
  // NOTE(review): "candidate_lis" looks like a truncation of "candidate_list";
  // confirm against the name of the field that was actually removed.
  reserved "aggregation_type", "aggregation_window", "candidate_lis",
           "dp_release_config", "expected_population_size",
           "expected_string_set_size", "export_location_override",
           "local_privacy_noise_level", "output_location", "percentiles",
           "threshold", "window_size", "use_poisson_mechanism_for_privacy",
           "prob_bit_flip";

  // Unique name for this Report within its owning MetricDefinition.
  // The name must obey the syntax of a C variable name and must have length
  // at most 64. The integer |id| field is the stable identifier for a report
  // so this name may be changed. However doing this may affect the
  // names and locations of some artifacts produced by Cobalt's report
  // generation pipeline.
  string report_name = 1;

  // The unique integer ID for this report within its owning metric.
  // The user must manually set this |id| field. This is the stable identifier
  // for a report and should not be changed once data collection begins.
  uint32 id = 2;

  // A Report has one of the following types.
  // Next standard report type ID: 22
  enum ReportType {
    reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT",
             "HIGH_FREQUENCY_STRING_COUNTS", "INT_RANGE_HISTOGRAM",
             "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
             "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS",
             "SIMPLE_OCCURRENCE_COUNT", "STRING_COUNTS_WITH_THRESHOLD",
             "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";

    REPORT_TYPE_UNSET = 0;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences on all devices in the fleet with system profile
    // SP of the event associated with EV over the course of the report day.
    // For example, a report of this type might give the total number of times
    // a medium, red widget was used across the fleet yesterday.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: COUNT
    // Local aggregation period: 1 hour
    // Global aggregation: OCCURRENCE_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_OCCURRENCE_COUNTS = 11;

    // For each system_profile SP and each event_vector EV, produces the count
    // of the number of unique devices with system profile SP for which EV
    // “is accepted” during the aggregation period, which must be DAYS_1,
    // DAYS_7, DAYS_28 or DAYS_30.
    //
    // There are different versions of what “is accepted” means depending on
    // which local aggregation procedure is specified:
    //
    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
    // during the aggregation period. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used at least once in the seven-day period
    // ending yesterday.
    //
    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
    // category selection procedure selected EV. For example, a report of this
    // type using SELECT_MOST_COMMON might give the total number of devices
    // with system profile SP on which most of the widgets used during the
    // seven-day period ending yesterday were medium-red.
    //
    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
    // SELECT_FIRST, in combination with setting expedited_sending, results in
    // the count being sent by the device when the event occurs (instead of at
    // the end of the day). This can be desirable for having data for the
    // current day appear faster in the reports output by Cobalt.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: OCCURRENCE_COUNTS
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure
    //   - local_aggregation_period
    //   - expedited_sending
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_COUNTS = 12;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of unique devices with system_profile SP for which an integer
    // value, aggregated locally on device over the aggregation period,
    // associated with EV, falls into the bucket.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of devices with system profile SP on which a medium,
    // red widget was used between 10 and 100 times in the seven-day period
    // ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of devices with system profile SP on which the minimum temperature of a
    // medium red widget over the seven-day period ending yesterday was between
    // 10 and 100 degrees.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_HISTOGRAMS = 13;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of values, associated with EV, from devices
    // with system_profile SP, that fall into the bucket, where each device
    // computes one such value per hour.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
    // used is one hour and so the counts in each bucket are not interpreted
    // as a number of unique devices.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of times that the hourly count of medium red widgets
    // used was between 10 and 100 over devices with system profile SP,
    // yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of times that the minimum temperature over an hour of all medium red
    // widgets used was between 10 and 100 degrees over all devices with
    // system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    HOURLY_VALUE_HISTOGRAMS = 14;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of integer measurements, associated with EV, logged on devices
    // with system_profile SP, that fall into the bucket. Here we are counting
    // each value logged by the instrumented code individually and so the rate
    // at which values are being recorded is arbitrary and varies from device
    // to device. For example, for the integer bucket 10-100, a report of this
    // type might give the number of times that a medium red widget's
    // temperature was measured as being between 10 and 100 degrees over all
    // devices with system profile SP, yesterday. The rate at which these
    // widget temperature measurements are taken is arbitrary and may vary
    // from device to device.
    //
    // Input metric types: INTEGER or INTEGER_HISTOGRAM
    //
    // Local aggregation: INTEGER_HISTOGRAM
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - int_buckets (Only with metric_type = INTEGER)
    FLEETWIDE_HISTOGRAMS = 15;

    // For each system_profile SP and each event_vector EV, produces the sum
    // and count of many integer measurements associated with EV, logged on
    // devices with system_profile SP. Here we are counting each value logged
    // by the instrumented code individually and so the rate at which values are
    // being recorded is arbitrary and varies from device to device. This allows
    // us to produce a fleetwide mean. For example, a report of this type might
    // give the mean of all temperature measurements of medium-red widgets
    // yesterday, across all devices with system profile SP, regardless of how
    // many temperature measurements were taken on each device individually.
    //
    // Input metric types: INTEGER
    //
    // Local aggregation: SUM_AND_COUNT
    // Local aggregation period: one hour
    // Global aggregation: SUM_AND_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: SumAndCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_MEANS = 16;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a single value and so the distribution of the values
    // may be thought of as a distribution of unique devices.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the aggregation period. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used by
    // each device over the 7-day period ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over the 7-day period ending yesterday of all medium-red widgets over
    // all devices with system profile SP.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    UNIQUE_DEVICE_NUMERIC_STATS = 17;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a value every hour and so the distribution of the
    // values may NOT be thought of as a distribution of unique devices.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
    // used is one hour.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the hour. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used in
    // any one hour period on any device with System profile SP, yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over any one-hour period of medium-red widgets use on any device
    // with system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: 1 hour
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    HOURLY_VALUE_NUMERIC_STATS = 18;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences of a string value on all devices in the fleet
    // with system profile SP of the event associated with EV over the course
    // of the report day.
    //
    // Input metric types: STRING
    //
    // Local aggregation: STRING_HISTOGRAM
    // Local aggregation period: 1 hour
    // Global aggregation: STRING_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_file
    //   - string_buffer_max
    STRING_COUNTS = 20;

    // For each system_profile SP, each event_vector EV, and each string value
    // produces the count of the number of unique devices with system profile
    // SP on which the string value was logged in connection with the EV during
    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    //
    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used in conjunction with the component name
    // "widget-consumer" at least once in the seven-day period ending
    // yesterday.
    //
    // Input metric types: STRING
    //
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: STRING_HISTOGRAMS
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_file
    //   - local_aggregation_period
    //   - string_buffer_max
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_STRING_COUNTS = 21;
  }
  // The type of this report. See the comments on the ReportType values for
  // the fields that are particular to each type.
  ReportType report_type = 3;

  //////////////// Fields for reports with privacy enabled /////////////////

  // The level of differential privacy applied to the report. Each level
  // corresponds to an epsilon value in the shuffled model. The mapping
  // from enum values to epsilon values is hard-coded in makePrivacyConstants()
  // in the file //src/bin/config_parser/src/privacy/privacy_encoding_params.go
  enum PrivacyLevel {
    PRIVACY_LEVEL_UNKNOWN = 0;

    // epsilon = infinity
    NO_ADDED_PRIVACY = 1;

    LOW_PRIVACY = 2;

    MEDIUM_PRIVACY = 3;

    HIGH_PRIVACY = 4;
  }

  // This field is used to specify the privacy level for a Cobalt report.
  // All Cobalt report types support differential privacy and are required
  // to set this field (use NO_ADDED_PRIVACY to disable differential privacy).
  PrivacyLevel privacy_level = 20;

  // The mean number of observations added per index point when performing the
  // Poisson mechanism encoding for Cobalt reports. Should be set if and only if
  // `privacy_level` is not NO_ADDED_PRIVACY.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  double poisson_mean = 30;

  // When reporting numerical values with privacy, the values are mapped to
  // indices from 0 to num_index_points-1 with a randomized rounding method.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  uint32 num_index_points = 22;

  // When reporting strings with privacy, the strings are counted using a linear
  // sketch.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  StringSketchParameters string_sketch_params = 27;

  // These fields specify the range of values that can be reported by a device
  // in the specified local_aggregation_period. If the true value to be reported
  // falls outside the specified range, the value is clipped.
  //
  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
  // computed for the device over the aggregation period specified in the
  // report.
  //
  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
  // is also required in order to bound the `count` value.)
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  // * FLEETWIDE_OCCURRENCE_COUNTS
  // * UNIQUE_DEVICE_NUMERIC_STATS
  // * HOURLY_VALUE_NUMERIC_STATS
  // * FLEETWIDE_MEANS
  int64 min_value = 23;
  // Upper end of the range described on |min_value|.
  int64 max_value = 24;

  // This field specifies the maximum count to be reported by a device in the
  // specified local_aggregation_period. If the true count is greater than
  // max_count, then the count will be reported as max_count.
  //
  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
  // individual histogram bucket over the aggregation period of one hour. For
  // STRING_COUNTS, it applies to the count for each string over one hour.
  //
  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
  // values to be averaged over one hour.
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  // * FLEETWIDE_HISTOGRAMS
  // * FLEETWIDE_MEANS
  // * STRING_COUNTS
  uint64 max_count = 25;

  //////////////// Fields specific to some report types /////////////////

  // Simple name or full path to file containing known string values.
  //
  // This field is used only for reports on STRING metrics, i.e. reports of
  // type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
  string candidate_file = 9;

  // A specification of integer-range buckets for a histogram.
  //
  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
  // contains an instance of IntegerBuckets.
  IntegerBuckets int_buckets = 10;

  // The interval with which clients will generate and upload observations.
  enum ReportingInterval {
    REPORTING_INTERVAL_UNSET = 0;
    HOURS_1 = 1;
    DAYS_1 = 2;
  }

  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
  // supported by some client platforms. If not set, the reporting interval
  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
  ReportingInterval reporting_interval = 32;

  // This field can be used with all Report types. When set, the generated
  // report will exclude an Observation if there are not at least
  // |reporting_threshold| number of distinct devices reporting Observations
  // with the same ObservationMetadata.
  uint32 reporting_threshold = 13;

  // The on-device function computed on the metric during the aggregation
  // window.
  enum LocalAggregationProcedure {
    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;

    // Numerical statistic aggregation procedures to be used with reports
    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
    SUM_PROCEDURE = 1;
    MIN_PROCEDURE = 2;
    MAX_PROCEDURE = 3;
    MEAN = 4;
    MEDIAN = 5;
    // The value of N is set in the field
    // |local_aggregation_procedure_percentile_n|.
    PERCENTILE_N = 6;

    // Logical aggregation procedures to be used with reports of type
    // UNIQUE_DEVICE_COUNTS
    AT_LEAST_ONCE = 7;
    SELECT_FIRST = 8;
    SELECT_MOST_COMMON = 9;
  }

  // This field is required for reports of type
  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
  // and UNIQUE_DEVICE_COUNTS. Different report types support
  // different values of this field. See the comments on the
  // enum values in LocalAggregationProcedure.
  LocalAggregationProcedure local_aggregation_procedure = 17;

  // This field is required when
  // local_aggregation_procedure = PERCENTILE_N.
  // In this case it gives the value of N to use. Otherwise this field is
  // ignored.
  uint32 local_aggregation_procedure_percentile_n = 18;

  // Time window over which the metric is aggregated. The local aggregation
  // period is specified for UNIQUE_DEVICE_* report types.
  WindowSize local_aggregation_period = 19;

  // The maximum number of distinct event vectors for which an instance of the
  // Cobalt client should produce an observation, for a given local aggregation
  // period. Event vectors are prioritized in order of first arrival during the
  // aggregation period.
  //
  // For example, if a report has an event_vector_buffer_max of 10, and 12
  // distinct event vectors are logged for this metric over an aggregation
  // period, then Cobalt will send observations of the first 10 event vectors
  // for that aggregation period and drop the last 2.
  //
  // If this field is unset, the registry parser assigns to it the total number
  // of event vectors for the report's parent metric (i.e., the product over all
  // metric dimensions of the number of event codes per dimension).
  //
  // The report's project will be charged against a resource budget for this
  // value so project owners are encouraged to set this as small as possible.
  // For example, the report's parent metric may include a dimension with
  // thousands of event codes, but it is expected that any one device will log
  // only a few distinct event vectors per day. In that case we may set
  // event_vector_buffer_max to a relatively small number, say 20. For reports
  // which use differential privacy, setting event_vector_buffer_max to a
  // smaller number will improve the signal for event vectors which are included
  // in observations.
  uint64 event_vector_buffer_max = 26;

  // The maximum number of distinct strings that Cobalt must keep in its
  // in-memory buffer on any single device. During local aggregation for reports
  // of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep
  // track of this many distinct strings per aggregation period. The report's
  // project will be charged against a resource budget for this value so project
  // owners are encouraged to set this as small as possible. A STRING metric
  // includes a file of candidate strings that may contain many thousands of
  // strings. But it is expected that any one device will log only a few of
  // these strings per day. We may set string_buffer_max to a relatively small
  // number, say 20.
  //
  // This is a required field for reports of type STRING_COUNTS and
  // UNIQUE_DEVICE_STRING_COUNTS.
  uint32 string_buffer_max = 28;

  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
  // event occurs, instead of waiting for the end of the day.
  //
  // This can only be enabled when using a local aggregation procedure of
  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is
  // NO_ADDED_PRIVACY. When used with a system_profile_selection of REPORT_ALL
  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
  // for the current day when the event occurs instead of at the end of the day.
  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
  // though it may result in a slight change in the current day's system profile
  // that is used, as Cobalt won't wait until the end of the day to determine
  // the final system profile, but will instead send the count immediately with
  // the system profile that is currently active on the device.
  bool expedited_sending = 29;

  /////////////////// Fields used by all report types ///////////////////
  // Next id: 106

  // The list of SystemProfileFields to include in each row of the report.
  // Optional.
  repeated SystemProfileField system_profile_field = 100;

  // The list of Experiments to include in each row of the report.
  //
  // Each report row lists the intersection of the experiment ids active on the
  // device and experiment ids specified in this field.
  //
  // The specified experiment ids must be found in one of the project's
  // experiments_namespaces.
  repeated int64 experiment_id = 104;

  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
  // SELECT_FIRST, or occasionally REPORT_ALL.
  //
  // If the system profile value changed during the aggregation window specified
  // for this report, system_profile_selection specifies which system profile to
  // report for each device.
  SystemProfileSelectionPolicy system_profile_selection = 103;

  // Maximum ReleaseStage for which this Report is allowed to be collected.
  ReleaseStage max_release_stage = 105;
}

// A specification for SystemProfile selection policy.
enum SystemProfileSelectionPolicy {
  // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
  // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
  // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to
  // 'REPORT_ALL' and should not be changed. For all other report types,
  // SELECT_DEFAULT must not be used.
  SELECT_DEFAULT = 0;

  // Always report the last SystemProfile seen in the aggregation window. This
  // will be the last SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_LAST = 1;

  // Always report the first SystemProfile seen in the aggregation window. This
  // will be the first SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_FIRST = 2;

  // Report all system profiles in the aggregation window. For most report
  // types, this is the most sensible value to use. For reports that depend on
  // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no
  // longer be the case that a single device will only upload one observation
  // per time period (It will upload one observation per time period *per unique
  // system_profile*).
  REPORT_ALL = 3;
}

// A specification of a field from SystemProfile. These are used in a
// ReportDefinition to specify which fields should be included in the generated
// Observations and reports.
//
// For a description of the meaning of each field, see the fields in the
// SystemProfile in: cobalt/proto/common.proto
enum SystemProfileField {
  // The values are listed in their original declaration order, which is not
  // numeric order; the numbers are the stable identifiers.
  OS = 0;
  ARCH = 1;
  BOARD_NAME = 2;
  PRODUCT_NAME = 3;
  SYSTEM_VERSION = 4;
  APP_VERSION = 10;
  CHANNEL = 5;
  BUILD_TYPE = 7;
  EXPERIMENT_IDS = 9;
  reserved 6, 8;
  reserved "REALM", "EXPERIMENT_TOKENS";
}

// Stages in the release cycle of a component. Each Cobalt customer determines
// its current ReleaseStage when initializing the CobaltService. Each Metric
// and Report can declare the maximum ReleaseStage for which it is allowed to
// be collected. For example a DEBUG Metric will not be collected from a device
// running a FISHFOOD release.
enum ReleaseStage {
  RELEASE_STAGE_NOT_SET = 0;

  // A test build. Also called "eng". Only use this value when the device is
  // running test builds as all metrics/reports will be collected.
  DEBUG = 10;
  // Small, internal prototype. Used for testing a new feature internally,
  // usually within the team or a small group.
  FISHFOOD = 20;
  // An internal release for testing with internal users.
  DOGFOOD = 40;
  // An open beta, for testing with internal and external users.
  OPEN_BETA = 60;

  // Generally-available. The final stage of a release. Also called
  // "production". If unsure of which release stage the device is running, it
  // is safest to fallback to this value (which is the default if no value is
  // set), to avoid inadvertently collecting metric/report data.
  GA = 99;
}

// ExponentialIntegerBuckets is used to define a partition of the integers into
// a finite number of exponentially increasing buckets.
//
// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
//
// The bucket boundaries are:
// a[0] = floor
// a[1] = floor + initial_step
// a[2] = floor + initial_step * step_multiplier
// a[3] = floor + initial_step * step_multiplier ^ 2
// a[4] = floor + initial_step * step_multiplier ^ 3
// and in general, for i = 1, 2, 3 ... n
// a[i] = floor + initial_step * step_multiplier ^ (i-1)
//
// Then, the buckets are defined as follows:
// Bucket 0 is the underflow bucket: (-infinity, floor)
// Bucket i for 0 < i < n+1: [a[i-1], a[i])
// Bucket n+1 is the overflow bucket: [a[n], +infinity)
//
// Examples:
// floor = 0
// num_buckets = 3
// initial_step = 10
// step_multiplier = 10
// Then, the buckets are:
// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 2
// step_multiplier = 2
// Then, the buckets are:
// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
//
// floor = 10
// num_buckets = 3
// initial_step = 2
// step_multiplier = 2
// Then, the buckets are:
// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 100
// step_multiplier = 10
// Then, the buckets are:
// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity)
//
message ExponentialIntegerBuckets {
  int64 floor = 1;

  // num_buckets must be at least 1.
805 uint32 num_buckets = 2; 806 807 // Must be at least one. 808 uint32 initial_step = 3; 809 810 // Must be at least one. 811 uint32 step_multiplier = 4; 812} 813 814// LinearIntegerBuckets is used to define a partition of the integers into a 815// finite number of buckets of equal size. 816// 817// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. 818// Bucket 0 is the underflow bucket: (-infinity, floor) 819// Bucket n+1 is the overflow bucket: [lower + step_size * n, +infinity) 820// 821// For i = 1 to n, the bucket i is defined as 822// [floor + step_size * (i-1), floor + step_size * i) 823// 824// Example: floor = 0, num_buckets = 3, step_size = 10. 825// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +inifinity) 826message LinearIntegerBuckets { 827 int64 floor = 1; 828 829 // Must be at least one. 830 uint32 num_buckets = 2; 831 832 // Must be at least one. 833 uint32 step_size = 3; 834} 835 836message IntegerBuckets { 837 oneof buckets { 838 ExponentialIntegerBuckets exponential = 1; 839 LinearIntegerBuckets linear = 2; 840 } 841 842 // If set to true, empty buckets will not be added to the report data such 843 // that all histograms contain a row for every bucket. Buckets with a zero 844 // count may still occur if data is logged that contains a zero count. This 845 // field can not be set on reports with added privacy. 846 bool sparse_output = 3; 847} 848 849message StringSketchParameters { 850 // Number of hashes in Count-Min Sketch. 851 int32 num_hashes = 1; 852 853 // Number of cells per hash in Count-Min Sketch. 854 int32 num_cells_per_hash = 2; 855} 856