// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

import "cobalt/proto/window_size.proto";

option java_multiple_files = true;
option java_package = "com.google.cobalt";

////////////////////////////////////////////////////////////////////////////////
// NOTE: This file is used by the Cobalt client and the Cobalt servers.
// The source-of-truth of this file is located in Cobalt's open source code
// repository, and the file is copied to Android where it is used by the Cobalt
// client. Do not edit the copy of this file in this Android repo as those edits
// will be overwritten when the file is next copied.
////////////////////////////////////////////////////////////////////////////////

// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// Fuchsia device where Cobalt analyzes the logged Events in order to form
// Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all Fuchsia devices in order to generate
// a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
message ReportDefinition {
  // Field numbers and names of deleted fields. They stay reserved so that they
  // can never be reused with a different meaning.
  // NOTE(review): "candidate_lis" looks like a truncation of "candidate_list".
  // It is kept verbatim here; confirm against the source-of-truth copy of this
  // file before changing it.
  reserved 4 to 8, 11, 12, 14 to 16, 21, 31, 101, 102;
  reserved "aggregation_type", "aggregation_window", "candidate_lis",
      "dp_release_config", "expected_population_size",
      "expected_string_set_size", "export_location_override",
      "local_privacy_noise_level", "output_location", "percentiles",
      "threshold", "window_size", "use_poisson_mechanism_for_privacy",
      "prob_bit_flip";

  // Unique name for this Report within its owning MetricDefinition.
  // The name must obey the syntax of a C variable name and must have length
  // at most 64. The integer |id| field is the stable identifier for a report
  // so this name may be changed. However doing this may affect the
  // names and locations of some artifacts produced by Cobalt's report
  // generation pipeline.
  string report_name = 1;

  // The unique integer ID for this report within its owning metric.
  // The user must manually set this |id| field. This is the stable identifier
  // for a report and should not be changed once data collection begins.
  uint32 id = 2;

  // A Report has one of the following types.
  // Next standard report type ID: 22
  enum ReportType {
    // Numbers and names of removed report types; never reuse them.
    reserved 1 to 10, 19, 9999;
    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT",
        "HIGH_FREQUENCY_STRING_COUNTS", "INT_RANGE_HISTOGRAM",
        "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP", "PER_DEVICE_HISTOGRAM",
        "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
        "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS",
        "UNIQUE_N_DAY_ACTIVES";

    REPORT_TYPE_UNSET = 0;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences on all devices in the fleet with system profile
    // SP of the event associated with EV over the course of the report day.
    // For example, a report of this type might give the total number of times
    // a medium, red widget was used across the fleet yesterday.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: COUNT
    // Local aggregation period: 1 hour
    // Global aggregation: OCCURRENCE_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //    none
    FLEETWIDE_OCCURRENCE_COUNTS = 11;

    // For each system_profile SP and each event_vector EV, produces the count
    // of the number of unique devices with system profile SP for which EV
    // "is accepted" during the aggregation period, which must be DAYS_1,
    // DAYS_7, DAYS_28 or DAYS_30.
    //
    // There are different versions of what "is accepted" means depending on
    // which local aggregation procedure is specified:
    //
    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
    // during the aggregation period. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used at least once in the seven-day period
    // ending yesterday.
    //
    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
    // category selection procedure selected EV. For example, a report of this
    // type using SELECT_MOST_COMMON might give the total number of devices
    // with system profile SP on which most of the widgets used during the
    // seven-day period ending yesterday were medium-red.
    //
    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
    // SELECT_FIRST, in combination with setting expedited_sending, results in
    // the count being sent by the device when the event occurs (instead of at
    // the end of the day). This can be desirable for having data for the
    // current day appear faster in the reports output by Cobalt.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: OCCURRENCE_COUNTS
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure
    //   - local_aggregation_period
    //   - expedited_sending
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_COUNTS = 12;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of unique devices with system_profile SP for which an integer
    // value, aggregated locally on device over the aggregation period,
    // associated with EV, falls into the bucket.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of devices with system profile SP on which a medium,
    // red widget was used between 10 and 100 times in the seven-day period
    // ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of devices with system profile SP on which the minimum temperature of a
    // medium red widget over the seven-day period ending yesterday was between
    // 10 and 100 degrees.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_HISTOGRAMS = 13;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of values, associated with EV, from devices
    // with system_profile SP, that fall into the bucket, where each device
    // computes one such value per hour.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
    // used is one hour and so the counts in each buckets are not interpreted
    // as a number of unique devices.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of times that the hourly count of medium red widgets
    // used was between 10 and 100 over devices with system profile SP,
    // yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of times that the minimum temperature over an hour of all medium red
    // widgets used was between 10 and 100 degrees over all devices with
    // system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    HOURLY_VALUE_HISTOGRAMS = 14;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of integer measurements, associated with EV, logged on devices
    // with system_profile SP, that fall into the bucket. Here we are counting
    // each value logged by the instrumented code individually and so the rate
    // at which values are being recorded is arbitrary and varies from device
    // to device. For example, for the integer bucket 10-100, a report of this
    // type might give the number of times that a medium red widget's
    // temperature was measured as being between 10 and 100 degrees over all
    // devices with system profile SP, yesterday. The rate at which these
    // widget temperature measurements are taken is arbitrary and may vary
    // from device to device.
    //
    // Input metric types: INTEGER or INTEGER_HISTOGRAM
    //
    // Local aggregation: INTEGER_HISTOGRAM
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - int_buckets (Only with metric_type = INTEGER)
    FLEETWIDE_HISTOGRAMS = 15;

    // For each system_profile SP and each event_vector EV, produces the sum
    // and count of many integer measurements associated with EV, logged on
    // devices with system_profile SP. Here we are counting each value logged
    // by the instrumented code individually and so the rate at which values are
    // being recorded is arbitrary and varies from device to device. This allows
    // us to produce a fleetwide mean. For example, a report of this type might
    // give the mean of all temperature measurements of medium-red widgets
    // yesterday, across all devices with system profile SP, regardless of how
    // many temperature measurements were taken on each device individually.
    //
    // Input metric types: INTEGER
    //
    // Local aggregation: SUM_AND_COUNT
    // Local aggregation period: one hour
    // Global aggregation: SUM_AND_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: SumAndCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_MEANS = 16;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a single value and so the distribution of the values
    // may be thought of as a distribution of unique devices.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the aggregation period. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used by
    // each device over the 7-day period ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over the 7-day period ending yesterday of all medium-red widgets over
    // all devices with system profile SP.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    UNIQUE_DEVICE_NUMERIC_STATS = 17;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a value every hour and so the distribution of the
    // values may NOT be thought of as a distribution of unique devices.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
    // used is one hour.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the hour. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used in
    // any one hour period on any device with System profile SP, yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over any one-hour period of medium-red widgets use on any device
    // with system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: 1 hour
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    HOURLY_VALUE_NUMERIC_STATS = 18;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences of a string value on all devices in the fleet
    // with system profile SP of the event associated with EV over the course
    // of the report day.
    //
    // Input metric types: STRING
    //
    // Local aggregation: STRING_HISTOGRAM
    // Local aggregation period: 1 hour
    // Global aggregation: STRING_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_file
    //   - string_buffer_max
    STRING_COUNTS = 20;

    // For each system_profile SP, each event_vector EV, and each string value
    // produces the count of the number of unique devices with system profile
    // SP on which the string value was logged in connection with the EV during
    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    //
    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used in conjunction with the component name
    // "widget-consumer" at least once in the seven-day period ending
    // yesterday.
    //
    // Input metric types: STRING
    //
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: STRING_HISTOGRAMS
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_file
    //   - local_aggregation_period
    //   - string_buffer_max
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_STRING_COUNTS = 21;
  }

  // The type of this report. See the comments on the values of ReportType for
  // which other fields of ReportDefinition apply to each type.
  ReportType report_type = 3;

  ////////////////  Fields for reports with privacy enabled  /////////////////

  // The level of differential privacy applied to the report. Each level
  // corresponds to an epsilon value in the shuffled model. The mapping
  // from enum values to epsilon values is hard-coded in makePrivacyConstants()
  // in the file //src/bin/config_parser/src/privacy/privacy_encoding_params.go
  enum PrivacyLevel {
    // Zero value: no privacy level was set. Reports are required to set
    // |privacy_level| explicitly (see the comment on that field).
    PRIVACY_LEVEL_UNKNOWN = 0;

    // epsilon = infinity
    NO_ADDED_PRIVACY = 1;

    // See makePrivacyConstants() for the corresponding epsilon value.
    LOW_PRIVACY = 2;

    // See makePrivacyConstants() for the corresponding epsilon value.
    MEDIUM_PRIVACY = 3;

    // See makePrivacyConstants() for the corresponding epsilon value.
    HIGH_PRIVACY = 4;
  }

  // This field is used to specify the privacy level for a Cobalt report.
  // All Cobalt report types support differential privacy and are required
  // to set this field (use NO_ADDED_PRIVACY to disable differential privacy).
  PrivacyLevel privacy_level = 20;

  // The mean number of observations added per index point when performing the
  // Poisson mechanism encoding for Cobalt reports. Should be set if and only if
  // `privacy_level` is not NO_ADDED_PRIVACY.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  double poisson_mean = 30;

  // When reporting numerical values with privacy, the values are mapped to
  // indices from 0 to num_index_points-1 with a randomized rounding method.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  uint32 num_index_points = 22;

  // When reporting strings with privacy, the strings are counted using a linear
  // sketch.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  StringSketchParameters string_sketch_params = 27;

  // |min_value| and |max_value| specify the range of values that can be
  // reported by a device in the specified local_aggregation_period. If the true
  // value to be reported falls outside specified range, the value is clipped.
  //
  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
  // computed for the device over the aggregation period specified in the
  // report.
  //
  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
  // is also required in order to bound the `count` value.)
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  // * FLEETWIDE_OCCURRENCE_COUNTS
  // * UNIQUE_DEVICE_NUMERIC_STATS
  // * HOURLY_VALUE_NUMERIC_STATS
  // * FLEETWIDE_MEANS
  int64 min_value = 23;

  // The other end of the clipping range that starts at |min_value|. See the
  // comment on |min_value| for semantics and for when it is required.
  int64 max_value = 24;

  // This field specifies the maximum count to be reported by a device in the
  // specified local_aggregation_period. If the true count is greater than
  // max_count, then the count will be reported as max_count.
  //
  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
  // individual histogram bucket over the aggregation period of one hour. For
  // STRING_COUNTS, it applies to the count for each string over one hour.
  //
  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
  // values to be averaged over one hour.
  //
  // If a privacy_level other than NO_ADDED_PRIVACY is specified, this field is
  // required for reports of type:
  // * FLEETWIDE_HISTOGRAMS
  // * FLEETWIDE_MEANS
  // * STRING_COUNTS
  uint64 max_count = 25;

  ////////////////  Fields specific to some report types /////////////////

  // Simple name or full path to file containing known string values.
  //
  // This field is used only for reports of type STRING_COUNTS and
  // UNIQUE_DEVICE_STRING_COUNTS.
  string candidate_file = 9;

  // A specification of integer-range buckets for a histogram.
  //
  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
  // contains an instance of IntegerBuckets.
  IntegerBuckets int_buckets = 10;

  // The interval with which clients will generate and upload observations.
  enum ReportingInterval {
    // Not set. See the comment on the |reporting_interval| field for the
    // default that applies in this case.
    REPORTING_INTERVAL_UNSET = 0;
    HOURS_1 = 1;
    DAYS_1 = 2;
  }

  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
  // supported by some client platforms. If not set, the reporting interval
  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
  ReportingInterval reporting_interval = 32;

  // This field can be used with all Report types. When set, the generated
  // report will exclude an Observation if there are not at least
  // |reporting_threshold| number of distinct devices reporting Observations
  // with the same ObservationMetadata.
  uint32 reporting_threshold = 13;

  // The on-device function computed on the metric during the aggregation
  // window.
  enum LocalAggregationProcedure {
    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;

    // Numerical statistic aggregation procedures to be used with reports
    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
    SUM_PROCEDURE = 1;
    MIN_PROCEDURE = 2;
    MAX_PROCEDURE = 3;
    MEAN = 4;
    MEDIAN = 5;
    // The value of N is set in the field
    // |local_aggregation_procedure_percentile_n|.
    PERCENTILE_N = 6;

    // Logical aggregation procedures to be used with reports of type
    // UNIQUE_DEVICE_COUNTS
    AT_LEAST_ONCE = 7;
    SELECT_FIRST = 8;
    SELECT_MOST_COMMON = 9;
  }

  // This field is required for reports of type
  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
  // and UNIQUE_DEVICE_COUNTS. Different report types support
  // different values of this field. See the comments on the
  // enum values in LocalAggregationProcedure.
  //
  // Per the comments on ReportType, the histogram and numeric-stats report
  // types use this field only when the metric type is INTEGER.
  LocalAggregationProcedure local_aggregation_procedure = 17;

  // This field is required when
  // local_aggregation_procedure = PERCENTILE_N.
  // In this case it gives the value of N to use. Otherwise this field is
  // ignored.
  uint32 local_aggregation_procedure_percentile_n = 18;

  // Time window over which the metric is aggregated. The local aggregation
  // period is specified for UNIQUE_DEVICE_* report types.
  WindowSize local_aggregation_period = 19;

  // The maximum number of distinct event vectors for which an instance of the
  // Cobalt client should produce an observation, for a given local aggregation
  // period. Event vectors are prioritized in order of first arrival during the
  // aggregation period.
  //
  // For example, if a report has an event_vector_buffer_max of 10, and 12
  // distinct event vectors are logged for this metric over an aggregation
  // period, then Cobalt will send observations of the first 10 event vectors
  // for that aggregation period and drop the last 2.
  //
  // If this field is unset, the registry parser assigns to it the total number
  // of event vectors for the report's parent metric (i.e., the product over all
  // metric dimensions of the number of event codes per dimension).
  //
  // The report's project will be charged against a resource budget for this
  // value so project owners are encouraged to set this as small as possible.
  // For example, the report's parent metric may include a dimension with
  // thousands of event codes, but it is expected that any one device will log
  // only a few distinct event vectors per day. In that case we may set
  // event_vector_buffer_max to a relatively small number, say 20. For reports
  // which use differential privacy, setting event_vector_buffer_max to a
  // smaller number will improve the signal for event vectors which are included
  // in observations.
  uint64 event_vector_buffer_max = 26;

  // The maximum number of distinct strings that Cobalt must keep in its
  // in-memory buffer on any single device. During local aggregation for reports
  // of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep
  // track of this many distinct strings per aggregation period. The report's
  // project will be charged against a resource budget for this value so project
  // owners are encouraged to set this as small as possible. A STRING metric
  // includes a file of candidate strings that may contain many thousands of
  // strings. But it is expected that any one device will log only a few of
  // these strings per day. We may set string_buffer_max to a relatively small
  // number, say 20.
  //
  // This is a required field for reports of type STRING_COUNTS and
  // UNIQUE_DEVICE_STRING_COUNTS.
  uint32 string_buffer_max = 28;

  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
  // event occurs, instead of waiting for the end of the day.
  //
  // This can only be enabled when using a local aggregation procedure of
  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is
  // NO_ADDED_PRIVACY. When used with a system_profile_selection of REPORT_ALL
  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
  // for the current day when the event occurs instead of at the end of the day.
  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
  // though it may result in a slight change in the current day's system profile
  // that is used, as Cobalt won't wait until the end of the day to determine
  // the final system profile, but will instead send the count immediately with
  // the system profile that is currently active on the device.
  bool expedited_sending = 29;

  ///////////////////  Fields used by all report types ///////////////////
  // Next id: 106

  // The list of SystemProfileFields to include in each row of the report.
  // Optional.
  repeated SystemProfileField system_profile_field = 100;

  // The list of Experiments to include in each row of the report.
  //
  // Each report row lists the intersection of the experiment ids active on the
  // device and experiment ids specified in this field.
  //
  // The specified experiment ids must be found in one of the project's
  // experiments_namespaces.
  repeated int64 experiment_id = 104;

  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
  // SELECT_FIRST, or occasionally REPORT_ALL.
  //
  // If the system profile value changed during the aggregation window specified
  // for this report, system_profile_selection specifies which system profile to
  // report for each device.
  SystemProfileSelectionPolicy system_profile_selection = 103;

  // Maximum ReleaseStage for which this Report is allowed to be collected.
  ReleaseStage max_release_stage = 105;
}
676
677// Policy for choosing which SystemProfile to report per aggregation window.
678enum SystemProfileSelectionPolicy {
679  // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
680  // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
681  // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS, this will resolve to
682  // 'REPORT_ALL' and should not be changed. For all other report types,
683  // SELECT_DEFAULT must not be used.
684  SELECT_DEFAULT = 0;
685
686  // Always report the last SystemProfile seen in the aggregation window. This
687  // will be the last SystemProfile seen *at the time of an event* in the
688  // aggregation window.
689  SELECT_LAST = 1;
690
691  // Always report the first SystemProfile seen in the aggregation window. This
692  // will be the first SystemProfile seen *at the time of an event* in the
693  // aggregation window.
694  SELECT_FIRST = 2;
695
696  // Report all system profiles in the aggregation window. For most report
697  // types, this is the most sensible value to use. For reports that depend on
698  // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
699  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
700  // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since a single
701  // device will no longer upload only one observation per time period; it
702  // will instead upload one observation per time period *per unique
703  // system_profile*.
704  REPORT_ALL = 3;
705}
706
707// Identifies a single field of SystemProfile. These values are used in a
708// ReportDefinition to specify which fields should be included in the generated
709// Observations and reports.
710//
711// For a description of the meaning of each field, see the corresponding
712// fields of SystemProfile in cobalt/proto/common.proto.
713enum SystemProfileField {
714  OS = 0;
715  ARCH = 1;
716  BOARD_NAME = 2;
717  PRODUCT_NAME = 3;
718  SYSTEM_VERSION = 4;
719  APP_VERSION = 10;  // Declared out of numeric order; the number is fixed.
720  CHANNEL = 5;
721  BUILD_TYPE = 7;
722  EXPERIMENT_IDS = 9;
723  reserved 6, 8;  // Reserved value numbers: these can never be reused.
724  reserved "REALM", "EXPERIMENT_TOKENS";  // Reserved names: not reusable.
725}
726
727// Stages in the release cycle of a component. Each Cobalt customer determines
728// its current ReleaseStage when initializing the CobaltService. Each Metric
729// and Report can declare the maximum ReleaseStage for which it is allowed to
730// be collected. For example, a Metric with a maximum ReleaseStage of DEBUG
731// will not be collected from a device running a FISHFOOD release.
732enum ReleaseStage {
733  RELEASE_STAGE_NOT_SET = 0;  // Zero value; see GA for the unset default.
734
735  // A test build. Also called "eng". Only use this value when the device is
736  // running test builds, as all metrics/reports will be collected.
737  DEBUG = 10;
738  // Small, internal prototype. Used for testing a new feature internally,
739  // usually within the team or a small group.
740  FISHFOOD = 20;
741  // An internal release for testing with internal users.
742  DOGFOOD = 40;
743  // An open beta, for testing with internal and external users.
744  OPEN_BETA = 60;
745
746  // Generally-available. The final stage of a release. Also called
747  // "production". If unsure of which release stage the device is running, it
748  // is safest to fall back to this value (which is the default if no value is
749  // set), to avoid inadvertently collecting metric/report data.
750  GA = 99;
751}
752
753// ExponentialIntegerBuckets is used to define a partition of the integers into
754// a finite number of exponentially increasing buckets.
755//
756// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
757//
758// The bucket boundaries are:
759// a[0] = floor
760// a[1] = floor + initial_step
761// a[2] = floor + initial_step * step_multiplier
762// a[3] = floor + initial_step * step_multiplier ^ 2
763// a[4] = floor + initial_step * step_multiplier ^ 3
764// and in general, for i = 1, 2, 3 ... n (where ^ denotes exponentiation)
765// a[i] = floor + initial_step * step_multiplier ^ (i-1)
766//
767// Then, the buckets are defined as follows:
768// Bucket 0 is the underflow bucket: (-infinity, floor)
769// Bucket i for 0 < i < n+1: [a[i-1], a[i])
770// Bucket n+1 is the overflow bucket: [a[n], +infinity)
771//
772// Examples:
773// floor = 0
774// num_buckets = 3
775// initial_step = 10
776// step_multiplier = 10
777// Then, the buckets are:
778// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
779//
780// floor = 0
781// num_buckets = 3
782// initial_step = 2
783// step_multiplier = 2
784// Then, the buckets are:
785// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
786//
787// floor = 10
788// num_buckets = 3
789// initial_step = 2
790// step_multiplier = 2
791// Then, the buckets are:
792// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
793//
794// floor = 0
795// num_buckets = 3
796// initial_step = 100
797// step_multiplier = 10
798// Then, the buckets are:
799// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity)
800//
801message ExponentialIntegerBuckets {
802  int64 floor = 1;  // Inclusive lower bound of bucket 1; lesser values underflow.
803
804  // The number n of finite buckets. Must be at least one.
805  uint32 num_buckets = 2;
806
807  // Width of bucket 1, i.e. a[1] - a[0]. Must be at least one.
808  uint32 initial_step = 3;
809
810  // Factor by which successive boundary offsets grow. Must be at least one.
811  uint32 step_multiplier = 4;
812}
813
814// LinearIntegerBuckets is used to define a partition of the integers into a
815// finite number of buckets of equal size.
816//
817// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
818// Bucket 0 is the underflow bucket: (-infinity, floor)
819// Bucket n+1 is the overflow bucket: [floor + step_size * n, +infinity)
820//
821// For i = 1 to n, the bucket i is defined as
822// [floor + step_size * (i-1), floor + step_size * i)
823//
824// Example: floor = 0, num_buckets = 3, step_size = 10.
825// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity)
826message LinearIntegerBuckets {
827  int64 floor = 1;  // Inclusive lower bound of bucket 1; lesser values underflow.
828
829  // The number n of equal-size buckets. Must be at least one.
830  uint32 num_buckets = 2;
831
832  // The width of each of the n equal-size buckets. Must be at least one.
833  uint32 step_size = 3;
834}
835
836message IntegerBuckets {  // An integer bucketing scheme plus output options.
837  oneof buckets {  // The scheme in use: exponential or linear.
838    ExponentialIntegerBuckets exponential = 1;
839    LinearIntegerBuckets linear = 2;
840  }
841
842  // If set to true, empty buckets are omitted from the report data, rather
843  // than added so that every histogram has a row for every bucket. Buckets
844  // with a zero count may still occur if logged data contains a zero count.
845  // This field cannot be set on reports with added privacy.
846  bool sparse_output = 3;
847}
848
849message StringSketchParameters {  // Dimensions of a Count-Min Sketch.
850  // Number of hashes in the Count-Min Sketch.
851  int32 num_hashes = 1;
852
853  // Number of cells per hash in the Count-Min Sketch.
854  int32 num_cells_per_hash = 2;
855}
856