• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     https://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "aggregator.pb.h"
20 #include "compactor_stack.h"
21 #include "random_generator.h"
22 
23 // KLL Quantile - Implementation of Approximate quantiles algorithm based on
24 // the KLL16 paper (up to section 3), see https://arxiv.org/abs/1603.05346.
25 //
26 // Simplified, single-type library for use on Android devices which is
27 // compatible with internal libraries. Cannot use Abseil; trimmed down feature
28 // set, doing leaf aggregation only; no multi-type support/templates, no dynamic
29 // types.
30 namespace dist_proc {
31 namespace aggregation {
32 class KllQuantileOptions;
33 
34 class KllQuantile {
35 public:
36     static std::unique_ptr<KllQuantile> Create(std::string* error = nullptr);
37     static std::unique_ptr<KllQuantile> Create(const KllQuantileOptions& options,
38                                                std::string* error = nullptr);
num_values()39     int64_t num_values() const {
40         return num_values_;
41     }
inv_eps()42     int64_t inv_eps() const {
43         return inv_eps_;
44     }
k()45     int k() const {
46         return compactor_stack_.k();
47     }
48     // Reset the aggregator to its state just after construction.
49     void Reset();
50     void Add(int64_t value);
51 
52     // Adds a value to the aggregator with multiplicity 'weight' (same as adding
53     // the value with Add(value) 'weight' times). Does nothing if weight <= 0.
54     //
55     // If your weights exceed the max int32 size, we recommend to scale all
56     // weights down to make them fit within that bound, and use (randomized)
57     // rounding where needed.
58     // Background: Even at high precision values (inv_eps ~100k), the compactor
59     // stack will only accurately track weights in a range of ~31 consecutive
60     // powers of 2, covering the largest weights encountered, while reservoir
61     // sampling is used for weights below that range. So the precision loss by
62     // downscaling and randomized rounding is negligible.
63     void AddWeighted(int64_t value, int weight);
64 
65     // Not safe to be called concurrently.
66     zetasketch::android::AggregatorStateProto SerializeToProto();
67 
IsSamplerOn()68     bool IsSamplerOn() const {
69         return compactor_stack_.IsSamplerOn();
70     }
71 
72 private:
73     // Constructor.
KllQuantile(int64_t inv_eps,int64_t inv_delta,int k,RandomGenerator * random)74     KllQuantile(int64_t inv_eps, int64_t inv_delta, int k, RandomGenerator* random)
75         : inv_eps_(inv_eps),
76           owned_random_(random != nullptr ? nullptr : std::make_unique<MTRandomGenerator>()),
77           compactor_stack_(inv_eps_, inv_delta, k,
78                            random != nullptr ? random : owned_random_.get()) {
79         Reset();
80     }
81     void UpdateMin(const int64_t value);
82     void UpdateMax(const int64_t value);
83     int64_t inv_eps_;
84     // The (exact) minimum item encountered among all items.
85     int64_t min_{};
86     // The (exact) maximum item encountered among all items.
87     int64_t max_{};
88     // Number of items added into the aggregator.
89     int64_t num_values_;
90     // Owned MTRandom instance, if not given a RandomGenerator.
91     std::unique_ptr<MTRandomGenerator> owned_random_;
92     // Stack of compactors to which newly added items are added;
93     // it maintains a 'sketch' of hitherto added items.
94     internal::CompactorStack compactor_stack_;
95 
96     KllQuantile(const KllQuantile&) = delete;
97     KllQuantile& operator=(const KllQuantile&) = delete;
98 };
99 
100 // Explicitly set KLL's epsilon and delta parameters that control
101 // approximation error and failure probability.
102 // *inv_eps is 1/epsilon, where epsilon is the approximation error parameter:
103 // When a user queries for a quantile phi, the rank of the returned
104 // approximate answer may be +/- (epsilon * n) off from the correct
105 // rank ceil(phi * n), where n is the number of aggregated items.
106 // *inv_delta is 1/delta, where delta is the failure probability parameter:
107 // with delta probability, at most one out of all possible quantile
108 // queries can be further off than the approximation guarantee.
109 class KllQuantileOptions {
110 public:
111     // Set inv_eps. Default value: 1000
set_inv_eps(int64_t inv_eps)112     void set_inv_eps(int64_t inv_eps) {
113         inv_eps_ = inv_eps;
114     }
115     // Set inv_delta. Default value: 100000
set_inv_delta(int64_t inv_delta)116     void set_inv_delta(int64_t inv_delta) {
117         inv_delta_ = inv_delta;
118     }
119     // Set k, overriding the default calculation of this parameter from inv_eps
120     // and inv_delta.
set_k(int k)121     void set_k(int k) {
122         k_ = k;
123     }
124     // Set RandomGenerator pointer to use (caller retains ownership). Default is
125     // to use an owned MTRandomGenerator instance.
set_random(RandomGenerator * random)126     void set_random(RandomGenerator* random) {
127         random_ = random;
128     }
inv_eps()129     int64_t inv_eps() const {
130         return inv_eps_;
131     }
inv_delta()132     int64_t inv_delta() const {
133         return inv_delta_;
134     }
k()135     int k() const {
136         return k_;
137     }
random()138     RandomGenerator* random() const {
139         return random_;
140     }
141 
142 private:
143     int64_t inv_eps_ = 1000;
144     int64_t inv_delta_ = 100000;
145     int k_ = 0;
146     RandomGenerator* random_ = nullptr;
147 };
148 
149 }  // namespace aggregation
150 }  // namespace dist_proc
151