1 /* 2 * Copyright 2021 Google LLC 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * https://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "aggregator.pb.h" 20 #include "compactor_stack.h" 21 #include "random_generator.h" 22 23 // KLL Quantile - Implementation of Approximate quantiles algorithm based on 24 // the KLL16 paper (up to section 3), see https://arxiv.org/abs/1603.05346. 25 // 26 // Simplified, single-type library for use on Android devices which is 27 // compatible with internal libraries. Cannot use Abseil; trimmed down feature 28 // set, doing leaf aggregation only; no multi-type support/templates, no dynamic 29 // types. 30 namespace dist_proc { 31 namespace aggregation { 32 class KllQuantileOptions; 33 34 class KllQuantile { 35 public: 36 static std::unique_ptr<KllQuantile> Create(std::string* error = nullptr); 37 static std::unique_ptr<KllQuantile> Create(const KllQuantileOptions& options, 38 std::string* error = nullptr); num_values()39 int64_t num_values() const { 40 return num_values_; 41 } inv_eps()42 int64_t inv_eps() const { 43 return inv_eps_; 44 } k()45 int k() const { 46 return compactor_stack_.k(); 47 } 48 // Reset the aggregator to its state just after construction. 49 void Reset(); 50 void Add(int64_t value); 51 52 // Adds a value to the aggregator with multiplicity 'weight' (same as adding 53 // the value with Add(value) 'weight' times). Does nothing if weight <= 0. 54 // 55 // If your weights exceed the max int32 size, we recommend to scale all 56 // weights down to make them fit within that bound, and use (randomized) 57 // rounding where needed. 58 // Background: Even at high precision values (inv_eps ~100k), the compactor 59 // stack will only accurately track weights in a range of ~31 consecutive 60 // powers of 2, covering the largest weights encountered, while reservoir 61 // sampling is used for weights below that range. So the precision loss by 62 // downscaling and randomized rounding is negligible. 63 void AddWeighted(int64_t value, int weight); 64 65 // Not safe to be called concurrently. 66 zetasketch::android::AggregatorStateProto SerializeToProto(); 67 IsSamplerOn()68 bool IsSamplerOn() const { 69 return compactor_stack_.IsSamplerOn(); 70 } 71 72 private: 73 // Constructor. KllQuantile(int64_t inv_eps,int64_t inv_delta,int k,RandomGenerator * random)74 KllQuantile(int64_t inv_eps, int64_t inv_delta, int k, RandomGenerator* random) 75 : inv_eps_(inv_eps), 76 owned_random_(random != nullptr ? nullptr : std::make_unique<MTRandomGenerator>()), 77 compactor_stack_(inv_eps_, inv_delta, k, 78 random != nullptr ? random : owned_random_.get()) { 79 Reset(); 80 } 81 void UpdateMin(const int64_t value); 82 void UpdateMax(const int64_t value); 83 int64_t inv_eps_; 84 // The (exact) minimum item encountered among all items. 85 int64_t min_{}; 86 // The (exact) maximum item encountered among all items. 87 int64_t max_{}; 88 // Number of items added into the aggregator. 89 int64_t num_values_; 90 // Owned MTRandom instance, if not given a RandomGenerator. 91 std::unique_ptr<MTRandomGenerator> owned_random_; 92 // Stack of compactors to which newly added items are added; 93 // it maintains a 'sketch' of hitherto added items. 94 internal::CompactorStack compactor_stack_; 95 96 KllQuantile(const KllQuantile&) = delete; 97 KllQuantile& operator=(const KllQuantile&) = delete; 98 }; 99 100 // Explicitly set KLL's epsilon and delta parameters that control 101 // approximation error and failure probability. 102 // *inv_eps is 1/epsilon, where epsilon is the approximation error parameter: 103 // When a user queries for a quantile phi, the rank of the returned 104 // approximate answer may be +/- (epsilon * n) off from the correct 105 // rank ceil(phi * n), where n is the number of aggregated items. 106 // *inv_delta is 1/delta, where delta is the failure probability parameter: 107 // with delta probability, at most one out of all possible quantile 108 // queries can be further off than the approximation guarantee. 109 class KllQuantileOptions { 110 public: 111 // Set inv_eps. Default value: 1000 set_inv_eps(int64_t inv_eps)112 void set_inv_eps(int64_t inv_eps) { 113 inv_eps_ = inv_eps; 114 } 115 // Set inv_delta. Default value: 100000 set_inv_delta(int64_t inv_delta)116 void set_inv_delta(int64_t inv_delta) { 117 inv_delta_ = inv_delta; 118 } 119 // Set k, overriding the default calculation of this parameter from inv_eps 120 // and inv_delta. set_k(int k)121 void set_k(int k) { 122 k_ = k; 123 } 124 // Set RandomGenerator pointer to use (caller retains ownership). Default is 125 // to use an owned MTRandomGenerator instance. set_random(RandomGenerator * random)126 void set_random(RandomGenerator* random) { 127 random_ = random; 128 } inv_eps()129 int64_t inv_eps() const { 130 return inv_eps_; 131 } inv_delta()132 int64_t inv_delta() const { 133 return inv_delta_; 134 } k()135 int k() const { 136 return k_; 137 } random()138 RandomGenerator* random() const { 139 return random_; 140 } 141 142 private: 143 int64_t inv_eps_ = 1000; 144 int64_t inv_delta_ = 100000; 145 int k_ = 0; 146 RandomGenerator* random_ = nullptr; 147 }; 148 149 } // namespace aggregation 150 } // namespace dist_proc 151