1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_ 17 #define TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_ 18 19 #include <limits> 20 #include <string> 21 22 #include "tensorflow/core/lib/gtl/array_slice.h" 23 #include "tensorflow/core/platform/types.h" 24 25 namespace tensorflow { 26 class RangeEncoder { 27 public: 28 // `precision` determines the granularity of probability masses passed to 29 // Encode() function below. 30 // 31 // REQUIRES: 0 < precision <= 16. 32 explicit RangeEncoder(int precision); 33 34 // Encodes a half-open interval [lower / 2^precision, upper / 2^precision). 35 // Suppose each character to be encoded is from an integer-valued 36 // distribution. When encoding a random character x0, the arguments lower and 37 // upper represent 38 // Pr(X < x0) = lower / 2^precision, 39 // Pr(X < x0 + 1) = upper / 2^precision, 40 // where X is a random variable following the distribution. 41 // 42 // For example, assume that the distribution has possible outputs 0, 1, 2, ... 43 // To encode value 0, lower = 0 and upper = Pr(X = 0). 44 // To encode value 1, lower = Pr(X = 0) and upper = Pr(X = 0 or 1). 45 // To encode value 2, lower = Pr(X = 0 or 1) and upper = Pr(X = 0, 1, or 2). 46 // ... 47 // 48 // REQUIRES: 0 <= lower < upper <= 2^precision. 49 void Encode(int32 lower, int32 upper, string* sink); 50 51 // The encode may contain some under-determined values from previous encoding. 52 // After Encode() calls, Finalize() must be called. Otherwise the encoded 53 // string may not be decoded. 54 void Finalize(string* sink); 55 56 private: 57 uint32 base_ = 0; 58 uint32 size_minus1_ = std::numeric_limits<uint32>::max(); 59 uint64 delay_ = 0; 60 61 const int precision_; 62 }; 63 64 class RangeDecoder { 65 public: 66 // Holds a reference to `source`. The caller has to make sure that `source` 67 // outlives the decoder object. 68 // 69 // REQUIRES: `precision` must be the same as the encoder's precision. 70 // REQUIRES: 0 < precision <= 16. 71 RangeDecoder(const string& source, int precision); 72 73 // Decodes a character from `source` using CDF. The size of `cdf` should be 74 // one more than the number of the character in the alphabet. 75 // 76 // If x0, x1, x2, ... are the possible characters (in increasing order) from 77 // the distribution, then 78 // cdf[0] = 0 79 // cdf[1] = Pr(X <= x0), 80 // cdf[2] = Pr(X <= x1), 81 // cdf[3] = Pr(X <= x2), 82 // ... 83 // 84 // The returned value is an index to `cdf` where the decoded character 85 // corresponds to. 86 // 87 // REQUIRES: cdf.size() > 1. 88 // REQUIRES: cdf[i] <= cdf[i + 1] for i = 0, 1, ..., cdf.size() - 2. 89 // REQUIRES: cdf[cdf.size() - 1] <= 2^precision. 90 // 91 // In practice the last element of `cdf` should equal to 2^precision. 92 int32 Decode(gtl::ArraySlice<int32> cdf); 93 94 private: 95 void Read16BitValue(); 96 97 uint32 base_ = 0; 98 uint32 size_minus1_ = std::numeric_limits<uint32>::max(); 99 uint32 value_ = 0; 100 101 string::const_iterator current_; 102 const string::const_iterator begin_; 103 const string::const_iterator end_; 104 105 const int precision_; 106 }; 107 } // namespace tensorflow 108 109 #endif // TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_ 110