• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
17 #define TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
18 
19 #include <limits>
20 #include <string>
21 
22 #include "tensorflow/core/lib/gtl/array_slice.h"
23 #include "tensorflow/core/platform/types.h"
24 
25 namespace tensorflow {
26 class RangeEncoder {
27  public:
28   // `precision` determines the granularity of probability masses passed to
29   // Encode() function below.
30   //
31   // REQUIRES: 0 < precision <= 16.
32   explicit RangeEncoder(int precision);
33 
34   // Encodes a half-open interval [lower / 2^precision, upper / 2^precision).
35   // Suppose each character to be encoded is from an integer-valued
36   // distribution. When encoding a random character x0, the arguments lower and
37   // upper represent
38   //   Pr(X < x0) = lower / 2^precision,
39   //   Pr(X < x0 + 1) = upper / 2^precision,
40   // where X is a random variable following the distribution.
41   //
42   // For example, assume that the distribution has possible outputs 0, 1, 2, ...
43   // To encode value 0, lower = 0 and upper = Pr(X = 0).
44   // To encode value 1, lower = Pr(X = 0) and upper = Pr(X = 0 or 1).
45   // To encode value 2, lower = Pr(X = 0 or 1) and upper = Pr(X = 0, 1, or 2).
46   // ...
47   //
48   // REQUIRES: 0 <= lower < upper <= 2^precision.
49   void Encode(int32 lower, int32 upper, string* sink);
50 
51   // The encode may contain some under-determined values from previous encoding.
52   // After Encode() calls, Finalize() must be called. Otherwise the encoded
53   // string may not be decoded.
54   void Finalize(string* sink);
55 
56  private:
57   uint32 base_ = 0;
58   uint32 size_minus1_ = std::numeric_limits<uint32>::max();
59   uint64 delay_ = 0;
60 
61   const int precision_;
62 };
63 
64 class RangeDecoder {
65  public:
66   // Holds a reference to `source`. The caller has to make sure that `source`
67   // outlives the decoder object.
68   //
69   // REQUIRES: `precision` must be the same as the encoder's precision.
70   // REQUIRES: 0 < precision <= 16.
71   RangeDecoder(const string& source, int precision);
72 
73   // Decodes a character from `source` using CDF. The size of `cdf` should be
74   // one more than the number of the character in the alphabet.
75   //
76   // If x0, x1, x2, ... are the possible characters (in increasing order) from
77   // the distribution, then
78   //   cdf[0] = 0
79   //   cdf[1] = Pr(X <= x0),
80   //   cdf[2] = Pr(X <= x1),
81   //   cdf[3] = Pr(X <= x2),
82   //   ...
83   //
84   // The returned value is an index to `cdf` where the decoded character
85   // corresponds to.
86   //
87   // REQUIRES: cdf.size() > 1.
88   // REQUIRES: cdf[i] <= cdf[i + 1] for i = 0, 1, ..., cdf.size() - 2.
89   // REQUIRES: cdf[cdf.size() - 1] <= 2^precision.
90   //
91   // In practice the last element of `cdf` should equal to 2^precision.
92   int32 Decode(gtl::ArraySlice<int32> cdf);
93 
94  private:
95   void Read16BitValue();
96 
97   uint32 base_ = 0;
98   uint32 size_minus1_ = std::numeric_limits<uint32>::max();
99   uint32 value_ = 0;
100 
101   string::const_iterator current_;
102   const string::const_iterator begin_;
103   const string::const_iterator end_;
104 
105   const int precision_;
106 };
107 }  // namespace tensorflow
108 
109 #endif  // TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
110