• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Classes to implement the Address Cache and Address Encoding
17 // algorithms described in sections 5.1 - 5.4 of RFC 3284 -
18 // The VCDIFF Generic Differencing and Compression Data Format.
19 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
20 
21 #ifndef OPEN_VCDIFF_ADDRCACHE_H_
22 #define OPEN_VCDIFF_ADDRCACHE_H_
23 
24 #include <config.h>
25 #include <vector>
26 #include "vcdiff_defs.h"  // VCDAddress
27 
28 namespace open_vcdiff {
29 
30 // Implements the "same" and "near" caches
31 // as described in RFC 3284, section 5.  The "near" cache allows
32 // efficient reuse of one of the last four referenced addresses
33 // plus a small offset, and the "same" cache allows efficient reuse
34 // of an exact recent address distinguished by its lowest-order bits.
35 //
36 // NOT threadsafe.
37 //
38 class VCDiffAddressCache {
39  public:
40   // The default cache sizes specified in the RFC
41   static const int kDefaultNearCacheSize = 4;
42   static const int kDefaultSameCacheSize = 3;
43 
44   VCDiffAddressCache(int near_cache_size, int same_cache_size);
45 
46   // This version of the constructor uses the default values
47   // kDefaultNearCacheSize and kDefaultSameCacheSize.
48   VCDiffAddressCache();
49 
50   // Initializes the object before use.  This method must be called after
51   // constructing a VCDiffAddressCache/ object, before any other method may be
52   // called.  This is because Init() validates near_cache_size_ and
53   // same_cache_size_ before initializing the same and near caches.  After the
54   // object has been initialized and used, Init() can be called again to reset
55   // it to its initial state.
56   //
57   bool Init();
58 
near_cache_size()59   int near_cache_size() const { return near_cache_size_; }
60 
same_cache_size()61   int same_cache_size() const { return same_cache_size_; }
62 
63   // Returns the first mode number that represents one of the NEAR modes.
64   // The number of NEAR modes is near_cache_size.  Each NEAR mode refers to
65   // an element of the near_addresses_ array, where a recently-referenced
66   // address is stored.
67   //
FirstNearMode()68   static unsigned char FirstNearMode() {
69     return VCD_FIRST_NEAR_MODE;
70   }
71 
72   // Returns the first mode number that represents one of the SAME modes.
73   // The number of SAME modes is same_cache_size.  Each SAME mode refers to
74   // a block of 256 elements of the same_addresses_ array; the lowest-order
75   // 8 bits of the address are used to find the element of this block that
76   // may match the desired address value.
77   //
FirstSameMode()78   unsigned char FirstSameMode() const {
79     return VCD_FIRST_NEAR_MODE + near_cache_size();
80   }
81 
82   // Returns the maximum valid mode number, which happens to be
83   // the last SAME mode.
84   //
LastMode()85   unsigned char LastMode() const {
86     return FirstSameMode() + same_cache_size() - 1;
87   }
88 
DefaultLastMode()89   static unsigned char DefaultLastMode() {
90     return VCD_FIRST_NEAR_MODE
91         + kDefaultNearCacheSize + kDefaultSameCacheSize - 1;
92   }
93 
94   // See the definition of enum VCDiffModes in vcdiff_defs.h,
95   // as well as section 5.3 of the RFC, for a description of
96   // each address mode type (SELF, HERE, NEAR, and SAME).
IsSelfMode(unsigned char mode)97   static bool IsSelfMode(unsigned char mode) {
98     return mode == VCD_SELF_MODE;
99   }
100 
IsHereMode(unsigned char mode)101   static bool IsHereMode(unsigned char mode) {
102     return mode == VCD_HERE_MODE;
103   }
104 
IsNearMode(unsigned char mode)105   bool IsNearMode(unsigned char mode) const {
106     return (mode >= FirstNearMode()) && (mode < FirstSameMode());
107   }
108 
IsSameMode(unsigned char mode)109   bool IsSameMode(unsigned char mode) const {
110     return (mode >= FirstSameMode()) && (mode <= LastMode());
111   }
112 
DecodeSelfAddress(int32_t encoded_address)113   static VCDAddress DecodeSelfAddress(int32_t encoded_address) {
114     return encoded_address;
115   }
116 
DecodeHereAddress(int32_t encoded_address,VCDAddress here_address)117   static VCDAddress DecodeHereAddress(int32_t encoded_address,
118                                       VCDAddress here_address) {
119     return here_address - encoded_address;
120   }
121 
DecodeNearAddress(unsigned char mode,int32_t encoded_address)122   VCDAddress DecodeNearAddress(unsigned char mode,
123                                int32_t encoded_address) const {
124     return NearAddress(mode - FirstNearMode()) + encoded_address;
125   }
126 
DecodeSameAddress(unsigned char mode,unsigned char encoded_address)127   VCDAddress DecodeSameAddress(unsigned char mode,
128                                unsigned char encoded_address) const {
129     return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address);
130   }
131 
132   // Returns true if, when using the given mode, an encoded address
133   // should be written to the delta file as a variable-length integer;
134   // returns false if the encoded address should be written
135   // as a byte value (unsigned char).
WriteAddressAsVarintForMode(unsigned char mode)136   bool WriteAddressAsVarintForMode(unsigned char mode) const {
137     return !IsSameMode(mode);
138   }
139 
140   // An accessor for an element of the near_addresses_ array.
141   // No bounds checking is performed; the caller must ensure that
142   // Init() has already been called, and that
143   //     0 <= pos < near_cache_size_
144   //
NearAddress(int pos)145   VCDAddress NearAddress(int pos) const {
146     return near_addresses_[pos];
147   }
148 
149   // An accessor for an element of the same_addresses_ array.
150   // No bounds checking is performed; the caller must ensure that
151   // Init() has already been called, and that
152   //     0 <= pos < (same_cache_size_ * 256)
153   //
SameAddress(int pos)154   VCDAddress SameAddress(int pos) const {
155     return same_addresses_[pos];
156   }
157 
158   // This method will be called whenever an address is calculated for an
159   // encoded or decoded COPY instruction, and will update the contents
160   // of the SAME and NEAR caches.
161   //
162   void UpdateCache(VCDAddress address);
163 
164   // Determines the address mode that yields the most compact encoding
165   // of the given address value.  The most compact encoding
166   // is found by looking for the numerically lowest encoded address.
167   // Sets *encoded_addr to the encoded representation of the address
168   // and returns the mode used.
169   //
170   // The caller should pass the return value to the method
171   // WriteAddressAsVarintForMode() to determine whether encoded_addr
172   // should be written to the delta file as a variable-length integer
173   // or as a byte (unsigned char).
174   //
175   unsigned char EncodeAddress(VCDAddress address,
176                               VCDAddress here_address,
177                               VCDAddress* encoded_addr);
178 
179   // Interprets the next value in the address_stream using the provided mode,
180   // which may need to access the SAME or NEAR address cache.  Returns the
181   // decoded address, or one of the following values:
182   //   RESULT_ERROR: An invalid address value was found in address_stream.
183   //   RESULT_END_OF_DATA: The limit address_stream_end was reached before
184   //       the address could be decoded.  If more streamed data is expected,
185   //       this means that the consumer should block and wait for more data
186   //       before continuing to decode.  If no more data is expected, this
187   //       return value signals an error condition.
188   //
189   // If successful, *address_stream will be incremented past the decoded address
190   // position.  If RESULT_ERROR or RESULT_END_OF_DATA is returned,
191   // then the value of *address_stream will not have changed.
192   //
193   VCDAddress DecodeAddress(VCDAddress here_address,
194                            unsigned char mode,
195                            const char** address_stream,
196                            const char* address_stream_end);
197 
198  private:
199   // The number of addresses to be kept in the NEAR cache.
200   const int near_cache_size_;
201   // The number of 256-byte blocks to store in the SAME cache.
202   const int same_cache_size_;
203   // The next position in the NEAR cache to which an address will be written.
204   int       next_slot_;
205   // NEAR cache contents
206   std::vector<VCDAddress> near_addresses_;
207   // SAME cache contents
208   std::vector<VCDAddress> same_addresses_;
209 
210   // Making these private avoids implicit copy constructor & assignment operator
211   VCDiffAddressCache(const VCDiffAddressCache&);  // NOLINT
212   void operator=(const VCDiffAddressCache&);
213 };
214 
215 }  // namespace open_vcdiff
216 
217 #endif  // OPEN_VCDIFF_ADDRCACHE_H_
218