• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef OPEN_VCDIFF_ENCODETABLE_H_
17 #define OPEN_VCDIFF_ENCODETABLE_H_
18 
19 #include <config.h>
20 #include <stddef.h>  // size_t
21 #include <stdint.h>  // int32_t
22 #include <string>
23 #include "addrcache.h"
24 #include "checksum.h"
25 #include "codetable.h"
26 #include "codetablewriter_interface.h"
27 
28 namespace open_vcdiff {
29 
30 class OutputStringInterface;
31 class VCDiffInstructionMap;
32 
33 // The method calls after construction *must* conform
34 // to the following pattern:
35 //    {{Add|Copy|Run}* [AddChecksum] Output}*
36 //
37 // When Output has been called in this sequence, a complete target window
38 // (as defined in RFC 3284 section 4.3) will have been appended to
39 // out (unless no calls to Add, Run, or Copy were made, in which
40 // case Output will do nothing.)  The output will not be available for use
41 // until after each call to Output().
42 //
43 // NOT threadsafe.
44 //
45 class VCDiffCodeTableWriter : public CodeTableWriterInterface {
46  public:
47   // This constructor uses the default code table.
48   // If interleaved is true, the encoder writes each delta file window
49   // by interleaving instructions and sizes with their corresponding
50   // addresses and data, rather than placing these elements into three
51   // separate sections.  This facilitates providing partially
52   // decoded results when only a portion of a delta file window
53   // is received (e.g. when HTTP over TCP is used as the
54   // transmission protocol.)  The interleaved format is
55   // not consistent with the VCDIFF draft standard.
56   //
57   explicit VCDiffCodeTableWriter(bool interleaved);
58 
59   // Uses a non-standard code table and non-standard cache sizes.  The caller
60   // must guarantee that code_table_data remains allocated for the lifetime of
61   // the VCDiffCodeTableWriter object.  Note that this is different from how
62   // VCDiffCodeTableReader::UseCodeTable works.  It is assumed that a given
63   // encoder will use either the default code table or a statically-defined
64   // non-standard code table, whereas the decoder must have the ability to read
65   // an arbitrary non-standard code table from a delta file and discard it once
66   // the file has been decoded.
67   //
68   VCDiffCodeTableWriter(bool interleaved,
69                         int near_cache_size,
70                         int same_cache_size,
71                         const VCDiffCodeTableData& code_table_data,
72                         unsigned char max_mode);
73 
74   virtual ~VCDiffCodeTableWriter();
75 
76   // Initializes the constructed object for use.
77   // This method must be called after a VCDiffCodeTableWriter is constructed
78   // and before any of its other methods can be called.  It will return
79   // false if there was an error initializing the object, or true if it
80   // was successful.  After the object has been initialized and used,
81   // Init() can be called again to restore the initial state of the object.
82   //
83   virtual bool Init(size_t dictionary_size);
84 
85   // Write the header (as defined in section 4.1 of the RFC) to *out.
86   // This includes information that can be gathered
87   // before the first chunk of input is available.
88   virtual void WriteHeader(OutputStringInterface* out,
89                            VCDiffFormatExtensionFlags format_extensions);
90 
target_length()91   virtual size_t target_length() const { return target_length_; }
92 
93   // Encode an ADD opcode with the "size" bytes starting at data
94   virtual void Add(const char* data, size_t size);
95 
96   // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
97   virtual void Copy(int32_t offset, size_t size);
98 
99   // Encode a RUN opcode for "size" copies of the value "byte".
100   virtual void Run(size_t size, unsigned char byte);
101 
AddChecksum(VCDChecksum checksum)102   virtual void AddChecksum(VCDChecksum checksum) {
103     add_checksum_ = true;
104     checksum_ = checksum;
105   }
106 
107   // Appends the encoded delta window to the output
108   // string.  The output string is not null-terminated and may contain embedded
109   // '\0' characters.
110   virtual void Output(OutputStringInterface* out);
111 
112   // There should not be any need to output more data
113   // since EncodeChunk() encodes a complete target window
114   // and there is no end-of-delta-file marker.
FinishEncoding(OutputStringInterface *)115   virtual void FinishEncoding(OutputStringInterface* /*out*/) {}
116 
117  private:
118   typedef std::string string;
119 
120   // The maximum value for the mode of a COPY instruction.
121   const unsigned char max_mode_;
122 
123   // If interleaved is true, sets data_for_add_and_run_ and
124   // addresses_for_copy_ to point at instructions_and_sizes_,
125   // so that instructions, sizes, addresses and data will be
126   // combined into a single interleaved stream.
127   // If interleaved is false, sets data_for_add_and_run_ and
128   // addresses_for_copy_ to point at their corresponding
129   // separate_... strings, so that the three sections will
130   // be generated separately from one another.
131   //
132   void InitSectionPointers(bool interleaved);
133 
134   // Determines the best opcode to encode an instruction, and appends
135   // or substitutes that opcode and its size into the
136   // instructions_and_sizes_ string.
137   //
138   void EncodeInstruction(VCDiffInstructionType inst,
139                          size_t size,
140                          unsigned char mode);
141 
EncodeInstruction(VCDiffInstructionType inst,size_t size)142   void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
143     return EncodeInstruction(inst, size, 0);
144   }
145 
146   // Calculates the number of bytes needed to store the given size value as a
147   // variable-length integer (VarintBE).
148   static size_t CalculateLengthOfSizeAsVarint(size_t size);
149 
150   // Appends the size value to the string as a variable-length integer.
151   static void AppendSizeToString(size_t size, string* out);
152 
153   // Appends the size value to the output string as a variable-length integer.
154   static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
155 
156   // Calculates the "Length of the delta encoding" field for the delta window
157   // header, based on the sizes of the sections and of the other header
158   // elements.
159   size_t CalculateLengthOfTheDeltaEncoding() const;
160 
161   // None of the following 'string' objects are null-terminated.
162 
163   // A series of instruction opcodes, each of which may be followed
164   // by one or two Varint values representing the size parameters
165   // of the first and second instruction in the opcode.
166   string instructions_and_sizes_;
167 
168   // A series of data arguments (byte values) used for ADD and RUN
169   // instructions.  Depending on whether interleaved output is used
170   // for streaming or not, the pointer may point to
171   // separate_data_for_add_and_run_ or to instructions_and_sizes_.
172   string *data_for_add_and_run_;
173   string separate_data_for_add_and_run_;
174 
175   // A series of Varint addresses used for COPY instructions.
176   // For the SAME mode, a byte value is stored instead of a Varint.
177   // Depending on whether interleaved output is used
178   // for streaming or not, the pointer may point to
179   // separate_addresses_for_copy_ or to instructions_and_sizes_.
180   string *addresses_for_copy_;
181   string separate_addresses_for_copy_;
182 
183   VCDiffAddressCache address_cache_;
184 
185   size_t dictionary_size_;
186 
187   // The number of bytes of target data that has been encoded so far.
188   // Each time Add(), Copy(), or Run() is called, this will be incremented.
189   // The target length is used to compute HERE mode addresses
190   // for COPY instructions, and is also written into the header
191   // of the delta window when Output() is called.
192   //
193   size_t target_length_;
194 
195   const VCDiffCodeTableData* code_table_data_;
196 
197   // The instruction map facilitates finding an opcode quickly given an
198   // instruction inst, size, and mode.  This is an alternate representation
199   // of the same information that is found in code_table_data_.
200   //
201   const VCDiffInstructionMap* instruction_map_;
202 
203   // The zero-based index within instructions_and_sizes_ of the byte
204   // that contains the last single-instruction opcode generated by
205   // EncodeInstruction().  (See that function for exhaustive details.)
206   // It is necessary to use an index rather than a pointer for this value
207   // because instructions_and_sizes_ may be resized, which would invalidate
208   // any pointers into its data buffer.  The value -1 is reserved to mean that
209   // either no opcodes have been generated yet, or else the last opcode
210   // generated was a double-instruction opcode.
211   //
212   int last_opcode_index_;
213 
214   // If true, an Adler32 checksum of the target window data will be written as
215   // a variable-length integer, just after the size of the addresses section.
216   //
217   bool add_checksum_;
218 
219   // The checksum to be written to the current target window,
220   // if add_checksum_ is true.
221   // This will not be calculated based on the individual calls to Add(), Run(),
222   // and Copy(), which would be unnecessarily expensive.  Instead, the code
223   // that uses the VCDiffCodeTableWriter object is expected to calculate
224   // the checksum all at once and to call AddChecksum() with that value.
225   // Must be called sometime before calling Output(), though it can be called
226   // either before or after the calls to Add(), Run(), and Copy().
227   //
228   VCDChecksum checksum_;
229 
230   // Making these private avoids implicit copy constructor & assignment operator
231   VCDiffCodeTableWriter(const VCDiffCodeTableWriter&);  // NOLINT
232   void operator=(const VCDiffCodeTableWriter&);
233 };
234 
235 };  // namespace open_vcdiff
236 
237 #endif  // OPEN_VCDIFF_ENCODETABLE_H_
238