1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
// The main idea in Courgette is to do patching *under a transformation*.  The
// input is transformed into a new representation, patching occurs in the new
// representation, and then the transform is reversed to get the patched data.
8 //
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
11
12 // This file contains the code to create the patch.
13
14
15 #include "courgette/ensemble.h"
16
17 #include <limits>
18 #include <vector>
19
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time/time.h"
23
24 #include "courgette/crc.h"
25 #include "courgette/difference_estimator.h"
26 #include "courgette/region.h"
27 #include "courgette/simple_delta.h"
28 #include "courgette/streams.h"
29 #include "courgette/third_party/bsdiff.h"
30
31 #include "courgette/patcher_x86_32.h"
32 #include "courgette/patch_generator_x86_32.h"
33
34 namespace courgette {
35
TransformationPatchGenerator(Element * old_element,Element * new_element,TransformationPatcher * patcher)36 TransformationPatchGenerator::TransformationPatchGenerator(
37 Element* old_element,
38 Element* new_element,
39 TransformationPatcher* patcher)
40 : old_element_(old_element),
41 new_element_(new_element),
42 patcher_(patcher) {
43 }
44
~TransformationPatchGenerator()45 TransformationPatchGenerator::~TransformationPatchGenerator() {
46 delete patcher_;
47 }
48
49 // The default implementation of PredictTransformParameters delegates to the
50 // patcher.
PredictTransformParameters(SinkStreamSet * prediction)51 Status TransformationPatchGenerator::PredictTransformParameters(
52 SinkStreamSet* prediction) {
53 return patcher_->PredictTransformParameters(prediction);
54 }
55
56 // The default implementation of Reform delegates to the patcher.
Reform(SourceStreamSet * transformed_element,SinkStream * reformed_element)57 Status TransformationPatchGenerator::Reform(
58 SourceStreamSet* transformed_element,
59 SinkStream* reformed_element) {
60 return patcher_->Reform(transformed_element, reformed_element);
61 }
62
63 // Makes a TransformationPatchGenerator of the appropriate variety for the
64 // Element kind.
MakeGenerator(Element * old_element,Element * new_element)65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
66 Element* new_element) {
67 switch (new_element->kind()) {
68 case EXE_UNKNOWN:
69 break;
70 case EXE_WIN_32_X86: {
71 TransformationPatchGenerator* generator =
72 new PatchGeneratorX86_32(
73 old_element,
74 new_element,
75 new PatcherX86_32(old_element->region()),
76 EXE_WIN_32_X86);
77 return generator;
78 }
79 case EXE_ELF_32_X86: {
80 TransformationPatchGenerator* generator =
81 new PatchGeneratorX86_32(
82 old_element,
83 new_element,
84 new PatcherX86_32(old_element->region()),
85 EXE_ELF_32_X86);
86 return generator;
87 }
88 case EXE_ELF_32_ARM: {
89 TransformationPatchGenerator* generator =
90 new PatchGeneratorX86_32(
91 old_element,
92 new_element,
93 new PatcherX86_32(old_element->region()),
94 EXE_ELF_32_ARM);
95 return generator;
96 }
97 case EXE_WIN_32_X64: {
98 TransformationPatchGenerator* generator =
99 new PatchGeneratorX86_32(
100 old_element,
101 new_element,
102 new PatcherX86_32(old_element->region()),
103 EXE_WIN_32_X64);
104 return generator;
105 }
106 }
107
108 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
109 return NULL;
110 }
111
112 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element
113 // from 'old' is matched as the closest element to multiple elements from 'new'.
114 // Each time this happens, the old element is transformed and serialized. This
115 // is a problem when the old element is huge compared with the new element
116 // because the mutliple serialized copies can be much bigger than the size of
117 // either ensemble.
118 //
119 // The right way to avoid this is to ensure any one element from 'old' is
120 // serialized once, which requires matching code in the patch application.
121 //
122 // This is a quick hack to avoid the problem by prohibiting a big difference in
123 // size between matching elements.
UnsafeDifference(Element * old_element,Element * new_element)124 bool UnsafeDifference(Element* old_element, Element* new_element) {
125 double kMaxBloat = 2.0;
126 size_t kMinWorrysomeDifference = 2 << 20; // 2MB
127 size_t old_size = old_element->region().length();
128 size_t new_size = new_element->region().length();
129 size_t low_size = std::min(old_size, new_size);
130 size_t high_size = std::max(old_size, new_size);
131 if (high_size - low_size < kMinWorrysomeDifference) return false;
132 if (high_size < low_size * kMaxBloat) return false;
133 return true;
134 }
135
136 // FindGenerators finds TransformationPatchGenerators for the elements of
137 // |new_ensemble|. For each element of |new_ensemble| we find the closest
138 // matching element from |old_ensemble| and use that as the basis for
139 // differential compression. The elements have to be the same kind so as to
140 // support transformation into the same kind of 'new representation'.
141 //
FindGenerators(Ensemble * old_ensemble,Ensemble * new_ensemble,std::vector<TransformationPatchGenerator * > * generators)142 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
143 std::vector<TransformationPatchGenerator*>* generators) {
144 base::Time start_find_time = base::Time::Now();
145 old_ensemble->FindEmbeddedElements();
146 new_ensemble->FindEmbeddedElements();
147 VLOG(1) << "done FindEmbeddedElements "
148 << (base::Time::Now() - start_find_time).InSecondsF();
149
150 std::vector<Element*> old_elements(old_ensemble->elements());
151 std::vector<Element*> new_elements(new_ensemble->elements());
152
153 VLOG(1) << "old has " << old_elements.size() << " elements";
154 VLOG(1) << "new has " << new_elements.size() << " elements";
155
156 DifferenceEstimator difference_estimator;
157 std::vector<DifferenceEstimator::Base*> bases;
158
159 base::Time start_bases_time = base::Time::Now();
160 for (size_t i = 0; i < old_elements.size(); ++i) {
161 bases.push_back(
162 difference_estimator.MakeBase(old_elements[i]->region()));
163 }
164 VLOG(1) << "done make bases "
165 << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
166
167 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) {
168 Element* new_element = new_elements[new_index];
169 DifferenceEstimator::Subject* new_subject =
170 difference_estimator.MakeSubject(new_element->region());
171
172 // Search through old elements to find the best match.
173 //
174 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
175 // new_ensemble probably have a very similar structure. We can make the
176 // search faster by making the comparison provided by DifferenceEstimator
177 // more nuanced, returning early if the measured difference is greater than
178 // the current best. This will be most effective if we can arrange that the
179 // first elements we try to match are likely the 'right' ones. We could
180 // prioritize elements that are of a similar size or similar position in the
181 // sequence of elements.
182 //
183 Element* best_old_element = NULL;
184 size_t best_difference = std::numeric_limits<size_t>::max();
185 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) {
186 Element* old_element = old_elements[old_index];
187 // Elements of different kinds are incompatible.
188 if (old_element->kind() != new_element->kind())
189 continue;
190
191 if (UnsafeDifference(old_element, new_element))
192 continue;
193
194 base::Time start_compare = base::Time::Now();
195 DifferenceEstimator::Base* old_base = bases[old_index];
196 size_t difference = difference_estimator.Measure(old_base, new_subject);
197
198 VLOG(1) << "Compare " << old_element->Name()
199 << " to " << new_element->Name()
200 << " --> " << difference
201 << " in " << (base::Time::Now() - start_compare).InSecondsF()
202 << "s";
203 if (difference == 0) {
204 VLOG(1) << "Skip " << new_element->Name()
205 << " - identical to " << old_element->Name();
206 best_difference = 0;
207 best_old_element = NULL;
208 break;
209 }
210 if (difference < best_difference) {
211 best_difference = difference;
212 best_old_element = old_element;
213 }
214 }
215
216 if (best_old_element) {
217 VLOG(1) << "Matched " << best_old_element->Name()
218 << " to " << new_element->Name()
219 << " --> " << best_difference;
220 TransformationPatchGenerator* generator =
221 MakeGenerator(best_old_element, new_element);
222 if (generator)
223 generators->push_back(generator);
224 }
225 }
226
227 VLOG(1) << "done FindGenerators found " << generators->size()
228 << " in " << (base::Time::Now() - start_find_time).InSecondsF()
229 << "s";
230
231 return C_OK;
232 }
233
FreeGenerators(std::vector<TransformationPatchGenerator * > * generators)234 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
235 for (size_t i = 0; i < generators->size(); ++i) {
236 delete (*generators)[i];
237 }
238 generators->clear();
239 }
240
241 ////////////////////////////////////////////////////////////////////////////////
242
GenerateEnsemblePatch(SourceStream * base,SourceStream * update,SinkStream * final_patch)243 Status GenerateEnsemblePatch(SourceStream* base,
244 SourceStream* update,
245 SinkStream* final_patch) {
246 VLOG(1) << "start GenerateEnsemblePatch";
247 base::Time start_time = base::Time::Now();
248
249 Region old_region(base->Buffer(), base->Remaining());
250 Region new_region(update->Buffer(), update->Remaining());
251 Ensemble old_ensemble(old_region, "old");
252 Ensemble new_ensemble(new_region, "new");
253 std::vector<TransformationPatchGenerator*> generators;
254 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
255 &generators);
256 if (generators_status != C_OK)
257 return generators_status;
258
259 SinkStreamSet patch_streams;
260
261 SinkStream* tranformation_descriptions = patch_streams.stream(0);
262 SinkStream* parameter_correction = patch_streams.stream(1);
263 SinkStream* transformed_elements_correction = patch_streams.stream(2);
264 SinkStream* ensemble_correction = patch_streams.stream(3);
265
266 size_t number_of_transformations = generators.size();
267 if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
268 return C_STREAM_ERROR;
269
270 for (size_t i = 0; i < number_of_transformations; ++i) {
271 ExecutableType kind = generators[i]->Kind();
272 if (!tranformation_descriptions->WriteVarint32(kind))
273 return C_STREAM_ERROR;
274 }
275
276 for (size_t i = 0; i < number_of_transformations; ++i) {
277 Status status =
278 generators[i]->WriteInitialParameters(tranformation_descriptions);
279 if (status != C_OK)
280 return status;
281 }
282
283 //
284 // Generate sub-patch for parameters.
285 //
286 SinkStreamSet predicted_parameters_sink;
287 SinkStreamSet corrected_parameters_sink;
288
289 for (size_t i = 0; i < number_of_transformations; ++i) {
290 SinkStreamSet single_predicted_parameters;
291 Status status;
292 status = generators[i]->PredictTransformParameters(
293 &single_predicted_parameters);
294 if (status != C_OK)
295 return status;
296 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
297 return C_STREAM_ERROR;
298
299 SinkStreamSet single_corrected_parameters;
300 status = generators[i]->CorrectedTransformParameters(
301 &single_corrected_parameters);
302 if (status != C_OK)
303 return status;
304 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
305 return C_STREAM_ERROR;
306 }
307
308 SinkStream linearized_predicted_parameters;
309 SinkStream linearized_corrected_parameters;
310
311 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
312 return C_STREAM_ERROR;
313 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
314 return C_STREAM_ERROR;
315
316 SourceStream predicted_parameters_source;
317 SourceStream corrected_parameters_source;
318 predicted_parameters_source.Init(linearized_predicted_parameters);
319 corrected_parameters_source.Init(linearized_corrected_parameters);
320
321 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
322 &corrected_parameters_source,
323 parameter_correction);
324 if (delta1_status != C_OK)
325 return delta1_status;
326
327 //
328 // Generate sub-patch for elements.
329 //
330 corrected_parameters_source.Init(linearized_corrected_parameters);
331 SourceStreamSet corrected_parameters_source_set;
332 if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
333 return C_STREAM_ERROR;
334
335 SinkStreamSet predicted_transformed_elements;
336 SinkStreamSet corrected_transformed_elements;
337
338 for (size_t i = 0; i < number_of_transformations; ++i) {
339 SourceStreamSet single_parameters;
340 if (!corrected_parameters_source_set.ReadSet(&single_parameters))
341 return C_STREAM_ERROR;
342 SinkStreamSet single_predicted_transformed_element;
343 SinkStreamSet single_corrected_transformed_element;
344 Status status = generators[i]->Transform(
345 &single_parameters,
346 &single_predicted_transformed_element,
347 &single_corrected_transformed_element);
348 if (status != C_OK)
349 return status;
350 if (!single_parameters.Empty())
351 return C_STREAM_NOT_CONSUMED;
352 if (!predicted_transformed_elements.WriteSet(
353 &single_predicted_transformed_element))
354 return C_STREAM_ERROR;
355 if (!corrected_transformed_elements.WriteSet(
356 &single_corrected_transformed_element))
357 return C_STREAM_ERROR;
358 }
359
360 if (!corrected_parameters_source_set.Empty())
361 return C_STREAM_NOT_CONSUMED;
362
363 SinkStream linearized_predicted_transformed_elements;
364 SinkStream linearized_corrected_transformed_elements;
365
366 if (!predicted_transformed_elements.CopyTo(
367 &linearized_predicted_transformed_elements))
368 return C_STREAM_ERROR;
369 if (!corrected_transformed_elements.CopyTo(
370 &linearized_corrected_transformed_elements))
371 return C_STREAM_ERROR;
372
373 SourceStream predicted_transformed_elements_source;
374 SourceStream corrected_transformed_elements_source;
375 predicted_transformed_elements_source
376 .Init(linearized_predicted_transformed_elements);
377 corrected_transformed_elements_source
378 .Init(linearized_corrected_transformed_elements);
379
380 Status delta2_status =
381 GenerateSimpleDelta(&predicted_transformed_elements_source,
382 &corrected_transformed_elements_source,
383 transformed_elements_correction);
384 if (delta2_status != C_OK)
385 return delta2_status;
386
387 // Last use, free storage.
388 linearized_predicted_transformed_elements.Retire();
389
390 //
391 // Generate sub-patch for whole enchilada.
392 //
393 SinkStream predicted_ensemble;
394
395 if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
396 return C_STREAM_ERROR;
397
398 SourceStreamSet corrected_transformed_elements_source_set;
399 corrected_transformed_elements_source
400 .Init(linearized_corrected_transformed_elements);
401 if (!corrected_transformed_elements_source_set
402 .Init(&corrected_transformed_elements_source))
403 return C_STREAM_ERROR;
404
405 for (size_t i = 0; i < number_of_transformations; ++i) {
406 SourceStreamSet single_corrected_transformed_element;
407 if (!corrected_transformed_elements_source_set.ReadSet(
408 &single_corrected_transformed_element))
409 return C_STREAM_ERROR;
410 Status status = generators[i]->Reform(&single_corrected_transformed_element,
411 &predicted_ensemble);
412 if (status != C_OK)
413 return status;
414 if (!single_corrected_transformed_element.Empty())
415 return C_STREAM_NOT_CONSUMED;
416 }
417
418 if (!corrected_transformed_elements_source_set.Empty())
419 return C_STREAM_NOT_CONSUMED;
420
421 // No more references to this stream's buffer.
422 linearized_corrected_transformed_elements.Retire();
423
424 FreeGenerators(&generators);
425
426 size_t final_patch_input_size = predicted_ensemble.Length();
427 SourceStream predicted_ensemble_source;
428 predicted_ensemble_source.Init(predicted_ensemble);
429 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
430 update,
431 ensemble_correction);
432 if (delta3_status != C_OK)
433 return delta3_status;
434
435 //
436 // Final output stream has a header followed by a StreamSet.
437 //
438 if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
439 !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
440 !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
441 old_region.length())) ||
442 !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
443 new_region.length())) ||
444 !final_patch->WriteSizeVarint32(final_patch_input_size) ||
445 !patch_streams.CopyTo(final_patch)) {
446 return C_STREAM_ERROR;
447 }
448
449 VLOG(1) << "done GenerateEnsemblePatch "
450 << (base::Time::Now() - start_time).InSecondsF() << "s";
451
452 return C_OK;
453 }
454
455 } // namespace
456