1 /*-
2 * Copyright 2003-2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
29 #endif
30
31 #include "bspatch.h"
32
33 #include <bzlib.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <inttypes.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42
43 #include <algorithm>
44 #include <memory>
45 #include <limits>
46 #include <vector>
47
48 #include "buffer_file.h"
49 #include "extents.h"
50 #include "extents_file.h"
51 #include "file.h"
52 #include "file_interface.h"
53 #include "memory_file.h"
54 #include "sink_file.h"
55
56 namespace {
57
ParseInt64(const u_char * buf)58 int64_t ParseInt64(const u_char* buf) {
59 int64_t y;
60
61 y = buf[7] & 0x7F;
62 y = y * 256;
63 y += buf[6];
64 y = y * 256;
65 y += buf[5];
66 y = y * 256;
67 y += buf[4];
68 y = y * 256;
69 y += buf[3];
70 y = y * 256;
71 y += buf[2];
72 y = y * 256;
73 y += buf[1];
74 y = y * 256;
75 y += buf[0];
76
77 if (buf[7] & 0x80)
78 y = -y;
79
80 return y;
81 }
82
ReadBZ2(bz_stream * stream,uint8_t * data,size_t size)83 bool ReadBZ2(bz_stream* stream, uint8_t* data, size_t size) {
84 stream->next_out = (char*)data;
85 while (size > 0) {
86 unsigned int read_size = std::min(
87 static_cast<size_t>(std::numeric_limits<unsigned int>::max()), size);
88 stream->avail_out = read_size;
89 int bz2err = BZ2_bzDecompress(stream);
90 if (bz2err != BZ_OK && bz2err != BZ_STREAM_END)
91 return false;
92 size -= read_size - stream->avail_out;
93 }
94 return true;
95 }
96
ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface> & file,bz_stream * stream,size_t size,uint8_t * buf,size_t buf_size)97 int ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file,
98 bz_stream* stream,
99 size_t size,
100 uint8_t* buf,
101 size_t buf_size) {
102 while (size > 0) {
103 size_t bytes_to_read = std::min(size, buf_size);
104 if (!ReadBZ2(stream, buf, bytes_to_read)) {
105 fprintf(stderr, "Failed to read bzip stream.\n");
106 return 2;
107 }
108 if (!WriteAll(file, buf, bytes_to_read)) {
109 perror("WriteAll() failed");
110 return 1;
111 }
112 size -= bytes_to_read;
113 }
114 return 0;
115 }
116
117 } // namespace
118
119 namespace bsdiff {
120
ReadAll(const std::unique_ptr<FileInterface> & file,uint8_t * data,size_t size)121 bool ReadAll(const std::unique_ptr<FileInterface>& file,
122 uint8_t* data,
123 size_t size) {
124 size_t offset = 0, read;
125 while (offset < size) {
126 if (!file->Read(data + offset, size - offset, &read) || read == 0)
127 return false;
128 offset += read;
129 }
130 return true;
131 }
132
WriteAll(const std::unique_ptr<FileInterface> & file,const uint8_t * data,size_t size)133 bool WriteAll(const std::unique_ptr<FileInterface>& file,
134 const uint8_t* data,
135 size_t size) {
136 size_t offset = 0, written;
137 while (offset < size) {
138 if (!file->Write(data + offset, size - offset, &written) || written == 0)
139 return false;
140 offset += written;
141 }
142 return true;
143 }
144
IsOverlapping(const char * old_filename,const char * new_filename,const std::vector<ex_t> & old_extents,const std::vector<ex_t> & new_extents)145 bool IsOverlapping(const char* old_filename,
146 const char* new_filename,
147 const std::vector<ex_t>& old_extents,
148 const std::vector<ex_t>& new_extents) {
149 struct stat old_stat, new_stat;
150 if (stat(new_filename, &new_stat) == -1) {
151 if (errno == ENOENT)
152 return false;
153 fprintf(stderr, "Error stat the new file %s: %s\n", new_filename,
154 strerror(errno));
155 return true;
156 }
157 if (stat(old_filename, &old_stat) == -1) {
158 fprintf(stderr, "Error stat the old file %s: %s\n", old_filename,
159 strerror(errno));
160 return true;
161 }
162
163 if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
164 return false;
165
166 if (old_extents.empty() && new_extents.empty())
167 return true;
168
169 for (ex_t old_ex : old_extents)
170 for (ex_t new_ex : new_extents)
171 if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
172 static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
173 return true;
174
175 return false;
176 }
177
178 // Patch |old_filename| with |patch_filename| and save it to |new_filename|.
179 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
180 // extents of |old_filename| and |new_filename|.
181 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const char * old_filename,const char * new_filename,const char * patch_filename,const char * old_extents,const char * new_extents)182 int bspatch(const char* old_filename,
183 const char* new_filename,
184 const char* patch_filename,
185 const char* old_extents,
186 const char* new_extents) {
187 std::unique_ptr<FileInterface> patch_file =
188 File::FOpen(patch_filename, O_RDONLY);
189 if (!patch_file) {
190 fprintf(stderr, "Error opening the patch file %s: %s\n", patch_filename,
191 strerror(errno));
192 return 1;
193 }
194 uint64_t patch_size;
195 patch_file->GetSize(&patch_size);
196 std::vector<uint8_t> patch(patch_size);
197 if (!ReadAll(patch_file, patch.data(), patch_size)) {
198 fprintf(stderr, "Error reading the patch file %s: %s\n", patch_filename,
199 strerror(errno));
200 return 1;
201 }
202 patch_file.reset();
203
204 return bspatch(old_filename, new_filename, patch.data(), patch_size,
205 old_extents, new_extents);
206 }
207
208 // Patch |old_filename| with |patch_data| and save it to |new_filename|.
209 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
210 // extents of |old_filename| and |new_filename|.
211 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const char * old_filename,const char * new_filename,const uint8_t * patch_data,size_t patch_size,const char * old_extents,const char * new_extents)212 int bspatch(const char* old_filename,
213 const char* new_filename,
214 const uint8_t* patch_data,
215 size_t patch_size,
216 const char* old_extents,
217 const char* new_extents) {
218 int using_extents = (old_extents != NULL || new_extents != NULL);
219
220 // Open input file for reading.
221 std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
222 if (!old_file) {
223 fprintf(stderr, "Error opening the old file %s: %s\n", old_filename,
224 strerror(errno));
225 return 1;
226 }
227
228 std::vector<ex_t> parsed_old_extents;
229 if (using_extents) {
230 if (!ParseExtentStr(old_extents, &parsed_old_extents)) {
231 fprintf(stderr, "Error parsing the old extents\n");
232 return 2;
233 }
234 old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
235 }
236
237 // Open output file for writing.
238 std::unique_ptr<FileInterface> new_file =
239 File::FOpen(new_filename, O_CREAT | O_WRONLY);
240 if (!new_file) {
241 fprintf(stderr, "Error opening the new file %s: %s\n", new_filename,
242 strerror(errno));
243 return 1;
244 }
245
246 std::vector<ex_t> parsed_new_extents;
247 if (using_extents) {
248 if (!ParseExtentStr(new_extents, &parsed_new_extents)) {
249 fprintf(stderr, "Error parsing the new extents\n");
250 return 2;
251 }
252 new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
253 }
254
255 if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
256 parsed_new_extents)) {
257 // New and old file is overlapping, we can not stream output to new file,
258 // cache it in a buffer and write to the file at the end.
259 uint64_t newsize = ParseInt64(patch_data + 24);
260 new_file.reset(new BufferFile(std::move(new_file), newsize));
261 }
262
263 return bspatch(old_file, new_file, patch_data, patch_size);
264 }
265
266 // Patch |old_data| with |patch_data| and save it by calling sink function.
267 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const uint8_t * old_data,size_t old_size,const uint8_t * patch_data,size_t patch_size,const sink_func & sink)268 int bspatch(const uint8_t* old_data,
269 size_t old_size,
270 const uint8_t* patch_data,
271 size_t patch_size,
272 const sink_func& sink) {
273 std::unique_ptr<FileInterface> old_file(new MemoryFile(old_data, old_size));
274 std::unique_ptr<FileInterface> new_file(new SinkFile(sink));
275
276 return bspatch(old_file, new_file, patch_data, patch_size);
277 }
278
279 // Patch |old_file| with |patch_data| and save it to |new_file|.
280 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const std::unique_ptr<FileInterface> & old_file,const std::unique_ptr<FileInterface> & new_file,const uint8_t * patch_data,size_t patch_size)281 int bspatch(const std::unique_ptr<FileInterface>& old_file,
282 const std::unique_ptr<FileInterface>& new_file,
283 const uint8_t* patch_data,
284 size_t patch_size) {
285 int bz2err;
286 u_char buf[8];
287 off_t ctrl[3];
288
289 // File format:
290 // 0 8 "BSDIFF40"
291 // 8 8 X
292 // 16 8 Y
293 // 24 8 sizeof(new_filename)
294 // 32 X bzip2(control block)
295 // 32+X Y bzip2(diff block)
296 // 32+X+Y ??? bzip2(extra block)
297 // with control block a set of triples (x,y,z) meaning "add x bytes
298 // from oldfile to x bytes from the diff block; copy y bytes from the
299 // extra block; seek forwards in oldfile by z bytes".
300
301 // Check for appropriate magic.
302 if (memcmp(patch_data, "BSDIFF40", 8) != 0) {
303 fprintf(stderr, "Not a bsdiff patch.\n");
304 return 2;
305 }
306
307 // Read lengths from header.
308 uint64_t oldsize, newsize;
309 int64_t ctrl_len = ParseInt64(patch_data + 8);
310 int64_t data_len = ParseInt64(patch_data + 16);
311 int64_t signed_newsize = ParseInt64(patch_data + 24);
312 newsize = signed_newsize;
313 if ((ctrl_len < 0) || (data_len < 0) || (signed_newsize < 0) ||
314 (32 + ctrl_len + data_len > static_cast<int64_t>(patch_size))) {
315 fprintf(stderr, "Corrupt patch.\n");
316 return 2;
317 }
318
319 bz_stream cstream;
320 cstream.next_in = (char*)patch_data + 32;
321 cstream.avail_in = ctrl_len;
322 cstream.bzalloc = nullptr;
323 cstream.bzfree = nullptr;
324 cstream.opaque = nullptr;
325 if ((bz2err = BZ2_bzDecompressInit(&cstream, 0, 0)) != BZ_OK) {
326 fprintf(stderr, "Failed to bzinit control stream (%d)\n", bz2err);
327 return 2;
328 }
329
330 bz_stream dstream;
331 dstream.next_in = (char*)patch_data + 32 + ctrl_len;
332 dstream.avail_in = data_len;
333 dstream.bzalloc = nullptr;
334 dstream.bzfree = nullptr;
335 dstream.opaque = nullptr;
336 if ((bz2err = BZ2_bzDecompressInit(&dstream, 0, 0)) != BZ_OK) {
337 fprintf(stderr, "Failed to bzinit diff stream (%d)\n", bz2err);
338 return 2;
339 }
340
341 bz_stream estream;
342 estream.next_in = (char*)patch_data + 32 + ctrl_len + data_len;
343 estream.avail_in = patch_size - (32 + ctrl_len + data_len);
344 estream.bzalloc = nullptr;
345 estream.bzfree = nullptr;
346 estream.opaque = nullptr;
347 if ((bz2err = BZ2_bzDecompressInit(&estream, 0, 0)) != BZ_OK) {
348 fprintf(stderr, "Failed to bzinit extra stream (%d)\n", bz2err);
349 return 2;
350 }
351
352 uint64_t old_file_pos = 0;
353
354 if (!old_file->GetSize(&oldsize)) {
355 fprintf(stderr, "Cannot obtain the size of old file.\n");
356 return 1;
357 }
358
359 // The oldpos can be negative, but the new pos is only incremented linearly.
360 int64_t oldpos = 0;
361 uint64_t newpos = 0;
362 std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024);
363 while (newpos < newsize) {
364 int64_t i;
365 // Read control data.
366 for (i = 0; i <= 2; i++) {
367 if (!ReadBZ2(&cstream, buf, 8)) {
368 fprintf(stderr, "Failed to read control stream.\n");
369 return 2;
370 }
371 ctrl[i] = ParseInt64(buf);
372 }
373
374 // Sanity-check.
375 if (ctrl[0] < 0 || ctrl[1] < 0) {
376 fprintf(stderr, "Corrupt patch.\n");
377 return 2;
378 }
379
380 // Sanity-check.
381 if (newpos + ctrl[0] > newsize) {
382 fprintf(stderr, "Corrupt patch.\n");
383 return 2;
384 }
385
386 int ret = 0;
387 // Add old data to diff string. It is enough to fseek once, at
388 // the beginning of the sequence, to avoid unnecessary overhead.
389 if ((i = oldpos) < 0) {
390 // Write diff block directly to new file without adding old data,
391 // because we will skip part where |oldpos| < 0.
392 ret = ReadBZ2AndWriteAll(new_file, &dstream, -i, new_buf.data(),
393 new_buf.size());
394 if (ret)
395 return ret;
396 i = 0;
397 }
398
399 // We just checked that |i| is not negative.
400 if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) {
401 fprintf(stderr, "Error seeking input file to offset %" PRId64 ": %s\n", i,
402 strerror(errno));
403 return 1;
404 }
405 if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
406 old_file_pos = oldsize;
407
408 size_t chunk_size = old_file_pos - i;
409 while (chunk_size > 0) {
410 size_t read_bytes;
411 size_t bytes_to_read = std::min(chunk_size, old_buf.size());
412 if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) {
413 perror("Error reading from input file");
414 return 1;
415 }
416 if (!read_bytes) {
417 fprintf(stderr, "EOF reached while reading from input file.\n");
418 return 2;
419 }
420 // Read same amount of bytes from diff block
421 if (!ReadBZ2(&dstream, new_buf.data(), read_bytes)) {
422 fprintf(stderr, "Failed to read diff stream.\n");
423 return 2;
424 }
425 // new_buf already has data from diff block, adds old data to it.
426 for (size_t k = 0; k < read_bytes; k++)
427 new_buf[k] += old_buf[k];
428 if (!WriteAll(new_file, new_buf.data(), read_bytes)) {
429 perror("Error writing to new file");
430 return 1;
431 }
432 chunk_size -= read_bytes;
433 }
434
435 // Adjust pointers.
436 newpos += ctrl[0];
437 oldpos += ctrl[0];
438
439 if (oldpos > static_cast<int64_t>(oldsize)) {
440 // Write diff block directly to new file without adding old data,
441 // because we skipped part where |oldpos| > oldsize.
442 ret = ReadBZ2AndWriteAll(new_file, &dstream, oldpos - oldsize,
443 new_buf.data(), new_buf.size());
444 if (ret)
445 return ret;
446 }
447
448 // Sanity-check.
449 if (newpos + ctrl[1] > newsize) {
450 fprintf(stderr, "Corrupt patch.\n");
451 return 2;
452 }
453
454 // Read extra block.
455 ret = ReadBZ2AndWriteAll(new_file, &estream, ctrl[1], new_buf.data(),
456 new_buf.size());
457 if (ret)
458 return ret;
459
460 // Adjust pointers.
461 newpos += ctrl[1];
462 oldpos += ctrl[2];
463 }
464
465 // Close input file.
466 old_file->Close();
467
468 // Clean up the bzip2 reads.
469 BZ2_bzDecompressEnd(&cstream);
470 BZ2_bzDecompressEnd(&dstream);
471 BZ2_bzDecompressEnd(&estream);
472
473 if (!new_file->Close()) {
474 perror("Error closing new file");
475 return 1;
476 }
477
478 return 0;
479 }
480
481 } // namespace bsdiff
482