• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-
2  * Copyright 2003-2005 Colin Percival
3  * All rights reserved
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted providing that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
29 #endif
30 
31 #include "bspatch.h"
32 
33 #include <bzlib.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <inttypes.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 
43 #include <algorithm>
44 #include <memory>
45 #include <limits>
46 #include <vector>
47 
48 #include "buffer_file.h"
49 #include "extents.h"
50 #include "extents_file.h"
51 #include "file.h"
52 #include "file_interface.h"
53 #include "memory_file.h"
54 #include "sink_file.h"
55 
56 namespace {
57 
ParseInt64(const u_char * buf)58 int64_t ParseInt64(const u_char* buf) {
59   int64_t y;
60 
61   y = buf[7] & 0x7F;
62   y = y * 256;
63   y += buf[6];
64   y = y * 256;
65   y += buf[5];
66   y = y * 256;
67   y += buf[4];
68   y = y * 256;
69   y += buf[3];
70   y = y * 256;
71   y += buf[2];
72   y = y * 256;
73   y += buf[1];
74   y = y * 256;
75   y += buf[0];
76 
77   if (buf[7] & 0x80)
78     y = -y;
79 
80   return y;
81 }
82 
ReadBZ2(bz_stream * stream,uint8_t * data,size_t size)83 bool ReadBZ2(bz_stream* stream, uint8_t* data, size_t size) {
84   stream->next_out = (char*)data;
85   while (size > 0) {
86     unsigned int read_size = std::min(
87         static_cast<size_t>(std::numeric_limits<unsigned int>::max()), size);
88     stream->avail_out = read_size;
89     int bz2err = BZ2_bzDecompress(stream);
90     if (bz2err != BZ_OK && bz2err != BZ_STREAM_END)
91       return false;
92     size -= read_size - stream->avail_out;
93   }
94   return true;
95 }
96 
ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface> & file,bz_stream * stream,size_t size,uint8_t * buf,size_t buf_size)97 int ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file,
98                        bz_stream* stream,
99                        size_t size,
100                        uint8_t* buf,
101                        size_t buf_size) {
102   while (size > 0) {
103     size_t bytes_to_read = std::min(size, buf_size);
104     if (!ReadBZ2(stream, buf, bytes_to_read)) {
105       fprintf(stderr, "Failed to read bzip stream.\n");
106       return 2;
107     }
108     if (!WriteAll(file, buf, bytes_to_read)) {
109       perror("WriteAll() failed");
110       return 1;
111     }
112     size -= bytes_to_read;
113   }
114   return 0;
115 }
116 
117 }  // namespace
118 
119 namespace bsdiff {
120 
ReadAll(const std::unique_ptr<FileInterface> & file,uint8_t * data,size_t size)121 bool ReadAll(const std::unique_ptr<FileInterface>& file,
122              uint8_t* data,
123              size_t size) {
124   size_t offset = 0, read;
125   while (offset < size) {
126     if (!file->Read(data + offset, size - offset, &read) || read == 0)
127       return false;
128     offset += read;
129   }
130   return true;
131 }
132 
WriteAll(const std::unique_ptr<FileInterface> & file,const uint8_t * data,size_t size)133 bool WriteAll(const std::unique_ptr<FileInterface>& file,
134               const uint8_t* data,
135               size_t size) {
136   size_t offset = 0, written;
137   while (offset < size) {
138     if (!file->Write(data + offset, size - offset, &written) || written == 0)
139       return false;
140     offset += written;
141   }
142   return true;
143 }
144 
IsOverlapping(const char * old_filename,const char * new_filename,const std::vector<ex_t> & old_extents,const std::vector<ex_t> & new_extents)145 bool IsOverlapping(const char* old_filename,
146                    const char* new_filename,
147                    const std::vector<ex_t>& old_extents,
148                    const std::vector<ex_t>& new_extents) {
149   struct stat old_stat, new_stat;
150   if (stat(new_filename, &new_stat) == -1) {
151     if (errno == ENOENT)
152       return false;
153     fprintf(stderr, "Error stat the new file %s: %s\n", new_filename,
154             strerror(errno));
155     return true;
156   }
157   if (stat(old_filename, &old_stat) == -1) {
158     fprintf(stderr, "Error stat the old file %s: %s\n", old_filename,
159             strerror(errno));
160     return true;
161   }
162 
163   if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
164     return false;
165 
166   if (old_extents.empty() && new_extents.empty())
167     return true;
168 
169   for (ex_t old_ex : old_extents)
170     for (ex_t new_ex : new_extents)
171       if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
172           static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
173         return true;
174 
175   return false;
176 }
177 
178 // Patch |old_filename| with |patch_filename| and save it to |new_filename|.
179 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
180 // extents of |old_filename| and |new_filename|.
181 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const char * old_filename,const char * new_filename,const char * patch_filename,const char * old_extents,const char * new_extents)182 int bspatch(const char* old_filename,
183             const char* new_filename,
184             const char* patch_filename,
185             const char* old_extents,
186             const char* new_extents) {
187   std::unique_ptr<FileInterface> patch_file =
188       File::FOpen(patch_filename, O_RDONLY);
189   if (!patch_file) {
190     fprintf(stderr, "Error opening the patch file %s: %s\n", patch_filename,
191             strerror(errno));
192     return 1;
193   }
194   uint64_t patch_size;
195   patch_file->GetSize(&patch_size);
196   std::vector<uint8_t> patch(patch_size);
197   if (!ReadAll(patch_file, patch.data(), patch_size)) {
198     fprintf(stderr, "Error reading the patch file %s: %s\n", patch_filename,
199             strerror(errno));
200     return 1;
201   }
202   patch_file.reset();
203 
204   return bspatch(old_filename, new_filename, patch.data(), patch_size,
205                  old_extents, new_extents);
206 }
207 
208 // Patch |old_filename| with |patch_data| and save it to |new_filename|.
209 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
210 // extents of |old_filename| and |new_filename|.
211 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const char * old_filename,const char * new_filename,const uint8_t * patch_data,size_t patch_size,const char * old_extents,const char * new_extents)212 int bspatch(const char* old_filename,
213             const char* new_filename,
214             const uint8_t* patch_data,
215             size_t patch_size,
216             const char* old_extents,
217             const char* new_extents) {
218   int using_extents = (old_extents != NULL || new_extents != NULL);
219 
220   // Open input file for reading.
221   std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
222   if (!old_file) {
223     fprintf(stderr, "Error opening the old file %s: %s\n", old_filename,
224             strerror(errno));
225     return 1;
226   }
227 
228   std::vector<ex_t> parsed_old_extents;
229   if (using_extents) {
230     if (!ParseExtentStr(old_extents, &parsed_old_extents)) {
231       fprintf(stderr, "Error parsing the old extents\n");
232       return 2;
233     }
234     old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
235   }
236 
237   // Open output file for writing.
238   std::unique_ptr<FileInterface> new_file =
239       File::FOpen(new_filename, O_CREAT | O_WRONLY);
240   if (!new_file) {
241     fprintf(stderr, "Error opening the new file %s: %s\n", new_filename,
242             strerror(errno));
243     return 1;
244   }
245 
246   std::vector<ex_t> parsed_new_extents;
247   if (using_extents) {
248     if (!ParseExtentStr(new_extents, &parsed_new_extents)) {
249       fprintf(stderr, "Error parsing the new extents\n");
250       return 2;
251     }
252     new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
253   }
254 
255   if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
256                     parsed_new_extents)) {
257     // New and old file is overlapping, we can not stream output to new file,
258     // cache it in a buffer and write to the file at the end.
259     uint64_t newsize = ParseInt64(patch_data + 24);
260     new_file.reset(new BufferFile(std::move(new_file), newsize));
261   }
262 
263   return bspatch(old_file, new_file, patch_data, patch_size);
264 }
265 
266 // Patch |old_data| with |patch_data| and save it by calling sink function.
267 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const uint8_t * old_data,size_t old_size,const uint8_t * patch_data,size_t patch_size,const sink_func & sink)268 int bspatch(const uint8_t* old_data,
269             size_t old_size,
270             const uint8_t* patch_data,
271             size_t patch_size,
272             const sink_func& sink) {
273   std::unique_ptr<FileInterface> old_file(new MemoryFile(old_data, old_size));
274   std::unique_ptr<FileInterface> new_file(new SinkFile(sink));
275 
276   return bspatch(old_file, new_file, patch_data, patch_size);
277 }
278 
279 // Patch |old_file| with |patch_data| and save it to |new_file|.
280 // Returns 0 on success, 1 on I/O error and 2 on data error.
bspatch(const std::unique_ptr<FileInterface> & old_file,const std::unique_ptr<FileInterface> & new_file,const uint8_t * patch_data,size_t patch_size)281 int bspatch(const std::unique_ptr<FileInterface>& old_file,
282             const std::unique_ptr<FileInterface>& new_file,
283             const uint8_t* patch_data,
284             size_t patch_size) {
285   int bz2err;
286   u_char buf[8];
287   off_t ctrl[3];
288 
289   // File format:
290   //   0       8    "BSDIFF40"
291   //   8       8    X
292   //   16      8    Y
293   //   24      8    sizeof(new_filename)
294   //   32      X    bzip2(control block)
295   //   32+X    Y    bzip2(diff block)
296   //   32+X+Y  ???  bzip2(extra block)
297   // with control block a set of triples (x,y,z) meaning "add x bytes
298   // from oldfile to x bytes from the diff block; copy y bytes from the
299   // extra block; seek forwards in oldfile by z bytes".
300 
301   // Check for appropriate magic.
302   if (memcmp(patch_data, "BSDIFF40", 8) != 0) {
303     fprintf(stderr, "Not a bsdiff patch.\n");
304     return 2;
305   }
306 
307   // Read lengths from header.
308   uint64_t oldsize, newsize;
309   int64_t ctrl_len = ParseInt64(patch_data + 8);
310   int64_t data_len = ParseInt64(patch_data + 16);
311   int64_t signed_newsize = ParseInt64(patch_data + 24);
312   newsize = signed_newsize;
313   if ((ctrl_len < 0) || (data_len < 0) || (signed_newsize < 0) ||
314       (32 + ctrl_len + data_len > static_cast<int64_t>(patch_size))) {
315     fprintf(stderr, "Corrupt patch.\n");
316     return 2;
317   }
318 
319   bz_stream cstream;
320   cstream.next_in = (char*)patch_data + 32;
321   cstream.avail_in = ctrl_len;
322   cstream.bzalloc = nullptr;
323   cstream.bzfree = nullptr;
324   cstream.opaque = nullptr;
325   if ((bz2err = BZ2_bzDecompressInit(&cstream, 0, 0)) != BZ_OK) {
326     fprintf(stderr, "Failed to bzinit control stream (%d)\n", bz2err);
327     return 2;
328   }
329 
330   bz_stream dstream;
331   dstream.next_in = (char*)patch_data + 32 + ctrl_len;
332   dstream.avail_in = data_len;
333   dstream.bzalloc = nullptr;
334   dstream.bzfree = nullptr;
335   dstream.opaque = nullptr;
336   if ((bz2err = BZ2_bzDecompressInit(&dstream, 0, 0)) != BZ_OK) {
337     fprintf(stderr, "Failed to bzinit diff stream (%d)\n", bz2err);
338     return 2;
339   }
340 
341   bz_stream estream;
342   estream.next_in = (char*)patch_data + 32 + ctrl_len + data_len;
343   estream.avail_in = patch_size - (32 + ctrl_len + data_len);
344   estream.bzalloc = nullptr;
345   estream.bzfree = nullptr;
346   estream.opaque = nullptr;
347   if ((bz2err = BZ2_bzDecompressInit(&estream, 0, 0)) != BZ_OK) {
348     fprintf(stderr, "Failed to bzinit extra stream (%d)\n", bz2err);
349     return 2;
350   }
351 
352   uint64_t old_file_pos = 0;
353 
354   if (!old_file->GetSize(&oldsize)) {
355     fprintf(stderr, "Cannot obtain the size of old file.\n");
356     return 1;
357   }
358 
359   // The oldpos can be negative, but the new pos is only incremented linearly.
360   int64_t oldpos = 0;
361   uint64_t newpos = 0;
362   std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024);
363   while (newpos < newsize) {
364     int64_t i;
365     // Read control data.
366     for (i = 0; i <= 2; i++) {
367       if (!ReadBZ2(&cstream, buf, 8)) {
368         fprintf(stderr, "Failed to read control stream.\n");
369         return 2;
370       }
371       ctrl[i] = ParseInt64(buf);
372     }
373 
374     // Sanity-check.
375     if (ctrl[0] < 0 || ctrl[1] < 0) {
376       fprintf(stderr, "Corrupt patch.\n");
377       return 2;
378     }
379 
380     // Sanity-check.
381     if (newpos + ctrl[0] > newsize) {
382       fprintf(stderr, "Corrupt patch.\n");
383       return 2;
384     }
385 
386     int ret = 0;
387     // Add old data to diff string. It is enough to fseek once, at
388     // the beginning of the sequence, to avoid unnecessary overhead.
389     if ((i = oldpos) < 0) {
390       // Write diff block directly to new file without adding old data,
391       // because we will skip part where |oldpos| < 0.
392       ret = ReadBZ2AndWriteAll(new_file, &dstream, -i, new_buf.data(),
393                                new_buf.size());
394       if (ret)
395         return ret;
396       i = 0;
397     }
398 
399     // We just checked that |i| is not negative.
400     if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) {
401       fprintf(stderr, "Error seeking input file to offset %" PRId64 ": %s\n", i,
402               strerror(errno));
403       return 1;
404     }
405     if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
406       old_file_pos = oldsize;
407 
408     size_t chunk_size = old_file_pos - i;
409     while (chunk_size > 0) {
410       size_t read_bytes;
411       size_t bytes_to_read = std::min(chunk_size, old_buf.size());
412       if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) {
413         perror("Error reading from input file");
414         return 1;
415       }
416       if (!read_bytes) {
417         fprintf(stderr, "EOF reached while reading from input file.\n");
418         return 2;
419       }
420       // Read same amount of bytes from diff block
421       if (!ReadBZ2(&dstream, new_buf.data(), read_bytes)) {
422         fprintf(stderr, "Failed to read diff stream.\n");
423         return 2;
424       }
425       // new_buf already has data from diff block, adds old data to it.
426       for (size_t k = 0; k < read_bytes; k++)
427         new_buf[k] += old_buf[k];
428       if (!WriteAll(new_file, new_buf.data(), read_bytes)) {
429         perror("Error writing to new file");
430         return 1;
431       }
432       chunk_size -= read_bytes;
433     }
434 
435     // Adjust pointers.
436     newpos += ctrl[0];
437     oldpos += ctrl[0];
438 
439     if (oldpos > static_cast<int64_t>(oldsize)) {
440       // Write diff block directly to new file without adding old data,
441       // because we skipped part where |oldpos| > oldsize.
442       ret = ReadBZ2AndWriteAll(new_file, &dstream, oldpos - oldsize,
443                                new_buf.data(), new_buf.size());
444       if (ret)
445         return ret;
446     }
447 
448     // Sanity-check.
449     if (newpos + ctrl[1] > newsize) {
450       fprintf(stderr, "Corrupt patch.\n");
451       return 2;
452     }
453 
454     // Read extra block.
455     ret = ReadBZ2AndWriteAll(new_file, &estream, ctrl[1], new_buf.data(),
456                              new_buf.size());
457     if (ret)
458       return ret;
459 
460     // Adjust pointers.
461     newpos += ctrl[1];
462     oldpos += ctrl[2];
463   }
464 
465   // Close input file.
466   old_file->Close();
467 
468   // Clean up the bzip2 reads.
469   BZ2_bzDecompressEnd(&cstream);
470   BZ2_bzDecompressEnd(&dstream);
471   BZ2_bzDecompressEnd(&estream);
472 
473   if (!new_file->Close()) {
474     perror("Error closing new file");
475     return 1;
476   }
477 
478   return 0;
479 }
480 
481 }  // namespace bsdiff
482