1 /*-
2 * Copyright 2003-2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
29 #endif
30
31 #include "bspatch.h"
32
33 #include <bzlib.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <inttypes.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43
44 #include <algorithm>
45 #include <memory>
46 #include <limits>
47 #include <vector>
48
49 #include "extents.h"
50 #include "extents_file.h"
51 #include "file.h"
52 #include "file_interface.h"
53 #include "memory_file.h"
54
55 namespace {
56
ParseInt64(u_char * buf)57 int64_t ParseInt64(u_char* buf) {
58 int64_t y;
59
60 y = buf[7] & 0x7F;
61 y = y * 256;
62 y += buf[6];
63 y = y * 256;
64 y += buf[5];
65 y = y * 256;
66 y += buf[4];
67 y = y * 256;
68 y += buf[3];
69 y = y * 256;
70 y += buf[2];
71 y = y * 256;
72 y += buf[1];
73 y = y * 256;
74 y += buf[0];
75
76 if (buf[7] & 0x80)
77 y = -y;
78
79 return y;
80 }
81
ReadBZ2(BZFILE * pfbz2,uint8_t * data,size_t size)82 bool ReadBZ2(BZFILE* pfbz2, uint8_t* data, size_t size) {
83 int bz2err;
84 size_t lenread = BZ2_bzRead(&bz2err, pfbz2, data, size);
85 if (lenread < size || (bz2err != BZ_OK && bz2err != BZ_STREAM_END))
86 return false;
87 return true;
88 }
89
ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface> & file,BZFILE * pfbz2,size_t size,uint8_t * buf,size_t buf_size)90 bool ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file,
91 BZFILE* pfbz2,
92 size_t size,
93 uint8_t* buf,
94 size_t buf_size) {
95 while (size > 0) {
96 size_t bytes_to_read = std::min(size, buf_size);
97 if (!ReadBZ2(pfbz2, buf, bytes_to_read))
98 return false;
99 if (!WriteAll(file, buf, bytes_to_read))
100 return false;
101 size -= bytes_to_read;
102 }
103 return true;
104 }
105
106 } // namespace
107
108 namespace bsdiff {
109
WriteAll(const std::unique_ptr<FileInterface> & file,const uint8_t * data,size_t size)110 bool WriteAll(const std::unique_ptr<FileInterface>& file,
111 const uint8_t* data,
112 size_t size) {
113 size_t offset = 0, written;
114 while (offset < size) {
115 if (!file->Write(data + offset, size - offset, &written) || written == 0)
116 return false;
117 offset += written;
118 }
119 return true;
120 }
121
IsOverlapping(const char * old_filename,const char * new_filename,const std::vector<ex_t> & old_extents,const std::vector<ex_t> & new_extents)122 bool IsOverlapping(const char* old_filename,
123 const char* new_filename,
124 const std::vector<ex_t>& old_extents,
125 const std::vector<ex_t>& new_extents) {
126 struct stat old_stat, new_stat;
127 if (stat(new_filename, &new_stat) == -1) {
128 if (errno == ENOENT)
129 return false;
130 err(1, "Error stat the new filename %s", new_filename);
131 }
132 if (stat(old_filename, &old_stat) == -1)
133 err(1, "Error stat the old filename %s", old_filename);
134
135 if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
136 return false;
137
138 if (old_extents.empty() && new_extents.empty())
139 return true;
140
141 for (ex_t old_ex : old_extents)
142 for (ex_t new_ex : new_extents)
143 if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
144 static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
145 return true;
146
147 return false;
148 }
149
bspatch(const char * old_filename,const char * new_filename,const char * patch_filename,const char * old_extents,const char * new_extents)150 int bspatch(
151 const char* old_filename, const char* new_filename,
152 const char* patch_filename,
153 const char* old_extents, const char* new_extents) {
154 FILE* f, *cpf, *dpf, *epf;
155 BZFILE* cpfbz2, *dpfbz2, *epfbz2;
156 int bz2err;
157 ssize_t bzctrllen, bzdatalen;
158 u_char header[32], buf[8];
159 off_t ctrl[3];
160
161 int using_extents = (old_extents != NULL || new_extents != NULL);
162
163 // Open patch file.
164 if ((f = fopen(patch_filename, "r")) == NULL)
165 err(1, "fopen(%s)", patch_filename);
166
167 // File format:
168 // 0 8 "BSDIFF40"
169 // 8 8 X
170 // 16 8 Y
171 // 24 8 sizeof(new_filename)
172 // 32 X bzip2(control block)
173 // 32+X Y bzip2(diff block)
174 // 32+X+Y ??? bzip2(extra block)
175 // with control block a set of triples (x,y,z) meaning "add x bytes
176 // from oldfile to x bytes from the diff block; copy y bytes from the
177 // extra block; seek forwards in oldfile by z bytes".
178
179 // Read header.
180 if (fread(header, 1, 32, f) < 32) {
181 if (feof(f))
182 errx(1, "Corrupt patch\n");
183 err(1, "fread(%s)", patch_filename);
184 }
185
186 // Check for appropriate magic.
187 if (memcmp(header, "BSDIFF40", 8) != 0)
188 errx(1, "Corrupt patch\n");
189
190 // Read lengths from header.
191 uint64_t oldsize, newsize;
192 bzctrllen = ParseInt64(header + 8);
193 bzdatalen = ParseInt64(header + 16);
194 int64_t signed_newsize = ParseInt64(header + 24);
195 newsize = signed_newsize;
196 if ((bzctrllen < 0) || (bzdatalen < 0) || (signed_newsize < 0))
197 errx(1, "Corrupt patch\n");
198
199 // Close patch file and re-open it via libbzip2 at the right places.
200 if (fclose(f))
201 err(1, "fclose(%s)", patch_filename);
202 if ((cpf = fopen(patch_filename, "r")) == NULL)
203 err(1, "fopen(%s)", patch_filename);
204 if (fseek(cpf, 32, SEEK_SET))
205 err(1, "fseeko(%s, %lld)", patch_filename, (long long)32);
206 if ((cpfbz2 = BZ2_bzReadOpen(&bz2err, cpf, 0, 0, NULL, 0)) == NULL)
207 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
208 if ((dpf = fopen(patch_filename, "r")) == NULL)
209 err(1, "fopen(%s)", patch_filename);
210 if (fseek(dpf, 32 + bzctrllen, SEEK_SET))
211 err(1, "fseeko(%s, %lld)", patch_filename, (long long)(32 + bzctrllen));
212 if ((dpfbz2 = BZ2_bzReadOpen(&bz2err, dpf, 0, 0, NULL, 0)) == NULL)
213 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
214 if ((epf = fopen(patch_filename, "r")) == NULL)
215 err(1, "fopen(%s)", patch_filename);
216 if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET))
217 err(1, "fseeko(%s, %lld)", patch_filename,
218 (long long)(32 + bzctrllen + bzdatalen));
219 if ((epfbz2 = BZ2_bzReadOpen(&bz2err, epf, 0, 0, NULL, 0)) == NULL)
220 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
221
222 // Open input file for reading.
223 std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
224 if (!old_file)
225 err(1, "Error opening the old filename");
226
227 std::vector<ex_t> parsed_old_extents;
228 if (using_extents) {
229 if (!ParseExtentStr(old_extents, &parsed_old_extents))
230 errx(1, "Error parsing the old extents");
231 old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
232 }
233
234 if (!old_file->GetSize(&oldsize))
235 err(1, "cannot obtain the size of %s", old_filename);
236 uint64_t old_file_pos = 0;
237
238 // Open output file for writing.
239 std::unique_ptr<FileInterface> new_file =
240 File::FOpen(new_filename, O_CREAT | O_WRONLY);
241 if (!new_file)
242 err(1, "Error opening the new filename %s", new_filename);
243
244 std::vector<ex_t> parsed_new_extents;
245 if (using_extents) {
246 if (!ParseExtentStr(new_extents, &parsed_new_extents))
247 errx(1, "Error parsing the new extents");
248 new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
249 }
250
251 if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
252 parsed_new_extents)) {
253 // New and old file is overlapping, we can not stream output to new file,
254 // cache it in the memory and write to the file at the end.
255 new_file.reset(new MemoryFile(std::move(new_file), newsize));
256 }
257
258 // The oldpos can be negative, but the new pos is only incremented linearly.
259 int64_t oldpos = 0;
260 uint64_t newpos = 0;
261 std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024);
262 while (newpos < newsize) {
263 int64_t i;
264 // Read control data.
265 for (i = 0; i <= 2; i++) {
266 if (!ReadBZ2(cpfbz2, buf, 8))
267 errx(1, "Corrupt patch\n");
268 ctrl[i] = ParseInt64(buf);
269 }
270
271 // Sanity-check.
272 if (ctrl[0] < 0 || ctrl[1] < 0)
273 errx(1, "Corrupt patch\n");
274
275 // Sanity-check.
276 if (newpos + ctrl[0] > newsize)
277 errx(1, "Corrupt patch\n");
278
279 // Add old data to diff string. It is enough to fseek once, at
280 // the beginning of the sequence, to avoid unnecessary overhead.
281 if ((i = oldpos) < 0) {
282 // Write diff block directly to new file without adding old data,
283 // because we will skip part where |oldpos| < 0.
284 if (!ReadBZ2AndWriteAll(new_file, dpfbz2, -i, new_buf.data(),
285 new_buf.size()))
286 errx(1, "Error during ReadBZ2AndWriteAll()");
287
288 i = 0;
289 }
290
291 // We just checked that |i| is not negative.
292 if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i))
293 err(1, "error seeking input file to offset %" PRId64, i);
294 if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
295 old_file_pos = oldsize;
296
297 size_t chunk_size = old_file_pos - i;
298 while (chunk_size > 0) {
299 size_t read_bytes;
300 size_t bytes_to_read = std::min(chunk_size, old_buf.size());
301 if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes))
302 err(1, "error reading from input file");
303 if (!read_bytes)
304 errx(1, "EOF reached while reading from input file");
305 // Read same amount of bytes from diff block
306 if (!ReadBZ2(dpfbz2, new_buf.data(), read_bytes))
307 errx(1, "Corrupt patch\n");
308 // new_buf already has data from diff block, adds old data to it.
309 for (size_t k = 0; k < read_bytes; k++)
310 new_buf[k] += old_buf[k];
311 if (!WriteAll(new_file, new_buf.data(), read_bytes))
312 err(1, "Error writing new file.");
313 chunk_size -= read_bytes;
314 }
315
316 // Adjust pointers.
317 newpos += ctrl[0];
318 oldpos += ctrl[0];
319
320 if (oldpos > static_cast<int64_t>(oldsize)) {
321 // Write diff block directly to new file without adding old data,
322 // because we skipped part where |oldpos| > oldsize.
323 if (!ReadBZ2AndWriteAll(new_file, dpfbz2, oldpos - oldsize,
324 new_buf.data(), new_buf.size()))
325 errx(1, "Error during ReadBZ2AndWriteAll()");
326 }
327
328 // Sanity-check.
329 if (newpos + ctrl[1] > newsize)
330 errx(1, "Corrupt patch\n");
331
332 // Read extra block.
333 if (!ReadBZ2AndWriteAll(new_file, epfbz2, ctrl[1], new_buf.data(),
334 new_buf.size()))
335 errx(1, "Error during ReadBZ2AndWriteAll()");
336
337 // Adjust pointers.
338 newpos += ctrl[1];
339 oldpos += ctrl[2];
340 }
341
342 // Close input file.
343 old_file->Close();
344
345 // Clean up the bzip2 reads.
346 BZ2_bzReadClose(&bz2err, cpfbz2);
347 BZ2_bzReadClose(&bz2err, dpfbz2);
348 BZ2_bzReadClose(&bz2err, epfbz2);
349 if (fclose(cpf) || fclose(dpf) || fclose(epf))
350 err(1, "fclose(%s)", patch_filename);
351
352 if (!new_file->Close())
353 err(1, "Error closing new file %s", new_filename);
354
355 return 0;
356 }
357
358 } // namespace bsdiff
359