• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-
2  * Copyright 2003-2005 Colin Percival
3  * All rights reserved
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted providing that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
29 #endif
30 
31 #include <sys/types.h>
32 
33 #include <bzlib.h>
34 #include <err.h>
35 #include <fcntl.h>
36 #include <lzma.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <zlib.h>
42 
43 #if defined(__APPLE__)
44 #include <libkern/OSByteOrder.h>
45 #define htole64(x) OSSwapHostToLittleInt64(x)
46 #elif defined(__linux__)
47 #include <endian.h>
48 #elif defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
49 #define htole64(x) (x)
50 #else
51 #error Provide htole64 for this platform
52 #endif
53 
54 #include "chrome/installer/mac/third_party/bsdiff/sha1_adapter.h"
55 
56 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
57 
split(off_t * I,off_t * V,off_t start,off_t len,off_t h)58 static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
59 {
60 	off_t i,j,k,x,tmp,jj,kk;
61 
62 	if(len<16) {
63 		for(k=start;k<start+len;k+=j) {
64 			j=1;x=V[I[k]+h];
65 			for(i=1;k+i<start+len;i++) {
66 				if(V[I[k+i]+h]<x) {
67 					x=V[I[k+i]+h];
68 					j=0;
69 				};
70 				if(V[I[k+i]+h]==x) {
71 					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
72 					j++;
73 				};
74 			};
75 			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
76 			if(j==1) I[k]=-1;
77 		};
78 		return;
79 	};
80 
81 	x=V[I[start+len/2]+h];
82 	jj=0;kk=0;
83 	for(i=start;i<start+len;i++) {
84 		if(V[I[i]+h]<x) jj++;
85 		if(V[I[i]+h]==x) kk++;
86 	};
87 	jj+=start;kk+=jj;
88 
89 	i=start;j=0;k=0;
90 	while(i<jj) {
91 		if(V[I[i]+h]<x) {
92 			i++;
93 		} else if(V[I[i]+h]==x) {
94 			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
95 			j++;
96 		} else {
97 			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
98 			k++;
99 		};
100 	};
101 
102 	while(jj+j<kk) {
103 		if(V[I[jj+j]+h]==x) {
104 			j++;
105 		} else {
106 			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
107 			k++;
108 		};
109 	};
110 
111 	if(jj>start) split(I,V,start,jj-start,h);
112 
113 	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
114 	if(jj==kk-1) I[jj]=-1;
115 
116 	if(start+len>kk) split(I,V,kk,start+len-kk,h);
117 }
118 
qsufsort(off_t * I,off_t * V,u_char * old,off_t oldsize)119 static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
120 {
121 	off_t buckets[256];
122 	off_t i,h,len;
123 
124 	for(i=0;i<256;i++) buckets[i]=0;
125 	for(i=0;i<oldsize;i++) buckets[old[i]]++;
126 	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
127 	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
128 	buckets[0]=0;
129 
130 	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
131 	I[0]=oldsize;
132 	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
133 	V[oldsize]=0;
134 	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
135 	I[0]=-1;
136 
137 	for(h=1;I[0]!=-(oldsize+1);h+=h) {
138 		len=0;
139 		for(i=0;i<oldsize+1;) {
140 			if(I[i]<0) {
141 				len-=I[i];
142 				i-=I[i];
143 			} else {
144 				if(len) I[i-len]=-len;
145 				len=V[I[i]]+1-i;
146 				split(I,V,i,len,h);
147 				i+=len;
148 				len=0;
149 			};
150 		};
151 		if(len) I[i-len]=-len;
152 	};
153 
154 	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
155 }
156 
matchlen(u_char * old,off_t oldsize,u_char * new,off_t newsize)157 static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
158 {
159 	off_t i;
160 
161 	for(i=0;(i<oldsize)&&(i<newsize);i++)
162 		if(old[i]!=new[i]) break;
163 
164 	return i;
165 }
166 
search(off_t * I,u_char * old,off_t oldsize,u_char * new,off_t newsize,off_t st,off_t en,off_t * pos)167 static off_t search(off_t *I,u_char *old,off_t oldsize,
168 		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
169 {
170 	off_t x,y;
171 
172 	if(en-st<2) {
173 		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
174 		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
175 
176 		if(x>y) {
177 			*pos=I[st];
178 			return x;
179 		} else {
180 			*pos=I[en];
181 			return y;
182 		}
183 	};
184 
185 	x=st+(en-st)/2;
186 	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
187 		return search(I,old,oldsize,new,newsize,x,en,pos);
188 	} else {
189 		return search(I,old,oldsize,new,newsize,st,x,pos);
190 	};
191 }
192 
offtout(off_t x,u_char * buf)193 static inline void offtout(off_t x,u_char *buf)
194 {
195 	*((off_t*)buf) = htole64(x);
196 }
197 
198 /* zlib provides compress2, which deflates to deflate (zlib) format. This is
199  * unfortunately distinct from gzip format in that the headers wrapping the
200  * decompressed data are different. gbspatch reads gzip-compressed data using
201  * the file-oriented gzread interface, which only supports gzip format.
202  * compress2gzip is identical to zlib's compress2 except that it produces gzip
203  * output compatible with gzread. This change is achieved by calling
204  * deflateInit2 instead of deflateInit and specifying 31 for windowBits;
205  * numbers greater than 15 cause the addition of a gzip wrapper. */
compress2gzip(Bytef * dest,uLongf * destLen,const Bytef * source,uLong sourceLen,int level)206 static int compress2gzip(Bytef *dest, uLongf *destLen,
207                          const Bytef *source, uLong sourceLen, int level)
208 {
209 	z_stream stream;
210 	int err;
211 
212 	stream.next_in = (Bytef*)source;
213 	stream.avail_in = (uInt)sourceLen;
214 
215 	stream.next_out = dest;
216 	stream.avail_out = (uInt)*destLen;
217 	if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
218 
219 	stream.zalloc = (alloc_func)0;
220 	stream.zfree = (free_func)0;
221 	stream.opaque = (voidpf)0;
222 
223 	err = deflateInit2(&stream,
224 	                   level, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY);
225 	if (err != Z_OK) return err;
226 
227 	err = deflate(&stream, Z_FINISH);
228 	if (err != Z_STREAM_END) {
229 		deflateEnd(&stream);
230 		return err == Z_OK ? Z_BUF_ERROR : err;
231 	}
232 	*destLen = stream.total_out;
233 
234 	err = deflateEnd(&stream);
235 	return err;
236 }
237 
238 /* Recompress buf of size buf_len using bzip2 or gzip. The smallest version is
239  * used. The original uncompressed variant may be the smallest. Returns a
240  * number identifying the encoding, 1 for uncompressed, 2 for bzip2, 3 for
241  * gzip, and 4 for xz/lzma2. If the original uncompressed variant is not
242  * smallest, it is freed. The caller must free any buf after this function
243  * returns. */
make_small(u_char ** buf,off_t * buf_len)244 static char make_small(u_char **buf, off_t *buf_len)
245 {
246 	u_char *source = *buf;
247 	off_t source_len = *buf_len;
248 	u_char *bz2, *gz, *lzma;
249 	unsigned int bz2_len;
250 	size_t gz_len, lzma_len, lzma_pos;
251 	int bz2_err, gz_err;
252 	lzma_ret lzma_err;
253 	lzma_check lzma_ck;
254 	char smallest;
255 
256 	smallest = 1;
257 
258 	bz2_len = source_len + 1;
259 	bz2 = malloc(bz2_len);
260 	bz2_err = BZ2_bzBuffToBuffCompress((char*)bz2, &bz2_len, (char*)source,
261 	                                   source_len, 9, 0, 0);
262 	if (bz2_err == BZ_OK) {
263 		if (bz2_len < *buf_len) {
264 			smallest = 2;
265 			*buf = bz2;
266 			*buf_len = bz2_len;
267 		} else {
268 			free(bz2);
269 			bz2 = NULL;
270 		}
271 	} else if (bz2_err == BZ_OUTBUFF_FULL) {
272 		free(bz2);
273 		bz2 = NULL;
274 	} else {
275 		errx(1, "BZ2_bzBuffToBuffCompress: %d", bz2_err);
276 	}
277 
278 	gz_len = source_len + 1;
279 	gz = malloc(gz_len);
280 	gz_err = compress2gzip(gz, &gz_len, source, source_len, 9);
281 	if (gz_err == Z_OK) {
282 		if (gz_len < *buf_len) {
283 			smallest = 3;
284 			*buf = gz;
285 			*buf_len = gz_len;
286 		} else {
287 			free(gz);
288 			gz = NULL;
289 		}
290 	} else if (gz_err == Z_BUF_ERROR) {
291 		free(gz);
292 		gz = NULL;
293 	} else {
294 		errx(1, "compress2gzip: %d", gz_err);
295 	}
296 
297 	lzma_len = source_len + 1;
298 	lzma = malloc(lzma_len);
299 	lzma_pos = 0;
300 
301 	/* Equivalent to the options used by xz -9 -e. */
302 	lzma_ck = LZMA_CHECK_CRC64;
303 	if (!lzma_check_is_supported(lzma_ck))
304 		lzma_ck = LZMA_CHECK_CRC32;
305 	lzma_err = lzma_easy_buffer_encode(9 | LZMA_PRESET_EXTREME,
306 	                                   lzma_ck, NULL,
307 	                                   source, source_len,
308 	                                   lzma, &lzma_pos, lzma_len);
309 	if (lzma_err == LZMA_OK) {
310 		if (lzma_pos < *buf_len) {
311 			smallest = 4;
312 			*buf = lzma;
313 			*buf_len = lzma_pos;
314 		} else {
315 			free(lzma);
316 			lzma = NULL;
317 		}
318 	} else if (lzma_err == LZMA_BUF_ERROR) {
319 		free(lzma);
320 		lzma = NULL;
321 	} else {
322 		errx(1, "lzma_easy_buffer_encode: %d", lzma_err);
323 	}
324 
325 	if (smallest != 1) {
326 		free(source);
327 	}
328 
329 	return smallest;
330 }
331 
main(int argc,char * argv[])332 int main(int argc,char *argv[])
333 {
334 	int fd;
335 	u_char *old,*new;
336 	off_t oldsize,newsize;
337 	off_t *I,*V;
338 	off_t scan,pos,len;
339 	off_t lastscan,lastpos,lastoffset;
340 	off_t oldscore,scsc;
341 	off_t s,Sf,lenf,Sb,lenb;
342 	off_t overlap,Ss,lens;
343 	off_t i;
344 	off_t cblen, dblen, eblen;
345 	u_char *cb, *db, *eb;
346 	u_char header[96];
347 	FILE * pf;
348 
349 	if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile",argv[0]);
350 
351 	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
352 		that we never try to malloc(0) and get a NULL pointer */
353 	if(((fd=open(argv[1],O_RDONLY,0))<0) ||
354 		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
355 		((old=malloc(oldsize+1))==NULL) ||
356 		(lseek(fd,0,SEEK_SET)!=0) ||
357 		(read(fd,old,oldsize)!=oldsize) ||
358 		(close(fd)==-1)) err(1,"%s",argv[1]);
359 
360 	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
361 		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
362 
363 	qsufsort(I,V,old,oldsize);
364 
365 	free(V);
366 
367 	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
368 		that we never try to malloc(0) and get a NULL pointer */
369 	if(((fd=open(argv[2],O_RDONLY,0))<0) ||
370 		((newsize=lseek(fd,0,SEEK_END))==-1) ||
371 		((new=malloc(newsize+1))==NULL) ||
372 		(lseek(fd,0,SEEK_SET)!=0) ||
373 		(read(fd,new,newsize)!=newsize) ||
374 		(close(fd)==-1)) err(1,"%s",argv[2]);
375 
376 	if(((cb=malloc(newsize+1))==NULL) ||
377 		((db=malloc(newsize+1))==NULL) ||
378 		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
379 	cblen=0;
380 	dblen=0;
381 	eblen=0;
382 
383 	/* Create the patch file */
384 	if ((pf = fopen(argv[3], "wb")) == NULL)
385 		err(1, "%s", argv[3]);
386 
387 	/* File format:
388 		0	8	"BSDIFF4G"
389 		8	8	length of compressed control block (x)
390 		16	8	length of compressed diff block (y)
391 		24	8	length of compressed extra block (z)
392 		32	8	length of old file
393 		40	8	length of new file
394 		48	20	SHA1 of old file
395 		68	20	SHA1 of new file
396 		88	1	encoding of control block
397 		89	1	encoding of diff block
398 		90	1	encoding of extra block
399 		91	5	unused
400 		96	x	compressed control block
401 		96+x	y	compressed diff block
402 		96+x+y	z	compressed extra block
403 	Encodings are 1 (uncompressed), 2 (bzip2), 3 (gzip), and 4 (xz/lzma2).
404 	*/
405 
406 	memset(header, 0, sizeof(header));
407 	if (fwrite(header, sizeof(header), 1, pf) != 1)
408 		err(1, "fwrite(%s)", argv[3]);
409 	memcpy(header, "BSDIFF4G", 8);
410 	offtout(oldsize, header + 32);
411 	offtout(newsize, header + 40);
412 	SHA1(old, oldsize, header + 48);
413 	SHA1(new, newsize, header + 68);
414 
415 	/* Compute the differences */
416 	scan=0;len=0;
417 	lastscan=0;lastpos=0;lastoffset=0;
418 	while(scan<newsize) {
419 		oldscore=0;
420 
421 		for(scsc=scan+=len;scan<newsize;scan++) {
422 			len=search(I,old,oldsize,new+scan,newsize-scan,
423 					0,oldsize,&pos);
424 
425 			for(;scsc<scan+len;scsc++)
426 			if((scsc+lastoffset<oldsize) &&
427 				(old[scsc+lastoffset] == new[scsc]))
428 				oldscore++;
429 
430 			if(((len==oldscore) && (len!=0)) ||
431 				(len>oldscore+8)) break;
432 
433 			if((scan+lastoffset<oldsize) &&
434 				(old[scan+lastoffset] == new[scan]))
435 				oldscore--;
436 		};
437 
438 		if((len!=oldscore) || (scan==newsize)) {
439 			s=0;Sf=0;lenf=0;
440 			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
441 				if(old[lastpos+i]==new[lastscan+i]) s++;
442 				i++;
443 				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
444 			};
445 
446 			lenb=0;
447 			if(scan<newsize) {
448 				s=0;Sb=0;
449 				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
450 					if(old[pos-i]==new[scan-i]) s++;
451 					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
452 				};
453 			};
454 
455 			if(lastscan+lenf>scan-lenb) {
456 				overlap=(lastscan+lenf)-(scan-lenb);
457 				s=0;Ss=0;lens=0;
458 				for(i=0;i<overlap;i++) {
459 					if(new[lastscan+lenf-overlap+i]==
460 					   old[lastpos+lenf-overlap+i]) s++;
461 					if(new[scan-lenb+i]==
462 					   old[pos-lenb+i]) s--;
463 					if(s>Ss) { Ss=s; lens=i+1; };
464 				};
465 
466 				lenf+=lens-overlap;
467 				lenb-=lens;
468 			};
469 
470 			for(i=0;i<lenf;i++)
471 				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
472 			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
473 				eb[eblen+i]=new[lastscan+lenf+i];
474 
475 			dblen+=lenf;
476 			eblen+=(scan-lenb)-(lastscan+lenf);
477 
478 			offtout(lenf, cb + cblen);
479 			cblen += 8;
480 
481 			offtout((scan - lenb) - (lastscan + lenf), cb + cblen);
482 			cblen += 8;
483 
484 			offtout((pos - lenb) - (lastpos + lenf), cb + cblen);
485 			cblen += 8;
486 
487 			lastscan=scan-lenb;
488 			lastpos=pos-lenb;
489 			lastoffset=pos-scan;
490 		};
491 	};
492 
493 	header[88] = make_small(&cb, &cblen);
494 	header[89] = make_small(&db, &dblen);
495 	header[90] = make_small(&eb, &eblen);
496 
497 	if (fwrite(cb, 1, cblen, pf) != cblen)
498 		err(1, "fwrite");
499 	if (fwrite(db, 1, dblen, pf) != dblen)
500 		err(1, "fwrite");
501 	if (fwrite(eb, 1, eblen, pf) != eblen)
502 		err(1, "fwrite");
503 
504 	offtout(cblen, header + 8);
505 	offtout(dblen, header + 16);
506 	offtout(eblen, header + 24);
507 
508 	/* Seek to the beginning, write the header, and close the file */
509 	if (fseeko(pf, 0, SEEK_SET))
510 		err(1, "fseeko");
511 	if (fwrite(header, sizeof(header), 1, pf) != 1)
512 		err(1, "fwrite(%s)", argv[3]);
513 	if (fclose(pf))
514 		err(1, "fclose");
515 
516 	/* Free the memory we used */
517 	free(cb);
518 	free(db);
519 	free(eb);
520 	free(I);
521 	free(old);
522 	free(new);
523 
524 	return 0;
525 }
526