#!/bin/bash -p

# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# usage: dirdiffer.sh old_dir new_dir patch_dir
#
# dirdiffer creates a patch directory patch_dir that represents the difference
# between old_dir and new_dir. patch_dir can be used with dirpatcher to
# recreate new_dir given old_dir.
#
# dirdiffer operates recursively, properly handling ordinary files, symbolic
# links, and directories, as they are found in new_dir. Symbolic links and
# directories are always replicated as-is in patch_dir. Ordinary files will
# be represented at the appropriate location in patch_dir by one of the
# following:
#
#  - a binary diff prepared by goobsdiff that can transform the file at the
#    same position in old_dir to the version in new_dir, but only when such a
#    file already exists in old_dir and is an ordinary file. These files are
#    given a "$gbs" suffix.
#  - a bzip2-compressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$bz2" suffix.
#  - a gzip-compressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$gz" suffix.
#  - an xz/lzma2-compressed copy of the new file from new_dir; in patch_dir,
#    the new file will have an "$xz" suffix.
#  - an uncompressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$raw" suffix.
#
# The unconventional suffixes are used because they aren't likely to occur in
# filenames.
#
# Of these options, the smallest possible representation is chosen. Note that
# goobsdiff itself will also compress various sections of a binary diff with
# bzip2, gzip, or xz/lzma2, or leave them uncompressed, according to which is
# smallest. The approach of choosing the smallest possible representation is
# time-consuming but given the choices of compressors results in an overall
# size reduction of about 3%-5% relative to using bzip2 as the only
# compressor; bzip2 is generally more effective for these data sets than gzip,
# and xz/lzma2 more effective than bzip2.
#
# For large input files, goobsdiff is also very time-consuming and
# memory-intensive. The overall "wall clock time" spent preparing a patch_dir
# representing the differences between Google Chrome's 6.0.422.0 and 6.0.427.0
# versioned directories from successive weekly dev channel releases on a
# 2.53GHz dual-core 4GB MacBook Pro is 3 minutes. Reconstructing new_dir with
# dirpatcher is much quicker; in the above configuration, only 10 seconds are
# needed for reconstruction.
#
# After creating a full patch_dir structure, but before returning, dirpatcher
# is invoked to attempt to recreate new_dir in a temporary location given
# old_dir and patch_dir. The recreated new_dir is then compared against the
# original new_dir as a verification step. Should verification fail, dirdiffer
# exits with a nonzero status, and patch_dir should not be used.
#
# Environment variables:
# DIRDIFFER_EXCLUDE
#   When an entry in new_dir matches this regular expression, it will not be
#   included in patch_dir. All prospective paths in new_dir will be matched
#   against this regular expression, including directories. If a directory
#   matches this pattern, dirdiffer will also ignore the directory's contents.
#   When unset or empty, nothing is excluded.
# DIRDIFFER_NO_DIFF
#   When an entry in new_dir matches this regular expression, it will not be
#   represented in patch_dir by a $gbs file prepared by goobsdiff. It will only
#   appear as a $bz2, $gz, $xz, or $raw file. Only files in new_dir, not
#   directories, will be matched against this regular expression. When unset
#   or empty, every eligible file is considered for a binary diff.
#
# Exit codes:
#  0  OK
#  1  Unknown failure
#  2  Incorrect number of parameters
#  3  Input directories do not exist or are not directories
#  4  Output directory already exists
#  5  Parent of output directory does not exist or is not a directory
#  6  An input or output directory contains another
#  7  Could not create output directory
#  8  File already exists in output directory
#  9  Found an irregular file (non-directory, file, or symbolic link) in input
# 10  Could not create symbolic link
# 11  File copy failed
# 12  bzip2 compression failed
# 13  gzip compression failed
# 14  xz/lzma2 compression failed
# 15  Patch creation failed
# 16  Verification failed
# 17  Could not set mode (permissions)
# 18  Could not set modification time
# 19  Invalid regular expression (irregular expression?)

set -eu

# Environment sanitization. Set a known-safe PATH. Clear environment variables
# that might impact the interpreter's operation. The |bash -p| invocation
# on the #! line takes the bite out of BASH_ENV, ENV, and SHELLOPTS (among
# other features), but clearing them here ensures that they won't impact any
# shell scripts used as utility programs. SHELLOPTS is read-only and can't be
# unset, only unexported.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
unset BASH_ENV CDPATH ENV GLOBIGNORE IFS POSIXLY_CORRECT
export -n SHELLOPTS

# dotglob makes "*" match dotfiles so that hidden entries in new_dir are
# processed; nullglob makes "*" expand to nothing in an empty directory.
# NOTE: nullglob also silently deletes any unquoted word that looks like a
# glob pattern and matches no file, so array subscripts passed to builtins
# such as unset must be quoted.
shopt -s dotglob nullglob

# find_tool looks for an executable file named |tool_name|:
#  - in the same directory as this script,
#  - if this script is located in a Chromium source tree, at the expected
#    Release output location in the Mac out directory,
#  - as above, but in the Debug output location
# If found in any of the above locations, the tool's path is output.
# Otherwise, this function outputs |tool_name| as a fallback, allowing it to
# be found (or not) by an ordinary ${PATH} search.
#
# Arguments: $1 - tool_name, the bare name of the executable to locate
# Outputs:   the chosen path (or bare name) on stdout
find_tool() {
  local tool_name="${1}"

  local script_dir
  script_dir="$(dirname "${0}")"

  local tool="${script_dir}/${tool_name}"
  if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    printf '%s\n' "${tool}"
    return
  fi

  local script_dir_phys
  script_dir_phys="$(cd "${script_dir}" && pwd -P)"
  if [[ "${script_dir_phys}" =~ ^(.*)/src/chrome/installer/mac$ ]]; then
    # Capture the match immediately; BASH_REMATCH is overwritten by any later
    # =~ evaluation.
    local out_dir="${BASH_REMATCH[1]}/src/out"

    # Release is preferred over Debug.
    local configuration
    for configuration in Release Debug; do
      tool="${out_dir}/${configuration}/${tool_name}"
      if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
        printf '%s\n' "${tool}"
        return
      fi
    done
  fi

  printf '%s\n' "${tool_name}"
}

ME="$(basename "${0}")"
readonly ME
DIRPATCHER="$(dirname "${0}")/dirpatcher.sh"
readonly DIRPATCHER
GOOBSDIFF="$(find_tool goobsdiff)"
readonly GOOBSDIFF
readonly BZIP2="bzip2"
readonly GZIP="gzip"
XZ="$(find_tool xz)"
readonly XZ

# Suffixes appended to file names in patch_dir. They are single-quoted so that
# the "$" is literal.
readonly GBS_SUFFIX='$gbs'
readonly BZ2_SUFFIX='$bz2'
readonly GZ_SUFFIX='$gz'
readonly XZ_SUFFIX='$xz'
readonly PLAIN_SUFFIX='$raw'

# Workaround for http://code.google.com/p/chromium/issues/detail?id=83180#c3
# In bash 4.0, "declare VAR" no longer initializes VAR if not already set.
# The expansions are quoted so that a pattern containing glob characters is
# not subjected to pathname expansion here.
: "${DIRDIFFER_EXCLUDE:=}"
: "${DIRDIFFER_NO_DIFF:=}"

# err writes |error| to stderr, prefixed with the script's name.
#
# Arguments: $1 - error, the message to print
err() {
  local error="${1}"

  echo "${ME}: ${error}" >& 2
}

# g_cleanup:        paths that cleanup must remove if the script does not
#                   finish successfully. Used as a stack: entries are appended
#                   with += and removed with pop_cleanup.
# g_verify_exclude: paths in new_dir that matched DIRDIFFER_EXCLUDE and must
#                   therefore be ignored during verification.
declare -a g_cleanup g_verify_exclude

# cleanup is the EXIT/HUP/INT/QUIT/TERM trap handler. It removes everything
# still registered in g_cleanup and exits with the status that was current
# when the trap fired.
cleanup() {
  local status=${?}

  # Prevent re-entry while cleaning up.
  trap - EXIT
  trap '' HUP INT QUIT TERM

  if [[ ${status} -ge 128 ]]; then
    err "Caught signal $((${status} - 128))"
  fi

  if [[ "${#g_cleanup[@]}" -gt 0 ]]; then
    rm -rf "${g_cleanup[@]}"
  fi

  exit ${status}
}

# pop_cleanup removes the most recently registered path from g_cleanup, so
# that cleanup will no longer delete it.
#
# The subscript is computed explicitly as (count - 1) and the argument to
# unset is quoted: an unquoted "g_cleanup[N]" is a glob pattern, and with
# nullglob in effect it would be removed from the command line before unset
# ever saw it.
pop_cleanup() {
  local count="${#g_cleanup[@]}"

  if [[ "${count}" -gt 0 ]]; then
    unset "g_cleanup[$((count - 1))]"
  fi
}

# copy_mode_and_time makes |patch_file| carry the same permission bits and,
# for anything other than a symbolic link, the same modification time as
# |new_file|.
#
# Arguments: $1 - new_file, the reference path
#            $2 - patch_file, the path to adjust
# Exits:     17 if the mode can't be set, 18 if the time can't be set
copy_mode_and_time() {
  local new_file="${1}"
  local patch_file="${2}"

  # BSD stat format: the mode's "middle" and "low" octal components.
  local mode
  mode="$(stat "-f%OMp%OLp" "${new_file}")"
  # -h: operate on a symbolic link itself rather than its target.
  if ! chmod -h "${mode}" "${patch_file}"; then
    exit 17
  fi

  if ! [[ -L "${patch_file}" ]]; then
    # Symbolic link modification times can't be copied because there's no
    # shell tool that provides direct access to lutimes. Instead, the symbolic
    # link was created with rsync, which already copied the timestamp with
    # lutimes.
    if ! touch -r "${new_file}" "${patch_file}"; then
      exit 18
    fi
  fi
}

# file_size outputs the size of |file| in bytes (BSD stat syntax).
#
# Arguments: $1 - file
file_size() {
  local file="${1}"

  stat -f %z "${file}"
}

# make_patch_file creates the patch_dir representation of the ordinary file
# |new_file|. Every candidate representation (raw copy, bzip2, gzip, xz and,
# when permitted, a goobsdiff binary diff against |old_file|) is produced in
# turn; after each one, only the smallest candidate seen so far is kept on
# disk. Ties are resolved in favor of the earlier candidate. The surviving
# file is named |patch_file| plus the suffix identifying its format, and
# receives |new_file|'s mode and modification time.
#
# Arguments: $1 - old_file, the corresponding path in old_dir (may not exist)
#            $2 - new_file, the file in new_dir to represent
#            $3 - patch_file, the suffix-less destination path in patch_dir
# Exits:     8, 11, 12, 13, 14, 15, 17 or 18 on failure (see the table at
#            the top of this file)
make_patch_file() {
  local old_file="${1}"
  local new_file="${2}"
  local patch_file="${3}"

  local uncompressed_file="${patch_file}${PLAIN_SUFFIX}"
  # Checked for consistency with every other candidate below.
  if [[ -e "${uncompressed_file}" ]]; then
    err "${uncompressed_file} already exists"
    exit 8
  fi
  if ! cp "${new_file}" "${uncompressed_file}"; then
    exit 11
  fi
  local uncompressed_size
  uncompressed_size="$(file_size "${new_file}")"

  # The raw copy is the baseline that every other candidate must beat.
  local keep_file="${uncompressed_file}"
  local keep_size="${uncompressed_size}"

  local bz2_file="${patch_file}${BZ2_SUFFIX}"
  if [[ -e "${bz2_file}" ]]; then
    err "${bz2_file} already exists"
    exit 8
  fi
  if ! "${BZIP2}" -9c < "${new_file}" > "${bz2_file}"; then
    err "couldn't compress ${new_file} to ${bz2_file} with ${BZIP2}"
    exit 12
  fi
  local bz2_size
  bz2_size="$(file_size "${bz2_file}")"

  if [[ "${bz2_size}" -ge "${keep_size}" ]]; then
    rm -f "${bz2_file}"
  else
    rm -f "${keep_file}"
    keep_file="${bz2_file}"
    keep_size="${bz2_size}"
  fi

  local gz_file="${patch_file}${GZ_SUFFIX}"
  if [[ -e "${gz_file}" ]]; then
    err "${gz_file} already exists"
    exit 8
  fi
  # -n: don't store the original name and timestamp in the gzip header.
  if ! "${GZIP}" -9cn < "${new_file}" > "${gz_file}"; then
    err "couldn't compress ${new_file} to ${gz_file} with ${GZIP}"
    exit 13
  fi
  local gz_size
  gz_size="$(file_size "${gz_file}")"

  if [[ "${gz_size}" -ge "${keep_size}" ]]; then
    rm -f "${gz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${gz_file}"
    keep_size="${gz_size}"
  fi

  local xz_flags=("-c")

  # If the file looks like a Mach-O file, including a universal/fat file, add
  # the x86 BCJ filter, which results in slightly better compression of x86
  # and x86_64 executables. Mach-O files might contain other architectures,
  # but they aren't currently expected in Chrome.
  # A failure of |file| is deliberately tolerated: it only means that the
  # filter isn't used.
  local file_output
  file_output="$(file "${new_file}" 2> /dev/null || true)"
  if [[ "${file_output}" =~ Mach-O ]]; then
    xz_flags+=("--x86")
  fi

  # Use an lzma2 encoder. This is equivalent to xz -9 -e, but allows filters
  # to precede the compressor.
  xz_flags+=("--lzma2=preset=9e")

  local xz_file="${patch_file}${XZ_SUFFIX}"
  if [[ -e "${xz_file}" ]]; then
    err "${xz_file} already exists"
    exit 8
  fi
  if ! "${XZ}" "${xz_flags[@]}" < "${new_file}" > "${xz_file}"; then
    err "couldn't compress ${new_file} to ${xz_file} with ${XZ}"
    exit 14
  fi
  local xz_size
  xz_size="$(file_size "${xz_file}")"

  if [[ "${xz_size}" -ge "${keep_size}" ]]; then
    rm -f "${xz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${xz_file}"
    keep_size="${xz_size}"
  fi

  # An empty DIRDIFFER_NO_DIFF means "no file is exempt from diffing". The
  # emptiness test is explicit so that the outcome doesn't depend on how the
  # regular expression library treats an empty pattern.
  local diff_allowed="y"
  if [[ -n "${DIRDIFFER_NO_DIFF}" ]] &&
     [[ "${new_file}" =~ ${DIRDIFFER_NO_DIFF} ]]; then
    diff_allowed=
  fi

  # A binary diff is only possible against an ordinary (non-symlink) old file.
  if [[ -n "${diff_allowed}" ]] &&
     [[ -f "${old_file}" ]] && ! [[ -L "${old_file}" ]]; then
    local gbs_file="${patch_file}${GBS_SUFFIX}"
    if [[ -e "${gbs_file}" ]]; then
      err "${gbs_file} already exists"
      exit 8
    fi
    if ! "${GOOBSDIFF}" "${old_file}" "${new_file}" "${gbs_file}"; then
      err "couldn't create ${gbs_file} by comparing ${old_file} to ${new_file}"
      exit 15
    fi
    local gbs_size
    gbs_size="$(file_size "${gbs_file}")"

    if [[ "${gbs_size}" -ge "${keep_size}" ]]; then
      rm -f "${gbs_file}"
    else
      rm -f "${keep_file}"
      keep_file="${gbs_file}"
      keep_size="${gbs_size}"
    fi
  fi

  copy_mode_and_time "${new_file}" "${keep_file}"
}

# make_patch_symlink replicates the symbolic link |new_file| at |patch_file|.
#
# Arguments: $1 - new_file, the symbolic link in new_dir
#            $2 - patch_file, the destination path in patch_dir
# Exits:     10 if the link can't be created, 17 if its mode can't be set
make_patch_symlink() {
  local new_file="${1}"
  local patch_file="${2}"

  # rsync is used instead of readlink + ln -s, as it's the only way to
  # preserve the timestamp of a symbolic link using shell tools.
  if ! rsync -lt "${new_file}" "${patch_file}"; then
    exit 10
  fi

  copy_mode_and_time "${new_file}" "${patch_file}"
}

# make_patch_dir creates |patch_dir| and populates it with a representation
# of each entry in |new_dir|, recursing into subdirectories. Entries whose
# path matches DIRDIFFER_EXCLUDE are skipped and recorded in g_verify_exclude.
# The directory's own mode and time are copied last, after its contents have
# been written.
#
# Arguments: $1 - old_dir, the corresponding directory in the old tree (may
#                 not exist)
#            $2 - new_dir, the directory to represent
#            $3 - patch_dir, the directory to create
# Exits:     7, 8 or 9, or any code produced while handling an entry
make_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  if ! mkdir "${patch_dir}"; then
    exit 7
  fi

  local new_file
  for new_file in "${new_dir}/"*; do
    # Strip "${new_dir}/" to obtain the entry's name.
    local file="${new_file:${#new_dir} + 1}"
    local old_file="${old_dir}/${file}"
    local patch_file="${patch_dir}/${file}"

    # An empty DIRDIFFER_EXCLUDE means "exclude nothing". The emptiness test
    # is explicit so that the outcome doesn't depend on how the regular
    # expression library treats an empty pattern.
    if [[ -n "${DIRDIFFER_EXCLUDE}" ]] &&
       [[ "${new_file}" =~ ${DIRDIFFER_EXCLUDE} ]]; then
      g_verify_exclude+=("${new_file}")
      continue
    fi

    if [[ -e "${patch_file}" ]]; then
      err "${patch_file} already exists"
      exit 8
    fi

    # -L is tested first because -d and -f follow symbolic links.
    if [[ -L "${new_file}" ]]; then
      make_patch_symlink "${new_file}" "${patch_file}"
    elif [[ -d "${new_file}" ]]; then
      make_patch_dir "${old_file}" "${new_file}" "${patch_file}"
    elif [[ ! -f "${new_file}" ]]; then
      err "can't handle irregular file ${new_file}"
      exit 9
    else
      make_patch_file "${old_file}" "${new_file}" "${patch_file}"
    fi
  done

  copy_mode_and_time "${new_dir}" "${patch_dir}"
}

# verify_patch_dir applies |patch_dir| to |old_dir| with dirpatcher in a
# temporary location and compares the result against |new_dir|, ignoring the
# paths recorded in g_verify_exclude.
#
# Arguments: $1 - old_dir
#            $2 - new_dir
#            $3 - patch_dir
# Exits:     16 if patching fails, rsync fails, or any difference is found
verify_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  local verify_temp_dir verify_dir
  verify_temp_dir="$(mktemp -d -t "${ME}")"
  g_cleanup+=("${verify_temp_dir}")
  verify_dir="${verify_temp_dir}/patched"

  if ! "${DIRPATCHER}" "${old_dir}" "${patch_dir}" "${verify_dir}"; then
    err "patch application for verification failed"
    exit 16
  fi

  # rsync will print a line for any file, directory, or symbolic link that
  # differs or exists only in one directory. As used here, it correctly
  # considers link targets, file contents, permissions, and timestamps.
  local rsync_command=(rsync -clprt --delete --out-format=%n \
                       "${new_dir}/" "${verify_dir}")
  if [[ ${#g_verify_exclude[@]} -gt 0 ]]; then
    local exclude
    for exclude in "${g_verify_exclude[@]}"; do
      # ${g_verify_exclude[@]} contains paths in ${new_dir}. Strip off
      # ${new_dir} from the beginning of each, but leave a leading "/" so that
      # rsync treats them as being at the root of the "transfer."
      rsync_command+=("--exclude" "${exclude:${#new_dir}}")
    done
  fi

  local rsync_output
  if ! rsync_output="$("${rsync_command[@]}")"; then
    err "rsync for verification failed"
    exit 16
  fi

  # The temporary directory is no longer needed; remove it now and drop it
  # from the cleanup list.
  rm -rf "${verify_temp_dir}"
  pop_cleanup

  if [[ -n "${rsync_output}" ]]; then
    err "verification failed"
    exit 16
  fi
}

# shell_safe_path ensures that |path| is safe to pass to tools as a
# command-line argument. If the first character in |path| is "-", "./" is
# prepended to it. The possibly-modified |path| is output.
#
# Arguments: $1 - path
# Outputs:   the safe path on stdout
shell_safe_path() {
  local path="${1}"

  if [[ "${path:0:1}" = "-" ]]; then
    printf '%s\n' "./${path}"
  else
    printf '%s\n' "${path}"
  fi
}

# dirs_contained returns 0 if either of the two directories is equal to or
# located inside the other, and 1 otherwise. The comparison is a string
# prefix test, so callers should pass physical (symlink-free) absolute paths.
# A trailing "/" is appended to both so that "/a/b" is not taken to contain
# "/a/bc".
#
# Arguments: $1, $2 - the directories to compare
dirs_contained() {
  local dir1="${1}/"
  local dir2="${2}/"

  if [[ "${dir1:0:${#dir2}}" = "${dir2}" ]] ||
     [[ "${dir2:0:${#dir1}}" = "${dir1}" ]]; then
    return 0
  fi

  return 1
}

# usage prints a one-line synopsis to stderr.
usage() {
  echo "usage: ${ME} old_dir new_dir patch_dir" >& 2
}

# main validates the arguments and environment, builds patch_dir, and
# verifies it. patch_dir is registered for cleanup while it is being built so
# that a failure or signal never leaves a partial patch behind.
#
# Arguments: $1 - old_dir, $2 - new_dir, $3 - patch_dir
# Exits:     3, 4, 5, 6 or 19 on invalid input, or any code produced while
#            building or verifying the patch
main() {
  local old_dir new_dir patch_dir
  old_dir="$(shell_safe_path "${1}")"
  new_dir="$(shell_safe_path "${2}")"
  patch_dir="$(shell_safe_path "${3}")"

  trap cleanup EXIT HUP INT QUIT TERM

  if ! [[ -d "${old_dir}" ]] || ! [[ -d "${new_dir}" ]]; then
    err "old_dir and new_dir must exist and be directories"
    usage
    exit 3
  fi

  if [[ -e "${patch_dir}" ]]; then
    err "patch_dir must not exist"
    usage
    exit 4
  fi

  local patch_dir_parent
  patch_dir_parent="$(dirname "${patch_dir}")"
  if ! [[ -d "${patch_dir_parent}" ]]; then
    err "patch_dir parent directory must exist and be a directory"
    usage
    exit 5
  fi

  # The weird conditional structure is because the status of the RE comparison
  # needs to be available in ${?} without conflating it with other conditions
  # or negating it. Only a status of 2 from the =~ operator indicates an
  # invalid regular expression.

  if [[ -n "${DIRDIFFER_EXCLUDE}" ]]; then
    if [[ "" =~ ${DIRDIFFER_EXCLUDE} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_EXCLUDE contains an invalid regular expression"
      exit 19
    fi
  fi

  if [[ -n "${DIRDIFFER_NO_DIFF}" ]]; then
    if [[ "" =~ ${DIRDIFFER_NO_DIFF} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_NO_DIFF contains an invalid regular expression"
      exit 19
    fi
  fi

  # Resolve symbolic links before checking for containment. patch_dir doesn't
  # exist yet, so its physical path is derived from its parent's.
  local old_dir_phys new_dir_phys patch_dir_parent_phys patch_dir_phys
  old_dir_phys="$(cd "${old_dir}" && pwd -P)"
  new_dir_phys="$(cd "${new_dir}" && pwd -P)"
  patch_dir_parent_phys="$(cd "${patch_dir_parent}" && pwd -P)"
  patch_dir_phys="${patch_dir_parent_phys}/$(basename "${patch_dir}")"

  if dirs_contained "${old_dir_phys}" "${new_dir_phys}" ||
     dirs_contained "${old_dir_phys}" "${patch_dir_phys}" ||
     dirs_contained "${new_dir_phys}" "${patch_dir_phys}"; then
    err "directories must not contain one another"
    usage
    exit 6
  fi

  # From here on, an abnormal exit removes the partially-built patch_dir.
  g_cleanup+=("${patch_dir}")

  make_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  verify_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  # Success: patch_dir must survive, so take it off the cleanup list before
  # disarming the EXIT trap.
  pop_cleanup
  trap - EXIT
}

if [[ ${#} -ne 3 ]]; then
  usage
  exit 2
fi

main "${@}"
exit ${?}