• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16
17import array
18import common
19import functools
20import heapq
21import itertools
22import multiprocessing
23import os
24import os.path
25import re
26import subprocess
27import sys
28import threading
29
30from collections import deque, OrderedDict
31from hashlib import sha1
32from rangelib import RangeSet
33
34
35__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
37
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Return the raw patch bytes that transform srcfile into tgtfile.

  Runs 'imgdiff -z' when imgdiff is True, otherwise 'bsdiff', writing the
  patch to a temp file which is then read back and returned.

  Raises:
    ValueError: if the diff tool exits with a non-zero status; the tool's
        combined stdout/stderr is used as the exception message.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Deliberately not using common.Run(): with OPTIONS.verbose set it would
  # dump every bsdiff/imgdiff command line, which is useless noise here since
  # the arguments are temp filenames only.
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  proc_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(proc_output)

  with open(patchfile, 'rb') as f:
    return f.read()
55
56
class Image(object):
  """Abstract interface that BlockImageDiff requires of tgt/src images."""

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 of the data contained in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as an iterable of string pieces."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the whole image, optionally including the
    clobbered blocks."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file-like object 'fd'."""
    raise NotImplementedError
69
70
class EmptyImage(Image):
  """A zero-length image, used when no source image is supplied."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data at all, so any range hashes to the SHA-1 of nothing.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty for an EmptyImage, so the
    # include_clobbered_blocks flag makes no difference.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
96
97
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image of 4K blocks.

    Args:
      data: the raw image contents.
      trim: drop a trailing partial block, if any.
      pad: zero-fill a trailing partial block, if any.

    Raises:
      ValueError: if the data is not block-aligned and neither trim nor pad
          was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Explicit floor division; the data is block-aligned at this point.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store a RangeSet rather than the raw list: TotalSha1() below calls
    # self.care_map.subtract(self.clobbered_blocks), which needs a RangeSet,
    # and this matches EmptyImage.clobbered_blocks.
    self.clobbered_blocks = (
        RangeSet(data=clobbered_blocks) if clobbered_blocks else RangeSet())
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify every block (excluding a padded last block, which is already
    # covered by clobbered_blocks) as all-zero or not. Each block is appended
    # as a [start, end) pair for RangeSet construction.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the data chunk for each [start, end) block pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 over the data of all blocks in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image, minus clobbered blocks unless
    include_clobbered_blocks is True."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data of all blocks in 'ranges' to the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
181
182
class Transfer(object):
  """One transfer command plus its ordering and stash bookkeeping."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # "Intact" transfers (both range sets monotonic) are the only candidates
    # for imgdiff, since their blocks concatenate in file order.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # OrderedDict keeps the output repeatable; with a plain dict the result
    # would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return the net number of blocks this command adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this command as a plain "new", dropping all source usage."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
220
221
@functools.total_ordering
class HeapItem(object):
  """Heap entry that orders wrapped items by descending item.score."""

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Drop the wrapped item, marking this heap entry as stale."""
    self.item = None

  def __bool__(self):
    # Truthy while the entry still wraps a live item; clear()ed entries must
    # evaluate as False so stale heap entries can be skipped. (Testing
    # 'self.item is None' here would invert the check and treat live entries
    # as stale and vice versa.)
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing; alias it to __bool__.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
237
238
239# BlockImageDiff works on two image objects.  An image object is
240# anything that provides the following attributes:
241#
242#    blocksize: the size in bytes of a block, currently must be 4096.
243#
244#    total_blocks: the total size of the partition/image, in blocks.
245#
246#    care_map: a RangeSet containing which blocks (in the range [0,
247#      total_blocks) we actually care about; i.e. which blocks contain
248#      data.
249#
250#    file_map: a dict that partitions the blocks contained in care_map
251#      into smaller domains that are useful for doing diffs on.
252#      (Typically a domain is a file, and the key in file_map is the
253#      pathname.)
254#
255#    clobbered_blocks: a RangeSet containing which blocks contain data
256#      but may be altered by the FS. They need to be excluded when
257#      verifying the partition integrity.
258#
259#    ReadRangeSet(): a function that takes a RangeSet and returns the
260#      data contained in the image blocks of that RangeSet.  The data
261#      is returned as a list or tuple of strings; concatenating the
262#      elements together should produce the requested data.
263#      Implementations are free to break up the data into list/tuple
264#      elements in any way that is convenient.
265#
266#    RangeSha1(): a function that returns (as a hex string) the SHA-1
267#      hash of all the data in the specified range.
268#
269#    TotalSha1(): a function that returns (as a hex string) the SHA-1
270#      hash of all the data in the image (ie, all the blocks in the
271#      care_map minus clobbered_blocks, or including the clobbered
272#      blocks if include_clobbered_blocks is True).
273#
274# When creating a BlockImageDiff, the src image may be None, in which
275# case the list of transfers produced will never read from the
276# original image.
277
278class BlockImageDiff(object):
279  def __init__(self, tgt, src=None, threads=None, version=4,
280               disable_imgdiff=False):
281    if threads is None:
282      threads = multiprocessing.cpu_count() // 2
283      if threads == 0:
284        threads = 1
285    self.threads = threads
286    self.version = version
287    self.transfers = []
288    self.src_basenames = {}
289    self.src_numpatterns = {}
290    self._max_stashed_size = 0
291    self.touched_src_ranges = RangeSet()
292    self.touched_src_sha1 = None
293    self.disable_imgdiff = disable_imgdiff
294
295    assert version in (3, 4)
296
297    self.tgt = tgt
298    if src is None:
299      src = EmptyImage()
300    self.src = src
301
302    # The updater code that installs the patch always uses 4k blocks.
303    assert tgt.blocksize == 4096
304    assert src.blocksize == 4096
305
306    # The range sets in each filemap should comprise a partition of
307    # the care map.
308    self.AssertPartition(src.care_map, src.file_map.values())
309    self.AssertPartition(tgt.care_map, tgt.file_map.values())
310
  @property
  def max_stashed_size(self):
    """Peak stashed-data size in bytes (computed by WriteTransfers())."""
    return self._max_stashed_size
314
  def Compute(self, prefix):
    """Compute the transfer list and write all output files under 'prefix'.

    Orchestrates the whole diff: find transfers, order them, bound the stash
    usage, then emit the patch data and transfer list. The steps below are
    order-dependent.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
346
  def WriteTransfers(self, prefix):
    """Write the ordered transfer list to '<prefix>.transfer.list'.

    Emits stash/free bookkeeping around each command, tracks the peak stash
    usage (enforcing the cache-based limit when OPTIONS.cache_size is set),
    zeroes the extended blocks, adds leading/trailing erase commands, and
    prepends the 4-line header: version, total written blocks, "0" (stash
    slot count, unused in v3+), and max stashed blocks.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a "stash" command for every range this transfer stores, unless
      # an identical range (same hash) is already stashed, in which case the
      # existing slot is shared via its ref count.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build the source descriptor string, in one of these forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Express the stashed range in coordinates relative to the full
        # source range of this transfer.
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference; free the slot after this command runs.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op and emits nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # starve for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d  (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
            max_stashed_blocks, self._max_stashed_size, max_allowed,
            self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d  (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
553
  def ReviseStashSize(self):
    """Convert stash-hungry transfers to "new" to fit the stash size limit.

    Walks the transfer list simulating stash usage. Any command whose stash
    requirement — explicit (via stash_before) or implicit (a "diff" whose
    src/tgt ranges overlap) — would push usage past the /cache-based limit
    has the stash's consumer replaced with a "new" command.

    Returns:
      The number of target blocks that were converted to new data.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Ranges with an identical hash share one slot, so only a new hash
          # adds to the total.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print("  Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
645
  def ComputePatches(self, prefix):
    """Write '<prefix>.new.dat' and '<prefix>.patch.dat' for the transfers.

    "new" transfers have their target data appended to <prefix>.new.dat.
    "diff" transfers are either downgraded to "move" (identical src/tgt
    SHA-1) or diffed with bsdiff/imgdiff on a pool of worker threads; the
    resulting patches are concatenated into <prefix>.patch.dat, and each
    transfer's patch_start/patch_len fields are set accordingly.
    """
    print("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - imgdiff is not disabled, and
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (not self.disable_imgdiff and xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # patches is indexed by the pre-assigned patch number so the output
      # order is deterministic regardless of which worker finishes first.
      patches = [None] * diff_total
      error_messages = []
      if sys.stdout.isatty():
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          src_ranges = xf.src_ranges
          tgt_ranges = xf.tgt_ranges

          # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
          with lock:
            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

          try:
            patch = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            # NOTE(review): e.message is Python 2 only; use str(e) if this is
            # ever ported to Python 3. Also, when compute_patch() raises,
            # 'patch' is left unbound and the assignment below raises
            # NameError in this worker thread — verify whether a 'continue'
            # is intended after recording the error.
            with lock:
              error_messages.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if error_messages:
        print('\n'.join(error_messages))
        sys.exit(1)
    else:
      patches = []

    # Concatenate the patches, recording each one's offset/length on its
    # transfer for the bsdiff/imgdiff commands in WriteTransfers().
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
                xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
                xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")"),
                xf.tgt_ranges, xf.src_ranges))
807
  def AssertSequenceGood(self):
    """Simulate the transfer sequence and assert it is consistent.

    Checks that:
    - we never read a block after writing it, and
    - we write every block we care about exactly once.

    Raises AssertionError if either invariant is violated.
    """
    # Start with no blocks having been touched yet.
    # NOTE: "\0" * n only builds a valid array.array("B") initializer on
    # Python 2 (byte string); this file targets Python 2.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      # Blocks read from the stash were captured earlier, so exclude them
      # from the "must be untouched" check.
      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # Source image could be larger. Don't check the blocks that are in the
        # source image only. Since they are not in 'touched', and won't ever
        # be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1
843
844  def ImproveVertexSequence(self):
845    print("Improving vertex order...")
846
847    # At this point our digraph is acyclic; we reversed any edges that
848    # were backwards in the heuristically-generated sequence.  The
849    # previously-generated order is still acceptable, but we hope to
850    # find a better order that needs less memory for stashed data.
851    # Now we do a topological sort to generate a new vertex order,
852    # using a greedy algorithm to choose which vertex goes next
853    # whenever we have a choice.
854
855    # Make a copy of the edge set; this copy will get destroyed by the
856    # algorithm.
857    for xf in self.transfers:
858      xf.incoming = xf.goes_after.copy()
859      xf.outgoing = xf.goes_before.copy()
860
861    L = []   # the new vertex order
862
863    # S is the set of sources in the remaining graph; we always choose
864    # the one that leaves the least amount of stashed data after it's
865    # executed.
866    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
867         if not u.incoming]
868    heapq.heapify(S)
869
870    while S:
871      _, _, xf = heapq.heappop(S)
872      L.append(xf)
873      for u in xf.outgoing:
874        del u.incoming[xf]
875        if not u.incoming:
876          heapq.heappush(S, (u.NetStashChange(), u.order, u))
877
878    # if this fails then our graph had a cycle.
879    assert len(L) == len(self.transfers)
880
881    self.transfers = L
882    for i, xf in enumerate(L):
883      xf.order = i
884
885  def RemoveBackwardEdges(self):
886    print("Removing backward edges...")
887    in_order = 0
888    out_of_order = 0
889    lost_source = 0
890
891    for xf in self.transfers:
892      lost = 0
893      size = xf.src_ranges.size()
894      for u in xf.goes_before:
895        # xf should go before u
896        if xf.order < u.order:
897          # it does, hurray!
898          in_order += 1
899        else:
900          # it doesn't, boo.  trim the blocks that u writes from xf's
901          # source, so that xf can go after u.
902          out_of_order += 1
903          assert xf.src_ranges.overlaps(u.tgt_ranges)
904          xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
905          xf.intact = False
906
907      if xf.style == "diff" and not xf.src_ranges:
908        # nothing left to diff from; treat as new data
909        xf.style = "new"
910
911      lost = size - xf.src_ranges.size()
912      lost_source += lost
913
914    print(("  %d/%d dependencies (%.2f%%) were violated; "
915           "%d source blocks removed.") %
916          (out_of_order, in_order + out_of_order,
917           (out_of_order * 100.0 / (in_order + out_of_order))
918           if (in_order + out_of_order) else 0.0,
919           lost_source))
920
921  def ReverseBackwardEdges(self):
922    """Reverse unsatisfying edges and compute pairs of stashed blocks.
923
924    For each transfer, make sure it properly stashes the blocks it touches and
925    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
926    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
927    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
928    it is possible to have multiple pairs with different 'stash_raw_id's. Each
929    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
930    blocks will be written to the same stash slot in WriteTransfers().
931    """
932
933    print("Reversing backward edges...")
934    in_order = 0
935    out_of_order = 0
936    stash_raw_id = 0
937    stash_size = 0
938
939    for xf in self.transfers:
940      for u in xf.goes_before.copy():
941        # xf should go before u
942        if xf.order < u.order:
943          # it does, hurray!
944          in_order += 1
945        else:
946          # it doesn't, boo.  modify u to stash the blocks that it
947          # writes that xf wants to read, and then require u to go
948          # before xf.
949          out_of_order += 1
950
951          overlap = xf.src_ranges.intersect(u.tgt_ranges)
952          assert overlap
953
954          u.stash_before.append((stash_raw_id, overlap))
955          xf.use_stash.append((stash_raw_id, overlap))
956          stash_raw_id += 1
957          stash_size += overlap.size()
958
959          # reverse the edge direction; now xf must go after u
960          del xf.goes_before[u]
961          del u.goes_after[xf]
962          xf.goes_after[u] = None    # value doesn't matter
963          u.goes_before[xf] = None
964
965    print(("  %d/%d dependencies (%.2f%%) were violated; "
966           "%d source blocks stashed.") %
967          (out_of_order, in_order + out_of_order,
968           (out_of_order * 100.0 / (in_order + out_of_order))
969           if (in_order + out_of_order) else 0.0,
970           stash_size))
971
  def FindVertexSequence(self):
    """Order self.transfers to minimize the weight of left-pointing edges.

    Implements the feedback-arc-set heuristic described below. On return,
    self.transfers has been rearranged and each transfer's 'order' field
    is set to its position in the new sequence.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # High score = more outgoing than incoming edge weight, i.e. more
      # source-like; such vertices are better placed early.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries cannot be rescored in place: invalidate the old entry
      # and push a fresh one carrying the updated score.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          # Removing u may turn some of its predecessors into new sinks.
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          # Removing u may turn some of its successors into new sources.
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we hit an entry that is still valid (not cleared by
      # adjust_score -- cleared items are falsy) and whose vertex is
      # still in the remaining graph.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      # Treat the chosen vertex as a source: append it to the left side
      # and detach all of its edges, possibly exposing new sources/sinks.
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge-set copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1081
1082  def GenerateDigraph(self):
1083    print("Generating digraph...")
1084
1085    # Each item of source_ranges will be:
1086    #   - None, if that block is not used as a source,
1087    #   - an ordered set of transfers.
1088    source_ranges = []
1089    for b in self.transfers:
1090      for s, e in b.src_ranges:
1091        if e > len(source_ranges):
1092          source_ranges.extend([None] * (e-len(source_ranges)))
1093        for i in range(s, e):
1094          if source_ranges[i] is None:
1095            source_ranges[i] = OrderedDict.fromkeys([b])
1096          else:
1097            source_ranges[i][b] = None
1098
1099    for a in self.transfers:
1100      intersections = OrderedDict()
1101      for s, e in a.tgt_ranges:
1102        for i in range(s, e):
1103          if i >= len(source_ranges): break
1104          # Add all the Transfers in source_ranges[i] to the (ordered) set.
1105          if source_ranges[i] is not None:
1106            for j in source_ranges[i]:
1107              intersections[j] = None
1108
1109      for b in intersections:
1110        if a is b: continue
1111
1112        # If the blocks written by A are read by B, then B needs to go before A.
1113        i = a.tgt_ranges.intersect(b.src_ranges)
1114        if i:
1115          if b.src_name == "__ZERO":
1116            # the cost of removing source blocks for the __ZERO domain
1117            # is (nearly) zero.
1118            size = 0
1119          else:
1120            size = i.size()
1121          b.goes_before[a] = size
1122          a.goes_after[b] = size
1123
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    Walks every entry of the target file_map, matching it to a source
    (exact path, exact basename, or digit-collapsed basename) and emits
    Transfer() objects into self.transfers via the nested helpers.
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Peel fixed-size pieces off the front of both ranges until either
      # side fits within the per-piece limit; each piece gets a "-N"
      # suffixed name.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare the files block by block; identical blocks are collected
        # into the *_skipped sets so they can be moved rather than diffed.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          # Emit the identical blocks as their own (cheap) transfer(s).
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      # No source match at all: ship the target data as new blocks.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
1296
1297  def AbbreviateSourceNames(self):
1298    for k in self.src.file_map.keys():
1299      b = os.path.basename(k)
1300      self.src_basenames[b] = k
1301      b = re.sub("[0-9]+", "#", b)
1302      self.src_numpatterns[b] = k
1303
1304  @staticmethod
1305  def AssertPartition(total, seq):
1306    """Assert that all the RangeSets in 'seq' form a partition of the
1307    'total' RangeSet (ie, they are nonintersecting and their union
1308    equals 'total')."""
1309
1310    so_far = RangeSet()
1311    for i in seq:
1312      assert not so_far.overlaps(i)
1313      so_far = so_far.union(i)
1314    assert so_far == total
1315