• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
6 #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
7 
8 #include <stddef.h>
9 
10 #include <deque>
11 #include <limits>
12 #include <vector>
13 
14 #include "components/zucchini/image_index.h"
15 #include "components/zucchini/image_utils.h"
16 #include "components/zucchini/targets_affinity.h"
17 
18 namespace zucchini {
19 
20 constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
21 
22 class EncodedView;
23 class EquivalenceSource;
24 
25 // Returns similarity score between a token (raw byte or first byte of a
26 // reference) in |old_image_index| at |src| and a token in |new_image_index|
27 // at |dst|. |targets_affinities| describes affinities for each target pool and
28 // is used to evaluate similarity between references, hence it's size must be
29 // equal to the number of pools in both |old_image_index| and |new_image_index|.
30 // Both |src| and |dst| must refer to tokens in |old_image_index| and
31 // |new_image_index|.
32 double GetTokenSimilarity(
33     const ImageIndex& old_image_index,
34     const ImageIndex& new_image_index,
35     const std::vector<TargetsAffinity>& targets_affinities,
36     offset_t src,
37     offset_t dst);
38 
39 // Returns a similarity score between content in |old_image_index| and
40 // |new_image_index| at regions described by |equivalence|, using
41 // |targets_affinities| to evaluate similarity between references.
42 double GetEquivalenceSimilarity(
43     const ImageIndex& old_image_index,
44     const ImageIndex& new_image_index,
45     const std::vector<TargetsAffinity>& targets_affinities,
46     const Equivalence& equivalence);
47 
48 // Extends |equivalence| forward and returns the result. This is related to
49 // VisitEquivalenceSeed().
50 EquivalenceCandidate ExtendEquivalenceForward(
51     const ImageIndex& old_image_index,
52     const ImageIndex& new_image_index,
53     const std::vector<TargetsAffinity>& targets_affinities,
54     const EquivalenceCandidate& equivalence,
55     double min_similarity);
56 
57 // Extends |equivalence| backward and returns the result. This is related to
58 // VisitEquivalenceSeed().
59 EquivalenceCandidate ExtendEquivalenceBackward(
60     const ImageIndex& old_image_index,
61     const ImageIndex& new_image_index,
62     const std::vector<TargetsAffinity>& targets_affinities,
63     const EquivalenceCandidate& equivalence,
64     double min_similarity);
65 
66 // Creates an equivalence, starting with |src| and |dst| as offset hint, and
67 // extends it both forward and backward, trying to maximise similarity between
68 // |old_image_index| and |new_image_index|, and returns the result.
69 // |targets_affinities| is used to evaluate similarity between references.
70 // |min_similarity| describes the minimum acceptable similarity score and is
71 // used as threshold to discard bad equivalences.
72 EquivalenceCandidate VisitEquivalenceSeed(
73     const ImageIndex& old_image_index,
74     const ImageIndex& new_image_index,
75     const std::vector<TargetsAffinity>& targets_affinities,
76     offset_t src,
77     offset_t dst,
78     double min_similarity);
79 
80 // Container of pruned equivalences used to map offsets from |old_image| to
81 // offsets in |new_image|. Equivalences are pruned by cropping smaller
82 // equivalences to avoid overlaps, to make the equivalence map (for covered
83 // bytes in |old_image| and |new_image|) one-to-one.
84 class OffsetMapper {
85  public:
86   using const_iterator = std::deque<Equivalence>::const_iterator;
87 
88   // Constructors for various data sources. "Old" and "new" image sizes are
89   // needed for bounds checks and to handle dangling targets.
90   // - From a list of |equivalences|, already sorted (by |src_offset|) and
91   //   pruned, useful for tests.
92   OffsetMapper(std::deque<Equivalence>&& equivalences,
93                offset_t old_image_size,
94                offset_t new_image_size);
95   // - From a generator, useful for Zucchini-apply.
96   OffsetMapper(EquivalenceSource&& equivalence_source,
97                offset_t old_image_size,
98                offset_t new_image_size);
99   // - From an EquivalenceMap that needs to be processed, useful for
100   //   Zucchini-gen.
101   OffsetMapper(const EquivalenceMap& equivalence_map,
102                offset_t old_image_size,
103                offset_t new_image_size);
104   ~OffsetMapper();
105 
size()106   size_t size() const { return equivalences_.size(); }
begin()107   const_iterator begin() const { return equivalences_.begin(); }
end()108   const_iterator end() const { return equivalences_.end(); }
109 
110   // Returns naive extended forward-projection of "old" |offset| that follows
111   // |eq|'s delta. |eq| needs not cover |offset|.
112   // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
113   // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
114   //   caller must to make the check (hence "naive").
115   offset_t NaiveExtendedForwardProject(const Equivalence& unit,
116                                        offset_t offset) const;
117 
118   // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
119   // Assumes |equivalences_| to be non-empty. Cases:
120   // - If |offset| is covered (i.e., in an "old" block), then use the delta of
121   //   the (unique) equivalence unit that covers |offset|.
122   // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
123   //   nearest "old" block, use its delta, and avert underflow / overflow by
124   //   clamping the result to |[0, new_image_size_)|.
125   // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
126   //   |new_image_size_ - old_image_size_| as the delta.
127   offset_t ExtendedForwardProject(offset_t offset) const;
128 
129   // Given sorted |offsets|, applies a projection in-place of all offsets that
130   // are part of a pruned equivalence from |old_image| to |new_image|. Other
131   // offsets are removed from |offsets|.
132   void ForwardProjectAll(std::deque<offset_t>* offsets) const;
133 
134   // Accessor for testing.
equivalences()135   const std::deque<Equivalence> equivalences() const { return equivalences_; }
136 
137   // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
138   // source location that was covered by some Equivalence would become covered
139   // by exactly one Equivalence. Moreover, for the offset, the equivalence
140   // corresponds to the largest (pre-pruning) covering Equivalence, and in case
141   // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
142   // change in size since empty Equivalences are removed.
143   static void PruneEquivalencesAndSortBySource(
144       std::deque<Equivalence>* equivalences);
145 
146  private:
147   // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
148   // block overlaps). Also, it is sorted by "old" offsets.
149   std::deque<Equivalence> equivalences_;
150   const offset_t old_image_size_;
151   const offset_t new_image_size_;
152 };
153 
154 // Container of equivalences between |old_image_index| and |new_image_index|,
155 // sorted by |Equivalence::dst_offset|, only used during patch generation.
156 class EquivalenceMap {
157  public:
158   using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
159 
160   EquivalenceMap();
161   // Initializes the object with |equivalences|.
162   explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
163   EquivalenceMap(EquivalenceMap&&);
164   EquivalenceMap(const EquivalenceMap&) = delete;
165   ~EquivalenceMap();
166 
167   // Finds relevant equivalences between |old_view| and |new_view|, using
168   // suffix array |old_sa| computed from |old_view| and using
169   // |targets_affinities| to evaluate similarity between references. This
170   // function is not symmetric. Equivalences might overlap in |old_view|, but
171   // not in |new_view|. It tries to maximize accumulated similarity within each
172   // equivalence, while maximizing |new_view| coverage. The minimum similarity
173   // of an equivalence is given by |min_similarity|.
174   void Build(const std::vector<offset_t>& old_sa,
175              const EncodedView& old_view,
176              const EncodedView& new_view,
177              const std::vector<TargetsAffinity>& targets_affinities,
178              double min_similarity);
179 
size()180   size_t size() const { return candidates_.size(); }
begin()181   const_iterator begin() const { return candidates_.begin(); }
end()182   const_iterator end() const { return candidates_.end(); }
183 
184  private:
185   // Discovers equivalence candidates between |old_view| and |new_view| and
186   // stores them in the object. Note that resulting candidates are not sorted
187   // and might be overlapping in new image.
188   void CreateCandidates(const std::vector<offset_t>& old_sa,
189                         const EncodedView& old_view,
190                         const EncodedView& new_view,
191                         const std::vector<TargetsAffinity>& targets_affinities,
192                         double min_similarity);
193   // Sorts candidates by their offset in new image.
194   void SortByDestination();
195   // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
196   // overlaps. Candidates with low similarity scores are more likely to be
197   // shrunken. Unfit candidates may be removed.
198   void Prune(const EncodedView& old_view,
199              const EncodedView& new_view,
200              const std::vector<TargetsAffinity>& targets_affinities,
201              double min_similarity);
202 
203   std::vector<EquivalenceCandidate> candidates_;
204 };
205 
206 }  // namespace zucchini
207 
208 #endif  // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
209