1 // Copyright 2017 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 6 #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 7 8 #include <stddef.h> 9 10 #include <deque> 11 #include <limits> 12 #include <vector> 13 14 #include "components/zucchini/image_index.h" 15 #include "components/zucchini/image_utils.h" 16 #include "components/zucchini/targets_affinity.h" 17 18 namespace zucchini { 19 20 constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity(); 21 22 class EncodedView; 23 class EquivalenceSource; 24 25 // Returns similarity score between a token (raw byte or first byte of a 26 // reference) in |old_image_index| at |src| and a token in |new_image_index| 27 // at |dst|. |targets_affinities| describes affinities for each target pool and 28 // is used to evaluate similarity between references, hence it's size must be 29 // equal to the number of pools in both |old_image_index| and |new_image_index|. 30 // Both |src| and |dst| must refer to tokens in |old_image_index| and 31 // |new_image_index|. 32 double GetTokenSimilarity( 33 const ImageIndex& old_image_index, 34 const ImageIndex& new_image_index, 35 const std::vector<TargetsAffinity>& targets_affinities, 36 offset_t src, 37 offset_t dst); 38 39 // Returns a similarity score between content in |old_image_index| and 40 // |new_image_index| at regions described by |equivalence|, using 41 // |targets_affinities| to evaluate similarity between references. 42 double GetEquivalenceSimilarity( 43 const ImageIndex& old_image_index, 44 const ImageIndex& new_image_index, 45 const std::vector<TargetsAffinity>& targets_affinities, 46 const Equivalence& equivalence); 47 48 // Extends |equivalence| forward and returns the result. This is related to 49 // VisitEquivalenceSeed(). 50 EquivalenceCandidate ExtendEquivalenceForward( 51 const ImageIndex& old_image_index, 52 const ImageIndex& new_image_index, 53 const std::vector<TargetsAffinity>& targets_affinities, 54 const EquivalenceCandidate& equivalence, 55 double min_similarity); 56 57 // Extends |equivalence| backward and returns the result. This is related to 58 // VisitEquivalenceSeed(). 59 EquivalenceCandidate ExtendEquivalenceBackward( 60 const ImageIndex& old_image_index, 61 const ImageIndex& new_image_index, 62 const std::vector<TargetsAffinity>& targets_affinities, 63 const EquivalenceCandidate& equivalence, 64 double min_similarity); 65 66 // Creates an equivalence, starting with |src| and |dst| as offset hint, and 67 // extends it both forward and backward, trying to maximise similarity between 68 // |old_image_index| and |new_image_index|, and returns the result. 69 // |targets_affinities| is used to evaluate similarity between references. 70 // |min_similarity| describes the minimum acceptable similarity score and is 71 // used as threshold to discard bad equivalences. 72 EquivalenceCandidate VisitEquivalenceSeed( 73 const ImageIndex& old_image_index, 74 const ImageIndex& new_image_index, 75 const std::vector<TargetsAffinity>& targets_affinities, 76 offset_t src, 77 offset_t dst, 78 double min_similarity); 79 80 // Container of pruned equivalences used to map offsets from |old_image| to 81 // offsets in |new_image|. Equivalences are pruned by cropping smaller 82 // equivalences to avoid overlaps, to make the equivalence map (for covered 83 // bytes in |old_image| and |new_image|) one-to-one. 84 class OffsetMapper { 85 public: 86 using const_iterator = std::deque<Equivalence>::const_iterator; 87 88 // Constructors for various data sources. "Old" and "new" image sizes are 89 // needed for bounds checks and to handle dangling targets. 90 // - From a list of |equivalences|, already sorted (by |src_offset|) and 91 // pruned, useful for tests. 92 OffsetMapper(std::deque<Equivalence>&& equivalences, 93 offset_t old_image_size, 94 offset_t new_image_size); 95 // - From a generator, useful for Zucchini-apply. 96 OffsetMapper(EquivalenceSource&& equivalence_source, 97 offset_t old_image_size, 98 offset_t new_image_size); 99 // - From an EquivalenceMap that needs to be processed, useful for 100 // Zucchini-gen. 101 OffsetMapper(const EquivalenceMap& equivalence_map, 102 offset_t old_image_size, 103 offset_t new_image_size); 104 ~OffsetMapper(); 105 size()106 size_t size() const { return equivalences_.size(); } begin()107 const_iterator begin() const { return equivalences_.begin(); } end()108 const_iterator end() const { return equivalences_.end(); } 109 110 // Returns naive extended forward-projection of "old" |offset| that follows 111 // |eq|'s delta. |eq| needs not cover |offset|. 112 // - Averts underflow / overflow by clamping to |[0, new_image_size_)|. 113 // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the 114 // caller must to make the check (hence "naive"). 115 offset_t NaiveExtendedForwardProject(const Equivalence& unit, 116 offset_t offset) const; 117 118 // Returns an offset in |new_image| corresponding to |offset| in |old_image|. 119 // Assumes |equivalences_| to be non-empty. Cases: 120 // - If |offset| is covered (i.e., in an "old" block), then use the delta of 121 // the (unique) equivalence unit that covers |offset|. 122 // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the 123 // nearest "old" block, use its delta, and avert underflow / overflow by 124 // clamping the result to |[0, new_image_size_)|. 125 // - If |offset| is >= |new_image_size_| (a "fake offset"), then use 126 // |new_image_size_ - old_image_size_| as the delta. 127 offset_t ExtendedForwardProject(offset_t offset) const; 128 129 // Given sorted |offsets|, applies a projection in-place of all offsets that 130 // are part of a pruned equivalence from |old_image| to |new_image|. Other 131 // offsets are removed from |offsets|. 132 void ForwardProjectAll(std::deque<offset_t>* offsets) const; 133 134 // Accessor for testing. equivalences()135 const std::deque<Equivalence> equivalences() const { return equivalences_; } 136 137 // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a 138 // source location that was covered by some Equivalence would become covered 139 // by exactly one Equivalence. Moreover, for the offset, the equivalence 140 // corresponds to the largest (pre-pruning) covering Equivalence, and in case 141 // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may 142 // change in size since empty Equivalences are removed. 143 static void PruneEquivalencesAndSortBySource( 144 std::deque<Equivalence>* equivalences); 145 146 private: 147 // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new" 148 // block overlaps). Also, it is sorted by "old" offsets. 149 std::deque<Equivalence> equivalences_; 150 const offset_t old_image_size_; 151 const offset_t new_image_size_; 152 }; 153 154 // Container of equivalences between |old_image_index| and |new_image_index|, 155 // sorted by |Equivalence::dst_offset|, only used during patch generation. 156 class EquivalenceMap { 157 public: 158 using const_iterator = std::vector<EquivalenceCandidate>::const_iterator; 159 160 EquivalenceMap(); 161 // Initializes the object with |equivalences|. 162 explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates); 163 EquivalenceMap(EquivalenceMap&&); 164 EquivalenceMap(const EquivalenceMap&) = delete; 165 ~EquivalenceMap(); 166 167 // Finds relevant equivalences between |old_view| and |new_view|, using 168 // suffix array |old_sa| computed from |old_view| and using 169 // |targets_affinities| to evaluate similarity between references. This 170 // function is not symmetric. Equivalences might overlap in |old_view|, but 171 // not in |new_view|. It tries to maximize accumulated similarity within each 172 // equivalence, while maximizing |new_view| coverage. The minimum similarity 173 // of an equivalence is given by |min_similarity|. 174 void Build(const std::vector<offset_t>& old_sa, 175 const EncodedView& old_view, 176 const EncodedView& new_view, 177 const std::vector<TargetsAffinity>& targets_affinities, 178 double min_similarity); 179 size()180 size_t size() const { return candidates_.size(); } begin()181 const_iterator begin() const { return candidates_.begin(); } end()182 const_iterator end() const { return candidates_.end(); } 183 184 private: 185 // Discovers equivalence candidates between |old_view| and |new_view| and 186 // stores them in the object. Note that resulting candidates are not sorted 187 // and might be overlapping in new image. 188 void CreateCandidates(const std::vector<offset_t>& old_sa, 189 const EncodedView& old_view, 190 const EncodedView& new_view, 191 const std::vector<TargetsAffinity>& targets_affinities, 192 double min_similarity); 193 // Sorts candidates by their offset in new image. 194 void SortByDestination(); 195 // Visits |candidates_| (sorted by |dst_offset|) and remove all destination 196 // overlaps. Candidates with low similarity scores are more likely to be 197 // shrunken. Unfit candidates may be removed. 198 void Prune(const EncodedView& old_view, 199 const EncodedView& new_view, 200 const std::vector<TargetsAffinity>& targets_affinities, 201 double min_similarity); 202 203 std::vector<EquivalenceCandidate> candidates_; 204 }; 205 206 } // namespace zucchini 207 208 #endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 209