• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "bsdiff/suffix_array_index.h"
6 
7 #include <algorithm>
8 #include <limits>
9 #include <vector>
10 
11 #include <divsufsort.h>
12 #include <divsufsort64.h>
13 
14 #include "bsdiff/logging.h"
15 
16 namespace {
17 
18 // libdivsufsort C++ overloaded functions used to allow calling the right
19 // implementation based on the pointer size.
CallDivSufSort(const uint8_t * text,saidx_t * sa,size_t n)20 int CallDivSufSort(const uint8_t* text, saidx_t* sa, size_t n) {
21   return divsufsort(text, sa, n);
22 }
CallDivSufSort(const uint8_t * text,saidx64_t * sa,size_t n)23 int CallDivSufSort(const uint8_t* text, saidx64_t* sa, size_t n) {
24   return divsufsort64(text, sa, n);
25 }
26 
CallSaSearch(const uint8_t * text,size_t text_size,const uint8_t * pattern,size_t pattern_size,const saidx_t * sa,size_t sa_size,saidx_t * left)27 saidx_t CallSaSearch(const uint8_t* text,
28                      size_t text_size,
29                      const uint8_t* pattern,
30                      size_t pattern_size,
31                      const saidx_t* sa,
32                      size_t sa_size,
33                      saidx_t* left) {
34   return sa_search(text, text_size, pattern, pattern_size, sa, sa_size, left);
35 }
CallSaSearch(const uint8_t * text,size_t text_size,const uint8_t * pattern,size_t pattern_size,const saidx64_t * sa,size_t sa_size,saidx64_t * left)36 saidx64_t CallSaSearch(const uint8_t* text,
37                        size_t text_size,
38                        const uint8_t* pattern,
39                        size_t pattern_size,
40                        const saidx64_t* sa,
41                        size_t sa_size,
42                        saidx64_t* left) {
43   return sa_search64(text, text_size, pattern, pattern_size, sa, sa_size, left);
44 }
45 
46 }  // namespace
47 
48 namespace bsdiff {
49 
50 // The SAIDX template type must be either saidx_t or saidx64_t, which will
51 // depend on the maximum size of the input text needed.
52 template <typename SAIDX>
53 class SuffixArrayIndex : public SuffixArrayIndexInterface {
54  public:
55   SuffixArrayIndex() = default;
56 
57   // Initialize and construct the suffix array of the |text| string of length
58   // |n|. The memory pointed by |text| must be kept alive. Returns whether the
59   // construction succeeded.
60   bool Init(const uint8_t* text, size_t n);
61 
62   // SuffixArrayIndexInterface overrides.
63   void SearchPrefix(const uint8_t* target,
64                     size_t length,
65                     size_t* out_length,
66                     uint64_t* out_pos) const override;
67 
68  private:
69   const uint8_t* text_{nullptr};  // Owned by the caller.
70   size_t n_{0};
71 
72   std::vector<SAIDX> sa_;
73 };
74 
75 template <typename SAIDX>
Init(const uint8_t * text,size_t n)76 bool SuffixArrayIndex<SAIDX>::Init(const uint8_t* text, size_t n) {
77   if (!sa_.empty()) {
78     // Already initialized.
79     LOG(ERROR) << "SuffixArray already initialized";
80     return false;
81   }
82   if (static_cast<uint64_t>(n) >
83       static_cast<uint64_t>(std::numeric_limits<SAIDX>::max())) {
84     LOG(ERROR) << "Input too big (" << n << ") for this implementation";
85     return false;
86   }
87   text_ = text;
88   n_ = n;
89   sa_.resize(n + 1);
90 
91   if (n > 0 && CallDivSufSort(text_, sa_.data(), n) != 0) {
92     LOG(ERROR) << "divsufsrot() failed";
93     return false;
94   }
95 
96   return true;
97 }
98 
99 template <typename SAIDX>
SearchPrefix(const uint8_t * target,size_t length,size_t * out_length,uint64_t * out_pos) const100 void SuffixArrayIndex<SAIDX>::SearchPrefix(const uint8_t* target,
101                                            size_t length,
102                                            size_t* out_length,
103                                            uint64_t* out_pos) const {
104   SAIDX suf_left;
105   SAIDX count =
106       CallSaSearch(text_, n_, target, length, sa_.data(), n_, &suf_left);
107   if (count > 0) {
108     // This is the simple case where we found the whole |target| string was
109     // found.
110     *out_pos = sa_[suf_left];
111     *out_length = length;
112     return;
113   }
114   // In this case, |suf_left| points to the first suffix array position such
115   // that the suffix at that position is lexicographically larger than |target|.
116   // We only need to check whether the previous entry or the current entry is a
117   // longer match.
118   size_t prev_suffix_len = 0;
119   if (suf_left > 0) {
120     const size_t prev_max_len =
121         std::min(n_ - static_cast<size_t>(sa_[suf_left - 1]), length);
122     const uint8_t* prev_suffix = text_ + sa_[suf_left - 1];
123     prev_suffix_len =
124         std::mismatch(target, target + prev_max_len, prev_suffix).first -
125         target;
126   }
127 
128   size_t next_suffix_len = 0;
129   if (static_cast<size_t>(suf_left) < n_) {
130     const uint8_t* next_suffix = text_ + sa_[suf_left];
131     const size_t next_max_len =
132         std::min(n_ - static_cast<size_t>(sa_[suf_left]), length);
133     next_suffix_len =
134         std::mismatch(target, target + next_max_len, next_suffix).first -
135         target;
136   }
137 
138   *out_length = std::max(next_suffix_len, prev_suffix_len);
139   if (!*out_length) {
140     *out_pos = 0;
141   } else if (next_suffix_len >= prev_suffix_len) {
142     *out_pos = sa_[suf_left];
143   } else {
144     *out_pos = sa_[suf_left - 1];
145   }
146 }
147 
CreateSuffixArrayIndex(const uint8_t * text,size_t n)148 std::unique_ptr<SuffixArrayIndexInterface> CreateSuffixArrayIndex(
149     const uint8_t* text,
150     size_t n) {
151   // The maximum supported size when using the suffix array based on the 32-bit
152   // saidx_t type. We limit this to something a bit smaller (16 bytes smaller)
153   // than the maximum representable number so references like "n + 1" are don't
154   // overflow.
155   const size_t kMaxSaidxSize = std::numeric_limits<saidx_t>::max() - 16;
156   std::unique_ptr<SuffixArrayIndexInterface> ret;
157 
158   if (n > kMaxSaidxSize) {
159     SuffixArrayIndex<saidx64_t>* sa_ptr = new SuffixArrayIndex<saidx64_t>();
160     ret.reset(sa_ptr);
161     if (!sa_ptr->Init(text, n))
162       return nullptr;
163   } else {
164     SuffixArrayIndex<saidx_t>* sa_ptr = new SuffixArrayIndex<saidx_t>();
165     ret.reset(sa_ptr);
166     if (!sa_ptr->Init(text, n))
167       return nullptr;
168   }
169   return ret;
170 }
171 
172 
173 }  // namespace bsdiff
174