• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_STRING_VIEW_TEMPLATE_H_
8 #define CORE_FXCRT_STRING_VIEW_TEMPLATE_H_
9 
10 #include <ctype.h>
11 
12 #include <algorithm>
13 #include <iterator>
14 #include <optional>
15 #include <string>
16 #include <type_traits>
17 
18 #include "core/fxcrt/compiler_specific.h"
19 #include "core/fxcrt/fx_memcpy_wrappers.h"
20 #include "core/fxcrt/fx_system.h"
21 #include "core/fxcrt/span.h"
22 #include "core/fxcrt/span_util.h"
23 
24 namespace fxcrt {
25 
26 // An immutable string with caller-provided storage which must outlive the
27 // string itself. These are not necessarily nul-terminated, so that substring
28 // extraction (via the Substr(), First(), and Last() methods) is copy-free.
29 //
30 // String view arguments should be passed by value, since they are small,
31 // rather than const-ref, even if they are not modified.
32 //
33 // Front() and Back() tolerate empty strings and must return NUL in those
34 // cases. Substr(), First(), and Last() tolerate out-of-range indices and
35 // must return an empty string view in those cases. The aim here is allowing
36 // callers to avoid range-checking first.
37 template <typename T>
38 class StringViewTemplate {
39  public:
40   using CharType = T;
41   using UnsignedType = typename std::make_unsigned<CharType>::type;
42   using const_iterator = const CharType*;
43   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
44 
45   constexpr StringViewTemplate() noexcept = default;
46   constexpr StringViewTemplate(const StringViewTemplate& src) noexcept =
47       default;
48 
49   // Deliberately implicit to avoid calling on every string literal.
50   // NOLINTNEXTLINE(runtime/explicit)
StringViewTemplate(const CharType * ptr)51   StringViewTemplate(const CharType* ptr) noexcept
52       // SAFETY: from length() function.
53       : m_Span(UNSAFE_BUFFERS(pdfium::make_span(
54             reinterpret_cast<const UnsignedType*>(ptr),
55             ptr ? std::char_traits<CharType>::length(ptr) : 0))) {}
56 
StringViewTemplate(const pdfium::span<const CharType> & other)57   explicit constexpr StringViewTemplate(
58       const pdfium::span<const CharType>& other) noexcept {
59     if (!other.empty()) {
60       m_Span = reinterpret_span<const UnsignedType>(other);
61     }
62   }
63 
64   template <typename E = typename std::enable_if<
65                 !std::is_same<UnsignedType, CharType>::value>::type>
StringViewTemplate(const pdfium::span<const UnsignedType> & other)66   explicit constexpr StringViewTemplate(
67       const pdfium::span<const UnsignedType>& other) noexcept {
68     if (!other.empty()) {
69       m_Span = other;
70     }
71   }
72 
73   // Deliberately implicit to avoid calling on every char literal.
74   // |ch| must be an lvalue that outlives the StringViewTemplate.
75   // NOLINTNEXTLINE(runtime/explicit)
StringViewTemplate(const CharType & ch)76   constexpr StringViewTemplate(const CharType& ch) noexcept
77       : m_Span(
78             reinterpret_span<const UnsignedType>(pdfium::span_from_ref(ch))) {}
79 
80   UNSAFE_BUFFER_USAGE
StringViewTemplate(const CharType * ptr,size_t size)81   constexpr StringViewTemplate(const CharType* ptr, size_t size) noexcept
82       // SAFETY: propagated to caller via UNSAFE_BUFFER_USAGE.
83       : m_Span(UNSAFE_BUFFERS(
84             pdfium::make_span(reinterpret_cast<const UnsignedType*>(ptr),
85                               size))) {}
86 
87   template <typename E = typename std::enable_if<
88                 !std::is_same<UnsignedType, CharType>::value>::type>
StringViewTemplate(const UnsignedType * ptr,size_t size)89   UNSAFE_BUFFER_USAGE constexpr StringViewTemplate(const UnsignedType* ptr,
90                                                    size_t size) noexcept
91       // SAFETY: propagated to caller via UNSAFE_BUFFER_USAGE.
92       : m_Span(UNSAFE_BUFFERS(pdfium::make_span(ptr, size))) {}
93 
94   StringViewTemplate& operator=(const CharType* src) {
95     // SAFETY: caller ensures `src` is nul-terminated so `length()` is correct.
96     m_Span = UNSAFE_BUFFERS(
97         pdfium::make_span(reinterpret_cast<const UnsignedType*>(src),
98                           src ? std::char_traits<CharType>::length(src) : 0));
99     return *this;
100   }
101 
102   StringViewTemplate& operator=(const StringViewTemplate& src) {
103     m_Span = src.m_Span;
104     return *this;
105   }
106 
begin()107   const_iterator begin() const {
108     return reinterpret_cast<const_iterator>(m_Span.begin());
109   }
end()110   const_iterator end() const {
111     return reinterpret_cast<const_iterator>(m_Span.end());
112   }
rbegin()113   const_reverse_iterator rbegin() const {
114     return const_reverse_iterator(end());
115   }
rend()116   const_reverse_iterator rend() const {
117     return const_reverse_iterator(begin());
118   }
119 
120   bool operator==(const StringViewTemplate& other) const {
121     return std::equal(m_Span.begin(), m_Span.end(), other.m_Span.begin(),
122                       other.m_Span.end());
123   }
124   bool operator==(const CharType* ptr) const {
125     StringViewTemplate other(ptr);
126     return *this == other;
127   }
128   bool operator!=(const CharType* ptr) const { return !(*this == ptr); }
129   bool operator!=(const StringViewTemplate& other) const {
130     return !(*this == other);
131   }
132 
IsASCII()133   bool IsASCII() const {
134     for (auto c : *this) {
135       if (c <= 0 || c > 127)  // Questionable signedness of |c|.
136         return false;
137     }
138     return true;
139   }
140 
EqualsASCII(const StringViewTemplate<char> & that)141   bool EqualsASCII(const StringViewTemplate<char>& that) const {
142     size_t length = GetLength();
143     if (length != that.GetLength())
144       return false;
145 
146     for (size_t i = 0; i < length; ++i) {
147       auto c = (*this)[i];
148       if (c <= 0 || c > 127 || c != that[i])  // Questionable signedness of |c|.
149         return false;
150     }
151     return true;
152   }
153 
EqualsASCIINoCase(const StringViewTemplate<char> & that)154   bool EqualsASCIINoCase(const StringViewTemplate<char>& that) const {
155     size_t length = GetLength();
156     if (length != that.GetLength())
157       return false;
158 
159     for (size_t i = 0; i < length; ++i) {
160       auto c = (*this)[i];
161       if (c <= 0 || c > 127 || tolower(c) != tolower(that[i]))
162         return false;
163     }
164     return true;
165   }
166 
GetID()167   uint32_t GetID() const {
168     if (m_Span.empty())
169       return 0;
170 
171     uint32_t strid = 0;
172     size_t size = std::min(static_cast<size_t>(4), m_Span.size());
173     for (size_t i = 0; i < size; i++)
174       strid = strid * 256 + m_Span[i];
175 
176     return strid << ((4 - size) * 8);
177   }
178 
unsigned_span()179   pdfium::span<const UnsignedType> unsigned_span() const { return m_Span; }
span()180   pdfium::span<const CharType> span() const {
181     return reinterpret_span<const CharType>(m_Span);
182   }
unterminated_unsigned_str()183   const UnsignedType* unterminated_unsigned_str() const {
184     return m_Span.data();
185   }
unterminated_c_str()186   const CharType* unterminated_c_str() const {
187     return reinterpret_cast<const CharType*>(m_Span.data());
188   }
189 
GetLength()190   size_t GetLength() const { return m_Span.size(); }
IsEmpty()191   bool IsEmpty() const { return m_Span.empty(); }
IsValidIndex(size_t index)192   bool IsValidIndex(size_t index) const { return index < m_Span.size(); }
IsValidLength(size_t length)193   bool IsValidLength(size_t length) const { return length <= m_Span.size(); }
194 
195   // CHECK() if index is out of range (via span's operator[]).
196   const UnsignedType& operator[](const size_t index) const {
197     return m_Span[index];
198   }
199 
200   // CHECK() if index is out of range (via span's operator[]).
CharAt(const size_t index)201   CharType CharAt(const size_t index) const {
202     return static_cast<CharType>(m_Span[index]);
203   }
204 
205   // Unlike std::string_view::front(), this is always safe and returns a
206   // NUL char when the string is empty.
Front()207   UnsignedType Front() const { return !m_Span.empty() ? m_Span.front() : 0; }
208 
209   // Unlike std::string_view::back(), this is always safe and returns a
210   // NUL char when the string is empty.
Back()211   UnsignedType Back() const { return !m_Span.empty() ? m_Span.back() : 0; }
212 
Find(CharType ch)213   std::optional<size_t> Find(CharType ch) const {
214     const auto* found =
215         reinterpret_cast<const UnsignedType*>(std::char_traits<CharType>::find(
216             reinterpret_cast<const CharType*>(m_Span.data()), m_Span.size(),
217             ch));
218 
219     return found ? std::optional<size_t>(found - m_Span.data()) : std::nullopt;
220   }
221 
Contains(CharType ch)222   bool Contains(CharType ch) const { return Find(ch).has_value(); }
223 
Substr(size_t offset)224   StringViewTemplate Substr(size_t offset) const {
225     // Unsigned underflow is well-defined and out-of-range is handled by
226     // Substr().
227     return Substr(offset, GetLength() - offset);
228   }
229 
Substr(size_t first,size_t count)230   StringViewTemplate Substr(size_t first, size_t count) const {
231     if (!m_Span.data())
232       return StringViewTemplate();
233 
234     if (!IsValidIndex(first))
235       return StringViewTemplate();
236 
237     if (count == 0 || !IsValidLength(count))
238       return StringViewTemplate();
239 
240     if (!IsValidIndex(first + count - 1))
241       return StringViewTemplate();
242 
243     // SAFETY: performance-sensitive, checks above equivalent to subspan()'s.
244     return UNSAFE_BUFFERS(StringViewTemplate(m_Span.data() + first, count));
245   }
246 
First(size_t count)247   StringViewTemplate First(size_t count) const {
248     return Substr(0, count);
249   }
250 
Last(size_t count)251   StringViewTemplate Last(size_t count) const {
252     // Unsigned underflow is well-defined and out-of-range is handled by
253     // Substr().
254     return Substr(GetLength() - count, count);
255   }
256 
TrimmedRight(CharType ch)257   StringViewTemplate TrimmedRight(CharType ch) const {
258     if (IsEmpty())
259       return StringViewTemplate();
260 
261     size_t pos = GetLength();
262     while (pos && CharAt(pos - 1) == ch)
263       pos--;
264 
265     if (pos == 0)
266       return StringViewTemplate();
267 
268     // SAFETY: Loop above keeps `pos` at length of string or less.
269     return UNSAFE_BUFFERS(StringViewTemplate(m_Span.data(), pos));
270   }
271 
272   bool operator<(const StringViewTemplate& that) const {
273     const size_t common_size = std::min(m_Span.size(), that.m_Span.size());
274     int result =
275         common_size ? std::char_traits<CharType>::compare(
276                           reinterpret_cast<const CharType*>(m_Span.data()),
277                           reinterpret_cast<const CharType*>(that.m_Span.data()),
278                           common_size)
279                     : 0;
280     return result < 0 || (result == 0 && m_Span.size() < that.m_Span.size());
281   }
282 
283   bool operator>(const StringViewTemplate& that) const {
284     const size_t common_size = std::min(m_Span.size(), that.m_Span.size());
285     int result =
286         common_size ? std::char_traits<CharType>::compare(
287                           reinterpret_cast<const CharType*>(m_Span.data()),
288                           reinterpret_cast<const CharType*>(that.m_Span.data()),
289                           common_size)
290                     : 0;
291     return result > 0 || (result == 0 && m_Span.size() > that.m_Span.size());
292   }
293 
294  protected:
295   // This is not a raw_span<> because StringViewTemplates must be passed by
296   // value without introducing BackupRefPtr churn. Also, repeated re-assignment
297   // of substrings of a StringViewTemplate to itself must avoid the same issue.
298   pdfium::span<const UnsignedType> m_Span;
299 
300  private:
new(size_t)301   void* operator new(size_t) throw() { return nullptr; }
302 };
303 
304 template <typename T>
305 inline bool operator==(const T* lhs, const StringViewTemplate<T>& rhs) {
306   return rhs == lhs;
307 }
308 template <typename T>
309 inline bool operator!=(const T* lhs, const StringViewTemplate<T>& rhs) {
310   return rhs != lhs;
311 }
312 template <typename T>
313 inline bool operator<(const T* lhs, const StringViewTemplate<T>& rhs) {
314   return rhs > lhs;
315 }
316 
// Explicit instantiation declarations: translation units including this
// header do not implicitly instantiate these two specializations.
// NOTE(review): the matching explicit instantiation definitions are presumably
// in a source file outside this header -- confirm.
extern template class StringViewTemplate<char>;
extern template class StringViewTemplate<wchar_t>;

// Convenience names for the char and wchar_t specializations.
using ByteStringView = StringViewTemplate<char>;
using WideStringView = StringViewTemplate<wchar_t>;
322 
323 }  // namespace fxcrt
324 
// Re-export the view aliases into the global namespace so they can be named
// without the fxcrt:: qualifier.
using ByteStringView = fxcrt::ByteStringView;
using WideStringView = fxcrt::WideStringView;
327 
328 #endif  // CORE_FXCRT_STRING_VIEW_TEMPLATE_H_
329