1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_STRING_VIEW_TEMPLATE_H_ 8 #define CORE_FXCRT_STRING_VIEW_TEMPLATE_H_ 9 10 #include <ctype.h> 11 12 #include <algorithm> 13 #include <iterator> 14 #include <optional> 15 #include <string> 16 #include <type_traits> 17 18 #include "core/fxcrt/compiler_specific.h" 19 #include "core/fxcrt/fx_memcpy_wrappers.h" 20 #include "core/fxcrt/fx_system.h" 21 #include "core/fxcrt/span.h" 22 #include "core/fxcrt/span_util.h" 23 24 namespace fxcrt { 25 26 // An immutable string with caller-provided storage which must outlive the 27 // string itself. These are not necessarily nul-terminated, so that substring 28 // extraction (via the Substr(), First(), and Last() methods) is copy-free. 29 // 30 // String view arguments should be passed by value, since they are small, 31 // rather than const-ref, even if they are not modified. 32 // 33 // Front() and Back() tolerate empty strings and must return NUL in those 34 // cases. Substr(), First(), and Last() tolerate out-of-range indices and 35 // must return an empty string view in those cases. The aim here is allowing 36 // callers to avoid range-checking first. 37 template <typename T> 38 class StringViewTemplate { 39 public: 40 using CharType = T; 41 using UnsignedType = typename std::make_unsigned<CharType>::type; 42 using const_iterator = const CharType*; 43 using const_reverse_iterator = std::reverse_iterator<const_iterator>; 44 45 constexpr StringViewTemplate() noexcept = default; 46 constexpr StringViewTemplate(const StringViewTemplate& src) noexcept = 47 default; 48 49 // Deliberately implicit to avoid calling on every string literal. 50 // NOLINTNEXTLINE(runtime/explicit) StringViewTemplate(const CharType * ptr)51 StringViewTemplate(const CharType* ptr) noexcept 52 // SAFETY: from length() function. 53 : m_Span(UNSAFE_BUFFERS(pdfium::make_span( 54 reinterpret_cast<const UnsignedType*>(ptr), 55 ptr ? std::char_traits<CharType>::length(ptr) : 0))) {} 56 StringViewTemplate(const pdfium::span<const CharType> & other)57 explicit constexpr StringViewTemplate( 58 const pdfium::span<const CharType>& other) noexcept { 59 if (!other.empty()) { 60 m_Span = reinterpret_span<const UnsignedType>(other); 61 } 62 } 63 64 template <typename E = typename std::enable_if< 65 !std::is_same<UnsignedType, CharType>::value>::type> StringViewTemplate(const pdfium::span<const UnsignedType> & other)66 explicit constexpr StringViewTemplate( 67 const pdfium::span<const UnsignedType>& other) noexcept { 68 if (!other.empty()) { 69 m_Span = other; 70 } 71 } 72 73 // Deliberately implicit to avoid calling on every char literal. 74 // |ch| must be an lvalue that outlives the StringViewTemplate. 75 // NOLINTNEXTLINE(runtime/explicit) StringViewTemplate(const CharType & ch)76 constexpr StringViewTemplate(const CharType& ch) noexcept 77 : m_Span( 78 reinterpret_span<const UnsignedType>(pdfium::span_from_ref(ch))) {} 79 80 UNSAFE_BUFFER_USAGE StringViewTemplate(const CharType * ptr,size_t size)81 constexpr StringViewTemplate(const CharType* ptr, size_t size) noexcept 82 // SAFETY: propagated to caller via UNSAFE_BUFFER_USAGE. 83 : m_Span(UNSAFE_BUFFERS( 84 pdfium::make_span(reinterpret_cast<const UnsignedType*>(ptr), 85 size))) {} 86 87 template <typename E = typename std::enable_if< 88 !std::is_same<UnsignedType, CharType>::value>::type> StringViewTemplate(const UnsignedType * ptr,size_t size)89 UNSAFE_BUFFER_USAGE constexpr StringViewTemplate(const UnsignedType* ptr, 90 size_t size) noexcept 91 // SAFETY: propagated to caller via UNSAFE_BUFFER_USAGE. 92 : m_Span(UNSAFE_BUFFERS(pdfium::make_span(ptr, size))) {} 93 94 StringViewTemplate& operator=(const CharType* src) { 95 // SAFETY: caller ensures `src` is nul-terminated so `length()` is correct. 96 m_Span = UNSAFE_BUFFERS( 97 pdfium::make_span(reinterpret_cast<const UnsignedType*>(src), 98 src ? std::char_traits<CharType>::length(src) : 0)); 99 return *this; 100 } 101 102 StringViewTemplate& operator=(const StringViewTemplate& src) { 103 m_Span = src.m_Span; 104 return *this; 105 } 106 begin()107 const_iterator begin() const { 108 return reinterpret_cast<const_iterator>(m_Span.begin()); 109 } end()110 const_iterator end() const { 111 return reinterpret_cast<const_iterator>(m_Span.end()); 112 } rbegin()113 const_reverse_iterator rbegin() const { 114 return const_reverse_iterator(end()); 115 } rend()116 const_reverse_iterator rend() const { 117 return const_reverse_iterator(begin()); 118 } 119 120 bool operator==(const StringViewTemplate& other) const { 121 return std::equal(m_Span.begin(), m_Span.end(), other.m_Span.begin(), 122 other.m_Span.end()); 123 } 124 bool operator==(const CharType* ptr) const { 125 StringViewTemplate other(ptr); 126 return *this == other; 127 } 128 bool operator!=(const CharType* ptr) const { return !(*this == ptr); } 129 bool operator!=(const StringViewTemplate& other) const { 130 return !(*this == other); 131 } 132 IsASCII()133 bool IsASCII() const { 134 for (auto c : *this) { 135 if (c <= 0 || c > 127) // Questionable signedness of |c|. 136 return false; 137 } 138 return true; 139 } 140 EqualsASCII(const StringViewTemplate<char> & that)141 bool EqualsASCII(const StringViewTemplate<char>& that) const { 142 size_t length = GetLength(); 143 if (length != that.GetLength()) 144 return false; 145 146 for (size_t i = 0; i < length; ++i) { 147 auto c = (*this)[i]; 148 if (c <= 0 || c > 127 || c != that[i]) // Questionable signedness of |c|. 149 return false; 150 } 151 return true; 152 } 153 EqualsASCIINoCase(const StringViewTemplate<char> & that)154 bool EqualsASCIINoCase(const StringViewTemplate<char>& that) const { 155 size_t length = GetLength(); 156 if (length != that.GetLength()) 157 return false; 158 159 for (size_t i = 0; i < length; ++i) { 160 auto c = (*this)[i]; 161 if (c <= 0 || c > 127 || tolower(c) != tolower(that[i])) 162 return false; 163 } 164 return true; 165 } 166 GetID()167 uint32_t GetID() const { 168 if (m_Span.empty()) 169 return 0; 170 171 uint32_t strid = 0; 172 size_t size = std::min(static_cast<size_t>(4), m_Span.size()); 173 for (size_t i = 0; i < size; i++) 174 strid = strid * 256 + m_Span[i]; 175 176 return strid << ((4 - size) * 8); 177 } 178 unsigned_span()179 pdfium::span<const UnsignedType> unsigned_span() const { return m_Span; } span()180 pdfium::span<const CharType> span() const { 181 return reinterpret_span<const CharType>(m_Span); 182 } unterminated_unsigned_str()183 const UnsignedType* unterminated_unsigned_str() const { 184 return m_Span.data(); 185 } unterminated_c_str()186 const CharType* unterminated_c_str() const { 187 return reinterpret_cast<const CharType*>(m_Span.data()); 188 } 189 GetLength()190 size_t GetLength() const { return m_Span.size(); } IsEmpty()191 bool IsEmpty() const { return m_Span.empty(); } IsValidIndex(size_t index)192 bool IsValidIndex(size_t index) const { return index < m_Span.size(); } IsValidLength(size_t length)193 bool IsValidLength(size_t length) const { return length <= m_Span.size(); } 194 195 // CHECK() if index is out of range (via span's operator[]). 196 const UnsignedType& operator[](const size_t index) const { 197 return m_Span[index]; 198 } 199 200 // CHECK() if index is out of range (via span's operator[]). CharAt(const size_t index)201 CharType CharAt(const size_t index) const { 202 return static_cast<CharType>(m_Span[index]); 203 } 204 205 // Unlike std::string_view::front(), this is always safe and returns a 206 // NUL char when the string is empty. Front()207 UnsignedType Front() const { return !m_Span.empty() ? m_Span.front() : 0; } 208 209 // Unlike std::string_view::back(), this is always safe and returns a 210 // NUL char when the string is empty. Back()211 UnsignedType Back() const { return !m_Span.empty() ? m_Span.back() : 0; } 212 Find(CharType ch)213 std::optional<size_t> Find(CharType ch) const { 214 const auto* found = 215 reinterpret_cast<const UnsignedType*>(std::char_traits<CharType>::find( 216 reinterpret_cast<const CharType*>(m_Span.data()), m_Span.size(), 217 ch)); 218 219 return found ? std::optional<size_t>(found - m_Span.data()) : std::nullopt; 220 } 221 Contains(CharType ch)222 bool Contains(CharType ch) const { return Find(ch).has_value(); } 223 Substr(size_t offset)224 StringViewTemplate Substr(size_t offset) const { 225 // Unsigned underflow is well-defined and out-of-range is handled by 226 // Substr(). 227 return Substr(offset, GetLength() - offset); 228 } 229 Substr(size_t first,size_t count)230 StringViewTemplate Substr(size_t first, size_t count) const { 231 if (!m_Span.data()) 232 return StringViewTemplate(); 233 234 if (!IsValidIndex(first)) 235 return StringViewTemplate(); 236 237 if (count == 0 || !IsValidLength(count)) 238 return StringViewTemplate(); 239 240 if (!IsValidIndex(first + count - 1)) 241 return StringViewTemplate(); 242 243 // SAFETY: performance-sensitive, checks above equivalent to subspan()'s. 244 return UNSAFE_BUFFERS(StringViewTemplate(m_Span.data() + first, count)); 245 } 246 First(size_t count)247 StringViewTemplate First(size_t count) const { 248 return Substr(0, count); 249 } 250 Last(size_t count)251 StringViewTemplate Last(size_t count) const { 252 // Unsigned underflow is well-defined and out-of-range is handled by 253 // Substr(). 254 return Substr(GetLength() - count, count); 255 } 256 TrimmedRight(CharType ch)257 StringViewTemplate TrimmedRight(CharType ch) const { 258 if (IsEmpty()) 259 return StringViewTemplate(); 260 261 size_t pos = GetLength(); 262 while (pos && CharAt(pos - 1) == ch) 263 pos--; 264 265 if (pos == 0) 266 return StringViewTemplate(); 267 268 // SAFETY: Loop above keeps `pos` at length of string or less. 269 return UNSAFE_BUFFERS(StringViewTemplate(m_Span.data(), pos)); 270 } 271 272 bool operator<(const StringViewTemplate& that) const { 273 const size_t common_size = std::min(m_Span.size(), that.m_Span.size()); 274 int result = 275 common_size ? std::char_traits<CharType>::compare( 276 reinterpret_cast<const CharType*>(m_Span.data()), 277 reinterpret_cast<const CharType*>(that.m_Span.data()), 278 common_size) 279 : 0; 280 return result < 0 || (result == 0 && m_Span.size() < that.m_Span.size()); 281 } 282 283 bool operator>(const StringViewTemplate& that) const { 284 const size_t common_size = std::min(m_Span.size(), that.m_Span.size()); 285 int result = 286 common_size ? std::char_traits<CharType>::compare( 287 reinterpret_cast<const CharType*>(m_Span.data()), 288 reinterpret_cast<const CharType*>(that.m_Span.data()), 289 common_size) 290 : 0; 291 return result > 0 || (result == 0 && m_Span.size() > that.m_Span.size()); 292 } 293 294 protected: 295 // This is not a raw_span<> because StringViewTemplates must be passed by 296 // value without introducing BackupRefPtr churn. Also, repeated re-assignment 297 // of substrings of a StringViewTemplate to itself must avoid the same issue. 298 pdfium::span<const UnsignedType> m_Span; 299 300 private: new(size_t)301 void* operator new(size_t) throw() { return nullptr; } 302 }; 303 304 template <typename T> 305 inline bool operator==(const T* lhs, const StringViewTemplate<T>& rhs) { 306 return rhs == lhs; 307 } 308 template <typename T> 309 inline bool operator!=(const T* lhs, const StringViewTemplate<T>& rhs) { 310 return rhs != lhs; 311 } 312 template <typename T> 313 inline bool operator<(const T* lhs, const StringViewTemplate<T>& rhs) { 314 return rhs > lhs; 315 } 316 317 extern template class StringViewTemplate<char>; 318 extern template class StringViewTemplate<wchar_t>; 319 320 using ByteStringView = StringViewTemplate<char>; 321 using WideStringView = StringViewTemplate<wchar_t>; 322 323 } // namespace fxcrt 324 325 using ByteStringView = fxcrt::ByteStringView; 326 using WideStringView = fxcrt::WideStringView; 327 328 #endif // CORE_FXCRT_STRING_VIEW_TEMPLATE_H_ 329