• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 // This implementation doesn't use ICU. The ICU macros are oriented towards
11 // character-at-a-time processing, whereas byte-at-a-time processing is easier
12 // with streaming input.
13 
14 #include "base/i18n/streaming_utf8_validator.h"
15 
16 #include "base/check_op.h"
17 #include "base/i18n/utf8_validator_tables.h"
18 
19 namespace base {
20 namespace {
21 
StateTableLookup(uint8_t offset)22 uint8_t StateTableLookup(uint8_t offset) {
23   DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
24   return internal::kUtf8ValidatorTables[offset];
25 }
26 
27 }  // namespace
28 
AddBytes(base::span<const uint8_t> data)29 StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(
30     base::span<const uint8_t> data) {
31   // Copy |state_| into a local variable so that the compiler doesn't have to be
32   // careful of aliasing.
33   uint8_t state = state_;
34   for (const uint8_t ch : data) {
35     if ((ch & 0x80) == 0) {
36       if (state == 0)
37         continue;
38       state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
39       break;
40     }
41     const uint8_t shift_amount = StateTableLookup(state);
42     const uint8_t shifted_char = (ch & 0x7F) >> shift_amount;
43     state = StateTableLookup(state + shifted_char + 1);
44     // State may be INVALID here, but this code is optimised for the case of
45     // valid UTF-8 and it is more efficient (by about 2%) to not attempt an
46     // early loop exit unless we hit an ASCII character.
47   }
48   state_ = state;
49   return state == 0 ? VALID_ENDPOINT
50       : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
51       ? INVALID
52       : VALID_MIDPOINT;
53 }
54 
Reset()55 void StreamingUtf8Validator::Reset() {
56   state_ = 0u;
57 }
58 
Validate(const std::string & string)59 bool StreamingUtf8Validator::Validate(const std::string& string) {
60   return StreamingUtf8Validator().AddBytes(base::as_byte_span(string)) ==
61          VALID_ENDPOINT;
62 }
63 
64 }  // namespace base
65