1 // Copyright 2014 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifdef UNSAFE_BUFFERS_BUILD 6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors. 7 #pragma allow_unsafe_buffers 8 #endif 9 10 // This implementation doesn't use ICU. The ICU macros are oriented towards 11 // character-at-a-time processing, whereas byte-at-a-time processing is easier 12 // with streaming input. 13 14 #include "base/i18n/streaming_utf8_validator.h" 15 16 #include "base/check_op.h" 17 #include "base/i18n/utf8_validator_tables.h" 18 19 namespace base { 20 namespace { 21 StateTableLookup(uint8_t offset)22uint8_t StateTableLookup(uint8_t offset) { 23 DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize); 24 return internal::kUtf8ValidatorTables[offset]; 25 } 26 27 } // namespace 28 AddBytes(base::span<const uint8_t> data)29StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes( 30 base::span<const uint8_t> data) { 31 // Copy |state_| into a local variable so that the compiler doesn't have to be 32 // careful of aliasing. 33 uint8_t state = state_; 34 for (const uint8_t ch : data) { 35 if ((ch & 0x80) == 0) { 36 if (state == 0) 37 continue; 38 state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX; 39 break; 40 } 41 const uint8_t shift_amount = StateTableLookup(state); 42 const uint8_t shifted_char = (ch & 0x7F) >> shift_amount; 43 state = StateTableLookup(state + shifted_char + 1); 44 // State may be INVALID here, but this code is optimised for the case of 45 // valid UTF-8 and it is more efficient (by about 2%) to not attempt an 46 // early loop exit unless we hit an ASCII character. 47 } 48 state_ = state; 49 return state == 0 ? VALID_ENDPOINT 50 : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX 51 ? INVALID 52 : VALID_MIDPOINT; 53 } 54 Reset()55void StreamingUtf8Validator::Reset() { 56 state_ = 0u; 57 } 58 Validate(const std::string & string)59bool StreamingUtf8Validator::Validate(const std::string& string) { 60 return StreamingUtf8Validator().AddBytes(base::as_byte_span(string)) == 61 VALID_ENDPOINT; 62 } 63 64 } // namespace base 65