• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This implementation doesn't use ICU. The ICU macros are oriented towards
6 // character-at-a-time processing, whereas byte-at-a-time processing is easier
7 // with streaming input.
8 
9 #include "base/i18n/streaming_utf8_validator.h"
10 
11 #include "base/check_op.h"
12 #include "base/i18n/utf8_validator_tables.h"
13 
14 namespace base {
15 namespace {
16 
StateTableLookup(uint8_t offset)17 uint8_t StateTableLookup(uint8_t offset) {
18   DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
19   return internal::kUtf8ValidatorTables[offset];
20 }
21 
22 }  // namespace
23 
AddBytes(const char * data,size_t size)24 StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data,
25                                                                size_t size) {
26   // Copy |state_| into a local variable so that the compiler doesn't have to be
27   // careful of aliasing.
28   uint8_t state = state_;
29   for (const char* p = data; p != data + size; ++p) {
30     if ((*p & 0x80) == 0) {
31       if (state == 0)
32         continue;
33       state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
34       break;
35     }
36     const uint8_t shift_amount = StateTableLookup(state);
37     const uint8_t shifted_char = (*p & 0x7F) >> shift_amount;
38     state = StateTableLookup(state + shifted_char + 1);
39     // State may be INVALID here, but this code is optimised for the case of
40     // valid UTF-8 and it is more efficient (by about 2%) to not attempt an
41     // early loop exit unless we hit an ASCII character.
42   }
43   state_ = state;
44   return state == 0 ? VALID_ENDPOINT
45       : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
46       ? INVALID
47       : VALID_MIDPOINT;
48 }
49 
Reset()50 void StreamingUtf8Validator::Reset() {
51   state_ = 0u;
52 }
53 
Validate(const std::string & string)54 bool StreamingUtf8Validator::Validate(const std::string& string) {
55   return StreamingUtf8Validator().AddBytes(string.data(), string.size()) ==
56          VALID_ENDPOINT;
57 }
58 
59 }  // namespace base
60