1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Unicode character groups. 6 7 // The codes get split into ranges of 16-bit codes 8 // and ranges of 32-bit codes. It would be simpler 9 // to use only 32-bit ranges, but these tables are large 10 // enough to warrant extra care. 11 // 12 // Using just 32-bit ranges gives 27 kB of data. 13 // Adding 16-bit ranges gives 18 kB of data. 14 // Adding an extra table of 16-bit singletons would reduce 15 // to 16.5 kB of data but make the data harder to use; 16 // we don't bother. 17 18 #ifndef RE2_UNICODE_GROUPS_H__ 19 #define RE2_UNICODE_GROUPS_H__ 20 21 #include "util/util.h" 22 23 namespace re2 { 24 25 struct URange16 26 { 27 uint16 lo; 28 uint16 hi; 29 }; 30 31 struct URange32 32 { 33 uint32 lo; 34 uint32 hi; 35 }; 36 37 struct UGroup 38 { 39 const char *name; 40 int sign; // +1 for [abc], -1 for [^abc] 41 URange16 *r16; 42 int nr16; 43 URange32 *r32; 44 int nr32; 45 }; 46 47 // Named by property or script name (e.g., "Nd", "N", "Han"). 48 // Negated groups are not included. 49 extern UGroup unicode_groups[]; 50 extern int num_unicode_groups; 51 52 // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). 53 // Negated groups are included. 54 extern UGroup posix_groups[]; 55 extern int num_posix_groups; 56 57 // Named by Perl name (e.g., "\\d", "\\D"). 58 // Negated groups are included. 59 extern UGroup perl_groups[]; 60 extern int num_perl_groups; 61 62 } // namespace re2 63 64 #endif // RE2_UNICODE_GROUPS_H__ 65