/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
         New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


#ifndef _PCRE2_UCP_H
#define _PCRE2_UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c.

None of the enumerators below has an explicit initializer, so each enum counts
up from zero in declaration order. Do not reorder or insert entries in the
middle of a list: that would silently renumber everything after it. */

/* These are the general character categories (values 0 to 6). */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories (values 0 to 29). They are
listed grouped by their leading general-category letter. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The list
currently holds 13 values (0 to 12). */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};

/* These are the script identifications. The first group (ucp_Arabic to ucp_Yi)
is in alphabetical order; scripts added for later Unicode releases are
appended in per-release groups, so the list as a whole is not alphabetical. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0: */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting
};

#endif

/* End of pcre2_ucp.h */