/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* Property values returned by the UCD access macros are defined here.

Every enumerator below carries its numeric value explicitly. The numbers are
exactly those that positional (implicit) numbering from zero would give, so
they must never be changed or reused:

  - They have to be the same as the values generated by the
    maint/MultiStage2.py script, where the equivalent property descriptive
    names are listed in vectors.

  - The values of the first two enums are additionally assumed by the table
    called catposstab in pcre2_compile.c.

When a new release of Unicode adds values, append them at the end of the
relevant enum with the next unused number, for backwards compatibility. */


/* General character categories. */

enum {
  ucp_C = 0,   /* Other */
  ucp_L = 1,   /* Letter */
  ucp_M = 2,   /* Mark */
  ucp_N = 3,   /* Number */
  ucp_P = 4,   /* Punctuation */
  ucp_S = 5,   /* Symbol */
  ucp_Z = 6    /* Separator */
};


/* Particular character categories. */

enum {
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};


/* Grapheme break properties. The Extended Pictographic property comes from
the emoji-data.txt file. */

enum {
  ucp_gbCR                    = 0,
  ucp_gbLF                    = 1,
  ucp_gbControl               = 2,
  ucp_gbExtend                = 3,
  ucp_gbPrepend               = 4,
  ucp_gbSpacingMark           = 5,
  ucp_gbL                     = 6,    /* Hangul syllable type L */
  ucp_gbV                     = 7,    /* Hangul syllable type V */
  ucp_gbT                     = 8,    /* Hangul syllable type T */
  ucp_gbLV                    = 9,    /* Hangul syllable type LV */
  ucp_gbLVT                   = 10,   /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator     = 11,
  ucp_gbOther                 = 12,
  ucp_gbZWJ                   = 13,
  ucp_gbExtended_Pictographic = 14
};


/* Script identifications. The sub-headings record the Unicode release in
which each group of scripts was added to this list. */

enum {
  ucp_Unknown                = 0,
  ucp_Arabic                 = 1,
  ucp_Armenian               = 2,
  ucp_Bengali                = 3,
  ucp_Bopomofo               = 4,
  ucp_Braille                = 5,
  ucp_Buginese               = 6,
  ucp_Buhid                  = 7,
  ucp_Canadian_Aboriginal    = 8,
  ucp_Cherokee               = 9,
  ucp_Common                 = 10,
  ucp_Coptic                 = 11,
  ucp_Cypriot                = 12,
  ucp_Cyrillic               = 13,
  ucp_Deseret                = 14,
  ucp_Devanagari             = 15,
  ucp_Ethiopic               = 16,
  ucp_Georgian               = 17,
  ucp_Glagolitic             = 18,
  ucp_Gothic                 = 19,
  ucp_Greek                  = 20,
  ucp_Gujarati               = 21,
  ucp_Gurmukhi               = 22,
  ucp_Han                    = 23,
  ucp_Hangul                 = 24,
  ucp_Hanunoo                = 25,
  ucp_Hebrew                 = 26,
  ucp_Hiragana               = 27,
  ucp_Inherited              = 28,
  ucp_Kannada                = 29,
  ucp_Katakana               = 30,
  ucp_Kharoshthi             = 31,
  ucp_Khmer                  = 32,
  ucp_Lao                    = 33,
  ucp_Latin                  = 34,
  ucp_Limbu                  = 35,
  ucp_Linear_B               = 36,
  ucp_Malayalam              = 37,
  ucp_Mongolian              = 38,
  ucp_Myanmar                = 39,
  ucp_New_Tai_Lue            = 40,
  ucp_Ogham                  = 41,
  ucp_Old_Italic             = 42,
  ucp_Old_Persian            = 43,
  ucp_Oriya                  = 44,
  ucp_Osmanya                = 45,
  ucp_Runic                  = 46,
  ucp_Shavian                = 47,
  ucp_Sinhala                = 48,
  ucp_Syloti_Nagri           = 49,
  ucp_Syriac                 = 50,
  ucp_Tagalog                = 51,
  ucp_Tagbanwa               = 52,
  ucp_Tai_Le                 = 53,
  ucp_Tamil                  = 54,
  ucp_Telugu                 = 55,
  ucp_Thaana                 = 56,
  ucp_Thai                   = 57,
  ucp_Tibetan                = 58,
  ucp_Tifinagh               = 59,
  ucp_Ugaritic               = 60,
  ucp_Yi                     = 61,

  /* New for Unicode 5.0 */
  ucp_Balinese               = 62,
  ucp_Cuneiform              = 63,
  ucp_Nko                    = 64,
  ucp_Phags_Pa               = 65,
  ucp_Phoenician             = 66,

  /* New for Unicode 5.1 */
  ucp_Carian                 = 67,
  ucp_Cham                   = 68,
  ucp_Kayah_Li               = 69,
  ucp_Lepcha                 = 70,
  ucp_Lycian                 = 71,
  ucp_Lydian                 = 72,
  ucp_Ol_Chiki               = 73,
  ucp_Rejang                 = 74,
  ucp_Saurashtra             = 75,
  ucp_Sundanese              = 76,
  ucp_Vai                    = 77,

  /* New for Unicode 5.2 */
  ucp_Avestan                = 78,
  ucp_Bamum                  = 79,
  ucp_Egyptian_Hieroglyphs   = 80,
  ucp_Imperial_Aramaic       = 81,
  ucp_Inscriptional_Pahlavi  = 82,
  ucp_Inscriptional_Parthian = 83,
  ucp_Javanese               = 84,
  ucp_Kaithi                 = 85,
  ucp_Lisu                   = 86,
  ucp_Meetei_Mayek           = 87,
  ucp_Old_South_Arabian      = 88,
  ucp_Old_Turkic             = 89,
  ucp_Samaritan              = 90,
  ucp_Tai_Tham               = 91,
  ucp_Tai_Viet               = 92,

  /* New for Unicode 6.0.0 */
  ucp_Batak                  = 93,
  ucp_Brahmi                 = 94,
  ucp_Mandaic                = 95,

  /* New for Unicode 6.1.0 */
  ucp_Chakma                 = 96,
  ucp_Meroitic_Cursive       = 97,
  ucp_Meroitic_Hieroglyphs   = 98,
  ucp_Miao                   = 99,
  ucp_Sharada                = 100,
  ucp_Sora_Sompeng           = 101,
  ucp_Takri                  = 102,

  /* New for Unicode 7.0.0 */
  ucp_Bassa_Vah              = 103,
  ucp_Caucasian_Albanian     = 104,
  ucp_Duployan               = 105,
  ucp_Elbasan                = 106,
  ucp_Grantha                = 107,
  ucp_Khojki                 = 108,
  ucp_Khudawadi              = 109,
  ucp_Linear_A               = 110,
  ucp_Mahajani               = 111,
  ucp_Manichaean             = 112,
  ucp_Mende_Kikakui          = 113,
  ucp_Modi                   = 114,
  ucp_Mro                    = 115,
  ucp_Nabataean              = 116,
  ucp_Old_North_Arabian      = 117,
  ucp_Old_Permic             = 118,
  ucp_Pahawh_Hmong           = 119,
  ucp_Palmyrene              = 120,
  ucp_Psalter_Pahlavi        = 121,
  ucp_Pau_Cin_Hau            = 122,
  ucp_Siddham                = 123,
  ucp_Tirhuta                = 124,
  ucp_Warang_Citi            = 125,

  /* New for Unicode 8.0.0 */
  ucp_Ahom                   = 126,
  ucp_Anatolian_Hieroglyphs  = 127,
  ucp_Hatran                 = 128,
  ucp_Multani                = 129,
  ucp_Old_Hungarian          = 130,
  ucp_SignWriting            = 131,

  /* New for Unicode 10.0.0 (no update since 8.0.0) */
  ucp_Adlam                  = 132,
  ucp_Bhaiksuki              = 133,
  ucp_Marchen                = 134,
  ucp_Newa                   = 135,
  ucp_Osage                  = 136,
  ucp_Tangut                 = 137,
  ucp_Masaram_Gondi          = 138,
  ucp_Nushu                  = 139,
  ucp_Soyombo                = 140,
  ucp_Zanabazar_Square       = 141,

  /* New for Unicode 11.0.0 */
  ucp_Dogra                  = 142,
  ucp_Gunjala_Gondi          = 143,
  ucp_Hanifi_Rohingya        = 144,
  ucp_Makasar                = 145,
  ucp_Medefaidrin            = 146,
  ucp_Old_Sogdian            = 147,
  ucp_Sogdian                = 148,

  /* New for Unicode 12.0.0 */
  ucp_Elymaic                = 149,
  ucp_Nandinagari            = 150,
  ucp_Nyiakeng_Puachue_Hmong = 151,
  ucp_Wancho                 = 152,

  /* New for Unicode 13.0.0 */
  ucp_Chorasmian             = 153,
  ucp_Dives_Akuru            = 154,
  ucp_Khitan_Small_Script    = 155,
  ucp_Yezidi                 = 156
};

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */