/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */

/* These are the general character categories. The enumerators carry no
explicit initializers, so they are numbered consecutively from 0 (ucp_C) to
6 (ucp_Z). */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories. They are numbered
consecutively from 0 (ucp_Cc) to 29 (ucp_Zs). */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each comment is the
enumerator's value. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator,     /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic  /* 14 */
};

/* These are the script identifications. They are numbered consecutively from
0 (ucp_Arabic); each later group is appended after the previous one, so the
existing values never change. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0 */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0 */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  /* New for Unicode 10.0.0 (no update since 8.0.0) */
  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  /* New for Unicode 11.0.0 */
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Sogdian
};

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */