// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//---------------------------------------------------------------------------------
//
// Generated Header File.  Do not edit by hand.
//    This file contains the state table for the ICU Regular Expression Pattern Parser
//    It is generated by the Perl script "regexcst.pl" from
//    the rule parser state definitions file "regexcst.txt".
//
//   Copyright (C) 2002-2016 International Business Machines Corporation
//   and others. All rights reserved.
//
//---------------------------------------------------------------------------------
//
// Contents of this header, in order:
//   - kRuleSet_* constants : character class codes (128 and up) used in the table
//   - Regex_PatternParseAction : identifiers stored in the fAction column
//   - RegexTableEl         : layout of one table row
//   - gRuleParseStateTable : the rows themselves; a row's position is its state number
//   - RegexStateNames      : state names, parallel to the table by index
//
// NOTE(review): the include guard below is spelled RBBIRPT_H and the last enumerator
// is rbbiLastAction even though this is the regex parser table. Presumably both names
// are inherited from a generator shared with another parser; confirm that no other
// header uses the same guard before including both in one translation unit.
//
#ifndef RBBIRPT_H
#define RBBIRPT_H

#include "unicode/utypes.h"

U_NAMESPACE_BEGIN
//
// Character classes for regex pattern scanning.
//
// These values appear in the fCharClass column of gRuleParseStateTable. They start
// at 128 so that they do not overlap the 0-127 range, which RegexTableEl reserves
// for individual ASCII characters.
//
    static const uint8_t kRuleSet_digit_char = 128;
    static const uint8_t kRuleSet_ascii_letter = 129;
    static const uint8_t kRuleSet_rule_char = 130;
    // Number of character classes defined above (131 - 128 == 3).
    constexpr uint32_t kRuleSet_count = 131-128;

//
// Parser actions. One of these is stored in the fAction field of every table row.
// The numeric value of each enumerator is determined solely by its position in
// this list, so the order must not be changed by hand.
//
// NOTE(review): doIntevalLowerDigit is spelled without the "r" (sic); the same
// spelling is used in the table below, so the two are consistent.
// NOTE(review): rbbiLastAction is the final enumerator and is not referenced in
// this header; presumably it serves as an end marker for code elsewhere.
//
enum Regex_PatternParseAction {
    doSetBackslash_D,
    doBackslashh,
    doBackslashH,
    doSetLiteralEscaped,
    doOpenLookAheadNeg,
    doCompleteNamedBackRef,
    doPatStart,
    doBackslashS,
    doBackslashD,
    doNGStar,
    doNOP,
    doBackslashX,
    doSetLiteral,
    doContinueNamedCapture,
    doBackslashG,
    doBackslashR,
    doSetBegin,
    doSetBackslash_v,
    doPossessivePlus,
    doPerlInline,
    doBackslashZ,
    doSetAddAmp,
    doSetBeginDifference1,
    doIntervalError,
    doSetNegate,
    doIntervalInit,
    doSetIntersection2,
    doPossessiveInterval,
    doRuleError,
    doBackslashW,
    doContinueNamedBackRef,
    doOpenNonCaptureParen,
    doExit,
    doSetNamedChar,
    doSetBackslash_V,
    doConditionalExpr,
    doEscapeError,
    doBadOpenParenType,
    doPossessiveStar,
    doSetAddDash,
    doEscapedLiteralChar,
    doSetBackslash_w,
    doIntervalUpperDigit,
    doBackslashv,
    doSetBackslash_S,
    doSetNoCloseError,
    doSetProp,
    doBackslashB,
    doSetEnd,
    doSetRange,
    doMatchModeParen,
    doPlus,
    doBackslashV,
    doSetMatchMode,
    doBackslashz,
    doSetNamedRange,
    doOpenLookBehindNeg,
    doInterval,
    doBadNamedCapture,
    doBeginMatchMode,
    doBackslashd,
    doPatFinish,
    doNamedChar,
    doNGPlus,
    doSetDifference2,
    doSetBackslash_H,
    doCloseParen,
    doDotAny,
    doOpenCaptureParen,
    doEnterQuoteMode,
    doOpenAtomicParen,
    doBadModeFlag,
    doSetBackslash_d,
    doSetFinish,
    doProperty,
    doBeginNamedBackRef,
    doBackRef,
    doOpt,
    doDollar,
    doBeginNamedCapture,
    doNGInterval,
    doSetOpError,
    doSetPosixProp,
    doSetBeginIntersection1,
    doBackslashb,
    doSetBeginUnion,
    doIntevalLowerDigit,
    doSetBackslash_h,
    doStar,
    doMatchMode,
    doBackslashA,
    doOpenLookBehind,
    doPossessiveOpt,
    doOrOperator,
    doBackslashw,
    doBackslashs,
    doLiteralChar,
    doSuppressComments,
    doCaret,
    doIntervalSame,
    doNGOpt,
    doOpenLookAhead,
    doSetBackslash_W,
    doMismatchedParenErr,
    doSetBackslash_s,
    rbbiLastAction};

//-------------------------------------------------------------------------------
//
//  RegexTableEl       represents the structure of a row in the transition table
//                     for the pattern parser state machine.
//
//  Field order matters: every row of gRuleParseStateTable is a positional
//  initializer {fAction, fCharClass, fNextState, fPushState, fNextChar}.
//-------------------------------------------------------------------------------
struct RegexTableEl {
    Regex_PatternParseAction      fAction;
    uint8_t                       fCharClass;       // 0-127:    an individual ASCII character
                                                    // 128-255:  character class index
    uint8_t                       fNextState;       // 0-250:    normal next-state numbers
                                                    // 255:      pop next-state from stack.
    uint8_t                       fPushState;       // NOTE(review): presumably a state number to push
                                                    //   for a later "pop" row (fNextState 255), with 0
                                                    //   meaning nothing to push; confirm in the parser.
    UBool                         fNextChar;        // NOTE(review): presumably whether the parser
                                                    //   advances to the next input character after
                                                    //   taking this row; confirm in the parser.
};

//
// The parser state table.
//
// A row's position in the array is its state number; the trailing comment on each
// row repeats that number. A comment that also carries a name marks the first row
// of a named state, and the same name is stored at the same index in
// RegexStateNames below. Row 0 has no number comment and no name.
//
// NOTE(review): the fCharClass values 253, 254 and 255 are used below but have no
// named constant in this header. Their meaning is defined by the code that
// interprets the table; 255 presumably acts as a catch-all, since it is the class
// of the last row of most named states. Confirm against the parser engine.
//
static const struct RegexTableEl gRuleParseStateTable[] = {
    {doNOP, 0, 0, 0, TRUE}
    , {doPatStart, 255, 2,0,  FALSE}     //  1      start
    , {doLiteralChar, 254, 14,0,  TRUE}     //  2      term
    , {doLiteralChar, 130, 14,0,  TRUE}     //  3
    , {doSetBegin, 91 /* [ */, 123, 205, TRUE}     //  4
    , {doNOP, 40 /* ( */, 27,0,  TRUE}     //  5
    , {doDotAny, 46 /* . */, 14,0,  TRUE}     //  6
    , {doCaret, 94 /* ^ */, 14,0,  TRUE}     //  7
    , {doDollar, 36 /* $ */, 14,0,  TRUE}     //  8
    , {doNOP, 92 /* \ */, 89,0,  TRUE}     //  9
    , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  10
    , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  11
    , {doPatFinish, 253, 2,0,  FALSE}     //  12
    , {doRuleError, 255, 206,0,  FALSE}     //  13
    , {doNOP, 42 /* * */, 68,0,  TRUE}     //  14      expr-quant
    , {doNOP, 43 /* + */, 71,0,  TRUE}     //  15
    , {doNOP, 63 /* ? */, 74,0,  TRUE}     //  16
    , {doIntervalInit, 123 /* { */, 77,0,  TRUE}     //  17
    , {doNOP, 40 /* ( */, 23,0,  TRUE}     //  18
    , {doNOP, 255, 20,0,  FALSE}     //  19
    , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  20      expr-cont
    , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  21
    , {doNOP, 255, 2,0,  FALSE}     //  22
    , {doSuppressComments, 63 /* ? */, 25,0,  TRUE}     //  23      open-paren-quant
    , {doNOP, 255, 27,0,  FALSE}     //  24
    , {doNOP, 35 /* # */, 50, 14, TRUE}     //  25      open-paren-quant2
    , {doNOP, 255, 29,0,  FALSE}     //  26
    , {doSuppressComments, 63 /* ? */, 29,0,  TRUE}     //  27      open-paren
    , {doOpenCaptureParen, 255, 2, 14, FALSE}     //  28
    , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE}     //  29      open-paren-extended
    , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE}     //  30
    , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE}     //  31
    , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE}     //  32
    , {doNOP, 60 /* < */, 46,0,  TRUE}     //  33
    , {doNOP, 35 /* # */, 50, 2, TRUE}     //  34
    , {doBeginMatchMode, 105 /* i */, 53,0,  FALSE}     //  35
    , {doBeginMatchMode, 100 /* d */, 53,0,  FALSE}     //  36
    , {doBeginMatchMode, 109 /* m */, 53,0,  FALSE}     //  37
    , {doBeginMatchMode, 115 /* s */, 53,0,  FALSE}     //  38
    , {doBeginMatchMode, 117 /* u */, 53,0,  FALSE}     //  39
    , {doBeginMatchMode, 119 /* w */, 53,0,  FALSE}     //  40
    , {doBeginMatchMode, 120 /* x */, 53,0,  FALSE}     //  41
    , {doBeginMatchMode, 45 /* - */, 53,0,  FALSE}     //  42
    , {doConditionalExpr, 40 /* ( */, 206,0,  TRUE}     //  43
    , {doPerlInline, 123 /* { */, 206,0,  TRUE}     //  44
    , {doBadOpenParenType, 255, 206,0,  FALSE}     //  45
    , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  46      open-paren-lookbehind
    , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  47
    , {doBeginNamedCapture, 129, 64,0,  FALSE}     //  48
    , {doBadOpenParenType, 255, 206,0,  FALSE}     //  49
    , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  50      paren-comment
    , {doMismatchedParenErr, 253, 206,0,  FALSE}     //  51
    , {doNOP, 255, 50,0,  TRUE}     //  52
    , {doMatchMode, 105 /* i */, 53,0,  TRUE}     //  53      paren-flag
    , {doMatchMode, 100 /* d */, 53,0,  TRUE}     //  54
    , {doMatchMode, 109 /* m */, 53,0,  TRUE}     //  55
    , {doMatchMode, 115 /* s */, 53,0,  TRUE}     //  56
    , {doMatchMode, 117 /* u */, 53,0,  TRUE}     //  57
    , {doMatchMode, 119 /* w */, 53,0,  TRUE}     //  58
    , {doMatchMode, 120 /* x */, 53,0,  TRUE}     //  59
    , {doMatchMode, 45 /* - */, 53,0,  TRUE}     //  60
    , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  61
    , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  62
    , {doBadModeFlag, 255, 206,0,  FALSE}     //  63
    , {doContinueNamedCapture, 129, 64,0,  TRUE}     //  64      named-capture
    , {doContinueNamedCapture, 128, 64,0,  TRUE}     //  65
    , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE}     //  66
    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  67
    , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  68      quant-star
    , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  69
    , {doStar, 255, 20,0,  FALSE}     //  70
    , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  71      quant-plus
    , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  72
    , {doPlus, 255, 20,0,  FALSE}     //  73
    , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  74      quant-opt
    , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  75
    , {doOpt, 255, 20,0,  FALSE}     //  76
    , {doNOP, 128, 79,0,  FALSE}     //  77      interval-open
    , {doIntervalError, 255, 206,0,  FALSE}     //  78
    , {doIntevalLowerDigit, 128, 79,0,  TRUE}     //  79      interval-lower
    , {doNOP, 44 /* , */, 83,0,  TRUE}     //  80
    , {doIntervalSame, 125 /* } */, 86,0,  TRUE}     //  81
    , {doIntervalError, 255, 206,0,  FALSE}     //  82
    , {doIntervalUpperDigit, 128, 83,0,  TRUE}     //  83      interval-upper
    , {doNOP, 125 /* } */, 86,0,  TRUE}     //  84
    , {doIntervalError, 255, 206,0,  FALSE}     //  85
    , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  86      interval-type
    , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  87
    , {doInterval, 255, 20,0,  FALSE}     //  88
    , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  89      backslash
    , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  90
    , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  91
    , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  92
    , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  93
    , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  94
    , {doBackslashh, 104 /* h */, 14,0,  TRUE}     //  95
    , {doBackslashH, 72 /* H */, 14,0,  TRUE}     //  96
    , {doNOP, 107 /* k */, 115,0,  TRUE}     //  97
    , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  98
    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  99
    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  100
    , {doBackslashR, 82 /* R */, 14,0,  TRUE}     //  101
    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  102
    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  103
    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  104
    , {doBackslashv, 118 /* v */, 14,0,  TRUE}     //  105
    , {doBackslashV, 86 /* V */, 14,0,  TRUE}     //  106
    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  107
    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  108
    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  109
    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  110
    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  111
    , {doBackRef, 128, 14,0,  TRUE}     //  112
    , {doEscapeError, 253, 206,0,  FALSE}     //  113
    , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  114
    , {doBeginNamedBackRef, 60 /* < */, 117,0,  TRUE}     //  115      named-backref
    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  116
    , {doContinueNamedBackRef, 129, 119,0,  TRUE}     //  117      named-backref-2
    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  118
    , {doContinueNamedBackRef, 129, 119,0,  TRUE}     //  119      named-backref-3
    , {doContinueNamedBackRef, 128, 119,0,  TRUE}     //  120
    , {doCompleteNamedBackRef, 62 /* > */, 14,0,  TRUE}     //  121
    , {doBadNamedCapture, 255, 206,0,  FALSE}     //  122
    , {doSetNegate, 94 /* ^ */, 126,0,  TRUE}     //  123      set-open
    , {doSetPosixProp, 58 /* : */, 128,0,  FALSE}     //  124
    , {doNOP, 255, 126,0,  FALSE}     //  125
    , {doSetLiteral, 93 /* ] */, 141,0,  TRUE}     //  126      set-open2
    , {doNOP, 255, 131,0,  FALSE}     //  127
    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  128      set-posix
    , {doNOP, 58 /* : */, 131,0,  FALSE}     //  129
    , {doRuleError, 255, 206,0,  FALSE}     //  130
    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  131      set-start
    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  132
    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  133
    , {doNOP, 45 /* - */, 137,0,  TRUE}     //  134
    , {doNOP, 38 /* & */, 139,0,  TRUE}     //  135
    , {doSetLiteral, 255, 141,0,  TRUE}     //  136
    , {doRuleError, 45 /* - */, 206,0,  FALSE}     //  137      set-start-dash
    , {doSetAddDash, 255, 141,0,  FALSE}     //  138
    , {doRuleError, 38 /* & */, 206,0,  FALSE}     //  139      set-start-amp
    , {doSetAddAmp, 255, 141,0,  FALSE}     //  140
    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  141      set-after-lit
    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  142
    , {doNOP, 45 /* - */, 178,0,  TRUE}     //  143
    , {doNOP, 38 /* & */, 169,0,  TRUE}     //  144
    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  145
    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  146
    , {doSetLiteral, 255, 141,0,  TRUE}     //  147
    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  148      set-after-set
    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  149
    , {doNOP, 45 /* - */, 171,0,  TRUE}     //  150
    , {doNOP, 38 /* & */, 166,0,  TRUE}     //  151
    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  152
    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  153
    , {doSetLiteral, 255, 141,0,  TRUE}     //  154
    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  155      set-after-range
    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  156
    , {doNOP, 45 /* - */, 174,0,  TRUE}     //  157
    , {doNOP, 38 /* & */, 176,0,  TRUE}     //  158
    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  159
    , {doSetNoCloseError, 253, 206,0,  FALSE}     //  160
    , {doSetLiteral, 255, 141,0,  TRUE}     //  161
    , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE}     //  162      set-after-op
    , {doSetOpError, 93 /* ] */, 206,0,  FALSE}     //  163
    , {doNOP, 92 /* \ */, 191,0,  TRUE}     //  164
    , {doSetLiteral, 255, 141,0,  TRUE}     //  165
    , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE}     //  166      set-set-amp
    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  167
    , {doSetAddAmp, 255, 141,0,  FALSE}     //  168
    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  169      set-lit-amp
    , {doSetAddAmp, 255, 141,0,  FALSE}     //  170
    , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE}     //  171      set-set-dash
    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  172
    , {doSetAddDash, 255, 141,0,  FALSE}     //  173
    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  174      set-range-dash
    , {doSetAddDash, 255, 141,0,  FALSE}     //  175
    , {doSetIntersection2, 38 /* & */, 162,0,  TRUE}     //  176      set-range-amp
    , {doSetAddAmp, 255, 141,0,  FALSE}     //  177
    , {doSetDifference2, 45 /* - */, 162,0,  TRUE}     //  178      set-lit-dash
    , {doSetAddDash, 91 /* [ */, 141,0,  FALSE}     //  179
    , {doSetAddDash, 93 /* ] */, 141,0,  FALSE}     //  180
    , {doNOP, 92 /* \ */, 183,0,  TRUE}     //  181
    , {doSetRange, 255, 155,0,  TRUE}     //  182
    , {doSetOpError, 115 /* s */, 206,0,  FALSE}     //  183      set-lit-dash-escape
    , {doSetOpError, 83 /* S */, 206,0,  FALSE}     //  184
    , {doSetOpError, 119 /* w */, 206,0,  FALSE}     //  185
    , {doSetOpError, 87 /* W */, 206,0,  FALSE}     //  186
    , {doSetOpError, 100 /* d */, 206,0,  FALSE}     //  187
    , {doSetOpError, 68 /* D */, 206,0,  FALSE}     //  188
    , {doSetNamedRange, 78 /* N */, 155,0,  FALSE}     //  189
    , {doSetRange, 255, 155,0,  TRUE}     //  190
    , {doSetProp, 112 /* p */, 148,0,  FALSE}     //  191      set-escape
    , {doSetProp, 80 /* P */, 148,0,  FALSE}     //  192
    , {doSetNamedChar, 78 /* N */, 141,0,  FALSE}     //  193
    , {doSetBackslash_s, 115 /* s */, 155,0,  TRUE}     //  194
    , {doSetBackslash_S, 83 /* S */, 155,0,  TRUE}     //  195
    , {doSetBackslash_w, 119 /* w */, 155,0,  TRUE}     //  196
    , {doSetBackslash_W, 87 /* W */, 155,0,  TRUE}     //  197
    , {doSetBackslash_d, 100 /* d */, 155,0,  TRUE}     //  198
    , {doSetBackslash_D, 68 /* D */, 155,0,  TRUE}     //  199
    , {doSetBackslash_h, 104 /* h */, 155,0,  TRUE}     //  200
    , {doSetBackslash_H, 72 /* H */, 155,0,  TRUE}     //  201
    , {doSetBackslash_v, 118 /* v */, 155,0,  TRUE}     //  202
    , {doSetBackslash_V, 86 /* V */, 155,0,  TRUE}     //  203
    , {doSetLiteralEscaped, 255, 141,0,  TRUE}     //  204
    , {doSetFinish, 255, 14,0,  FALSE}     //  205      set-finish
    , {doExit, 255, 206,0,  TRUE}     //  206      errorDeath
 };

//
// State names, parallel to gRuleParseStateTable by index. An entry is a string
// only at the first row of a named state and 0 for every other row. One extra 0
// follows the "errorDeath" entry. The index range covered by each line is given
// in its trailing comment.
//
static const char * const RegexStateNames[] = {    0,   // 0
     "start",                                            // 1
     "term",                                             // 2
    0, 0, 0, 0, 0, 0, 0, 0,                              // 3-10
    0, 0, 0,                                             // 11-13
     "expr-quant",                                       // 14
    0, 0, 0, 0, 0,                                       // 15-19
     "expr-cont",                                        // 20
    0, 0,                                                // 21-22
     "open-paren-quant",                                 // 23
    0,                                                   // 24
     "open-paren-quant2",                                // 25
    0,                                                   // 26
     "open-paren",                                       // 27
    0,                                                   // 28
     "open-paren-extended",                              // 29
    0, 0, 0, 0, 0, 0, 0, 0,                              // 30-37
    0, 0, 0, 0, 0, 0, 0, 0,                              // 38-45
     "open-paren-lookbehind",                            // 46
    0, 0, 0,                                             // 47-49
     "paren-comment",                                    // 50
    0, 0,                                                // 51-52
     "paren-flag",                                       // 53
    0, 0, 0, 0, 0, 0, 0, 0,                              // 54-61
    0, 0,                                                // 62-63
     "named-capture",                                    // 64
    0, 0, 0,                                             // 65-67
     "quant-star",                                       // 68
    0, 0,                                                // 69-70
     "quant-plus",                                       // 71
    0, 0,                                                // 72-73
     "quant-opt",                                        // 74
    0, 0,                                                // 75-76
     "interval-open",                                    // 77
    0,                                                   // 78
     "interval-lower",                                   // 79
    0, 0, 0,                                             // 80-82
     "interval-upper",                                   // 83
    0, 0,                                                // 84-85
     "interval-type",                                    // 86
    0, 0,                                                // 87-88
     "backslash",                                        // 89
    0, 0, 0, 0, 0, 0, 0, 0,                              // 90-97
    0, 0, 0, 0, 0, 0, 0, 0,                              // 98-105
    0, 0, 0, 0, 0, 0, 0, 0,                              // 106-113
    0,                                                   // 114
     "named-backref",                                    // 115
    0,                                                   // 116
     "named-backref-2",                                  // 117
    0,                                                   // 118
     "named-backref-3",                                  // 119
    0, 0, 0,                                             // 120-122
     "set-open",                                         // 123
    0, 0,                                                // 124-125
     "set-open2",                                        // 126
    0,                                                   // 127
     "set-posix",                                        // 128
    0, 0,                                                // 129-130
     "set-start",                                        // 131
    0, 0, 0, 0, 0,                                       // 132-136
     "set-start-dash",                                   // 137
    0,                                                   // 138
     "set-start-amp",                                    // 139
    0,                                                   // 140
     "set-after-lit",                                    // 141
    0, 0, 0, 0, 0, 0,                                    // 142-147
     "set-after-set",                                    // 148
    0, 0, 0, 0, 0, 0,                                    // 149-154
     "set-after-range",                                  // 155
    0, 0, 0, 0, 0, 0,                                    // 156-161
     "set-after-op",                                     // 162
    0, 0, 0,                                             // 163-165
     "set-set-amp",                                      // 166
    0, 0,                                                // 167-168
     "set-lit-amp",                                      // 169
    0,                                                   // 170
     "set-set-dash",                                     // 171
    0, 0,                                                // 172-173
     "set-range-dash",                                   // 174
    0,                                                   // 175
     "set-range-amp",                                    // 176
    0,                                                   // 177
     "set-lit-dash",                                     // 178
    0, 0, 0, 0,                                          // 179-182
     "set-lit-dash-escape",                              // 183
    0, 0, 0, 0, 0, 0, 0,                                 // 184-190
     "set-escape",                                       // 191
    0, 0, 0, 0, 0, 0, 0, 0,                              // 192-199
    0, 0, 0, 0, 0,                                       // 200-204
     "set-finish",                                       // 205
     "errorDeath",                                       // 206
    0};                                                  // 207: trailing 0 after the last state

U_NAMESPACE_END
#endif