1From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001 2From: Christian Persch <chpe@gnome.org> 3Date: Sun, 12 Feb 2012 21:20:33 +0100 4Subject: [PATCH] regex: Use glib for unicode data 5 6Use g_unichar_type() and g_unichar_get_script() instead of pcre tables. 7--- 8 glib/pcre/pcre_compile.c | 26 +++--- 9 glib/pcre/pcre_dfa_exec.c | 96 ++++++++-------- 10 glib/pcre/pcre_exec.c | 26 +++--- 11 glib/pcre/pcre_internal.h | 11 +-- 12 glib/pcre/pcre_tables.c | 16 +++ 13 glib/pcre/pcre_xclass.c | 24 ++-- 14 glib/pcre/ucp.h | 265 +++++++++++++++++++++++---------------------- 15 7 files changed, 239 insertions(+), 225 deletions(-) 16 17diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c 18index 21bef80..a6c84e1 100644 19--- a/glib/pcre/pcre_compile.c 20+++ b/glib/pcre/pcre_compile.c 21@@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK 22 static BOOL 23 check_char_prop(int c, int ptype, int pdata, BOOL negated) 24 { 25-const ucd_record *prop = GET_UCD(c); 26+const pcre_uint8 chartype = UCD_CHARTYPE(c); 27 switch(ptype) 28 { 29 case PT_LAMP: 30- return (prop->chartype == ucp_Lu || 31- prop->chartype == ucp_Ll || 32- prop->chartype == ucp_Lt) == negated; 33+ return (chartype == ucp_Lu || 34+ chartype == ucp_Ll || 35+ chartype == ucp_Lt) == negated; 36 37 case PT_GC: 38- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; 39+ return (pdata == PRIV(ucp_gentype)[chartype]) == negated; 40 41 case PT_PC: 42- return (pdata == prop->chartype) == negated; 43+ return (pdata == chartype) == negated; 44 45 case PT_SC: 46- return (pdata == prop->script) == negated; 47+ return (pdata == UCD_SCRIPT(c)) == negated; 48 49 /* These are specials */ 50 51 case PT_ALNUM: 52- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 53- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; 54+ return (PRIV(ucp_gentype)[chartype] == ucp_L || 55+ PRIV(ucp_gentype)[chartype] == ucp_N) == negated; 56 57 case PT_SPACE: /* Perl space */ 58- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 59+ return (PRIV(ucp_gentype)[chartype] == ucp_Z || 60 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) 61 == negated; 62 63 case PT_PXSPACE: /* POSIX space */ 64- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 65+ return (PRIV(ucp_gentype)[chartype] == ucp_Z || 66 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 67 c == CHAR_FF || c == CHAR_CR) 68 == negated; 69 70 case PT_WORD: 71- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 72- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 73+ return (PRIV(ucp_gentype)[chartype] == ucp_L || 74+ PRIV(ucp_gentype)[chartype] == ucp_N || 75 c == CHAR_UNDERSCORE) == negated; 76 } 77 return FALSE; 78diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c 79index 9565d46..3f913ce 100644 80--- a/glib/pcre/pcre_dfa_exec.c 81+++ b/glib/pcre/pcre_dfa_exec.c 82@@ -1060,7 +1060,7 @@ for (;;) 83 if (clen > 0) 84 { 85 BOOL OK; 86- const ucd_record * prop = GET_UCD(c); 87+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 88 switch(code[1]) 89 { 90 case PT_ANY: 91@@ -1068,43 +1068,43 @@ for (;;) 92 break; 93 94 case PT_LAMP: 95- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 96- prop->chartype == ucp_Lt; 97+ OK = chartype == ucp_Lu || chartype == ucp_Ll || 98+ chartype == ucp_Lt; 99 break; 100 101 case PT_GC: 102- OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; 103+ OK = PRIV(ucp_gentype)[chartype] == code[2]; 104 break; 105 106 case PT_PC: 107- OK = prop->chartype == code[2]; 108+ OK = chartype == code[2]; 109 break; 110 111 case PT_SC: 112- OK = prop->script == code[2]; 113+ OK = UCD_SCRIPT(c) == code[2]; 114 break; 115 116 /* These are specials for combination cases. */ 117 118 case PT_ALNUM: 119- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 120- PRIV(ucp_gentype)[prop->chartype] == ucp_N; 121+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 122+ PRIV(ucp_gentype)[chartype] == ucp_N; 123 break; 124 125 case PT_SPACE: /* Perl space */ 126- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 127+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 128 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; 129 break; 130 131 case PT_PXSPACE: /* POSIX space */ 132- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 133+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 134 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 135 c == CHAR_FF || c == CHAR_CR; 136 break; 137 138 case PT_WORD: 139- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 140- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 141+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 142+ PRIV(ucp_gentype)[chartype] == ucp_N || 143 c == CHAR_UNDERSCORE; 144 break; 145 146@@ -1294,7 +1294,7 @@ for (;;) 147 if (clen > 0) 148 { 149 BOOL OK; 150- const ucd_record * prop = GET_UCD(c); 151+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 152 switch(code[2]) 153 { 154 case PT_ANY: 155@@ -1302,43 +1302,43 @@ for (;;) 156 break; 157 158 case PT_LAMP: 159- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 160- prop->chartype == ucp_Lt; 161+ OK = chartype == ucp_Lu || chartype == ucp_Ll || 162+ chartype == ucp_Lt; 163 break; 164 165 case PT_GC: 166- OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; 167+ OK = PRIV(ucp_gentype)[chartype] == code[3]; 168 break; 169 170 case PT_PC: 171- OK = prop->chartype == code[3]; 172+ OK = chartype == code[3]; 173 break; 174 175 case PT_SC: 176- OK = prop->script == code[3]; 177+ OK = UCD_SCRIPT(c) == code[3]; 178 break; 179 180 /* These are specials for combination cases. */ 181 182 case PT_ALNUM: 183- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 184- PRIV(ucp_gentype)[prop->chartype] == ucp_N; 185+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 186+ PRIV(ucp_gentype)[chartype] == ucp_N; 187 break; 188 189 case PT_SPACE: /* Perl space */ 190- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 191+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 192 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; 193 break; 194 195 case PT_PXSPACE: /* POSIX space */ 196- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 197+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 198 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 199 c == CHAR_FF || c == CHAR_CR; 200 break; 201 202 case PT_WORD: 203- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 204- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 205+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 206+ PRIV(ucp_gentype)[chartype] == ucp_N || 207 c == CHAR_UNDERSCORE; 208 break; 209 210@@ -1541,7 +1541,7 @@ for (;;) 211 if (clen > 0) 212 { 213 BOOL OK; 214- const ucd_record * prop = GET_UCD(c); 215+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 216 switch(code[2]) 217 { 218 case PT_ANY: 219@@ -1549,43 +1549,43 @@ for (;;) 220 break; 221 222 case PT_LAMP: 223- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 224- prop->chartype == ucp_Lt; 225+ OK = chartype == ucp_Lu || chartype == ucp_Ll || 226+ chartype == ucp_Lt; 227 break; 228 229 case PT_GC: 230- OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; 231+ OK = PRIV(ucp_gentype)[chartype] == code[3]; 232 break; 233 234 case PT_PC: 235- OK = prop->chartype == code[3]; 236+ OK = chartype == code[3]; 237 break; 238 239 case PT_SC: 240- OK = prop->script == code[3]; 241+ OK = UCD_SCRIPT(c) == code[3]; 242 break; 243 244 /* These are specials for combination cases. */ 245 246 case PT_ALNUM: 247- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 248- PRIV(ucp_gentype)[prop->chartype] == ucp_N; 249+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 250+ PRIV(ucp_gentype)[chartype] == ucp_N; 251 break; 252 253 case PT_SPACE: /* Perl space */ 254- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 255+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 256 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; 257 break; 258 259 case PT_PXSPACE: /* POSIX space */ 260- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 261+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 262 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 263 c == CHAR_FF || c == CHAR_CR; 264 break; 265 266 case PT_WORD: 267- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 268- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 269+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 270+ PRIV(ucp_gentype)[chartype] == ucp_N || 271 c == CHAR_UNDERSCORE; 272 break; 273 274@@ -1813,7 +1813,7 @@ for (;;) 275 if (clen > 0) 276 { 277 BOOL OK; 278- const ucd_record * prop = GET_UCD(c); 279+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 280 switch(code[1 + IMM2_SIZE + 1]) 281 { 282 case PT_ANY: 283@@ -1821,43 +1821,43 @@ for (;;) 284 break; 285 286 case PT_LAMP: 287- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 288- prop->chartype == ucp_Lt; 289+ OK = chartype == ucp_Lu || chartype == ucp_Ll || 290+ chartype == ucp_Lt; 291 break; 292 293 case PT_GC: 294- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; 295+ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2]; 296 break; 297 298 case PT_PC: 299- OK = prop->chartype == code[1 + IMM2_SIZE + 2]; 300+ OK = chartype == code[1 + IMM2_SIZE + 2]; 301 break; 302 303 case PT_SC: 304- OK = prop->script == code[1 + IMM2_SIZE + 2]; 305+ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2]; 306 break; 307 308 /* These are specials for combination cases. */ 309 310 case PT_ALNUM: 311- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 312- PRIV(ucp_gentype)[prop->chartype] == ucp_N; 313+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 314+ PRIV(ucp_gentype)[chartype] == ucp_N; 315 break; 316 317 case PT_SPACE: /* Perl space */ 318- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 319+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 320 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; 321 break; 322 323 case PT_PXSPACE: /* POSIX space */ 324- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 325+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z || 326 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 327 c == CHAR_FF || c == CHAR_CR; 328 break; 329 330 case PT_WORD: 331- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || 332- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 333+ OK = PRIV(ucp_gentype)[chartype] == ucp_L || 334+ PRIV(ucp_gentype)[chartype] == ucp_N || 335 c == CHAR_UNDERSCORE; 336 break; 337 338diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c 339index 830b8b5..c89a3f9 100644 340--- a/glib/pcre/pcre_exec.c 341+++ b/glib/pcre/pcre_exec.c 342@@ -2565,7 +2565,7 @@ for (;;) 343 } 344 GETCHARINCTEST(c, eptr); 345 { 346- const ucd_record *prop = GET_UCD(c); 347+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 348 349 switch(ecode[1]) 350 { 351@@ -2574,44 +2574,44 @@ for (;;) 352 break; 353 354 case PT_LAMP: 355- if ((prop->chartype == ucp_Lu || 356- prop->chartype == ucp_Ll || 357- prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) 358+ if ((chartype == ucp_Lu || 359+ chartype == ucp_Ll || 360+ chartype == ucp_Lt) == (op == OP_NOTPROP)) 361 RRETURN(MATCH_NOMATCH); 362 break; 363 364 case PT_GC: 365- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) 366+ if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP)) 367 RRETURN(MATCH_NOMATCH); 368 break; 369 370 case PT_PC: 371- if ((ecode[2] != prop->chartype) == (op == OP_PROP)) 372+ if ((ecode[2] != chartype) == (op == OP_PROP)) 373 RRETURN(MATCH_NOMATCH); 374 break; 375 376 case PT_SC: 377- if ((ecode[2] != prop->script) == (op == OP_PROP)) 378+ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP)) 379 RRETURN(MATCH_NOMATCH); 380 break; 381 382 /* These are specials */ 383 384 case PT_ALNUM: 385- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || 386- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) 387+ if ((PRIV(ucp_gentype)[chartype] == ucp_L || 388+ PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP)) 389 RRETURN(MATCH_NOMATCH); 390 break; 391 392 case PT_SPACE: /* Perl space */ 393- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 394+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z || 395 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) 396 == (op == OP_NOTPROP)) 397 RRETURN(MATCH_NOMATCH); 398 break; 399 400 case PT_PXSPACE: /* POSIX space */ 401- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 402+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z || 403 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 404 c == CHAR_FF || c == CHAR_CR) 405 == (op == OP_NOTPROP)) 406@@ -2619,8 +2619,8 @@ for (;;) 407 break; 408 409 case PT_WORD: 410- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || 411- PRIV(ucp_gentype)[prop->chartype] == ucp_N || 412+ if ((PRIV(ucp_gentype)[chartype] == ucp_L || 413+ PRIV(ucp_gentype)[chartype] == ucp_N || 414 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) 415 RRETURN(MATCH_NOMATCH); 416 break; 417diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h 418index 181c312..234af1b 100644 419--- a/glib/pcre/pcre_internal.h 420+++ b/glib/pcre/pcre_internal.h 421@@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[]; 422 #ifdef SUPPORT_UCP 423 /* UCD access macros */ 424 425-#define UCD_BLOCK_SIZE 128 426-#define GET_UCD(ch) (PRIV(ucd_records) + \ 427- PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \ 428- UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) 429+unsigned int _pcre_ucp_othercase(const unsigned int c); 430 431-#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype 432-#define UCD_SCRIPT(ch) GET_UCD(ch)->script 433+#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch)) 434+#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch)) 435 #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] 436-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) 437+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch)) 438 439 #endif /* SUPPORT_UCP */ 440 441diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c 442index 7ac2d89..e401974 100644 443--- a/glib/pcre/pcre_tables.c 444+++ b/glib/pcre/pcre_tables.c 445@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = { 446 447 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); 448 449+unsigned int 450+_pcre_ucp_othercase(const unsigned int c) 451+{ 452+ int other_case = NOTACHAR; 453+ 454+ if (g_unichar_islower(c)) 455+ other_case = g_unichar_toupper(c); 456+ else if (g_unichar_isupper(c)) 457+ other_case = g_unichar_tolower(c); 458+ 459+ if (other_case == c) 460+ other_case = NOTACHAR; 461+ 462+ return other_case; 463+} 464+ 465 #endif /* SUPPORT_UTF */ 466 467 /* End of pcre_tables.c */ 468diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c 469index dca7a39..e5a55d7 100644 470--- a/glib/pcre/pcre_xclass.c 471+++ b/glib/pcre/pcre_xclass.c 472@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END) 473 #ifdef SUPPORT_UCP 474 else /* XCL_PROP & XCL_NOTPROP */ 475 { 476- const ucd_record *prop = GET_UCD(c); 477+ const pcre_uint8 chartype = UCD_CHARTYPE(c); 478 479 switch(*data) 480 { 481@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END) 482 break; 483 484 case PT_LAMP: 485- if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 486- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; 487+ if ((chartype == ucp_Lu || chartype == ucp_Ll || 488+ chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; 489 break; 490 491 case PT_GC: 492- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) 493+ if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP)) 494 return !negated; 495 break; 496 497 case PT_PC: 498- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; 499+ if ((data[1] == chartype) == (t == XCL_PROP)) return !negated; 500 break; 501 502 case PT_SC: 503- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; 504+ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated; 505 break; 506 507 case PT_ALNUM: 508- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || 509- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) 510+ if ((PRIV(ucp_gentype)[chartype] == ucp_L || 511+ PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP)) 512 return !negated; 513 break; 514 515 case PT_SPACE: /* Perl space */ 516- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 517+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z || 518 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) 519 == (t == XCL_PROP)) 520 return !negated; 521 break; 522 523 case PT_PXSPACE: /* POSIX space */ 524- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || 525+ if ((PRIV(ucp_gentype)[chartype] == ucp_Z || 526 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 527 c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) 528 return !negated; 529 break; 530 531 case PT_WORD: 532- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || 533- PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) 534+ if ((PRIV(ucp_gentype)[chartype] == ucp_L || 535+ PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE) 536 == (t == XCL_PROP)) 537 return !negated; 538 break; 539diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h 540index 59c3bec..53a48c9 100644 541--- a/glib/pcre/ucp.h 542+++ b/glib/pcre/ucp.h 543@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode 544 should always be at the end of each enum, for backwards compatibility. */ 545 546 /* These are the general character categories. */ 547+#include "gunicode.h" 548 549 enum { 550 ucp_C, /* Other */ 551@@ -24,148 +25,148 @@ enum { 552 /* These are the particular character types. */ 553 554 enum { 555- ucp_Cc, /* Control */ 556- ucp_Cf, /* Format */ 557- ucp_Cn, /* Unassigned */ 558- ucp_Co, /* Private use */ 559- ucp_Cs, /* Surrogate */ 560- ucp_Ll, /* Lower case letter */ 561- ucp_Lm, /* Modifier letter */ 562- ucp_Lo, /* Other letter */ 563- ucp_Lt, /* Title case letter */ 564- ucp_Lu, /* Upper case letter */ 565- ucp_Mc, /* Spacing mark */ 566- ucp_Me, /* Enclosing mark */ 567- ucp_Mn, /* Non-spacing mark */ 568- ucp_Nd, /* Decimal number */ 569- ucp_Nl, /* Letter number */ 570- ucp_No, /* Other number */ 571- ucp_Pc, /* Connector punctuation */ 572- ucp_Pd, /* Dash punctuation */ 573- ucp_Pe, /* Close punctuation */ 574- ucp_Pf, /* Final punctuation */ 575- ucp_Pi, /* Initial punctuation */ 576- ucp_Po, /* Other punctuation */ 577- ucp_Ps, /* Open punctuation */ 578- ucp_Sc, /* Currency symbol */ 579- ucp_Sk, /* Modifier symbol */ 580- ucp_Sm, /* Mathematical symbol */ 581- ucp_So, /* Other symbol */ 582- ucp_Zl, /* Line separator */ 583- ucp_Zp, /* Paragraph separator */ 584- ucp_Zs /* Space separator */ 585+ ucp_Cc = G_UNICODE_CONTROL, /* Control */ 586+ ucp_Cf = G_UNICODE_FORMAT, /* Format */ 587+ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */ 588+ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */ 589+ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */ 590+ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */ 591+ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */ 592+ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */ 593+ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */ 594+ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */ 595+ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */ 596+ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */ 597+ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */ 598+ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */ 599+ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */ 600+ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */ 601+ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */ 602+ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */ 603+ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */ 604+ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */ 605+ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */ 606+ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */ 607+ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */ 608+ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */ 609+ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */ 610+ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */ 611+ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */ 612+ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */ 613+ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */ 614+ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */ 615 }; 616 617 /* These are the script identifications. */ 618 619 enum { 620- ucp_Arabic, 621- ucp_Armenian, 622- ucp_Bengali, 623- ucp_Bopomofo, 624- ucp_Braille, 625- ucp_Buginese, 626- ucp_Buhid, 627- ucp_Canadian_Aboriginal, 628- ucp_Cherokee, 629- ucp_Common, 630- ucp_Coptic, 631- ucp_Cypriot, 632- ucp_Cyrillic, 633- ucp_Deseret, 634- ucp_Devanagari, 635- ucp_Ethiopic, 636- ucp_Georgian, 637- ucp_Glagolitic, 638- ucp_Gothic, 639- ucp_Greek, 640- ucp_Gujarati, 641- ucp_Gurmukhi, 642- ucp_Han, 643- ucp_Hangul, 644- ucp_Hanunoo, 645- ucp_Hebrew, 646- ucp_Hiragana, 647- ucp_Inherited, 648- ucp_Kannada, 649- ucp_Katakana, 650- ucp_Kharoshthi, 651- ucp_Khmer, 652- ucp_Lao, 653- ucp_Latin, 654- ucp_Limbu, 655- ucp_Linear_B, 656- ucp_Malayalam, 657- ucp_Mongolian, 658- ucp_Myanmar, 659- ucp_New_Tai_Lue, 660- ucp_Ogham, 661- ucp_Old_Italic, 662- ucp_Old_Persian, 663- ucp_Oriya, 664- ucp_Osmanya, 665- ucp_Runic, 666- ucp_Shavian, 667- ucp_Sinhala, 668- ucp_Syloti_Nagri, 669- ucp_Syriac, 670- ucp_Tagalog, 671- ucp_Tagbanwa, 672- ucp_Tai_Le, 673- ucp_Tamil, 674- ucp_Telugu, 675- ucp_Thaana, 676- ucp_Thai, 677- ucp_Tibetan, 678- ucp_Tifinagh, 679- ucp_Ugaritic, 680- ucp_Yi, 681+ ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, 682+ ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, 683+ ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, 684+ ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO, 685+ ucp_Braille = G_UNICODE_SCRIPT_BRAILLE, 686+ ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE, 687+ ucp_Buhid = G_UNICODE_SCRIPT_BUHID, 688+ ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 689+ ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE, 690+ ucp_Common = G_UNICODE_SCRIPT_COMMON, 691+ ucp_Coptic = G_UNICODE_SCRIPT_COPTIC, 692+ ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT, 693+ ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC, 694+ ucp_Deseret = G_UNICODE_SCRIPT_DESERET, 695+ ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI, 696+ ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC, 697+ ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN, 698+ ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC, 699+ ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC, 700+ ucp_Greek = G_UNICODE_SCRIPT_GREEK, 701+ ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI, 702+ ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI, 703+ ucp_Han = G_UNICODE_SCRIPT_HAN, 704+ ucp_Hangul = G_UNICODE_SCRIPT_HANGUL, 705+ ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO, 706+ ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW, 707+ ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA, 708+ ucp_Inherited = G_UNICODE_SCRIPT_INHERITED, 709+ ucp_Kannada = G_UNICODE_SCRIPT_KANNADA, 710+ ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA, 711+ ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI, 712+ ucp_Khmer = G_UNICODE_SCRIPT_KHMER, 713+ ucp_Lao = G_UNICODE_SCRIPT_LAO, 714+ ucp_Latin = G_UNICODE_SCRIPT_LATIN, 715+ ucp_Limbu = G_UNICODE_SCRIPT_LIMBU, 716+ ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B, 717+ ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM, 718+ ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN, 719+ ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR, 720+ ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE, 721+ ucp_Ogham = G_UNICODE_SCRIPT_OGHAM, 722+ ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC, 723+ ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN, 724+ ucp_Oriya = G_UNICODE_SCRIPT_ORIYA, 725+ ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA, 726+ ucp_Runic = G_UNICODE_SCRIPT_RUNIC, 727+ ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN, 728+ ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA, 729+ ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI, 730+ ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC, 731+ ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG, 732+ ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA, 733+ ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE, 734+ ucp_Tamil = G_UNICODE_SCRIPT_TAMIL, 735+ ucp_Telugu = G_UNICODE_SCRIPT_TELUGU, 736+ ucp_Thaana = G_UNICODE_SCRIPT_THAANA, 737+ ucp_Thai = G_UNICODE_SCRIPT_THAI, 738+ ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN, 739+ ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH, 740+ ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC, 741+ ucp_Yi = G_UNICODE_SCRIPT_YI, 742 /* New for Unicode 5.0: */ 743- ucp_Balinese, 744- ucp_Cuneiform, 745- ucp_Nko, 746- ucp_Phags_Pa, 747- ucp_Phoenician, 748+ ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, 749+ ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, 750+ ucp_Nko = G_UNICODE_SCRIPT_NKO, 751+ ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, 752+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, 753 /* New for Unicode 5.1: */ 754- ucp_Carian, 755- ucp_Cham, 756- ucp_Kayah_Li, 757- ucp_Lepcha, 758- ucp_Lycian, 759- ucp_Lydian, 760- ucp_Ol_Chiki, 761- ucp_Rejang, 762- ucp_Saurashtra, 763- ucp_Sundanese, 764- ucp_Vai, 765+ ucp_Carian = G_UNICODE_SCRIPT_CARIAN, 766+ ucp_Cham = G_UNICODE_SCRIPT_CHAM, 767+ ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, 768+ ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, 769+ ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, 770+ ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, 771+ ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, 772+ ucp_Rejang = G_UNICODE_SCRIPT_REJANG, 773+ ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, 774+ ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, 775+ ucp_Vai = G_UNICODE_SCRIPT_VAI, 776 /* New for Unicode 5.2: */ 777- ucp_Avestan, 778- ucp_Bamum, 779- ucp_Egyptian_Hieroglyphs, 780- ucp_Imperial_Aramaic, 781- ucp_Inscriptional_Pahlavi, 782- ucp_Inscriptional_Parthian, 783- ucp_Javanese, 784- ucp_Kaithi, 785- ucp_Lisu, 786- ucp_Meetei_Mayek, 787- ucp_Old_South_Arabian, 788- ucp_Old_Turkic, 789- ucp_Samaritan, 790- ucp_Tai_Tham, 791- ucp_Tai_Viet, 792+ ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN, 793+ ucp_Bamum = G_UNICODE_SCRIPT_BAMUM, 794+ ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, 795+ ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, 796+ ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, 797+ ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 798+ ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE, 799+ ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI, 800+ ucp_Lisu = G_UNICODE_SCRIPT_LISU, 801+ ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK, 802+ ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, 803+ ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC, 804+ ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN, 805+ ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM, 806+ ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET, 807 /* New for Unicode 6.0.0: */ 808- ucp_Batak, 809- ucp_Brahmi, 810- ucp_Mandaic, 811+ ucp_Batak = G_UNICODE_SCRIPT_BATAK, 812+ ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI, 813+ ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC, 814 /* New for Unicode 6.1.0: */ 815- ucp_Chakma, 816- ucp_Meroitic_Cursive, 817- ucp_Meroitic_Hieroglyphs, 818- ucp_Miao, 819- ucp_Sharada, 820- ucp_Sora_Sompeng, 821- ucp_Takri 822+ ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA, 823+ ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE, 824+ ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, 825+ ucp_Miao = G_UNICODE_SCRIPT_MIAO, 826+ ucp_Sharada = G_UNICODE_SCRIPT_SHARADA, 827+ ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG, 828+ ucp_Takri = G_UNICODE_SCRIPT_TAKRI, 829 }; 830 831 #endif 832-- 8331.7.5.1.217.g4e3aa.dirty 834 835