• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
2From: Christian Persch <chpe@gnome.org>
3Date: Sun, 12 Feb 2012 21:20:33 +0100
4Subject: [PATCH] regex: Use glib for unicode data
5
6Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
7---
8 glib/pcre/pcre_compile.c  |   26 +++---
9 glib/pcre/pcre_dfa_exec.c |   96 ++++++++--------
10 glib/pcre/pcre_exec.c     |   26 +++---
11 glib/pcre/pcre_internal.h |   11 +--
12 glib/pcre/pcre_tables.c   |   16 +++
13 glib/pcre/pcre_xclass.c   |   24 ++--
14 glib/pcre/ucp.h           |  265 +++++++++++++++++++++++----------------------
15 7 files changed, 239 insertions(+), 225 deletions(-)
16
17diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
18index 21bef80..a6c84e1 100644
19--- a/glib/pcre/pcre_compile.c
20+++ b/glib/pcre/pcre_compile.c
21@@ -2920,43 +2920,43 @@ Returns:       TRUE if auto-possessifying is OK
22 static BOOL
23 check_char_prop(int c, int ptype, int pdata, BOOL negated)
24 {
25-const ucd_record *prop = GET_UCD(c);
26+const pcre_uint8 chartype = UCD_CHARTYPE(c);
27 switch(ptype)
28   {
29   case PT_LAMP:
30-  return (prop->chartype == ucp_Lu ||
31-          prop->chartype == ucp_Ll ||
32-          prop->chartype == ucp_Lt) == negated;
33+  return (chartype == ucp_Lu ||
34+          chartype == ucp_Ll ||
35+          chartype == ucp_Lt) == negated;
36
37   case PT_GC:
38-  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
39+  return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
40
41   case PT_PC:
42-  return (pdata == prop->chartype) == negated;
43+  return (pdata == chartype) == negated;
44
45   case PT_SC:
46-  return (pdata == prop->script) == negated;
47+  return (pdata == UCD_SCRIPT(c)) == negated;
48
49   /* These are specials */
50
51   case PT_ALNUM:
52-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
53-          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
54+  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
55+          PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
56
57   case PT_SPACE:    /* Perl space */
58-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
59+  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
60           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
61           == negated;
62
63   case PT_PXSPACE:  /* POSIX space */
64-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
65+  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
66           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
67           c == CHAR_FF || c == CHAR_CR)
68           == negated;
69
70   case PT_WORD:
71-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
72-          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
73+  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
74+          PRIV(ucp_gentype)[chartype] == ucp_N ||
75           c == CHAR_UNDERSCORE) == negated;
76   }
77 return FALSE;
78diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
79index 9565d46..3f913ce 100644
80--- a/glib/pcre/pcre_dfa_exec.c
81+++ b/glib/pcre/pcre_dfa_exec.c
82@@ -1060,7 +1060,7 @@ for (;;)
83       if (clen > 0)
84         {
85         BOOL OK;
86-        const ucd_record * prop = GET_UCD(c);
87+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
88         switch(code[1])
89           {
90           case PT_ANY:
91@@ -1068,43 +1068,43 @@ for (;;)
92           break;
93
94           case PT_LAMP:
95-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
96-               prop->chartype == ucp_Lt;
97+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
98+               chartype == ucp_Lt;
99           break;
100
101           case PT_GC:
102-          OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
103+          OK = PRIV(ucp_gentype)[chartype] == code[2];
104           break;
105
106           case PT_PC:
107-          OK = prop->chartype == code[2];
108+          OK = chartype == code[2];
109           break;
110
111           case PT_SC:
112-          OK = prop->script == code[2];
113+          OK = UCD_SCRIPT(c) == code[2];
114           break;
115
116           /* These are specials for combination cases. */
117
118           case PT_ALNUM:
119-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
120-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
121+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
122+               PRIV(ucp_gentype)[chartype] == ucp_N;
123           break;
124
125           case PT_SPACE:    /* Perl space */
126-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
127+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
128                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
129           break;
130
131           case PT_PXSPACE:  /* POSIX space */
132-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
133+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
134                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
135                c == CHAR_FF || c == CHAR_CR;
136           break;
137
138           case PT_WORD:
139-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
140-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
141+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
142+               PRIV(ucp_gentype)[chartype] == ucp_N ||
143                c == CHAR_UNDERSCORE;
144           break;
145
146@@ -1294,7 +1294,7 @@ for (;;)
147       if (clen > 0)
148         {
149         BOOL OK;
150-        const ucd_record * prop = GET_UCD(c);
151+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
152         switch(code[2])
153           {
154           case PT_ANY:
155@@ -1302,43 +1302,43 @@ for (;;)
156           break;
157
158           case PT_LAMP:
159-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
160-            prop->chartype == ucp_Lt;
161+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
162+            chartype == ucp_Lt;
163           break;
164
165           case PT_GC:
166-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
167+          OK = PRIV(ucp_gentype)[chartype] == code[3];
168           break;
169
170           case PT_PC:
171-          OK = prop->chartype == code[3];
172+          OK = chartype == code[3];
173           break;
174
175           case PT_SC:
176-          OK = prop->script == code[3];
177+          OK = UCD_SCRIPT(c) == code[3];
178           break;
179
180           /* These are specials for combination cases. */
181
182           case PT_ALNUM:
183-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
184-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
185+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
186+               PRIV(ucp_gentype)[chartype] == ucp_N;
187           break;
188
189           case PT_SPACE:    /* Perl space */
190-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
191+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
192                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
193           break;
194
195           case PT_PXSPACE:  /* POSIX space */
196-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
197+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
198                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
199                c == CHAR_FF || c == CHAR_CR;
200           break;
201
202           case PT_WORD:
203-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
204-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
205+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
206+               PRIV(ucp_gentype)[chartype] == ucp_N ||
207                c == CHAR_UNDERSCORE;
208           break;
209
210@@ -1541,7 +1541,7 @@ for (;;)
211       if (clen > 0)
212         {
213         BOOL OK;
214-        const ucd_record * prop = GET_UCD(c);
215+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
216         switch(code[2])
217           {
218           case PT_ANY:
219@@ -1549,43 +1549,43 @@ for (;;)
220           break;
221
222           case PT_LAMP:
223-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
224-            prop->chartype == ucp_Lt;
225+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
226+            chartype == ucp_Lt;
227           break;
228
229           case PT_GC:
230-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
231+          OK = PRIV(ucp_gentype)[chartype] == code[3];
232           break;
233
234           case PT_PC:
235-          OK = prop->chartype == code[3];
236+          OK = chartype == code[3];
237           break;
238
239           case PT_SC:
240-          OK = prop->script == code[3];
241+          OK = UCD_SCRIPT(c) == code[3];
242           break;
243
244           /* These are specials for combination cases. */
245
246           case PT_ALNUM:
247-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
248-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
249+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
250+               PRIV(ucp_gentype)[chartype] == ucp_N;
251           break;
252
253           case PT_SPACE:    /* Perl space */
254-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
255+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
256                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
257           break;
258
259           case PT_PXSPACE:  /* POSIX space */
260-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
261+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
262                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
263                c == CHAR_FF || c == CHAR_CR;
264           break;
265
266           case PT_WORD:
267-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
268-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
269+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
270+               PRIV(ucp_gentype)[chartype] == ucp_N ||
271                c == CHAR_UNDERSCORE;
272           break;
273
274@@ -1813,7 +1813,7 @@ for (;;)
275       if (clen > 0)
276         {
277         BOOL OK;
278-        const ucd_record * prop = GET_UCD(c);
279+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
280         switch(code[1 + IMM2_SIZE + 1])
281           {
282           case PT_ANY:
283@@ -1821,43 +1821,43 @@ for (;;)
284           break;
285
286           case PT_LAMP:
287-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
288-            prop->chartype == ucp_Lt;
289+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
290+            chartype == ucp_Lt;
291           break;
292
293           case PT_GC:
294-          OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
295+          OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
296           break;
297
298           case PT_PC:
299-          OK = prop->chartype == code[1 + IMM2_SIZE + 2];
300+          OK = chartype == code[1 + IMM2_SIZE + 2];
301           break;
302
303           case PT_SC:
304-          OK = prop->script == code[1 + IMM2_SIZE + 2];
305+          OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
306           break;
307
308           /* These are specials for combination cases. */
309
310           case PT_ALNUM:
311-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
312-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
313+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
314+               PRIV(ucp_gentype)[chartype] == ucp_N;
315           break;
316
317           case PT_SPACE:    /* Perl space */
318-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
319+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
320                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
321           break;
322
323           case PT_PXSPACE:  /* POSIX space */
324-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
325+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
326                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
327                c == CHAR_FF || c == CHAR_CR;
328           break;
329
330           case PT_WORD:
331-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
332-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
333+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
334+               PRIV(ucp_gentype)[chartype] == ucp_N ||
335                c == CHAR_UNDERSCORE;
336           break;
337
338diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
339index 830b8b5..c89a3f9 100644
340--- a/glib/pcre/pcre_exec.c
341+++ b/glib/pcre/pcre_exec.c
342@@ -2565,7 +2565,7 @@ for (;;)
343       }
344     GETCHARINCTEST(c, eptr);
345       {
346-      const ucd_record *prop = GET_UCD(c);
347+      const pcre_uint8 chartype = UCD_CHARTYPE(c);
348
349       switch(ecode[1])
350         {
351@@ -2574,44 +2574,44 @@ for (;;)
352         break;
353
354         case PT_LAMP:
355-        if ((prop->chartype == ucp_Lu ||
356-             prop->chartype == ucp_Ll ||
357-             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
358+        if ((chartype == ucp_Lu ||
359+             chartype == ucp_Ll ||
360+             chartype == ucp_Lt) == (op == OP_NOTPROP))
361           RRETURN(MATCH_NOMATCH);
362         break;
363
364         case PT_GC:
365-        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
366+        if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
367           RRETURN(MATCH_NOMATCH);
368         break;
369
370         case PT_PC:
371-        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
372+        if ((ecode[2] != chartype) == (op == OP_PROP))
373           RRETURN(MATCH_NOMATCH);
374         break;
375
376         case PT_SC:
377-        if ((ecode[2] != prop->script) == (op == OP_PROP))
378+        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
379           RRETURN(MATCH_NOMATCH);
380         break;
381
382         /* These are specials */
383
384         case PT_ALNUM:
385-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
386-             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
387+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
388+             PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
389           RRETURN(MATCH_NOMATCH);
390         break;
391
392         case PT_SPACE:    /* Perl space */
393-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
394+        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
395              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
396                == (op == OP_NOTPROP))
397           RRETURN(MATCH_NOMATCH);
398         break;
399
400         case PT_PXSPACE:  /* POSIX space */
401-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
402+        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
403              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
404              c == CHAR_FF || c == CHAR_CR)
405                == (op == OP_NOTPROP))
406@@ -2619,8 +2619,8 @@ for (;;)
407         break;
408
409         case PT_WORD:
410-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
411-             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
412+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
413+             PRIV(ucp_gentype)[chartype] == ucp_N ||
414              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
415           RRETURN(MATCH_NOMATCH);
416         break;
417diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
418index 181c312..234af1b 100644
419--- a/glib/pcre/pcre_internal.h
420+++ b/glib/pcre/pcre_internal.h
421@@ -2329,15 +2329,12 @@ extern const int         PRIV(ucp_typerange)[];
422 #ifdef SUPPORT_UCP
423 /* UCD access macros */
424
425-#define UCD_BLOCK_SIZE 128
426-#define GET_UCD(ch) (PRIV(ucd_records) + \
427-        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
428-        UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
429+unsigned int _pcre_ucp_othercase(const unsigned int c);
430
431-#define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
432-#define UCD_SCRIPT(ch)    GET_UCD(ch)->script
433+#define UCD_CHARTYPE(ch)  (pcre_uint8)g_unichar_type((gunichar)(ch))
434+#define UCD_SCRIPT(ch)    (pcre_uint8)g_unichar_get_script((gunichar)(ch))
435 #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
436-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
437+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
438
439 #endif /* SUPPORT_UCP */
440
441diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
442index 7ac2d89..e401974 100644
443--- a/glib/pcre/pcre_tables.c
444+++ b/glib/pcre/pcre_tables.c
445@@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
446
447 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
448
449+unsigned int
450+_pcre_ucp_othercase(const unsigned int c)
451+{
452+  int other_case = NOTACHAR;
453+
454+  if (g_unichar_islower(c))
455+    other_case = g_unichar_toupper(c);
456+  else if (g_unichar_isupper(c))
457+    other_case = g_unichar_tolower(c);
458+
459+  if (other_case == c)
460+    other_case = NOTACHAR;
461+
462+  return other_case;
463+}
464+
465 #endif /* SUPPORT_UTF */
466
467 /* End of pcre_tables.c */
468diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
469index dca7a39..e5a55d7 100644
470--- a/glib/pcre/pcre_xclass.c
471+++ b/glib/pcre/pcre_xclass.c
472@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
473 #ifdef SUPPORT_UCP
474   else  /* XCL_PROP & XCL_NOTPROP */
475     {
476-    const ucd_record *prop = GET_UCD(c);
477+    const pcre_uint8 chartype = UCD_CHARTYPE(c);
478
479     switch(*data)
480       {
481@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
482       break;
483
484       case PT_LAMP:
485-      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
486-           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
487+      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
488+           chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
489       break;
490
491       case PT_GC:
492-      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
493+      if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
494         return !negated;
495       break;
496
497       case PT_PC:
498-      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
499+      if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
500       break;
501
502       case PT_SC:
503-      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
504+      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
505       break;
506
507       case PT_ALNUM:
508-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
509-           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
510+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
511+           PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
512         return !negated;
513       break;
514
515       case PT_SPACE:    /* Perl space */
516-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
517+      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
518            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
519              == (t == XCL_PROP))
520         return !negated;
521       break;
522
523       case PT_PXSPACE:  /* POSIX space */
524-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
525+      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
526            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
527            c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
528         return !negated;
529       break;
530
531       case PT_WORD:
532-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
533-           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
534+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
535+           PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
536              == (t == XCL_PROP))
537         return !negated;
538       break;
539diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
540index 59c3bec..53a48c9 100644
541--- a/glib/pcre/ucp.h
542+++ b/glib/pcre/ucp.h
543@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
544 should always be at the end of each enum, for backwards compatibility. */
545
546 /* These are the general character categories. */
547+#include "gunicode.h"
548
549 enum {
550   ucp_C,     /* Other */
551@@ -24,148 +25,148 @@ enum {
552 /* These are the particular character types. */
553
554 enum {
555-  ucp_Cc,    /* Control */
556-  ucp_Cf,    /* Format */
557-  ucp_Cn,    /* Unassigned */
558-  ucp_Co,    /* Private use */
559-  ucp_Cs,    /* Surrogate */
560-  ucp_Ll,    /* Lower case letter */
561-  ucp_Lm,    /* Modifier letter */
562-  ucp_Lo,    /* Other letter */
563-  ucp_Lt,    /* Title case letter */
564-  ucp_Lu,    /* Upper case letter */
565-  ucp_Mc,    /* Spacing mark */
566-  ucp_Me,    /* Enclosing mark */
567-  ucp_Mn,    /* Non-spacing mark */
568-  ucp_Nd,    /* Decimal number */
569-  ucp_Nl,    /* Letter number */
570-  ucp_No,    /* Other number */
571-  ucp_Pc,    /* Connector punctuation */
572-  ucp_Pd,    /* Dash punctuation */
573-  ucp_Pe,    /* Close punctuation */
574-  ucp_Pf,    /* Final punctuation */
575-  ucp_Pi,    /* Initial punctuation */
576-  ucp_Po,    /* Other punctuation */
577-  ucp_Ps,    /* Open punctuation */
578-  ucp_Sc,    /* Currency symbol */
579-  ucp_Sk,    /* Modifier symbol */
580-  ucp_Sm,    /* Mathematical symbol */
581-  ucp_So,    /* Other symbol */
582-  ucp_Zl,    /* Line separator */
583-  ucp_Zp,    /* Paragraph separator */
584-  ucp_Zs     /* Space separator */
585+  ucp_Cc = G_UNICODE_CONTROL,                   /* Control */
586+  ucp_Cf = G_UNICODE_FORMAT,                    /* Format */
587+  ucp_Cn = G_UNICODE_UNASSIGNED,                /* Unassigned */
588+  ucp_Co = G_UNICODE_PRIVATE_USE,               /* Private use */
589+  ucp_Cs = G_UNICODE_SURROGATE,                 /* Surrogate */
590+  ucp_Ll = G_UNICODE_LOWERCASE_LETTER,          /* Lower case letter */
591+  ucp_Lm = G_UNICODE_MODIFIER_LETTER,           /* Modifier letter */
592+  ucp_Lo = G_UNICODE_OTHER_LETTER,              /* Other letter */
593+  ucp_Lt = G_UNICODE_TITLECASE_LETTER,          /* Title case letter */
594+  ucp_Lu = G_UNICODE_UPPERCASE_LETTER,          /* Upper case letter */
595+  ucp_Mc = G_UNICODE_SPACING_MARK,              /* Spacing mark */
596+  ucp_Me = G_UNICODE_ENCLOSING_MARK,            /* Enclosing mark */
597+  ucp_Mn = G_UNICODE_NON_SPACING_MARK,          /* Non-spacing mark */
598+  ucp_Nd = G_UNICODE_DECIMAL_NUMBER,            /* Decimal number */
599+  ucp_Nl = G_UNICODE_LETTER_NUMBER,             /* Letter number */
600+  ucp_No = G_UNICODE_OTHER_NUMBER,              /* Other number */
601+  ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION,       /* Connector punctuation */
602+  ucp_Pd = G_UNICODE_DASH_PUNCTUATION,          /* Dash punctuation */
603+  ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION,         /* Close punctuation */
604+  ucp_Pf = G_UNICODE_FINAL_PUNCTUATION,         /* Final punctuation */
605+  ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION,       /* Initial punctuation */
606+  ucp_Po = G_UNICODE_OTHER_PUNCTUATION,         /* Other punctuation */
607+  ucp_Ps = G_UNICODE_OPEN_PUNCTUATION,          /* Open punctuation */
608+  ucp_Sc = G_UNICODE_CURRENCY_SYMBOL,           /* Currency symbol */
609+  ucp_Sk = G_UNICODE_MODIFIER_SYMBOL,           /* Modifier symbol */
610+  ucp_Sm = G_UNICODE_MATH_SYMBOL,               /* Mathematical symbol */
611+  ucp_So = G_UNICODE_OTHER_SYMBOL,              /* Other symbol */
612+  ucp_Zl = G_UNICODE_LINE_SEPARATOR,            /* Line separator */
613+  ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR,       /* Paragraph separator */
614+  ucp_Zs = G_UNICODE_SPACE_SEPARATOR            /* Space separator */
615 };
616
617 /* These are the script identifications. */
618
619 enum {
620-  ucp_Arabic,
621-  ucp_Armenian,
622-  ucp_Bengali,
623-  ucp_Bopomofo,
624-  ucp_Braille,
625-  ucp_Buginese,
626-  ucp_Buhid,
627-  ucp_Canadian_Aboriginal,
628-  ucp_Cherokee,
629-  ucp_Common,
630-  ucp_Coptic,
631-  ucp_Cypriot,
632-  ucp_Cyrillic,
633-  ucp_Deseret,
634-  ucp_Devanagari,
635-  ucp_Ethiopic,
636-  ucp_Georgian,
637-  ucp_Glagolitic,
638-  ucp_Gothic,
639-  ucp_Greek,
640-  ucp_Gujarati,
641-  ucp_Gurmukhi,
642-  ucp_Han,
643-  ucp_Hangul,
644-  ucp_Hanunoo,
645-  ucp_Hebrew,
646-  ucp_Hiragana,
647-  ucp_Inherited,
648-  ucp_Kannada,
649-  ucp_Katakana,
650-  ucp_Kharoshthi,
651-  ucp_Khmer,
652-  ucp_Lao,
653-  ucp_Latin,
654-  ucp_Limbu,
655-  ucp_Linear_B,
656-  ucp_Malayalam,
657-  ucp_Mongolian,
658-  ucp_Myanmar,
659-  ucp_New_Tai_Lue,
660-  ucp_Ogham,
661-  ucp_Old_Italic,
662-  ucp_Old_Persian,
663-  ucp_Oriya,
664-  ucp_Osmanya,
665-  ucp_Runic,
666-  ucp_Shavian,
667-  ucp_Sinhala,
668-  ucp_Syloti_Nagri,
669-  ucp_Syriac,
670-  ucp_Tagalog,
671-  ucp_Tagbanwa,
672-  ucp_Tai_Le,
673-  ucp_Tamil,
674-  ucp_Telugu,
675-  ucp_Thaana,
676-  ucp_Thai,
677-  ucp_Tibetan,
678-  ucp_Tifinagh,
679-  ucp_Ugaritic,
680-  ucp_Yi,
681+  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
682+  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
683+  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
684+  ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
685+  ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
686+  ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
687+  ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
688+  ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
689+  ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
690+  ucp_Common = G_UNICODE_SCRIPT_COMMON,
691+  ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
692+  ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
693+  ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
694+  ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
695+  ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
696+  ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
697+  ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
698+  ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
699+  ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
700+  ucp_Greek = G_UNICODE_SCRIPT_GREEK,
701+  ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
702+  ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
703+  ucp_Han = G_UNICODE_SCRIPT_HAN,
704+  ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
705+  ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
706+  ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
707+  ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
708+  ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
709+  ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
710+  ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
711+  ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
712+  ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
713+  ucp_Lao = G_UNICODE_SCRIPT_LAO,
714+  ucp_Latin = G_UNICODE_SCRIPT_LATIN,
715+  ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
716+  ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
717+  ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
718+  ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
719+  ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
720+  ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
721+  ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
722+  ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
723+  ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
724+  ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
725+  ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
726+  ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
727+  ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
728+  ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
729+  ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
730+  ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
731+  ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
732+  ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
733+  ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
734+  ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
735+  ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
736+  ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
737+  ucp_Thai = G_UNICODE_SCRIPT_THAI,
738+  ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
739+  ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
740+  ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
741+  ucp_Yi = G_UNICODE_SCRIPT_YI,
742   /* New for Unicode 5.0: */
743-  ucp_Balinese,
744-  ucp_Cuneiform,
745-  ucp_Nko,
746-  ucp_Phags_Pa,
747-  ucp_Phoenician,
748+  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
749+  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
750+  ucp_Nko = G_UNICODE_SCRIPT_NKO,
751+  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
752+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
753   /* New for Unicode 5.1: */
754-  ucp_Carian,
755-  ucp_Cham,
756-  ucp_Kayah_Li,
757-  ucp_Lepcha,
758-  ucp_Lycian,
759-  ucp_Lydian,
760-  ucp_Ol_Chiki,
761-  ucp_Rejang,
762-  ucp_Saurashtra,
763-  ucp_Sundanese,
764-  ucp_Vai,
765+  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
766+  ucp_Cham = G_UNICODE_SCRIPT_CHAM,
767+  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
768+  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
769+  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
770+  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
771+  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
772+  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
773+  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
774+  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
775+  ucp_Vai = G_UNICODE_SCRIPT_VAI,
776   /* New for Unicode 5.2: */
777-  ucp_Avestan,
778-  ucp_Bamum,
779-  ucp_Egyptian_Hieroglyphs,
780-  ucp_Imperial_Aramaic,
781-  ucp_Inscriptional_Pahlavi,
782-  ucp_Inscriptional_Parthian,
783-  ucp_Javanese,
784-  ucp_Kaithi,
785-  ucp_Lisu,
786-  ucp_Meetei_Mayek,
787-  ucp_Old_South_Arabian,
788-  ucp_Old_Turkic,
789-  ucp_Samaritan,
790-  ucp_Tai_Tham,
791-  ucp_Tai_Viet,
792+  ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
793+  ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
794+  ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
795+  ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
796+  ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
797+  ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
798+  ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
799+  ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
800+  ucp_Lisu = G_UNICODE_SCRIPT_LISU,
801+  ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
802+  ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
803+  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
804+  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
805+  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
806+  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
807   /* New for Unicode 6.0.0: */
808-  ucp_Batak,
809-  ucp_Brahmi,
810-  ucp_Mandaic,
811+  ucp_Batak = G_UNICODE_SCRIPT_BATAK,
812+  ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
813+  ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
814   /* New for Unicode 6.1.0: */
815-  ucp_Chakma,
816-  ucp_Meroitic_Cursive,
817-  ucp_Meroitic_Hieroglyphs,
818-  ucp_Miao,
819-  ucp_Sharada,
820-  ucp_Sora_Sompeng,
821-  ucp_Takri
822+  ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
823+  ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
824+  ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
825+  ucp_Miao = G_UNICODE_SCRIPT_MIAO,
826+  ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
827+  ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
828+  ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
829 };
830
831 #endif
832--
8331.7.5.1.217.g4e3aa.dirty
834
835