1From 8d5a44dc8f36cce270519bd52fcecf330ccb43b4 Mon Sep 17 00:00:00 2001 2From: Aleksei Rybalkin <aleksei@rybalkin.org> 3Date: Tue, 12 Jul 2022 11:46:34 +0000 4Subject: [PATCH] replace pcre1 with pcre2 5 6Conflict:NA 7Reference:https://gitlab.gnome.org/GNOME/glib/-/commit/8d5a44dc8f36cce270519bd52fcecf330ccb43b4 8 9--- 10 docs/reference/glib/regex-syntax.xml | 46 -- 11 glib/gregex.c | 1113 ++++++++++++++++---------- 12 glib/gregex.h | 11 +- 13 glib/meson.build | 10 +- 14 glib/tests/meson.build | 4 +- 15 glib/tests/regex.c | 175 ++-- 16 meson.build | 41 +- 17 po/sk.po | 2 +- 18 subprojects/pcre.wrap | 11 - 19 9 files changed, 819 insertions(+), 594 deletions(-) 20 delete mode 100644 subprojects/pcre.wrap 21 22diff --git a/docs/reference/glib/regex-syntax.xml b/docs/reference/glib/regex-syntax.xml 23index 5dd9291..0b413aa 100644 24--- a/docs/reference/glib/regex-syntax.xml 25+++ b/docs/reference/glib/regex-syntax.xml 26@@ -2442,52 +2442,6 @@ processing option does not affect the called subpattern. 27 </para> 28 </refsect1> 29 30-<!-- Callouts are not supported by GRegex 31-<refsect1> 32-<title>Callouts</title> 33-<para> 34-Perl has a feature whereby using the sequence (?{...}) causes arbitrary 35-Perl code to be obeyed in the middle of matching a regular expression. 36-This makes it possible, amongst other things, to extract different substrings that match the same pair of parentheses when there is a repetition. 37-</para> 38- 39-<para> 40-PCRE provides a similar feature, but of course it cannot obey arbitrary 41-Perl code. The feature is called "callout". The caller of PCRE provides 42-an external function by putting its entry point in the global variable 43-pcre_callout. By default, this variable contains NULL, which disables 44-all calling out. 45-</para> 46- 47-<para> 48-Within a regular expression, (?C) indicates the points at which the 49-external function is to be called. If you want to identify different 50-callout points, you can put a number less than 256 after the letter C. 51-The default value is zero. For example, this pattern has two callout 52-points: 53-</para> 54- 55-<programlisting> 56-(?C1)abc(?C2)def 57-</programlisting> 58- 59-<para> 60-If the PCRE_AUTO_CALLOUT flag is passed to pcre_compile(), callouts are 61-automatically installed before each item in the pattern. They are all 62-numbered 255. 63-</para> 64- 65-<para> 66-During matching, when PCRE reaches a callout point (and pcre_callout is 67-set), the external function is called. It is provided with the number 68-of the callout, the position in the pattern, and, optionally, one item 69-of data originally supplied by the caller of pcre_exec(). The callout 70-function may cause matching to proceed, to backtrack, or to fail altogether. A complete description of the interface to the callout function 71-is given in the pcrecallout documentation. 72-</para> 73-</refsect1> 74---> 75- 76 <refsect1> 77 <title>Copyright</title> 78 <para> 79diff --git a/glib/gregex.c b/glib/gregex.c 80index 9a8229a..da37213 100644 81--- a/glib/gregex.c 82+++ b/glib/gregex.c 83@@ -22,7 +22,8 @@ 84 85 #include <string.h> 86 87-#include <pcre.h> 88+#define PCRE2_CODE_UNIT_WIDTH 8 89+#include <pcre2.h> 90 91 #include "gtypes.h" 92 #include "gregex.h" 93@@ -107,87 +108,63 @@ 94 * library written by Philip Hazel. 95 */ 96 97+/* Signifies that flags have already been converted from pcre1 to pcre2. The 98+ * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h, 99+ * but it is not used in gregex, so we can reuse it for this flag. 100+ */ 101+#define G_REGEX_FLAGS_CONVERTED 0x04000000u 102 /* Mask of all the possible values for GRegexCompileFlags. */ 103-#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \ 104- G_REGEX_MULTILINE | \ 105- G_REGEX_DOTALL | \ 106- G_REGEX_EXTENDED | \ 107- G_REGEX_ANCHORED | \ 108- G_REGEX_DOLLAR_ENDONLY | \ 109- G_REGEX_UNGREEDY | \ 110- G_REGEX_RAW | \ 111- G_REGEX_NO_AUTO_CAPTURE | \ 112- G_REGEX_OPTIMIZE | \ 113- G_REGEX_FIRSTLINE | \ 114- G_REGEX_DUPNAMES | \ 115- G_REGEX_NEWLINE_CR | \ 116- G_REGEX_NEWLINE_LF | \ 117- G_REGEX_NEWLINE_CRLF | \ 118- G_REGEX_NEWLINE_ANYCRLF | \ 119- G_REGEX_BSR_ANYCRLF | \ 120- G_REGEX_JAVASCRIPT_COMPAT) 121+#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \ 122+ PCRE2_MULTILINE | \ 123+ PCRE2_DOTALL | \ 124+ PCRE2_EXTENDED | \ 125+ PCRE2_ANCHORED | \ 126+ PCRE2_DOLLAR_ENDONLY | \ 127+ PCRE2_UNGREEDY | \ 128+ PCRE2_UTF | \ 129+ PCRE2_NO_AUTO_CAPTURE | \ 130+ PCRE2_FIRSTLINE | \ 131+ PCRE2_DUPNAMES | \ 132+ PCRE2_NEWLINE_CR | \ 133+ PCRE2_NEWLINE_LF | \ 134+ PCRE2_NEWLINE_CRLF | \ 135+ PCRE2_NEWLINE_ANYCRLF | \ 136+ PCRE2_BSR_ANYCRLF | \ 137+ G_REGEX_FLAGS_CONVERTED) 138 139 /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ 140 #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) 141-#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \ 142- G_REGEX_OPTIMIZE) 143+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \ 144+ G_REGEX_FLAGS_CONVERTED) 145 146 /* Mask of all the possible values for GRegexMatchFlags. */ 147-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ 148- G_REGEX_MATCH_NOTBOL | \ 149- G_REGEX_MATCH_NOTEOL | \ 150- G_REGEX_MATCH_NOTEMPTY | \ 151- G_REGEX_MATCH_PARTIAL | \ 152- G_REGEX_MATCH_NEWLINE_CR | \ 153- G_REGEX_MATCH_NEWLINE_LF | \ 154- G_REGEX_MATCH_NEWLINE_CRLF | \ 155- G_REGEX_MATCH_NEWLINE_ANY | \ 156- G_REGEX_MATCH_NEWLINE_ANYCRLF | \ 157- G_REGEX_MATCH_BSR_ANYCRLF | \ 158- G_REGEX_MATCH_BSR_ANY | \ 159- G_REGEX_MATCH_PARTIAL_SOFT | \ 160- G_REGEX_MATCH_PARTIAL_HARD | \ 161- G_REGEX_MATCH_NOTEMPTY_ATSTART) 162- 163-/* we rely on these flags having the same values */ 164-G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS); 165-G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE); 166-G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL); 167-G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED); 168-G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED); 169-G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY); 170-G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY); 171-G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE); 172-G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE); 173-G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES); 174-G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR); 175-G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF); 176-G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); 177-G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); 178-G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); 179-G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT); 180- 181-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED); 182-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL); 183-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL); 184-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY); 185-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL); 186-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR); 187-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF); 188-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); 189-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY); 190-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); 191-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); 192-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE); 193-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT); 194-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD); 195-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART); 196- 197-/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so 198- * it should be ok to reuse them for different things. 199- */ 200-G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); 201-G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); 202+#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \ 203+ PCRE2_NOTBOL | \ 204+ PCRE2_NOTEOL | \ 205+ PCRE2_NOTEMPTY | \ 206+ PCRE2_PARTIAL_SOFT | \ 207+ PCRE2_NEWLINE_CR | \ 208+ PCRE2_NEWLINE_LF | \ 209+ PCRE2_NEWLINE_CRLF | \ 210+ PCRE2_NEWLINE_ANY | \ 211+ PCRE2_NEWLINE_ANYCRLF | \ 212+ PCRE2_BSR_ANYCRLF | \ 213+ PCRE2_BSR_UNICODE | \ 214+ PCRE2_PARTIAL_SOFT | \ 215+ PCRE2_PARTIAL_HARD | \ 216+ PCRE2_NOTEMPTY_ATSTART | \ 217+ G_REGEX_FLAGS_CONVERTED) 218+ 219+#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 220+ PCRE2_NEWLINE_LF | \ 221+ PCRE2_NEWLINE_CRLF | \ 222+ PCRE2_NEWLINE_ANYCRLF) 223+ 224+#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 225+ PCRE2_NEWLINE_LF | \ 226+ PCRE2_NEWLINE_CRLF | \ 227+ PCRE2_NEWLINE_ANYCRLF | \ 228+ PCRE2_NEWLINE_ANY) 229 230 /* if the string is in UTF-8 use g_utf8_ functions, else use 231 * use just +/- 1. */ 232@@ -208,24 +185,26 @@ struct _GMatchInfo 233 gint pos; /* position in the string where last match left off */ 234 gint n_offsets; /* number of offsets */ 235 gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */ 236- gint *workspace; /* workspace for pcre_dfa_exec() */ 237+ gint *workspace; /* workspace for pcre2_dfa_match() */ 238 gint n_workspace; /* number of workspace elements */ 239 const gchar *string; /* string passed to the match function */ 240 gssize string_len; /* length of string, in bytes */ 241+ pcre2_match_context *match_context; 242+ pcre2_match_data *match_data; 243 }; 244 245 struct _GRegex 246 { 247 gint ref_count; /* the ref count for the immutable part (atomic) */ 248 gchar *pattern; /* the pattern */ 249- pcre *pcre_re; /* compiled form of the pattern */ 250- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */ 251+ pcre2_code *pcre_re; /* compiled form of the pattern */ 252+ GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */ 253+ GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */ 254 GRegexMatchFlags match_opts; /* options used at match time on the regex */ 255- pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */ 256 }; 257 258 /* TRUE if ret is an error code, FALSE otherwise. */ 259-#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL) 260+#define IS_PCRE2_ERROR(ret) ((ret) < PCRE2_ERROR_NOMATCH && (ret) != PCRE2_ERROR_PARTIAL) 261 262 typedef struct _InterpolationData InterpolationData; 263 static gboolean interpolation_list_needs_match (GList *list); 264@@ -236,70 +215,249 @@ static GList *split_replacement (const gchar *replacement, 265 GError **error); 266 static void free_interpolation_data (InterpolationData *data); 267 268+static gint 269+map_to_pcre2_compile_flags (gint pcre1_flags) 270+{ 271+ /* Maps compile flags from pcre1 to pcre2 values 272+ */ 273+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 274+ 275+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 276+ return pcre1_flags; 277+ 278+ if (pcre1_flags & G_REGEX_CASELESS) 279+ pcre2_flags |= PCRE2_CASELESS; 280+ if (pcre1_flags & G_REGEX_MULTILINE) 281+ pcre2_flags |= PCRE2_MULTILINE; 282+ if (pcre1_flags & G_REGEX_DOTALL) 283+ pcre2_flags |= PCRE2_DOTALL; 284+ if (pcre1_flags & G_REGEX_EXTENDED) 285+ pcre2_flags |= PCRE2_EXTENDED; 286+ if (pcre1_flags & G_REGEX_ANCHORED) 287+ pcre2_flags |= PCRE2_ANCHORED; 288+ if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY) 289+ pcre2_flags |= PCRE2_DOLLAR_ENDONLY; 290+ if (pcre1_flags & G_REGEX_UNGREEDY) 291+ pcre2_flags |= PCRE2_UNGREEDY; 292+ if (!(pcre1_flags & G_REGEX_RAW)) 293+ pcre2_flags |= PCRE2_UTF; 294+ if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE) 295+ pcre2_flags |= PCRE2_NO_AUTO_CAPTURE; 296+ if (pcre1_flags & G_REGEX_FIRSTLINE) 297+ pcre2_flags |= PCRE2_FIRSTLINE; 298+ if (pcre1_flags & G_REGEX_DUPNAMES) 299+ pcre2_flags |= PCRE2_DUPNAMES; 300+ if (pcre1_flags & G_REGEX_NEWLINE_CR) 301+ pcre2_flags |= PCRE2_NEWLINE_CR; 302+ if (pcre1_flags & G_REGEX_NEWLINE_LF) 303+ pcre2_flags |= PCRE2_NEWLINE_LF; 304+ /* Check for exact match for a composite flag */ 305+ if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF) 306+ pcre2_flags |= PCRE2_NEWLINE_CRLF; 307+ /* Check for exact match for a composite flag */ 308+ if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF) 309+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 310+ if (pcre1_flags & G_REGEX_BSR_ANYCRLF) 311+ pcre2_flags |= PCRE2_BSR_ANYCRLF; 312+ 313+ /* these are not available in pcre2 */ 314+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 315+ if (pcre1_flags & G_REGEX_OPTIMIZE) 316+ pcre2_flags |= 0; 317+ if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT) 318+ pcre2_flags |= 0; 319+G_GNUC_END_IGNORE_DEPRECATIONS 320+ 321+ return pcre2_flags; 322+} 323+ 324+static gint 325+map_to_pcre2_match_flags (gint pcre1_flags) 326+{ 327+ /* Maps match flags from pcre1 to pcre2 values 328+ */ 329+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 330+ 331+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 332+ return pcre1_flags; 333+ 334+ if (pcre1_flags & G_REGEX_MATCH_ANCHORED) 335+ pcre2_flags |= PCRE2_ANCHORED; 336+ if (pcre1_flags & G_REGEX_MATCH_NOTBOL) 337+ pcre2_flags |= PCRE2_NOTBOL; 338+ if (pcre1_flags & G_REGEX_MATCH_NOTEOL) 339+ pcre2_flags |= PCRE2_NOTEOL; 340+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY) 341+ pcre2_flags |= PCRE2_NOTEMPTY; 342+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL) 343+ pcre2_flags |= PCRE2_PARTIAL_SOFT; 344+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR) 345+ pcre2_flags |= PCRE2_NEWLINE_CR; 346+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF) 347+ pcre2_flags |= PCRE2_NEWLINE_LF; 348+ /* Check for exact match for a composite flag */ 349+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF) 350+ pcre2_flags |= PCRE2_NEWLINE_CRLF; 351+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY) 352+ pcre2_flags |= PCRE2_NEWLINE_ANY; 353+ /* Check for exact match for a composite flag */ 354+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF) 355+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 356+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF) 357+ pcre2_flags |= PCRE2_BSR_ANYCRLF; 358+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANY) 359+ pcre2_flags |= PCRE2_BSR_UNICODE; 360+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT) 361+ pcre2_flags |= PCRE2_PARTIAL_SOFT; 362+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD) 363+ pcre2_flags |= PCRE2_PARTIAL_HARD; 364+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) 365+ pcre2_flags |= PCRE2_NOTEMPTY_ATSTART; 366+ 367+ return pcre2_flags; 368+} 369+ 370+static gint 371+map_to_pcre1_compile_flags (gint pcre2_flags) 372+{ 373+ /* Maps compile flags from pcre2 to pcre1 values 374+ */ 375+ gint pcre1_flags = 0; 376+ 377+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 378+ return pcre2_flags; 379+ 380+ if (pcre2_flags & PCRE2_CASELESS) 381+ pcre1_flags |= G_REGEX_CASELESS; 382+ if (pcre2_flags & PCRE2_MULTILINE) 383+ pcre1_flags |= G_REGEX_MULTILINE; 384+ if (pcre2_flags & PCRE2_DOTALL) 385+ pcre1_flags |= G_REGEX_DOTALL; 386+ if (pcre2_flags & PCRE2_EXTENDED) 387+ pcre1_flags |= G_REGEX_EXTENDED; 388+ if (pcre2_flags & PCRE2_ANCHORED) 389+ pcre1_flags |= G_REGEX_ANCHORED; 390+ if (pcre2_flags & PCRE2_DOLLAR_ENDONLY) 391+ pcre1_flags |= G_REGEX_DOLLAR_ENDONLY; 392+ if (pcre2_flags & PCRE2_UNGREEDY) 393+ pcre1_flags |= G_REGEX_UNGREEDY; 394+ if (!(pcre2_flags & PCRE2_UTF)) 395+ pcre1_flags |= G_REGEX_RAW; 396+ if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE) 397+ pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE; 398+ if (pcre2_flags & PCRE2_FIRSTLINE) 399+ pcre1_flags |= G_REGEX_FIRSTLINE; 400+ if (pcre2_flags & PCRE2_DUPNAMES) 401+ pcre1_flags |= G_REGEX_DUPNAMES; 402+ if (pcre2_flags & PCRE2_NEWLINE_CR) 403+ pcre1_flags |= G_REGEX_NEWLINE_CR; 404+ if (pcre2_flags & PCRE2_NEWLINE_LF) 405+ pcre1_flags |= G_REGEX_NEWLINE_LF; 406+ /* Check for exact match for a composite flag */ 407+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 408+ pcre1_flags |= G_REGEX_NEWLINE_CRLF; 409+ /* Check for exact match for a composite flag */ 410+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 411+ pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF; 412+ if (pcre2_flags & PCRE2_BSR_ANYCRLF) 413+ pcre1_flags |= G_REGEX_BSR_ANYCRLF; 414+ 415+ return pcre1_flags; 416+} 417+ 418+static gint 419+map_to_pcre1_match_flags (gint pcre2_flags) 420+{ 421+ /* Maps match flags from pcre2 to pcre1 values 422+ */ 423+ gint pcre1_flags = 0; 424+ 425+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 426+ return pcre2_flags; 427+ 428+ if (pcre2_flags & PCRE2_ANCHORED) 429+ pcre1_flags |= G_REGEX_MATCH_ANCHORED; 430+ if (pcre2_flags & PCRE2_NOTBOL) 431+ pcre1_flags |= G_REGEX_MATCH_NOTBOL; 432+ if (pcre2_flags & PCRE2_NOTEOL) 433+ pcre1_flags |= G_REGEX_MATCH_NOTEOL; 434+ if (pcre2_flags & PCRE2_NOTEMPTY) 435+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY; 436+ if (pcre2_flags & PCRE2_PARTIAL_SOFT) 437+ pcre1_flags |= G_REGEX_MATCH_PARTIAL; 438+ if (pcre2_flags & PCRE2_NEWLINE_CR) 439+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR; 440+ if (pcre2_flags & PCRE2_NEWLINE_LF) 441+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF; 442+ /* Check for exact match for a composite flag */ 443+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 444+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF; 445+ if (pcre2_flags & PCRE2_NEWLINE_ANY) 446+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY; 447+ /* Check for exact match for a composite flag */ 448+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 449+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF; 450+ if (pcre2_flags & PCRE2_BSR_ANYCRLF) 451+ pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF; 452+ if (pcre2_flags & PCRE2_BSR_UNICODE) 453+ pcre1_flags |= G_REGEX_MATCH_BSR_ANY; 454+ if (pcre2_flags & PCRE2_PARTIAL_SOFT) 455+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT; 456+ if (pcre2_flags & PCRE2_PARTIAL_HARD) 457+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD; 458+ if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART) 459+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; 460+ 461+ return pcre1_flags; 462+} 463 464 static const gchar * 465 match_error (gint errcode) 466 { 467 switch (errcode) 468 { 469- case PCRE_ERROR_NOMATCH: 470+ case PCRE2_ERROR_NOMATCH: 471 /* not an error */ 472 break; 473- case PCRE_ERROR_NULL: 474+ case PCRE2_ERROR_NULL: 475 /* NULL argument, this should not happen in GRegex */ 476 g_warning ("A NULL argument was passed to PCRE"); 477 break; 478- case PCRE_ERROR_BADOPTION: 479+ case PCRE2_ERROR_BADOPTION: 480 return "bad options"; 481- case PCRE_ERROR_BADMAGIC: 482+ case PCRE2_ERROR_BADMAGIC: 483 return _("corrupted object"); 484- case PCRE_ERROR_UNKNOWN_OPCODE: 485- return N_("internal error or corrupted object"); 486- case PCRE_ERROR_NOMEMORY: 487+ case PCRE2_ERROR_NOMEMORY: 488 return _("out of memory"); 489- case PCRE_ERROR_NOSUBSTRING: 490- /* not used by pcre_exec() */ 491+ case PCRE2_ERROR_NOSUBSTRING: 492+ /* not used by pcre2_match() */ 493 break; 494- case PCRE_ERROR_MATCHLIMIT: 495+ case PCRE2_ERROR_MATCHLIMIT: 496 return _("backtracking limit reached"); 497- case PCRE_ERROR_CALLOUT: 498+ case PCRE2_ERROR_CALLOUT: 499 /* callouts are not implemented */ 500 break; 501- case PCRE_ERROR_BADUTF8: 502- case PCRE_ERROR_BADUTF8_OFFSET: 503+ case PCRE2_ERROR_BADUTFOFFSET: 504 /* we do not check if strings are valid */ 505 break; 506- case PCRE_ERROR_PARTIAL: 507+ case PCRE2_ERROR_PARTIAL: 508 /* not an error */ 509 break; 510- case PCRE_ERROR_BADPARTIAL: 511- return _("the pattern contains items not supported for partial matching"); 512- case PCRE_ERROR_INTERNAL: 513+ case PCRE2_ERROR_INTERNAL: 514 return _("internal error"); 515- case PCRE_ERROR_BADCOUNT: 516- /* negative ovecsize, this should not happen in GRegex */ 517- g_warning ("A negative ovecsize was passed to PCRE"); 518- break; 519- case PCRE_ERROR_DFA_UITEM: 520+ case PCRE2_ERROR_DFA_UITEM: 521 return _("the pattern contains items not supported for partial matching"); 522- case PCRE_ERROR_DFA_UCOND: 523+ case PCRE2_ERROR_DFA_UCOND: 524 return _("back references as conditions are not supported for partial matching"); 525- case PCRE_ERROR_DFA_UMLIMIT: 526- /* the match_field field is not used in GRegex */ 527- break; 528- case PCRE_ERROR_DFA_WSSIZE: 529+ case PCRE2_ERROR_DFA_WSSIZE: 530 /* handled expanding the workspace */ 531 break; 532- case PCRE_ERROR_DFA_RECURSE: 533- case PCRE_ERROR_RECURSIONLIMIT: 534+ case PCRE2_ERROR_DFA_RECURSE: 535+ case PCRE2_ERROR_RECURSIONLIMIT: 536 return _("recursion limit reached"); 537- case PCRE_ERROR_BADNEWLINE: 538- return _("invalid combination of newline flags"); 539- case PCRE_ERROR_BADOFFSET: 540+ case PCRE2_ERROR_BADOFFSET: 541 return _("bad offset"); 542- case PCRE_ERROR_SHORTUTF8: 543- return _("short utf8"); 544- case PCRE_ERROR_RECURSELOOP: 545+ case PCRE2_ERROR_RECURSELOOP: 546 return _("recursion loop"); 547 default: 548 break; 549@@ -310,242 +468,263 @@ match_error (gint errcode) 550 static void 551 translate_compile_error (gint *errcode, const gchar **errmsg) 552 { 553- /* Compile errors are created adding 100 to the error code returned 554- * by PCRE. 555- * If errcode is known we put the translatable error message in 556- * erromsg. If errcode is unknown we put the generic 557- * G_REGEX_ERROR_COMPILE error code in errcode and keep the 558- * untranslated error message returned by PCRE. 559+ /* If errcode is known we put the translatable error message in 560+ * errmsg. If errcode is unknown we put the generic 561+ * G_REGEX_ERROR_COMPILE error code in errcode. 562 * Note that there can be more PCRE errors with the same GRegexError 563 * and that some PCRE errors are useless for us. 564 */ 565- *errcode += 100; 566 567 switch (*errcode) 568 { 569- case G_REGEX_ERROR_STRAY_BACKSLASH: 570+ case PCRE2_ERROR_END_BACKSLASH: 571+ *errcode = G_REGEX_ERROR_STRAY_BACKSLASH; 572 *errmsg = _("\\ at end of pattern"); 573 break; 574- case G_REGEX_ERROR_MISSING_CONTROL_CHAR: 575+ case PCRE2_ERROR_END_BACKSLASH_C: 576+ *errcode = G_REGEX_ERROR_MISSING_CONTROL_CHAR; 577 *errmsg = _("\\c at end of pattern"); 578 break; 579- case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: 580+ case PCRE2_ERROR_UNKNOWN_ESCAPE: 581+ case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE: 582+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; 583 *errmsg = _("unrecognized character following \\"); 584 break; 585- case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: 586+ case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER: 587+ *errcode = G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER; 588 *errmsg = _("numbers out of order in {} quantifier"); 589 break; 590- case G_REGEX_ERROR_QUANTIFIER_TOO_BIG: 591+ case PCRE2_ERROR_QUANTIFIER_TOO_BIG: 592+ *errcode = G_REGEX_ERROR_QUANTIFIER_TOO_BIG; 593 *errmsg = _("number too big in {} quantifier"); 594 break; 595- case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: 596+ case PCRE2_ERROR_MISSING_SQUARE_BRACKET: 597+ *errcode = G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS; 598 *errmsg = _("missing terminating ] for character class"); 599 break; 600- case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: 601+ case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS: 602+ *errcode = G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS; 603 *errmsg = _("invalid escape sequence in character class"); 604 break; 605- case G_REGEX_ERROR_RANGE_OUT_OF_ORDER: 606+ case PCRE2_ERROR_CLASS_RANGE_ORDER: 607+ *errcode = G_REGEX_ERROR_RANGE_OUT_OF_ORDER; 608 *errmsg = _("range out of order in character class"); 609 break; 610- case G_REGEX_ERROR_NOTHING_TO_REPEAT: 611+ case PCRE2_ERROR_QUANTIFIER_INVALID: 612+ case PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT: 613+ *errcode = G_REGEX_ERROR_NOTHING_TO_REPEAT; 614 *errmsg = _("nothing to repeat"); 615 break; 616- case 111: /* internal error: unexpected repeat */ 617- *errcode = G_REGEX_ERROR_INTERNAL; 618- *errmsg = _("unexpected repeat"); 619- break; 620- case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: 621+ case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY: 622+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; 623 *errmsg = _("unrecognized character after (? or (?-"); 624 break; 625- case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: 626+ case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS: 627+ *errcode = G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS; 628 *errmsg = _("POSIX named classes are supported only within a class"); 629 break; 630- case G_REGEX_ERROR_UNMATCHED_PARENTHESIS: 631+ case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING: 632+ *errcode = G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED; 633+ *errmsg = _("POSIX collating elements are not supported"); 634+ break; 635+ case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS: 636+ case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS: 637+ case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING: 638+ *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; 639 *errmsg = _("missing terminating )"); 640 break; 641- case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: 642+ case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE: 643+ *errcode = G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE; 644 *errmsg = _("reference to non-existent subpattern"); 645 break; 646- case G_REGEX_ERROR_UNTERMINATED_COMMENT: 647+ case PCRE2_ERROR_MISSING_COMMENT_CLOSING: 648+ *errcode = G_REGEX_ERROR_UNTERMINATED_COMMENT; 649 *errmsg = _("missing ) after comment"); 650 break; 651- case G_REGEX_ERROR_EXPRESSION_TOO_LARGE: 652+ case PCRE2_ERROR_PATTERN_TOO_LARGE: 653+ *errcode = G_REGEX_ERROR_EXPRESSION_TOO_LARGE; 654 *errmsg = _("regular expression is too large"); 655 break; 656- case G_REGEX_ERROR_MEMORY_ERROR: 657- *errmsg = _("failed to get memory"); 658- break; 659- case 122: /* unmatched parentheses */ 660- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; 661- *errmsg = _(") without opening ("); 662- break; 663- case 123: /* internal error: code overflow */ 664- *errcode = G_REGEX_ERROR_INTERNAL; 665- *errmsg = _("code overflow"); 666- break; 667- case 124: /* "unrecognized character after (?<\0 */ 668- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; 669- *errmsg = _("unrecognized character after (?<"); 670+ case PCRE2_ERROR_MISSING_CONDITION_CLOSING: 671+ *errcode = G_REGEX_ERROR_MALFORMED_CONDITION; 672+ *errmsg = _("malformed number or name after (?("); 673 break; 674- case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: 675+ case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH: 676+ *errcode = G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND; 677 *errmsg = _("lookbehind assertion is not fixed length"); 678 break; 679- case G_REGEX_ERROR_MALFORMED_CONDITION: 680- *errmsg = _("malformed number or name after (?("); 681- break; 682- case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: 683+ case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES: 684+ *errcode = G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES; 685 *errmsg = _("conditional group contains more than two branches"); 686 break; 687- case G_REGEX_ERROR_ASSERTION_EXPECTED: 688+ case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED: 689+ *errcode = G_REGEX_ERROR_ASSERTION_EXPECTED; 690 *errmsg = _("assertion expected after (?("); 691 break; 692- case 129: 693- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; 694- /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) 695- * sequences here, '(?-54' would be an example for the second group. 696- */ 697- *errmsg = _("(?R or (?[+-]digits must be followed by )"); 698+ case PCRE2_ERROR_BAD_RELATIVE_REFERENCE: 699+ *errcode = G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE; 700+ *errmsg = _("a numbered reference must not be zero"); 701 break; 702- case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: 703+ case PCRE2_ERROR_UNKNOWN_POSIX_CLASS: 704+ *errcode = G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME; 705 *errmsg = _("unknown POSIX class name"); 706 break; 707- case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: 708- *errmsg = _("POSIX collating elements are not supported"); 709- break; 710- case G_REGEX_ERROR_HEX_CODE_TOO_LARGE: 711+ case PCRE2_ERROR_CODE_POINT_TOO_BIG: 712+ case PCRE2_ERROR_INVALID_HEXADECIMAL: 713+ *errcode = G_REGEX_ERROR_HEX_CODE_TOO_LARGE; 714 *errmsg = _("character value in \\x{...} sequence is too large"); 715 break; 716- case G_REGEX_ERROR_INVALID_CONDITION: 717- *errmsg = _("invalid condition (?(0)"); 718- break; 719- case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: 720+ case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C: 721+ *errcode = G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND; 722 *errmsg = _("\\C not allowed in lookbehind assertion"); 723 break; 724- case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */ 725- /* A number of Perl escapes are not handled by PCRE. 726- * Therefore it explicitly raises ERR37. 727- */ 728- *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; 729- *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported"); 730- break; 731- case G_REGEX_ERROR_INFINITE_LOOP: 732- *errmsg = _("recursive call could loop indefinitely"); 733- break; 734- case 141: /* unrecognized character after (?P\0 */ 735- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; 736- *errmsg = _("unrecognized character after (?P"); 737- break; 738- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: 739+ case PCRE2_ERROR_MISSING_NAME_TERMINATOR: 740+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR; 741 *errmsg = _("missing terminator in subpattern name"); 742 break; 743- case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: 744+ case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME: 745+ *errcode = G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME; 746 *errmsg = _("two named subpatterns have the same name"); 747 break; 748- case G_REGEX_ERROR_MALFORMED_PROPERTY: 749+ case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY: 750+ *errcode = G_REGEX_ERROR_MALFORMED_PROPERTY; 751 *errmsg = _("malformed \\P or \\p sequence"); 752 break; 753- case G_REGEX_ERROR_UNKNOWN_PROPERTY: 754+ case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY: 755+ *errcode = G_REGEX_ERROR_UNKNOWN_PROPERTY; 756 *errmsg = _("unknown property name after \\P or \\p"); 757 break; 758- case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: 759+ case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG: 760+ *errcode = G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG; 761 *errmsg = _("subpattern name is too long (maximum 32 characters)"); 762 break; 763- case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: 764+ case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS: 765+ *errcode = G_REGEX_ERROR_TOO_MANY_SUBPATTERNS; 766 *errmsg = _("too many named subpatterns (maximum 10,000)"); 767 break; 768- case G_REGEX_ERROR_INVALID_OCTAL_VALUE: 769+ case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG: 770+ *errcode = G_REGEX_ERROR_INVALID_OCTAL_VALUE; 771 *errmsg = _("octal value is greater than \\377"); 772 break; 773- case 152: /* internal error: overran compiling workspace */ 774- *errcode = G_REGEX_ERROR_INTERNAL; 775- *errmsg = _("overran compiling workspace"); 776- break; 777- case 153: /* internal error: previously-checked referenced subpattern not found */ 778- *errcode = G_REGEX_ERROR_INTERNAL; 779- *errmsg = _("previously-checked referenced subpattern not found"); 780- break; 781- case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: 782+ case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES: 783+ *errcode = G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE; 784 *errmsg = _("DEFINE group contains more than one branch"); 785 break; 786- case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: 787+ case PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE: 788+ *errcode = G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS; 789 *errmsg = _("inconsistent NEWLINE options"); 790 break; 791- case G_REGEX_ERROR_MISSING_BACK_REFERENCE: 792+ case PCRE2_ERROR_BACKSLASH_G_SYNTAX: 793+ *errcode = G_REGEX_ERROR_MISSING_BACK_REFERENCE; 794 *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " 795 "number, or by a plain number"); 796 break; 797- case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: 798- *errmsg = _("a numbered reference must not be zero"); 799- break; 800- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: 801+ case PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED: 802+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN; 803 *errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"); 804 break; 805- case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: 806+ case PCRE2_ERROR_VERB_UNKNOWN: 807+ *errcode = G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB; 808 *errmsg = _("(*VERB) not recognized"); 809 break; 810- case G_REGEX_ERROR_NUMBER_TOO_BIG: 811+ case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG: 812+ *errcode = G_REGEX_ERROR_NUMBER_TOO_BIG; 813 *errmsg = _("number is too big"); 814 break; 815- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: 816+ case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED: 817+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME; 818 *errmsg = _("missing subpattern name after (?&"); 819 break; 820- case G_REGEX_ERROR_MISSING_DIGIT: 821- *errmsg = _("digit expected after (?+"); 822- break; 823- case G_REGEX_ERROR_INVALID_DATA_CHARACTER: 824- *errmsg = _("] is an invalid data character in JavaScript compatibility mode"); 825- break; 826- case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: 827+ case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH: 828+ *errcode = G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME; 829 *errmsg = _("different names for subpatterns of the same number are not allowed"); 830 break; 831- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: 832+ case PCRE2_ERROR_MARK_MISSING_ARGUMENT: 833+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED; 834 *errmsg = _("(*MARK) must have an argument"); 835 break; 836- case G_REGEX_ERROR_INVALID_CONTROL_CHAR: 837+ case PCRE2_ERROR_BACKSLASH_C_SYNTAX: 838+ *errcode = G_REGEX_ERROR_INVALID_CONTROL_CHAR; 839 *errmsg = _( "\\c must be followed by an ASCII character"); 840 break; 841- case G_REGEX_ERROR_MISSING_NAME: 842+ case PCRE2_ERROR_BACKSLASH_K_SYNTAX: 843+ *errcode = G_REGEX_ERROR_MISSING_NAME; 844 *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); 845 break; 846- case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: 847+ case PCRE2_ERROR_BACKSLASH_N_IN_CLASS: 848+ *errcode = G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS; 849 *errmsg = _("\\N is not supported in a class"); 850 break; 851- case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: 852- *errmsg = _("too many forward references"); 853- break; 854- case G_REGEX_ERROR_NAME_TOO_LONG: 855+ case PCRE2_ERROR_VERB_NAME_TOO_LONG: 856+ *errcode = G_REGEX_ERROR_NAME_TOO_LONG; 857 *errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"); 858 break; 859- case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: 860- *errmsg = _("character value in \\u.... sequence is too large"); 861+ case PCRE2_ERROR_INTERNAL_CODE_OVERFLOW: 862+ *errcode = G_REGEX_ERROR_INTERNAL; 863+ *errmsg = _("code overflow"); 864 break; 865- 866- case 116: /* erroffset passed as NULL */ 867- /* This should not happen as we never pass a NULL erroffset */ 868- g_warning ("erroffset passed as NULL"); 869- *errcode = G_REGEX_ERROR_COMPILE; 870+ case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P: 871+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; 872+ *errmsg = _("unrecognized character after (?P"); 873 break; 874- case 117: /* unknown option bit(s) set */ 875- /* This should not happen as we check options before passing them 876- * to pcre_compile2() */ 877- g_warning ("unknown option bit(s) set"); 878- *errcode = G_REGEX_ERROR_COMPILE; 879+ case PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE: 880+ *errcode = G_REGEX_ERROR_INTERNAL; 881+ *errmsg = _("overran compiling workspace"); 882 break; 883- case 132: /* this version of PCRE is compiled without UTF support */ 884- case 144: /* invalid UTF-8 string */ 885- case 145: /* support for \\P, \\p, and \\X has not been compiled */ 886- case 167: /* this version of PCRE is not compiled with Unicode property support */ 887- case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */ 888- case 174: /* invalid UTF-16 string */ 889- /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE 890- * and we do not check if strings are valid */ 891- case 170: /* internal error: unknown opcode in find_fixedlength() */ 892+ case PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN: 893 *errcode = G_REGEX_ERROR_INTERNAL; 894+ *errmsg = _("previously-checked referenced subpattern not found"); 895 break; 896- 897+ case PCRE2_ERROR_HEAP_FAILED: 898+ case PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW: 899+ case PCRE2_ERROR_UNICODE_NOT_SUPPORTED: 900+ case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT: 901+ case PCRE2_ERROR_NO_SURROGATES_IN_UTF16: 902+ case PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS: 903+ case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE: 904+ case PCRE2_ERROR_INTERNAL_STUDY_ERROR: 905+ case PCRE2_ERROR_UTF_IS_DISABLED: 906+ case PCRE2_ERROR_UCP_IS_DISABLED: 907+ case PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS: 908+ case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED: 909+ case PCRE2_ERROR_INTERNAL_BAD_CODE: 910+ case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP: 911+ *errcode = G_REGEX_ERROR_INTERNAL; 912+ *errmsg = _("internal error"); 913+ break; 914+ case PCRE2_ERROR_INVALID_SUBPATTERN_NAME: 915+ case PCRE2_ERROR_CLASS_INVALID_RANGE: 916+ case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE: 917+ case PCRE2_ERROR_PARENTHESES_STACK_CHECK: 918+ case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED: 919+ case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG: 920+ case PCRE2_ERROR_MISSING_CALLOUT_CLOSING: 921+ case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB: 922+ case PCRE2_ERROR_NULL_PATTERN: 923+ case PCRE2_ERROR_BAD_OPTIONS: 924+ case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP: 925+ case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE: 926+ case PCRE2_ERROR_INVALID_OCTAL: 927+ case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG: 928+ case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG: 929+ case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS: 930+ case PCRE2_ERROR_VERSION_CONDITION_SYNTAX: 931+ case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER: 932+ case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER: 933+ case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED: 934+ case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP: 935+ case PCRE2_ERROR_PATTERN_TOO_COMPLICATED: 936+ case PCRE2_ERROR_LOOKBEHIND_TOO_LONG: 937+ case PCRE2_ERROR_PATTERN_STRING_TOO_LONG: 938+ case PCRE2_ERROR_BAD_LITERAL_OPTIONS: 939 default: 940 *errcode = G_REGEX_ERROR_COMPILE; 941+ *errmsg = _("internal error"); 942+ break; 943 } 944+ 945+ g_assert (*errcode != 0); 946+ g_assert (*errmsg != NULL); 947 } 948 949 /* GMatchInfo */ 950@@ -568,12 +747,16 @@ match_info_new (const GRegex *regex, 951 match_info->regex = g_regex_ref ((GRegex *)regex); 952 match_info->string = string; 953 match_info->string_len = string_len; 954- match_info->matches = PCRE_ERROR_NOMATCH; 955+ match_info->matches = PCRE2_ERROR_NOMATCH; 956 match_info->pos = start_position; 957 match_info->match_opts = match_options; 958 959- pcre_fullinfo (regex->pcre_re, regex->extra, 960- PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns); 961+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, 962+ &match_info->n_subpatterns); 963+ 964+ match_info->match_context = pcre2_match_context_create (NULL); 965+ pcre2_set_match_limit (match_info->match_context, 65536); /* should be plenty */ 966+ pcre2_set_recursion_limit (match_info->match_context, 64); /* should be plenty */ 967 968 if (is_dfa) 969 { 970@@ -593,9 +776,41 @@ match_info_new (const GRegex *regex, 971 match_info->offsets[0] = -1; 972 match_info->offsets[1] = -1; 973 974+ match_info->match_data = pcre2_match_data_create_from_pattern ( 975+ match_info->regex->pcre_re, 976+ NULL); 977+ 978 return match_info; 979 } 980 981+static gboolean 982+recalc_match_offsets (GMatchInfo *match_info, 983+ GError **error) 984+{ 985+ PCRE2_SIZE *ovector; 986+ gint i; 987+ 988+ if (pcre2_get_ovector_count (match_info->match_data) > G_MAXINT / 2) 989+ { 990+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, 991+ _("Error while matching regular expression %s: %s"), 992+ match_info->regex->pattern, _("code overflow")); 993+ return FALSE; 994+ } 995+ 996+ match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2; 997+ ovector = pcre2_get_ovector_pointer (match_info->match_data); 998+ match_info->offsets = g_realloc_n (match_info->offsets, 999+ match_info->n_offsets, 1000+ sizeof (gint)); 1001+ for (i = 0; i < match_info->n_offsets; i++) 1002+ { 1003+ match_info->offsets[i] = (int) ovector[i]; 1004+ } 1005+ 1006+ return TRUE; 1007+} 1008+ 1009 /** 1010 * g_match_info_get_regex: 1011 * @match_info: a #GMatchInfo 1012@@ -667,6 +882,10 @@ g_match_info_unref (GMatchInfo *match_info) 1013 if (g_atomic_int_dec_and_test (&match_info->ref_count)) 1014 { 1015 g_regex_unref (match_info->regex); 1016+ if (match_info->match_context) 1017+ pcre2_match_context_free (match_info->match_context); 1018+ if (match_info->match_data) 1019+ pcre2_match_data_free (match_info->match_data); 1020 g_free (match_info->offsets); 1021 g_free (match_info->workspace); 1022 g_free (match_info); 1023@@ -713,6 +932,7 @@ g_match_info_next (GMatchInfo *match_info, 1024 { 1025 gint prev_match_start; 1026 gint prev_match_end; 1027+ gint opts; 1028 1029 g_return_val_if_fail (match_info != NULL, FALSE); 1030 g_return_val_if_fail (error == NULL || *error == NULL, FALSE); 1031@@ -725,25 +945,29 @@ g_match_info_next (GMatchInfo *match_info, 1032 { 1033 /* we have reached the end of the string */ 1034 match_info->pos = -1; 1035- match_info->matches = PCRE_ERROR_NOMATCH; 1036+ match_info->matches = PCRE2_ERROR_NOMATCH; 1037 return FALSE; 1038 } 1039 1040- match_info->matches = pcre_exec (match_info->regex->pcre_re, 1041- match_info->regex->extra, 1042- match_info->string, 1043- match_info->string_len, 1044- match_info->pos, 1045- match_info->regex->match_opts | match_info->match_opts, 1046- match_info->offsets, 1047- match_info->n_offsets); 1048- if (IS_PCRE_ERROR (match_info->matches)) 1049+ opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); 1050+ match_info->matches = pcre2_match (match_info->regex->pcre_re, 1051+ (PCRE2_SPTR8) match_info->string, 1052+ match_info->string_len, 1053+ match_info->pos, 1054+ opts & ~G_REGEX_FLAGS_CONVERTED, 1055+ match_info->match_data, 1056+ match_info->match_context); 1057+ 1058+ if (IS_PCRE2_ERROR (match_info->matches)) 1059 { 1060 g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, 1061 _("Error while matching regular expression %s: %s"), 1062 match_info->regex->pattern, match_error (match_info->matches)); 1063 return FALSE; 1064 } 1065+ else 1066+ if (!recalc_match_offsets (match_info, error)) 1067+ return FALSE; 1068 1069 /* avoid infinite loops if the pattern is an empty string or something 1070 * equivalent */ 1071@@ -753,7 +977,7 @@ g_match_info_next (GMatchInfo *match_info, 1072 { 1073 /* we have reached the end of the string */ 1074 match_info->pos = -1; 1075- match_info->matches = PCRE_ERROR_NOMATCH; 1076+ match_info->matches = PCRE2_ERROR_NOMATCH; 1077 return FALSE; 1078 } 1079 1080@@ -831,10 +1055,10 @@ g_match_info_get_match_count (const GMatchInfo *match_info) 1081 { 1082 g_return_val_if_fail (match_info, -1); 1083 1084- if (match_info->matches == PCRE_ERROR_NOMATCH) 1085+ if (match_info->matches == PCRE2_ERROR_NOMATCH) 1086 /* no match */ 1087 return 0; 1088- else if (match_info->matches < PCRE_ERROR_NOMATCH) 1089+ else if (match_info->matches < PCRE2_ERROR_NOMATCH) 1090 /* error */ 1091 return -1; 1092 else 1093@@ -889,7 +1113,7 @@ g_match_info_is_partial_match (const GMatchInfo *match_info) 1094 { 1095 g_return_val_if_fail (match_info != NULL, FALSE); 1096 1097- return match_info->matches == PCRE_ERROR_PARTIAL; 1098+ return match_info->matches == PCRE2_ERROR_PARTIAL; 1099 } 1100 1101 /** 1102@@ -986,8 +1210,6 @@ gchar * 1103 g_match_info_fetch (const GMatchInfo *match_info, 1104 gint match_num) 1105 { 1106- /* we cannot use pcre_get_substring() because it allocates the 1107- * string using pcre_malloc(). */ 1108 gchar *match = NULL; 1109 gint start, end; 1110 1111@@ -1067,24 +1289,25 @@ g_match_info_fetch_pos (const GMatchInfo *match_info, 1112 * Returns number of first matched subpattern with name @name. 1113 * There may be more than one in case when DUPNAMES is used, 1114 * and not all subpatterns with that name match; 1115- * pcre_get_stringnumber() does not work in that case. 1116+ * pcre2_substring_number_from_name() does not work in that case. 1117 */ 1118 static gint 1119 get_matched_substring_number (const GMatchInfo *match_info, 1120 const gchar *name) 1121 { 1122 gint entrysize; 1123- gchar *first, *last; 1124+ PCRE2_SPTR first, last; 1125 guchar *entry; 1126 1127- if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES)) 1128- return pcre_get_stringnumber (match_info->regex->pcre_re, name); 1129+ if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES)) 1130+ return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR8) name); 1131 1132- /* This code is copied from pcre_get.c: get_first_set() */ 1133- entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re, 1134- name, 1135- &first, 1136- &last); 1137+ /* This code is analogous to code from pcre2_substring.c: 1138+ * pcre2_substring_get_byname() */ 1139+ entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re, 1140+ (PCRE2_SPTR8) name, 1141+ &first, 1142+ &last); 1143 1144 if (entrysize <= 0) 1145 return entrysize; 1146@@ -1122,8 +1345,6 @@ gchar * 1147 g_match_info_fetch_named (const GMatchInfo *match_info, 1148 const gchar *name) 1149 { 1150- /* we cannot use pcre_get_named_substring() because it allocates the 1151- * string using pcre_malloc(). */ 1152 gint num; 1153 1154 g_return_val_if_fail (match_info != NULL, NULL); 1155@@ -1205,8 +1426,6 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info, 1156 gchar ** 1157 g_match_info_fetch_all (const GMatchInfo *match_info) 1158 { 1159- /* we cannot use pcre_get_substring_list() because the returned value 1160- * isn't suitable for g_strfreev(). */ 1161 gchar **result; 1162 gint i; 1163 1164@@ -1264,9 +1483,7 @@ g_regex_unref (GRegex *regex) 1165 { 1166 g_free (regex->pattern); 1167 if (regex->pcre_re != NULL) 1168- pcre_free (regex->pcre_re); 1169- if (regex->extra != NULL) 1170- pcre_free (regex->extra); 1171+ pcre2_code_free (regex->pcre_re); 1172 g_free (regex); 1173 } 1174 } 1175@@ -1274,11 +1491,11 @@ g_regex_unref (GRegex *regex) 1176 /* 1177 * @match_options: (inout) (optional): 1178 */ 1179-static pcre *regex_compile (const gchar *pattern, 1180- GRegexCompileFlags compile_options, 1181- GRegexCompileFlags *compile_options_out, 1182- GRegexMatchFlags *match_options, 1183- GError **error); 1184+static pcre2_code *regex_compile (const gchar *pattern, 1185+ GRegexCompileFlags compile_options, 1186+ GRegexCompileFlags *compile_options_out, 1187+ GRegexMatchFlags *match_options, 1188+ GError **error); 1189 1190 /** 1191 * g_regex_new: 1192@@ -1302,10 +1519,13 @@ g_regex_new (const gchar *pattern, 1193 GError **error) 1194 { 1195 GRegex *regex; 1196- pcre *re; 1197- const gchar *errmsg; 1198- gboolean optimize = FALSE; 1199+ pcre2_code *re; 1200 static gsize initialised = 0; 1201+ GRegexCompileFlags orig_compile_opts; 1202+ 1203+ orig_compile_opts = compile_options; 1204+ compile_options = map_to_pcre2_compile_flags (compile_options); 1205+ match_options = map_to_pcre2_match_flags (match_options); 1206 1207 g_return_val_if_fail (pattern != NULL, NULL); 1208 g_return_val_if_fail (error == NULL || *error == NULL, NULL); 1209@@ -1314,17 +1534,13 @@ g_regex_new (const gchar *pattern, 1210 1211 if (g_once_init_enter (&initialised)) 1212 { 1213- int supports_utf8, supports_ucp; 1214+ int supports_utf8; 1215 1216- pcre_config (PCRE_CONFIG_UTF8, &supports_utf8); 1217+ pcre2_config (PCRE2_CONFIG_UNICODE, &supports_utf8); 1218 if (!supports_utf8) 1219 g_critical (_("PCRE library is compiled without UTF8 support")); 1220 1221- pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp); 1222- if (!supports_ucp) 1223- g_critical (_("PCRE library is compiled without UTF8 properties support")); 1224- 1225- g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2); 1226+ g_once_init_leave (&initialised, supports_utf8 ? 1 : 2); 1227 } 1228 1229 if (G_UNLIKELY (initialised != 1)) 1230@@ -1334,14 +1550,22 @@ g_regex_new (const gchar *pattern, 1231 return NULL; 1232 } 1233 1234- /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK, 1235- * as we do not need to wrap PCRE_NO_UTF8_CHECK. */ 1236- if (compile_options & G_REGEX_OPTIMIZE) 1237- optimize = TRUE; 1238+ switch (compile_options & G_REGEX_NEWLINE_MASK) 1239+ { 1240+ case 0: /* PCRE2_NEWLINE_ANY */ 1241+ case PCRE2_NEWLINE_CR: 1242+ case PCRE2_NEWLINE_LF: 1243+ case PCRE2_NEWLINE_CRLF: 1244+ case PCRE2_NEWLINE_ANYCRLF: 1245+ break; 1246+ default: 1247+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 1248+ "Invalid newline flags"); 1249+ return NULL; 1250+ } 1251 1252 re = regex_compile (pattern, compile_options, &compile_options, 1253 &match_options, error); 1254- 1255 if (re == NULL) 1256 return NULL; 1257 1258@@ -1350,80 +1574,85 @@ g_regex_new (const gchar *pattern, 1259 regex->pattern = g_strdup (pattern); 1260 regex->pcre_re = re; 1261 regex->compile_opts = compile_options; 1262+ regex->orig_compile_opts = orig_compile_opts; 1263 regex->match_opts = match_options; 1264 1265- if (optimize) 1266- { 1267- regex->extra = pcre_study (regex->pcre_re, 0, &errmsg); 1268- if (errmsg != NULL) 1269- { 1270- GError *tmp_error = g_error_new (G_REGEX_ERROR, 1271- G_REGEX_ERROR_OPTIMIZE, 1272- _("Error while optimizing " 1273- "regular expression %s: %s"), 1274- regex->pattern, 1275- errmsg); 1276- g_propagate_error (error, tmp_error); 1277- 1278- g_regex_unref (regex); 1279- return NULL; 1280- } 1281- } 1282- 1283 return regex; 1284 } 1285 1286-static pcre * 1287-regex_compile (const gchar *pattern, 1288- GRegexCompileFlags compile_options, 1289- GRegexCompileFlags *compile_options_out, 1290- GRegexMatchFlags *match_options, 1291- GError **error) 1292+static gint 1293+extract_newline_options (const GRegexCompileFlags compile_options, 1294+ const GRegexMatchFlags *match_options) 1295+{ 1296+ gint newline_options = PCRE2_NEWLINE_ANY; 1297+ 1298+ if (compile_options & G_REGEX_NEWLINE_MASK) 1299+ newline_options = compile_options & G_REGEX_NEWLINE_MASK; 1300+ if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK) 1301+ newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK; 1302+ 1303+ return newline_options; 1304+} 1305+ 1306+static gint 1307+extract_bsr_options (const GRegexCompileFlags compile_options, 1308+ const GRegexMatchFlags *match_options) 1309+{ 1310+ gint bsr_options = PCRE2_BSR_UNICODE; 1311+ 1312+ if (compile_options & PCRE2_BSR_ANYCRLF) 1313+ bsr_options = PCRE2_BSR_ANYCRLF; 1314+ if (match_options && *match_options & PCRE2_BSR_ANYCRLF) 1315+ bsr_options = PCRE2_BSR_ANYCRLF; 1316+ if (match_options && *match_options & PCRE2_BSR_UNICODE) 1317+ bsr_options = PCRE2_BSR_UNICODE; 1318+ 1319+ return bsr_options; 1320+} 1321+ 1322+static pcre2_code * 1323+regex_compile (const gchar *pattern, 1324+ GRegexCompileFlags compile_options, 1325+ GRegexCompileFlags *compile_options_out, 1326+ GRegexMatchFlags *match_options, 1327+ GError **error) 1328 { 1329- pcre *re; 1330+ pcre2_code *re; 1331+ pcre2_compile_context *context; 1332 const gchar *errmsg; 1333- gint erroffset; 1334+ PCRE2_SIZE erroffset; 1335 gint errcode; 1336 GRegexCompileFlags nonpcre_compile_options; 1337 unsigned long int pcre_compile_options; 1338 1339 nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; 1340 1341- /* In GRegex the string are, by default, UTF-8 encoded. PCRE 1342- * instead uses UTF-8 only if required with PCRE_UTF8. */ 1343- if (compile_options & G_REGEX_RAW) 1344- { 1345- /* disable utf-8 */ 1346- compile_options &= ~G_REGEX_RAW; 1347- } 1348- else 1349- { 1350- /* enable utf-8 */ 1351- compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; 1352+ context = pcre2_compile_context_create (NULL); 1353 1354- if (match_options != NULL) 1355- *match_options |= PCRE_NO_UTF8_CHECK; 1356- } 1357+ /* set newline options */ 1358+ pcre2_set_newline (context, extract_newline_options (compile_options, match_options)); 1359+ 1360+ /* set bsr options */ 1361+ pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options)); 1362 1363- /* PCRE_NEWLINE_ANY is the default for the internal PCRE but 1364- * not for the system one. */ 1365- if (!(compile_options & G_REGEX_NEWLINE_CR) && 1366- !(compile_options & G_REGEX_NEWLINE_LF)) 1367+ /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */ 1368+ if (compile_options & PCRE2_UTF) 1369 { 1370- compile_options |= PCRE_NEWLINE_ANY; 1371+ compile_options |= PCRE2_NO_UTF_CHECK; 1372+ if (match_options != NULL) 1373+ *match_options |= PCRE2_NO_UTF_CHECK; 1374 } 1375 1376- compile_options |= PCRE_UCP; 1377- 1378- /* PCRE_BSR_UNICODE is the default for the internal PCRE but 1379- * possibly not for the system one. 1380- */ 1381- if (~compile_options & G_REGEX_BSR_ANYCRLF) 1382- compile_options |= PCRE_BSR_UNICODE; 1383+ compile_options |= PCRE2_UCP; 1384 1385 /* compile the pattern */ 1386- re = pcre_compile2 (pattern, compile_options, &errcode, 1387- &errmsg, &erroffset, NULL); 1388+ re = pcre2_compile ((PCRE2_SPTR8) pattern, 1389+ PCRE2_ZERO_TERMINATED, 1390+ compile_options & ~G_REGEX_FLAGS_CONVERTED, 1391+ &errcode, 1392+ &erroffset, 1393+ context); 1394+ pcre2_compile_context_free (context); 1395 1396 /* if the compilation failed, set the error member and return 1397 * immediately */ 1398@@ -1440,7 +1669,7 @@ regex_compile (const gchar *pattern, 1399 1400 tmp_error = g_error_new (G_REGEX_ERROR, errcode, 1401 _("Error while compiling regular " 1402- "expression %s at char %d: %s"), 1403+ "expression %s at char %" G_GSIZE_FORMAT ": %s"), 1404 pattern, erroffset, errmsg); 1405 g_propagate_error (error, tmp_error); 1406 1407@@ -1449,22 +1678,22 @@ regex_compile (const gchar *pattern, 1408 1409 /* For options set at the beginning of the pattern, pcre puts them into 1410 * compile options, e.g. "(?i)foo" will make the pcre structure store 1411- * PCRE_CASELESS even though it wasn't explicitly given for compilation. */ 1412- pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options); 1413+ * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */ 1414+ pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options); 1415 compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; 1416 1417- /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */ 1418- if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF) 1419- compile_options &= ~PCRE_NEWLINE_ANY; 1420+ /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */ 1421+ if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF) 1422+ compile_options &= ~PCRE2_NEWLINE_ANY; 1423 1424 compile_options |= nonpcre_compile_options; 1425 1426- if (!(compile_options & G_REGEX_DUPNAMES)) 1427+ if (!(compile_options & PCRE2_DUPNAMES)) 1428 { 1429 gboolean jchanged = FALSE; 1430- pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged); 1431+ pcre2_pattern_info (re, PCRE2_INFO_JCHANGED, &jchanged); 1432 if (jchanged) 1433- compile_options |= G_REGEX_DUPNAMES; 1434+ compile_options |= PCRE2_DUPNAMES; 1435 } 1436 1437 if (compile_options_out != 0) 1438@@ -1509,8 +1738,7 @@ g_regex_get_max_backref (const GRegex *regex) 1439 { 1440 gint value; 1441 1442- pcre_fullinfo (regex->pcre_re, regex->extra, 1443- PCRE_INFO_BACKREFMAX, &value); 1444+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value); 1445 1446 return value; 1447 } 1448@@ -1530,8 +1758,7 @@ g_regex_get_capture_count (const GRegex *regex) 1449 { 1450 gint value; 1451 1452- pcre_fullinfo (regex->pcre_re, regex->extra, 1453- PCRE_INFO_CAPTURECOUNT, &value); 1454+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value); 1455 1456 return value; 1457 } 1458@@ -1551,8 +1778,7 @@ g_regex_get_has_cr_or_lf (const GRegex *regex) 1459 { 1460 gint value; 1461 1462- pcre_fullinfo (regex->pcre_re, regex->extra, 1463- PCRE_INFO_HASCRORLF, &value); 1464+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value); 1465 1466 return !!value; 1467 } 1468@@ -1574,8 +1800,8 @@ g_regex_get_max_lookbehind (const GRegex *regex) 1469 { 1470 gint max_lookbehind; 1471 1472- pcre_fullinfo (regex->pcre_re, regex->extra, 1473- PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind); 1474+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND, 1475+ &max_lookbehind); 1476 1477 return max_lookbehind; 1478 } 1479@@ -1597,9 +1823,47 @@ g_regex_get_max_lookbehind (const GRegex *regex) 1480 GRegexCompileFlags 1481 g_regex_get_compile_flags (const GRegex *regex) 1482 { 1483+ gint extra_flags, info_value; 1484+ 1485 g_return_val_if_fail (regex != NULL, 0); 1486 1487- return regex->compile_opts; 1488+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 1489+ /* Preserve original G_REGEX_OPTIMIZE */ 1490+ extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE); 1491+G_GNUC_END_IGNORE_DEPRECATIONS 1492+ 1493+ /* Also include the newline options */ 1494+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value); 1495+ switch (info_value) 1496+ { 1497+ case PCRE2_NEWLINE_ANYCRLF: 1498+ extra_flags |= G_REGEX_NEWLINE_ANYCRLF; 1499+ break; 1500+ case PCRE2_NEWLINE_CRLF: 1501+ extra_flags |= G_REGEX_NEWLINE_CRLF; 1502+ break; 1503+ case PCRE2_NEWLINE_LF: 1504+ extra_flags |= G_REGEX_NEWLINE_LF; 1505+ break; 1506+ case PCRE2_NEWLINE_CR: 1507+ extra_flags |= G_REGEX_NEWLINE_CR; 1508+ break; 1509+ default: 1510+ break; 1511+ } 1512+ 1513+ /* Also include the bsr options */ 1514+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BSR, &info_value); 1515+ switch (info_value) 1516+ { 1517+ case PCRE2_BSR_ANYCRLF: 1518+ extra_flags |= G_REGEX_BSR_ANYCRLF; 1519+ break; 1520+ default: 1521+ break; 1522+ } 1523+ 1524+ return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags; 1525 } 1526 1527 /** 1528@@ -1617,7 +1881,7 @@ g_regex_get_match_flags (const GRegex *regex) 1529 { 1530 g_return_val_if_fail (regex != NULL, 0); 1531 1532- return regex->match_opts & G_REGEX_MATCH_MASK; 1533+ return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK); 1534 } 1535 1536 /** 1537@@ -1651,6 +1915,9 @@ g_regex_match_simple (const gchar *pattern, 1538 GRegex *regex; 1539 gboolean result; 1540 1541+ compile_options = map_to_pcre2_compile_flags (compile_options); 1542+ match_options = map_to_pcre2_match_flags (match_options); 1543+ 1544 regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL); 1545 if (!regex) 1546 return FALSE; 1547@@ -1718,6 +1985,8 @@ g_regex_match (const GRegex *regex, 1548 GRegexMatchFlags match_options, 1549 GMatchInfo **match_info) 1550 { 1551+ match_options = map_to_pcre2_match_flags (match_options); 1552+ 1553 return g_regex_match_full (regex, string, -1, 0, match_options, 1554 match_info, NULL); 1555 } 1556@@ -1801,6 +2070,8 @@ g_regex_match_full (const GRegex *regex, 1557 GMatchInfo *info; 1558 gboolean match_ok; 1559 1560+ match_options = map_to_pcre2_match_flags (match_options); 1561+ 1562 g_return_val_if_fail (regex != NULL, FALSE); 1563 g_return_val_if_fail (string != NULL, FALSE); 1564 g_return_val_if_fail (start_position >= 0, FALSE); 1565@@ -1851,6 +2122,8 @@ g_regex_match_all (const GRegex *regex, 1566 GRegexMatchFlags match_options, 1567 GMatchInfo **match_info) 1568 { 1569+ match_options = map_to_pcre2_match_flags (match_options); 1570+ 1571 return g_regex_match_all_full (regex, string, -1, 0, match_options, 1572 match_info, NULL); 1573 } 1574@@ -1920,39 +2193,29 @@ g_regex_match_all_full (const GRegex *regex, 1575 { 1576 GMatchInfo *info; 1577 gboolean done; 1578- pcre *pcre_re; 1579- pcre_extra *extra; 1580+ pcre2_code *pcre_re; 1581 gboolean retval; 1582 1583+ match_options = map_to_pcre2_match_flags (match_options); 1584+ 1585 g_return_val_if_fail (regex != NULL, FALSE); 1586 g_return_val_if_fail (string != NULL, FALSE); 1587 g_return_val_if_fail (start_position >= 0, FALSE); 1588 g_return_val_if_fail (error == NULL || *error == NULL, FALSE); 1589 g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); 1590 1591-#ifdef PCRE_NO_AUTO_POSSESS 1592- /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which 1593- * is an optimization for normal regex matching, but results in omitting 1594- * some shorter matches here, and an observable behaviour change. 1595+ /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an 1596+ * optimization for normal regex matching, but results in omitting some 1597+ * shorter matches here, and an observable behaviour change. 1598 * 1599 * DFA matching is rather niche, and very rarely used according to 1600 * codesearch.debian.net, so don't bother caching the recompiled RE. */ 1601 pcre_re = regex_compile (regex->pattern, 1602- regex->compile_opts | PCRE_NO_AUTO_POSSESS, 1603+ regex->compile_opts | PCRE2_NO_AUTO_POSSESS, 1604 NULL, NULL, error); 1605- 1606 if (pcre_re == NULL) 1607 return FALSE; 1608 1609- /* Not bothering to cache the optimization data either, with similar 1610- * reasoning */ 1611- extra = NULL; 1612-#else 1613- /* For PCRE < 8.33 the precompiled regex is fine. */ 1614- pcre_re = regex->pcre_re; 1615- extra = regex->extra; 1616-#endif 1617- 1618 info = match_info_new (regex, string, string_len, start_position, 1619 match_options, TRUE); 1620 1621@@ -1960,29 +2223,38 @@ g_regex_match_all_full (const GRegex *regex, 1622 while (!done) 1623 { 1624 done = TRUE; 1625- info->matches = pcre_dfa_exec (pcre_re, extra, 1626- info->string, info->string_len, 1627- info->pos, 1628- regex->match_opts | match_options, 1629- info->offsets, info->n_offsets, 1630- info->workspace, info->n_workspace); 1631- if (info->matches == PCRE_ERROR_DFA_WSSIZE) 1632+ info->matches = pcre2_dfa_match (pcre_re, 1633+ (PCRE2_SPTR8) info->string, info->string_len, 1634+ info->pos, 1635+ (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED, 1636+ info->match_data, 1637+ info->match_context, 1638+ info->workspace, info->n_workspace); 1639+ 1640+ if (!recalc_match_offsets (info, error)) 1641+ return FALSE; 1642+ 1643+ if (info->matches == PCRE2_ERROR_DFA_WSSIZE) 1644 { 1645 /* info->workspace is too small. */ 1646 info->n_workspace *= 2; 1647- info->workspace = g_realloc (info->workspace, 1648- info->n_workspace * sizeof (gint)); 1649+ info->workspace = g_realloc_n (info->workspace, 1650+ info->n_workspace, 1651+ sizeof (gint)); 1652 done = FALSE; 1653 } 1654 else if (info->matches == 0) 1655 { 1656 /* info->offsets is too small. */ 1657 info->n_offsets *= 2; 1658- info->offsets = g_realloc (info->offsets, 1659- info->n_offsets * sizeof (gint)); 1660+ info->offsets = g_realloc_n (info->offsets, 1661+ info->n_offsets, 1662+ sizeof (gint)); 1663+ pcre2_match_data_free (info->match_data); 1664+ info->match_data = pcre2_match_data_create (info->n_offsets, NULL); 1665 done = FALSE; 1666 } 1667- else if (IS_PCRE_ERROR (info->matches)) 1668+ else if (IS_PCRE2_ERROR (info->matches)) 1669 { 1670 g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, 1671 _("Error while matching regular expression %s: %s"), 1672@@ -1990,9 +2262,7 @@ g_regex_match_all_full (const GRegex *regex, 1673 } 1674 } 1675 1676-#ifdef PCRE_NO_AUTO_POSSESS 1677- pcre_free (pcre_re); 1678-#endif 1679+ pcre2_code_free (pcre_re); 1680 1681 /* don’t assert that (info->matches <= info->n_subpatterns + 1) as that only 1682 * holds true for a single match, rather than matching all */ 1683@@ -2030,8 +2300,8 @@ g_regex_get_string_number (const GRegex *regex, 1684 g_return_val_if_fail (regex != NULL, -1); 1685 g_return_val_if_fail (name != NULL, -1); 1686 1687- num = pcre_get_stringnumber (regex->pcre_re, name); 1688- if (num == PCRE_ERROR_NOSUBSTRING) 1689+ num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR8) name); 1690+ if (num == PCRE2_ERROR_NOSUBSTRING) 1691 num = -1; 1692 1693 return num; 1694@@ -2086,6 +2356,9 @@ g_regex_split_simple (const gchar *pattern, 1695 GRegex *regex; 1696 gchar **result; 1697 1698+ compile_options = map_to_pcre2_compile_flags (compile_options); 1699+ match_options = map_to_pcre2_match_flags (match_options); 1700+ 1701 regex = g_regex_new (pattern, compile_options, 0, NULL); 1702 if (!regex) 1703 return NULL; 1704@@ -2129,6 +2402,8 @@ g_regex_split (const GRegex *regex, 1705 const gchar *string, 1706 GRegexMatchFlags match_options) 1707 { 1708+ match_options = map_to_pcre2_match_flags (match_options); 1709+ 1710 return g_regex_split_full (regex, string, -1, 0, 1711 match_options, 0, NULL); 1712 } 1713@@ -2193,6 +2468,8 @@ g_regex_split_full (const GRegex *regex, 1714 /* the returned array of char **s */ 1715 gchar **string_list; 1716 1717+ match_options = map_to_pcre2_match_flags (match_options); 1718+ 1719 g_return_val_if_fail (regex != NULL, NULL); 1720 g_return_val_if_fail (string != NULL, NULL); 1721 g_return_val_if_fail (start_position >= 0, NULL); 1722@@ -2817,6 +3094,8 @@ g_regex_replace (const GRegex *regex, 1723 GList *list; 1724 GError *tmp_error = NULL; 1725 1726+ match_options = map_to_pcre2_match_flags (match_options); 1727+ 1728 g_return_val_if_fail (regex != NULL, NULL); 1729 g_return_val_if_fail (string != NULL, NULL); 1730 g_return_val_if_fail (start_position >= 0, NULL); 1731@@ -2886,6 +3165,8 @@ g_regex_replace_literal (const GRegex *regex, 1732 GRegexMatchFlags match_options, 1733 GError **error) 1734 { 1735+ match_options = map_to_pcre2_match_flags (match_options); 1736+ 1737 g_return_val_if_fail (replacement != NULL, NULL); 1738 g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); 1739 1740@@ -2974,6 +3255,8 @@ g_regex_replace_eval (const GRegex *regex, 1741 gboolean done = FALSE; 1742 GError *tmp_error = NULL; 1743 1744+ match_options = map_to_pcre2_match_flags (match_options); 1745+ 1746 g_return_val_if_fail (regex != NULL, NULL); 1747 g_return_val_if_fail (string != NULL, NULL); 1748 g_return_val_if_fail (start_position >= 0, NULL); 1749diff --git a/glib/gregex.h b/glib/gregex.h 1750index 817f667..11b419d 100644 1751--- a/glib/gregex.h 1752+++ b/glib/gregex.h 1753@@ -262,7 +262,9 @@ GQuark g_regex_error_quark (void); 1754 * in the usual way). 1755 * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will 1756 * be used many times, then it may be worth the effort to optimize it 1757- * to improve the speed of matches. 1758+ * to improve the speed of matches. Deprecated in GLib 2.74 which now uses 1759+ * libpcre2, which doesn’t require separate optimization of queries. This 1760+ * option is now a no-op. Deprecated: 2.74 1761 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the 1762 * first newline. Since: 2.34 1763 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not 1764@@ -285,7 +287,8 @@ GQuark g_regex_error_quark (void); 1765 * is recognised. If this option is set, then "\R" only recognizes the newline 1766 * characters '\r', '\n' and '\r\n'. Since: 2.34 1767 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with 1768- * JavaScript rather than PCRE. Since: 2.34 1769+ * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported, 1770+ * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74 1771 * 1772 * Flags specifying compile-time options. 1773 * 1774@@ -306,7 +309,7 @@ typedef enum 1775 G_REGEX_UNGREEDY = 1 << 9, 1776 G_REGEX_RAW = 1 << 11, 1777 G_REGEX_NO_AUTO_CAPTURE = 1 << 12, 1778- G_REGEX_OPTIMIZE = 1 << 13, 1779+ G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13, 1780 G_REGEX_FIRSTLINE = 1 << 18, 1781 G_REGEX_DUPNAMES = 1 << 19, 1782 G_REGEX_NEWLINE_CR = 1 << 20, 1783@@ -314,7 +317,7 @@ typedef enum 1784 G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, 1785 G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, 1786 G_REGEX_BSR_ANYCRLF = 1 << 23, 1787- G_REGEX_JAVASCRIPT_COMPAT = 1 << 25 1788+ G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25 1789 } GRegexCompileFlags; 1790 1791 /** 1792diff --git a/glib/meson.build b/glib/meson.build 1793index 93fa504..5bf82da 100644 1794--- a/glib/meson.build 1795+++ b/glib/meson.build 1796@@ -357,13 +357,13 @@ else 1797 glib_dtrace_hdr = [] 1798 endif 1799 1800-pcre_static_args = [] 1801+pcre2_static_args = [] 1802 1803-if use_pcre_static_flag 1804- pcre_static_args = ['-DPCRE_STATIC'] 1805+if use_pcre2_static_flag 1806+ pcre2_static_args = ['-DPCRE2_STATIC'] 1807 endif 1808 1809-glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args 1810+glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre2_static_args + glib_hidden_visibility_args 1811 libglib = library('glib-2.0', 1812 glib_dtrace_obj, glib_dtrace_hdr, 1813 sources : [deprecated_sources, glib_sources], 1814@@ -375,7 +375,7 @@ libglib = library('glib-2.0', 1815 link_args : [noseh_link_args, glib_link_flags, win32_ldflags], 1816 include_directories : configinc, 1817 link_with: [charset_lib, gnulib_lib], 1818- dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep], 1819+ dependencies : [pcre2, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep], 1820 c_args : glib_c_args, 1821 objc_args : glib_c_args, 1822 ) 1823diff --git a/glib/tests/meson.build b/glib/tests/meson.build 1824index 301158e..c1a9ceb 100644 1825--- a/glib/tests/meson.build 1826+++ b/glib/tests/meson.build 1827@@ -86,8 +86,8 @@ glib_tests = { 1828 }, 1829 'refstring' : {}, 1830 'regex' : { 1831- 'dependencies' : [pcre], 1832- 'c_args' : use_pcre_static_flag ? ['-DPCRE_STATIC'] : [], 1833+ 'dependencies' : [pcre2], 1834+ 'c_args' : use_pcre2_static_flag ? ['-DPCRE2_STATIC'] : [], 1835 }, 1836 'relation' : {}, 1837 'rwlock' : {}, 1838diff --git a/glib/tests/regex.c b/glib/tests/regex.c 1839index 50fd9c6..36982fb 100644 1840--- a/glib/tests/regex.c 1841+++ b/glib/tests/regex.c 1842@@ -25,7 +25,8 @@ 1843 #include <locale.h> 1844 #include "glib.h" 1845 1846-#include <pcre.h> 1847+#define PCRE2_CODE_UNIT_WIDTH 8 1848+#include <pcre2.h> 1849 1850 /* U+20AC EURO SIGN (symbol, currency) */ 1851 #define EURO "\xe2\x82\xac" 1852@@ -1501,7 +1502,7 @@ test_properties (void) 1853 gchar *str; 1854 1855 error = NULL; 1856- regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1857+ regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1858 res = g_regex_match (regex, "ppPP01", 0, &match); 1859 g_assert (res); 1860 str = g_match_info_fetch (match, 0); 1861@@ -1522,7 +1523,7 @@ test_class (void) 1862 gchar *str; 1863 1864 error = NULL; 1865- regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1866+ regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1867 res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match); 1868 g_assert (res); 1869 str = g_match_info_fetch (match, 0); 1870@@ -1568,7 +1569,7 @@ test_lookahead (void) 1871 gint start, end; 1872 1873 error = NULL; 1874- regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1875+ regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1876 g_assert (regex); 1877 g_assert_no_error (error); 1878 res = g_regex_match (regex, "word1 word2: word3;", 0, &match); 1879@@ -1582,7 +1583,7 @@ test_lookahead (void) 1880 g_regex_unref (regex); 1881 1882 error = NULL; 1883- regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1884+ regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1885 g_assert (regex); 1886 g_assert_no_error (error); 1887 res = g_regex_match (regex, "foobar foobaz", 0, &match); 1888@@ -1597,7 +1598,7 @@ test_lookahead (void) 1889 g_regex_unref (regex); 1890 1891 error = NULL; 1892- regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1893+ regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1894 g_assert (regex); 1895 g_assert_no_error (error); 1896 res = g_regex_match (regex, "foobar foobaz", 0, &match); 1897@@ -1630,7 +1631,7 @@ test_lookbehind (void) 1898 gint start, end; 1899 1900 error = NULL; 1901- regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1902+ regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1903 g_assert (regex); 1904 g_assert_no_error (error); 1905 res = g_regex_match (regex, "foobar boobar", 0, &match); 1906@@ -1645,7 +1646,7 @@ test_lookbehind (void) 1907 g_regex_unref (regex); 1908 1909 error = NULL; 1910- regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1911+ regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1912 g_assert (regex); 1913 g_assert_no_error (error); 1914 res = g_regex_match (regex, "don poo, and bullock poo", 0, &match); 1915@@ -1658,17 +1659,17 @@ test_lookbehind (void) 1916 g_match_info_free (match); 1917 g_regex_unref (regex); 1918 1919- regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1920+ regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1921 g_assert (regex == NULL); 1922 g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); 1923 g_clear_error (&error); 1924 1925- regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1926+ regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1927 g_assert (regex == NULL); 1928 g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); 1929 g_clear_error (&error); 1930 1931- regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1932+ regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1933 g_assert (regex); 1934 g_assert_no_error (error); 1935 res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match); 1936@@ -1680,7 +1681,7 @@ test_lookbehind (void) 1937 g_match_info_free (match); 1938 g_regex_unref (regex); 1939 1940- regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1941+ regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1942 g_assert (regex); 1943 g_assert_no_error (error); 1944 res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match); 1945@@ -1689,7 +1690,7 @@ test_lookbehind (void) 1946 g_match_info_free (match); 1947 g_regex_unref (regex); 1948 1949- regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1950+ regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1951 g_assert (regex); 1952 g_assert_no_error (error); 1953 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 1954@@ -1701,7 +1702,7 @@ test_lookbehind (void) 1955 g_match_info_free (match); 1956 g_regex_unref (regex); 1957 1958- regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1959+ regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1960 g_assert (regex); 1961 g_assert_no_error (error); 1962 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 1963@@ -1713,7 +1714,7 @@ test_lookbehind (void) 1964 g_match_info_free (match); 1965 g_regex_unref (regex); 1966 1967- regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1968+ regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1969 g_assert (regex); 1970 g_assert_no_error (error); 1971 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 1972@@ -1725,7 +1726,7 @@ test_lookbehind (void) 1973 g_match_info_free (match); 1974 g_regex_unref (regex); 1975 1976- regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1977+ regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1978 g_assert (regex); 1979 g_assert_no_error (error); 1980 res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match); 1981@@ -1750,7 +1751,7 @@ test_subpattern (void) 1982 gint start; 1983 1984 error = NULL; 1985- regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1986+ regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1987 g_assert (regex); 1988 g_assert_no_error (error); 1989 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1); 1990@@ -1768,7 +1769,7 @@ test_subpattern (void) 1991 g_match_info_free (match); 1992 g_regex_unref (regex); 1993 1994- regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 1995+ regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 1996 g_assert (regex); 1997 g_assert_no_error (error); 1998 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3); 1999@@ -1792,7 +1793,7 @@ test_subpattern (void) 2000 g_match_info_free (match); 2001 g_regex_unref (regex); 2002 2003- regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2004+ regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2005 g_assert (regex); 2006 g_assert_no_error (error); 2007 res = g_regex_match (regex, "the white queen", 0, &match); 2008@@ -1812,7 +1813,7 @@ test_subpattern (void) 2009 g_match_info_free (match); 2010 g_regex_unref (regex); 2011 2012- regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2013+ regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2014 g_assert (regex); 2015 g_assert_no_error (error); 2016 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3); 2017@@ -1832,7 +1833,7 @@ test_subpattern (void) 2018 g_match_info_free (match); 2019 g_regex_unref (regex); 2020 2021- regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2022+ regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2023 g_assert (regex); 2024 g_assert_no_error (error); 2025 g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1); 2026@@ -1850,7 +1851,7 @@ test_subpattern (void) 2027 g_match_info_free (match); 2028 g_regex_unref (regex); 2029 2030- regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2031+ regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2032 g_assert (regex); 2033 g_assert_no_error (error); 2034 res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match); 2035@@ -1867,7 +1868,7 @@ test_subpattern (void) 2036 g_match_info_free (match); 2037 g_regex_unref (regex); 2038 2039- regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 2040+ regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 2041 g_assert (regex); 2042 g_assert_no_error (error); 2043 res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match); 2044@@ -1894,7 +1895,7 @@ test_subpattern (void) 2045 g_match_info_free (match); 2046 g_regex_unref (regex); 2047 2048- regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 2049+ regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 2050 g_assert (regex); 2051 g_assert_no_error (error); 2052 res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match); 2053@@ -1918,7 +1919,7 @@ test_condition (void) 2054 gboolean res; 2055 2056 error = NULL; 2057- regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2058+ regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2059 g_assert (regex); 2060 g_assert_no_error (error); 2061 res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); 2062@@ -1932,7 +1933,7 @@ test_condition (void) 2063 g_regex_unref (regex); 2064 2065 error = NULL; 2066- regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2067+ regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2068 g_assert (regex); 2069 g_assert_no_error (error); 2070 res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); 2071@@ -1945,7 +1946,7 @@ test_condition (void) 2072 g_match_info_free (match); 2073 g_regex_unref (regex); 2074 2075- regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2076+ regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2077 g_assert (regex); 2078 g_assert_no_error (error); 2079 res = g_regex_match (regex, "a[zzzzzz]b", 0, &match); 2080@@ -1960,7 +1961,7 @@ test_condition (void) 2081 2082 regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )" 2083 "\\b (?&byte) (\\.(?&byte)){3} \\b", 2084- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); 2085+ G_REGEX_EXTENDED, 0, &error); 2086 g_assert (regex); 2087 g_assert_no_error (error); 2088 res = g_regex_match (regex, "128.0.0.1", 0, &match); 2089@@ -1979,7 +1980,7 @@ test_condition (void) 2090 2091 regex = g_regex_new ("^(?(?=[^a-z]*[a-z])" 2092 "\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$", 2093- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); 2094+ G_REGEX_EXTENDED, 0, &error); 2095 g_assert (regex); 2096 g_assert_no_error (error); 2097 res = g_regex_match (regex, "01-abc-24", 0, &match); 2098@@ -2012,7 +2013,7 @@ test_recursion (void) 2099 gint start; 2100 2101 error = NULL; 2102- regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2103+ regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2104 g_assert (regex); 2105 g_assert_no_error (error); 2106 res = g_regex_match (regex, "(middle)", 0, &match); 2107@@ -2029,7 +2030,7 @@ test_recursion (void) 2108 g_match_info_free (match); 2109 g_regex_unref (regex); 2110 2111- regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2112+ regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2113 g_assert (regex); 2114 g_assert_no_error (error); 2115 res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match); 2116@@ -2042,7 +2043,7 @@ test_recursion (void) 2117 g_match_info_free (match); 2118 g_regex_unref (regex); 2119 2120- regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2121+ regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2122 g_assert (regex); 2123 g_assert_no_error (error); 2124 g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match); 2125@@ -2051,7 +2052,7 @@ test_recursion (void) 2126 g_match_info_free (match); 2127 g_regex_unref (regex); 2128 2129- regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2130+ regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 2131 g_assert (regex); 2132 g_assert_no_error (error); 2133 res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match); 2134@@ -2070,7 +2071,7 @@ test_recursion (void) 2135 g_match_info_free (match); 2136 g_regex_unref (regex); 2137 2138- regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2139+ regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2140 g_assert (regex); 2141 g_assert_no_error (error); 2142 res = g_regex_match (regex, "abcdcba", 0, &match); 2143@@ -2083,7 +2084,7 @@ test_recursion (void) 2144 g_match_info_free (match); 2145 g_regex_unref (regex); 2146 2147- regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 2148+ regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 2149 g_assert (regex); 2150 g_assert_no_error (error); 2151 res = g_regex_match (regex, "abcdcba", 0, &match); 2152@@ -2096,7 +2097,7 @@ test_recursion (void) 2153 g_match_info_free (match); 2154 g_regex_unref (regex); 2155 2156- regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error); 2157+ regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error); 2158 g_assert (regex); 2159 g_assert_no_error (error); 2160 res = g_regex_match (regex, "abcdcba", 0, &match); 2161@@ -2167,21 +2168,21 @@ test_max_lookbehind (void) 2162 } 2163 2164 static gboolean 2165-pcre_ge (guint64 major, guint64 minor) 2166+pcre2_ge (guint64 major, guint64 minor) 2167 { 2168- const char *version; 2169- gchar *ptr; 2170- guint64 pcre_major, pcre_minor; 2171+ gchar version[32]; 2172+ const gchar *ptr; 2173+ guint64 pcre2_major, pcre2_minor; 2174 2175- /* e.g. 8.35 2014-04-04 */ 2176- version = pcre_version (); 2177+ /* e.g. 10.36 2020-12-04 */ 2178+ pcre2_config (PCRE2_CONFIG_VERSION, version); 2179 2180- pcre_major = g_ascii_strtoull (version, &ptr, 10); 2181+ pcre2_major = g_ascii_strtoull (version, (gchar **) &ptr, 10); 2182 /* ptr points to ".MINOR (release date)" */ 2183 g_assert (ptr[0] == '.'); 2184- pcre_minor = g_ascii_strtoull (ptr + 1, NULL, 10); 2185+ pcre2_minor = g_ascii_strtoull (ptr + 1, NULL, 10); 2186 2187- return (pcre_major > major) || (pcre_major == major && pcre_minor >= minor); 2188+ return (pcre2_major > major) || (pcre2_major == major && pcre2_minor >= minor); 2189 } 2190 2191 int 2192@@ -2203,18 +2204,26 @@ main (int argc, char *argv[]) 2193 g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind); 2194 2195 /* TEST_NEW(pattern, compile_opts, match_opts) */ 2196+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 2197 TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); 2198+G_GNUC_END_IGNORE_DEPRECATIONS 2199 TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2200 TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2201+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 2202 TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 2203+G_GNUC_END_IGNORE_DEPRECATIONS 2204 TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT); 2205 TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT); 2206 TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL); 2207 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2208 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT); 2209+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 2210 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 2211+G_GNUC_END_IGNORE_DEPRECATIONS 2212 TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT); 2213+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 2214 TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 2215+G_GNUC_END_IGNORE_DEPRECATIONS 2216 /* This gives "internal error: code overflow" with pcre 6.0 */ 2217 TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2218 TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2219@@ -2225,9 +2234,10 @@ main (int argc, char *argv[]) 2220 TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 2221 2222 /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ 2223+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 2224 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); 2225+G_GNUC_END_IGNORE_DEPRECATIONS 2226 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); 2227- TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0); 2228 TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); 2229 TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); 2230 TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0); 2231@@ -2255,16 +2265,16 @@ main (int argc, char *argv[]) 2232 TEST_NEW_FAIL ("a{4,2}", 0, G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER); 2233 TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG); 2234 TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS); 2235- TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); 2236+ TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); 2237 TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER); 2238 TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT); 2239 TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); 2240- TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); 2241+ TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); 2242 TEST_NEW_FAIL ("a[:alpha:]b", 0, G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS); 2243 TEST_NEW_FAIL ("a(b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); 2244 TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); 2245 TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); 2246- TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS); 2247+ TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); 2248 TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); 2249 TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT); 2250 TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); 2251@@ -2274,51 +2284,31 @@ main (int argc, char *argv[]) 2252 TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME); 2253 TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED); 2254 TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE); 2255- TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION); 2256+ TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); 2257 TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND); 2258- TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP); 2259- if (pcre_ge (8, 37)) 2260- { 2261- /* The expected errors changed here. */ 2262- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED); 2263- } 2264- else 2265- { 2266- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR); 2267- } 2268- 2269- if (pcre_ge (8, 35)) 2270- { 2271- /* The expected errors changed here. */ 2272- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR); 2273- } 2274- else 2275- { 2276- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE); 2277- } 2278+ TEST_NEW ("(?!\\w)(?R)", 0, 0); 2279+ TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED); 2280+ TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR); 2281 TEST_NEW_FAIL ("(?P<x>eks)(?P<x>eccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME); 2282-#if 0 2283- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY); 2284- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY); 2285-#endif 2286 TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE); 2287 TEST_NEW_FAIL ("^(?(DEFINE) abc | xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE); 2288 TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS); 2289 TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE); 2290- TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE); 2291- TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN); 2292+ TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE); 2293+ TEST_NEW ("abc(*FAIL:123)xyz", 0, 0); 2294 TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB); 2295- TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG); 2296+ if (pcre2_ge (10, 37)) 2297+ { 2298+ TEST_NEW ("(?i:A{1,}\\6666666666)", 0, 0); 2299+ } 2300 TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); 2301- TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT); 2302- TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER); 2303+ TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE); 2304 TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME); 2305 TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED); 2306 TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR); 2307 TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME); 2308 TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS); 2309 TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG); 2310- TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE); 2311 2312 /* These errors can't really be tested easily: 2313 * G_REGEX_ERROR_EXPRESSION_TOO_LARGE 2314@@ -2474,7 +2464,15 @@ main (int argc, char *argv[]) 2315 TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 2316 TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 2317 TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 2318- TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, FALSE); 2319+ /* Due to PCRE2 only supporting newline settings passed to pcre2_compile (and 2320+ * not to pcre2_match also), we have to compile the pattern with the 2321+ * effective (combined from compile and match options) newline setting. 2322+ * However, this setting also affects how newlines are interpreted *inside* 2323+ * the pattern. With G_REGEX_EXTENDED, this changes where the comment 2324+ * (started with `#`) ends. 2325+ */ 2326+ /* On PCRE1, this test expected no match; on PCRE2 it matches because of the above. */ 2327+ TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE /*FALSE*/); 2328 TEST_MATCH("a#\nb", G_REGEX_EXTENDED | G_REGEX_NEWLINE_CR, 0, "a", -1, 0, 0, TRUE); 2329 2330 TEST_MATCH("line\nbreak", G_REGEX_MULTILINE, 0, "this is a line\nbreak", -1, 0, 0, TRUE); 2331@@ -2487,21 +2485,19 @@ main (int argc, char *argv[]) 2332 * with pcre's internal tables. Bug #678273 */ 2333 TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE); 2334 TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE); 2335-#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32) 2336- /* This would incorrectly fail to match in pcre < 8.32, so only assert 2337- * this for known-good pcre. */ 2338 TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE); 2339-#endif 2340 2341 /* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */ 2342 TEST_MATCH_NEXT0("a", "x", -1, 0); 2343 TEST_MATCH_NEXT0("a", "ax", -1, 1); 2344 TEST_MATCH_NEXT0("a", "xa", 1, 0); 2345 TEST_MATCH_NEXT0("a", "axa", 1, 2); 2346+ TEST_MATCH_NEXT1("", "", -1, 0, "", 0, 0); 2347 TEST_MATCH_NEXT1("a", "a", -1, 0, "a", 0, 1); 2348 TEST_MATCH_NEXT1("a", "xax", -1, 0, "a", 1, 2); 2349 TEST_MATCH_NEXT1(EURO, ENG EURO, -1, 0, EURO, 2, 5); 2350 TEST_MATCH_NEXT1("a*", "", -1, 0, "", 0, 0); 2351+ TEST_MATCH_NEXT2("", "a", -1, 0, "", 0, 0, "", 1, 1); 2352 TEST_MATCH_NEXT2("a*", "aa", -1, 0, "aa", 0, 2, "", 2, 2); 2353 TEST_MATCH_NEXT2(EURO "*", EURO EURO, -1, 0, EURO EURO, 0, 6, "", 6, 6); 2354 TEST_MATCH_NEXT2("a", "axa", -1, 0, "a", 0, 1, "a", 2, 3); 2355@@ -2675,11 +2671,6 @@ main (int argc, char *argv[]) 2356 TEST_EXPAND("a", "a", "\\0130", FALSE, "X"); 2357 TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a"); 2358 TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX"); 2359-#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34)) 2360- /* PCRE >= 8.34 no longer allows this usage. */ 2361- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a"); 2362- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a"); 2363-#endif 2364 TEST_EXPAND(".", EURO, "\\0", FALSE, EURO); 2365 TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO); 2366 TEST_EXPAND("(?P<G>.)", EURO, "\\g<G>", FALSE, EURO); 2367@@ -2798,6 +2789,10 @@ main (int argc, char *argv[]) 2368 TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "A", 1); 2369 TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "B", 2); 2370 TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "C", -1); 2371+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "A", 1); 2372+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "B", 2); 2373+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "C", 3); 2374+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "D", -1); 2375 TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "A", 1); 2376 TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "B", 3); 2377 TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "C", -1); 2378diff --git a/meson.build b/meson.build 2379index 882049c..657e9f6 100644 2380--- a/meson.build 2381+++ b/meson.build 2382@@ -2024,37 +2024,38 @@ else 2383 endif 2384 endif 2385 2386-pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME 2387-if not pcre.found() 2388+pcre2 = dependency('libpcre2-8', version: '>= 10.32', required : false) 2389+if not pcre2.found() 2390 if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl' 2391- # MSVC: Search for the PCRE library by the configuration, which corresponds 2392- # to the output of CMake builds of PCRE. Note that debugoptimized 2393+ # MSVC: Search for the PCRE2 library by the configuration, which corresponds 2394+ # to the output of CMake builds of PCRE2. Note that debugoptimized 2395 # is really a Release build with .PDB files. 2396 if vs_crt == 'debug' 2397- pcre = cc.find_library('pcred', required : false) 2398+ pcre2 = cc.find_library('pcre2d-8', required : false) 2399 else 2400- pcre = cc.find_library('pcre', required : false) 2401+ pcre2 = cc.find_library('pcre2-8', required : false) 2402 endif 2403 endif 2404 endif 2405 2406 # Try again with the fallback 2407-if not pcre.found() 2408- pcre = dependency('libpcre', required : true, fallback : ['pcre', 'pcre_dep']) 2409- use_pcre_static_flag = true 2410+if not pcre2.found() 2411+ pcre2 = dependency('libpcre2-8', required : true, fallback : ['pcre2', 'libpcre2_8']) 2412+ use_pcre2_static_flag = true 2413 elif host_system == 'windows' 2414- pcre_static = cc.links('''#define PCRE_STATIC 2415- #include <pcre.h> 2416- int main() { 2417- void *p = NULL; 2418- pcre_free(p); 2419- return 0; 2420- }''', 2421- dependencies: pcre, 2422- name : 'Windows system PCRE is a static build') 2423- use_pcre_static_flag = pcre_static 2424+ pcre2_static = cc.links('''#define PCRE2_STATIC 2425+ #define PCRE2_CODE_UNIT_WIDTH 8 2426+ #include <pcre2.h> 2427+ int main() { 2428+ void *p = NULL; 2429+ pcre2_code_free(p); 2430+ return 0; 2431+ }''', 2432+ dependencies: pcre2, 2433+ name : 'Windows system PCRE2 is a static build') 2434+ use_pcre2_static_flag = pcre2_static 2435 else 2436- use_pcre_static_flag = false 2437+ use_pcre2_static_flag = false 2438 endif 2439 2440 libm = cc.find_library('m', required : false) 2441diff --git a/po/sk.po b/po/sk.po 2442index 8d6a1ce..747ad27 100644 2443--- a/po/sk.po 2444+++ b/po/sk.po 2445@@ -5630,7 +5630,7 @@ msgstr "zlý ofset" 2446 msgid "short utf8" 2447 msgstr "krátke utf8" 2448 2449-# Ide o omyl programátora: case PCRE_ERROR_RECURSELOOP: return _("recursion loop"); 2450+# Ide o omyl programátora: case PCRE2_ERROR_RECURSELOOP: return _("recursion loop"); 2451 #: glib/gregex.c:303 2452 msgid "recursion loop" 2453 msgstr "rekurzívna slučka" 2454diff --git a/subprojects/pcre.wrap b/subprojects/pcre.wrap 2455deleted file mode 100644 2456index a6b07b9..0000000 2457--- a/subprojects/pcre.wrap 2458+++ /dev/null 2459@@ -1,11 +0,0 @@ 2460-[wrap-file] 2461-directory = pcre-8.37 2462-source_url = https://sourceforge.net/projects/pcre/files/pcre/8.37/pcre-8.37.tar.bz2 2463-source_filename = pcre-8.37.tar.bz2 2464-source_hash = 51679ea8006ce31379fb0860e46dd86665d864b5020fc9cd19e71260eef4789d 2465-patch_filename = pcre_8.37-4_patch.zip 2466-patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.37-4/get_patch 2467-patch_hash = c957f42da6f6378300eb8a18f4a5cccdb8e2aada51a703cac842982f9f785399 2468- 2469-[provide] 2470-libpcre = pcre_dep 2471-- 24722.33.0 2473 2474