1From d639c4ec009537b743dcd2209184638d9f5d68b9 Mon Sep 17 00:00:00 2001 2From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net> 3Date: Tue, 6 Sep 2022 14:49:10 +0200 4Subject: [PATCH] regex: Do not mix PCRE2 Compile, Match, Newline and BSR flags 5 6As per the PCRE2 port we still used to try to map the old GRegex flags 7(PCRE1 based) with the new PCRE2 ones, but doing that we were also 8mixing flags with enums, leading to unexpected behaviors when trying to 9get new line and BSR options out of bigger flags arrays. 10 11So, avoid doing any mapping and store the values as native PCRE2 flags 12internally and converting them back only when requested. 13 14This fixes some regressions on newline handling. 15 16Fixes: #2729 17Fixes: #2688 18Fixes: GNOME/gtksourceview#278 19--- 20 glib/gregex.c | 637 +++++++++++++++++++++++---------------------- 21 glib/tests/regex.c | 18 ++ 22 2 files changed, 341 insertions(+), 314 deletions(-) 23 24diff --git a/glib/gregex.c b/glib/gregex.c 25index a16ea98..95695f7 100644 26--- a/glib/gregex.c 27+++ b/glib/gregex.c 28@@ -3,6 +3,7 @@ 29 * Copyright (C) 1999, 2000 Scott Wimer 30 * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> 31 * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> 32+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com> 33 * 34 * This library is free software; you can redistribute it and/or 35 * modify it under the terms of the GNU Lesser General Public 36@@ -108,62 +109,105 @@ 37 * library written by Philip Hazel. 38 */ 39 40-/* Signifies that flags have already been converted from pcre1 to pcre2. The 41- * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h, 42- * but it is not used in gregex, so we can reuse it for this flag. 43- */ 44-#define G_REGEX_FLAGS_CONVERTED 0x04000000u 45+#define G_REGEX_PCRE_GENERIC_MASK (PCRE2_ANCHORED | \ 46+ PCRE2_NO_UTF_CHECK | \ 47+ PCRE2_ENDANCHORED) 48+ 49 /* Mask of all the possible values for GRegexCompileFlags. */ 50-#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \ 51- PCRE2_MULTILINE | \ 52- PCRE2_DOTALL | \ 53- PCRE2_EXTENDED | \ 54- PCRE2_ANCHORED | \ 55- PCRE2_DOLLAR_ENDONLY | \ 56- PCRE2_UNGREEDY | \ 57- PCRE2_UTF | \ 58- PCRE2_NO_AUTO_CAPTURE | \ 59- PCRE2_FIRSTLINE | \ 60- PCRE2_DUPNAMES | \ 61- PCRE2_NEWLINE_CR | \ 62- PCRE2_NEWLINE_LF | \ 63- PCRE2_NEWLINE_CRLF | \ 64- PCRE2_NEWLINE_ANYCRLF | \ 65- PCRE2_BSR_ANYCRLF | \ 66- G_REGEX_FLAGS_CONVERTED) 67- 68-/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ 69-#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) 70-#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \ 71- G_REGEX_FLAGS_CONVERTED) 72+#define G_REGEX_COMPILE_MASK (G_REGEX_DEFAULT | \ 73+ G_REGEX_CASELESS | \ 74+ G_REGEX_MULTILINE | \ 75+ G_REGEX_DOTALL | \ 76+ G_REGEX_EXTENDED | \ 77+ G_REGEX_ANCHORED | \ 78+ G_REGEX_DOLLAR_ENDONLY | \ 79+ G_REGEX_UNGREEDY | \ 80+ G_REGEX_RAW | \ 81+ G_REGEX_NO_AUTO_CAPTURE | \ 82+ G_REGEX_OPTIMIZE | \ 83+ G_REGEX_FIRSTLINE | \ 84+ G_REGEX_DUPNAMES | \ 85+ G_REGEX_NEWLINE_CR | \ 86+ G_REGEX_NEWLINE_LF | \ 87+ G_REGEX_NEWLINE_CRLF | \ 88+ G_REGEX_NEWLINE_ANYCRLF | \ 89+ G_REGEX_BSR_ANYCRLF) 90+ 91+#define G_REGEX_PCRE2_COMPILE_MASK (PCRE2_ALLOW_EMPTY_CLASS | \ 92+ PCRE2_ALT_BSUX | \ 93+ PCRE2_AUTO_CALLOUT | \ 94+ PCRE2_CASELESS | \ 95+ PCRE2_DOLLAR_ENDONLY | \ 96+ PCRE2_DOTALL | \ 97+ PCRE2_DUPNAMES | \ 98+ PCRE2_EXTENDED | \ 99+ PCRE2_FIRSTLINE | \ 100+ PCRE2_MATCH_UNSET_BACKREF | \ 101+ PCRE2_MULTILINE | \ 102+ PCRE2_NEVER_UCP | \ 103+ PCRE2_NEVER_UTF | \ 104+ PCRE2_NO_AUTO_CAPTURE | \ 105+ PCRE2_NO_AUTO_POSSESS | \ 106+ PCRE2_NO_DOTSTAR_ANCHOR | \ 107+ PCRE2_NO_START_OPTIMIZE | \ 108+ PCRE2_UCP | \ 109+ PCRE2_UNGREEDY | \ 110+ PCRE2_UTF | \ 111+ PCRE2_NEVER_BACKSLASH_C | \ 112+ PCRE2_ALT_CIRCUMFLEX | \ 113+ PCRE2_ALT_VERBNAMES | \ 114+ PCRE2_USE_OFFSET_LIMIT | \ 115+ PCRE2_EXTENDED_MORE | \ 116+ PCRE2_LITERAL | \ 117+ PCRE2_MATCH_INVALID_UTF | \ 118+ G_REGEX_PCRE_GENERIC_MASK) 119+ 120+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF) 121 122 /* Mask of all the possible values for GRegexMatchFlags. */ 123-#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \ 124- PCRE2_NOTBOL | \ 125- PCRE2_NOTEOL | \ 126- PCRE2_NOTEMPTY | \ 127- PCRE2_NEWLINE_CR | \ 128- PCRE2_NEWLINE_LF | \ 129- PCRE2_NEWLINE_CRLF | \ 130- PCRE2_NEWLINE_ANY | \ 131- PCRE2_NEWLINE_ANYCRLF | \ 132- PCRE2_BSR_ANYCRLF | \ 133- PCRE2_BSR_UNICODE | \ 134- PCRE2_PARTIAL_SOFT | \ 135- PCRE2_PARTIAL_HARD | \ 136- PCRE2_NOTEMPTY_ATSTART | \ 137- G_REGEX_FLAGS_CONVERTED) 138- 139+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_DEFAULT | \ 140+ G_REGEX_MATCH_ANCHORED | \ 141+ G_REGEX_MATCH_NOTBOL | \ 142+ G_REGEX_MATCH_NOTEOL | \ 143+ G_REGEX_MATCH_NOTEMPTY | \ 144+ G_REGEX_MATCH_PARTIAL | \ 145+ G_REGEX_MATCH_NEWLINE_CR | \ 146+ G_REGEX_MATCH_NEWLINE_LF | \ 147+ G_REGEX_MATCH_NEWLINE_CRLF | \ 148+ G_REGEX_MATCH_NEWLINE_ANY | \ 149+ G_REGEX_MATCH_NEWLINE_ANYCRLF | \ 150+ G_REGEX_MATCH_BSR_ANYCRLF | \ 151+ G_REGEX_MATCH_BSR_ANY | \ 152+ G_REGEX_MATCH_PARTIAL_SOFT | \ 153+ G_REGEX_MATCH_PARTIAL_HARD | \ 154+ G_REGEX_MATCH_NOTEMPTY_ATSTART) 155+ 156+#define G_REGEX_PCRE2_MATCH_MASK (PCRE2_NOTBOL |\ 157+ PCRE2_NOTEOL |\ 158+ PCRE2_NOTEMPTY |\ 159+ PCRE2_NOTEMPTY_ATSTART |\ 160+ PCRE2_PARTIAL_SOFT |\ 161+ PCRE2_PARTIAL_HARD |\ 162+ PCRE2_NO_JIT |\ 163+ PCRE2_COPY_MATCHED_SUBJECT |\ 164+ G_REGEX_PCRE_GENERIC_MASK) 165+ 166+/* TODO: Support PCRE2_NEWLINE_NUL */ 167 #define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 168 PCRE2_NEWLINE_LF | \ 169 PCRE2_NEWLINE_CRLF | \ 170 PCRE2_NEWLINE_ANYCRLF) 171 172-#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 173- PCRE2_NEWLINE_LF | \ 174- PCRE2_NEWLINE_CRLF | \ 175- PCRE2_NEWLINE_ANYCRLF | \ 176- PCRE2_NEWLINE_ANY) 177+#define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \ 178+ G_REGEX_NEWLINE_LF | \ 179+ G_REGEX_NEWLINE_CRLF | \ 180+ G_REGEX_NEWLINE_ANYCRLF) 181+ 182+#define G_REGEX_MATCH_NEWLINE_MASK (G_REGEX_MATCH_NEWLINE_CR | \ 183+ G_REGEX_MATCH_NEWLINE_LF | \ 184+ G_REGEX_MATCH_NEWLINE_CRLF | \ 185+ G_REGEX_MATCH_NEWLINE_ANY | \ 186+ G_REGEX_MATCH_NEWLINE_ANYCRLF) 187 188 /* if the string is in UTF-8 use g_utf8_ functions, else use 189 * use just +/- 1. */ 190@@ -178,7 +222,7 @@ struct _GMatchInfo 191 { 192 gint ref_count; /* the ref count (atomic) */ 193 GRegex *regex; /* the regex */ 194- GRegexMatchFlags match_opts; /* options used at match time on the regex */ 195+ uint32_t match_opts; /* pcre match options used at match time on the regex */ 196 gint matches; /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */ 197 gint n_subpatterns; /* total number of sub patterns in the regex */ 198 gint pos; /* position in the string where last match left off */ 199@@ -204,9 +248,10 @@ struct _GRegex 200 gint ref_count; /* the ref count for the immutable part (atomic) */ 201 gchar *pattern; /* the pattern */ 202 pcre2_code *pcre_re; /* compiled form of the pattern */ 203- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */ 204+ uint32_t compile_opts; /* options used at compile time on the pattern, pcre2 values */ 205 GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */ 206- GRegexMatchFlags match_opts; /* options used at match time on the regex */ 207+ uint32_t match_opts; /* pcre2 options used at match time on the regex */ 208+ GRegexMatchFlags orig_match_opts; /* options used as default match options, gregex values */ 209 gint jit_options; /* options which were enabled for jit compiler */ 210 JITStatus jit_status; /* indicates the status of jit compiler for this compiled regex */ 211 }; 212@@ -223,197 +268,182 @@ static GList *split_replacement (const gchar *replacement, 213 GError **error); 214 static void free_interpolation_data (InterpolationData *data); 215 216-static gint 217-map_to_pcre2_compile_flags (gint pcre1_flags) 218+static uint32_t 219+get_pcre2_compile_options (GRegexCompileFlags compile_flags) 220 { 221- /* Maps compile flags from pcre1 to pcre2 values 222- */ 223- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 224- 225- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 226- return pcre1_flags; 227+ /* Maps compile flags to pcre2 values */ 228+ uint32_t pcre2_flags = 0; 229 230- if (pcre1_flags & G_REGEX_CASELESS) 231+ if (compile_flags & G_REGEX_CASELESS) 232 pcre2_flags |= PCRE2_CASELESS; 233- if (pcre1_flags & G_REGEX_MULTILINE) 234+ if (compile_flags & G_REGEX_MULTILINE) 235 pcre2_flags |= PCRE2_MULTILINE; 236- if (pcre1_flags & G_REGEX_DOTALL) 237+ if (compile_flags & G_REGEX_DOTALL) 238 pcre2_flags |= PCRE2_DOTALL; 239- if (pcre1_flags & G_REGEX_EXTENDED) 240+ if (compile_flags & G_REGEX_EXTENDED) 241 pcre2_flags |= PCRE2_EXTENDED; 242- if (pcre1_flags & G_REGEX_ANCHORED) 243+ if (compile_flags & G_REGEX_ANCHORED) 244 pcre2_flags |= PCRE2_ANCHORED; 245- if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY) 246+ if (compile_flags & G_REGEX_DOLLAR_ENDONLY) 247 pcre2_flags |= PCRE2_DOLLAR_ENDONLY; 248- if (pcre1_flags & G_REGEX_UNGREEDY) 249+ if (compile_flags & G_REGEX_UNGREEDY) 250 pcre2_flags |= PCRE2_UNGREEDY; 251- if (!(pcre1_flags & G_REGEX_RAW)) 252+ if (!(compile_flags & G_REGEX_RAW)) 253 pcre2_flags |= PCRE2_UTF; 254- if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE) 255+ if (compile_flags & G_REGEX_NO_AUTO_CAPTURE) 256 pcre2_flags |= PCRE2_NO_AUTO_CAPTURE; 257- if (pcre1_flags & G_REGEX_FIRSTLINE) 258+ if (compile_flags & G_REGEX_FIRSTLINE) 259 pcre2_flags |= PCRE2_FIRSTLINE; 260- if (pcre1_flags & G_REGEX_DUPNAMES) 261+ if (compile_flags & G_REGEX_DUPNAMES) 262 pcre2_flags |= PCRE2_DUPNAMES; 263- if (pcre1_flags & G_REGEX_NEWLINE_CR) 264- pcre2_flags |= PCRE2_NEWLINE_CR; 265- if (pcre1_flags & G_REGEX_NEWLINE_LF) 266- pcre2_flags |= PCRE2_NEWLINE_LF; 267- /* Check for exact match for a composite flag */ 268- if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF) 269- pcre2_flags |= PCRE2_NEWLINE_CRLF; 270- /* Check for exact match for a composite flag */ 271- if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF) 272- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 273- if (pcre1_flags & G_REGEX_BSR_ANYCRLF) 274- pcre2_flags |= PCRE2_BSR_ANYCRLF; 275- 276- /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special 277- * case to request JIT compilation */ 278- if (pcre1_flags & G_REGEX_OPTIMIZE) 279- pcre2_flags |= 0; 280-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 281- if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT) 282- pcre2_flags |= 0; 283-G_GNUC_END_IGNORE_DEPRECATIONS 284- 285- return pcre2_flags; 286+ 287+ return pcre2_flags & G_REGEX_PCRE2_COMPILE_MASK; 288 } 289 290-static gint 291-map_to_pcre2_match_flags (gint pcre1_flags) 292+static uint32_t 293+get_pcre2_match_options (GRegexMatchFlags match_flags, 294+ GRegexCompileFlags compile_flags) 295 { 296- /* Maps match flags from pcre1 to pcre2 values 297- */ 298- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 299- 300- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 301- return pcre1_flags; 302+ /* Maps match flags to pcre2 values */ 303+ uint32_t pcre2_flags = 0; 304 305- if (pcre1_flags & G_REGEX_MATCH_ANCHORED) 306+ if (match_flags & G_REGEX_MATCH_ANCHORED) 307 pcre2_flags |= PCRE2_ANCHORED; 308- if (pcre1_flags & G_REGEX_MATCH_NOTBOL) 309+ if (match_flags & G_REGEX_MATCH_NOTBOL) 310 pcre2_flags |= PCRE2_NOTBOL; 311- if (pcre1_flags & G_REGEX_MATCH_NOTEOL) 312+ if (match_flags & G_REGEX_MATCH_NOTEOL) 313 pcre2_flags |= PCRE2_NOTEOL; 314- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY) 315+ if (match_flags & G_REGEX_MATCH_NOTEMPTY) 316 pcre2_flags |= PCRE2_NOTEMPTY; 317- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR) 318- pcre2_flags |= PCRE2_NEWLINE_CR; 319- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF) 320- pcre2_flags |= PCRE2_NEWLINE_LF; 321- /* Check for exact match for a composite flag */ 322- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF) 323- pcre2_flags |= PCRE2_NEWLINE_CRLF; 324- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY) 325- pcre2_flags |= PCRE2_NEWLINE_ANY; 326- /* Check for exact match for a composite flag */ 327- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF) 328- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 329- if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF) 330- pcre2_flags |= PCRE2_BSR_ANYCRLF; 331- if (pcre1_flags & G_REGEX_MATCH_BSR_ANY) 332- pcre2_flags |= PCRE2_BSR_UNICODE; 333- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT) 334+ if (match_flags & G_REGEX_MATCH_PARTIAL_SOFT) 335 pcre2_flags |= PCRE2_PARTIAL_SOFT; 336- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD) 337+ if (match_flags & G_REGEX_MATCH_PARTIAL_HARD) 338 pcre2_flags |= PCRE2_PARTIAL_HARD; 339- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) 340+ if (match_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) 341 pcre2_flags |= PCRE2_NOTEMPTY_ATSTART; 342 343- return pcre2_flags; 344+ if (compile_flags & G_REGEX_RAW) 345+ pcre2_flags |= PCRE2_NO_UTF_CHECK; 346+ 347+ return pcre2_flags & G_REGEX_PCRE2_MATCH_MASK; 348 } 349 350-static gint 351-map_to_pcre1_compile_flags (gint pcre2_flags) 352+static GRegexCompileFlags 353+g_regex_compile_flags_from_pcre2 (uint32_t pcre2_flags) 354 { 355- /* Maps compile flags from pcre2 to pcre1 values 356- */ 357- gint pcre1_flags = 0; 358- 359- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 360- return pcre2_flags; 361+ GRegexCompileFlags compile_flags = G_REGEX_DEFAULT; 362 363 if (pcre2_flags & PCRE2_CASELESS) 364- pcre1_flags |= G_REGEX_CASELESS; 365+ compile_flags |= G_REGEX_CASELESS; 366 if (pcre2_flags & PCRE2_MULTILINE) 367- pcre1_flags |= G_REGEX_MULTILINE; 368+ compile_flags |= G_REGEX_MULTILINE; 369 if (pcre2_flags & PCRE2_DOTALL) 370- pcre1_flags |= G_REGEX_DOTALL; 371+ compile_flags |= G_REGEX_DOTALL; 372 if (pcre2_flags & PCRE2_EXTENDED) 373- pcre1_flags |= G_REGEX_EXTENDED; 374+ compile_flags |= G_REGEX_EXTENDED; 375 if (pcre2_flags & PCRE2_ANCHORED) 376- pcre1_flags |= G_REGEX_ANCHORED; 377+ compile_flags |= G_REGEX_ANCHORED; 378 if (pcre2_flags & PCRE2_DOLLAR_ENDONLY) 379- pcre1_flags |= G_REGEX_DOLLAR_ENDONLY; 380+ compile_flags |= G_REGEX_DOLLAR_ENDONLY; 381 if (pcre2_flags & PCRE2_UNGREEDY) 382- pcre1_flags |= G_REGEX_UNGREEDY; 383+ compile_flags |= G_REGEX_UNGREEDY; 384 if (!(pcre2_flags & PCRE2_UTF)) 385- pcre1_flags |= G_REGEX_RAW; 386+ compile_flags |= G_REGEX_RAW; 387 if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE) 388- pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE; 389+ compile_flags |= G_REGEX_NO_AUTO_CAPTURE; 390 if (pcre2_flags & PCRE2_FIRSTLINE) 391- pcre1_flags |= G_REGEX_FIRSTLINE; 392+ compile_flags |= G_REGEX_FIRSTLINE; 393 if (pcre2_flags & PCRE2_DUPNAMES) 394- pcre1_flags |= G_REGEX_DUPNAMES; 395- if (pcre2_flags & PCRE2_NEWLINE_CR) 396- pcre1_flags |= G_REGEX_NEWLINE_CR; 397- if (pcre2_flags & PCRE2_NEWLINE_LF) 398- pcre1_flags |= G_REGEX_NEWLINE_LF; 399- /* Check for exact match for a composite flag */ 400- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 401- pcre1_flags |= G_REGEX_NEWLINE_CRLF; 402- /* Check for exact match for a composite flag */ 403- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 404- pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF; 405- if (pcre2_flags & PCRE2_BSR_ANYCRLF) 406- pcre1_flags |= G_REGEX_BSR_ANYCRLF; 407- 408- return pcre1_flags; 409+ compile_flags |= G_REGEX_DUPNAMES; 410+ 411+ return compile_flags & G_REGEX_COMPILE_MASK; 412 } 413 414-static gint 415-map_to_pcre1_match_flags (gint pcre2_flags) 416+static GRegexMatchFlags 417+g_regex_match_flags_from_pcre2 (uint32_t pcre2_flags) 418 { 419- /* Maps match flags from pcre2 to pcre1 values 420- */ 421- gint pcre1_flags = 0; 422- 423- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 424- return pcre2_flags; 425+ GRegexMatchFlags match_flags = G_REGEX_MATCH_DEFAULT; 426 427 if (pcre2_flags & PCRE2_ANCHORED) 428- pcre1_flags |= G_REGEX_MATCH_ANCHORED; 429+ match_flags |= G_REGEX_MATCH_ANCHORED; 430 if (pcre2_flags & PCRE2_NOTBOL) 431- pcre1_flags |= G_REGEX_MATCH_NOTBOL; 432+ match_flags |= G_REGEX_MATCH_NOTBOL; 433 if (pcre2_flags & PCRE2_NOTEOL) 434- pcre1_flags |= G_REGEX_MATCH_NOTEOL; 435+ match_flags |= G_REGEX_MATCH_NOTEOL; 436 if (pcre2_flags & PCRE2_NOTEMPTY) 437- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY; 438- if (pcre2_flags & PCRE2_NEWLINE_CR) 439- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR; 440- if (pcre2_flags & PCRE2_NEWLINE_LF) 441- pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF; 442- /* Check for exact match for a composite flag */ 443- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 444- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF; 445- if (pcre2_flags & PCRE2_NEWLINE_ANY) 446- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY; 447- /* Check for exact match for a composite flag */ 448- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 449- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF; 450- if (pcre2_flags & PCRE2_BSR_ANYCRLF) 451- pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF; 452- if (pcre2_flags & PCRE2_BSR_UNICODE) 453- pcre1_flags |= G_REGEX_MATCH_BSR_ANY; 454+ match_flags |= G_REGEX_MATCH_NOTEMPTY; 455 if (pcre2_flags & PCRE2_PARTIAL_SOFT) 456- pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT; 457+ match_flags |= G_REGEX_MATCH_PARTIAL_SOFT; 458 if (pcre2_flags & PCRE2_PARTIAL_HARD) 459- pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD; 460+ match_flags |= G_REGEX_MATCH_PARTIAL_HARD; 461 if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART) 462- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; 463+ match_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; 464+ 465+ return (match_flags & G_REGEX_MATCH_MASK); 466+} 467+ 468+static uint32_t 469+get_pcre2_newline_compile_options (GRegexCompileFlags compile_flags) 470+{ 471+ compile_flags &= G_REGEX_COMPILE_NEWLINE_MASK; 472+ 473+ switch (compile_flags) 474+ { 475+ case G_REGEX_NEWLINE_CR: 476+ return PCRE2_NEWLINE_CR; 477+ case G_REGEX_NEWLINE_LF: 478+ return PCRE2_NEWLINE_LF; 479+ case G_REGEX_NEWLINE_CRLF: 480+ return PCRE2_NEWLINE_CRLF; 481+ case G_REGEX_NEWLINE_ANYCRLF: 482+ return PCRE2_NEWLINE_ANYCRLF; 483+ default: 484+ if (compile_flags != 0) 485+ return 0; 486+ 487+ return PCRE2_NEWLINE_ANY; 488+ } 489+} 490+ 491+static uint32_t 492+get_pcre2_newline_match_options (GRegexMatchFlags match_flags) 493+{ 494+ switch (match_flags & G_REGEX_MATCH_NEWLINE_MASK) 495+ { 496+ case G_REGEX_MATCH_NEWLINE_CR: 497+ return PCRE2_NEWLINE_CR; 498+ case G_REGEX_MATCH_NEWLINE_LF: 499+ return PCRE2_NEWLINE_LF; 500+ case G_REGEX_MATCH_NEWLINE_CRLF: 501+ return PCRE2_NEWLINE_CRLF; 502+ case G_REGEX_MATCH_NEWLINE_ANY: 503+ return PCRE2_NEWLINE_ANY; 504+ case G_REGEX_MATCH_NEWLINE_ANYCRLF: 505+ return PCRE2_NEWLINE_ANYCRLF; 506+ default: 507+ return 0; 508+ } 509+} 510+ 511+static uint32_t 512+get_pcre2_bsr_compile_options (GRegexCompileFlags compile_flags) 513+{ 514+ if (compile_flags & G_REGEX_BSR_ANYCRLF) 515+ return PCRE2_BSR_ANYCRLF; 516 517- return pcre1_flags; 518+ return PCRE2_BSR_UNICODE; 519+} 520+ 521+static uint32_t 522+get_pcre2_bsr_match_options (GRegexMatchFlags match_flags) 523+{ 524+ if (match_flags & G_REGEX_MATCH_BSR_ANYCRLF) 525+ return PCRE2_BSR_ANYCRLF; 526+ 527+ if (match_flags & G_REGEX_MATCH_BSR_ANY) 528+ return PCRE2_BSR_UNICODE; 529+ 530+ return 0; 531 } 532 533 static const gchar * 534@@ -742,12 +772,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg) 535 /* GMatchInfo */ 536 537 static GMatchInfo * 538-match_info_new (const GRegex *regex, 539- const gchar *string, 540- gint string_len, 541- gint start_position, 542- gint match_options, 543- gboolean is_dfa) 544+match_info_new (const GRegex *regex, 545+ const gchar *string, 546+ gint string_len, 547+ gint start_position, 548+ GRegexMatchFlags match_options, 549+ gboolean is_dfa) 550 { 551 GMatchInfo *match_info; 552 553@@ -761,7 +791,8 @@ match_info_new (const GRegex *regex, 554 match_info->string_len = string_len; 555 match_info->matches = PCRE2_ERROR_NOMATCH; 556 match_info->pos = start_position; 557- match_info->match_opts = match_options; 558+ match_info->match_opts = 559+ get_pcre2_match_options (match_options, regex->orig_compile_opts); 560 561 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, 562 &match_info->n_subpatterns); 563@@ -822,8 +853,8 @@ recalc_match_offsets (GMatchInfo *match_info, 564 } 565 566 static void 567-enable_jit_with_match_options (GRegex *regex, 568- GRegexMatchFlags match_options) 569+enable_jit_with_match_options (GRegex *regex, 570+ uint32_t match_options) 571 { 572 gint old_jit_options, new_jit_options, retval; 573 574@@ -1009,7 +1040,7 @@ g_match_info_next (GMatchInfo *match_info, 575 return FALSE; 576 } 577 578- opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); 579+ opts = match_info->regex->match_opts | match_info->match_opts; 580 581 enable_jit_with_match_options (match_info->regex, opts); 582 if (match_info->regex->jit_status == JIT_STATUS_ENABLED) 583@@ -1018,7 +1049,7 @@ g_match_info_next (GMatchInfo *match_info, 584 (PCRE2_SPTR8) match_info->string, 585 match_info->string_len, 586 match_info->pos, 587- opts & ~G_REGEX_FLAGS_CONVERTED, 588+ opts, 589 match_info->match_data, 590 match_info->match_context); 591 } 592@@ -1028,7 +1059,7 @@ g_match_info_next (GMatchInfo *match_info, 593 (PCRE2_SPTR8) match_info->string, 594 match_info->string_len, 595 match_info->pos, 596- opts & ~G_REGEX_FLAGS_CONVERTED, 597+ opts, 598 match_info->match_data, 599 match_info->match_context); 600 } 601@@ -1563,14 +1594,14 @@ g_regex_unref (GRegex *regex) 602 } 603 } 604 605-/* 606- * @match_options: (inout) (optional): 607- */ 608-static pcre2_code *regex_compile (const gchar *pattern, 609- GRegexCompileFlags compile_options, 610- GRegexCompileFlags *compile_options_out, 611- GRegexMatchFlags *match_options, 612- GError **error); 613+static pcre2_code * regex_compile (const gchar *pattern, 614+ uint32_t compile_options, 615+ uint32_t newline_options, 616+ uint32_t bsr_options, 617+ GError **error); 618+ 619+static uint32_t get_pcre2_inline_compile_options (pcre2_code *re, 620+ uint32_t compile_options); 621 622 /** 623 * g_regex_new: 624@@ -1596,11 +1627,10 @@ g_regex_new (const gchar *pattern, 625 GRegex *regex; 626 pcre2_code *re; 627 static gsize initialised = 0; 628- GRegexCompileFlags orig_compile_opts; 629- 630- orig_compile_opts = compile_options; 631- compile_options = map_to_pcre2_compile_flags (compile_options); 632- match_options = map_to_pcre2_match_flags (match_options); 633+ uint32_t pcre_compile_options; 634+ uint32_t pcre_match_options; 635+ uint32_t newline_options; 636+ uint32_t bsr_options; 637 638 g_return_val_if_fail (pattern != NULL, NULL); 639 g_return_val_if_fail (error == NULL || *error == NULL, NULL); 640@@ -1618,113 +1648,97 @@ g_regex_new (const gchar *pattern, 641 g_once_init_leave (&initialised, supports_utf8 ? 1 : 2); 642 } 643 644- if (G_UNLIKELY (initialised != 1)) 645+ if (G_UNLIKELY (initialised != 1)) 646 { 647 g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE, 648 _("PCRE library is compiled with incompatible options")); 649 return NULL; 650 } 651 652- switch (compile_options & G_REGEX_NEWLINE_MASK) 653+ pcre_compile_options = get_pcre2_compile_options (compile_options); 654+ pcre_match_options = get_pcre2_match_options (match_options, compile_options); 655+ 656+ newline_options = get_pcre2_newline_match_options (match_options); 657+ if (newline_options == 0) 658+ newline_options = get_pcre2_newline_compile_options (compile_options); 659+ 660+ if (newline_options == 0) 661 { 662- case 0: /* PCRE2_NEWLINE_ANY */ 663- case PCRE2_NEWLINE_CR: 664- case PCRE2_NEWLINE_LF: 665- case PCRE2_NEWLINE_CRLF: 666- case PCRE2_NEWLINE_ANYCRLF: 667- break; 668- default: 669 g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 670 "Invalid newline flags"); 671 return NULL; 672 } 673 674- re = regex_compile (pattern, compile_options, &compile_options, 675- &match_options, error); 676+ bsr_options = get_pcre2_bsr_match_options (match_options); 677+ if (!bsr_options) 678+ bsr_options = get_pcre2_bsr_compile_options (compile_options); 679+ 680+ re = regex_compile (pattern, pcre_compile_options, 681+ newline_options, bsr_options, error); 682 if (re == NULL) 683 return NULL; 684 685+ pcre_compile_options |= 686+ get_pcre2_inline_compile_options (re, pcre_compile_options); 687+ 688 regex = g_new0 (GRegex, 1); 689 regex->ref_count = 1; 690 regex->pattern = g_strdup (pattern); 691 regex->pcre_re = re; 692- regex->compile_opts = compile_options; 693- regex->orig_compile_opts = orig_compile_opts; 694- regex->match_opts = match_options; 695+ regex->compile_opts = pcre_compile_options; 696+ regex->orig_compile_opts = compile_options; 697+ regex->match_opts = pcre_match_options; 698+ regex->orig_match_opts = match_options; 699 enable_jit_with_match_options (regex, regex->match_opts); 700 701 return regex; 702 } 703 704-static gint 705-extract_newline_options (const GRegexCompileFlags compile_options, 706- const GRegexMatchFlags *match_options) 707-{ 708- gint newline_options = PCRE2_NEWLINE_ANY; 709- 710- if (compile_options & G_REGEX_NEWLINE_MASK) 711- newline_options = compile_options & G_REGEX_NEWLINE_MASK; 712- if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK) 713- newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK; 714- 715- return newline_options; 716-} 717- 718-static gint 719-extract_bsr_options (const GRegexCompileFlags compile_options, 720- const GRegexMatchFlags *match_options) 721-{ 722- gint bsr_options = PCRE2_BSR_UNICODE; 723- 724- if (compile_options & PCRE2_BSR_ANYCRLF) 725- bsr_options = PCRE2_BSR_ANYCRLF; 726- if (match_options && *match_options & PCRE2_BSR_ANYCRLF) 727- bsr_options = PCRE2_BSR_ANYCRLF; 728- if (match_options && *match_options & PCRE2_BSR_UNICODE) 729- bsr_options = PCRE2_BSR_UNICODE; 730- 731- return bsr_options; 732-} 733- 734 static pcre2_code * 735-regex_compile (const gchar *pattern, 736- GRegexCompileFlags compile_options, 737- GRegexCompileFlags *compile_options_out, 738- GRegexMatchFlags *match_options, 739- GError **error) 740+regex_compile (const gchar *pattern, 741+ uint32_t compile_options, 742+ uint32_t newline_options, 743+ uint32_t bsr_options, 744+ GError **error) 745 { 746 pcre2_code *re; 747 pcre2_compile_context *context; 748 const gchar *errmsg; 749 PCRE2_SIZE erroffset; 750 gint errcode; 751- GRegexCompileFlags nonpcre_compile_options; 752- uint32_t pcre_compile_options; 753- 754- nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; 755 756 context = pcre2_compile_context_create (NULL); 757 758 /* set newline options */ 759- pcre2_set_newline (context, extract_newline_options (compile_options, match_options)); 760+ if (pcre2_set_newline (context, newline_options) != 0) 761+ { 762+ g_set_error (error, G_REGEX_ERROR, 763+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 764+ "Invalid newline flags"); 765+ pcre2_compile_context_free (context); 766+ return NULL; 767+ } 768 769 /* set bsr options */ 770- pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options)); 771+ if (pcre2_set_bsr (context, bsr_options) != 0) 772+ { 773+ g_set_error (error, G_REGEX_ERROR, 774+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 775+ "Invalid BSR flags"); 776+ pcre2_compile_context_free (context); 777+ return NULL; 778+ } 779 780 /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */ 781 if (compile_options & PCRE2_UTF) 782- { 783- compile_options |= PCRE2_NO_UTF_CHECK; 784- if (match_options != NULL) 785- *match_options |= PCRE2_NO_UTF_CHECK; 786- } 787+ compile_options |= PCRE2_NO_UTF_CHECK; 788 789 compile_options |= PCRE2_UCP; 790 791 /* compile the pattern */ 792 re = pcre2_compile ((PCRE2_SPTR8) pattern, 793 PCRE2_ZERO_TERMINATED, 794- compile_options & ~G_REGEX_FLAGS_CONVERTED, 795+ compile_options, 796 &errcode, 797 &erroffset, 798 context); 799@@ -1755,16 +1769,22 @@ regex_compile (const gchar *pattern, 800 return NULL; 801 } 802 803+ return re; 804+} 805+ 806+static uint32_t 807+get_pcre2_inline_compile_options (pcre2_code *re, 808+ uint32_t compile_options) 809+{ 810+ uint32_t pcre_compile_options; 811+ uint32_t nonpcre_compile_options; 812+ 813 /* For options set at the beginning of the pattern, pcre puts them into 814 * compile options, e.g. "(?i)foo" will make the pcre structure store 815 * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */ 816+ nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; 817 pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options); 818- compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; 819- 820- /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */ 821- if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF) 822- compile_options &= ~PCRE2_NEWLINE_ANY; 823- 824+ compile_options = pcre_compile_options & G_REGEX_PCRE2_COMPILE_MASK; 825 compile_options |= nonpcre_compile_options; 826 827 if (!(compile_options & PCRE2_DUPNAMES)) 828@@ -1775,10 +1795,7 @@ regex_compile (const gchar *pattern, 829 compile_options |= PCRE2_DUPNAMES; 830 } 831 832- if (compile_options_out != 0) 833- *compile_options_out = compile_options; 834- 835- return re; 836+ return compile_options; 837 } 838 839 /** 840@@ -1940,7 +1957,7 @@ g_regex_get_compile_flags (const GRegex *regex) 841 break; 842 } 843 844- return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags; 845+ return g_regex_compile_flags_from_pcre2 (regex->compile_opts) | extra_flags; 846 } 847 848 /** 849@@ -1956,9 +1973,15 @@ g_regex_get_compile_flags (const GRegex *regex) 850 GRegexMatchFlags 851 g_regex_get_match_flags (const GRegex *regex) 852 { 853+ uint32_t flags; 854+ 855 g_return_val_if_fail (regex != NULL, 0); 856 857- return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK); 858+ flags = g_regex_match_flags_from_pcre2 (regex->match_opts); 859+ flags |= (regex->orig_match_opts & G_REGEX_MATCH_NEWLINE_MASK); 860+ flags |= (regex->orig_match_opts & (G_REGEX_MATCH_BSR_ANY | G_REGEX_MATCH_BSR_ANYCRLF)); 861+ 862+ return flags; 863 } 864 865 /** 866@@ -1992,9 +2015,6 @@ g_regex_match_simple (const gchar *pattern, 867 GRegex *regex; 868 gboolean result; 869 870- compile_options = map_to_pcre2_compile_flags (compile_options); 871- match_options = map_to_pcre2_match_flags (match_options); 872- 873 regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL); 874 if (!regex) 875 return FALSE; 876@@ -2062,8 +2082,6 @@ g_regex_match (const GRegex *regex, 877 GRegexMatchFlags match_options, 878 GMatchInfo **match_info) 879 { 880- match_options = map_to_pcre2_match_flags (match_options); 881- 882 return g_regex_match_full (regex, string, -1, 0, match_options, 883 match_info, NULL); 884 } 885@@ -2147,8 +2165,6 @@ g_regex_match_full (const GRegex *regex, 886 GMatchInfo *info; 887 gboolean match_ok; 888 889- match_options = map_to_pcre2_match_flags (match_options); 890- 891 g_return_val_if_fail (regex != NULL, FALSE); 892 g_return_val_if_fail (string != NULL, FALSE); 893 g_return_val_if_fail (start_position >= 0, FALSE); 894@@ -2199,8 +2215,6 @@ g_regex_match_all (const GRegex *regex, 895 GRegexMatchFlags match_options, 896 GMatchInfo **match_info) 897 { 898- match_options = map_to_pcre2_match_flags (match_options); 899- 900 return g_regex_match_all_full (regex, string, -1, 0, match_options, 901 match_info, NULL); 902 } 903@@ -2272,8 +2286,8 @@ g_regex_match_all_full (const GRegex *regex, 904 gboolean done; 905 pcre2_code *pcre_re; 906 gboolean retval; 907- 908- match_options = map_to_pcre2_match_flags (match_options); 909+ uint32_t newline_options; 910+ uint32_t bsr_options; 911 912 g_return_val_if_fail (regex != NULL, FALSE); 913 g_return_val_if_fail (string != NULL, FALSE); 914@@ -2281,6 +2295,14 @@ g_regex_match_all_full (const GRegex *regex, 915 g_return_val_if_fail (error == NULL || *error == NULL, FALSE); 916 g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); 917 918+ newline_options = get_pcre2_newline_match_options (match_options); 919+ if (!newline_options) 920+ newline_options = get_pcre2_newline_compile_options (regex->orig_compile_opts); 921+ 922+ bsr_options = get_pcre2_bsr_match_options (match_options); 923+ if (!bsr_options) 924+ bsr_options = get_pcre2_bsr_compile_options (regex->orig_compile_opts); 925+ 926 /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an 927 * optimization for normal regex matching, but results in omitting some 928 * shorter matches here, and an observable behaviour change. 929@@ -2289,7 +2311,7 @@ g_regex_match_all_full (const GRegex *regex, 930 * codesearch.debian.net, so don't bother caching the recompiled RE. */ 931 pcre_re = regex_compile (regex->pattern, 932 regex->compile_opts | PCRE2_NO_AUTO_POSSESS, 933- NULL, NULL, error); 934+ newline_options, bsr_options, error); 935 if (pcre_re == NULL) 936 return FALSE; 937 938@@ -2303,7 +2325,7 @@ g_regex_match_all_full (const GRegex *regex, 939 info->matches = pcre2_dfa_match (pcre_re, 940 (PCRE2_SPTR8) info->string, info->string_len, 941 info->pos, 942- (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED, 943+ (regex->match_opts | info->match_opts), 944 info->match_data, 945 info->match_context, 946 info->workspace, info->n_workspace); 947@@ -2436,9 +2458,6 @@ g_regex_split_simple (const gchar *pattern, 948 GRegex *regex; 949 gchar **result; 950 951- compile_options = map_to_pcre2_compile_flags (compile_options); 952- match_options = map_to_pcre2_match_flags (match_options); 953- 954 regex = g_regex_new (pattern, compile_options, 0, NULL); 955 if (!regex) 956 return NULL; 957@@ -2482,8 +2501,6 @@ g_regex_split (const GRegex *regex, 958 const gchar *string, 959 GRegexMatchFlags match_options) 960 { 961- match_options = map_to_pcre2_match_flags (match_options); 962- 963 return g_regex_split_full (regex, string, -1, 0, 964 match_options, 0, NULL); 965 } 966@@ -2548,8 +2565,6 @@ g_regex_split_full (const GRegex *regex, 967 /* the returned array of char **s */ 968 gchar **string_list; 969 970- match_options = map_to_pcre2_match_flags (match_options); 971- 972 g_return_val_if_fail (regex != NULL, NULL); 973 g_return_val_if_fail (string != NULL, NULL); 974 g_return_val_if_fail (start_position >= 0, NULL); 975@@ -3174,8 +3189,6 @@ g_regex_replace (const GRegex *regex, 976 GList *list; 977 GError *tmp_error = NULL; 978 979- match_options = map_to_pcre2_match_flags (match_options); 980- 981 g_return_val_if_fail (regex != NULL, NULL); 982 g_return_val_if_fail (string != NULL, NULL); 983 g_return_val_if_fail (start_position >= 0, NULL); 984@@ -3245,8 +3258,6 @@ g_regex_replace_literal (const GRegex *regex, 985 GRegexMatchFlags match_options, 986 GError **error) 987 { 988- match_options = map_to_pcre2_match_flags (match_options); 989- 990 g_return_val_if_fail (replacement != NULL, NULL); 991 g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); 992 993@@ -3335,8 +3346,6 @@ g_regex_replace_eval (const GRegex *regex, 994 gboolean done = FALSE; 995 GError *tmp_error = NULL; 996 997- match_options = map_to_pcre2_match_flags (match_options); 998- 999 g_return_val_if_fail (regex != NULL, NULL); 1000 g_return_val_if_fail (string != NULL, NULL); 1001 g_return_val_if_fail (start_position >= 0, NULL); 1002diff --git a/glib/tests/regex.c b/glib/tests/regex.c 1003index 0d01d59..79e6b4a 100644 1004--- a/glib/tests/regex.c 1005+++ b/glib/tests/regex.c 1006@@ -1,6 +1,7 @@ 1007 /* 1008 * Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org> 1009 * Copyright (C) 2010 Red Hat, Inc. 1010+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com> 1011 * 1012 * This library is free software; you can redistribute it and/or 1013 * modify it under the terms of the GNU Lesser General Public 1014@@ -2353,7 +2354,13 @@ main (int argc, char *argv[]) 1015 1016 /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ 1017 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); 1018+ TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, 1019+ G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY); 1020+ TEST_NEW_CHECK_FLAGS ("a", 0, G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF, 1021+ G_REGEX_NEWLINE_ANYCRLF | G_REGEX_BSR_ANYCRLF, 1022+ G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF); 1023 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); 1024+ TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0); 1025 TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); 1026 TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); 1027 TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0); 1028@@ -2559,6 +2566,8 @@ main (int argc, char *argv[]) 1029 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, 0, "a\rb\rc", -1, 0, 0, TRUE); 1030 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_LF, 0, "a\rb\rc", -1, 0, 0, FALSE); 1031 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CRLF, 0, "a\rb\rc", -1, 0, 0, FALSE); 1032+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\nc", -1, 0, 0, TRUE); 1033+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\rc", -1, 0, 0, TRUE); 1034 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\nb\nc", -1, 0, 0, FALSE); 1035 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\nb\nc", -1, 0, 0, TRUE); 1036 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\nb\nc", -1, 0, 0, FALSE); 1037@@ -2568,6 +2577,8 @@ main (int argc, char *argv[]) 1038 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\rb\rc", -1, 0, 0, TRUE); 1039 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\rb\rc", -1, 0, 0, FALSE); 1040 TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE); 1041+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\rc", -1, 0, 0, TRUE); 1042+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\nc", -1, 0, 0, TRUE); 1043 1044 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\nb\nc", -1, 0, 0, TRUE); 1045 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\rb\rc", -1, 0, 0, TRUE); 1046@@ -2577,6 +2588,13 @@ main (int argc, char *argv[]) 1047 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\r\nb\r\nc", -1, 0, 0, TRUE); 1048 TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE); 1049 1050+ /* See https://gitlab.gnome.org/GNOME/glib/-/issues/2729#note_1544130 */ 1051+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANY, "a", -1, 0, 0, TRUE); 1052+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a", -1, 0, 0, TRUE); 1053+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE); 1054+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a", -1, 0, 0, TRUE); 1055+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a", -1, 0, 0, TRUE); 1056+ 1057 TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1058 TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1059 TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1060-- 10612.33.0 1062GitLab 1063