1From bcd8cb3e142bf7f1c92583aa81c34fe8ff8521c0 Mon Sep 17 00:00:00 2001 2From: Aleksei Rybalkin <aleksei@rybalkin.org> 3Date: Wed, 20 Jul 2022 20:48:17 +0000 4Subject: [PATCH] gregex: use G_REGEX_OPTIMIZE flag to enable JIT compilation 5 6Since we ported gregex to pcre2, the JIT compiler is now available to be 7used. Let's undeprecate G_REGEX_OPTIMIZE flag to control whether the JIT 8compilation is requested, since using JIT is itself an optimization. 9See [1] for details on its implementation in pcre2. 10 11[1] http://pcre.org/current/doc/html/pcre2jit.html 12 13Fixes: #566 14 15Conflict:NA 16Reference:https://gitlab.gnome.org/GNOME/glib/-/commit/bcd8cb3e142bf7f1c92583aa81c34fe8ff8521c0 17 18--- 19 glib/gregex.c | 104 ++++++++++++++++++++++++++++++------ 20 glib/gregex.h | 14 ++--- 21 glib/tests/regex.c | 128 ++++++++++++++++++++++++--------------------- 22 3 files changed, 164 insertions(+), 82 deletions(-) 23 24diff --git a/glib/gregex.c b/glib/gregex.c 25index b0edacc0d3..cf9ce23e8d 100644 26--- a/glib/gregex.c 27+++ b/glib/gregex.c 28@@ -144,7 +144,6 @@ 29 PCRE2_NOTBOL | \ 30 PCRE2_NOTEOL | \ 31 PCRE2_NOTEMPTY | \ 32- PCRE2_PARTIAL_SOFT | \ 33 PCRE2_NEWLINE_CR | \ 34 PCRE2_NEWLINE_LF | \ 35 PCRE2_NEWLINE_CRLF | \ 36@@ -195,6 +194,13 @@ struct _GMatchInfo 37 pcre2_match_data *match_data; 38 }; 39 40+typedef enum 41+{ 42+ JIT_STATUS_DEFAULT, 43+ JIT_STATUS_ENABLED, 44+ JIT_STATUS_DISABLED 45+} JITStatus; 46+ 47 struct _GRegex 48 { 49 gint ref_count; /* the ref count for the immutable part (atomic) */ 50@@ -203,6 +209,8 @@ struct _GRegex 51 GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */ 52 GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */ 53 GRegexMatchFlags match_opts; /* options used at match time on the regex */ 54+ gint jit_options; /* options which were enabled for jit compiler */ 55+ JITStatus jit_status; /* indicates the status of jit compiler for this 
compiled regex */ 56 }; 57 58 /* TRUE if ret is an error code, FALSE otherwise. */ 59@@ -262,10 +270,11 @@ map_to_pcre2_compile_flags (gint pcre1_flags) 60 if (pcre1_flags & G_REGEX_BSR_ANYCRLF) 61 pcre2_flags |= PCRE2_BSR_ANYCRLF; 62 63- /* these are not available in pcre2 */ 64-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 65+ /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special 66+ * case to request JIT compilation */ 67 if (pcre1_flags & G_REGEX_OPTIMIZE) 68 pcre2_flags |= 0; 69+G_GNUC_BEGIN_IGNORE_DEPRECATIONS 70 if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT) 71 pcre2_flags |= 0; 72 G_GNUC_END_IGNORE_DEPRECATIONS 73@@ -291,8 +300,6 @@ map_to_pcre2_match_flags (gint pcre1_flags) 74 pcre2_flags |= PCRE2_NOTEOL; 75 if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY) 76 pcre2_flags |= PCRE2_NOTEMPTY; 77- if (pcre1_flags & G_REGEX_MATCH_PARTIAL) 78- pcre2_flags |= PCRE2_PARTIAL_SOFT; 79 if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR) 80 pcre2_flags |= PCRE2_NEWLINE_CR; 81 if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF) 82@@ -385,8 +392,6 @@ map_to_pcre1_match_flags (gint pcre2_flags) 83 pcre1_flags |= G_REGEX_MATCH_NOTEOL; 84 if (pcre2_flags & PCRE2_NOTEMPTY) 85 pcre1_flags |= G_REGEX_MATCH_NOTEMPTY; 86- if (pcre2_flags & PCRE2_PARTIAL_SOFT) 87- pcre1_flags |= G_REGEX_MATCH_PARTIAL; 88 if (pcre2_flags & PCRE2_NEWLINE_CR) 89 pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR; 90 if (pcre2_flags & PCRE2_NEWLINE_LF) 91@@ -461,6 +466,9 @@ match_error (gint errcode) 92 return _("bad offset"); 93 case PCRE2_ERROR_RECURSELOOP: 94 return _("recursion loop"); 95+ case PCRE2_ERROR_JIT_BADOPTION: 96+ /* should not happen in GRegex since we check modes before each match */ 97+ return _("matching mode is requested that was not compiled for JIT"); 98 default: 99 break; 100 } 101@@ -817,6 +825,56 @@ recalc_match_offsets (GMatchInfo *match_info, 102 return TRUE; 103 } 104 105+static void 106+enable_jit_with_match_options (GRegex *regex, 107+ GRegexMatchFlags match_options) 108+{ 109+ gint 
old_jit_options, new_jit_options, retval; 110+ 111+ if (!(regex->orig_compile_opts & G_REGEX_OPTIMIZE)) 112+ return; 113+ if (regex->jit_status == JIT_STATUS_DISABLED) 114+ return; 115+ 116+ old_jit_options = regex->jit_options; 117+ new_jit_options = old_jit_options | PCRE2_JIT_COMPLETE; 118+ if (match_options & PCRE2_PARTIAL_HARD) 119+ new_jit_options |= PCRE2_JIT_PARTIAL_HARD; 120+ if (match_options & PCRE2_PARTIAL_SOFT) 121+ new_jit_options |= PCRE2_JIT_PARTIAL_SOFT; 122+ 123+ /* no new options enabled */ 124+ if (new_jit_options == old_jit_options) 125+ return; 126+ 127+ retval = pcre2_jit_compile (regex->pcre_re, new_jit_options); 128+ switch (retval) 129+ { 130+ case 0: /* JIT enabled successfully */ 131+ regex->jit_status = JIT_STATUS_ENABLED; 132+ regex->jit_options = new_jit_options; 133+ break; 134+ case PCRE2_ERROR_NOMEMORY: 135+ g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 136+ "but JIT was unable to allocate executable memory for the " 137+ "compiler. Falling back to interpretive code."); 138+ regex->jit_status = JIT_STATUS_DISABLED; 139+ break; 140+ case PCRE2_ERROR_JIT_BADOPTION: 141+ g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 142+ "but JIT support is not available. Falling back to " 143+ "interpretive code."); 144+ regex->jit_status = JIT_STATUS_DISABLED; 145+ break; 146+ default: 147+ g_warning ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 148+ "but request for JIT support had unexpectedly failed. 
" 149+ "Falling back to interpretive code."); 150+ regex->jit_status = JIT_STATUS_DISABLED; 151+ break; 152+ } 153+} 154+ 155 /** 156 * g_match_info_get_regex: 157 * @match_info: a #GMatchInfo 158@@ -956,13 +1014,28 @@ g_match_info_next (GMatchInfo *match_info, 159 } 160 161 opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); 162- match_info->matches = pcre2_match (match_info->regex->pcre_re, 163- (PCRE2_SPTR8) match_info->string, 164- match_info->string_len, 165- match_info->pos, 166- opts & ~G_REGEX_FLAGS_CONVERTED, 167- match_info->match_data, 168- match_info->match_context); 169+ 170+ enable_jit_with_match_options (match_info->regex, opts); 171+ if (match_info->regex->jit_status == JIT_STATUS_ENABLED) 172+ { 173+ match_info->matches = pcre2_jit_match (match_info->regex->pcre_re, 174+ (PCRE2_SPTR8) match_info->string, 175+ match_info->string_len, 176+ match_info->pos, 177+ opts & ~G_REGEX_FLAGS_CONVERTED, 178+ match_info->match_data, 179+ match_info->match_context); 180+ } 181+ else 182+ { 183+ match_info->matches = pcre2_match (match_info->regex->pcre_re, 184+ (PCRE2_SPTR8) match_info->string, 185+ match_info->string_len, 186+ match_info->pos, 187+ opts & ~G_REGEX_FLAGS_CONVERTED, 188+ match_info->match_data, 189+ match_info->match_context); 190+ } 191 192 if (IS_PCRE2_ERROR (match_info->matches)) 193 { 194@@ -1582,6 +1655,7 @@ g_regex_new (const gchar *pattern, 195 regex->compile_opts = compile_options; 196 regex->orig_compile_opts = orig_compile_opts; 197 regex->match_opts = match_options; 198+ enable_jit_with_match_options (regex, regex->match_opts); 199 200 return regex; 201 } 202@@ -1836,10 +1910,8 @@ g_regex_get_compile_flags (const GRegex *regex) 203 204 g_return_val_if_fail (regex != NULL, 0); 205 206-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 207 /* Preserve original G_REGEX_OPTIMIZE */ 208 extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE); 209-G_GNUC_END_IGNORE_DEPRECATIONS 210 211 /* Also include the newline 
options */ 212 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value); 213diff --git a/glib/gregex.h b/glib/gregex.h 214index 7010d52ab8..30eb387073 100644 215--- a/glib/gregex.h 216+++ b/glib/gregex.h 217@@ -262,11 +262,13 @@ GQuark g_regex_error_quark (void); 218 * followed by "?" behaves as if it were followed by "?:" but named 219 * parentheses can still be used for capturing (and they acquire numbers 220 * in the usual way). 221- * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will 222- * be used many times, then it may be worth the effort to optimize it 223- * to improve the speed of matches. Deprecated in GLib 2.74 which now uses 224- * libpcre2, which doesn’t require separate optimization of queries. This 225- * option is now a no-op. Deprecated: 2.74 226+ * @G_REGEX_OPTIMIZE: Since 2.74 and the port to pcre2, requests JIT 227+ * compilation, which, if the just-in-time compiler is available, further 228+ * processes a compiled pattern into machine code that executes much 229+ * faster. However, it comes at the cost of extra processing before the 230+ * match is performed, so it is most beneficial to use this when the same 231+ * compiled pattern is used for matching many times. Before 2.74 this 232+ * option used the built-in non-JIT optimizations in pcre1. 233 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the 234 * first newline. 
Since: 2.34 235 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not 236@@ -311,7 +313,7 @@ typedef enum 237 G_REGEX_UNGREEDY = 1 << 9, 238 G_REGEX_RAW = 1 << 11, 239 G_REGEX_NO_AUTO_CAPTURE = 1 << 12, 240- G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13, 241+ G_REGEX_OPTIMIZE = 1 << 13, 242 G_REGEX_FIRSTLINE = 1 << 18, 243 G_REGEX_DUPNAMES = 1 << 19, 244 G_REGEX_NEWLINE_CR = 1 << 20, 245diff --git a/glib/tests/regex.c b/glib/tests/regex.c 246index 9a1977b248..bb1a5ff762 100644 247--- a/glib/tests/regex.c 248+++ b/glib/tests/regex.c 249@@ -516,7 +516,7 @@ test_partial (gconstpointer d) 250 GRegex *regex; 251 GMatchInfo *match_info; 252 253- regex = g_regex_new (data->pattern, G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL); 254+ regex = g_regex_new (data->pattern, data->compile_opts, G_REGEX_MATCH_DEFAULT, NULL); 255 256 g_assert (regex != NULL); 257 258@@ -534,12 +534,13 @@ test_partial (gconstpointer d) 259 g_regex_unref (regex); 260 } 261 262-#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \ 263+#define TEST_PARTIAL_FULL(_pattern, _string, _compile_opts, _match_opts, _expected) { \ 264 TestMatchData *data; \ 265 gchar *path; \ 266 data = g_new0 (TestMatchData, 1); \ 267 data->pattern = _pattern; \ 268 data->string = _string; \ 269+ data->compile_opts = _compile_opts; \ 270 data->match_opts = _match_opts; \ 271 data->expected = _expected; \ 272 path = g_strdup_printf ("/regex/match/partial/%d", ++total); \ 273@@ -547,7 +548,7 @@ test_partial (gconstpointer d) 274 g_free (path); \ 275 } 276 277-#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected) 278+#define TEST_PARTIAL(_pattern, _string, _compile_opts, _expected) TEST_PARTIAL_FULL(_pattern, _string, _compile_opts, G_REGEX_MATCH_PARTIAL, _expected) 279 280 typedef struct { 281 const gchar *pattern; 282@@ -1504,7 +1505,7 @@ test_properties (void) 283 gchar *str; 284 285 error = NULL; 286- 
regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 287+ regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 288 res = g_regex_match (regex, "ppPP01", 0, &match); 289 g_assert (res); 290 str = g_match_info_fetch (match, 0); 291@@ -1525,7 +1526,7 @@ test_class (void) 292 gchar *str; 293 294 error = NULL; 295- regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 296+ regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 297 res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match); 298 g_assert (res); 299 str = g_match_info_fetch (match, 0); 300@@ -1571,7 +1572,7 @@ test_lookahead (void) 301 gint start, end; 302 303 error = NULL; 304- regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 305+ regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 306 g_assert (regex); 307 g_assert_no_error (error); 308 res = g_regex_match (regex, "word1 word2: word3;", 0, &match); 309@@ -1585,7 +1586,7 @@ test_lookahead (void) 310 g_regex_unref (regex); 311 312 error = NULL; 313- regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 314+ regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 315 g_assert (regex); 316 g_assert_no_error (error); 317 res = g_regex_match (regex, "foobar foobaz", 0, &match); 318@@ -1600,7 +1601,7 @@ test_lookahead (void) 319 g_regex_unref (regex); 320 321 error = NULL; 322- regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 323+ regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 324 g_assert (regex); 325 g_assert_no_error (error); 326 res = g_regex_match (regex, "foobar foobaz", 0, &match); 327@@ -1633,7 +1634,7 
@@ test_lookbehind (void) 328 gint start, end; 329 330 error = NULL; 331- regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 332+ regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 333 g_assert (regex); 334 g_assert_no_error (error); 335 res = g_regex_match (regex, "foobar boobar", 0, &match); 336@@ -1648,7 +1649,7 @@ test_lookbehind (void) 337 g_regex_unref (regex); 338 339 error = NULL; 340- regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 341+ regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 342 g_assert (regex); 343 g_assert_no_error (error); 344 res = g_regex_match (regex, "don poo, and bullock poo", 0, &match); 345@@ -1661,17 +1662,17 @@ test_lookbehind (void) 346 g_match_info_free (match); 347 g_regex_unref (regex); 348 349- regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 350+ regex = g_regex_new ("(?<!dogs?|cats?) 
x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 351 g_assert (regex == NULL); 352 g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); 353 g_clear_error (&error); 354 355- regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 356+ regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 357 g_assert (regex == NULL); 358 g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND); 359 g_clear_error (&error); 360 361- regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 362+ regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 363 g_assert (regex); 364 g_assert_no_error (error); 365 res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match); 366@@ -1683,7 +1684,7 @@ test_lookbehind (void) 367 g_match_info_free (match); 368 g_regex_unref (regex); 369 370- regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 371+ regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 372 g_assert (regex); 373 g_assert_no_error (error); 374 res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match); 375@@ -1692,7 +1693,7 @@ test_lookbehind (void) 376 g_match_info_free (match); 377 g_regex_unref (regex); 378 379- regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 380+ regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 381 g_assert (regex); 382 g_assert_no_error (error); 383 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 384@@ -1704,7 +1705,7 @@ test_lookbehind (void) 385 g_match_info_free (match); 386 g_regex_unref (regex); 387 388- regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 389+ regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", 
G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 390 g_assert (regex); 391 g_assert_no_error (error); 392 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 393@@ -1716,7 +1717,7 @@ test_lookbehind (void) 394 g_match_info_free (match); 395 g_regex_unref (regex); 396 397- regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 398+ regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 399 g_assert (regex); 400 g_assert_no_error (error); 401 res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match); 402@@ -1728,7 +1729,7 @@ test_lookbehind (void) 403 g_match_info_free (match); 404 g_regex_unref (regex); 405 406- regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 407+ regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 408 g_assert (regex); 409 g_assert_no_error (error); 410 res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match); 411@@ -1753,7 +1754,7 @@ test_subpattern (void) 412 gint start; 413 414 error = NULL; 415- regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 416+ regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 417 g_assert (regex); 418 g_assert_no_error (error); 419 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1); 420@@ -1771,7 +1772,7 @@ test_subpattern (void) 421 g_match_info_free (match); 422 g_regex_unref (regex); 423 424- regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 425+ regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 426 g_assert (regex); 427 g_assert_no_error (error); 428 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3); 429@@ -1795,7 +1796,7 @@ test_subpattern (void) 430 g_match_info_free (match); 431 g_regex_unref 
(regex); 432 433- regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 434+ regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 435 g_assert (regex); 436 g_assert_no_error (error); 437 res = g_regex_match (regex, "the white queen", 0, &match); 438@@ -1815,7 +1816,7 @@ test_subpattern (void) 439 g_match_info_free (match); 440 g_regex_unref (regex); 441 442- regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 443+ regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 444 g_assert (regex); 445 g_assert_no_error (error); 446 g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3); 447@@ -1835,7 +1836,7 @@ test_subpattern (void) 448 g_match_info_free (match); 449 g_regex_unref (regex); 450 451- regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 452+ regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 453 g_assert (regex); 454 g_assert_no_error (error); 455 g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1); 456@@ -1853,7 +1854,7 @@ test_subpattern (void) 457 g_match_info_free (match); 458 g_regex_unref (regex); 459 460- regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 461+ regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 462 g_assert (regex); 463 g_assert_no_error (error); 464 res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match); 465@@ -1870,7 +1871,7 @@ test_subpattern (void) 466 g_match_info_free (match); 467 g_regex_unref (regex); 468 469- regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 470+ regex = 
g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 471 g_assert (regex); 472 g_assert_no_error (error); 473 res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match); 474@@ -1897,7 +1898,7 @@ test_subpattern (void) 475 g_match_info_free (match); 476 g_regex_unref (regex); 477 478- regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 479+ regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error); 480 g_assert (regex); 481 g_assert_no_error (error); 482 res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match); 483@@ -1921,7 +1922,7 @@ test_condition (void) 484 gboolean res; 485 486 error = NULL; 487- regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 488+ regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 489 g_assert (regex); 490 g_assert_no_error (error); 491 res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); 492@@ -1935,7 +1936,7 @@ test_condition (void) 493 g_regex_unref (regex); 494 495 error = NULL; 496- regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 497+ regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 498 g_assert (regex); 499 g_assert_no_error (error); 500 res = g_regex_match (regex, "a(zzzzzz)b", 0, &match); 501@@ -1948,7 +1949,7 @@ test_condition (void) 502 g_match_info_free (match); 503 g_regex_unref (regex); 504 505- regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 506+ regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 507 g_assert (regex); 508 
g_assert_no_error (error); 509 res = g_regex_match (regex, "a[zzzzzz]b", 0, &match); 510@@ -1963,7 +1964,7 @@ test_condition (void) 511 512 regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )" 513 "\\b (?&byte) (\\.(?&byte)){3} \\b", 514- G_REGEX_EXTENDED, 0, &error); 515+ G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); 516 g_assert (regex); 517 g_assert_no_error (error); 518 res = g_regex_match (regex, "128.0.0.1", 0, &match); 519@@ -1982,7 +1983,7 @@ test_condition (void) 520 521 regex = g_regex_new ("^(?(?=[^a-z]*[a-z])" 522 "\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$", 523- G_REGEX_EXTENDED, 0, &error); 524+ G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error); 525 g_assert (regex); 526 g_assert_no_error (error); 527 res = g_regex_match (regex, "01-abc-24", 0, &match); 528@@ -2015,7 +2016,7 @@ test_recursion (void) 529 gint start; 530 531 error = NULL; 532- regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 533+ regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 534 g_assert (regex); 535 g_assert_no_error (error); 536 res = g_regex_match (regex, "(middle)", 0, &match); 537@@ -2032,7 +2033,7 @@ test_recursion (void) 538 g_match_info_free (match); 539 g_regex_unref (regex); 540 541- regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 542+ regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 543 g_assert (regex); 544 g_assert_no_error (error); 545 res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match); 546@@ -2045,7 +2046,7 @@ test_recursion (void) 547 g_match_info_free (match); 548 g_regex_unref (regex); 549 550- regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 551+ regex = g_regex_new ("^(?<pn> 
\\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 552 g_assert (regex); 553 g_assert_no_error (error); 554 g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match); 555@@ -2054,7 +2055,7 @@ test_recursion (void) 556 g_match_info_free (match); 557 g_regex_unref (regex); 558 559- regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 560+ regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error); 561 g_assert (regex); 562 g_assert_no_error (error); 563 res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match); 564@@ -2073,7 +2074,7 @@ test_recursion (void) 565 g_match_info_free (match); 566 g_regex_unref (regex); 567 568- regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 569+ regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 570 g_assert (regex); 571 g_assert_no_error (error); 572 res = g_regex_match (regex, "abcdcba", 0, &match); 573@@ -2086,7 +2087,7 @@ test_recursion (void) 574 g_match_info_free (match); 575 g_regex_unref (regex); 576 577- regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error); 578+ regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error); 579 g_assert (regex); 580 g_assert_no_error (error); 581 res = g_regex_match (regex, "abcdcba", 0, &match); 582@@ -2099,7 +2100,7 @@ test_recursion (void) 583 g_match_info_free (match); 584 g_regex_unref (regex); 585 586- regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error); 587+ regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, 
G_REGEX_MATCH_DEFAULT, &error); 588 g_assert (regex); 589 g_assert_no_error (error); 590 res = g_regex_match (regex, "abcdcba", 0, &match); 591@@ -2219,26 +2220,18 @@ main (int argc, char *argv[]) 592 g_test_add_func ("/regex/compile-errors", test_compile_errors); 593 594 /* TEST_NEW(pattern, compile_opts, match_opts) */ 595-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 596 TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); 597-G_GNUC_END_IGNORE_DEPRECATIONS 598 TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 599 TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 600-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 601 TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 602-G_GNUC_END_IGNORE_DEPRECATIONS 603 TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT); 604 TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT); 605 TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL); 606 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 607 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT); 608-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 609 TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 610-G_GNUC_END_IGNORE_DEPRECATIONS 611 TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT); 612-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 613 TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT); 614-G_GNUC_END_IGNORE_DEPRECATIONS 615 /* This gives "internal error: code overflow" with pcre 6.0 */ 616 TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 617 TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 618@@ -2249,9 +2242,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS 619 TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT); 620 621 /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ 
622-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 623 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); 624-G_GNUC_END_IGNORE_DEPRECATIONS 625 TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); 626 TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); 627 TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); 628@@ -2540,18 +2531,35 @@ G_GNUC_END_IGNORE_DEPRECATIONS 629 TEST_MATCH_COUNT("(a)?(b)", "b", 0, 0, 3); 630 TEST_MATCH_COUNT("(a)?(b)", "ab", 0, 0, 3); 631 632- /* TEST_PARTIAL(pattern, string, expected) */ 633- TEST_PARTIAL("^ab", "a", TRUE); 634- TEST_PARTIAL("^ab", "xa", FALSE); 635- TEST_PARTIAL("ab", "xa", TRUE); 636- TEST_PARTIAL("ab", "ab", FALSE); /* normal match. */ 637- TEST_PARTIAL("a+b", "aa", TRUE); 638- TEST_PARTIAL("(a)+b", "aa", TRUE); 639- TEST_PARTIAL("a?b", "a", TRUE); 640- 641- /* Test soft vs. hard partial matching */ 642- TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE); 643- TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE); 644+ /* TEST_PARTIAL(pattern, string, compile_opts, expected), no JIT */ 645+ TEST_PARTIAL("^ab", "a", G_REGEX_DEFAULT, TRUE); 646+ TEST_PARTIAL("^ab", "xa", G_REGEX_DEFAULT, FALSE); 647+ TEST_PARTIAL("ab", "xa", G_REGEX_DEFAULT, TRUE); 648+ TEST_PARTIAL("ab", "ab", G_REGEX_DEFAULT, FALSE); /* normal match. */ 649+ TEST_PARTIAL("a+b", "aa", G_REGEX_DEFAULT, TRUE); 650+ TEST_PARTIAL("(a)+b", "aa", G_REGEX_DEFAULT, TRUE); 651+ TEST_PARTIAL("a?b", "a", G_REGEX_DEFAULT, TRUE); 652+ 653+ /* TEST_PARTIAL(pattern, string, compile_opts, expected) with JIT */ 654+ TEST_PARTIAL("^ab", "a", G_REGEX_OPTIMIZE, TRUE); 655+ TEST_PARTIAL("^ab", "xa", G_REGEX_OPTIMIZE, FALSE); 656+ TEST_PARTIAL("ab", "xa", G_REGEX_OPTIMIZE, TRUE); 657+ TEST_PARTIAL("ab", "ab", G_REGEX_OPTIMIZE, FALSE); /* normal match. 
*/ 658+ TEST_PARTIAL("a+b", "aa", G_REGEX_OPTIMIZE, TRUE); 659+ TEST_PARTIAL("(a)+b", "aa", G_REGEX_OPTIMIZE, TRUE); 660+ TEST_PARTIAL("a?b", "a", G_REGEX_OPTIMIZE, TRUE); 661+ 662+ /* Test soft vs. hard partial matching, no JIT */ 663+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_SOFT, FALSE); 664+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_HARD, TRUE); 665+ TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_SOFT, FALSE); 666+ TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_DEFAULT, G_REGEX_MATCH_PARTIAL_HARD, TRUE); 667+ 668+ /* Test soft vs. hard partial matching with JIT */ 669+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_SOFT, FALSE); 670+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_HARD, TRUE); 671+ TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_SOFT, FALSE); 672+ TEST_PARTIAL_FULL("ab+", "ab", G_REGEX_OPTIMIZE, G_REGEX_MATCH_PARTIAL_HARD, TRUE); 673 674 /* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub, 675 * expected_start, expected_end) */ 676-- 677GitLab 678 679