1From bec68b2d74853de5e23ee40c890433fa336ffbc5 Mon Sep 17 00:00:00 2001 2From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net> 3Date: Fri, 9 Sep 2022 18:30:15 +0200 4Subject: [PATCH] glib/regex: Do not use JIT when using unsupported match 5 options 6 7Do not store jit status for regex unless during initial compilation. 8After that, decide whether to use it depending on matching options. 9 10In fact there are some matching options that are incompatible with JIT, 11as the PCRE2 docs state: 12 13 Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not 14 supported by the just-in-time (JIT) compiler. If it is set, JIT 15 matching is disabled and the interpretive code in pcre2_match() is 16 run. Apart from PCRE2_NO_JIT (obviously), the remaining options are 17 supported for JIT matching. 18 19Fixes: GNOME/gtksourceview#283 20--- 21 glib/gregex.c | 38 ++++++++++++++++--------- 22 glib/tests/regex.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 23 2 files changed, 94 insertions(+), 13 deletions(-) 24 25diff --git a/glib/gregex.c b/glib/gregex.c 26index fe7473e628..220a1a11ac 100644 27--- a/glib/gregex.c 28+++ b/glib/gregex.c 29@@ -201,6 +201,13 @@ 30 PCRE2_NEWLINE_CRLF | \ 31 PCRE2_NEWLINE_ANYCRLF) 32 33+/* Some match options are not supported when using JIT as stated in the 34+ * pcre2jit man page under the «UNSUPPORTED OPTIONS AND PATTERN ITEMS» section: 35+ * https://www.pcre.org/current/doc/html/pcre2jit.html#SEC5 36+ */ 37+#define G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS (PCRE2_ANCHORED | \ 38+ PCRE2_ENDANCHORED) 39+ 40 #define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \ 41 G_REGEX_NEWLINE_LF | \ 42 G_REGEX_NEWLINE_CRLF | \ 43@@ -869,7 +876,7 @@ recalc_match_offsets (GMatchInfo *match_info, 44 return TRUE; 45 } 46 47-static void 48+static JITStatus 49 enable_jit_with_match_options (GRegex *regex, 50 uint32_t match_options) 51 { 52@@ -877,9 +884,13 @@ enable_jit_with_match_options (GRegex *regex, 53 uint32_t old_jit_options, 
new_jit_options; 54 55 if (!(regex->orig_compile_opts & G_REGEX_OPTIMIZE)) 56- return; 57+ return JIT_STATUS_DISABLED; 58+ 59 if (regex->jit_status == JIT_STATUS_DISABLED) 60- return; 61+ return JIT_STATUS_DISABLED; 62+ 63+ if (match_options & G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS) 64+ return JIT_STATUS_DISABLED; 65 66 old_jit_options = regex->jit_options; 67 new_jit_options = old_jit_options | PCRE2_JIT_COMPLETE; 68@@ -890,34 +901,34 @@ enable_jit_with_match_options (GRegex *regex, 69 70 /* no new options enabled */ 71 if (new_jit_options == old_jit_options) 72- return; 73+ return regex->jit_status; 74 75 retval = pcre2_jit_compile (regex->pcre_re, new_jit_options); 76 switch (retval) 77 { 78 case 0: /* JIT enabled successfully */ 79- regex->jit_status = JIT_STATUS_ENABLED; 80 regex->jit_options = new_jit_options; 81- break; 82+ return JIT_STATUS_ENABLED; 83 case PCRE2_ERROR_NOMEMORY: 84 g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 85 "but JIT was unable to allocate executable memory for the " 86 "compiler. Falling back to interpretive code."); 87- regex->jit_status = JIT_STATUS_DISABLED; 88- break; 89+ return JIT_STATUS_DISABLED; 90 case PCRE2_ERROR_JIT_BADOPTION: 91 g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 92 "but JIT support is not available. Falling back to " 93 "interpretive code."); 94- regex->jit_status = JIT_STATUS_DISABLED; 95+ return JIT_STATUS_DISABLED; 96 break; 97 default: 98 g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 99 "but request for JIT support had unexpectedly failed (error %d). 
" 100 "Falling back to interpretive code.", retval); 101- regex->jit_status = JIT_STATUS_DISABLED; 102+ return JIT_STATUS_DISABLED; 103 break; 104 } 105+ 106+ return regex->jit_status; 107 } 108 109 /** 110@@ -1039,6 +1050,7 @@ gboolean 111 g_match_info_next (GMatchInfo *match_info, 112 GError **error) 113 { 114+ JITStatus jit_status; 115 gint prev_match_start; 116 gint prev_match_end; 117 uint32_t opts; 118@@ -1060,8 +1072,8 @@ g_match_info_next (GMatchInfo *match_info, 119 120 opts = match_info->regex->match_opts | match_info->match_opts; 121 122- enable_jit_with_match_options (match_info->regex, opts); 123- if (match_info->regex->jit_status == JIT_STATUS_ENABLED) 124+ jit_status = enable_jit_with_match_options (match_info->regex, opts); 125+ if (jit_status == JIT_STATUS_ENABLED) 126 { 127 match_info->matches = pcre2_jit_match (match_info->regex->pcre_re, 128 (PCRE2_SPTR8) match_info->string, 129@@ -1727,7 +1739,7 @@ g_regex_new (const gchar *pattern, 130 regex->orig_compile_opts = compile_options; 131 regex->match_opts = pcre_match_options; 132 regex->orig_match_opts = match_options; 133- enable_jit_with_match_options (regex, regex->match_opts); 134+ regex->jit_status = enable_jit_with_match_options (regex, regex->match_opts); 135 136 return regex; 137 } 138diff --git a/glib/tests/regex.c b/glib/tests/regex.c 139index 26844d63a7..2052ba0204 100644 140--- a/glib/tests/regex.c 141+++ b/glib/tests/regex.c 142@@ -2334,6 +2334,67 @@ test_compile_errors (void) 143 g_clear_error (&error); 144 } 145 146+static void 147+test_jit_unsupported_matching_options (void) 148+{ 149+ GRegex *regex; 150+ GMatchInfo *info; 151+ gchar *substring; 152+ 153+ regex = g_regex_new ("(\\w+)#(\\w+)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, NULL); 154+ 155+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info)); 156+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 157+ substring = g_match_info_fetch (info, 1); 158+ g_assert_cmpstr (substring, ==, 
"aa"); 159+ g_clear_pointer (&substring, g_free); 160+ substring = g_match_info_fetch (info, 2); 161+ g_assert_cmpstr (substring, ==, "bb"); 162+ g_clear_pointer (&substring, g_free); 163+ g_assert_true (g_match_info_next (info, NULL)); 164+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 165+ substring = g_match_info_fetch (info, 1); 166+ g_assert_cmpstr (substring, ==, "cc"); 167+ g_clear_pointer (&substring, g_free); 168+ substring = g_match_info_fetch (info, 2); 169+ g_assert_cmpstr (substring, ==, "dd"); 170+ g_clear_pointer (&substring, g_free); 171+ g_assert_false (g_match_info_next (info, NULL)); 172+ g_match_info_free (info); 173+ 174+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_ANCHORED, &info)); 175+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 176+ substring = g_match_info_fetch (info, 1); 177+ g_assert_cmpstr (substring, ==, "aa"); 178+ g_clear_pointer (&substring, g_free); 179+ substring = g_match_info_fetch (info, 2); 180+ g_assert_cmpstr (substring, ==, "bb"); 181+ g_clear_pointer (&substring, g_free); 182+ g_assert_false (g_match_info_next (info, NULL)); 183+ g_match_info_free (info); 184+ 185+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info)); 186+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 187+ substring = g_match_info_fetch (info, 1); 188+ g_assert_cmpstr (substring, ==, "aa"); 189+ g_clear_pointer (&substring, g_free); 190+ substring = g_match_info_fetch (info, 2); 191+ g_assert_cmpstr (substring, ==, "bb"); 192+ g_clear_pointer (&substring, g_free); 193+ g_assert_true (g_match_info_next (info, NULL)); 194+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 195+ substring = g_match_info_fetch (info, 1); 196+ g_assert_cmpstr (substring, ==, "cc"); 197+ g_clear_pointer (&substring, g_free); 198+ substring = g_match_info_fetch (info, 2); 199+ g_assert_cmpstr (substring, ==, "dd"); 200+ g_clear_pointer (&substring, g_free); 201+ 
g_assert_false (g_match_info_next (info, NULL)); 202+ g_match_info_free (info); 203+ 204+ g_regex_unref (regex); 205+} 206+ 207 int 208 main (int argc, char *argv[]) 209 { 210@@ -2352,6 +2413,7 @@ main (int argc, char *argv[]) 211 g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf); 212 g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind); 213 g_test_add_func ("/regex/compile-errors", test_compile_errors); 214+ g_test_add_func ("/regex/jit-unsupported-matching", test_jit_unsupported_matching_options); 215 216 /* TEST_NEW(pattern, compile_opts, match_opts) */ 217 TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); 218@@ -2488,6 +2550,7 @@ main (int argc, char *argv[]) 219 TEST_MATCH_SIMPLE("a", "ab", 0, G_REGEX_MATCH_ANCHORED, TRUE); 220 TEST_MATCH_SIMPLE("a", "a", G_REGEX_CASELESS, 0, TRUE); 221 TEST_MATCH_SIMPLE("a", "A", G_REGEX_CASELESS, 0, TRUE); 222+ TEST_MATCH_SIMPLE("\\C\\C", "ab", G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, TRUE); 223 /* These are needed to test extended properties. */ 224 TEST_MATCH_SIMPLE(AGRAVE, AGRAVE, G_REGEX_CASELESS, 0, TRUE); 225 TEST_MATCH_SIMPLE(AGRAVE, AGRAVE_UPPER, G_REGEX_CASELESS, 0, TRUE); 226@@ -2947,6 +3010,12 @@ main (int argc, char *argv[]) 227 TEST_REPLACE("\\S+", "hello world", 0, "\\U-\\0-", "-HELLO- -WORLD-"); 228 TEST_REPLACE(".", "a", 0, "\\A", NULL); 229 TEST_REPLACE(".", "a", 0, "\\g", NULL); 230+ TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa dd#cc", 231+ G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS, 232+ 0); 233+ TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa cc#dd", 234+ G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS, 235+ G_REGEX_MATCH_ANCHORED); 236 237 /* TEST_REPLACE_LIT(pattern, string, start_position, replacement, expected) */ 238 TEST_REPLACE_LIT("a", "ababa", 0, "A", "AbAbA"); 239-- 240GitLab 241 242