Lines Matching +full:no +full:- +full:useless +full:- +full:escape
1 /* GRegex -- regular expression API wrapper around PCRE.
5 * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
39 * @title: Perl-compatible regular expressions
41 * @see_also: [Regular expression syntax][glib-regex-syntax]
61 * to these functions must be encoded in UTF-8. The lengths and the positions
64 * single character. If you set #G_REGEX_RAW the strings can be non-valid
65 * UTF-8 strings and a byte is treated as a character, so "\xc3\xa0" is two
70 * sequence use "\R". This particular group matches either the two-character
90 * is a syntax error; the '\x' escape only allows 0 to 2 hexadecimal digits, and
91 * you must use the '\u' escape sequence with 4 hex digits to specify a unicode
104 * The regular expressions low-level functionalities are obtained through
154 /* if the string is in UTF-8 use g_utf8_ functions, else use
155 * use just +/- 1. */
156 #define NEXT_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
159 #define PREV_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
160 ((s) - 1) : \
607 * and that some PCRE errors are useless for us. in translate_compile_error()
633 *errmsg = _("invalid escape sequence in character class"); in translate_compile_error()
646 *errmsg = _("unrecognized character after (? or (?-"); in translate_compile_error()
655 *errmsg = _("reference to non-existent subpattern"); in translate_compile_error()
692 /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) in translate_compile_error()
693 * sequences here, '(?-54' would be an example for the second group. in translate_compile_error()
695 *errmsg = _("(?R or (?[+-]digits must be followed by )"); in translate_compile_error()
751 case 153: /* internal error: previously-checked referenced subpattern not found */ in translate_compile_error()
753 *errmsg = _("previously-checked referenced subpattern not found"); in translate_compile_error()
762 *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " in translate_compile_error()
796 *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); in translate_compile_error()
823 case 144: /* invalid UTF-8 string */ in translate_compile_error()
827 case 174: /* invalid UTF-16 string */ in translate_compile_error()
828 /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE in translate_compile_error()
857 match_info->ref_count = 1; in match_info_new()
858 match_info->regex = g_regex_ref ((GRegex *)regex); in match_info_new()
859 match_info->string = string; in match_info_new()
860 match_info->string_len = string_len; in match_info_new()
861 match_info->matches = PCRE2_ERROR_NOMATCH; in match_info_new()
862 match_info->pos = start_position; in match_info_new()
863 match_info->match_opts = match_options; in match_info_new()
869 match_info->n_offsets = 24; in match_info_new()
870 match_info->n_workspace = 100; in match_info_new()
871 match_info->workspace = g_new (gint, match_info->n_workspace); in match_info_new()
876 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, in match_info_new()
878 match_info->n_offsets = (capture_count + 1) * 3; in match_info_new()
881 match_info->offsets = g_new0 (gint, match_info->n_offsets); in match_info_new()
883 match_info->offsets[0] = -1; in match_info_new()
884 match_info->offsets[1] = -1; in match_info_new()
886 match_info->match_data = pcre2_match_data_create_from_pattern ( in match_info_new()
887 match_info->regex->pcre_re, in match_info_new()
909 return match_info->regex; in g_match_info_get_regex()
928 return match_info->string; in g_match_info_get_string()
945 g_atomic_int_inc (&match_info->ref_count); in g_match_info_ref()
961 if (g_atomic_int_dec_and_test (&match_info->ref_count)) in g_match_info_unref()
963 g_regex_unref (match_info->regex); in g_match_info_unref()
964 if (match_info->match_data) in g_match_info_unref()
965 pcre2_match_data_free (match_info->match_data); in g_match_info_unref()
966 g_free (match_info->offsets); in g_match_info_unref()
967 g_free (match_info->workspace); in g_match_info_unref()
1018 g_return_val_if_fail (match_info->pos >= 0, FALSE); in g_match_info_next()
1020 prev_match_start = match_info->offsets[0]; in g_match_info_next()
1021 prev_match_end = match_info->offsets[1]; in g_match_info_next()
1023 if (match_info->pos > match_info->string_len) in g_match_info_next()
1026 match_info->pos = -1; in g_match_info_next()
1027 match_info->matches = PCRE2_ERROR_NOMATCH; in g_match_info_next()
1031 opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); in g_match_info_next()
1032 match_info->matches = pcre2_match (match_info->regex->pcre_re, in g_match_info_next()
1033 (PCRE2_SPTR)match_info->string, in g_match_info_next()
1034 match_info->string_len, in g_match_info_next()
1035 match_info->pos, in g_match_info_next()
1037 match_info->match_data, in g_match_info_next()
1040 if (IS_PCRE_ERROR (match_info->matches)) in g_match_info_next()
1044 match_info->regex->pattern, match_error (match_info->matches)); in g_match_info_next()
1049 match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2; in g_match_info_next()
1050 ovector = pcre2_get_ovector_pointer (match_info->match_data); in g_match_info_next()
1051 match_info->offsets = g_realloc_n (match_info->offsets, in g_match_info_next()
1052 match_info->n_offsets, in g_match_info_next()
1054 for (i = 0; i < match_info->n_offsets; i++) in g_match_info_next()
1056 match_info->offsets[i] = (int) ovector[i]; in g_match_info_next()
1062 if (match_info->pos == match_info->offsets[1]) in g_match_info_next()
1064 if (match_info->pos > match_info->string_len) in g_match_info_next()
1067 match_info->pos = -1; in g_match_info_next()
1068 match_info->matches = PCRE2_ERROR_NOMATCH; in g_match_info_next()
1072 match_info->pos = NEXT_CHAR (match_info->regex, in g_match_info_next()
1073 &match_info->string[match_info->pos]) - in g_match_info_next()
1074 match_info->string; in g_match_info_next()
1078 match_info->pos = match_info->offsets[1]; in g_match_info_next()
1082 * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and in g_match_info_next()
1084 * - search at position 0: match from 0 to 0 in g_match_info_next()
1085 * - search at position 1: match from 3 to 3 in g_match_info_next()
1086 * - search at position 3: match from 3 to 3 (duplicate) in g_match_info_next()
1087 * - search at position 4: match from 5 to 5 in g_match_info_next()
1088 * - search at position 5: match from 5 to 5 (duplicate) in g_match_info_next()
1089 * - search at position 6: no match -> stop in g_match_info_next()
1092 if (match_info->matches >= 0 && in g_match_info_next()
1093 prev_match_start == match_info->offsets[0] && in g_match_info_next()
1094 prev_match_end == match_info->offsets[1]) in g_match_info_next()
1100 return match_info->matches >= 0; in g_match_info_next()
1119 return match_info->matches >= 0; in g_match_info_matches()
1128 * has no substrings in it and 0 is returned if the match failed.
1135 * Returns: Number of matched substrings, or -1 if an error occurred
1142 g_return_val_if_fail (match_info, -1); in g_match_info_get_match_count()
1144 if (match_info->matches == PCRE2_ERROR_NOMATCH) in g_match_info_get_match_count()
1145 /* no match */ in g_match_info_get_match_count()
1147 else if (match_info->matches < PCRE2_ERROR_NOMATCH) in g_match_info_get_match_count()
1149 return -1; in g_match_info_get_match_count()
1152 return match_info->matches; in g_match_info_get_match_count()
1162 * distinguish this case from other cases in which there is no match.
1189 * The restrictions no longer apply.
1202 return match_info->matches == PCRE2_ERROR_PARTIAL; in g_match_info_is_partial_match()
1212 * references and escape sequences expanded. References refer to the last
1216 * The @string_to_expand must be UTF-8 encoded even if #G_REGEX_RAW was
1311 else if (start == -1) in g_match_info_fetch()
1314 match = g_strndup (&match_info->string[start], end - start); in g_match_info_fetch()
1334 * and @end_pos are set to -1 and %TRUE is returned.
1359 if (match_num >= match_info->matches) in g_match_info_fetch_pos()
1363 *start_pos = match_info->offsets[2 * match_num]; in g_match_info_fetch_pos()
1366 *end_pos = match_info->offsets[2 * match_num + 1]; in g_match_info_fetch_pos()
1385 if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES)) in get_matched_substring_number()
1386 return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR)name); in get_matched_substring_number()
1389 entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re, in get_matched_substring_number()
1400 if (match_info->offsets[n*2] >= 0) in get_matched_substring_number()
1457 * then @start_pos and @end_pos are set to -1 and %TRUE is returned.
1504 * Returns: (transfer full): a %NULL-terminated array of gchar *
1520 if (match_info->matches < 0) in g_match_info_fetch_all()
1523 result = g_new (gchar *, match_info->matches + 1); in g_match_info_fetch_all()
1524 for (i = 0; i < match_info->matches; i++) in g_match_info_fetch_all()
1534 G_DEFINE_QUARK (g-regex-error-quark, g_regex_error)
1550 g_atomic_int_inc (&regex->ref_count); in g_regex_ref()
1568 if (g_atomic_int_dec_and_test (&regex->ref_count)) in g_regex_unref()
1570 g_free (regex->pattern); in g_regex_unref()
1571 if (regex->pcre_re != NULL) in g_regex_unref()
1572 pcre2_code_free (regex->pcre_re); in g_regex_unref()
1643 regex->ref_count = 1; in g_regex_new()
1644 regex->pattern = g_strdup (pattern); in g_regex_new()
1645 regex->pcre_re = re; in g_regex_new()
1646 regex->compile_opts = compile_options; in g_regex_new()
1647 regex->match_opts = match_options; in g_regex_new()
1671 /* In GRegex the string are, by default, UTF-8 encoded. PCRE in regex_compile()
1672 * instead uses UTF-8 only if required with PCRE_UTF8. */ in regex_compile()
1675 /* disable utf-8 */ in regex_compile()
1680 /* enable utf-8 */ in regex_compile()
1764 return regex->pattern; in g_regex_get_pattern()
1784 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value); in g_regex_get_max_backref()
1804 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value); in g_regex_get_capture_count()
1824 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value); in g_regex_get_has_cr_or_lf()
1834 * pattern. This information is useful when doing multi-segment matching using
1846 pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND, in g_regex_get_max_lookbehind()
1860 * top-level within the compiled pattern.
1871 return map_to_pcre1_compile_flags (regex->compile_opts); in g_regex_get_compile_flags()
1889 return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK); in g_regex_get_match_flags()
1929 result = g_regex_match_full (regex, string, -1, 0, match_options, NULL, NULL); in g_regex_match_simple()
1947 * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
1954 * To retrieve all the non-overlapping matches of the pattern in
1957 * |[<!-- language="C" -->
1961 * // Print all uppercase-only words.
1965 * regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
1995 return g_regex_match_full (regex, string, -1, 0, match_options, in g_regex_match()
2003 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2019 * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
2031 * To retrieve all the non-overlapping matches of the pattern in
2034 * |[<!-- language="C" -->
2038 * // Print all uppercase-only words.
2043 * regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
2044 * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
2056 * g_printerr ("Error while matching: %s\n", error->message);
2132 return g_regex_match_all_full (regex, string, -1, 0, match_options, in g_regex_match_all()
2140 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2174 * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
2221 pcre_re = regex_compile (regex->pattern, in g_regex_match_all_full()
2222 regex->compile_opts | PCRE2_NO_AUTO_POSSESS, in g_regex_match_all_full()
2229 pcre_re = regex->pcre_re; in g_regex_match_all_full()
2239 info->matches = pcre2_dfa_match (pcre_re, in g_regex_match_all_full()
2240 (PCRE2_SPTR)info->string, info->string_len, in g_regex_match_all_full()
2241 info->pos, in g_regex_match_all_full()
2243 info->match_data, in g_regex_match_all_full()
2245 info->workspace, info->n_workspace); in g_regex_match_all_full()
2247 info->n_offsets = pcre2_get_ovector_count (info->match_data) * 2; in g_regex_match_all_full()
2248 ovector = pcre2_get_ovector_pointer (info->match_data); in g_regex_match_all_full()
2249 info->offsets = g_realloc (info->offsets, in g_regex_match_all_full()
2250 info->n_offsets * sizeof (gint)); in g_regex_match_all_full()
2251 for (i = 0; i < info->n_offsets; i++) in g_regex_match_all_full()
2253 info->offsets[i] = (int) ovector[i]; in g_regex_match_all_full()
2256 if (info->matches == PCRE2_ERROR_DFA_WSSIZE) in g_regex_match_all_full()
2258 /* info->workspace is too small. */ in g_regex_match_all_full()
2259 info->n_workspace *= 2; in g_regex_match_all_full()
2260 info->workspace = g_realloc (info->workspace, in g_regex_match_all_full()
2261 info->n_workspace * sizeof (gint)); in g_regex_match_all_full()
2264 else if (info->matches == 0) in g_regex_match_all_full()
2266 /* info->offsets is too small. */ in g_regex_match_all_full()
2267 info->n_offsets *= 2; in g_regex_match_all_full()
2268 info->offsets = g_realloc (info->offsets, in g_regex_match_all_full()
2269 info->n_offsets * sizeof (gint)); in g_regex_match_all_full()
2272 else if (IS_PCRE_ERROR (info->matches)) in g_regex_match_all_full()
2276 regex->pattern, match_error (info->matches)); in g_regex_match_all_full()
2284 /* set info->pos to -1 so that a call to g_match_info_next() fails. */ in g_regex_match_all_full()
2285 info->pos = -1; in g_regex_match_all_full()
2286 retval = info->matches >= 0; in g_regex_match_all_full()
2303 * Returns: The number of the subexpression or -1 if @name
2314 g_return_val_if_fail (regex != NULL, -1); in g_regex_get_string_number()
2315 g_return_val_if_fail (name != NULL, -1); in g_regex_get_string_number()
2317 num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR)name); in g_regex_get_string_number()
2319 num = -1; in g_regex_get_string_number()
2359 * Returns: (transfer full): a %NULL-terminated array of strings. Free
2380 result = g_regex_split_full (regex, string, -1, 0, match_options, 0, NULL); in g_regex_split_simple()
2409 * Returns: (transfer full): a %NULL-terminated gchar ** array. Free
2421 return g_regex_split_full (regex, string, -1, 0, in g_regex_split()
2429 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2458 * Returns: (transfer full): a %NULL-terminated gchar ** array. Free
2499 /* zero-length string */ in g_regex_split_full()
2500 if (string_len - start_position == 0) in g_regex_split_full()
2507 string_len - start_position); in g_regex_split_full()
2524 (match_info->offsets[0] == match_info->offsets[1]); in g_regex_split_full()
2530 if (last_separator_end != match_info->offsets[1]) in g_regex_split_full()
2536 match_info->offsets[0] - last_separator_end); in g_regex_split_full()
2552 /* if there was no match, copy to end of string. */ in g_regex_split_full()
2556 match_info->string_len - last_separator_end); in g_regex_split_full()
2559 /* no more tokens, end the loop. */ in g_regex_split_full()
2563 /* -1 to leave room for the last part. */ in g_regex_split_full()
2564 if (token_count >= max_tokens - 1) in g_regex_split_full()
2573 match_info->pos = PREV_CHAR (regex, &string[match_info->pos]) - string; in g_regex_split_full()
2576 * tokens, but we are at the end of the string, so there are no in g_regex_split_full()
2578 if (string_len > match_info->pos) in g_regex_split_full()
2580 gchar *token = g_strndup (string + match_info->pos, in g_regex_split_full()
2581 string_len - match_info->pos); in g_regex_split_full()
2588 last_separator_end = match_info->pos; in g_regex_split_full()
2593 last_separator_end = PREV_CHAR (regex, &string[last_separator_end]) - string; in g_regex_split_full()
2608 string_list[i++] = last->data; in g_regex_split_full()
2648 g_free (data->text); in free_interpolation_data()
2669 data->c = '\t'; in expand_escape()
2670 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2674 data->c = '\n'; in expand_escape()
2675 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2679 data->c = '\v'; in expand_escape()
2680 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2684 data->c = '\r'; in expand_escape()
2685 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2689 data->c = '\f'; in expand_escape()
2690 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2694 data->c = '\a'; in expand_escape()
2695 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2699 data->c = '\b'; in expand_escape()
2700 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2704 data->c = '\\'; in expand_escape()
2705 data->type = REPL_TYPE_CHARACTER; in expand_escape()
2741 data->type = REPL_TYPE_STRING; in expand_escape()
2742 data->text = g_new0 (gchar, 8); in expand_escape()
2743 g_unichar_to_utf8 (x, data->text); in expand_escape()
2747 data->type = REPL_TYPE_CHANGE_CASE; in expand_escape()
2748 data->change_case = CHANGE_CASE_LOWER_SINGLE; in expand_escape()
2752 data->type = REPL_TYPE_CHANGE_CASE; in expand_escape()
2753 data->change_case = CHANGE_CASE_UPPER_SINGLE; in expand_escape()
2757 data->type = REPL_TYPE_CHANGE_CASE; in expand_escape()
2758 data->change_case = CHANGE_CASE_LOWER; in expand_escape()
2762 data->type = REPL_TYPE_CHANGE_CASE; in expand_escape()
2763 data->change_case = CHANGE_CASE_UPPER; in expand_escape()
2767 data->type = REPL_TYPE_CHANGE_CASE; in expand_escape()
2768 data->change_case = CHANGE_CASE_NONE; in expand_escape()
2788 if (p - q == 0) in expand_escape()
2790 error_detail = _("zero-length symbolic reference"); in expand_escape()
2809 data->num = x; in expand_escape()
2810 data->type = REPL_TYPE_NUMERIC_REFERENCE; in expand_escape()
2826 data->text = g_strndup (q, p - q); in expand_escape()
2827 data->type = REPL_TYPE_SYMBOLIC_REFERENCE; in expand_escape()
2871 data->type = REPL_TYPE_STRING; in expand_escape()
2872 data->text = g_new0 (gchar, 8); in expand_escape()
2873 g_unichar_to_utf8 (x, data->text); in expand_escape()
2877 data->type = REPL_TYPE_NUMERIC_REFERENCE; in expand_escape()
2878 data->num = d; in expand_escape()
2886 error_detail = _("unknown escape sequence"); in expand_escape()
2899 (gulong)(p - replacement), in expand_escape()
2935 if (p - start > 0) in split_replacement()
2938 data->text = g_strndup (start, p - start); in split_replacement()
2939 data->type = REPL_TYPE_STRING; in split_replacement()
2997 for (list = data; list; list = list->next) in interpolate_replacement()
2999 idata = list->data; in interpolate_replacement()
3000 switch (idata->type) in interpolate_replacement()
3003 string_append (result, idata->text, &change_case); in interpolate_replacement()
3006 g_string_append_c (result, CHANGE_CASE (idata->c, change_case)); in interpolate_replacement()
3011 match = g_match_info_fetch (match_info, idata->num); in interpolate_replacement()
3019 match = g_match_info_fetch_named (match_info, idata->text); in interpolate_replacement()
3027 change_case = idata->change_case; in interpolate_replacement()
3043 InterpolationData *data = list->data; in interpolation_list_needs_match()
3045 if (data->type == REPL_TYPE_SYMBOLIC_REFERENCE || in interpolation_list_needs_match()
3046 data->type == REPL_TYPE_NUMERIC_REFERENCE) in interpolation_list_needs_match()
3051 list = list->next; in interpolation_list_needs_match()
3061 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
3070 * number-th captured subexpression of the match, '\g<name>' refers
3078 * - \l: Convert to lower case the next character
3079 * - \u: Convert to upper case the next character
3080 * - \L: Convert to lower case till \E
3081 * - \U: Convert to upper case till \E
3082 * - \E: End case modification
3086 * The @replacement string must be UTF-8 encoded even if #G_REGEX_RAW was
3087 * passed to g_regex_new(). If you want to use not UTF-8 encoded strings
3154 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
3199 * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
3215 * |[<!-- language="C" -->
3246 * res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL);
3292 match_info->offsets[0] - str_pos); in g_regex_replace_eval()
3294 str_pos = match_info->offsets[1]; in g_regex_replace_eval()
3305 g_string_append_len (result, string + str_pos, string_len - str_pos); in g_regex_replace_eval()
3317 * (see g_regex_replace()), i.e. that all escape sequences in
3356 * @string: the string to escape
3362 * For completeness, @length can be -1 for a nul-terminated string.
3365 * Returns: a newly-allocated escaped string
3395 g_string_append_len (escaped, piece_start, p - piece_start); in g_regex_escape_nul()
3417 g_string_append_len (escaped, piece_start, end - piece_start); in g_regex_escape_nul()
3424 * @string: (array length=length): the string to escape
3425 * @length: the length of @string, in bytes, or -1 if @string is nul-terminated
3435 * Returns: a newly-allocated escaped string
3476 g_string_append_len (escaped, piece_start, p - piece_start); in g_regex_escape_string()
3491 g_string_append_len (escaped, piece_start, end - piece_start); in g_regex_escape_string()