1 /* GRegex -- regular expression API wrapper around PCRE. 2 * 3 * Copyright (C) 1999, 2000 Scott Wimer 4 * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> 5 * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public License 18 * along with this library; if not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #ifndef __G_REGEX_H__ 22 #define __G_REGEX_H__ 23 24 #if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) 25 #error "Only <glib.h> can be included directly." 26 #endif 27 28 #include <glib/gerror.h> 29 #include <glib/gstring.h> 30 31 G_BEGIN_DECLS 32 33 /** 34 * GRegexError: 35 * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed. 36 * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed. 37 * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement 38 * string. 39 * @G_REGEX_ERROR_MATCH: The match process failed. 40 * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine. 41 * Since 2.16 42 * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since 2.16 43 * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since 2.16 44 * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\". 45 * Since 2.16 46 * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}" 47 * quantifier. Since 2.16 48 * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier. 49 * Since 2.16 50 * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for 51 * character class. Since 2.16 52 * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence 53 * in character class. Since 2.16 54 * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class. 55 * Since 2.16 56 * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since 2.16 57 * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?", 58 * "(?<" or "(?P". Since 2.16 59 * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are 60 * supported only within a class. Since 2.16 61 * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")" 62 * without opening "(". Since 2.16 63 * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent 64 * subpattern. Since 2.16 65 * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment. 66 * Since 2.16 67 * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large. 68 * Since 2.16 69 * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since 2.16 70 * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not 71 * fixed length. Since 2.16 72 * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(". 73 * Since 2.16 74 * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains 75 * more than two branches. Since 2.16 76 * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(". 77 * Since 2.16 78 * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name. 79 * Since 2.16 80 * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating 81 * elements are not supported. Since 2.16 82 * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence 83 * is too large. Since 2.16 84 * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since 2.16 85 * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: \\C not allowed in 86 * lookbehind assertion. Since 2.16 87 * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely. 88 * Since 2.16 89 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator 90 * in subpattern name. Since 2.16 91 * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have 92 * the same name. Since 2.16 93 * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence. 94 * Since 2.16 95 * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or 96 * "\\p". Since 2.16 97 * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long 98 * (maximum 32 characters). Since 2.16 99 * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum 100 * 10,000). Since 2.16 101 * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377". 102 * Since 2.16 103 * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more 104 * than one branch. Since 2.16 105 * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed. 106 * This error is never raised. Since: 2.16 Deprecated: 2.34 107 * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options. 108 * Since 2.16 109 * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced, 110 * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16 111 * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34 112 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracing 113 * control verb used does not allow an argument. Since: 2.34 114 * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracing 115 * control verb. Since: 2.34 116 * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34 117 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34 118 * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since 2.34 119 * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode, 120 * "[" is an invalid data character. Since: 2.34 121 * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the 122 * same number are not allowed. Since: 2.34 123 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracing control 124 * verb requires an argument. Since: 2.34 125 * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII 126 * character. Since: 2.34 127 * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or 128 * quoted name. Since: 2.34 129 * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34 130 * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34 131 * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)", 132 * "(*SKIP)", or "(*THEN)". Since: 2.34 133 * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is 134 * too large. Since: 2.34 135 * 136 * Error codes returned by regular expressions functions. 137 * 138 * Since: 2.14 139 */ 140 typedef enum 141 { 142 G_REGEX_ERROR_COMPILE, 143 G_REGEX_ERROR_OPTIMIZE, 144 G_REGEX_ERROR_REPLACE, 145 G_REGEX_ERROR_MATCH, 146 G_REGEX_ERROR_INTERNAL, 147 148 /* These are the error codes from PCRE + 100 */ 149 G_REGEX_ERROR_STRAY_BACKSLASH = 101, 150 G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102, 151 G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103, 152 G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104, 153 G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105, 154 G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106, 155 G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107, 156 G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108, 157 G_REGEX_ERROR_NOTHING_TO_REPEAT = 109, 158 G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112, 159 G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113, 160 G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114, 161 G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115, 162 G_REGEX_ERROR_UNTERMINATED_COMMENT = 118, 163 G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120, 164 G_REGEX_ERROR_MEMORY_ERROR = 121, 165 G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125, 166 G_REGEX_ERROR_MALFORMED_CONDITION = 126, 167 G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127, 168 G_REGEX_ERROR_ASSERTION_EXPECTED = 128, 169 G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130, 170 G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131, 171 G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134, 172 G_REGEX_ERROR_INVALID_CONDITION = 135, 173 G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136, 174 G_REGEX_ERROR_INFINITE_LOOP = 140, 175 G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142, 176 G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143, 177 G_REGEX_ERROR_MALFORMED_PROPERTY = 146, 178 G_REGEX_ERROR_UNKNOWN_PROPERTY = 147, 179 G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148, 180 G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149, 181 G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151, 182 G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154, 183 G_REGEX_ERROR_DEFINE_REPETION = 155, 184 G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156, 185 G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157, 186 G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158, 187 G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159, 188 G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160, 189 G_REGEX_ERROR_NUMBER_TOO_BIG = 161, 190 G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162, 191 G_REGEX_ERROR_MISSING_DIGIT = 163, 192 G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164, 193 G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165, 194 G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166, 195 G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168, 196 G_REGEX_ERROR_MISSING_NAME = 169, 197 G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171, 198 G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172, 199 G_REGEX_ERROR_NAME_TOO_LONG = 175, 200 G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176 201 } GRegexError; 202 203 /** 204 * G_REGEX_ERROR: 205 * 206 * Error domain for regular expressions. Errors in this domain will be 207 * from the #GRegexError enumeration. See #GError for information on 208 * error domains. 209 * 210 * Since: 2.14 211 */ 212 #define G_REGEX_ERROR g_regex_error_quark () 213 214 GLIB_AVAILABLE_IN_ALL 215 GQuark g_regex_error_quark (void); 216 217 /** 218 * GRegexCompileFlags: 219 * @G_REGEX_CASELESS: Letters in the pattern match both upper- and 220 * lowercase letters. This option can be changed within a pattern 221 * by a "(?i)" option setting. 222 * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting 223 * of a single line of characters (even if it actually contains 224 * newlines). The "start of line" metacharacter ("^") matches only 225 * at the start of the string, while the "end of line" metacharacter 226 * ("$") matches only at the end of the string, or before a terminating 227 * newline (unless #G_REGEX_DOLLAR_ENDONLY is set). When 228 * #G_REGEX_MULTILINE is set, the "start of line" and "end of line" 229 * constructs match immediately following or immediately before any 230 * newline in the string, respectively, as well as at the very start 231 * and end. This can be changed within a pattern by a "(?m)" option 232 * setting. 233 * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all 234 * characters, including newlines. Without it, newlines are excluded. 235 * This option can be changed within a pattern by a ("?s") option setting. 236 * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are 237 * totally ignored except when escaped or inside a character class. 238 * Whitespace does not include the VT character (code 11). In addition, 239 * characters between an unescaped "#" outside a character class and 240 * the next newline character, inclusive, are also ignored. This can 241 * be changed within a pattern by a "(?x)" option setting. 242 * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is, 243 * it is constrained to match only at the first matching point in the 244 * string that is being searched. This effect can also be achieved by 245 * appropriate constructs in the pattern itself such as the "^" 246 * metacharacter. 247 * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern 248 * matches only at the end of the string. Without this option, a 249 * dollar also matches immediately before the final character if 250 * it is a newline (but not before any other newlines). This option 251 * is ignored if #G_REGEX_MULTILINE is set. 252 * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that 253 * they are not greedy by default, but become greedy if followed by "?". 254 * It can also be set by a "(?U)" option setting within the pattern. 255 * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this 256 * flag they are considered as a raw sequence of bytes. 257 * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing 258 * parentheses in the pattern. Any opening parenthesis that is not 259 * followed by "?" behaves as if it were followed by "?:" but named 260 * parentheses can still be used for capturing (and they acquire numbers 261 * in the usual way). 262 * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will 263 * be used many times, then it may be worth the effort to optimize it 264 * to improve the speed of matches. 265 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the 266 * first newline. Since: 2.34 267 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not 268 * be unique. This can be helpful for certain types of pattern when it 269 * is known that only one instance of the named subpattern can ever be 270 * matched. 271 * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is 272 * recognized. If this option is set, the only recognized newline character 273 * is '\r'. 274 * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is 275 * recognized. If this option is set, the only recognized newline character 276 * is '\n'. 277 * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is 278 * recognized. If this option is set, the only recognized newline character 279 * sequence is '\r\n'. 280 * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence 281 * is recognized. If this option is set, the only recognized newline character 282 * sequences are '\r', '\n', and '\r\n'. Since: 2.34 283 * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence 284 * is recognised. If this option is set, then "\R" only recognizes the newline 285 * characters '\r', '\n' and '\r\n'. Since: 2.34 286 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with 287 * JavaScript rather than PCRE. Since: 2.34 288 * 289 * Flags specifying compile-time options. 290 * 291 * Since: 2.14 292 */ 293 /* Remember to update G_REGEX_COMPILE_MASK in gregex.c after 294 * adding a new flag. 295 */ 296 typedef enum 297 { 298 G_REGEX_CASELESS = 1 << 0, 299 G_REGEX_MULTILINE = 1 << 1, 300 G_REGEX_DOTALL = 1 << 2, 301 G_REGEX_EXTENDED = 1 << 3, 302 G_REGEX_ANCHORED = 1 << 4, 303 G_REGEX_DOLLAR_ENDONLY = 1 << 5, 304 G_REGEX_UNGREEDY = 1 << 9, 305 G_REGEX_RAW = 1 << 11, 306 G_REGEX_NO_AUTO_CAPTURE = 1 << 12, 307 G_REGEX_OPTIMIZE = 1 << 13, 308 G_REGEX_FIRSTLINE = 1 << 18, 309 G_REGEX_DUPNAMES = 1 << 19, 310 G_REGEX_NEWLINE_CR = 1 << 20, 311 G_REGEX_NEWLINE_LF = 1 << 21, 312 G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, 313 G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, 314 G_REGEX_BSR_ANYCRLF = 1 << 23, 315 G_REGEX_JAVASCRIPT_COMPAT = 1 << 25 316 } GRegexCompileFlags; 317 318 /** 319 * GRegexMatchFlags: 320 * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is, 321 * it is constrained to match only at the first matching point in the 322 * string that is being searched. This effect can also be achieved by 323 * appropriate constructs in the pattern itself such as the "^" 324 * metacharacter. 325 * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is 326 * not the beginning of a line, so the circumflex metacharacter should 327 * not match before it. Setting this without #G_REGEX_MULTILINE (at 328 * compile time) causes circumflex never to match. This option affects 329 * only the behaviour of the circumflex metacharacter, it does not 330 * affect "\A". 331 * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is 332 * not the end of a line, so the dollar metacharacter should not match 333 * it nor (except in multiline mode) a newline immediately before it. 334 * Setting this without #G_REGEX_MULTILINE (at compile time) causes 335 * dollar never to match. This option affects only the behaviour of 336 * the dollar metacharacter, it does not affect "\Z" or "\z". 337 * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid 338 * match if this option is set. If there are alternatives in the pattern, 339 * they are tried. If all the alternatives match the empty string, the 340 * entire match fails. For example, if the pattern "a?b?" is applied to 341 * a string not beginning with "a" or "b", it matches the empty string 342 * at the start of the string. With this flag set, this match is not 343 * valid, so GRegex searches further into the string for occurrences 344 * of "a" or "b". 345 * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more 346 * documentation on partial matching see g_match_info_is_partial_match(). 347 * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when 348 * creating a new #GRegex, setting the '\r' character as line terminator. 349 * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when 350 * creating a new #GRegex, setting the '\n' character as line terminator. 351 * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when 352 * creating a new #GRegex, setting the '\r\n' characters sequence as line terminator. 353 * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when 354 * creating a new #GRegex, any Unicode newline sequence 355 * is recognised as a newline. These are '\r', '\n' and '\rn', and the 356 * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), 357 * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and 358 * U+2029 PARAGRAPH SEPARATOR. 359 * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when 360 * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence 361 * is recognized as a newline. Since: 2.34 362 * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when 363 * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences 364 * are recognized as a newline by "\R". Since: 2.34 365 * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when 366 * creating a new #GRegex; any Unicode newline character or character sequence 367 * are recognized as a newline by "\R". These are '\r', '\n' and '\rn', and the 368 * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), 369 * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and 370 * U+2029 PARAGRAPH SEPARATOR. Since: 2.34 371 * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34 372 * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast to 373 * to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match 374 * is found, without continuing to search for a possible complete match. See 375 * g_match_info_is_partial_match() for more information. Since: 2.34 376 * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to 377 * the start of the matched string. For anchored 378 * patterns this can only happen for pattern containing "\K". Since: 2.34 379 * 380 * Flags specifying match-time options. 381 * 382 * Since: 2.14 383 */ 384 /* Remember to update G_REGEX_MATCH_MASK in gregex.c after 385 * adding a new flag. */ 386 typedef enum 387 { 388 G_REGEX_MATCH_ANCHORED = 1 << 4, 389 G_REGEX_MATCH_NOTBOL = 1 << 7, 390 G_REGEX_MATCH_NOTEOL = 1 << 8, 391 G_REGEX_MATCH_NOTEMPTY = 1 << 10, 392 G_REGEX_MATCH_PARTIAL = 1 << 15, 393 G_REGEX_MATCH_NEWLINE_CR = 1 << 20, 394 G_REGEX_MATCH_NEWLINE_LF = 1 << 21, 395 G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF, 396 G_REGEX_MATCH_NEWLINE_ANY = 1 << 22, 397 G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY, 398 G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23, 399 G_REGEX_MATCH_BSR_ANY = 1 << 24, 400 G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL, 401 G_REGEX_MATCH_PARTIAL_HARD = 1 << 27, 402 G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28 403 } GRegexMatchFlags; 404 405 /** 406 * GRegex: 407 * 408 * A GRegex is the "compiled" form of a regular expression pattern. 409 * This structure is opaque and its fields cannot be accessed directly. 410 * 411 * Since: 2.14 412 */ 413 typedef struct _GRegex GRegex; 414 415 416 /** 417 * GMatchInfo: 418 * 419 * A GMatchInfo is an opaque struct used to return information about 420 * matches. 421 */ 422 typedef struct _GMatchInfo GMatchInfo; 423 424 /** 425 * GRegexEvalCallback: 426 * @match_info: the #GMatchInfo generated by the match. 427 * Use g_match_info_get_regex() and g_match_info_get_string() if you 428 * need the #GRegex or the matched string. 429 * @result: a #GString containing the new string 430 * @user_data: user data passed to g_regex_replace_eval() 431 * 432 * Specifies the type of the function passed to g_regex_replace_eval(). 433 * It is called for each occurrence of the pattern in the string passed 434 * to g_regex_replace_eval(), and it should append the replacement to 435 * @result. 436 * 437 * Returns: %FALSE to continue the replacement process, %TRUE to stop it 438 * 439 * Since: 2.14 440 */ 441 typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info, 442 GString *result, 443 gpointer user_data); 444 445 446 GLIB_AVAILABLE_IN_ALL 447 GRegex *g_regex_new (const gchar *pattern, 448 GRegexCompileFlags compile_options, 449 GRegexMatchFlags match_options, 450 GError **error); 451 GLIB_AVAILABLE_IN_ALL 452 GRegex *g_regex_ref (GRegex *regex); 453 GLIB_AVAILABLE_IN_ALL 454 void g_regex_unref (GRegex *regex); 455 GLIB_AVAILABLE_IN_ALL 456 const gchar *g_regex_get_pattern (const GRegex *regex); 457 GLIB_AVAILABLE_IN_ALL 458 gint g_regex_get_max_backref (const GRegex *regex); 459 GLIB_AVAILABLE_IN_ALL 460 gint g_regex_get_capture_count (const GRegex *regex); 461 GLIB_AVAILABLE_IN_ALL 462 gboolean g_regex_get_has_cr_or_lf (const GRegex *regex); 463 GLIB_AVAILABLE_IN_2_38 464 gint g_regex_get_max_lookbehind (const GRegex *regex); 465 GLIB_AVAILABLE_IN_ALL 466 gint g_regex_get_string_number (const GRegex *regex, 467 const gchar *name); 468 GLIB_AVAILABLE_IN_ALL 469 gchar *g_regex_escape_string (const gchar *string, 470 gint length); 471 GLIB_AVAILABLE_IN_ALL 472 gchar *g_regex_escape_nul (const gchar *string, 473 gint length); 474 475 GLIB_AVAILABLE_IN_ALL 476 GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex); 477 GLIB_AVAILABLE_IN_ALL 478 GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex); 479 480 /* Matching. */ 481 GLIB_AVAILABLE_IN_ALL 482 gboolean g_regex_match_simple (const gchar *pattern, 483 const gchar *string, 484 GRegexCompileFlags compile_options, 485 GRegexMatchFlags match_options); 486 GLIB_AVAILABLE_IN_ALL 487 gboolean g_regex_match (const GRegex *regex, 488 const gchar *string, 489 GRegexMatchFlags match_options, 490 GMatchInfo **match_info); 491 GLIB_AVAILABLE_IN_ALL 492 gboolean g_regex_match_full (const GRegex *regex, 493 const gchar *string, 494 gssize string_len, 495 gint start_position, 496 GRegexMatchFlags match_options, 497 GMatchInfo **match_info, 498 GError **error); 499 GLIB_AVAILABLE_IN_ALL 500 gboolean g_regex_match_all (const GRegex *regex, 501 const gchar *string, 502 GRegexMatchFlags match_options, 503 GMatchInfo **match_info); 504 GLIB_AVAILABLE_IN_ALL 505 gboolean g_regex_match_all_full (const GRegex *regex, 506 const gchar *string, 507 gssize string_len, 508 gint start_position, 509 GRegexMatchFlags match_options, 510 GMatchInfo **match_info, 511 GError **error); 512 513 /* String splitting. */ 514 GLIB_AVAILABLE_IN_ALL 515 gchar **g_regex_split_simple (const gchar *pattern, 516 const gchar *string, 517 GRegexCompileFlags compile_options, 518 GRegexMatchFlags match_options); 519 GLIB_AVAILABLE_IN_ALL 520 gchar **g_regex_split (const GRegex *regex, 521 const gchar *string, 522 GRegexMatchFlags match_options); 523 GLIB_AVAILABLE_IN_ALL 524 gchar **g_regex_split_full (const GRegex *regex, 525 const gchar *string, 526 gssize string_len, 527 gint start_position, 528 GRegexMatchFlags match_options, 529 gint max_tokens, 530 GError **error); 531 532 /* String replacement. */ 533 GLIB_AVAILABLE_IN_ALL 534 gchar *g_regex_replace (const GRegex *regex, 535 const gchar *string, 536 gssize string_len, 537 gint start_position, 538 const gchar *replacement, 539 GRegexMatchFlags match_options, 540 GError **error); 541 GLIB_AVAILABLE_IN_ALL 542 gchar *g_regex_replace_literal (const GRegex *regex, 543 const gchar *string, 544 gssize string_len, 545 gint start_position, 546 const gchar *replacement, 547 GRegexMatchFlags match_options, 548 GError **error); 549 GLIB_AVAILABLE_IN_ALL 550 gchar *g_regex_replace_eval (const GRegex *regex, 551 const gchar *string, 552 gssize string_len, 553 gint start_position, 554 GRegexMatchFlags match_options, 555 GRegexEvalCallback eval, 556 gpointer user_data, 557 GError **error); 558 GLIB_AVAILABLE_IN_ALL 559 gboolean g_regex_check_replacement (const gchar *replacement, 560 gboolean *has_references, 561 GError **error); 562 563 /* Match info */ 564 GLIB_AVAILABLE_IN_ALL 565 GRegex *g_match_info_get_regex (const GMatchInfo *match_info); 566 GLIB_AVAILABLE_IN_ALL 567 const gchar *g_match_info_get_string (const GMatchInfo *match_info); 568 569 GLIB_AVAILABLE_IN_ALL 570 GMatchInfo *g_match_info_ref (GMatchInfo *match_info); 571 GLIB_AVAILABLE_IN_ALL 572 void g_match_info_unref (GMatchInfo *match_info); 573 GLIB_AVAILABLE_IN_ALL 574 void g_match_info_free (GMatchInfo *match_info); 575 GLIB_AVAILABLE_IN_ALL 576 gboolean g_match_info_next (GMatchInfo *match_info, 577 GError **error); 578 GLIB_AVAILABLE_IN_ALL 579 gboolean g_match_info_matches (const GMatchInfo *match_info); 580 GLIB_AVAILABLE_IN_ALL 581 gint g_match_info_get_match_count (const GMatchInfo *match_info); 582 GLIB_AVAILABLE_IN_ALL 583 gboolean g_match_info_is_partial_match (const GMatchInfo *match_info); 584 GLIB_AVAILABLE_IN_ALL 585 gchar *g_match_info_expand_references(const GMatchInfo *match_info, 586 const gchar *string_to_expand, 587 GError **error); 588 GLIB_AVAILABLE_IN_ALL 589 gchar *g_match_info_fetch (const GMatchInfo *match_info, 590 gint match_num); 591 GLIB_AVAILABLE_IN_ALL 592 gboolean g_match_info_fetch_pos (const GMatchInfo *match_info, 593 gint match_num, 594 gint *start_pos, 595 gint *end_pos); 596 GLIB_AVAILABLE_IN_ALL 597 gchar *g_match_info_fetch_named (const GMatchInfo *match_info, 598 const gchar *name); 599 GLIB_AVAILABLE_IN_ALL 600 gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info, 601 const gchar *name, 602 gint *start_pos, 603 gint *end_pos); 604 GLIB_AVAILABLE_IN_ALL 605 gchar **g_match_info_fetch_all (const GMatchInfo *match_info); 606 607 G_END_DECLS 608 609 #endif /* __G_REGEX_H__ */ 610