1 /**********************************************************************
2 regposix.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #define regex_t onig_regex_t
33 #include "regint.h"
34 #undef regex_t
35 #include "onigposix.h"
36
/*
 * ONIG_C(reg)  : the `onig` member of a POSIX-layer regex_t, viewed as a
 *                pointer to the native Oniguruma regex object.
 * PONIG_C(reg) : the address of that same member, typed as onig_regex_t**,
 *                for APIs that store a newly created regex through an
 *                out-parameter (regcomp() hands it to onig_new()).
 */
#define ONIG_C(reg)     ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg)    ((onig_regex_t** )(&(reg)->onig))
39
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Store in `len` the byte length of the NUL-terminated string `s`, not
 * counting the terminator.
 *
 *  - When the encoding's minimum character length is one byte, the string
 *    is scanned for the first zero byte (the same result strlen(s) gives).
 *  - Otherwise the count is delegated to onigenc_str_bytelen_null(), since
 *    a single zero byte is not necessarily a terminator in such encodings.
 *
 * `enc` and `s` are evaluated more than once, so pass expressions without
 * side effects.  All arguments are parenthesized so that expression
 * arguments (e.g. `p + 1`) expand correctly.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* tmps = (UChar* )(s); \
    while (*tmps != 0) tmps++; \
    (len) = (int )(tmps - (UChar* )(s)); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
51
/* One row of the Oniguruma-to-POSIX error translation table. */
typedef struct {
  int onig_err;     /* Oniguruma status code (lookup key) */
  int posix_err;    /* POSIX-layer error code reported for that key */
} O2PERR;
56
57 static int
onig2posix_error_code(int code)58 onig2posix_error_code(int code)
59 {
60 static const O2PERR o2p[] = {
61 { ONIG_MISMATCH, REG_NOMATCH },
62 { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
63 { ONIGERR_MEMORY, REG_ESPACE },
64 { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
65 { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
66 { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
67 { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
68 { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
69 { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
70 { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
71 { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
72 { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
73 { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
74 { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
75 { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
76 { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
77 { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
78 { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
79 { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
80 { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
81 { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
82 { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
83 { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
84 { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
85 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
86 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
87 { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
88 { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
89 { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
90 { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
91 { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
92 { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
93 { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
94 { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
95 { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
96 { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
97 { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
98 { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
99 { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
100 { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
101 { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
102 { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
103 { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
104 { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
105 { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
106 { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
107 { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
108 { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
109 { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
110 { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
111 { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
112 { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
113 { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
114 { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
115 { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
116 { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
117 { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
118 { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
119 { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
120
121 };
122
123 int i;
124
125 if (code >= 0) return 0;
126
127 for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
128 if (code == o2p[i].onig_err)
129 return o2p[i].posix_err;
130 }
131
132 return REG_EONIG_INTERNAL; /* but, unknown error code */
133 }
134
135 extern int
regcomp(regex_t * reg,const char * pattern,int posix_options)136 regcomp(regex_t* reg, const char* pattern, int posix_options)
137 {
138 int r, len;
139 OnigSyntaxType* syntax = OnigDefaultSyntax;
140 OnigOptionType options;
141
142 if ((posix_options & REG_EXTENDED) == 0)
143 syntax = ONIG_SYNTAX_POSIX_BASIC;
144
145 options = syntax->options;
146 if ((posix_options & REG_ICASE) != 0)
147 ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
148 if ((posix_options & REG_NEWLINE) != 0) {
149 ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
150 ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
151 }
152
153 reg->comp_options = posix_options;
154
155 ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
156 r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
157 options, OnigEncDefaultCharEncoding, syntax,
158 (OnigErrorInfo* )NULL);
159 if (r != ONIG_NORMAL) {
160 return onig2posix_error_code(r);
161 }
162
163 reg->re_nsub = ONIG_C(reg)->num_mem;
164 return 0;
165 }
166
167 extern int
regexec(regex_t * reg,const char * str,size_t nmatch,regmatch_t pmatch[],int posix_options)168 regexec(regex_t* reg, const char* str, size_t nmatch,
169 regmatch_t pmatch[], int posix_options)
170 {
171 int r, i, len;
172 UChar* end;
173 regmatch_t* pm;
174 OnigOptionType options;
175
176 options = ONIG_OPTION_POSIX_REGION;
177 if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
178 if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
179
180 if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
181 pm = (regmatch_t* )NULL;
182 nmatch = 0;
183 }
184 else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
185 pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
186 * (ONIG_C(reg)->num_mem + 1));
187 if (pm == NULL)
188 return REG_ESPACE;
189 }
190 else {
191 pm = pmatch;
192 }
193
194 ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
195 end = (UChar* )(str + len);
196 r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
197 (OnigRegion* )pm, options);
198
199 if (r >= 0) {
200 r = 0; /* Match */
201 if (pm != pmatch && pm != NULL) {
202 xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
203 }
204 }
205 else if (r == ONIG_MISMATCH) {
206 r = REG_NOMATCH;
207 for (i = 0; i < (int )nmatch; i++)
208 pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
209 }
210 else {
211 r = onig2posix_error_code(r);
212 }
213
214 if (pm != pmatch && pm != NULL)
215 xfree(pm);
216
217 #if 0
218 if (reg->re_nsub > nmatch - 1)
219 reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
220 #endif
221
222 return r;
223 }
224
225 extern void
regfree(regex_t * reg)226 regfree(regex_t* reg)
227 {
228 onig_free(ONIG_C(reg));
229 }
230
231
232 extern void
reg_set_encoding(int mb_code)233 reg_set_encoding(int mb_code)
234 {
235 OnigEncoding enc;
236
237 switch (mb_code) {
238 case REG_POSIX_ENCODING_ASCII:
239 enc = ONIG_ENCODING_ASCII;
240 break;
241 case REG_POSIX_ENCODING_EUC_JP:
242 enc = ONIG_ENCODING_EUC_JP;
243 break;
244 case REG_POSIX_ENCODING_SJIS:
245 enc = ONIG_ENCODING_SJIS;
246 break;
247 case REG_POSIX_ENCODING_UTF8:
248 enc = ONIG_ENCODING_UTF8;
249 break;
250 case REG_POSIX_ENCODING_UTF16_BE:
251 enc = ONIG_ENCODING_UTF16_BE;
252 break;
253 case REG_POSIX_ENCODING_UTF16_LE:
254 enc = ONIG_ENCODING_UTF16_LE;
255 break;
256
257 default:
258 return ;
259 break;
260 }
261
262 onigenc_set_default_encoding(enc);
263 }
264
265 extern int
reg_name_to_group_numbers(regex_t * reg,const unsigned char * name,const unsigned char * name_end,int ** nums)266 reg_name_to_group_numbers(regex_t* reg,
267 const unsigned char* name, const unsigned char* name_end, int** nums)
268 {
269 return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
270 }
271
272 typedef struct {
273 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
274 regex_t* reg;
275 void* arg;
276 } i_wrap;
277
278 static int
i_wrapper(const UChar * name,const UChar * name_end,int ng,int * gs,onig_regex_t * reg ARG_UNUSED,void * arg)279 i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
280 onig_regex_t* reg ARG_UNUSED, void* arg)
281 {
282 i_wrap* warg = (i_wrap* )arg;
283
284 return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
285 }
286
287 extern int
reg_foreach_name(regex_t * reg,int (* func)(const unsigned char *,const unsigned char *,int,int *,regex_t *,void *),void * arg)288 reg_foreach_name(regex_t* reg,
289 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
290 void* arg)
291 {
292 i_wrap warg;
293
294 warg.func = func;
295 warg.reg = reg;
296 warg.arg = arg;
297
298 return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
299 }
300
301 extern int
reg_number_of_names(regex_t * reg)302 reg_number_of_names(regex_t* reg)
303 {
304 return onig_number_of_names(ONIG_C(reg));
305 }
306