1 #include <assert.h>
2 #include <stdint.h>
3 #include <stdio.h>
4 #include <string.h>
5
6 #include "regex.h"
7 #include "label_file.h"
8
/*
 * Size type whose width is reported by regex_arch_string():
 * PCRE2_SIZE when building against PCRE2, plain size_t otherwise.
 */
#if defined(USE_PCRE2)
#define REGEX_ARCH_SIZE_T PCRE2_SIZE
#else
#define REGEX_ARCH_SIZE_T size_t
#endif
14
#ifndef __BYTE_ORDER__

/*
 * If the compiler doesn't define __BYTE_ORDER__, fall back to the C
 * library <endian.h> header definitions and map them onto the
 * compiler-style names.
 */
#include <endian.h>
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/*
 * The trailing double underscore is required: the code in this file
 * compares __BYTE_ORDER__ against __ORDER_LITTLE_ENDIAN__ and
 * __ORDER_BIG_ENDIAN__.
 */
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
29
30 #ifdef USE_PCRE2
regex_arch_string(void)31 char const *regex_arch_string(void)
32 {
33 static char arch_string_buffer[32];
34 static char const *arch_string = "";
35 char const *endianness = NULL;
36 int rc;
37
38 if (arch_string[0] == '\0') {
39 if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
40 endianness = "el";
41 else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
42 endianness = "eb";
43
44 if (!endianness)
45 return NULL;
46
47 rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
48 "%zu-%zu-%s", sizeof(void *),
49 sizeof(REGEX_ARCH_SIZE_T),
50 endianness);
51 if (rc < 0)
52 abort();
53
54 arch_string = &arch_string_buffer[0];
55 }
56 return arch_string;
57 }
58
59 struct regex_data {
60 pcre2_code *regex; /* compiled regular expression */
61 /*
62 * match data block required for the compiled
63 * pattern in pcre2
64 */
65 pcre2_match_data *match_data;
66 };
67
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)68 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
69 struct regex_error_data *errordata)
70 {
71 memset(errordata, 0, sizeof(struct regex_error_data));
72
73 *regex = regex_data_create();
74 if (!(*regex))
75 return -1;
76
77 (*regex)->regex = pcre2_compile(
78 (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
79 &errordata->error_code, &errordata->error_offset, NULL);
80 if (!(*regex)->regex) {
81 goto err;
82 }
83
84 (*regex)->match_data =
85 pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
86 if (!(*regex)->match_data) {
87 goto err;
88 }
89 return 0;
90
91 err:
92 regex_data_free(*regex);
93 *regex = NULL;
94 return -1;
95 }
96
regex_version(void)97 char const *regex_version(void)
98 {
99 static char version_buf[256];
100 size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
101 if (len <= 0 || len > sizeof(version_buf))
102 return NULL;
103
104 pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
105 return version_buf;
106 }
107
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int do_load_precompregex)108 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
109 int do_load_precompregex)
110 {
111 int rc;
112 uint32_t entry_len;
113
114 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
115 if (rc < 0)
116 return -1;
117
118 if (entry_len && do_load_precompregex) {
119 /*
120 * this should yield exactly one because we store one pattern at
121 * a time
122 */
123 rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
124 if (rc != 1)
125 return -1;
126
127 *regex = regex_data_create();
128 if (!*regex)
129 return -1;
130
131 rc = pcre2_serialize_decode(&(*regex)->regex, 1,
132 (PCRE2_SPTR)mmap_area->next_addr,
133 NULL);
134 if (rc != 1)
135 goto err;
136
137 (*regex)->match_data =
138 pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
139 if (!(*regex)->match_data)
140 goto err;
141 }
142
143 /* and skip the decoded bit */
144 rc = next_entry(NULL, mmap_area, entry_len);
145 if (rc < 0)
146 goto err;
147
148 return 0;
149 err:
150 regex_data_free(*regex);
151 *regex = NULL;
152 return -1;
153 }
154
regex_writef(struct regex_data * regex,FILE * fp,int do_write_precompregex)155 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
156 {
157 int rc = 0;
158 size_t len;
159 PCRE2_SIZE serialized_size;
160 uint32_t to_write = 0;
161 PCRE2_UCHAR *bytes = NULL;
162
163 if (do_write_precompregex) {
164 /* encode the patter for serialization */
165 rc = pcre2_serialize_encode((const pcre2_code **)®ex->regex,
166 1, &bytes, &serialized_size, NULL);
167 if (rc != 1) {
168 rc = -1;
169 goto out;
170 }
171 to_write = serialized_size;
172 }
173
174 /* write serialized pattern's size */
175 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
176 if (len != 1) {
177 rc = -1;
178 goto out;
179 }
180
181 if (do_write_precompregex) {
182 /* write serialized pattern */
183 len = fwrite(bytes, 1, to_write, fp);
184 if (len != to_write)
185 rc = -1;
186 }
187
188 out:
189 if (bytes)
190 pcre2_serialize_free(bytes);
191
192 return rc;
193 }
194
regex_data_free(struct regex_data * regex)195 void regex_data_free(struct regex_data *regex)
196 {
197 if (regex) {
198 if (regex->regex)
199 pcre2_code_free(regex->regex);
200 if (regex->match_data)
201 pcre2_match_data_free(regex->match_data);
202 free(regex);
203 }
204 }
205
regex_match(struct regex_data * regex,char const * subject,int partial)206 int regex_match(struct regex_data *regex, char const *subject, int partial)
207 {
208 int rc;
209 rc = pcre2_match(
210 regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
211 partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
212 if (rc > 0)
213 return REGEX_MATCH;
214 switch (rc) {
215 case PCRE2_ERROR_PARTIAL:
216 return REGEX_MATCH_PARTIAL;
217 case PCRE2_ERROR_NOMATCH:
218 return REGEX_NO_MATCH;
219 default:
220 return REGEX_ERROR;
221 }
222 }
223
224 /*
225 * TODO Replace this compare function with something that actually compares the
226 * regular expressions.
227 * This compare function basically just compares the binary representations of
228 * the automatons, and because this representation contains pointers and
229 * metadata, it can only return a match if regex1 == regex2.
230 * Preferably, this function would be replaced with an algorithm that computes
231 * the equivalence of the automatons systematically.
232 */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)233 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
234 {
235 int rc;
236 size_t len1, len2;
237 rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
238 assert(rc == 0);
239 rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
240 assert(rc == 0);
241 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
242 return SELABEL_INCOMPARABLE;
243
244 return SELABEL_EQUAL;
245 }
246
247 #else // !USE_PCRE2
/*
 * Architecture string for the non-PCRE2 build: always the constant "N/A".
 */
char const *regex_arch_string(void)
{
	static char const not_applicable[] = "N/A";

	return not_applicable;
}
252
/*
 * libpcre gained pcre_free_study() in version 8.20; for anything older,
 * map it onto pcre_free.
 */
#if PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)
#define pcre_free_study pcre_free
#endif
257
258 struct regex_data {
259 int owned; /*
260 * non zero if regex and pcre_extra is owned by this
261 * structure and thus must be freed on destruction.
262 */
263 pcre *regex; /* compiled regular expression */
264 union {
265 pcre_extra *sd; /* pointer to extra compiled stuff */
266 pcre_extra lsd; /* used to hold the mmap'd version */
267 };
268 };
269
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)270 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
271 struct regex_error_data *errordata)
272 {
273 memset(errordata, 0, sizeof(struct regex_error_data));
274
275 *regex = regex_data_create();
276 if (!(*regex))
277 return -1;
278
279 (*regex)->regex =
280 pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
281 &errordata->error_offset, NULL);
282 if (!(*regex)->regex)
283 goto err;
284
285 (*regex)->owned = 1;
286
287 (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
288 if (!(*regex)->sd && errordata->error_buffer)
289 goto err;
290
291 return 0;
292
293 err:
294 regex_data_free(*regex);
295 *regex = NULL;
296 return -1;
297 }
298
/* Return the version string reported by pcre_version(). */
char const *regex_version(void)
{
	char const *version = pcre_version();

	return version;
}
303
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int unused)304 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
305 int unused __attribute__((unused)))
306 {
307 int rc;
308 uint32_t entry_len;
309 size_t info_len;
310
311 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
312 if (rc < 0 || !entry_len)
313 return -1;
314
315 *regex = regex_data_create();
316 if (!(*regex))
317 return -1;
318
319 (*regex)->owned = 0;
320 (*regex)->regex = (pcre *)mmap_area->next_addr;
321 rc = next_entry(NULL, mmap_area, entry_len);
322 if (rc < 0)
323 goto err;
324
325 /*
326 * Check that regex lengths match. pcre_fullinfo()
327 * also validates its magic number.
328 */
329 rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
330 if (rc < 0 || info_len != entry_len)
331 goto err;
332
333 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
334 if (rc < 0 || !entry_len)
335 goto err;
336
337 if (entry_len) {
338 (*regex)->lsd.study_data = (void *)mmap_area->next_addr;
339 (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
340 rc = next_entry(NULL, mmap_area, entry_len);
341 if (rc < 0)
342 goto err;
343
344 /* Check that study data lengths match. */
345 rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
346 PCRE_INFO_STUDYSIZE, &info_len);
347 if (rc < 0 || info_len != entry_len)
348 goto err;
349 }
350 return 0;
351
352 err:
353 regex_data_free(*regex);
354 *regex = NULL;
355 return -1;
356 }
357
get_pcre_extra(struct regex_data * regex)358 static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
359 {
360 if (!regex) return NULL;
361 if (regex->owned) {
362 return regex->sd;
363 } else if (regex->lsd.study_data) {
364 return ®ex->lsd;
365 } else {
366 return NULL;
367 }
368 }
369
regex_writef(struct regex_data * regex,FILE * fp,int unused)370 int regex_writef(struct regex_data *regex, FILE *fp,
371 int unused __attribute__((unused)))
372 {
373 int rc;
374 size_t len;
375 uint32_t to_write;
376 size_t size;
377 pcre_extra *sd = get_pcre_extra(regex);
378
379 /* determine the size of the pcre data in bytes */
380 rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
381 if (rc < 0)
382 return -1;
383
384 /* write the number of bytes in the pcre data */
385 to_write = size;
386 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
387 if (len != 1)
388 return -1;
389
390 /* write the actual pcre data as a char array */
391 len = fwrite(regex->regex, 1, to_write, fp);
392 if (len != to_write)
393 return -1;
394
395 if (sd) {
396 /* determine the size of the pcre study info */
397 rc =
398 pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
399 if (rc < 0)
400 return -1;
401 } else
402 size = 0;
403
404 /* write the number of bytes in the pcre study data */
405 to_write = size;
406 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
407 if (len != 1)
408 return -1;
409
410 if (sd) {
411 /* write the actual pcre study data as a char array */
412 len = fwrite(sd->study_data, 1, to_write, fp);
413 if (len != to_write)
414 return -1;
415 }
416
417 return 0;
418 }
419
regex_data_free(struct regex_data * regex)420 void regex_data_free(struct regex_data *regex)
421 {
422 if (regex) {
423 if (regex->owned) {
424 if (regex->regex)
425 pcre_free(regex->regex);
426 if (regex->sd)
427 pcre_free_study(regex->sd);
428 }
429 free(regex);
430 }
431 }
432
regex_match(struct regex_data * regex,char const * subject,int partial)433 int regex_match(struct regex_data *regex, char const *subject, int partial)
434 {
435 int rc;
436
437 rc = pcre_exec(regex->regex, get_pcre_extra(regex),
438 subject, strlen(subject), 0,
439 partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
440 switch (rc) {
441 case 0:
442 return REGEX_MATCH;
443 case PCRE_ERROR_PARTIAL:
444 return REGEX_MATCH_PARTIAL;
445 case PCRE_ERROR_NOMATCH:
446 return REGEX_NO_MATCH;
447 default:
448 return REGEX_ERROR;
449 }
450 }
451
452 /*
453 * TODO Replace this compare function with something that actually compares the
454 * regular expressions.
455 * This compare function basically just compares the binary representations of
456 * the automatons, and because this representation contains pointers and
457 * metadata, it can only return a match if regex1 == regex2.
458 * Preferably, this function would be replaced with an algorithm that computes
459 * the equivalence of the automatons systematically.
460 */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)461 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
462 {
463 int rc;
464 size_t len1, len2;
465 rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
466 assert(rc == 0);
467 rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
468 assert(rc == 0);
469 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
470 return SELABEL_INCOMPARABLE;
471
472 return SELABEL_EQUAL;
473 }
474
475 #endif
476
regex_data_create(void)477 struct regex_data *regex_data_create(void)
478 {
479 return (struct regex_data *)calloc(1, sizeof(struct regex_data));
480 }
481
regex_format_error(struct regex_error_data const * error_data,char * buffer,size_t buf_size)482 void regex_format_error(struct regex_error_data const *error_data, char *buffer,
483 size_t buf_size)
484 {
485 unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
486 char *ptr = &buffer[buf_size - the_end_length];
487 int rc = 0;
488 size_t pos = 0;
489 if (!buffer || !buf_size)
490 return;
491 rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
492 if (rc < 0)
493 /*
494 * If snprintf fails it constitutes a logical error that needs
495 * fixing.
496 */
497 abort();
498
499 pos += rc;
500 if (pos >= buf_size)
501 goto truncated;
502
503 if (error_data->error_offset > 0) {
504 #ifdef USE_PCRE2
505 rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
506 error_data->error_offset);
507 #else
508 rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
509 error_data->error_offset);
510 #endif
511 if (rc < 0)
512 abort();
513 }
514 pos += rc;
515 if (pos >= buf_size)
516 goto truncated;
517
518 #ifdef USE_PCRE2
519 rc = pcre2_get_error_message(error_data->error_code,
520 (PCRE2_UCHAR *)(buffer + pos),
521 buf_size - pos);
522 if (rc == PCRE2_ERROR_NOMEMORY)
523 goto truncated;
524 #else
525 rc = snprintf(buffer + pos, buf_size - pos, "%s",
526 error_data->error_buffer);
527 if (rc < 0)
528 abort();
529
530 if ((size_t)rc < strlen(error_data->error_buffer))
531 goto truncated;
532 #endif
533
534 return;
535
536 truncated:
537 /* replace end of string with "..." to indicate that it was truncated */
538 switch (the_end_length) {
539 /* no break statements, fall-through is intended */
540 case 4:
541 *ptr++ = '.';
542 case 3:
543 *ptr++ = '.';
544 case 2:
545 *ptr++ = '.';
546 case 1:
547 *ptr++ = '\0';
548 default:
549 break;
550 }
551 return;
552 }
553