• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <assert.h>
2 #include <stdint.h>
3 #include <stdio.h>
4 #include <string.h>
5 
6 #include "regex.h"
7 #include "label_file.h"
8 
/*
 * Size type whose width is embedded in the architecture string:
 * PCRE2_SIZE for PCRE2 builds, plain size_t otherwise.
 */
#if defined(USE_PCRE2)
#define REGEX_ARCH_SIZE_T PCRE2_SIZE
#else
#define REGEX_ARCH_SIZE_T size_t
#endif
14 
#ifndef __BYTE_ORDER__

/*
 * If the compiler doesn't define __BYTE_ORDER__, fall back to the C library
 * <endian.h> definitions and map them onto the compiler-style names.
 */
#include <endian.h>
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/* Historical spellings without the trailing underscores, kept as they were. */
#define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN __BIG_ENDIAN

/*
 * Spellings that regex_arch_string() actually compares against. Without
 * these the fallback path references undeclared identifiers.
 */
#ifndef __ORDER_LITTLE_ENDIAN__
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#endif
#ifndef __ORDER_BIG_ENDIAN__
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#endif
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
29 
30 #ifdef USE_PCRE2
regex_arch_string(void)31 char const *regex_arch_string(void)
32 {
33 	static char arch_string_buffer[32];
34 	static char const *arch_string = "";
35 	char const *endianness = NULL;
36 	int rc;
37 
38 	if (arch_string[0] == '\0') {
39 		if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
40 			endianness = "el";
41 		else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
42 			endianness = "eb";
43 
44 		if (!endianness)
45 			return NULL;
46 
47 		rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
48 				"%zu-%zu-%s", sizeof(void *),
49 				sizeof(REGEX_ARCH_SIZE_T),
50 				endianness);
51 		if (rc < 0)
52 			abort();
53 
54 		arch_string = &arch_string_buffer[0];
55 	}
56 	return arch_string;
57 }
58 
59 struct regex_data {
60 	pcre2_code *regex; /* compiled regular expression */
61 	/*
62 	 * match data block required for the compiled
63 	 * pattern in pcre2
64 	 */
65 	pcre2_match_data *match_data;
66 };
67 
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)68 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
69 		       struct regex_error_data *errordata)
70 {
71 	memset(errordata, 0, sizeof(struct regex_error_data));
72 
73 	*regex = regex_data_create();
74 	if (!(*regex))
75 		return -1;
76 
77 	(*regex)->regex = pcre2_compile(
78 	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
79 	    &errordata->error_code, &errordata->error_offset, NULL);
80 	if (!(*regex)->regex) {
81 		goto err;
82 	}
83 
84 	(*regex)->match_data =
85 	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
86 	if (!(*regex)->match_data) {
87 		goto err;
88 	}
89 	return 0;
90 
91 err:
92 	regex_data_free(*regex);
93 	*regex = NULL;
94 	return -1;
95 }
96 
regex_version(void)97 char const *regex_version(void)
98 {
99 	static char version_buf[256];
100 	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
101 	if (len <= 0 || len > sizeof(version_buf))
102 		return NULL;
103 
104 	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
105 	return version_buf;
106 }
107 
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int do_load_precompregex)108 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
109 		    int do_load_precompregex)
110 {
111 	int rc;
112 	uint32_t entry_len;
113 
114 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
115 	if (rc < 0)
116 		return -1;
117 
118 	if (entry_len && do_load_precompregex) {
119 		/*
120 		 * this should yield exactly one because we store one pattern at
121 		 * a time
122 		 */
123 		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
124 		if (rc != 1)
125 			return -1;
126 
127 		*regex = regex_data_create();
128 		if (!*regex)
129 			return -1;
130 
131 		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
132 					    (PCRE2_SPTR)mmap_area->next_addr,
133 					    NULL);
134 		if (rc != 1)
135 			goto err;
136 
137 		(*regex)->match_data =
138 		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
139 		if (!(*regex)->match_data)
140 			goto err;
141 	}
142 
143 	/* and skip the decoded bit */
144 	rc = next_entry(NULL, mmap_area, entry_len);
145 	if (rc < 0)
146 		goto err;
147 
148 	return 0;
149 err:
150 	regex_data_free(*regex);
151 	*regex = NULL;
152 	return -1;
153 }
154 
regex_writef(struct regex_data * regex,FILE * fp,int do_write_precompregex)155 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
156 {
157 	int rc = 0;
158 	size_t len;
159 	PCRE2_SIZE serialized_size;
160 	uint32_t to_write = 0;
161 	PCRE2_UCHAR *bytes = NULL;
162 
163 	if (do_write_precompregex) {
164 		/* encode the patter for serialization */
165 		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
166 					    1, &bytes, &serialized_size, NULL);
167 		if (rc != 1) {
168 			rc = -1;
169 			goto out;
170 		}
171 		to_write = serialized_size;
172 	}
173 
174 	/* write serialized pattern's size */
175 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
176 	if (len != 1) {
177 		rc = -1;
178 		goto out;
179 	}
180 
181 	if (do_write_precompregex) {
182 		/* write serialized pattern */
183 		len = fwrite(bytes, 1, to_write, fp);
184 		if (len != to_write)
185 			rc = -1;
186 	}
187 
188 out:
189 	if (bytes)
190 		pcre2_serialize_free(bytes);
191 
192 	return rc;
193 }
194 
regex_data_free(struct regex_data * regex)195 void regex_data_free(struct regex_data *regex)
196 {
197 	if (regex) {
198 		if (regex->regex)
199 			pcre2_code_free(regex->regex);
200 		if (regex->match_data)
201 			pcre2_match_data_free(regex->match_data);
202 		free(regex);
203 	}
204 }
205 
regex_match(struct regex_data * regex,char const * subject,int partial)206 int regex_match(struct regex_data *regex, char const *subject, int partial)
207 {
208 	int rc;
209 	rc = pcre2_match(
210 	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
211 	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
212 	if (rc > 0)
213 		return REGEX_MATCH;
214 	switch (rc) {
215 	case PCRE2_ERROR_PARTIAL:
216 		return REGEX_MATCH_PARTIAL;
217 	case PCRE2_ERROR_NOMATCH:
218 		return REGEX_NO_MATCH;
219 	default:
220 		return REGEX_ERROR;
221 	}
222 }
223 
224 /*
225  * TODO Replace this compare function with something that actually compares the
226  * regular expressions.
227  * This compare function basically just compares the binary representations of
228  * the automatons, and because this representation contains pointers and
229  * metadata, it can only return a match if regex1 == regex2.
230  * Preferably, this function would be replaced with an algorithm that computes
231  * the equivalence of the automatons systematically.
232  */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)233 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
234 {
235 	int rc;
236 	size_t len1, len2;
237 	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
238 	assert(rc == 0);
239 	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
240 	assert(rc == 0);
241 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
242 		return SELABEL_INCOMPARABLE;
243 
244 	return SELABEL_EQUAL;
245 }
246 
247 #else // !USE_PCRE2
/*
 * The legacy PCRE back-end does not build an architecture string; it always
 * reports the fixed placeholder "N/A".
 */
char const *regex_arch_string(void)
{
	static char const no_arch_string[] = "N/A";

	return no_arch_string;
}
252 
/*
 * libpcre older than 8.20 has no pcre_free_study(); use pcre_free() in its
 * place there.
 */
#if PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)
#define pcre_free_study pcre_free
#endif
257 
258 struct regex_data {
259 	int owned;   /*
260 		      * non zero if regex and pcre_extra is owned by this
261 		      * structure and thus must be freed on destruction.
262 		      */
263 	pcre *regex; /* compiled regular expression */
264 	union {
265 		pcre_extra *sd; /* pointer to extra compiled stuff */
266 		pcre_extra lsd; /* used to hold the mmap'd version */
267 	};
268 };
269 
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)270 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
271 		       struct regex_error_data *errordata)
272 {
273 	memset(errordata, 0, sizeof(struct regex_error_data));
274 
275 	*regex = regex_data_create();
276 	if (!(*regex))
277 		return -1;
278 
279 	(*regex)->regex =
280 	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
281 			 &errordata->error_offset, NULL);
282 	if (!(*regex)->regex)
283 		goto err;
284 
285 	(*regex)->owned = 1;
286 
287 	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
288 	if (!(*regex)->sd && errordata->error_buffer)
289 		goto err;
290 
291 	return 0;
292 
293 err:
294 	regex_data_free(*regex);
295 	*regex = NULL;
296 	return -1;
297 }
298 
/* Return whatever version string pcre_version() reports. */
char const *regex_version(void)
{
	char const *version = pcre_version();

	return version;
}
303 
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int unused)304 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
305 		    int unused __attribute__((unused)))
306 {
307 	int rc;
308 	uint32_t entry_len;
309 	size_t info_len;
310 
311 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
312 	if (rc < 0 || !entry_len)
313 		return -1;
314 
315 	*regex = regex_data_create();
316 	if (!(*regex))
317 		return -1;
318 
319 	(*regex)->owned = 0;
320 	(*regex)->regex = (pcre *)mmap_area->next_addr;
321 	rc = next_entry(NULL, mmap_area, entry_len);
322 	if (rc < 0)
323 		goto err;
324 
325 	/*
326 	 * Check that regex lengths match. pcre_fullinfo()
327 	 * also validates its magic number.
328 	 */
329 	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
330 	if (rc < 0 || info_len != entry_len)
331 		goto err;
332 
333 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
334 	if (rc < 0 || !entry_len)
335 		goto err;
336 
337 	if (entry_len) {
338 		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
339 		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
340 		rc = next_entry(NULL, mmap_area, entry_len);
341 		if (rc < 0)
342 			goto err;
343 
344 		/* Check that study data lengths match. */
345 		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
346 				   PCRE_INFO_STUDYSIZE, &info_len);
347 		if (rc < 0 || info_len != entry_len)
348 			goto err;
349 	}
350 	return 0;
351 
352 err:
353 	regex_data_free(*regex);
354 	*regex = NULL;
355 	return -1;
356 }
357 
get_pcre_extra(struct regex_data * regex)358 static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
359 {
360 	if (!regex) return NULL;
361 	if (regex->owned) {
362 		return regex->sd;
363 	} else if (regex->lsd.study_data) {
364 		return &regex->lsd;
365 	} else {
366 		return NULL;
367 	}
368 }
369 
regex_writef(struct regex_data * regex,FILE * fp,int unused)370 int regex_writef(struct regex_data *regex, FILE *fp,
371 		 int unused __attribute__((unused)))
372 {
373 	int rc;
374 	size_t len;
375 	uint32_t to_write;
376 	size_t size;
377 	pcre_extra *sd = get_pcre_extra(regex);
378 
379 	/* determine the size of the pcre data in bytes */
380 	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
381 	if (rc < 0)
382 		return -1;
383 
384 	/* write the number of bytes in the pcre data */
385 	to_write = size;
386 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
387 	if (len != 1)
388 		return -1;
389 
390 	/* write the actual pcre data as a char array */
391 	len = fwrite(regex->regex, 1, to_write, fp);
392 	if (len != to_write)
393 		return -1;
394 
395 	if (sd) {
396 		/* determine the size of the pcre study info */
397 		rc =
398 		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
399 		if (rc < 0)
400 			return -1;
401 	} else
402 		size = 0;
403 
404 	/* write the number of bytes in the pcre study data */
405 	to_write = size;
406 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
407 	if (len != 1)
408 		return -1;
409 
410 	if (sd) {
411 		/* write the actual pcre study data as a char array */
412 		len = fwrite(sd->study_data, 1, to_write, fp);
413 		if (len != to_write)
414 			return -1;
415 	}
416 
417 	return 0;
418 }
419 
regex_data_free(struct regex_data * regex)420 void regex_data_free(struct regex_data *regex)
421 {
422 	if (regex) {
423 		if (regex->owned) {
424 			if (regex->regex)
425 				pcre_free(regex->regex);
426 			if (regex->sd)
427 				pcre_free_study(regex->sd);
428 		}
429 		free(regex);
430 	}
431 }
432 
regex_match(struct regex_data * regex,char const * subject,int partial)433 int regex_match(struct regex_data *regex, char const *subject, int partial)
434 {
435 	int rc;
436 
437 	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
438 		       subject, strlen(subject), 0,
439 		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
440 	switch (rc) {
441 	case 0:
442 		return REGEX_MATCH;
443 	case PCRE_ERROR_PARTIAL:
444 		return REGEX_MATCH_PARTIAL;
445 	case PCRE_ERROR_NOMATCH:
446 		return REGEX_NO_MATCH;
447 	default:
448 		return REGEX_ERROR;
449 	}
450 }
451 
452 /*
453  * TODO Replace this compare function with something that actually compares the
454  * regular expressions.
455  * This compare function basically just compares the binary representations of
456  * the automatons, and because this representation contains pointers and
457  * metadata, it can only return a match if regex1 == regex2.
458  * Preferably, this function would be replaced with an algorithm that computes
459  * the equivalence of the automatons systematically.
460  */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)461 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
462 {
463 	int rc;
464 	size_t len1, len2;
465 	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
466 	assert(rc == 0);
467 	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
468 	assert(rc == 0);
469 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
470 		return SELABEL_INCOMPARABLE;
471 
472 	return SELABEL_EQUAL;
473 }
474 
475 #endif
476 
regex_data_create(void)477 struct regex_data *regex_data_create(void)
478 {
479 	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
480 }
481 
regex_format_error(struct regex_error_data const * error_data,char * buffer,size_t buf_size)482 void regex_format_error(struct regex_error_data const *error_data, char *buffer,
483 			size_t buf_size)
484 {
485 	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
486 	char *ptr = &buffer[buf_size - the_end_length];
487 	int rc = 0;
488 	size_t pos = 0;
489 	if (!buffer || !buf_size)
490 		return;
491 	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
492 	if (rc < 0)
493 		/*
494 		 * If snprintf fails it constitutes a logical error that needs
495 		 * fixing.
496 		 */
497 		abort();
498 
499 	pos += rc;
500 	if (pos >= buf_size)
501 		goto truncated;
502 
503 	if (error_data->error_offset > 0) {
504 #ifdef USE_PCRE2
505 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
506 			      error_data->error_offset);
507 #else
508 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
509 			      error_data->error_offset);
510 #endif
511 		if (rc < 0)
512 			abort();
513 	}
514 	pos += rc;
515 	if (pos >= buf_size)
516 		goto truncated;
517 
518 #ifdef USE_PCRE2
519 	rc = pcre2_get_error_message(error_data->error_code,
520 				     (PCRE2_UCHAR *)(buffer + pos),
521 				     buf_size - pos);
522 	if (rc == PCRE2_ERROR_NOMEMORY)
523 		goto truncated;
524 #else
525 	rc = snprintf(buffer + pos, buf_size - pos, "%s",
526 		      error_data->error_buffer);
527 	if (rc < 0)
528 		abort();
529 
530 	if ((size_t)rc < strlen(error_data->error_buffer))
531 		goto truncated;
532 #endif
533 
534 	return;
535 
536 truncated:
537 	/* replace end of string with "..." to indicate that it was truncated */
538 	switch (the_end_length) {
539 	/* no break statements, fall-through is intended */
540 	case 4:
541 		*ptr++ = '.';
542 	case 3:
543 		*ptr++ = '.';
544 	case 2:
545 		*ptr++ = '.';
546 	case 1:
547 		*ptr++ = '\0';
548 	default:
549 		break;
550 	}
551 	return;
552 }
553