• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This example code shows how to iterate over all regex matches in a file,
3  * emit the match location and print the contents of a capturing group.
4  */
5 
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/mman.h>
11 #include <sys/stat.h>
12 #include <sys/types.h>
13 #include <unistd.h>
14 
15 #include "rure.h"
16 
main()17 int main() {
18     /* Open a file and mmap it. */
19     int fd = open("sherlock.txt", O_RDONLY);
20     if (fd == -1) {
21         perror("failed to open sherlock.txt");
22         exit(1);
23     }
24     struct stat status;
25     if (fstat(fd, &status) == -1) {
26         perror("failed to stat sherlock.txt");
27         exit(1);
28     }
29     if ((uintmax_t)status.st_size > SIZE_MAX) {
30         perror("file too big");
31         exit(1);
32     }
33     if (status.st_size == 0) {
34         perror("file empty");
35         exit(1);
36     }
37     size_t sherlock_len = (size_t)status.st_size;
38     const uint8_t *sherlock = (const uint8_t *)mmap(
39         NULL, status.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
40     close(fd);
41     if (sherlock == MAP_FAILED) {
42         perror("could not mmap file");
43         exit(1);
44     }
45 
46     /*
47      * Compile the regular expression. A more convenient routine,
48      * rure_compile_must, is also available, which will abort the process if
49      * and print an error message to stderr if the regex compilation fails.
50      * We show the full gory details here as an example.
51      */
52     const char *pattern = "(\\w+)\\s+Holmes";
53     size_t pattern_len = strlen(pattern);
54     rure_error *err = rure_error_new();
55     rure *re = rure_compile((const uint8_t *)pattern, pattern_len,
56                             RURE_FLAG_UNICODE | RURE_FLAG_CASEI, NULL, err);
57     if (NULL == re) {
58         /* A null regex means compilation failed and an error exists. */
59         printf("compilation of %s failed: %s\n",
60                pattern, rure_error_message(err));
61         rure_error_free(err);
62         munmap((char*)sherlock, sherlock_len);
63         exit(1);
64     }
65     rure_error_free(err);
66 
67     /*
68      * Create an iterator to find all successive non-overlapping matches.
69      * For each match, we extract the location of the capturing group.
70      */
71     rure_match group0 = {0};
72     rure_match group1 = {0};
73     rure_captures *caps = rure_captures_new(re);
74     rure_iter *it = rure_iter_new(re);
75 
76     while (rure_iter_next_captures(it, sherlock, sherlock_len, caps)) {
77         /*
78          * Get the location of the full match and the capturing group.
79          * We know that both accesses are successful since the body of the
80          * loop only executes if there is a match and both capture groups
81          * must match in order for the entire regex to match.
82          *
83          * N.B. The zeroth group corresponds to the full match of the regex.
84          */
85         rure_captures_at(caps, 0, &group0);
86         rure_captures_at(caps, 1, &group1);
87         printf("%.*s (match at: %zu, %zu)\n",
88                (int)(group1.end - group1.start),
89                sherlock + group1.start,
90                group0.start, group0.end);
91     }
92 
93     /* Free all our resources. */
94     munmap((char*)sherlock, sherlock_len);
95     rure_captures_free(caps);
96     rure_iter_free(it);
97     rure_free(re);
98     return 0;
99 }
100