1 /**
2 * Copyright (c) 2019, The Linux Foundation. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above
10 * copyright notice, this list of conditions and the following
11 * disclaimer in the documentation and/or other materials provided
12 * with the distribution.
13 * * Neither the name of The Linux Foundation nor the names of its
14 * contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifndef SBUF_PARSER_H
31 #define SBUF_PARSER_H
32
33 #include "sbuf.h"
34
35 /**
36 * Greedy Recursive Descent Parser in C
37 *
38 * Stop using strstr or regular expressions. This simple Recursive Descent Parser can be
39 * used to handle complex grammars.
40 *
41 * For example:
42 * parsing a query string form a uri
43 * input: "file:///foo/bar_far.so.1?_blah1&_bar=barval5&_barfar"
44 * expected output:
45 * parsed query: _blah1 =
46 * parsed query: _bar = barval5
47 * parsed query: _barfar =
48 *
49 * static int qmark(struct sbuf *buf) {
50 * return sbuf_char(buf, '?');
51 * }
52 * static int notandoreq(struct sbuf *buf) {
53 * return sbuf_notchars(buf, "&=");
54 * }
55 * static int notand(struct sbuf *buf) {
56 * return sbuf_notchar(buf, '&');
57 * }
58 *
59 * const char *name;
60 * int nameLen;
61 * const char *value;
62 * int valueLen;
63 * const char *data = "file:///foo/bar_far.so.1?_blah1&_bar=barval5&_barfar";
64 *
65 * //initialize
66 * sbuf_parser_init(&buf, data, strlen(data));
67 *
68 * //parse until question mark
69 * assert(sbuf_until(&buf, sbuf_any, qmark));
70 *
71 * //parse each query
72 * while(!sbuf_end(&buf)) {
73 * //record where the name starts
74 * name = sbuf_cur(&buf);
75 *
76 * //name is valid until '=' or '&'
77 * assert(sbuf_many1(&buf, notandoreq));
78 * nameLen = sbuf_cur(&buf) - name;
79 *
80 * value = 0;
81 * valueLen = 0;
82 * //if the next char is a '=' then we also get a value
83 * if(sbuf_char(&buf, '=')) {
84 * value = sbuf_cur(&buf);
85 *
86 * //value is until the next query that starts with '&'
87 * assert(sbuf_many1(&buf, notand));
88 * valueLen = sbuf_cur(&buf) - value;
89 * }
90 * //expect '&' or end
91 * sbuf_char(&buf, '&');
92 * printf("parsed query: %.*s = %.*s\n", nameLen, name, valueLen, value);
93 * }
94 *
95 */
96
97 //! init
sbuf_parser_init(struct sbuf * buf,const char * data,int dataLen)98 static __inline void sbuf_parser_init(struct sbuf* buf, const char *data, int dataLen) {
99 sbuf_init(buf, 0, (void*)data, dataLen);
100 }
101
102 //! current postiion
sbuf_cur(struct sbuf * buf)103 static __inline char *sbuf_cur(struct sbuf* buf) {
104 return (char*)sbuf_head(buf);
105 }
106
107 //! look at the next character if the buffer is still valid
sbuf_peek(struct sbuf * buf,char * c)108 static __inline int sbuf_peek(struct sbuf* buf, char* c) {
109 if(!sbuf_valid(buf)) {
110 return 0;
111 }
112 *c = *sbuf_cur(buf);
113 return 1;
114 }
115
116 //! returns true if the buffer is ended
sbuf_end(struct sbuf * buf)117 static __inline int sbuf_end(struct sbuf* buf) {
118 return sbuf_left(buf) == 0;
119 }
120
121 //! consume 1 char if its in string chars
sbuf_chars(struct sbuf * buf,const char * chars)122 static __inline int sbuf_chars(struct sbuf *buf, const char *chars) {
123 int i = 0;
124 char c;
125 if(!sbuf_peek(buf, &c)) {
126 return 0;
127 }
128 for(i = 0; chars[i] != 0; ++i) {
129 if(c == chars[i]) {
130 sbuf_advance(buf, 1);
131 return 1;
132 }
133 }
134 return 0;
135 }
136
137 //! consume 1 char only if its not in string chars
sbuf_notchars(struct sbuf * buf,const char * chars)138 static __inline int sbuf_notchars(struct sbuf *buf, const char *chars) {
139 int i = 0;
140 char c;
141 if(!sbuf_peek(buf, &c)) {
142 return 0;
143 }
144 for(i = 0; chars[i] != 0; ++i) {
145 if(c == chars[i]) {
146 return 0;
147 }
148 }
149 sbuf_advance(buf, 1);
150 return 1;
151 }
152
153 //! consume only char t
sbuf_char(struct sbuf * buf,const char t)154 static __inline int sbuf_char(struct sbuf *buf, const char t) {
155 char str[2] = {t, 0};
156 return sbuf_chars(buf, str);
157 }
158
159 //! consume any char except for t
sbuf_notchar(struct sbuf * buf,const char t)160 static __inline int sbuf_notchar(struct sbuf *buf, const char t) {
161 char str[2] = {t, 0};
162 return sbuf_notchars(buf, str);
163 }
164
165 /**
166 * consume any char
167 */
sbuf_any(struct sbuf * buf)168 static __inline int sbuf_any(struct sbuf* buf) {
169 return sbuf_notchars(buf, "");
170 }
171
172
173 /**
174 * range is pairs of characters
175 *
176 * pairs are inclusive, start must be less then or equal then the end
177 *
178 * for example: AZaz09--..
179 * matches uppercase and lowercase letters, numbers, dashes and dots
180 *
181 */
sbuf_range(struct sbuf * buf,const char * chars)182 static __inline int sbuf_range(struct sbuf *buf, const char *chars) {
183 int i, j;
184 char c;
185 if(!sbuf_peek(buf, &c)) {
186 return 0;
187 }
188 for(i = 0, j = 1; chars[i] != 0 && chars[j] != 0; i+=2,j+=2) {
189 if(c >= chars[i] && c <= chars[j]) {
190 sbuf_advance(buf, 1);
191 return 1;
192 }
193 }
194 return 0;
195 }
196
197
198 /**
199 * greedly consume and match the entire string
200 * empty string always succeeds without consuming any data
201 */
sbuf_string(struct sbuf * buf,const char * str)202 static __inline int sbuf_string(struct sbuf *buf, const char *str) {
203 int i = 0;
204 for(i = 0; str[i] != 0; ++i) {
205 if(!sbuf_char(buf, str[i])) {
206 return 0;
207 }
208 }
209 return 1;
210 }
211
212 /**
213 * consumes until fails
214 */
sbuf_many(struct sbuf * buf,int (* consume)(struct sbuf * buf))215 static __inline int sbuf_many(struct sbuf *buf,
216 int(*consume)(struct sbuf *buf))
217 {
218 if(!sbuf_valid(buf)) {
219 return 0;
220 }
221 while(consume(buf)) {;}
222 return 1;
223 }
224 /**
225 * consumes until fails, must consume at least 1
226 */
sbuf_many1(struct sbuf * buf,int (* consume)(struct sbuf * buf))227 static __inline int sbuf_many1(struct sbuf *buf,
228 int(*consume)(struct sbuf *buf))
229 {
230 if(!consume(buf)) {
231 return 0;
232 }
233 sbuf_many(buf, consume);
234 return 1;
235 }
236 /**
237 * runs 'consume' until 'stop' succeeds
238 * 'stop' must fail in such a way that it doesn't consume any data
239 */
sbuf_until(struct sbuf * buf,int (* consume)(struct sbuf * buf),int (* stop)(struct sbuf * buf))240 static __inline int sbuf_until(struct sbuf *buf,
241 int(*consume)(struct sbuf *buf),
242 int(*stop)(struct sbuf *buf))
243 {
244 while(!stop(buf)) {
245 if(!consume(buf)) {
246 return 0;
247 }
248 }
249 return 1;
250 }
251
252 /**
253 * allows for backtracking,
254 * @param parser, runs parser and only consume if it succeeds
255 */
sbuf_try(struct sbuf * buf,int (* parser)(struct sbuf * buf))256 static __inline int sbuf_try(struct sbuf *buf, int(*parser)(struct sbuf *buf))
257 {
258 struct sbuf tryp;
259 sbuf_parser_init(&tryp, sbuf_cur(buf), sbuf_left(buf));
260 if(parser(&tryp)) {
261 sbuf_advance(buf, sbuf_cur(&tryp) - sbuf_cur(buf));
262 return 1;
263 }
264 return 0;
265 }
266 #endif // SBUF_PARSER_H
267