1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2011 Intel Corporation; author: H. Peter Anvin
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
8 * Boston MA 02110-1301, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13 #include <inttypes.h>
14 #include <string.h>
15 #include <stdlib.h>
16 #include <ctype.h>
17 #include <dprintf.h>
18 #include "pxe.h"
19
/*
 * Parser states for picking <a href=...> values out of an HTML stream.
 *
 * The numeric value of each state is used directly as the row index into
 * statemachine[], so the values are spelled out explicitly.
 */
enum http_readdir_state {
    st_start     = 0,   /* Initial state: not inside a tag */
    st_open      = 1,   /* "<" */
    st_a         = 2,   /* "<a" */
    st_attribute = 3,   /* "<a " - ready for an attribute name */
    st_h         = 4,   /* "<a h" */
    st_hr        = 5,   /* "<a hr" */
    st_hre       = 6,   /* "<a hre" */
    st_href      = 7,   /* "<a href" */
    st_hrefeq    = 8,   /* "<a href=" - unquoted value */
    st_hrefqu    = 9,   /* "<a href=\"" - quoted value */
    st_badtag    = 10,  /* Inside a tag that is not "<a" */
    st_badtagqu  = 11,  /* Inside a quoted string in such a tag */
    st_badattr   = 12,  /* Inside an <a> attribute that is not href */
    st_badattrqu = 13,  /* Inside a quoted string in such an attribute */
};
36
/*
 * One row of the parser's transition table: the successor state for each
 * class of input character.  The classes are tested in the order the
 * members appear here, and the first match wins.
 */
struct machine {
    char xchar;         /* Row-specific character, compared with the lowercased input */
    uint8_t st_xchar;   /* Next state when the input matches xchar */
    uint8_t st_left;    /* < */
    uint8_t st_right;   /* > */
    uint8_t st_space;   /* white */
    uint8_t st_other;   /* anything else */
};
45
46 static const struct machine statemachine[] = {
47 /* xchar st_xchar st_left st_right st_space st_other */
48 { 0, 0, st_open, st_start, st_start, st_start },
49 { 'a', st_a, st_badtag, st_start, st_open, st_badtag },
50 { 0, 0, st_open, st_open, st_attribute, st_badtag },
51 { 'h', st_h, st_open, st_start, st_attribute, st_badattr },
52 { 'r', st_hr, st_open, st_start, st_attribute, st_badattr },
53 { 'e', st_hre, st_open, st_start, st_attribute, st_badattr },
54 { 'f', st_href, st_open, st_start, st_attribute, st_badattr },
55 { '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr },
56 { '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq },
57 { '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu },
58 { '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag },
59 { '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu },
60 { '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr },
61 { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu },
62 };
63
/*
 * One named HTML character entity: the code point it stands for and its
 * name as written between '&' and ';'.
 */
struct html_entity {
    uint16_t ucs;               /* Code point; 0 marks the end of the table */
    const char entity[9];       /* NUL-terminated name, at most 8 characters */
};
68
69 static const struct html_entity entities[] = {
70 { 34, "quot" },
71 { 38, "amp" },
72 { 60, "lt" },
73 { 62, "gt" },
74 #ifdef HTTP_ALL_ENTITIES
75 { 160, "nbsp" },
76 { 161, "iexcl" },
77 { 162, "cent" },
78 { 163, "pound" },
79 { 164, "curren" },
80 { 165, "yen" },
81 { 166, "brvbar" },
82 { 167, "sect" },
83 { 168, "uml" },
84 { 169, "copy" },
85 { 170, "ordf" },
86 { 171, "laquo" },
87 { 172, "not" },
88 { 173, "shy" },
89 { 174, "reg" },
90 { 175, "macr" },
91 { 176, "deg" },
92 { 177, "plusmn" },
93 { 178, "sup2" },
94 { 179, "sup3" },
95 { 180, "acute" },
96 { 181, "micro" },
97 { 182, "para" },
98 { 183, "middot" },
99 { 184, "cedil" },
100 { 185, "sup1" },
101 { 186, "ordm" },
102 { 187, "raquo" },
103 { 188, "frac14" },
104 { 189, "frac12" },
105 { 190, "frac34" },
106 { 191, "iquest" },
107 { 192, "Agrave" },
108 { 193, "Aacute" },
109 { 194, "Acirc" },
110 { 195, "Atilde" },
111 { 196, "Auml" },
112 { 197, "Aring" },
113 { 198, "AElig" },
114 { 199, "Ccedil" },
115 { 200, "Egrave" },
116 { 201, "Eacute" },
117 { 202, "Ecirc" },
118 { 203, "Euml" },
119 { 204, "Igrave" },
120 { 205, "Iacute" },
121 { 206, "Icirc" },
122 { 207, "Iuml" },
123 { 208, "ETH" },
124 { 209, "Ntilde" },
125 { 210, "Ograve" },
126 { 211, "Oacute" },
127 { 212, "Ocirc" },
128 { 213, "Otilde" },
129 { 214, "Ouml" },
130 { 215, "times" },
131 { 216, "Oslash" },
132 { 217, "Ugrave" },
133 { 218, "Uacute" },
134 { 219, "Ucirc" },
135 { 220, "Uuml" },
136 { 221, "Yacute" },
137 { 222, "THORN" },
138 { 223, "szlig" },
139 { 224, "agrave" },
140 { 225, "aacute" },
141 { 226, "acirc" },
142 { 227, "atilde" },
143 { 228, "auml" },
144 { 229, "aring" },
145 { 230, "aelig" },
146 { 231, "ccedil" },
147 { 232, "egrave" },
148 { 233, "eacute" },
149 { 234, "ecirc" },
150 { 235, "euml" },
151 { 236, "igrave" },
152 { 237, "iacute" },
153 { 238, "icirc" },
154 { 239, "iuml" },
155 { 240, "eth" },
156 { 241, "ntilde" },
157 { 242, "ograve" },
158 { 243, "oacute" },
159 { 244, "ocirc" },
160 { 245, "otilde" },
161 { 246, "ouml" },
162 { 247, "divide" },
163 { 248, "oslash" },
164 { 249, "ugrave" },
165 { 250, "uacute" },
166 { 251, "ucirc" },
167 { 252, "uuml" },
168 { 253, "yacute" },
169 { 254, "thorn" },
170 { 255, "yuml" },
171 { 338, "OElig" },
172 { 339, "oelig" },
173 { 352, "Scaron" },
174 { 353, "scaron" },
175 { 376, "Yuml" },
176 { 402, "fnof" },
177 { 710, "circ" },
178 { 732, "tilde" },
179 { 913, "Alpha" },
180 { 914, "Beta" },
181 { 915, "Gamma" },
182 { 916, "Delta" },
183 { 917, "Epsilon" },
184 { 918, "Zeta" },
185 { 919, "Eta" },
186 { 920, "Theta" },
187 { 921, "Iota" },
188 { 922, "Kappa" },
189 { 923, "Lambda" },
190 { 924, "Mu" },
191 { 925, "Nu" },
192 { 926, "Xi" },
193 { 927, "Omicron" },
194 { 928, "Pi" },
195 { 929, "Rho" },
196 { 931, "Sigma" },
197 { 932, "Tau" },
198 { 933, "Upsilon" },
199 { 934, "Phi" },
200 { 935, "Chi" },
201 { 936, "Psi" },
202 { 937, "Omega" },
203 { 945, "alpha" },
204 { 946, "beta" },
205 { 947, "gamma" },
206 { 948, "delta" },
207 { 949, "epsilon" },
208 { 950, "zeta" },
209 { 951, "eta" },
210 { 952, "theta" },
211 { 953, "iota" },
212 { 954, "kappa" },
213 { 955, "lambda" },
214 { 956, "mu" },
215 { 957, "nu" },
216 { 958, "xi" },
217 { 959, "omicron" },
218 { 960, "pi" },
219 { 961, "rho" },
220 { 962, "sigmaf" },
221 { 963, "sigma" },
222 { 964, "tau" },
223 { 965, "upsilon" },
224 { 966, "phi" },
225 { 967, "chi" },
226 { 968, "psi" },
227 { 969, "omega" },
228 { 977, "thetasym" },
229 { 978, "upsih" },
230 { 982, "piv" },
231 { 8194, "ensp" },
232 { 8195, "emsp" },
233 { 8201, "thinsp" },
234 { 8204, "zwnj" },
235 { 8205, "zwj" },
236 { 8206, "lrm" },
237 { 8207, "rlm" },
238 { 8211, "ndash" },
239 { 8212, "mdash" },
240 { 8216, "lsquo" },
241 { 8217, "rsquo" },
242 { 8218, "sbquo" },
243 { 8220, "ldquo" },
244 { 8221, "rdquo" },
245 { 8222, "bdquo" },
246 { 8224, "dagger" },
247 { 8225, "Dagger" },
248 { 8226, "bull" },
249 { 8230, "hellip" },
250 { 8240, "permil" },
251 { 8242, "prime" },
252 { 8243, "Prime" },
253 { 8249, "lsaquo" },
254 { 8250, "rsaquo" },
255 { 8254, "oline" },
256 { 8260, "frasl" },
257 { 8364, "euro" },
258 { 8465, "image" },
259 { 8472, "weierp" },
260 { 8476, "real" },
261 { 8482, "trade" },
262 { 8501, "alefsym" },
263 { 8592, "larr" },
264 { 8593, "uarr" },
265 { 8594, "rarr" },
266 { 8595, "darr" },
267 { 8596, "harr" },
268 { 8629, "crarr" },
269 { 8656, "lArr" },
270 { 8657, "uArr" },
271 { 8658, "rArr" },
272 { 8659, "dArr" },
273 { 8660, "hArr" },
274 { 8704, "forall" },
275 { 8706, "part" },
276 { 8707, "exist" },
277 { 8709, "empty" },
278 { 8711, "nabla" },
279 { 8712, "isin" },
280 { 8713, "notin" },
281 { 8715, "ni" },
282 { 8719, "prod" },
283 { 8721, "sum" },
284 { 8722, "minus" },
285 { 8727, "lowast" },
286 { 8730, "radic" },
287 { 8733, "prop" },
288 { 8734, "infin" },
289 { 8736, "ang" },
290 { 8743, "and" },
291 { 8744, "or" },
292 { 8745, "cap" },
293 { 8746, "cup" },
294 { 8747, "int" },
295 { 8756, "there4" },
296 { 8764, "sim" },
297 { 8773, "cong" },
298 { 8776, "asymp" },
299 { 8800, "ne" },
300 { 8801, "equiv" },
301 { 8804, "le" },
302 { 8805, "ge" },
303 { 8834, "sub" },
304 { 8835, "sup" },
305 { 8836, "nsub" },
306 { 8838, "sube" },
307 { 8839, "supe" },
308 { 8853, "oplus" },
309 { 8855, "otimes" },
310 { 8869, "perp" },
311 { 8901, "sdot" },
312 { 8968, "lceil" },
313 { 8969, "rceil" },
314 { 8970, "lfloor" },
315 { 8971, "rfloor" },
316 { 9001, "lang" },
317 { 9002, "rang" },
318 { 9674, "loz" },
319 { 9824, "spades" },
320 { 9827, "clubs" },
321 { 9829, "hearts" },
322 { 9830, "diams" },
323 #endif /* HTTP_ALL_ENTITIES */
324 { 0, "" }
325 };
326
/*
 * Decoder state for a character entity ("&name;" or "&#number;") that may
 * arrive one character at a time.
 */
struct entity_state {
    char entity_buf[16];        /* Characters collected since the '&' */
    char *ep;                   /* Next free slot in entity_buf; NULL when not inside an entity */
};
331
emit(char * p,int c,struct entity_state * st)332 static char *emit(char *p, int c, struct entity_state *st)
333 {
334 const struct html_entity *ent;
335 unsigned int ucs;
336
337 if (!st->ep) {
338 if (c == '&') {
339 /* Entity open */
340 st->ep = st->entity_buf;
341 } else {
342 *p++ = c;
343 }
344 } else {
345 if (c == ';') {
346 st->ep = NULL;
347 *p = '\0';
348 if (st->entity_buf[0] == '#') {
349 if ((st->entity_buf[1] | 0x20)== 'x') {
350 ucs = strtoul(st->entity_buf + 2, NULL, 16);
351 } else {
352 ucs = strtoul(st->entity_buf + 1, NULL, 10);
353 }
354 } else {
355 for (ent = entities; ent->ucs; ent++) {
356 if (!strcmp(st->entity_buf, ent->entity))
357 break;
358 }
359 ucs = ent->ucs;
360 }
361 if (ucs < 32 || ucs >= 0x10ffff)
362 return p; /* Bogus */
363 if (ucs >= 0x10000) {
364 *p++ = 0xf0 + (ucs >> 18);
365 *p++ = 0x80 + ((ucs >> 12) & 0x3f);
366 *p++ = 0x80 + ((ucs >> 6) & 0x3f);
367 *p++ = 0x80 + (ucs & 0x3f);
368 } else if (ucs >= 0x800) {
369 *p++ = 0xe0 + (ucs >> 12);
370 *p++ = 0x80 + ((ucs >> 6) & 0x3f);
371 *p++ = 0x80 + (ucs & 0x3f);
372 } else if (ucs >= 0x80) {
373 *p++ = 0xc0 + (ucs >> 6);
374 *p++ = 0x80 + (ucs & 0x3f);
375 } else {
376 *p++ = ucs;
377 }
378 } else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) {
379 *st->ep++ = c;
380 }
381 }
382 return p;
383 }
384
http_get_filename(struct inode * inode,char * buf)385 static const char *http_get_filename(struct inode *inode, char *buf)
386 {
387 int c, lc;
388 char *p;
389 const struct machine *sm;
390 struct entity_state es;
391 enum http_readdir_state state = st_start;
392 enum http_readdir_state pstate = st_start;
393
394 memset(&es, 0, sizeof es);
395
396 p = buf;
397 for (;;) {
398 c = pxe_getc(inode);
399 if (c == -1)
400 return NULL;
401
402 lc = tolower(c);
403
404 sm = &statemachine[state];
405
406 if (lc == sm->xchar)
407 state = sm->st_xchar;
408 else if (c == '<')
409 state = sm->st_left;
410 else if (c == '>')
411 state = sm->st_right;
412 else if (isspace(c))
413 state = sm->st_space;
414 else
415 state = sm->st_other;
416
417 if (state == st_hrefeq || state == st_hrefqu) {
418 if (state != pstate)
419 p = buf;
420 else if (p < buf + FILENAME_MAX)
421 p = emit(p, c, &es);
422 pstate = state;
423 } else {
424 if (pstate != st_start)
425 pstate = st_start;
426 if (p != buf && state == st_start) {
427 *p = '\0';
428 return buf;
429 }
430 }
431 }
432 }
433
http_readdir(struct inode * inode,struct dirent * dirent)434 int http_readdir(struct inode *inode, struct dirent *dirent)
435 {
436 char buf[FILENAME_MAX + 6];
437 const char *fn, *sp;
438
439 for (;;) {
440 fn = http_get_filename(inode, buf);
441
442 if (!fn)
443 return -1; /* End of directory */
444
445 /* Ignore entries with http special characters */
446 if (strchr(fn, '#'))
447 continue;
448 if (strchr(fn, '?'))
449 continue;
450
451 /* A slash if present has to be the last character, and not the first */
452 sp = strchr(fn, '/');
453 if (sp) {
454 if (sp == fn || sp[1])
455 continue;
456 } else {
457 sp = strchr(fn, '\0');
458 }
459
460 if (sp > fn + NAME_MAX)
461 continue;
462
463 dirent->d_ino = 0; /* Not applicable */
464 dirent->d_off = 0; /* Not applicable */
465 dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1;
466 dirent->d_type = *sp == '/' ? DT_DIR : DT_REG;
467 memcpy(dirent->d_name, fn, sp-fn);
468 dirent->d_name[sp-fn] = '\0';
469 return 0;
470 }
471 }
472