• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------- *
2  *
3  *   Copyright 2011 Intel Corporation; author: H. Peter Anvin
4  *
5  *   This program is free software; you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
8  *   Boston MA 02110-1301, USA; either version 2 of the License, or
9  *   (at your option) any later version; incorporated herein by reference.
10  *
11  * ----------------------------------------------------------------------- */
12 
13 #include <inttypes.h>
14 #include <string.h>
15 #include <stdlib.h>
16 #include <ctype.h>
17 #include <dprintf.h>
18 #include "pxe.h"
19 
/*
 * States of the HTML scanner that looks for <a href=...> values.
 *
 * The numeric value of each state is its row index in statemachine[],
 * so the values are spelled out explicitly.
 */
enum http_readdir_state {
    st_start     = 0,	/* Outside any tag */
    st_open      = 1,	/* "<" */
    st_a         = 2,	/* "<a" */
    st_attribute = 3,	/* "<a " - ready for an attribute name */
    st_h         = 4,	/* "<a h" */
    st_hr        = 5,	/* "<a hr" */
    st_hre       = 6,	/* "<a hre" */
    st_href      = 7,	/* "<a href" */
    st_hrefeq    = 8,	/* "<a href=" - unquoted value */
    st_hrefqu    = 9,	/* "<a href=\"" - inside a quoted value */
    st_badtag    = 10,	/* Inside a tag other than <a> */
    st_badtagqu  = 11,	/* Inside a quoted string in such a tag */
    st_badattr   = 12,	/* Inside an <a> attribute other than href */
    st_badattrqu = 13,	/* Inside the quoted value of such an attribute */
};
36 
/*
 * One row of the scanner's transition table.
 *
 * The lower-cased input character is compared against xchar first; if
 * it does not match, the input is classified as '<', '>', whitespace or
 * "anything else", in that order, to select the next state.
 */
struct machine {
    char xchar;			/* Character this state is waiting for */
    uint8_t st_xchar;		/* Next state when xchar is seen */
    uint8_t st_left;		/* Next state on '<' */
    uint8_t st_right;		/* Next state on '>' */
    uint8_t st_space;		/* Next state on whitespace */
    uint8_t st_other;		/* Next state on any other character */
};
45 
46 static const struct machine statemachine[] = {
47     /* xchar	st_xchar	st_left		st_right	st_space	st_other */
48     { 0,	0,		st_open,	st_start,	st_start,	st_start },
49     { 'a',	st_a,		st_badtag,	st_start,	st_open,	st_badtag },
50     { 0,	0,		st_open,	st_open,	st_attribute,	st_badtag },
51     { 'h',	st_h,		st_open,	st_start,	st_attribute,	st_badattr },
52     { 'r',	st_hr,		st_open,	st_start,	st_attribute,	st_badattr },
53     { 'e',	st_hre,		st_open,	st_start,	st_attribute,	st_badattr },
54     { 'f',	st_href,	st_open,	st_start,	st_attribute,	st_badattr },
55     { '=',	st_hrefeq,	st_open,	st_start,	st_attribute,	st_badattr },
56     { '\"',	st_hrefqu,	st_open,	st_start,	st_attribute,	st_hrefeq },
57     { '\"',	st_attribute,	st_hrefqu,	st_hrefqu,	st_hrefqu,	st_hrefqu },
58     { '\"',	st_badtagqu,	st_open,	st_start,	st_badtag,	st_badtag },
59     { '\"',	st_badtag,	st_badtagqu,	st_badtagqu,	st_badtagqu,	st_badtagqu },
60     { '\"',	st_badattrqu,	st_open,	st_start,	st_attribute,	st_badattr },
61     { '\"',	st_attribute,	st_badattrqu,	st_badattrqu,	st_badattrqu,	st_badattrqu },
62 };
63 
/*
 * One named HTML character entity: the name as it appears between
 * '&' and ';', and the code point it stands for.
 */
struct html_entity {
    uint16_t ucs;		/* Code point; 0 marks the end of the table */
    const char entity[9];	/* Name, NUL-terminated (longest is 8 chars) */
};
68 
69 static const struct html_entity entities[] = {
70     {   34, "quot" },
71     {   38, "amp" },
72     {   60, "lt" },
73     {   62, "gt" },
74 #ifdef HTTP_ALL_ENTITIES
75     {  160, "nbsp" },
76     {  161, "iexcl" },
77     {  162, "cent" },
78     {  163, "pound" },
79     {  164, "curren" },
80     {  165, "yen" },
81     {  166, "brvbar" },
82     {  167, "sect" },
83     {  168, "uml" },
84     {  169, "copy" },
85     {  170, "ordf" },
86     {  171, "laquo" },
87     {  172, "not" },
88     {  173, "shy" },
89     {  174, "reg" },
90     {  175, "macr" },
91     {  176, "deg" },
92     {  177, "plusmn" },
93     {  178, "sup2" },
94     {  179, "sup3" },
95     {  180, "acute" },
96     {  181, "micro" },
97     {  182, "para" },
98     {  183, "middot" },
99     {  184, "cedil" },
100     {  185, "sup1" },
101     {  186, "ordm" },
102     {  187, "raquo" },
103     {  188, "frac14" },
104     {  189, "frac12" },
105     {  190, "frac34" },
106     {  191, "iquest" },
107     {  192, "Agrave" },
108     {  193, "Aacute" },
109     {  194, "Acirc" },
110     {  195, "Atilde" },
111     {  196, "Auml" },
112     {  197, "Aring" },
113     {  198, "AElig" },
114     {  199, "Ccedil" },
115     {  200, "Egrave" },
116     {  201, "Eacute" },
117     {  202, "Ecirc" },
118     {  203, "Euml" },
119     {  204, "Igrave" },
120     {  205, "Iacute" },
121     {  206, "Icirc" },
122     {  207, "Iuml" },
123     {  208, "ETH" },
124     {  209, "Ntilde" },
125     {  210, "Ograve" },
126     {  211, "Oacute" },
127     {  212, "Ocirc" },
128     {  213, "Otilde" },
129     {  214, "Ouml" },
130     {  215, "times" },
131     {  216, "Oslash" },
132     {  217, "Ugrave" },
133     {  218, "Uacute" },
134     {  219, "Ucirc" },
135     {  220, "Uuml" },
136     {  221, "Yacute" },
137     {  222, "THORN" },
138     {  223, "szlig" },
139     {  224, "agrave" },
140     {  225, "aacute" },
141     {  226, "acirc" },
142     {  227, "atilde" },
143     {  228, "auml" },
144     {  229, "aring" },
145     {  230, "aelig" },
146     {  231, "ccedil" },
147     {  232, "egrave" },
148     {  233, "eacute" },
149     {  234, "ecirc" },
150     {  235, "euml" },
151     {  236, "igrave" },
152     {  237, "iacute" },
153     {  238, "icirc" },
154     {  239, "iuml" },
155     {  240, "eth" },
156     {  241, "ntilde" },
157     {  242, "ograve" },
158     {  243, "oacute" },
159     {  244, "ocirc" },
160     {  245, "otilde" },
161     {  246, "ouml" },
162     {  247, "divide" },
163     {  248, "oslash" },
164     {  249, "ugrave" },
165     {  250, "uacute" },
166     {  251, "ucirc" },
167     {  252, "uuml" },
168     {  253, "yacute" },
169     {  254, "thorn" },
170     {  255, "yuml" },
171     {  338, "OElig" },
172     {  339, "oelig" },
173     {  352, "Scaron" },
174     {  353, "scaron" },
175     {  376, "Yuml" },
176     {  402, "fnof" },
177     {  710, "circ" },
178     {  732, "tilde" },
179     {  913, "Alpha" },
180     {  914, "Beta" },
181     {  915, "Gamma" },
182     {  916, "Delta" },
183     {  917, "Epsilon" },
184     {  918, "Zeta" },
185     {  919, "Eta" },
186     {  920, "Theta" },
187     {  921, "Iota" },
188     {  922, "Kappa" },
189     {  923, "Lambda" },
190     {  924, "Mu" },
191     {  925, "Nu" },
192     {  926, "Xi" },
193     {  927, "Omicron" },
194     {  928, "Pi" },
195     {  929, "Rho" },
196     {  931, "Sigma" },
197     {  932, "Tau" },
198     {  933, "Upsilon" },
199     {  934, "Phi" },
200     {  935, "Chi" },
201     {  936, "Psi" },
202     {  937, "Omega" },
203     {  945, "alpha" },
204     {  946, "beta" },
205     {  947, "gamma" },
206     {  948, "delta" },
207     {  949, "epsilon" },
208     {  950, "zeta" },
209     {  951, "eta" },
210     {  952, "theta" },
211     {  953, "iota" },
212     {  954, "kappa" },
213     {  955, "lambda" },
214     {  956, "mu" },
215     {  957, "nu" },
216     {  958, "xi" },
217     {  959, "omicron" },
218     {  960, "pi" },
219     {  961, "rho" },
220     {  962, "sigmaf" },
221     {  963, "sigma" },
222     {  964, "tau" },
223     {  965, "upsilon" },
224     {  966, "phi" },
225     {  967, "chi" },
226     {  968, "psi" },
227     {  969, "omega" },
228     {  977, "thetasym" },
229     {  978, "upsih" },
230     {  982, "piv" },
231     { 8194, "ensp" },
232     { 8195, "emsp" },
233     { 8201, "thinsp" },
234     { 8204, "zwnj" },
235     { 8205, "zwj" },
236     { 8206, "lrm" },
237     { 8207, "rlm" },
238     { 8211, "ndash" },
239     { 8212, "mdash" },
240     { 8216, "lsquo" },
241     { 8217, "rsquo" },
242     { 8218, "sbquo" },
243     { 8220, "ldquo" },
244     { 8221, "rdquo" },
245     { 8222, "bdquo" },
246     { 8224, "dagger" },
247     { 8225, "Dagger" },
248     { 8226, "bull" },
249     { 8230, "hellip" },
250     { 8240, "permil" },
251     { 8242, "prime" },
252     { 8243, "Prime" },
253     { 8249, "lsaquo" },
254     { 8250, "rsaquo" },
255     { 8254, "oline" },
256     { 8260, "frasl" },
257     { 8364, "euro" },
258     { 8465, "image" },
259     { 8472, "weierp" },
260     { 8476, "real" },
261     { 8482, "trade" },
262     { 8501, "alefsym" },
263     { 8592, "larr" },
264     { 8593, "uarr" },
265     { 8594, "rarr" },
266     { 8595, "darr" },
267     { 8596, "harr" },
268     { 8629, "crarr" },
269     { 8656, "lArr" },
270     { 8657, "uArr" },
271     { 8658, "rArr" },
272     { 8659, "dArr" },
273     { 8660, "hArr" },
274     { 8704, "forall" },
275     { 8706, "part" },
276     { 8707, "exist" },
277     { 8709, "empty" },
278     { 8711, "nabla" },
279     { 8712, "isin" },
280     { 8713, "notin" },
281     { 8715, "ni" },
282     { 8719, "prod" },
283     { 8721, "sum" },
284     { 8722, "minus" },
285     { 8727, "lowast" },
286     { 8730, "radic" },
287     { 8733, "prop" },
288     { 8734, "infin" },
289     { 8736, "ang" },
290     { 8743, "and" },
291     { 8744, "or" },
292     { 8745, "cap" },
293     { 8746, "cup" },
294     { 8747, "int" },
295     { 8756, "there4" },
296     { 8764, "sim" },
297     { 8773, "cong" },
298     { 8776, "asymp" },
299     { 8800, "ne" },
300     { 8801, "equiv" },
301     { 8804, "le" },
302     { 8805, "ge" },
303     { 8834, "sub" },
304     { 8835, "sup" },
305     { 8836, "nsub" },
306     { 8838, "sube" },
307     { 8839, "supe" },
308     { 8853, "oplus" },
309     { 8855, "otimes" },
310     { 8869, "perp" },
311     { 8901, "sdot" },
312     { 8968, "lceil" },
313     { 8969, "rceil" },
314     { 8970, "lfloor" },
315     { 8971, "rfloor" },
316     { 9001, "lang" },
317     { 9002, "rang" },
318     { 9674, "loz" },
319     { 9824, "spades" },
320     { 9827, "clubs" },
321     { 9829, "hearts" },
322     { 9830, "diams" },
323 #endif /* HTTP_ALL_ENTITIES */
324     { 0, "" }
325 };
326 
/*
 * Decoder state carried between calls to emit() while the text between
 * '&' and ';' is being collected.
 */
struct entity_state {
    char entity_buf[16];	/* Entity text collected so far */
    char *ep;			/* Next free byte in entity_buf; NULL when
				   not inside an entity */
};
331 
emit(char * p,int c,struct entity_state * st)332 static char *emit(char *p, int c, struct entity_state *st)
333 {
334     const struct html_entity *ent;
335     unsigned int ucs;
336 
337     if (!st->ep) {
338 	if (c == '&') {
339 	    /* Entity open */
340 	    st->ep = st->entity_buf;
341 	} else {
342 	    *p++ = c;
343 	}
344     } else {
345 	if (c == ';') {
346 	    st->ep = NULL;
347 	    *p = '\0';
348 	    if (st->entity_buf[0] == '#') {
349 		if ((st->entity_buf[1] | 0x20)== 'x') {
350 		    ucs = strtoul(st->entity_buf + 2, NULL, 16);
351 		} else {
352 		    ucs = strtoul(st->entity_buf + 1, NULL, 10);
353 		}
354 	    } else {
355 		for (ent = entities; ent->ucs; ent++) {
356 		    if (!strcmp(st->entity_buf, ent->entity))
357 			break;
358 		}
359 		ucs = ent->ucs;
360 	    }
361 	    if (ucs < 32 || ucs >= 0x10ffff)
362 		return p;	/* Bogus */
363 	    if (ucs >= 0x10000) {
364 		*p++ = 0xf0 + (ucs >> 18);
365 		*p++ = 0x80 + ((ucs >> 12) & 0x3f);
366 		*p++ = 0x80 + ((ucs >> 6) & 0x3f);
367 		*p++ = 0x80 + (ucs & 0x3f);
368 	    } else if (ucs >= 0x800) {
369 		*p++ = 0xe0 + (ucs >> 12);
370 		*p++ = 0x80 + ((ucs >> 6) & 0x3f);
371 		*p++ = 0x80 + (ucs & 0x3f);
372 	    } else if (ucs >= 0x80) {
373 		*p++ = 0xc0 + (ucs >> 6);
374 		*p++ = 0x80 + (ucs & 0x3f);
375 	    } else {
376 		*p++ = ucs;
377 	    }
378 	} else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) {
379 	    *st->ep++ = c;
380 	}
381     }
382     return p;
383 }
384 
http_get_filename(struct inode * inode,char * buf)385 static const char *http_get_filename(struct inode *inode, char *buf)
386 {
387     int c, lc;
388     char *p;
389     const struct machine *sm;
390     struct entity_state es;
391     enum http_readdir_state state = st_start;
392     enum http_readdir_state pstate = st_start;
393 
394     memset(&es, 0, sizeof es);
395 
396     p = buf;
397     for (;;) {
398 	c = pxe_getc(inode);
399 	if (c == -1)
400 	    return NULL;
401 
402 	lc = tolower(c);
403 
404 	sm = &statemachine[state];
405 
406 	if (lc == sm->xchar)
407 	    state = sm->st_xchar;
408 	else if (c == '<')
409 	    state = sm->st_left;
410 	else if (c == '>')
411 	    state = sm->st_right;
412 	else if (isspace(c))
413 	    state = sm->st_space;
414 	else
415 	    state = sm->st_other;
416 
417 	if (state == st_hrefeq || state == st_hrefqu) {
418 	    if (state != pstate)
419 		p = buf;
420 	    else if (p < buf + FILENAME_MAX)
421 		p = emit(p, c, &es);
422 	    pstate = state;
423 	} else {
424 	    if (pstate != st_start)
425 		pstate = st_start;
426 	    if (p != buf && state == st_start) {
427 		*p = '\0';
428 		return buf;
429 	    }
430 	}
431     }
432 }
433 
http_readdir(struct inode * inode,struct dirent * dirent)434 int http_readdir(struct inode *inode, struct dirent *dirent)
435 {
436     char buf[FILENAME_MAX + 6];
437     const char *fn, *sp;
438 
439     for (;;) {
440 	fn = http_get_filename(inode, buf);
441 
442 	if (!fn)
443 	    return -1;		/* End of directory */
444 
445 	/* Ignore entries with http special characters */
446 	if (strchr(fn, '#'))
447 	    continue;
448 	if (strchr(fn, '?'))
449 	    continue;
450 
451 	/* A slash if present has to be the last character, and not the first */
452 	sp = strchr(fn, '/');
453 	if (sp) {
454 	    if (sp == fn || sp[1])
455 		continue;
456 	} else {
457 	    sp = strchr(fn, '\0');
458 	}
459 
460 	if (sp > fn + NAME_MAX)
461 	    continue;
462 
463 	dirent->d_ino = 0;	/* Not applicable */
464 	dirent->d_off = 0;	/* Not applicable */
465 	dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1;
466 	dirent->d_type = *sp == '/' ? DT_DIR : DT_REG;
467 	memcpy(dirent->d_name, fn, sp-fn);
468 	dirent->d_name[sp-fn] = '\0';
469 	return 0;
470     }
471 }
472