1 /*
2 * xmlSeed.c: Generate the XML seed corpus for fuzzing.
3 *
4 * See Copyright for the status of this software.
5 */
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <glob.h>
10 #include <libgen.h>
11 #include <sys/stat.h>
12
13 #ifdef _WIN32
14 #include <direct.h>
15 #else
16 #include <unistd.h>
17 #endif
18
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/HTMLparser.h>
22 #include <libxml/xinclude.h>
23 #include <libxml/xmlschemas.h>
24 #include "fuzz.h"
25
/* Capacity of the buffers that hold paths and glob patterns. */
#define PATH_SIZE 500
/* Capacity of the copy buffer used when a document is copied verbatim. */
#define SEED_BUF_SIZE 16384
/* Capacity of the line buffer used when reading XPath expressions. */
#define EXPR_SIZE 4500

/* Bits stored in globalData.flags; they select the seed header layout. */
#define FLAG_READER 0x01           /* append an initial reader program */
#define FLAG_LINT 0x02             /* write the header used for "lint" seeds */
#define FLAG_PUSH_CHUNK_SIZE 0x04  /* append a push parser chunk size */
33
/*
 * Callback that turns one input file into a seed written to "out".
 * Callers treat a non-zero result as failure.
 */
typedef int (*fileFunc)(const char *base, FILE *out);

/* Callback invoked by main() once for every command line argument. */
typedef int (*mainFunc)(const char *arg);
39
40 static struct {
41 FILE *out;
42 xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
43 xmlExternalEntityLoader oldLoader;
44 fileFunc processFile;
45 const char *fuzzer;
46 int counter;
47 char cwd[PATH_SIZE];
48 int flags;
49 } globalData;
50
51 #if defined(HAVE_SCHEMA_FUZZER) || \
52 defined(HAVE_XML_FUZZER)
53 /*
54 * A custom resource loader that writes all external DTDs or entities to a
55 * single file in the format expected by xmlFuzzResourceLoader.
56 */
57 static int
fuzzResourceRecorder(void * data ATTRIBUTE_UNUSED,const char * URL,const char * ID ATTRIBUTE_UNUSED,xmlResourceType type ATTRIBUTE_UNUSED,int flags,xmlParserInputPtr * out)58 fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
59 const char *ID ATTRIBUTE_UNUSED,
60 xmlResourceType type ATTRIBUTE_UNUSED, int flags,
61 xmlParserInputPtr *out) {
62 xmlParserInputPtr in;
63 static const int chunkSize = 16384;
64 int code, len;
65
66 *out = NULL;
67
68 code = xmlNewInputFromUrl(URL, flags, &in);
69 if (code != XML_ERR_OK)
70 return(code);
71
72 if (globalData.entities == NULL) {
73 globalData.entities = xmlHashCreate(4);
74 } else if (xmlHashLookup(globalData.entities,
75 (const xmlChar *) URL) != NULL) {
76 *out = in;
77 return(XML_ERR_OK);
78 }
79
80 do {
81 len = xmlParserInputGrow(in, chunkSize);
82 if (len < 0) {
83 fprintf(stderr, "Error reading %s\n", URL);
84 xmlFreeInputStream(in);
85 return(in->buf->error);
86 }
87 } while (len > 0);
88
89 xmlFuzzWriteString(globalData.out, URL);
90 xmlFuzzWriteString(globalData.out,
91 (char *) xmlBufContent(in->buf->buffer));
92
93 xmlFreeInputStream(in);
94
95 xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
96 globalData.entities);
97
98 return(xmlNewInputFromUrl(URL, flags, out));
99 }
100
101 static void
fuzzRecorderInit(FILE * out)102 fuzzRecorderInit(FILE *out) {
103 globalData.out = out;
104 globalData.entities = xmlHashCreate(8);
105 globalData.oldLoader = xmlGetExternalEntityLoader();
106 }
107
108 static void
fuzzRecorderCleanup(void)109 fuzzRecorderCleanup(void) {
110 xmlHashFree(globalData.entities, NULL);
111 globalData.out = NULL;
112 globalData.entities = NULL;
113 globalData.oldLoader = NULL;
114 }
115 #endif
116
117 #ifdef HAVE_XML_FUZZER
118 static int
processXml(const char * docFile,FILE * out)119 processXml(const char *docFile, FILE *out) {
120 int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
121 xmlParserCtxtPtr ctxt;
122 xmlDocPtr doc;
123
124 if (globalData.flags & FLAG_LINT) {
125 /* Switches */
126 xmlFuzzWriteInt(out, 0, 4);
127 xmlFuzzWriteInt(out, 0, 4);
128 /* maxmem */
129 xmlFuzzWriteInt(out, 0, 4);
130 /* max-ampl */
131 xmlFuzzWriteInt(out, 0, 1);
132 /* pretty */
133 xmlFuzzWriteInt(out, 0, 1);
134 /* encode */
135 xmlFuzzWriteString(out, "");
136 /* pattern */
137 xmlFuzzWriteString(out, "");
138 /* xpath */
139 xmlFuzzWriteString(out, "");
140 } else {
141 /* Parser options. */
142 xmlFuzzWriteInt(out, opts, 4);
143 /* Max allocations. */
144 xmlFuzzWriteInt(out, 0, 4);
145
146 if (globalData.flags & FLAG_PUSH_CHUNK_SIZE) {
147 /* Chunk size for push parser */
148 xmlFuzzWriteInt(out, 256, 4);
149 }
150
151 if (globalData.flags & FLAG_READER) {
152 /* Initial reader program with a couple of OP_READs */
153 xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
154 }
155 }
156
157 fuzzRecorderInit(out);
158
159 ctxt = xmlNewParserCtxt();
160 xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
161 xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
162 doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
163 #ifdef LIBXML_XINCLUDE_ENABLED
164 {
165 xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
166
167 xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
168 xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
169 xmlXIncludeSetFlags(xinc, opts);
170 xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
171 xmlXIncludeFreeContext(xinc);
172 }
173 #endif
174 xmlFreeDoc(doc);
175 xmlFreeParserCtxt(ctxt);
176
177 fuzzRecorderCleanup();
178
179 return(0);
180 }
181 #endif
182
183 #ifdef HAVE_HTML_FUZZER
184 static int
processHtml(const char * docFile,FILE * out)185 processHtml(const char *docFile, FILE *out) {
186 char buf[SEED_BUF_SIZE];
187 FILE *file;
188 size_t size;
189
190 /* Parser options. */
191 xmlFuzzWriteInt(out, 0, 4);
192 /* Max allocations. */
193 xmlFuzzWriteInt(out, 0, 4);
194
195 /* Copy file */
196 file = fopen(docFile, "rb");
197 if (file == NULL) {
198 fprintf(stderr, "couldn't open %s\n", docFile);
199 return(0);
200 }
201 do {
202 size = fread(buf, 1, SEED_BUF_SIZE, file);
203 if (size > 0)
204 fwrite(buf, 1, size, out);
205 } while (size == SEED_BUF_SIZE);
206 fclose(file);
207
208 return(0);
209 }
210 #endif
211
212 #ifdef HAVE_SCHEMA_FUZZER
213 static int
processSchema(const char * docFile,FILE * out)214 processSchema(const char *docFile, FILE *out) {
215 xmlSchemaPtr schema;
216 xmlSchemaParserCtxtPtr pctxt;
217
218 /* Max allocations. */
219 xmlFuzzWriteInt(out, 0, 4);
220
221 fuzzRecorderInit(out);
222
223 pctxt = xmlSchemaNewParserCtxt(docFile);
224 xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
225 xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
226 schema = xmlSchemaParse(pctxt);
227 xmlSchemaFreeParserCtxt(pctxt);
228 xmlSchemaFree(schema);
229
230 fuzzRecorderCleanup();
231
232 return(0);
233 }
234 #endif
235
236 #if defined(HAVE_HTML_FUZZER) || \
237 defined(HAVE_SCHEMA_FUZZER) || \
238 defined(HAVE_XML_FUZZER)
239 static int
processPattern(const char * pattern)240 processPattern(const char *pattern) {
241 glob_t globbuf;
242 int ret = 0;
243 int res;
244 size_t i;
245
246 res = glob(pattern, 0, NULL, &globbuf);
247 if (res == GLOB_NOMATCH)
248 return(0);
249 if (res != 0) {
250 fprintf(stderr, "couldn't match pattern %s\n", pattern);
251 return(-1);
252 }
253
254 for (i = 0; i < globbuf.gl_pathc; i++) {
255 struct stat statbuf;
256 char outPath[PATH_SIZE];
257 char *dirBuf = NULL;
258 char *baseBuf = NULL;
259 const char *path, *dir, *base;
260 FILE *out = NULL;
261 int dirChanged = 0;
262 size_t size;
263
264 path = globbuf.gl_pathv[i];
265
266 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
267 continue;
268
269 dirBuf = (char *) xmlCharStrdup(path);
270 baseBuf = (char *) xmlCharStrdup(path);
271 if ((dirBuf == NULL) || (baseBuf == NULL)) {
272 fprintf(stderr, "memory allocation failed\n");
273 ret = -1;
274 goto error;
275 }
276 dir = dirname(dirBuf);
277 base = basename(baseBuf);
278
279 size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
280 globalData.fuzzer, base);
281 if (size >= PATH_SIZE) {
282 fprintf(stderr, "creating path failed\n");
283 ret = -1;
284 goto error;
285 }
286 out = fopen(outPath, "wb");
287 if (out == NULL) {
288 fprintf(stderr, "couldn't open %s for writing\n", outPath);
289 ret = -1;
290 goto error;
291 }
292 if (chdir(dir) != 0) {
293 fprintf(stderr, "couldn't chdir to %s\n", dir);
294 ret = -1;
295 goto error;
296 }
297 dirChanged = 1;
298 if (globalData.processFile(base, out) != 0)
299 ret = -1;
300
301 error:
302 if (out != NULL)
303 fclose(out);
304 xmlFree(dirBuf);
305 xmlFree(baseBuf);
306 if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
307 fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
308 ret = -1;
309 break;
310 }
311 }
312
313 globfree(&globbuf);
314 return(ret);
315 }
316 #endif
317
318 #ifdef HAVE_XPATH_FUZZER
319 static int
processXPath(const char * testDir,const char * prefix,const char * name,const char * data,const char * subdir,int xptr)320 processXPath(const char *testDir, const char *prefix, const char *name,
321 const char *data, const char *subdir, int xptr) {
322 char pattern[PATH_SIZE];
323 glob_t globbuf;
324 size_t i, size;
325 int ret = 0, res;
326
327 size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
328 testDir, subdir, prefix);
329 if (size >= PATH_SIZE)
330 return(-1);
331 res = glob(pattern, 0, NULL, &globbuf);
332 if (res == GLOB_NOMATCH)
333 return(0);
334 if (res != 0) {
335 fprintf(stderr, "couldn't match pattern %s\n", pattern);
336 return(-1);
337 }
338
339 for (i = 0; i < globbuf.gl_pathc; i++) {
340 char *path = globbuf.gl_pathv[i];
341 struct stat statbuf;
342 FILE *in;
343 char expr[EXPR_SIZE];
344
345 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
346 continue;
347
348 in = fopen(path, "rb");
349 if (in == NULL) {
350 ret = -1;
351 continue;
352 }
353
354 while (fgets(expr, EXPR_SIZE, in) != NULL) {
355 char outPath[PATH_SIZE];
356 FILE *out;
357 int j;
358
359 for (j = 0; expr[j] != 0; j++)
360 if (expr[j] == '\r' || expr[j] == '\n')
361 break;
362 expr[j] = 0;
363
364 size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
365 name, globalData.counter);
366 if (size >= PATH_SIZE) {
367 ret = -1;
368 continue;
369 }
370 out = fopen(outPath, "wb");
371 if (out == NULL) {
372 ret = -1;
373 continue;
374 }
375
376 /* Max allocations. */
377 xmlFuzzWriteInt(out, 0, 4);
378
379 if (xptr) {
380 xmlFuzzWriteString(out, expr);
381 } else {
382 char xptrExpr[EXPR_SIZE+100];
383
384 /* Wrap XPath expressions as XPointer */
385 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
386 xmlFuzzWriteString(out, xptrExpr);
387 }
388
389 xmlFuzzWriteString(out, data);
390
391 fclose(out);
392 globalData.counter++;
393 }
394
395 fclose(in);
396 }
397
398 globfree(&globbuf);
399
400 return(ret);
401 }
402
403 static int
processXPathDir(const char * testDir)404 processXPathDir(const char *testDir) {
405 char pattern[PATH_SIZE];
406 glob_t globbuf;
407 size_t i, size;
408 int ret = 0;
409
410 globalData.counter = 1;
411 if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
412 ret = -1;
413
414 size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
415 if (size >= PATH_SIZE)
416 return(1);
417 if (glob(pattern, 0, NULL, &globbuf) != 0)
418 return(1);
419
420 for (i = 0; i < globbuf.gl_pathc; i++) {
421 char *path = globbuf.gl_pathv[i];
422 char *data;
423 const char *docFile;
424
425 data = xmlSlurpFile(path, NULL);
426 if (data == NULL) {
427 ret = -1;
428 continue;
429 }
430 docFile = basename(path);
431
432 globalData.counter = 1;
433 if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
434 ret = -1;
435 if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
436 ret = -1;
437 if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
438 ret = -1;
439
440 xmlFree(data);
441 }
442
443 globfree(&globbuf);
444
445 return(ret);
446 }
447 #endif
448
449 int
main(int argc,const char ** argv)450 main(int argc, const char **argv) {
451 mainFunc processArg = NULL;
452 const char *fuzzer;
453 int ret = 0;
454 int i;
455
456 if (argc < 3) {
457 fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
458 return(1);
459 }
460
461 fuzzer = argv[1];
462 if (strcmp(fuzzer, "html") == 0) {
463 #ifdef HAVE_HTML_FUZZER
464 processArg = processPattern;
465 globalData.flags |= FLAG_PUSH_CHUNK_SIZE;
466 globalData.processFile = processHtml;
467 #endif
468 } else if (strcmp(fuzzer, "lint") == 0) {
469 #ifdef HAVE_LINT_FUZZER
470 processArg = processPattern;
471 globalData.flags |= FLAG_LINT;
472 globalData.processFile = processXml;
473 #endif
474 } else if (strcmp(fuzzer, "reader") == 0) {
475 #ifdef HAVE_READER_FUZZER
476 processArg = processPattern;
477 globalData.flags |= FLAG_READER;
478 globalData.processFile = processXml;
479 #endif
480 } else if (strcmp(fuzzer, "schema") == 0) {
481 #ifdef HAVE_SCHEMA_FUZZER
482 processArg = processPattern;
483 globalData.processFile = processSchema;
484 #endif
485 } else if (strcmp(fuzzer, "valid") == 0) {
486 #ifdef HAVE_VALID_FUZZER
487 processArg = processPattern;
488 globalData.processFile = processXml;
489 #endif
490 } else if (strcmp(fuzzer, "xinclude") == 0) {
491 #ifdef HAVE_XINCLUDE_FUZZER
492 processArg = processPattern;
493 globalData.processFile = processXml;
494 #endif
495 } else if (strcmp(fuzzer, "xml") == 0) {
496 #ifdef HAVE_XML_FUZZER
497 processArg = processPattern;
498 globalData.flags |= FLAG_PUSH_CHUNK_SIZE;
499 globalData.processFile = processXml;
500 #endif
501 } else if (strcmp(fuzzer, "xpath") == 0) {
502 #ifdef HAVE_XPATH_FUZZER
503 processArg = processXPathDir;
504 #endif
505 } else {
506 fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
507 return(1);
508 }
509 globalData.fuzzer = fuzzer;
510
511 if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
512 fprintf(stderr, "couldn't get current directory\n");
513 return(1);
514 }
515
516 if (processArg != NULL)
517 for (i = 2; i < argc; i++)
518 processArg(argv[i]);
519
520 return(ret);
521 }
522
523