• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * xmlSeed.c: Generate the XML seed corpus for fuzzing.
3  *
4  * See Copyright for the status of this software.
5  */
6 
7 #include <stdio.h>
8 #include <string.h>
9 #include <glob.h>
10 #include <libgen.h>
11 #include <sys/stat.h>
12 
13 #ifdef _WIN32
14 #include <direct.h>
15 #else
16 #include <unistd.h>
17 #endif
18 
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/HTMLparser.h>
22 #include <libxml/xinclude.h>
23 #include <libxml/xmlschemas.h>
24 #include "fuzz.h"
25 
/* Size of the fixed buffers holding paths, glob patterns and the cwd. */
#define PATH_SIZE 500
/* Size of the buffer used to copy an input file into a seed in chunks. */
#define SEED_BUF_SIZE 16384
/* Size of the line buffer used when reading XPath expressions. */
#define EXPR_SIZE 4500

/*
 * Bits stored in globalData.flags. They are set in main() depending on
 * the selected fuzzer and tested in processXml() to decide which header
 * fields are written to a seed.
 */
#define FLAG_READER             (1 << 0)
#define FLAG_LINT               (1 << 1)
#define FLAG_PUSH_CHUNK_SIZE    (1 << 2)
33 
/*
 * Callback that writes the seed for a single input file 'base' to 'out'.
 * processPattern() treats a non-zero return value as a failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);

/*
 * Callback invoked by main() once for every command line argument that
 * follows the fuzzer name.
 */
typedef int
(*mainFunc)(const char *arg);
39 
/* Program-wide state shared by the seed generators in this file. */
static struct {
    /* Seed file the resource recorder appends to */
    FILE *out;
    /*
     * URLs already written by fuzzResourceRecorder. Only the presence of
     * a key matters; the stored value is a dummy non-NULL pointer.
     */
    xmlHashTablePtr entities;
    /* Result of xmlGetExternalEntityLoader() saved by fuzzRecorderInit */
    xmlExternalEntityLoader oldLoader;
    /* Per-file callback invoked by processPattern */
    fileFunc processFile;
    /* Fuzzer name from argv[1]; also the subdirectory below "seed/" */
    const char *fuzzer;
    /* Sequence number appended to the names of XPath seed files */
    int counter;
    /* Working directory at startup, restored after each processed file */
    char cwd[PATH_SIZE];
    /* Combination of FLAG_* bits */
    int flags;
} globalData;
50 
51 #if defined(HAVE_SCHEMA_FUZZER) || \
52     defined(HAVE_XML_FUZZER)
53 /*
54  * A custom resource loader that writes all external DTDs or entities to a
55  * single file in the format expected by xmlFuzzResourceLoader.
56  */
57 static int
fuzzResourceRecorder(void * data ATTRIBUTE_UNUSED,const char * URL,const char * ID ATTRIBUTE_UNUSED,xmlResourceType type ATTRIBUTE_UNUSED,int flags,xmlParserInputPtr * out)58 fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
59                      const char *ID ATTRIBUTE_UNUSED,
60                      xmlResourceType type ATTRIBUTE_UNUSED, int flags,
61                      xmlParserInputPtr *out) {
62     xmlParserInputPtr in;
63     static const int chunkSize = 16384;
64     int code, len;
65 
66     *out = NULL;
67 
68     code = xmlNewInputFromUrl(URL, flags, &in);
69     if (code != XML_ERR_OK)
70         return(code);
71 
72     if (globalData.entities == NULL) {
73         globalData.entities = xmlHashCreate(4);
74     } else if (xmlHashLookup(globalData.entities,
75                              (const xmlChar *) URL) != NULL) {
76         *out = in;
77         return(XML_ERR_OK);
78     }
79 
80     do {
81         len = xmlParserInputGrow(in, chunkSize);
82         if (len < 0) {
83             fprintf(stderr, "Error reading %s\n", URL);
84             xmlFreeInputStream(in);
85             return(in->buf->error);
86         }
87     } while (len > 0);
88 
89     xmlFuzzWriteString(globalData.out, URL);
90     xmlFuzzWriteString(globalData.out,
91                        (char *) xmlBufContent(in->buf->buffer));
92 
93     xmlFreeInputStream(in);
94 
95     xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
96                     globalData.entities);
97 
98     return(xmlNewInputFromUrl(URL, flags, out));
99 }
100 
101 static void
fuzzRecorderInit(FILE * out)102 fuzzRecorderInit(FILE *out) {
103     globalData.out = out;
104     globalData.entities = xmlHashCreate(8);
105     globalData.oldLoader = xmlGetExternalEntityLoader();
106 }
107 
108 static void
fuzzRecorderCleanup(void)109 fuzzRecorderCleanup(void) {
110     xmlHashFree(globalData.entities, NULL);
111     globalData.out = NULL;
112     globalData.entities = NULL;
113     globalData.oldLoader = NULL;
114 }
115 #endif
116 
117 #ifdef HAVE_XML_FUZZER
118 static int
processXml(const char * docFile,FILE * out)119 processXml(const char *docFile, FILE *out) {
120     int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
121     xmlParserCtxtPtr ctxt;
122     xmlDocPtr doc;
123 
124     if (globalData.flags & FLAG_LINT) {
125         /* Switches */
126         xmlFuzzWriteInt(out, 0, 4);
127         xmlFuzzWriteInt(out, 0, 4);
128         /* maxmem */
129         xmlFuzzWriteInt(out, 0, 4);
130         /* max-ampl */
131         xmlFuzzWriteInt(out, 0, 1);
132         /* pretty */
133         xmlFuzzWriteInt(out, 0, 1);
134         /* encode */
135         xmlFuzzWriteString(out, "");
136         /* pattern */
137         xmlFuzzWriteString(out, "");
138         /* xpath */
139         xmlFuzzWriteString(out, "");
140     } else {
141         /* Parser options. */
142         xmlFuzzWriteInt(out, opts, 4);
143         /* Max allocations. */
144         xmlFuzzWriteInt(out, 0, 4);
145 
146         if (globalData.flags & FLAG_PUSH_CHUNK_SIZE) {
147             /* Chunk size for push parser */
148             xmlFuzzWriteInt(out, 256, 4);
149         }
150 
151         if (globalData.flags & FLAG_READER) {
152             /* Initial reader program with a couple of OP_READs */
153             xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
154         }
155     }
156 
157     fuzzRecorderInit(out);
158 
159     ctxt = xmlNewParserCtxt();
160     xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
161     xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
162     doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
163 #ifdef LIBXML_XINCLUDE_ENABLED
164     {
165         xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
166 
167         xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
168         xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
169         xmlXIncludeSetFlags(xinc, opts);
170         xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
171         xmlXIncludeFreeContext(xinc);
172     }
173 #endif
174     xmlFreeDoc(doc);
175     xmlFreeParserCtxt(ctxt);
176 
177     fuzzRecorderCleanup();
178 
179     return(0);
180 }
181 #endif
182 
183 #ifdef HAVE_HTML_FUZZER
184 static int
processHtml(const char * docFile,FILE * out)185 processHtml(const char *docFile, FILE *out) {
186     char buf[SEED_BUF_SIZE];
187     FILE *file;
188     size_t size;
189 
190     /* Parser options. */
191     xmlFuzzWriteInt(out, 0, 4);
192     /* Max allocations. */
193     xmlFuzzWriteInt(out, 0, 4);
194 
195     /* Copy file */
196     file = fopen(docFile, "rb");
197     if (file == NULL) {
198         fprintf(stderr, "couldn't open %s\n", docFile);
199         return(0);
200     }
201     do {
202         size = fread(buf, 1, SEED_BUF_SIZE, file);
203         if (size > 0)
204             fwrite(buf, 1, size, out);
205     } while (size == SEED_BUF_SIZE);
206     fclose(file);
207 
208     return(0);
209 }
210 #endif
211 
212 #ifdef HAVE_SCHEMA_FUZZER
213 static int
processSchema(const char * docFile,FILE * out)214 processSchema(const char *docFile, FILE *out) {
215     xmlSchemaPtr schema;
216     xmlSchemaParserCtxtPtr pctxt;
217 
218     /* Max allocations. */
219     xmlFuzzWriteInt(out, 0, 4);
220 
221     fuzzRecorderInit(out);
222 
223     pctxt = xmlSchemaNewParserCtxt(docFile);
224     xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
225     xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
226     schema = xmlSchemaParse(pctxt);
227     xmlSchemaFreeParserCtxt(pctxt);
228     xmlSchemaFree(schema);
229 
230     fuzzRecorderCleanup();
231 
232     return(0);
233 }
234 #endif
235 
236 #if defined(HAVE_HTML_FUZZER) || \
237     defined(HAVE_SCHEMA_FUZZER) || \
238     defined(HAVE_XML_FUZZER)
239 static int
processPattern(const char * pattern)240 processPattern(const char *pattern) {
241     glob_t globbuf;
242     int ret = 0;
243     int res;
244     size_t i;
245 
246     res = glob(pattern, 0, NULL, &globbuf);
247     if (res == GLOB_NOMATCH)
248         return(0);
249     if (res != 0) {
250         fprintf(stderr, "couldn't match pattern %s\n", pattern);
251         return(-1);
252     }
253 
254     for (i = 0; i < globbuf.gl_pathc; i++) {
255         struct stat statbuf;
256         char outPath[PATH_SIZE];
257         char *dirBuf = NULL;
258         char *baseBuf = NULL;
259         const char *path, *dir, *base;
260         FILE *out = NULL;
261         int dirChanged = 0;
262         size_t size;
263 
264         path = globbuf.gl_pathv[i];
265 
266         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
267             continue;
268 
269         dirBuf = (char *) xmlCharStrdup(path);
270         baseBuf = (char *) xmlCharStrdup(path);
271         if ((dirBuf == NULL) || (baseBuf == NULL)) {
272             fprintf(stderr, "memory allocation failed\n");
273             ret = -1;
274             goto error;
275         }
276         dir = dirname(dirBuf);
277         base = basename(baseBuf);
278 
279         size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
280                         globalData.fuzzer, base);
281         if (size >= PATH_SIZE) {
282             fprintf(stderr, "creating path failed\n");
283             ret = -1;
284             goto error;
285         }
286         out = fopen(outPath, "wb");
287         if (out == NULL) {
288             fprintf(stderr, "couldn't open %s for writing\n", outPath);
289             ret = -1;
290             goto error;
291         }
292         if (chdir(dir) != 0) {
293             fprintf(stderr, "couldn't chdir to %s\n", dir);
294             ret = -1;
295             goto error;
296         }
297         dirChanged = 1;
298         if (globalData.processFile(base, out) != 0)
299             ret = -1;
300 
301 error:
302         if (out != NULL)
303             fclose(out);
304         xmlFree(dirBuf);
305         xmlFree(baseBuf);
306         if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
307             fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
308             ret = -1;
309             break;
310         }
311     }
312 
313     globfree(&globbuf);
314     return(ret);
315 }
316 #endif
317 
318 #ifdef HAVE_XPATH_FUZZER
319 static int
processXPath(const char * testDir,const char * prefix,const char * name,const char * data,const char * subdir,int xptr)320 processXPath(const char *testDir, const char *prefix, const char *name,
321              const char *data, const char *subdir, int xptr) {
322     char pattern[PATH_SIZE];
323     glob_t globbuf;
324     size_t i, size;
325     int ret = 0, res;
326 
327     size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
328                     testDir, subdir, prefix);
329     if (size >= PATH_SIZE)
330         return(-1);
331     res = glob(pattern, 0, NULL, &globbuf);
332     if (res == GLOB_NOMATCH)
333         return(0);
334     if (res != 0) {
335         fprintf(stderr, "couldn't match pattern %s\n", pattern);
336         return(-1);
337     }
338 
339     for (i = 0; i < globbuf.gl_pathc; i++) {
340         char *path = globbuf.gl_pathv[i];
341         struct stat statbuf;
342         FILE *in;
343         char expr[EXPR_SIZE];
344 
345         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
346             continue;
347 
348         in = fopen(path, "rb");
349         if (in == NULL) {
350             ret = -1;
351             continue;
352         }
353 
354         while (fgets(expr, EXPR_SIZE, in) != NULL) {
355             char outPath[PATH_SIZE];
356             FILE *out;
357             int j;
358 
359             for (j = 0; expr[j] != 0; j++)
360                 if (expr[j] == '\r' || expr[j] == '\n')
361                     break;
362             expr[j] = 0;
363 
364             size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
365                             name, globalData.counter);
366             if (size >= PATH_SIZE) {
367                 ret = -1;
368                 continue;
369             }
370             out = fopen(outPath, "wb");
371             if (out == NULL) {
372                 ret = -1;
373                 continue;
374             }
375 
376             /* Max allocations. */
377             xmlFuzzWriteInt(out, 0, 4);
378 
379             if (xptr) {
380                 xmlFuzzWriteString(out, expr);
381             } else {
382                 char xptrExpr[EXPR_SIZE+100];
383 
384                 /* Wrap XPath expressions as XPointer */
385                 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
386                 xmlFuzzWriteString(out, xptrExpr);
387             }
388 
389             xmlFuzzWriteString(out, data);
390 
391             fclose(out);
392             globalData.counter++;
393         }
394 
395         fclose(in);
396     }
397 
398     globfree(&globbuf);
399 
400     return(ret);
401 }
402 
403 static int
processXPathDir(const char * testDir)404 processXPathDir(const char *testDir) {
405     char pattern[PATH_SIZE];
406     glob_t globbuf;
407     size_t i, size;
408     int ret = 0;
409 
410     globalData.counter = 1;
411     if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
412         ret = -1;
413 
414     size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
415     if (size >= PATH_SIZE)
416         return(1);
417     if (glob(pattern, 0, NULL, &globbuf) != 0)
418         return(1);
419 
420     for (i = 0; i < globbuf.gl_pathc; i++) {
421         char *path = globbuf.gl_pathv[i];
422         char *data;
423         const char *docFile;
424 
425         data = xmlSlurpFile(path, NULL);
426         if (data == NULL) {
427             ret = -1;
428             continue;
429         }
430         docFile = basename(path);
431 
432         globalData.counter = 1;
433         if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
434             ret = -1;
435         if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
436             ret = -1;
437         if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
438             ret = -1;
439 
440         xmlFree(data);
441     }
442 
443     globfree(&globbuf);
444 
445     return(ret);
446 }
447 #endif
448 
449 int
main(int argc,const char ** argv)450 main(int argc, const char **argv) {
451     mainFunc processArg = NULL;
452     const char *fuzzer;
453     int ret = 0;
454     int i;
455 
456     if (argc < 3) {
457         fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
458         return(1);
459     }
460 
461     fuzzer = argv[1];
462     if (strcmp(fuzzer, "html") == 0) {
463 #ifdef HAVE_HTML_FUZZER
464         processArg = processPattern;
465         globalData.flags |= FLAG_PUSH_CHUNK_SIZE;
466         globalData.processFile = processHtml;
467 #endif
468     } else if (strcmp(fuzzer, "lint") == 0) {
469 #ifdef HAVE_LINT_FUZZER
470         processArg = processPattern;
471         globalData.flags |= FLAG_LINT;
472         globalData.processFile = processXml;
473 #endif
474     } else if (strcmp(fuzzer, "reader") == 0) {
475 #ifdef HAVE_READER_FUZZER
476         processArg = processPattern;
477         globalData.flags |= FLAG_READER;
478         globalData.processFile = processXml;
479 #endif
480     } else if (strcmp(fuzzer, "schema") == 0) {
481 #ifdef HAVE_SCHEMA_FUZZER
482         processArg = processPattern;
483         globalData.processFile = processSchema;
484 #endif
485     } else if (strcmp(fuzzer, "valid") == 0) {
486 #ifdef HAVE_VALID_FUZZER
487         processArg = processPattern;
488         globalData.processFile = processXml;
489 #endif
490     } else if (strcmp(fuzzer, "xinclude") == 0) {
491 #ifdef HAVE_XINCLUDE_FUZZER
492         processArg = processPattern;
493         globalData.processFile = processXml;
494 #endif
495     } else if (strcmp(fuzzer, "xml") == 0) {
496 #ifdef HAVE_XML_FUZZER
497         processArg = processPattern;
498         globalData.flags |= FLAG_PUSH_CHUNK_SIZE;
499         globalData.processFile = processXml;
500 #endif
501     } else if (strcmp(fuzzer, "xpath") == 0) {
502 #ifdef HAVE_XPATH_FUZZER
503         processArg = processXPathDir;
504 #endif
505     } else {
506         fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
507         return(1);
508     }
509     globalData.fuzzer = fuzzer;
510 
511     if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
512         fprintf(stderr, "couldn't get current directory\n");
513         return(1);
514     }
515 
516     if (processArg != NULL)
517         for (i = 2; i < argc; i++)
518             processArg(argv[i]);
519 
520     return(ret);
521 }
522 
523