• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * xmlSeed.c: Generate the XML seed corpus for fuzzing.
3  *
4  * See Copyright for the status of this software.
5  */
6 
7 #include <stdio.h>
8 #include <string.h>
9 #include <glob.h>
10 #include <libgen.h>
11 #include <sys/stat.h>
12 
13 #ifdef _WIN32
14 #include <direct.h>
15 #else
16 #include <unistd.h>
17 #endif
18 
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/HTMLparser.h>
22 #include <libxml/xinclude.h>
23 #include <libxml/xmlschemas.h>
24 #include "fuzz.h"
25 
26 #define PATH_SIZE 500
27 #define SEED_BUF_SIZE 16384
28 #define EXPR_SIZE 4500
29 
/*
 * Callback that turns one input file into seed data. `base` is the name of
 * the file to read and `out` is the stream the seed is written to.
 * Returns 0 on success and non-zero on failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);

/*
 * Callback invoked once for every command-line argument that follows the
 * fuzzer name. Returns 0 on success and non-zero on failure.
 */
typedef int
(*mainFunc)(const char *arg);
35 
36 static struct {
37     FILE *out;
38     xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
39     xmlExternalEntityLoader oldLoader;
40     fileFunc processFile;
41     const char *fuzzer;
42     int counter;
43     char cwd[PATH_SIZE];
44 } globalData;
45 
46 /*
47  * A custom entity loader that writes all external DTDs or entities to a
48  * single file in the format expected by xmlFuzzEntityLoader.
49  */
50 static xmlParserInputPtr
fuzzEntityRecorder(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)51 fuzzEntityRecorder(const char *URL, const char *ID,
52                       xmlParserCtxtPtr ctxt) {
53     xmlParserInputPtr in;
54     static const int chunkSize = 16384;
55     int len;
56 
57     in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
58     if (in == NULL)
59         return(NULL);
60 
61     if (globalData.entities == NULL) {
62         globalData.entities = xmlHashCreate(4);
63     } else if (xmlHashLookup(globalData.entities,
64                              (const xmlChar *) URL) != NULL) {
65         return(in);
66     }
67 
68     do {
69         len = xmlParserInputBufferGrow(in->buf, chunkSize);
70         if (len < 0) {
71             fprintf(stderr, "Error reading %s\n", URL);
72             xmlFreeInputStream(in);
73             return(NULL);
74         }
75     } while (len > 0);
76 
77     xmlFuzzWriteString(globalData.out, URL);
78     xmlFuzzWriteString(globalData.out,
79                        (char *) xmlBufContent(in->buf->buffer));
80 
81     xmlFreeInputStream(in);
82 
83     xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL);
84 
85     return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
86 }
87 
88 static void
fuzzRecorderInit(FILE * out)89 fuzzRecorderInit(FILE *out) {
90     globalData.out = out;
91     globalData.entities = xmlHashCreate(8);
92     globalData.oldLoader = xmlGetExternalEntityLoader();
93     xmlSetExternalEntityLoader(fuzzEntityRecorder);
94 }
95 
96 static void
fuzzRecorderCleanup()97 fuzzRecorderCleanup() {
98     xmlSetExternalEntityLoader(globalData.oldLoader);
99     xmlHashFree(globalData.entities, xmlHashDefaultDeallocator);
100     globalData.out = NULL;
101     globalData.entities = NULL;
102     globalData.oldLoader = NULL;
103 }
104 
105 #ifdef HAVE_XML_FUZZER
106 static int
processXml(const char * docFile,FILE * out)107 processXml(const char *docFile, FILE *out) {
108     int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
109     xmlDocPtr doc;
110 
111     fwrite(&opts, sizeof(opts), 1, out);
112 
113     fuzzRecorderInit(out);
114 
115     doc = xmlReadFile(docFile, NULL, opts);
116     xmlXIncludeProcessFlags(doc, opts);
117     xmlFreeDoc(doc);
118 
119     fuzzRecorderCleanup();
120 
121     return(0);
122 }
123 #endif
124 
125 #ifdef HAVE_HTML_FUZZER
126 static int
processHtml(const char * docFile,FILE * out)127 processHtml(const char *docFile, FILE *out) {
128     char buf[SEED_BUF_SIZE];
129     FILE *file;
130     size_t size;
131     int opts = 0;
132 
133     fwrite(&opts, sizeof(opts), 1, out);
134 
135     /* Copy file */
136     file = fopen(docFile, "rb");
137     if (file == NULL) {
138         fprintf(stderr, "couldn't open %s\n", docFile);
139         return(0);
140     }
141     do {
142         size = fread(buf, 1, SEED_BUF_SIZE, file);
143         if (size > 0)
144             fwrite(buf, 1, size, out);
145     } while (size == SEED_BUF_SIZE);
146     fclose(file);
147 
148     return(0);
149 }
150 #endif
151 
152 #ifdef HAVE_SCHEMA_FUZZER
153 static int
processSchema(const char * docFile,FILE * out)154 processSchema(const char *docFile, FILE *out) {
155     xmlSchemaPtr schema;
156     xmlSchemaParserCtxtPtr pctxt;
157 
158     fuzzRecorderInit(out);
159 
160     pctxt = xmlSchemaNewParserCtxt(docFile);
161     xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
162     schema = xmlSchemaParse(pctxt);
163     xmlSchemaFreeParserCtxt(pctxt);
164     xmlSchemaFree(schema);
165 
166     fuzzRecorderCleanup();
167 
168     return(0);
169 }
170 #endif
171 
172 static int
processPattern(const char * pattern)173 processPattern(const char *pattern) {
174     glob_t globbuf;
175     int ret = 0;
176     int res, i;
177 
178     res = glob(pattern, 0, NULL, &globbuf);
179     if (res == GLOB_NOMATCH)
180         return(0);
181     if (res != 0) {
182         fprintf(stderr, "couldn't match pattern %s\n", pattern);
183         return(-1);
184     }
185 
186     for (i = 0; i < globbuf.gl_pathc; i++) {
187         struct stat statbuf;
188         char outPath[PATH_SIZE];
189         char *dirBuf = NULL;
190         char *baseBuf = NULL;
191         const char *path, *dir, *base;
192         FILE *out = NULL;
193         int dirChanged = 0;
194         size_t size;
195 
196         path = globbuf.gl_pathv[i];
197 
198         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
199             continue;
200 
201         dirBuf = (char *) xmlCharStrdup(path);
202         baseBuf = (char *) xmlCharStrdup(path);
203         if ((dirBuf == NULL) || (baseBuf == NULL)) {
204             fprintf(stderr, "memory allocation failed\n");
205             ret = -1;
206             goto error;
207         }
208         dir = dirname(dirBuf);
209         base = basename(baseBuf);
210 
211         size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
212                         globalData.fuzzer, base);
213         if (size >= PATH_SIZE) {
214             fprintf(stderr, "creating path failed\n");
215             ret = -1;
216             goto error;
217         }
218         out = fopen(outPath, "wb");
219         if (out == NULL) {
220             fprintf(stderr, "couldn't open %s for writing\n", outPath);
221             ret = -1;
222             goto error;
223         }
224         if (chdir(dir) != 0) {
225             fprintf(stderr, "couldn't chdir to %s\n", dir);
226             ret = -1;
227             goto error;
228         }
229         dirChanged = 1;
230         if (globalData.processFile(base, out) != 0)
231             ret = -1;
232 
233 error:
234         if (out != NULL)
235             fclose(out);
236         xmlFree(dirBuf);
237         xmlFree(baseBuf);
238         if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
239             fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
240             ret = -1;
241             break;
242         }
243     }
244 
245     globfree(&globbuf);
246     return(ret);
247 }
248 
249 #ifdef HAVE_XPATH_FUZZER
250 static int
processXPath(const char * testDir,const char * prefix,const char * name,const char * data,const char * subdir,int xptr)251 processXPath(const char *testDir, const char *prefix, const char *name,
252              const char *data, const char *subdir, int xptr) {
253     char pattern[PATH_SIZE];
254     glob_t globbuf;
255     size_t i, size;
256     int ret = 0, res;
257 
258     size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
259                     testDir, subdir, prefix);
260     if (size >= PATH_SIZE)
261         return(-1);
262     res = glob(pattern, 0, NULL, &globbuf);
263     if (res == GLOB_NOMATCH)
264         return(0);
265     if (res != 0) {
266         fprintf(stderr, "couldn't match pattern %s\n", pattern);
267         return(-1);
268     }
269 
270     for (i = 0; i < globbuf.gl_pathc; i++) {
271         char *path = globbuf.gl_pathv[i];
272         struct stat statbuf;
273         FILE *in;
274         char expr[EXPR_SIZE];
275 
276         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
277             continue;
278 
279         in = fopen(path, "rb");
280         if (in == NULL) {
281             ret = -1;
282             continue;
283         }
284 
285         while (fgets(expr, EXPR_SIZE, in) > 0) {
286             char outPath[PATH_SIZE];
287             FILE *out;
288             int j;
289 
290             for (j = 0; expr[j] != 0; j++)
291                 if (expr[j] == '\r' || expr[j] == '\n')
292                     break;
293             expr[j] = 0;
294 
295             size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
296                             name, globalData.counter);
297             if (size >= PATH_SIZE) {
298                 ret = -1;
299                 continue;
300             }
301             out = fopen(outPath, "wb");
302             if (out == NULL) {
303                 ret = -1;
304                 continue;
305             }
306 
307             if (xptr) {
308                 xmlFuzzWriteString(out, expr);
309             } else {
310                 char xptrExpr[EXPR_SIZE+100];
311 
312                 /* Wrap XPath expressions as XPointer */
313                 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
314                 xmlFuzzWriteString(out, xptrExpr);
315             }
316 
317             xmlFuzzWriteString(out, data);
318 
319             fclose(out);
320             globalData.counter++;
321         }
322 
323         fclose(in);
324     }
325 
326     globfree(&globbuf);
327 
328     return(ret);
329 }
330 
331 int
processXPathDir(const char * testDir)332 processXPathDir(const char *testDir) {
333     char pattern[PATH_SIZE];
334     glob_t globbuf;
335     size_t i, size;
336     int ret = 0;
337 
338     globalData.counter = 1;
339     if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
340         ret = -1;
341 
342     size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
343     if (size >= PATH_SIZE)
344         return(1);
345     if (glob(pattern, 0, NULL, &globbuf) != 0)
346         return(1);
347 
348     for (i = 0; i < globbuf.gl_pathc; i++) {
349         char *path = globbuf.gl_pathv[i];
350         char *data;
351         const char *docFile;
352 
353         data = xmlSlurpFile(path, NULL);
354         if (data == NULL) {
355             ret = -1;
356             continue;
357         }
358         docFile = basename(path);
359 
360         globalData.counter = 1;
361         if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
362             ret = -1;
363         if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
364             ret = -1;
365 
366         xmlFree(data);
367     }
368 
369     globfree(&globbuf);
370 
371     return(ret);
372 }
373 #endif
374 
375 int
main(int argc,const char ** argv)376 main(int argc, const char **argv) {
377     mainFunc processArg = NULL;
378     const char *fuzzer;
379     int ret = 0;
380     int xpath = 0;
381     int i;
382 
383     if (argc < 3) {
384         fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
385         return(1);
386     }
387 
388     xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
389 
390     fuzzer = argv[1];
391     if (strcmp(fuzzer, "html") == 0) {
392 #ifdef HAVE_HTML_FUZZER
393         processArg = processPattern;
394         globalData.processFile = processHtml;
395 #endif
396     } else if (strcmp(fuzzer, "schema") == 0) {
397 #ifdef HAVE_SCHEMA_FUZZER
398         processArg = processPattern;
399         globalData.processFile = processSchema;
400 #endif
401     } else if (strcmp(fuzzer, "xml") == 0) {
402 #ifdef HAVE_XML_FUZZER
403         processArg = processPattern;
404         globalData.processFile = processXml;
405 #endif
406     } else if (strcmp(fuzzer, "xpath") == 0) {
407 #ifdef HAVE_XPATH_FUZZER
408         processArg = processXPathDir;
409 #endif
410     } else {
411         fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
412         return(1);
413     }
414     globalData.fuzzer = fuzzer;
415 
416     if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
417         fprintf(stderr, "couldn't get current directory\n");
418         return(1);
419     }
420 
421     if (processArg != NULL)
422         for (i = 2; i < argc; i++)
423             processArg(argv[i]);
424 
425     return(ret);
426 }
427 
428