• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * html.c: a libFuzzer target to test several HTML parser interfaces.
3  *
4  * See Copyright for the status of this software.
5  */
6 
7 #include <libxml/HTMLparser.h>
8 #include <libxml/HTMLtree.h>
9 #include <libxml/catalog.h>
10 #include "fuzz.h"
11 
12 int
LLVMFuzzerInitialize(int * argc ATTRIBUTE_UNUSED,char *** argv ATTRIBUTE_UNUSED)13 LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
14                      char ***argv ATTRIBUTE_UNUSED) {
15     xmlInitParser();
16 #ifdef LIBXML_CATALOG_ENABLED
17     xmlInitializeCatalog();
18 #endif
19     xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
20 
21     return 0;
22 }
23 
24 int
LLVMFuzzerTestOneInput(const char * data,size_t size)25 LLVMFuzzerTestOneInput(const char *data, size_t size) {
26     static const size_t maxChunkSize = 128;
27     htmlDocPtr doc;
28     htmlParserCtxtPtr ctxt;
29     xmlOutputBufferPtr out;
30     const char *docBuffer;
31     size_t docSize, consumed, chunkSize;
32     int opts, outSize;
33 
34     xmlFuzzDataInit(data, size);
35     opts = xmlFuzzReadInt();
36 
37     docBuffer = xmlFuzzReadRemaining(&docSize);
38     if (docBuffer == NULL) {
39         xmlFuzzDataCleanup();
40         return(0);
41     }
42 
43     /* Pull parser */
44 
45     doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
46 
47     /*
48      * Also test the serializer. Call htmlDocContentDumpOutput with our
49      * own buffer to avoid encoding the output. The HTML encoding is
50      * excruciatingly slow (see htmlEntityValueLookup).
51      */
52     out = xmlAllocOutputBuffer(NULL);
53     htmlDocContentDumpOutput(out, doc, NULL);
54     xmlOutputBufferClose(out);
55 
56     xmlFreeDoc(doc);
57 
58     /* Push parser */
59 
60     ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
61                                     XML_CHAR_ENCODING_NONE);
62     htmlCtxtUseOptions(ctxt, opts);
63 
64     for (consumed = 0; consumed < docSize; consumed += chunkSize) {
65         chunkSize = docSize - consumed;
66         if (chunkSize > maxChunkSize)
67             chunkSize = maxChunkSize;
68         htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
69     }
70 
71     htmlParseChunk(ctxt, NULL, 0, 1);
72     xmlFreeDoc(ctxt->myDoc);
73     htmlFreeParserCtxt(ctxt);
74 
75     /* Cleanup */
76 
77     xmlFuzzDataCleanup();
78     xmlResetLastError();
79 
80     return(0);
81 }
82 
83