• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * testrecurse.c: C program to run libxml2 regression tests checking entities
3  *            recursions
4  *
5  * To compile on Unixes:
6  * cc -o testrecurse `xml2-config --cflags` testrecurse.c `xml2-config --libs` -lpthread
7  *
8  * See Copyright for the status of this software.
9  *
10  * daniel@veillard.com
11  */
12 
13 #include <stdio.h>
14 
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/stat.h>
18 
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/tree.h>
22 #include <libxml/uri.h>
23 
/*
 * Flags used to open input files read-only.
 *
 * O_BINARY is just for Windows compatibility - if it isn't defined
 * on this system, RD_FLAGS is plain O_RDONLY to avoid any compilation
 * error. The expansion is parenthesized so that RD_FLAGS stays a single
 * operand when it is combined with other operators.
 */
#ifdef	O_BINARY
#define	RD_FLAGS	(O_RDONLY | O_BINARY)
#else
#define	RD_FLAGS	(O_RDONLY)
#endif
33 
/* Bit flags accepted in the "options" argument of the test functions */
#define OPT_SAX         (1<<0)  /* run initSAX() on the parser context */
#define OPT_NO_SUBST    (1<<1)  /* do not add XML_PARSE_NOENT to the parser options */
36 
/*
 * Signature of a test function as stored in testDesc.func.
 * The test functions in this file return 0 on success and a non-zero
 * value on failure.
 */
typedef int (*functest) (const char *filename, const char *result,
                         const char *error, int options);
39 
/*
 * Description of one group of tests: the function to run, the glob
 * selecting its input files and the options passed to the function.
 */
typedef struct testDesc testDesc;
typedef testDesc *testDescPtr;
struct testDesc {
    const char *desc; /* description of the test */
    functest    func; /* function implementing the test */
    const char *in;   /* glob to path for input files */
    const char *out;  /* output directory */
    const char *suffix;/* suffix for output files */
    const char *err;  /* suffix for error output files */
    int     options;  /* parser options for the test */
};
51 
/* Forward declaration: testExternalEntityLoader() calls it before its definition */
static int checkTestFile(const char *filename);
53 
54 
55 #if defined(_WIN32)
56 
57 #include <windows.h>
58 
/*
 * Minimal stand-in for the POSIX glob_t, used by the glob() and
 * globfree() replacements of this file when building for _WIN32.
 */
typedef struct
{
      size_t gl_pathc;    /* Count of paths matched so far  */
      char **gl_pathv;    /* List of matched pathnames.  */
      size_t gl_offs;     /* Slots to reserve in 'gl_pathv'.  */
} glob_t;

/* The replacement glob() does not look at its flags argument */
#define GLOB_DOOFFS 0
/*
 * glob:
 * @pattern: file pattern, handed as is to FindFirstFileA()
 * @flags: unused
 * @errfunc: unused
 * @pglob: result structure, cleared and then filled by this call
 *
 * Minimal Windows replacement for POSIX glob(), built on
 * FindFirstFileA()/FindNextFileA(). Each match is stored in
 * pglob->gl_pathv prefixed with the directory part of @pattern, i.e.
 * everything up to and including its last '/'. Matches after the first
 * one are skipped when their name starts with '.'.
 *
 * Returns -1 on NULL arguments or if the path array cannot be allocated,
 * 0 otherwise (including when nothing matches). The result has to be
 * released with globfree().
 */
static int glob(const char *pattern, ATTRIBUTE_UNUSED int flags,
                ATTRIBUTE_UNUSED int errfunc(const char *epath, int eerrno),
                glob_t *pglob) {
    glob_t *ret;
    /* explicit ANSI variant: it must match the ...A() calls below */
    WIN32_FIND_DATAA FindFileData;
    HANDLE hFind;
    unsigned int nb_paths = 0;
    char directory[500];
    int len;

    if ((pattern == NULL) || (pglob == NULL)) return(-1);

    /*
     * strncpy() does not terminate the destination when the source is
     * too long, so terminate explicitly before strlen() reads it.
     */
    strncpy(directory, pattern, sizeof(directory) - 1);
    directory[sizeof(directory) - 1] = 0;
    /* keep only the directory part, "len" is where file names get appended */
    for (len = strlen(directory);len >= 0;len--) {
        if (directory[len] == '/') {
            len++;
            directory[len] = 0;
            break;
        }
    }
    if (len <= 0)
        len = 0;


    ret = pglob;
    memset(ret, 0, sizeof(glob_t));

    hFind = FindFirstFileA(pattern, &FindFileData);
    if (hFind == INVALID_HANDLE_VALUE)
        return(0);
    nb_paths = 20;
    ret->gl_pathv = (char **) malloc(nb_paths * sizeof(char *));
    if (ret->gl_pathv == NULL) {
        FindClose(hFind);
        return(-1);
    }
    strncpy(directory + len, FindFileData.cFileName,
            sizeof(directory) - 1 - len);
    directory[sizeof(directory) - 1] = 0;
    ret->gl_pathv[ret->gl_pathc] = strdup(directory);
    if (ret->gl_pathv[ret->gl_pathc] == NULL)
        goto done;
    ret->gl_pathc++;
    while(FindNextFileA(hFind, &FindFileData)) {
        if (FindFileData.cFileName[0] == '.')
            continue;
        /* always keep one free slot for the terminating NULL entry */
        if (ret->gl_pathc + 2 > nb_paths) {
            char **tmp = realloc(ret->gl_pathv, nb_paths * 2 * sizeof(char *));
            if (tmp == NULL)
                break;
            ret->gl_pathv = tmp;
            nb_paths *= 2;
        }
        strncpy(directory + len, FindFileData.cFileName,
                sizeof(directory) - 1 - len);
        directory[sizeof(directory) - 1] = 0;
        ret->gl_pathv[ret->gl_pathc] = strdup(directory);
        if (ret->gl_pathv[ret->gl_pathc] == NULL)
            break;
        ret->gl_pathc++;
    }
    ret->gl_pathv[ret->gl_pathc] = NULL;

done:
    FindClose(hFind);
    return(0);
}
130 
131 
132 
/*
 * globfree:
 * @pglob: structure filled by glob(), may be NULL
 *
 * Release what glob() allocated: every matched path and the gl_pathv
 * array holding them. The structure is reset afterwards, so calling
 * globfree() a second time on it is harmless.
 */
static void globfree(glob_t *pglob) {
    unsigned int i;

    if ((pglob == NULL) || (pglob->gl_pathv == NULL))
        return;

    for (i = 0;i < pglob->gl_pathc;i++)
        free(pglob->gl_pathv[i]);

    /* the array itself is malloc'ed by glob() and has to be released too */
    free(pglob->gl_pathv);
    pglob->gl_pathv = NULL;
    pglob->gl_pathc = 0;
}
143 
144 #else
145 #include <glob.h>
146 #endif
147 
148 /************************************************************************
149  *									*
150  *		Huge document generator					*
151  *									*
152  ************************************************************************/
153 
154 #include <libxml/xmlIO.h>
155 
/*
 * Pieces of one generated document: hugeOpen() starts serving "start",
 * then hugeRead() repeats "segment" and ends with "finish".
 */
typedef struct {
    const char *URL;     /* URL compared with strcmp() by hugeMatch()/hugeOpen() */
    const char *start;   /* first part of the document */
    const char *segment; /* part served repeatedly after "start" */
    const char *finish;  /* last part of the document */
} xmlHugeDocParts;
162 
/*
 * Generated documents known to the huge input callbacks. The table is
 * terminated by an entry whose URL is NULL.
 */
static const xmlHugeDocParts hugeDocTable[] = {
    {
        /* URL */
        "test/recurse/huge.xml",

        /* start: internal subset declaring the entities, then the root */
        "<!DOCTYPE foo ["
        "<!ELEMENT foo (bar*)> "
        "<!ELEMENT bar (#PCDATA)> "
        "<!ATTLIST bar attr CDATA #IMPLIED> "
        "<!ENTITY a SYSTEM 'ga.ent'> "
        "<!ENTITY b SYSTEM 'gb.ent'> "
        "<!ENTITY c SYSTEM 'gc.ent'> "
        "<!ENTITY f 'some internal data'> "
        "<!ENTITY e '&f;&f;'> "
        "<!ENTITY d '&e;&e;'> "
        "]> "
        "<foo>",

        /* segment: entity references in attribute and content, plus filler */
        "  <bar attr='&e; &f; &d;'>&a; &b; &c; &e; &f; &d;</bar>\n"
        "  <bar>_123456789_123456789_123456789_123456789</bar>\n"
        "  <bar>_123456789_123456789_123456789_123456789</bar>\n"
        "  <bar>_123456789_123456789_123456789_123456789</bar>\n"
        "  <bar>_123456789_123456789_123456789_123456789</bar>\n",

        /* finish: close the root element */
        "</foo>"
    },
    {
        /* URL */
        "test/recurse/huge_dtd.dtd",

        /* start: general and parameter entity declarations */
        "<!ELEMENT foo (#PCDATA)>\n"
        "<!ENTITY ent 'success'>\n"
        "<!ENTITY % a SYSTEM 'pa.ent'>\n"
        "<!ENTITY % b SYSTEM 'pb.ent'>\n"
        "<!ENTITY % c SYSTEM 'pc.ent'>\n"
        "<!ENTITY % d '<!-- comment -->'>\n"
        "<!ENTITY % e '%d;%d;'>\n"
        "<!ENTITY % f '%e;%e;'>\n",

        /* segment: parameter entity references in an entity value and in the DTD */
        "<!ENTITY ent '%a; %b; %c; %d; %e; %f;'>\n"
        "%a; %b; %c; %d; %e; %f;\n"
        "<!-- _123456789_123456789_123456789_123456789 -->\n"
        "<!-- _123456789_123456789_123456789_123456789 -->\n"
        "<!-- _123456789_123456789_123456789_123456789 -->\n",

        /* finish: nothing to close */
        ""
    },
    { NULL, NULL, NULL, NULL }
};
210 
/* State of the generated document being served, (re)set by hugeOpen() */
static const xmlHugeDocParts *hugeDocParts; /* table entry selected by hugeOpen() */
static int curseg = 0;       /* number of parts completely handed out by hugeRead() */
static const char *current;  /* next byte to serve in the current part */
static int rlen;             /* number of bytes left in the current part */
215 
216 /**
217  * hugeMatch:
218  * @URI: an URI to test
219  *
220  * Check for a huge query
221  *
222  * Returns 1 if yes and 0 if another Input module should be used
223  */
224 static int
hugeMatch(const char * URI)225 hugeMatch(const char * URI) {
226     int i;
227 
228     if (URI == NULL)
229         return(0);
230 
231     for (i = 0; hugeDocTable[i].URL; i++) {
232         if (strcmp(URI, hugeDocTable[i].URL) == 0)
233             return(1);
234     }
235 
236     return(0);
237 }
238 
239 /**
240  * hugeOpen:
241  * @URI: an URI to test
242  *
243  * Return a pointer to the huge query handler, in this example simply
244  * the current pointer...
245  *
246  * Returns an Input context or NULL in case or error
247  */
248 static void *
hugeOpen(const char * URI)249 hugeOpen(const char * URI) {
250     int i;
251 
252     if (URI == NULL)
253         return(NULL);
254 
255     for (i = 0; hugeDocTable[i].URL; i++) {
256         if (strcmp(URI, hugeDocTable[i].URL) == 0) {
257             hugeDocParts = hugeDocTable + i;
258             curseg = 0;
259             current = hugeDocParts->start;
260             rlen = strlen(current);
261             return((void *) current);
262         }
263     }
264 
265     return(NULL);
266 }
267 
/**
 * hugeClose:
 * @context: the read context returned by hugeOpen()
 *
 * Input "close" callback. There is nothing to release, the context is
 * only checked.
 *
 * Returns 0, or -1 if @context is NULL
 */
static int
hugeClose(void * context) {
    return((context != NULL) ? 0 : -1);
}
281 
282 #define MAX_NODES 1000
283 
284 /**
285  * hugeRead:
286  * @context: the read context
287  * @buffer: where to store data
288  * @len: number of bytes to read
289  *
290  * Implement an huge query read.
291  *
292  * Returns the number of bytes read or -1 in case of error
293  */
294 static int
hugeRead(void * context,char * buffer,int len)295 hugeRead(void *context, char *buffer, int len)
296 {
297     if ((context == NULL) || (buffer == NULL) || (len < 0))
298         return (-1);
299 
300     if (len >= rlen) {
301         if (curseg >= MAX_NODES + 1) {
302             rlen = 0;
303             return(0);
304         }
305         len = rlen;
306         rlen = 0;
307 	memcpy(buffer, current, len);
308         curseg ++;
309         if (curseg == MAX_NODES) {
310             current = hugeDocParts->finish;
311 	} else {
312             current = hugeDocParts->segment;
313 	}
314         rlen = strlen(current);
315     } else {
316 	memcpy(buffer, current, len);
317 	rlen -= len;
318         current += len;
319     }
320     return (len);
321 }
322 
323 /************************************************************************
324  *									*
325  *		Libxml2 specific routines				*
326  *									*
327  ************************************************************************/
328 
static int nb_tests = 0;    /* incremented at the start of every test function */
static int nb_errors = 0;   /* NOTE(review): presumably the number of failed tests, not updated in this part of the file */
static int nb_leaks = 0;    /* NOTE(review): presumably the number of tests leaking memory, not updated in this part of the file */
static int extraMemoryFromResolver = 0; /* memory allocated by testExternalEntityLoader() for URLs that are not regular files */
333 
/*
 * Report a fatal error on stderr and terminate the process with exit
 * status 1. The function never returns, whatever its return type says.
 */
static int
fatalError(void) {
    fputs("Exitting tests on fatal error\n", stderr);
    exit(1);
}
339 
340 /*
341  * We need to trap calls to the resolver to not account memory for the catalog
342  * which is shared to the current running test. We also don't want to have
343  * network downloads modifying tests.
344  */
345 static xmlParserInputPtr
testExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)346 testExternalEntityLoader(const char *URL, const char *ID,
347 			 xmlParserCtxtPtr ctxt) {
348     xmlParserInputPtr ret;
349 
350     if (checkTestFile(URL)) {
351 	ret = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
352     } else {
353 	int memused = xmlMemUsed();
354 	ret = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
355 	extraMemoryFromResolver += xmlMemUsed() - memused;
356     }
357 
358     return(ret);
359 }
360 
/*
 * Trapping the error messages at the generic level to grab the equivalent of
 * stderr messages on CLI tools.
 */
static char testErrors[32769];
static int testErrorsSize = 0;

/*
 * channel:
 * @ctx: unused
 * @msg: printf-like format string
 * @...: arguments of the format string
 *
 * Append a formatted message to the testErrors buffer. The buffer is
 * kept zero terminated; once it is full, further messages are dropped.
 * A formatting error reported by vsnprintf() leaves the recorded text
 * untouched instead of corrupting testErrorsSize.
 */
static void
channel(void *ctx, const char *msg, ...) {
    /* the last byte of the buffer is reserved for the terminator */
    const int limit = (int) sizeof(testErrors) - 1;
    va_list args;
    int res;

    (void) ctx;

    if (testErrorsSize >= limit)
        return;
    va_start(args, msg);
    res = vsnprintf(&testErrors[testErrorsSize],
                    limit - testErrorsSize,
                    msg, args);
    va_end(args);
    if (res < 0) {
        /* formatting failed: keep what was recorded so far */
        testErrors[testErrorsSize] = 0;
        return;
    }
    if (testErrorsSize + res >= limit) {
        /* buffer is full */
        testErrorsSize = limit;
    } else {
        testErrorsSize += res;
    }
    testErrors[testErrorsSize] = 0;
}
389 
/**
 * xmlParserPrintFileContextInternal:
 * @input:  an xmlParserInputPtr input
 * @chanl:  the function receiving the formatted output
 * @data:  first argument passed to @chanl
 *
 * Displays current context within the input content for error tracking:
 * one call to @chanl with the text of the line around input->cur (at
 * most 80 characters), then a second call with a line where a '^' marks
 * the current position. Does nothing if @input is NULL.
 */

static void
xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
		xmlGenericErrorFunc chanl, void *data ) {
    const xmlChar *cur, *base;
    unsigned int n, col;	/* GCC warns if signed, because compared with sizeof() */
    xmlChar  content[81]; /* space for 80 chars + line terminator */
    xmlChar *ctnt;

    if (input == NULL) return;
    cur = input->cur;
    base = input->base;
    /* skip backwards over any end-of-lines */
    while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) {
	cur--;
    }
    n = 0;
    /* search backwards for beginning-of-line (to max buff size) */
    while ((n++ < (sizeof(content)-1)) && (cur > base) &&
   (*(cur) != '\n') && (*(cur) != '\r'))
        cur--;
    /* the search stopped on the previous end-of-line: step over it */
    if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
    /* calculate the error position in terms of the current position */
    col = input->cur - cur;
    /* search forward for end-of-line (to max buff size) */
    n = 0;
    ctnt = content;
    /* copy selected text to our buffer */
    while ((*cur != 0) && (*(cur) != '\n') &&
   (*(cur) != '\r') && (n < sizeof(content)-1)) {
		*ctnt++ = *cur++;
	n++;
    }
    *ctnt = 0;
    /* print out the selected text */
    chanl(data ,"%s\n", content);
    /* create blank line with problem pointer */
    n = 0;
    ctnt = content;
    /* (leave buffer space for pointer + line terminator) */
    while ((n<col) && (n++ < sizeof(content)-2) && (*ctnt != 0)) {
	/* tabs are kept so that the pointer lines up with the text above */
	if (*(ctnt) != '\t')
	    *(ctnt) = ' ';
	ctnt++;
    }
    *ctnt++ = '^';
    *ctnt = 0;
    chanl(data ,"%s\n", content);
}
445 
446 static void
testStructuredErrorHandler(void * ctx ATTRIBUTE_UNUSED,const xmlError * err)447 testStructuredErrorHandler(void *ctx ATTRIBUTE_UNUSED, const xmlError *err) {
448     char *file = NULL;
449     int line = 0;
450     int code = -1;
451     int domain;
452     void *data = NULL;
453     const char *str;
454     const xmlChar *name = NULL;
455     xmlNodePtr node;
456     xmlErrorLevel level;
457     xmlParserInputPtr input = NULL;
458     xmlParserInputPtr cur = NULL;
459     xmlParserCtxtPtr ctxt = NULL;
460 
461     if (err == NULL)
462         return;
463 
464     file = err->file;
465     line = err->line;
466     code = err->code;
467     domain = err->domain;
468     level = err->level;
469     node = err->node;
470     if ((domain == XML_FROM_PARSER) || (domain == XML_FROM_HTML) ||
471         (domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) ||
472 	(domain == XML_FROM_IO) || (domain == XML_FROM_VALID)) {
473 	ctxt = err->ctxt;
474     }
475     str = err->message;
476 
477     if (code == XML_ERR_OK)
478         return;
479 
480     if ((node != NULL) && (node->type == XML_ELEMENT_NODE))
481         name = node->name;
482 
483     /*
484      * Maintain the compatibility with the legacy error handling
485      */
486     if (ctxt != NULL) {
487         input = ctxt->input;
488         if ((input != NULL) && (input->filename == NULL) &&
489             (ctxt->inputNr > 1)) {
490             cur = input;
491             input = ctxt->inputTab[ctxt->inputNr - 2];
492         }
493         if (input != NULL) {
494             if (input->filename)
495                 channel(data, "%s:%d: ", input->filename, input->line);
496             else if ((line != 0) && (domain == XML_FROM_PARSER))
497                 channel(data, "Entity: line %d: ", input->line);
498         }
499     } else {
500         if (file != NULL)
501             channel(data, "%s:%d: ", file, line);
502         else if ((line != 0) && (domain == XML_FROM_PARSER))
503             channel(data, "Entity: line %d: ", line);
504     }
505     if (name != NULL) {
506         channel(data, "element %s: ", name);
507     }
508     if (code == XML_ERR_OK)
509         return;
510     switch (domain) {
511         case XML_FROM_PARSER:
512             channel(data, "parser ");
513             break;
514         case XML_FROM_NAMESPACE:
515             channel(data, "namespace ");
516             break;
517         case XML_FROM_DTD:
518         case XML_FROM_VALID:
519             channel(data, "validity ");
520             break;
521         case XML_FROM_HTML:
522             channel(data, "HTML parser ");
523             break;
524         case XML_FROM_MEMORY:
525             channel(data, "memory ");
526             break;
527         case XML_FROM_OUTPUT:
528             channel(data, "output ");
529             break;
530         case XML_FROM_IO:
531             channel(data, "I/O ");
532             break;
533         case XML_FROM_XINCLUDE:
534             channel(data, "XInclude ");
535             break;
536         case XML_FROM_XPATH:
537             channel(data, "XPath ");
538             break;
539         case XML_FROM_XPOINTER:
540             channel(data, "parser ");
541             break;
542         case XML_FROM_REGEXP:
543             channel(data, "regexp ");
544             break;
545         case XML_FROM_MODULE:
546             channel(data, "module ");
547             break;
548         case XML_FROM_SCHEMASV:
549             channel(data, "Schemas validity ");
550             break;
551         case XML_FROM_SCHEMASP:
552             channel(data, "Schemas parser ");
553             break;
554         case XML_FROM_RELAXNGP:
555             channel(data, "Relax-NG parser ");
556             break;
557         case XML_FROM_RELAXNGV:
558             channel(data, "Relax-NG validity ");
559             break;
560         case XML_FROM_CATALOG:
561             channel(data, "Catalog ");
562             break;
563         case XML_FROM_C14N:
564             channel(data, "C14N ");
565             break;
566         case XML_FROM_XSLT:
567             channel(data, "XSLT ");
568             break;
569         default:
570             break;
571     }
572     if (code == XML_ERR_OK)
573         return;
574     switch (level) {
575         case XML_ERR_NONE:
576             channel(data, ": ");
577             break;
578         case XML_ERR_WARNING:
579             channel(data, "warning : ");
580             break;
581         case XML_ERR_ERROR:
582             channel(data, "error : ");
583             break;
584         case XML_ERR_FATAL:
585             channel(data, "error : ");
586             break;
587     }
588     if (code == XML_ERR_OK)
589         return;
590     if (str != NULL) {
591         int len;
592 	len = xmlStrlen((const xmlChar *)str);
593 	if ((len > 0) && (str[len - 1] != '\n'))
594 	    channel(data, "%s\n", str);
595 	else
596 	    channel(data, "%s", str);
597     } else {
598         channel(data, "%s\n", "out of memory error");
599     }
600     if (code == XML_ERR_OK)
601         return;
602 
603     if (ctxt != NULL) {
604         xmlParserPrintFileContextInternal(input, channel, data);
605         if (cur != NULL) {
606             if (cur->filename)
607                 channel(data, "%s:%d: \n", cur->filename, cur->line);
608             else if ((line != 0) && (domain == XML_FROM_PARSER))
609                 channel(data, "Entity: line %d: \n", cur->line);
610             xmlParserPrintFileContextInternal(cur, channel, data);
611         }
612     }
613     if ((domain == XML_FROM_XPATH) && (err->str1 != NULL) &&
614         (err->int1 < 100) &&
615 	(err->int1 < xmlStrlen((const xmlChar *)err->str1))) {
616 	xmlChar buf[150];
617 	int i;
618 
619 	channel(data, "%s\n", err->str1);
620 	for (i=0;i < err->int1;i++)
621 	     buf[i] = ' ';
622 	buf[i++] = '^';
623 	buf[i] = 0;
624 	channel(data, "%s\n", buf);
625     }
626 }
627 
628 static void
initializeLibxml2(void)629 initializeLibxml2(void) {
630     xmlMemSetup(xmlMemFree, xmlMemMalloc, xmlMemRealloc, xmlMemoryStrdup);
631     xmlInitParser();
632     xmlSetExternalEntityLoader(testExternalEntityLoader);
633     xmlSetStructuredErrorFunc(NULL, testStructuredErrorHandler);
634     /*
635      * register the new I/O handlers
636      */
637     if (xmlRegisterInputCallbacks(hugeMatch, hugeOpen,
638                                   hugeRead, hugeClose) < 0) {
639         fprintf(stderr, "failed to register Huge handler\n");
640 	exit(1);
641     }
642 }
643 
644 static void
initSAX(xmlParserCtxtPtr ctxt)645 initSAX(xmlParserCtxtPtr ctxt) {
646     ctxt->sax->startElementNs = NULL;
647     ctxt->sax->endElementNs = NULL;
648     ctxt->sax->characters = NULL;
649     ctxt->sax->cdataBlock = NULL;
650     ctxt->sax->ignorableWhitespace = NULL;
651     ctxt->sax->processingInstruction = NULL;
652     ctxt->sax->comment = NULL;
653 }
654 
655 /************************************************************************
656  *									*
657  *		File name and path utilities				*
658  *									*
659  ************************************************************************/
660 
/*
 * baseFilename:
 * @filename: a path using '/' as separator, may be NULL
 *
 * Returns a pointer inside @filename to what follows its last '/',
 * @filename itself if it contains no '/', or NULL if @filename is NULL
 */
static const char *baseFilename(const char *filename) {
    const char *slash;

    if (filename == NULL)
        return(NULL);

    slash = strrchr(filename, '/');
    return((slash != NULL) ? slash + 1 : filename);
}
672 
/*
 * resultFilename:
 * @filename: path of the input file, only its base name is used
 * @out: prefix (output directory) of the result, "" if NULL
 * @suffix: suffix of the result, ".tmp" if NULL
 *
 * Build the name of a result file as @out + base name of @filename +
 * @suffix. A NULL @filename is handled as an empty base name. Suffixes
 * longer than 499 bytes and results longer than 498 bytes are truncated.
 *
 * Returns a newly allocated string the caller has to free(), or NULL if
 * the name cannot be formatted or allocated
 */
static char *resultFilename(const char *filename, const char *out,
                            const char *suffix) {
    const char *base;
    char res[500];
    char suffixbuff[500];

    base = baseFilename(filename);
    if (base == NULL)
        base = "";
    if (suffix == NULL)
        suffix = ".tmp";
    if (out == NULL)
        out = "";

    /* unlike strncpy(), snprintf() always terminates the copy */
    snprintf(suffixbuff, sizeof(suffixbuff), "%s", suffix);
#ifdef VMS
    if(strstr(base,".") && suffixbuff[0]=='.')
      suffixbuff[0]='_';
#endif

    /* truncation is accepted, only a formatting error is fatal */
    if (snprintf(res, sizeof(res) - 1, "%s%s%s", out, base, suffixbuff) < 0)
        return(NULL);
    return(strdup(res));
}
702 
/*
 * checkTestFile:
 * @filename: the path to check
 *
 * Returns 1 if stat() succeeds on @filename and reports a regular file,
 * 0 otherwise
 */
static int checkTestFile(const char *filename) {
    struct stat buf;
    int isRegular = 0;

    if (stat(filename, &buf) != -1) {
#if defined(_WIN32)
        if (buf.st_mode & _S_IFREG)
            isRegular = 1;
#else
        if (S_ISREG(buf.st_mode))
            isRegular = 1;
#endif
    }

    return(isRegular);
}
719 
720 
721 
722 /************************************************************************
723  *									*
724  *		Test to detect or not recursive entities		*
725  *									*
726  ************************************************************************/
727 /**
728  * recursiveDetectTest:
729  * @filename: the file to parse
730  * @result: the file with expected result
731  * @err: the file with error messages: unused
732  *
733  * Parse a file loading DTD and replacing entities check it fails for
734  * lol cases
735  *
736  * Returns 0 in case of success, an error code otherwise
737  */
738 static int
recursiveDetectTest(const char * filename,const char * result ATTRIBUTE_UNUSED,const char * err ATTRIBUTE_UNUSED,int options)739 recursiveDetectTest(const char *filename,
740              const char *result ATTRIBUTE_UNUSED,
741              const char *err ATTRIBUTE_UNUSED,
742 	     int options) {
743     xmlDocPtr doc;
744     xmlParserCtxtPtr ctxt;
745     int res = 0;
746     /*
747      * XML_PARSE_DTDVALID is the only way to load external entities
748      * without XML_PARSE_NOENT. The validation result doesn't matter
749      * anyway.
750      */
751     int parserOptions = XML_PARSE_DTDVALID;
752 
753     nb_tests++;
754 
755     ctxt = xmlNewParserCtxt();
756     if (options & OPT_SAX)
757         initSAX(ctxt);
758     if ((options & OPT_NO_SUBST) == 0)
759         parserOptions |= XML_PARSE_NOENT;
760     /*
761      * base of the test, parse with the old API
762      */
763     doc = xmlCtxtReadFile(ctxt, filename, NULL, parserOptions);
764     if ((doc != NULL) || (ctxt->lastError.code != XML_ERR_ENTITY_LOOP)) {
765         fprintf(stderr, "Failed to detect recursion in %s\n", filename);
766 	xmlFreeParserCtxt(ctxt);
767 	xmlFreeDoc(doc);
768         return(1);
769     }
770     xmlFreeParserCtxt(ctxt);
771 
772     return(res);
773 }
774 
775 /**
776  * notRecursiveDetectTest:
777  * @filename: the file to parse
778  * @result: the file with expected result
779  * @err: the file with error messages: unused
780  *
781  * Parse a file loading DTD and replacing entities check it works for
782  * good cases
783  *
784  * Returns 0 in case of success, an error code otherwise
785  */
786 static int
notRecursiveDetectTest(const char * filename,const char * result ATTRIBUTE_UNUSED,const char * err ATTRIBUTE_UNUSED,int options)787 notRecursiveDetectTest(const char *filename,
788              const char *result ATTRIBUTE_UNUSED,
789              const char *err ATTRIBUTE_UNUSED,
790 	     int options) {
791     xmlDocPtr doc;
792     xmlParserCtxtPtr ctxt;
793     int res = 0;
794     int parserOptions = XML_PARSE_DTDLOAD;
795 
796     nb_tests++;
797 
798     ctxt = xmlNewParserCtxt();
799     if (options & OPT_SAX)
800         initSAX(ctxt);
801     if ((options & OPT_NO_SUBST) == 0)
802         parserOptions |= XML_PARSE_NOENT;
803     /*
804      * base of the test, parse with the old API
805      */
806     doc = xmlCtxtReadFile(ctxt, filename, NULL, parserOptions);
807     if (doc == NULL) {
808         fprintf(stderr, "Failed to parse correct file %s\n", filename);
809 	xmlFreeParserCtxt(ctxt);
810         return(1);
811     }
812     xmlFreeDoc(doc);
813     xmlFreeParserCtxt(ctxt);
814 
815     return(res);
816 }
817 
818 /**
819  * notRecursiveHugeTest:
820  * @filename: the file to parse
821  * @result: the file with expected result
822  * @err: the file with error messages: unused
823  *
824  * Parse a memory generated file
825  * good cases
826  *
827  * Returns 0 in case of success, an error code otherwise
828  */
829 static int
notRecursiveHugeTest(const char * filename ATTRIBUTE_UNUSED,const char * result ATTRIBUTE_UNUSED,const char * err ATTRIBUTE_UNUSED,int options)830 notRecursiveHugeTest(const char *filename ATTRIBUTE_UNUSED,
831              const char *result ATTRIBUTE_UNUSED,
832              const char *err ATTRIBUTE_UNUSED,
833 	     int options) {
834     xmlParserCtxtPtr ctxt;
835     xmlDocPtr doc;
836     int res = 0;
837     int parserOptions = XML_PARSE_DTDVALID;
838 
839     nb_tests++;
840 
841     ctxt = xmlNewParserCtxt();
842     if (options & OPT_SAX)
843         initSAX(ctxt);
844     if ((options & OPT_NO_SUBST) == 0)
845         parserOptions |= XML_PARSE_NOENT;
846     doc = xmlCtxtReadFile(ctxt, "test/recurse/huge.xml", NULL, parserOptions);
847     if (doc == NULL) {
848         fprintf(stderr, "Failed to parse huge.xml\n");
849 	res = 1;
850     } else {
851         xmlEntityPtr ent;
852         unsigned long fixed_cost = 20;
853         unsigned long allowed_expansion = 1000000;
854         unsigned long f_size = xmlStrlen(BAD_CAST "some internal data");
855         unsigned long e_size;
856         unsigned long d_size;
857         unsigned long total_size;
858 
859         ent = xmlGetDocEntity(doc, BAD_CAST "e");
860         e_size = f_size * 2 +
861                  xmlStrlen(BAD_CAST "&f;") * 2 +
862                  fixed_cost * 2;
863         if (ent->expandedSize != e_size) {
864             fprintf(stderr, "Wrong size for entity e: %lu (expected %lu)\n",
865                     ent->expandedSize, e_size);
866             res = 1;
867         }
868 
869         ent = xmlGetDocEntity(doc, BAD_CAST "b");
870         if (ent->expandedSize != e_size) {
871             fprintf(stderr, "Wrong size for entity b: %lu (expected %lu)\n",
872                     ent->expandedSize, e_size);
873             res = 1;
874         }
875 
876         ent = xmlGetDocEntity(doc, BAD_CAST "d");
877         d_size = e_size * 2 +
878                  xmlStrlen(BAD_CAST "&e;") * 2 +
879                  fixed_cost * 2;
880         if (ent->expandedSize != d_size) {
881             fprintf(stderr, "Wrong size for entity d: %lu (expected %lu)\n",
882                     ent->expandedSize, d_size);
883             res = 1;
884         }
885 
886         ent = xmlGetDocEntity(doc, BAD_CAST "c");
887         if (ent->expandedSize != d_size) {
888             fprintf(stderr, "Wrong size for entity c: %lu (expected %lu)\n",
889                     ent->expandedSize, d_size);
890             res = 1;
891         }
892 
893         if (ctxt->sizeentcopy < allowed_expansion) {
894             fprintf(stderr, "Total entity size too small: %lu\n",
895                     ctxt->sizeentcopy);
896             res = 1;
897         }
898 
899         total_size = (f_size + e_size + d_size + 3 * fixed_cost) *
900                      (MAX_NODES - 1) * 3;
901         if (ctxt->sizeentcopy != total_size) {
902             fprintf(stderr, "Wrong total entity size: %lu (expected %lu)\n",
903                     ctxt->sizeentcopy, total_size);
904             res = 1;
905         }
906 
907         if (ctxt->sizeentities != 30) {
908             fprintf(stderr, "Wrong parsed entity size: %lu (expected %lu)\n",
909                     ctxt->sizeentities, 30lu);
910             res = 1;
911         }
912     }
913 
914     xmlFreeDoc(doc);
915     xmlFreeParserCtxt(ctxt);
916 
917     return(res);
918 }
919 
920 /**
921  * notRecursiveHugeTest:
922  * @filename: the file to parse
923  * @result: the file with expected result
924  * @err: the file with error messages: unused
925  *
926  * Parse a memory generated file
927  * good cases
928  *
929  * Returns 0 in case of success, an error code otherwise
930  */
931 static int
hugeDtdTest(const char * filename ATTRIBUTE_UNUSED,const char * result ATTRIBUTE_UNUSED,const char * err ATTRIBUTE_UNUSED,int options)932 hugeDtdTest(const char *filename ATTRIBUTE_UNUSED,
933             const char *result ATTRIBUTE_UNUSED,
934             const char *err ATTRIBUTE_UNUSED,
935             int options) {
936     xmlParserCtxtPtr ctxt;
937     xmlDocPtr doc;
938     int res = 0;
939     int parserOptions = XML_PARSE_DTDVALID;
940 
941     nb_tests++;
942 
943     ctxt = xmlNewParserCtxt();
944     if (options & OPT_SAX)
945         initSAX(ctxt);
946     if ((options & OPT_NO_SUBST) == 0)
947         parserOptions |= XML_PARSE_NOENT;
948     doc = xmlCtxtReadFile(ctxt, "test/recurse/huge_dtd.xml", NULL,
949                           parserOptions);
950     if (doc == NULL) {
951         fprintf(stderr, "Failed to parse huge_dtd.xml\n");
952 	res = 1;
953     } else {
954         unsigned long fixed_cost = 20;
955         unsigned long allowed_expansion = 1000000;
956         unsigned long a_size = xmlStrlen(BAD_CAST "<!-- comment -->");
957         unsigned long b_size;
958         unsigned long c_size;
959         unsigned long e_size;
960         unsigned long f_size;
961         unsigned long total_size;
962 
963         if (ctxt->sizeentcopy < allowed_expansion) {
964             fprintf(stderr, "Total entity size too small: %lu\n",
965                     ctxt->sizeentcopy);
966             res = 1;
967         }
968 
969         b_size = (a_size + strlen("&a;") + fixed_cost) * 2;
970         c_size = (b_size + strlen("&b;") + fixed_cost) * 2;
971         /*
972          * Internal parameter entites are substitued eagerly and
973          * need different accounting.
974          */
975         e_size = a_size * 2;
976         f_size = e_size * 2;
977         total_size = /* internal */
978                      e_size + f_size + fixed_cost * 4 +
979                      (a_size + e_size + f_size + fixed_cost * 3) *
980                      (MAX_NODES - 1) * 2 +
981                      /* external */
982                      (a_size + b_size + c_size + fixed_cost * 3) *
983                      (MAX_NODES - 1) * 2 +
984                      /* final reference in main doc */
985                      strlen("success") + fixed_cost;
986         if (ctxt->sizeentcopy != total_size) {
987             fprintf(stderr, "Wrong total entity size: %lu (expected %lu)\n",
988                     ctxt->sizeentcopy, total_size);
989             res = 1;
990         }
991 
992         total_size = strlen(hugeDocParts->start) +
993                      strlen(hugeDocParts->segment) * (MAX_NODES - 1) +
994                      strlen(hugeDocParts->finish) +
995                      /*
996                       * Other external entities pa.ent, pb.ent, pc.ent.
997                       * These are currently counted twice because they're
998                       * used both in DTD and EntityValue.
999                       */
1000                      (16 + 6 + 6) * 2;
1001         if (ctxt->sizeentities != total_size) {
1002             fprintf(stderr, "Wrong parsed entity size: %lu (expected %lu)\n",
1003                     ctxt->sizeentities, total_size);
1004             res = 1;
1005         }
1006     }
1007 
1008     xmlFreeDoc(doc);
1009     xmlFreeParserCtxt(ctxt);
1010 
1011     return(res);
1012 }
1013 
1014 /************************************************************************
1015  *									*
1016  *			Tests Descriptions				*
1017  *									*
1018  ************************************************************************/
1019 
1020 static
1021 testDesc testDescriptions[] = {
1022     { "Parsing recursive test cases" ,
1023       recursiveDetectTest, "./test/recurse/lol*.xml", NULL, NULL, NULL,
1024       0 },
1025     { "Parsing recursive test cases (no substitution)" ,
1026       recursiveDetectTest, "./test/recurse/lol*.xml", NULL, NULL, NULL,
1027       OPT_NO_SUBST },
1028     { "Parsing recursive test cases (SAX)" ,
1029       recursiveDetectTest, "./test/recurse/lol*.xml", NULL, NULL, NULL,
1030       OPT_SAX },
1031     { "Parsing recursive test cases (SAX, no substitution)" ,
1032       recursiveDetectTest, "./test/recurse/lol*.xml", NULL, NULL, NULL,
1033       OPT_SAX | OPT_NO_SUBST },
1034     { "Parsing non-recursive test cases" ,
1035       notRecursiveDetectTest, "./test/recurse/good*.xml", NULL, NULL, NULL,
1036       0 },
1037     { "Parsing non-recursive test cases (SAX)" ,
1038       notRecursiveDetectTest, "./test/recurse/good*.xml", NULL, NULL, NULL,
1039       OPT_SAX },
1040     { "Parsing non-recursive huge case" ,
1041       notRecursiveHugeTest, NULL, NULL, NULL, NULL,
1042       0 },
1043     { "Parsing non-recursive huge case (no substitution)" ,
1044       notRecursiveHugeTest, NULL, NULL, NULL, NULL,
1045       OPT_NO_SUBST },
1046     { "Parsing non-recursive huge case (SAX)" ,
1047       notRecursiveHugeTest, NULL, NULL, NULL, NULL,
1048       OPT_SAX },
1049     { "Parsing non-recursive huge case (SAX, no substitution)" ,
1050       notRecursiveHugeTest, NULL, NULL, NULL, NULL,
1051       OPT_SAX | OPT_NO_SUBST },
1052     { "Parsing non-recursive huge DTD case" ,
1053       hugeDtdTest, NULL, NULL, NULL, NULL,
1054       0 },
1055     {NULL, NULL, NULL, NULL, NULL, NULL, 0}
1056 };
1057 
1058 /************************************************************************
1059  *									*
1060  *		The main code driving the tests				*
1061  *									*
1062  ************************************************************************/
1063 
1064 static int
launchTests(testDescPtr tst)1065 launchTests(testDescPtr tst) {
1066     int res = 0, err = 0;
1067     size_t i;
1068     char *result;
1069     char *error;
1070     int mem;
1071 
1072     if (tst == NULL) return(-1);
1073     if (tst->in != NULL) {
1074 	glob_t globbuf;
1075 
1076 	globbuf.gl_offs = 0;
1077 	glob(tst->in, GLOB_DOOFFS, NULL, &globbuf);
1078 	for (i = 0;i < globbuf.gl_pathc;i++) {
1079 	    if (!checkTestFile(globbuf.gl_pathv[i]))
1080 	        continue;
1081 	    if (tst->suffix != NULL) {
1082 		result = resultFilename(globbuf.gl_pathv[i], tst->out,
1083 					tst->suffix);
1084 		if (result == NULL) {
1085 		    fprintf(stderr, "Out of memory !\n");
1086 		    fatalError();
1087 		}
1088 	    } else {
1089 	        result = NULL;
1090 	    }
1091 	    if (tst->err != NULL) {
1092 		error = resultFilename(globbuf.gl_pathv[i], tst->out,
1093 		                        tst->err);
1094 		if (error == NULL) {
1095 		    fprintf(stderr, "Out of memory !\n");
1096 		    fatalError();
1097 		}
1098 	    } else {
1099 	        error = NULL;
1100 	    }
1101 	    if ((result) &&(!checkTestFile(result))) {
1102 	        fprintf(stderr, "Missing result file %s\n", result);
1103 	    } else if ((error) &&(!checkTestFile(error))) {
1104 	        fprintf(stderr, "Missing error file %s\n", error);
1105 	    } else {
1106 		mem = xmlMemUsed();
1107 		extraMemoryFromResolver = 0;
1108 		testErrorsSize = 0;
1109 		testErrors[0] = 0;
1110 		res = tst->func(globbuf.gl_pathv[i], result, error,
1111 		                tst->options | XML_PARSE_COMPACT);
1112 		xmlResetLastError();
1113 		if (res != 0) {
1114 		    fprintf(stderr, "File %s generated an error\n",
1115 		            globbuf.gl_pathv[i]);
1116 		    nb_errors++;
1117 		    err++;
1118 		}
1119 		else if (xmlMemUsed() != mem) {
1120 		    if ((xmlMemUsed() != mem) &&
1121 		        (extraMemoryFromResolver == 0)) {
1122 			fprintf(stderr, "File %s leaked %d bytes\n",
1123 				globbuf.gl_pathv[i], xmlMemUsed() - mem);
1124 			nb_leaks++;
1125 			err++;
1126 		    }
1127 		}
1128 		testErrorsSize = 0;
1129 	    }
1130 	    if (result)
1131 		free(result);
1132 	    if (error)
1133 		free(error);
1134 	}
1135 	globfree(&globbuf);
1136     } else {
1137         testErrorsSize = 0;
1138 	testErrors[0] = 0;
1139 	extraMemoryFromResolver = 0;
1140         res = tst->func(NULL, NULL, NULL, tst->options);
1141 	if (res != 0) {
1142 	    nb_errors++;
1143 	    err++;
1144 	}
1145     }
1146     return(err);
1147 }
1148 
/* Command line switches: set in main(), consulted by runtest(). */
static int tests_quiet = 0;  /* "-quiet": do not print the "## <desc>" line */
static int verbose = 0;      /* "-v": print a summary after each table entry */
1151 
1152 static int
runtest(int i)1153 runtest(int i) {
1154     int ret = 0, res;
1155     int old_errors, old_tests, old_leaks;
1156 
1157     old_errors = nb_errors;
1158     old_tests = nb_tests;
1159     old_leaks = nb_leaks;
1160     if ((tests_quiet == 0) && (testDescriptions[i].desc != NULL))
1161 	printf("## %s\n", testDescriptions[i].desc);
1162     res = launchTests(&testDescriptions[i]);
1163     if (res != 0)
1164 	ret++;
1165     if (verbose) {
1166 	if ((nb_errors == old_errors) && (nb_leaks == old_leaks))
1167 	    printf("Ran %d tests, no errors\n", nb_tests - old_tests);
1168 	else
1169 	    printf("Ran %d tests, %d errors, %d leaks\n",
1170 		   nb_tests - old_tests,
1171 		   nb_errors - old_errors,
1172 		   nb_leaks - old_leaks);
1173     }
1174     return(ret);
1175 }
1176 
1177 int
main(int argc ATTRIBUTE_UNUSED,char ** argv ATTRIBUTE_UNUSED)1178 main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
1179     int i, a, ret = 0;
1180     int subset = 0;
1181 
1182     initializeLibxml2();
1183 
1184     for (a = 1; a < argc;a++) {
1185         if (!strcmp(argv[a], "-v"))
1186 	    verbose = 1;
1187         else if (!strcmp(argv[a], "-quiet"))
1188 	    tests_quiet = 1;
1189 	else {
1190 	    for (i = 0; testDescriptions[i].func != NULL; i++) {
1191 	        if (strstr(testDescriptions[i].desc, argv[a])) {
1192 		    ret += runtest(i);
1193 		    subset++;
1194 		}
1195 	    }
1196 	}
1197     }
1198     if (subset == 0) {
1199 	for (i = 0; testDescriptions[i].func != NULL; i++) {
1200 	    ret += runtest(i);
1201 	}
1202     }
1203     if ((nb_errors == 0) && (nb_leaks == 0)) {
1204         ret = 0;
1205 	printf("Total %d tests, no errors\n",
1206 	       nb_tests);
1207     } else {
1208         ret = 1;
1209 	printf("Total %d tests, %d errors, %d leaks\n",
1210 	       nb_tests, nb_errors, nb_leaks);
1211     }
1212     xmlCleanupParser();
1213 
1214     return(ret);
1215 }
1216