1 /*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9 #include "libxml.h"
10
11 #ifdef LIBXML_HTML_ENABLED
12
13 #include <string.h>
14 #include <stdarg.h>
15
16
17 #ifdef HAVE_SYS_TYPES_H
18 #include <sys/types.h>
19 #endif
20 #ifdef HAVE_SYS_STAT_H
21 #include <sys/stat.h>
22 #endif
23 #ifdef HAVE_FCNTL_H
24 #include <fcntl.h>
25 #endif
26 #ifdef HAVE_UNISTD_H
27 #include <unistd.h>
28 #endif
29 #ifdef HAVE_STDLIB_H
30 #include <stdlib.h>
31 #endif
32
33 #include <libxml/xmlmemory.h>
34 #include <libxml/HTMLparser.h>
35 #include <libxml/HTMLtree.h>
36 #include <libxml/debugXML.h>
37 #include <libxml/xmlerror.h>
38 #include <libxml/globals.h>
39
/*
 * Command line state. Every flag starts at zero and is incremented each
 * time the matching switch is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug : dump the tree in debug format */
#endif
static int copy = 0;            /* --copy : copy the document before output */
static int sax = 0;             /* --sax : trace the SAX callbacks */
static int repeat = 0;          /* --repeat : parse each file many times */
static int noout = 0;           /* --noout : suppress the output pass */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;            /* --push : use the push parser interface */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;   /* --encode : requested output encoding */
static int options = 0;         /* option mask handed to htmlReadFile() */
52
53 static xmlSAXHandler emptySAXHandlerStruct = {
54 NULL, /* internalSubset */
55 NULL, /* isStandalone */
56 NULL, /* hasInternalSubset */
57 NULL, /* hasExternalSubset */
58 NULL, /* resolveEntity */
59 NULL, /* getEntity */
60 NULL, /* entityDecl */
61 NULL, /* notationDecl */
62 NULL, /* attributeDecl */
63 NULL, /* elementDecl */
64 NULL, /* unparsedEntityDecl */
65 NULL, /* setDocumentLocator */
66 NULL, /* startDocument */
67 NULL, /* endDocument */
68 NULL, /* startElement */
69 NULL, /* endElement */
70 NULL, /* reference */
71 NULL, /* characters */
72 NULL, /* ignorableWhitespace */
73 NULL, /* processingInstruction */
74 NULL, /* comment */
75 NULL, /* xmlParserWarning */
76 NULL, /* xmlParserError */
77 NULL, /* xmlParserError */
78 NULL, /* getParameterEntity */
79 NULL, /* cdataBlock */
80 NULL, /* externalSubset */
81 1, /* initialized */
82 NULL, /* private */
83 NULL, /* startElementNsSAX2Func */
84 NULL, /* endElementNsSAX2Func */
85 NULL /* xmlStructuredErrorFunc */
86 };
87
88 static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
89 extern xmlSAXHandlerPtr debugSAXHandler;
90
91 /************************************************************************
92 * *
93 * Debug Handlers *
94 * *
95 ************************************************************************/
96
97 /**
98 * isStandaloneDebug:
99 * @ctxt: An XML parser context
100 *
101 * Is this document tagged standalone ?
102 *
103 * Returns 1 if true
104 */
105 static int
isStandaloneDebug(void * ctx ATTRIBUTE_UNUSED)106 isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
107 {
108 fprintf(stdout, "SAX.isStandalone()\n");
109 return(0);
110 }
111
112 /**
113 * hasInternalSubsetDebug:
114 * @ctxt: An XML parser context
115 *
116 * Does this document has an internal subset
117 *
118 * Returns 1 if true
119 */
120 static int
hasInternalSubsetDebug(void * ctx ATTRIBUTE_UNUSED)121 hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
122 {
123 fprintf(stdout, "SAX.hasInternalSubset()\n");
124 return(0);
125 }
126
127 /**
128 * hasExternalSubsetDebug:
129 * @ctxt: An XML parser context
130 *
131 * Does this document has an external subset
132 *
133 * Returns 1 if true
134 */
135 static int
hasExternalSubsetDebug(void * ctx ATTRIBUTE_UNUSED)136 hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
137 {
138 fprintf(stdout, "SAX.hasExternalSubset()\n");
139 return(0);
140 }
141
142 /**
143 * hasInternalSubsetDebug:
144 * @ctxt: An XML parser context
145 *
146 * Does this document has an internal subset
147 */
148 static void
internalSubsetDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,const xmlChar * ExternalID,const xmlChar * SystemID)149 internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
150 const xmlChar *ExternalID, const xmlChar *SystemID)
151 {
152 fprintf(stdout, "SAX.internalSubset(%s,", name);
153 if (ExternalID == NULL)
154 fprintf(stdout, " ,");
155 else
156 fprintf(stdout, " %s,", ExternalID);
157 if (SystemID == NULL)
158 fprintf(stdout, " )\n");
159 else
160 fprintf(stdout, " %s)\n", SystemID);
161 }
162
163 /**
164 * resolveEntityDebug:
165 * @ctxt: An XML parser context
166 * @publicId: The public ID of the entity
167 * @systemId: The system ID of the entity
168 *
169 * Special entity resolver, better left to the parser, it has
170 * more context than the application layer.
171 * The default behaviour is to NOT resolve the entities, in that case
172 * the ENTITY_REF nodes are built in the structure (and the parameter
173 * values).
174 *
175 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
176 */
177 static xmlParserInputPtr
resolveEntityDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * publicId,const xmlChar * systemId)178 resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
179 {
180 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
181
182
183 fprintf(stdout, "SAX.resolveEntity(");
184 if (publicId != NULL)
185 fprintf(stdout, "%s", (char *)publicId);
186 else
187 fprintf(stdout, " ");
188 if (systemId != NULL)
189 fprintf(stdout, ", %s)\n", (char *)systemId);
190 else
191 fprintf(stdout, ", )\n");
192 /*********
193 if (systemId != NULL) {
194 return(xmlNewInputFromFile(ctxt, (char *) systemId));
195 }
196 *********/
197 return(NULL);
198 }
199
200 /**
201 * getEntityDebug:
202 * @ctxt: An XML parser context
203 * @name: The entity name
204 *
205 * Get an entity by name
206 *
207 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
208 */
209 static xmlEntityPtr
getEntityDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name)210 getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
211 {
212 fprintf(stdout, "SAX.getEntity(%s)\n", name);
213 return(NULL);
214 }
215
216 /**
217 * getParameterEntityDebug:
218 * @ctxt: An XML parser context
219 * @name: The entity name
220 *
221 * Get a parameter entity by name
222 *
223 * Returns the xmlParserInputPtr
224 */
225 static xmlEntityPtr
getParameterEntityDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name)226 getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
227 {
228 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
229 return(NULL);
230 }
231
232
233 /**
234 * entityDeclDebug:
235 * @ctxt: An XML parser context
236 * @name: the entity name
237 * @type: the entity type
238 * @publicId: The public ID of the entity
239 * @systemId: The system ID of the entity
240 * @content: the entity value (without processing).
241 *
242 * An entity definition has been parsed
243 */
244 static void
entityDeclDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,int type,const xmlChar * publicId,const xmlChar * systemId,xmlChar * content)245 entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
246 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
247 {
248 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
249 name, type, publicId, systemId, content);
250 }
251
252 /**
253 * attributeDeclDebug:
254 * @ctxt: An XML parser context
255 * @name: the attribute name
256 * @type: the attribute type
257 *
258 * An attribute definition has been parsed
259 */
260 static void
attributeDeclDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * elem,const xmlChar * name,int type,int def,const xmlChar * defaultValue,xmlEnumerationPtr tree ATTRIBUTE_UNUSED)261 attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
262 int type, int def, const xmlChar *defaultValue,
263 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
264 {
265 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
266 elem, name, type, def, defaultValue);
267 }
268
269 /**
270 * elementDeclDebug:
271 * @ctxt: An XML parser context
272 * @name: the element name
273 * @type: the element type
274 * @content: the element value (without processing).
275 *
276 * An element definition has been parsed
277 */
278 static void
elementDeclDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,int type,xmlElementContentPtr content ATTRIBUTE_UNUSED)279 elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
280 xmlElementContentPtr content ATTRIBUTE_UNUSED)
281 {
282 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
283 name, type);
284 }
285
286 /**
287 * notationDeclDebug:
288 * @ctxt: An XML parser context
289 * @name: The name of the notation
290 * @publicId: The public ID of the entity
291 * @systemId: The system ID of the entity
292 *
293 * What to do when a notation declaration has been parsed.
294 */
295 static void
notationDeclDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,const xmlChar * publicId,const xmlChar * systemId)296 notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
297 const xmlChar *publicId, const xmlChar *systemId)
298 {
299 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
300 (char *) name, (char *) publicId, (char *) systemId);
301 }
302
303 /**
304 * unparsedEntityDeclDebug:
305 * @ctxt: An XML parser context
306 * @name: The name of the entity
307 * @publicId: The public ID of the entity
308 * @systemId: The system ID of the entity
309 * @notationName: the name of the notation
310 *
311 * What to do when an unparsed entity declaration is parsed
312 */
313 static void
unparsedEntityDeclDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,const xmlChar * publicId,const xmlChar * systemId,const xmlChar * notationName)314 unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
315 const xmlChar *publicId, const xmlChar *systemId,
316 const xmlChar *notationName)
317 {
318 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
319 (char *) name, (char *) publicId, (char *) systemId,
320 (char *) notationName);
321 }
322
323 /**
324 * setDocumentLocatorDebug:
325 * @ctxt: An XML parser context
326 * @loc: A SAX Locator
327 *
328 * Receive the document locator at startup, actually xmlDefaultSAXLocator
329 * Everything is available on the context, so this is useless in our case.
330 */
331 static void
setDocumentLocatorDebug(void * ctx ATTRIBUTE_UNUSED,xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)332 setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
333 {
334 fprintf(stdout, "SAX.setDocumentLocator()\n");
335 }
336
337 /**
338 * startDocumentDebug:
339 * @ctxt: An XML parser context
340 *
341 * called when the document start being processed.
342 */
343 static void
startDocumentDebug(void * ctx ATTRIBUTE_UNUSED)344 startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
345 {
346 fprintf(stdout, "SAX.startDocument()\n");
347 }
348
349 /**
350 * endDocumentDebug:
351 * @ctxt: An XML parser context
352 *
353 * called when the document end has been detected.
354 */
355 static void
endDocumentDebug(void * ctx ATTRIBUTE_UNUSED)356 endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
357 {
358 fprintf(stdout, "SAX.endDocument()\n");
359 }
360
361 /**
362 * startElementDebug:
363 * @ctxt: An XML parser context
364 * @name: The element name
365 *
366 * called when an opening tag has been processed.
367 */
368 static void
startElementDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name,const xmlChar ** atts)369 startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
370 {
371 int i;
372
373 fprintf(stdout, "SAX.startElement(%s", (char *) name);
374 if (atts != NULL) {
375 for (i = 0;(atts[i] != NULL);i++) {
376 fprintf(stdout, ", %s", atts[i++]);
377 if (atts[i] != NULL) {
378 unsigned char output[40];
379 const unsigned char *att = atts[i];
380 int outlen, attlen;
381 fprintf(stdout, "='");
382 while ((attlen = strlen((char*)att)) > 0) {
383 outlen = sizeof output - 1;
384 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
385 output[outlen] = 0;
386 fprintf(stdout, "%s", (char *) output);
387 att += attlen;
388 }
389 fprintf(stdout, "'");
390 }
391 }
392 }
393 fprintf(stdout, ")\n");
394 }
395
396 /**
397 * endElementDebug:
398 * @ctxt: An XML parser context
399 * @name: The element name
400 *
401 * called when the end of an element has been detected.
402 */
403 static void
endElementDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name)404 endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
405 {
406 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
407 }
408
409 /**
410 * charactersDebug:
411 * @ctxt: An XML parser context
412 * @ch: a xmlChar string
413 * @len: the number of xmlChar
414 *
415 * receiving some chars from the parser.
416 * Question: how much at a time ???
417 */
418 static void
charactersDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * ch,int len)419 charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
420 {
421 unsigned char output[40];
422 int inlen = len, outlen = 30;
423
424 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
425 output[outlen] = 0;
426
427 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
428 }
429
430 /**
431 * cdataDebug:
432 * @ctxt: An XML parser context
433 * @ch: a xmlChar string
434 * @len: the number of xmlChar
435 *
436 * receiving some cdata chars from the parser.
437 * Question: how much at a time ???
438 */
439 static void
cdataDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * ch,int len)440 cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
441 {
442 unsigned char output[40];
443 int inlen = len, outlen = 30;
444
445 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
446 output[outlen] = 0;
447
448 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
449 }
450
451 /**
452 * referenceDebug:
453 * @ctxt: An XML parser context
454 * @name: The entity name
455 *
456 * called when an entity reference is detected.
457 */
458 static void
referenceDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * name)459 referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
460 {
461 fprintf(stdout, "SAX.reference(%s)\n", name);
462 }
463
464 /**
465 * ignorableWhitespaceDebug:
466 * @ctxt: An XML parser context
467 * @ch: a xmlChar string
468 * @start: the first char in the string
469 * @len: the number of xmlChar
470 *
471 * receiving some ignorable whitespaces from the parser.
472 * Question: how much at a time ???
473 */
474 static void
ignorableWhitespaceDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * ch,int len)475 ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
476 {
477 char output[40];
478 int i;
479
480 for (i = 0;(i<len) && (i < 30);i++)
481 output[i] = ch[i];
482 output[i] = 0;
483
484 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
485 }
486
487 /**
488 * processingInstructionDebug:
489 * @ctxt: An XML parser context
490 * @target: the target name
491 * @data: the PI data's
492 * @len: the number of xmlChar
493 *
494 * A processing instruction has been parsed.
495 */
496 static void
processingInstructionDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * target,const xmlChar * data)497 processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
498 const xmlChar *data)
499 {
500 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
501 (char *) target, (char *) data);
502 }
503
504 /**
505 * commentDebug:
506 * @ctxt: An XML parser context
507 * @value: the comment content
508 *
509 * A comment has been parsed.
510 */
511 static void
commentDebug(void * ctx ATTRIBUTE_UNUSED,const xmlChar * value)512 commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
513 {
514 fprintf(stdout, "SAX.comment(%s)\n", value);
515 }
516
517 /**
518 * warningDebug:
519 * @ctxt: An XML parser context
520 * @msg: the message to display/transmit
521 * @...: extra parameters for the message display
522 *
523 * Display and format a warning messages, gives file, line, position and
524 * extra parameters.
525 */
526 static void XMLCDECL
warningDebug(void * ctx ATTRIBUTE_UNUSED,const char * msg,...)527 warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
528 {
529 va_list args;
530
531 va_start(args, msg);
532 fprintf(stdout, "SAX.warning: ");
533 vfprintf(stdout, msg, args);
534 va_end(args);
535 }
536
537 /**
538 * errorDebug:
539 * @ctxt: An XML parser context
540 * @msg: the message to display/transmit
541 * @...: extra parameters for the message display
542 *
543 * Display and format a error messages, gives file, line, position and
544 * extra parameters.
545 */
546 static void XMLCDECL
errorDebug(void * ctx ATTRIBUTE_UNUSED,const char * msg,...)547 errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
548 {
549 va_list args;
550
551 va_start(args, msg);
552 fprintf(stdout, "SAX.error: ");
553 vfprintf(stdout, msg, args);
554 va_end(args);
555 }
556
557 /**
558 * fatalErrorDebug:
559 * @ctxt: An XML parser context
560 * @msg: the message to display/transmit
561 * @...: extra parameters for the message display
562 *
563 * Display and format a fatalError messages, gives file, line, position and
564 * extra parameters.
565 */
566 static void XMLCDECL
fatalErrorDebug(void * ctx ATTRIBUTE_UNUSED,const char * msg,...)567 fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
568 {
569 va_list args;
570
571 va_start(args, msg);
572 fprintf(stdout, "SAX.fatalError: ");
573 vfprintf(stdout, msg, args);
574 va_end(args);
575 }
576
577 static xmlSAXHandler debugSAXHandlerStruct = {
578 internalSubsetDebug,
579 isStandaloneDebug,
580 hasInternalSubsetDebug,
581 hasExternalSubsetDebug,
582 resolveEntityDebug,
583 getEntityDebug,
584 entityDeclDebug,
585 notationDeclDebug,
586 attributeDeclDebug,
587 elementDeclDebug,
588 unparsedEntityDeclDebug,
589 setDocumentLocatorDebug,
590 startDocumentDebug,
591 endDocumentDebug,
592 startElementDebug,
593 endElementDebug,
594 referenceDebug,
595 charactersDebug,
596 ignorableWhitespaceDebug,
597 processingInstructionDebug,
598 commentDebug,
599 warningDebug,
600 errorDebug,
601 fatalErrorDebug,
602 getParameterEntityDebug,
603 cdataDebug,
604 NULL,
605 1,
606 NULL,
607 NULL,
608 NULL,
609 NULL
610 };
611
612 xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
613 /************************************************************************
614 * *
615 * Debug *
616 * *
617 ************************************************************************/
618
619 static void
parseSAXFile(char * filename)620 parseSAXFile(char *filename) {
621 htmlDocPtr doc = NULL;
622
623 /*
624 * Empty callbacks for checking
625 */
626 #ifdef LIBXML_PUSH_ENABLED
627 if (push) {
628 FILE *f;
629
630 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
631 f = fopen(filename, "rb");
632 #else
633 f = fopen(filename, "r");
634 #endif
635 if (f != NULL) {
636 int res, size = 3;
637 char chars[4096];
638 htmlParserCtxtPtr ctxt;
639
640 /* if (repeat) */
641 size = 4096;
642 res = fread(chars, 1, 4, f);
643 if (res > 0) {
644 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
645 chars, res, filename, XML_CHAR_ENCODING_NONE);
646 while ((res = fread(chars, 1, size, f)) > 0) {
647 htmlParseChunk(ctxt, chars, res, 0);
648 }
649 htmlParseChunk(ctxt, chars, 0, 1);
650 doc = ctxt->myDoc;
651 htmlFreeParserCtxt(ctxt);
652 }
653 if (doc != NULL) {
654 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
655 xmlFreeDoc(doc);
656 }
657 fclose(f);
658 }
659 if (!noout) {
660 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
661 f = fopen(filename, "rb");
662 #else
663 f = fopen(filename, "r");
664 #endif
665 if (f != NULL) {
666 int res, size = 3;
667 char chars[4096];
668 htmlParserCtxtPtr ctxt;
669
670 /* if (repeat) */
671 size = 4096;
672 res = fread(chars, 1, 4, f);
673 if (res > 0) {
674 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
675 chars, res, filename, XML_CHAR_ENCODING_NONE);
676 while ((res = fread(chars, 1, size, f)) > 0) {
677 htmlParseChunk(ctxt, chars, res, 0);
678 }
679 htmlParseChunk(ctxt, chars, 0, 1);
680 doc = ctxt->myDoc;
681 htmlFreeParserCtxt(ctxt);
682 }
683 if (doc != NULL) {
684 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
685 xmlFreeDoc(doc);
686 }
687 fclose(f);
688 }
689 }
690 } else {
691 #endif /* LIBXML_PUSH_ENABLED */
692 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
693 if (doc != NULL) {
694 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
695 xmlFreeDoc(doc);
696 }
697
698 if (!noout) {
699 /*
700 * Debug callback
701 */
702 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
703 if (doc != NULL) {
704 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
705 xmlFreeDoc(doc);
706 }
707 }
708 #ifdef LIBXML_PUSH_ENABLED
709 }
710 #endif /* LIBXML_PUSH_ENABLED */
711 }
712
713 static void
parseAndPrintFile(char * filename)714 parseAndPrintFile(char *filename) {
715 htmlDocPtr doc = NULL;
716
717 /*
718 * build an HTML tree from a string;
719 */
720 #ifdef LIBXML_PUSH_ENABLED
721 if (push) {
722 FILE *f;
723
724 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
725 f = fopen(filename, "rb");
726 #else
727 f = fopen(filename, "r");
728 #endif
729 if (f != NULL) {
730 int res, size = 3;
731 char chars[4096];
732 htmlParserCtxtPtr ctxt;
733
734 /* if (repeat) */
735 size = 4096;
736 res = fread(chars, 1, 4, f);
737 if (res > 0) {
738 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
739 chars, res, filename, XML_CHAR_ENCODING_NONE);
740 while ((res = fread(chars, 1, size, f)) > 0) {
741 htmlParseChunk(ctxt, chars, res, 0);
742 }
743 htmlParseChunk(ctxt, chars, 0, 1);
744 doc = ctxt->myDoc;
745 htmlFreeParserCtxt(ctxt);
746 }
747 fclose(f);
748 }
749 } else {
750 doc = htmlReadFile(filename, NULL, options);
751 }
752 #else
753 doc = htmlReadFile(filename,NULL,options);
754 #endif
755 if (doc == NULL) {
756 xmlGenericError(xmlGenericErrorContext,
757 "Could not parse %s\n", filename);
758 }
759
760 #ifdef LIBXML_TREE_ENABLED
761 /*
762 * test intermediate copy if needed.
763 */
764 if (copy) {
765 htmlDocPtr tmp;
766
767 tmp = doc;
768 doc = xmlCopyDoc(doc, 1);
769 xmlFreeDoc(tmp);
770 }
771 #endif
772
773 #ifdef LIBXML_OUTPUT_ENABLED
774 /*
775 * print it.
776 */
777 if (!noout) {
778 #ifdef LIBXML_DEBUG_ENABLED
779 if (!debug) {
780 if (encoding)
781 htmlSaveFileEnc("-", doc, encoding);
782 else
783 htmlDocDump(stdout, doc);
784 } else
785 xmlDebugDumpDocument(stdout, doc);
786 #else
787 if (encoding)
788 htmlSaveFileEnc("-", doc, encoding);
789 else
790 htmlDocDump(stdout, doc);
791 #endif
792 }
793 #endif /* LIBXML_OUTPUT_ENABLED */
794
795 /*
796 * free it.
797 */
798 xmlFreeDoc(doc);
799 }
800
main(int argc,char ** argv)801 int main(int argc, char **argv) {
802 int i, count;
803 int files = 0;
804
805 for (i = 1; i < argc ; i++) {
806 #ifdef LIBXML_DEBUG_ENABLED
807 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
808 debug++;
809 else
810 #endif
811 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
812 copy++;
813 #ifdef LIBXML_PUSH_ENABLED
814 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
815 push++;
816 #endif /* LIBXML_PUSH_ENABLED */
817 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
818 sax++;
819 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
820 noout++;
821 else if ((!strcmp(argv[i], "-repeat")) ||
822 (!strcmp(argv[i], "--repeat")))
823 repeat++;
824 else if ((!strcmp(argv[i], "-encode")) ||
825 (!strcmp(argv[i], "--encode"))) {
826 i++;
827 encoding = argv[i];
828 }
829 }
830 for (i = 1; i < argc ; i++) {
831 if ((!strcmp(argv[i], "-encode")) ||
832 (!strcmp(argv[i], "--encode"))) {
833 i++;
834 continue;
835 }
836 if (argv[i][0] != '-') {
837 if (repeat) {
838 for (count = 0;count < 100 * repeat;count++) {
839 if (sax)
840 parseSAXFile(argv[i]);
841 else
842 parseAndPrintFile(argv[i]);
843 }
844 } else {
845 if (sax)
846 parseSAXFile(argv[i]);
847 else
848 parseAndPrintFile(argv[i]);
849 }
850 files ++;
851 }
852 }
853 if (files == 0) {
854 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
855 argv[0]);
856 printf("\tParse the HTML files and output the result of the parsing\n");
857 #ifdef LIBXML_DEBUG_ENABLED
858 printf("\t--debug : dump a debug tree of the in-memory document\n");
859 #endif
860 printf("\t--copy : used to test the internal copy implementation\n");
861 printf("\t--sax : debug the sequence of SAX callbacks\n");
862 printf("\t--repeat : parse the file 100 times, for timing\n");
863 printf("\t--noout : do not print the result\n");
864 #ifdef LIBXML_PUSH_ENABLED
865 printf("\t--push : use the push mode parser\n");
866 #endif /* LIBXML_PUSH_ENABLED */
867 printf("\t--encode encoding : output in the given encoding\n");
868 }
869 xmlCleanupParser();
870 xmlMemoryDump();
871
872 return(0);
873 }
874 #else /* !LIBXML_HTML_ENABLED */
875 #include <stdio.h>
/*
 * main:
 * Fallback entry point used when HTML support is not compiled in;
 * it only reports that fact on stdout.
 *
 * Returns 0 in all cases
 */
int main(int argc, char **argv) {
    /* argv[0] is only guaranteed to be a string when argc is positive */
    printf("%s : HTML support not compiled in\n",
           (argc > 0) ? argv[0] : "testHTML");
    return(0);
}
880 #endif
881