/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id$
 */

package org.apache.qetest.xsl;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Properties;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.qetest.QetestUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Uses an XML/HTML/Text diff comparator to check or diff two files.
 * <p>Given two files, an actual test result and a known good or 'gold'
 * test result, diff the two files to see if they are equal; if not, provide
 * some very basic info on where they differ.</p>
 *
 * <p>Attempts to parse each file as an XML document using the JAXP
 * DocumentBuilderFactory; if that fails, attempt to parse each as an
 * HTML document using Tidy; if that fails, pretend to parse each
 * doc as text and construct a faux document node; then do
 * readLine() and construct a &lt;line&gt; element for each line.</p>
 *
 * <p>The comparison routine then recursively compares the two
 * documents node-by-node; see the code for exactly how each
 * node type is handled.  Note that some node types are currently
 * ignored.</p>
 *
 * //@todo document whitespace difference handling better -sc
 * //@todo check how XML decls are handled (or not) -sc
 * //@todo Allow param to define the type of parse we do (i.e. if a
 * testwriter knows their output file will be XML, we should only
 * attempt to parse it as XML, not other types)
 * @see XHTComparatorXSLTC for an alternate implementation of
 * diff() which tests some things as QNames (which checks for the
 * true namespace, instead of just the prefix)
 * @author Scott_Boag@lotus.com
 * @author Shane_Curcuru@lotus.com
 * @version $Id$
 */
public class XHTComparator
{

  /**
   * Maximum output length we may log for differing values.
   * When two nodes have mismatched values, we output the first
   * two values that were mismatched.  In some cases, this may be
   * extremely long, so limit how much we output for convenience.
   */
  protected int maxDisplayLen = 511;  // arbitrary length, for convenience

  /**
   * Accessor method for maxDisplayLen.
   * Non-positive values are ignored and leave the current limit in place.
   * @param i maximum length we log out
   */
  public void setMaxDisplayLen(int i)
  {
    if (i > 0)
      maxDisplayLen = i;
  }

  /** Constants for reporting out reason for failed diffs. */
  public static final String SEPARATOR = ";";

  /** LBRACKET '[' */
  public static final String LBRACKET = "[";

  /** RBRACKET ']' */
  public static final String RBRACKET = "]";

  /** TEST 'test', for the actual value. */
  public static final String TEST = "test";

  /** GOLD 'gold' for the gold or expected value. */
  public static final String GOLD = "gold";

  /** PARSE_TYPE '-parse-type' */
  public static final String PARSE_TYPE = "-parse-type" + SEPARATOR;  // postpended to TEST or GOLD

  /** OTHER_ERROR 'other-error' */
  public static final String OTHER_ERROR = "other-error" + SEPARATOR;

  /** WARNING 'warning' */
  public static final String WARNING = "warning" + SEPARATOR;

  /** MISMATCH_NODE */
  public static final String MISMATCH_NODE = "mismatch-node" + SEPARATOR;

  /** MISSING_TEST_NODE */
  public static final String MISSING_TEST_NODE = "missing-node-" + TEST
                                                   + SEPARATOR;

  /** MISSING_GOLD_NODE */
  public static final String MISSING_GOLD_NODE = "missing-node-" + GOLD
                                                   + SEPARATOR;

  /** MISMATCH_ATTRIBUTE */
  public static final String MISMATCH_ATTRIBUTE = "mismatch-attribute"
                                                    + SEPARATOR;

  /** MISMATCH_VALUE */
  public static final String MISMATCH_VALUE = "mismatch-value" + SEPARATOR;

  /** MISMATCH_VALUE_GOLD, prefix for the gold value printed by printNodeDiff. */
  public static final String MISMATCH_VALUE_GOLD = "mismatch-value-gold" + SEPARATOR;

  /** MISMATCH_VALUE_TEXT, prefix for the test value printed by printNodeDiff. */
  public static final String MISMATCH_VALUE_TEXT = "mismatch-value-text" + SEPARATOR;

  /** MISSING_TEST_VALUE */
  public static final String MISSING_TEST_VALUE = "missing-value-" + TEST
                                                    + SEPARATOR;

  /** MISSING_GOLD_VALUE */
  public static final String MISSING_GOLD_VALUE = "missing-value-" + GOLD
                                                    + SEPARATOR;

  /** WHITESPACE_DIFF */
  public static final String WHITESPACE_DIFF = "whitespace-diff;";

  /**
   * Compare two files by parsing into DOMs and comparing trees.
   *
   * <p>Parses the goldFileName by using the
   * {@link #parse(String, PrintWriter, String, Properties) parse worker method}
   * - if null, we bail and return false.  If non-null, we parse the
   * testFileName into a Document as well.  Then we call
   * {@link #diff(Node, Node, PrintWriter, boolean[]) diff worker method}
   * to do the real work of comparing.</p>
   *
   * @param goldFileName expected file
   * @param testFileName actual file
   * @param reporter PrintWriter to dump status info to
   * @param warning array of warning flags (for whitespace diffs,
   * item[0] is set to true if we find whitespace-only diffs)
   * @param attributes to attempt to set onto parsers
   * @return true if they match, false otherwise
   */
  public boolean compare(String goldFileName, String testFileName,
                         PrintWriter reporter, boolean[] warning,
                         Properties attributes)
  {

    // parse the gold doc
    Document goldDoc = parse(goldFileName, reporter, GOLD, attributes);

    // parse the test doc only if gold doc was parsed OK
    //@todo Jun-02 -sc Note the logic here might be improveable to
    //  actually report file missing problems better: i.e.
    //  in theory, if the actual is missing, it's a fail; if
    //  the gold (only) is missing, it's ambiguous
    Document testDoc = (null != goldDoc)
                       ? parse(testFileName, reporter, TEST, attributes) : null;

    if (null == goldDoc)
    {
      reporter.println(OTHER_ERROR + GOLD + SEPARATOR
                       + "document null");

      return false;
    }
    else if (null == testDoc)
    {
      reporter.println(OTHER_ERROR + TEST + SEPARATOR
                       + "document null");

      return false;
    }

    return diff(goldDoc, testDoc, reporter, warning);
  }

  // Reporter format:
  // REASON_CONSTANT;gold val;test val;reason description

  /**
   * Diff two Nodes recursively and report true if equal.
   *
   * <p>The contract is: when you enter here the gold and test nodes are the same type,
   * both non-null, and both in the same basic position in the tree.
   * //@todo verify caller really performs for the contract -sc</p>
   *
   * <p>Node names and node values are compared first; for element
   * nodes the attribute sets are then compared (ignoring order);
   * finally the children are walked pairwise.  No other node type
   * has type-specific checks.  Also see
   * {@link XHTComparatorXSLTC} for an alternate implementation.</p>
   *
   * @param gold or expected node
   * @param test actual node
   * @param reporter PrintWriter to dump status info to
   * @param warning item[0] is set to true if any whitespace diffs found
   *
   * @return true if pass, false if any problems encountered
   */
  boolean diff(Node gold, Node test, PrintWriter reporter,
               boolean[] warning)
  {

    String name1 = gold.getNodeName();
    String name2 = test.getNodeName();

    // If both there but not equal, fail
    if ((null != name1) && (null != name2) && !name1.equals(name2))
    {
      reporter.println(MISMATCH_NODE + nodeTypeString(gold) + SEPARATOR
                       + nodeTypeString(test) + SEPARATOR
                       + "name does not equal test node");

      return false;
    }
    else if ((null != name1) && (null == name2))
    {
      reporter.println(MISSING_TEST_NODE + nodeTypeString(gold)
                       + SEPARATOR + nodeTypeString(test) + SEPARATOR
                       + "name missing on test");

      return false;
    }
    else if ((null == name1) && (null != name2))
    {
      reporter.println(MISSING_GOLD_NODE + nodeTypeString(gold)
                       + SEPARATOR + nodeTypeString(test) + SEPARATOR
                       + "name missing on gold");

      return false;
    }

    String value1 = gold.getNodeValue();
    String value2 = test.getNodeValue();

    if ((null != value1) && (null != value2) && !value1.equals(value2))
    {
      reporter.println(MISMATCH_VALUE + nodeTypeString(gold) + "len="
                       + value1.length() + SEPARATOR
                       + nodeTypeString(test) + "len=" + value2.length()
                       + SEPARATOR + "values do not match");
      printNodeDiff(gold, test, reporter);
      return false;
    }
    else if ((null != value1) && (null == value2))
    {
      reporter.println(MISSING_TEST_VALUE + nodeTypeString(gold) + "-"
                       + value1 + SEPARATOR + nodeTypeString(test)
                       + SEPARATOR + "test no value");

      return false;
    }
    else if ((null == value1) && (null != value2))
    {
      reporter.println(MISSING_GOLD_VALUE + nodeTypeString(gold)
                       + SEPARATOR + nodeTypeString(test) + "-" + value2
                       + SEPARATOR + "gold no value");

      return false;
    }

    // Only elements get a type-specific check (their attributes);
    // every other node type is fully covered by the name/value
    // comparison above plus the child walk below.
    if ((gold.getNodeType() == Node.ELEMENT_NODE)
        && !diffAttributes(gold, test, reporter, warning))
    {
      return false;
    }

    // try2 receives the (possibly whitespace-advanced) pair of
    // children that basicChildCompare decided we should look at next
    Node try2[] = new Node[2];
    Node goldChild = gold.getFirstChild();
    Node testChild = test.getFirstChild();

    if (!basicChildCompare(goldChild, testChild, reporter, warning, try2))
      return false;

    goldChild = try2[0];
    testChild = try2[1];

    while (null != goldChild)
    {
      if (!diff(goldChild, testChild, reporter, warning))
        return false;

      goldChild = goldChild.getNextSibling();
      testChild = testChild.getNextSibling();

      if (!basicChildCompare(goldChild, testChild, reporter, warning,
                             try2))
        return false;

      goldChild = try2[0];
      testChild = try2[1];
    }

    return true;
  }  // end of diff()

  /**
   * Compare the attribute sets of two element nodes, ignoring order.
   *
   * <p>Fails if exactly one side has no attribute map, if the
   * attribute counts differ, if a gold attribute has no test
   * attribute of the same name, or if diff() of a matching pair
   * fails.  If neither node exposes an attribute map there is
   * nothing to compare and this passes.</p>
   *
   * @param gold or expected element
   * @param test actual element
   * @param reporter PrintWriter to dump status info to
   * @param warning passed through to diff() for each attribute pair
   *
   * @return true if the attributes match, false otherwise
   */
  private boolean diffAttributes(Node gold, Node test, PrintWriter reporter,
                                 boolean[] warning)
  {

    // Explicitly ignore attribute ordering
    // TODO do we need to make this settable for testing purposes? -sc
    NamedNodeMap goldAttrs = gold.getAttributes();
    NamedNodeMap testAttrs = test.getAttributes();

    if ((null != goldAttrs) && (null == testAttrs))
    {
      reporter.println(MISMATCH_ATTRIBUTE + nodeTypeString(gold)
                       + SEPARATOR + nodeTypeString(test) + SEPARATOR
                       + "test no attrs");

      return false;
    }
    else if ((null == goldAttrs) && (null != testAttrs))
    {
      reporter.println(MISMATCH_ATTRIBUTE + nodeTypeString(gold)
                       + SEPARATOR + nodeTypeString(test) + SEPARATOR
                       + "gold no attrs");

      return false;
    }
    else if (null == goldAttrs)
    {
      // Both maps are null: nothing to compare, and calling
      // getLength() below would throw a NullPointerException.
      return true;
    }

    int gn = goldAttrs.getLength();
    int tn = testAttrs.getLength();

    if (gn != tn)
    {
      reporter.println(MISMATCH_ATTRIBUTE + nodeTypeString(gold)
                       + "-" + gn + SEPARATOR + nodeTypeString(test)
                       + "-" + tn + SEPARATOR
                       + "attribte count mismatch");

      // TODO: add output of each set of attrs for comparisons
      return false;
    }

    // Counts are equal, so looking up every gold attribute by name
    // on the test side is sufficient to detect any mismatch.
    // TODO verify this checks the full list of attributes both ways,
    //      from gold->test and from test->gold -sc
    for (int i = 0; i < gn; i++)
    {
      Attr goldAttr = (Attr) goldAttrs.item(i);
      String goldAttrName = goldAttr.getName();
      Node testAttr = testAttrs.getNamedItem(goldAttrName);

      if (null == testAttr)
      {
        reporter.println(MISMATCH_ATTRIBUTE + nodeTypeString(gold)
                         + "-" + goldAttrName + SEPARATOR
                         + nodeTypeString(test) + SEPARATOR
                         + "missing attribute on test");

        return false;
      }

      if (!diff(goldAttr, testAttr, reporter, warning))
      {
        return false;
      }
    }

    return true;
  }  // end of diffAttributes()

  /**
   * Check whether every character of a string is whitespace
   * according to Character.isWhitespace.
   * @param s String to check for whitespace; must not be null
   * @return true if all whitespace (including the empty string);
   * false otherwise
   */
  boolean isWhiteSpace(String s)
  {

    int n = s.length();

    for (int i = 0; i < n; i++)
    {
      if (!Character.isWhitespace(s.charAt(i)))
        return false;
    }

    return true;
  }  // end of isWhiteSpace()

  /**
   * Skip over a single whitespace-only text node.
   *
   * <p>If n is a text node whose value is entirely whitespace,
   * sets warning[0], prints WHITESPACE_DIFF to the reporter, stores
   * n's next sibling into next[which] and returns that sibling
   * (which may be null).  Otherwise n is returned unchanged and
   * next is not touched.</p>
   *
   * @param n node to check if it's whitespace; must not be null
   * @param reporter PrintWriter to dump status info to
   * @param warning set to true if we advance past a
   * whitespace node; note that this logic isn't quite
   * correct, I think (it should only be set if
   * we advance past whitespace that isn't equal in
   * both trees or something like that)
   * @param next array of nodes to continue thru
   * @param which index into next array
   *
   * @return Node we should be at after advancing
   */
  Node tryToAdvancePastWhitespace(Node n, PrintWriter reporter,
                                  boolean[] warning, Node next[], int which)
  {

    if (n.getNodeType() == Node.TEXT_NODE)
    {
      String data = n.getNodeValue();

      if (null != data)
      {
        if (isWhiteSpace(data))
        {
          warning[0] = true;

          reporter.print(WHITESPACE_DIFF + " ");  // TODO check the format of this; maybe use println -sc

          n = n.getNextSibling();
          next[which] = n;
        }
      }
    }

    return n;
  }  // end of tryToAdvancePastWhitespace()

  /**
   * Check that a pair of sibling nodes can be compared by diff().
   *
   * <p>Stores gold and test into next[0] and next[1].  If exactly
   * one of them is null, or their node types differ, tries once to
   * skip a whitespace-only text node (updating next accordingly);
   * if the pair still cannot be lined up, reports the problem and
   * fails.</p>
   *
   * @param gold or expected node, may be null at the end of a sibling list
   * @param test actual node, may be null at the end of a sibling list
   * @param reporter PrintWriter to dump status info to
   * @param warning item[0] is set to true if whitespace was skipped
   * @param next two-element output array: the gold and test nodes
   * the caller should continue with
   *
   * @return true if the caller may continue with next[0]/next[1];
   * false if a missing node or node type mismatch was reported
   */
  boolean basicChildCompare(Node gold, Node test, PrintWriter reporter,
                            boolean[] warning, Node next[])
  {

    next[0] = gold;
    next[1] = test;

    boolean alreadyTriedToAdvance = false;

    if ((null != gold) && (null == test))
    {
      gold = tryToAdvancePastWhitespace(gold, reporter, warning, next,
                                        0);
      alreadyTriedToAdvance = true;

      if ((null != gold) && (null == test))
      {
        reporter.println(MISSING_TEST_NODE + nodeTypeString(gold)
                         + SEPARATOR + SEPARATOR
                         + "missing node on test");

        return false;
      }
    }
    else if ((null == gold) && (null != test))
    {
      test = tryToAdvancePastWhitespace(test, reporter, warning, next,
                                        1);
      alreadyTriedToAdvance = true;

      if ((null == gold) && (null != test))
      {
        reporter.println(MISSING_GOLD_NODE + SEPARATOR
                         + nodeTypeString(test) + SEPARATOR
                         + "missing node on gold");

        return false;
      }
    }

    if ((null != gold) && (gold.getNodeType() != test.getNodeType()))
    {
      Node savedGold = gold;
      Node savedTest = test;

      if (!alreadyTriedToAdvance)
      {
        gold = tryToAdvancePastWhitespace(gold, reporter, warning,
                                          next, 0);

        if (gold == savedGold)
        {
          test = tryToAdvancePastWhitespace(test, reporter,
                                            warning, next, 1);
        }
      }

      if ((null != gold) && (null == test))
      {
        // The test node was trailing whitespace with no sibling
        // after it, while gold still has a real node here.
        // Dereferencing test below would throw a
        // NullPointerException, so report it as a missing node.
        reporter.println(MISSING_TEST_NODE + nodeTypeString(gold)
                         + SEPARATOR + SEPARATOR
                         + "missing node on test");

        return false;
      }

      // NOTE(review): if gold was trailing whitespace and advanced
      // to null while test is still non-null, we fall through and
      // return true, so the remaining test siblings are never
      // compared.  Behavior left as-is; confirm whether that
      // should be reported as a missing gold node.
      if ((null != gold) && (gold.getNodeType() != test.getNodeType()))
      {
        gold = savedGold;
        test = savedTest;

        reporter.println(MISMATCH_NODE + nodeTypeString(gold)
                         + SEPARATOR + nodeTypeString(test) + SEPARATOR
                         + "node type mismatch");
        printNodeDiff(gold, test, reporter);

        return false;
      }
    }

    return true;
  }  // end of basicChildCompare()

  /**
   * Cheap-o text printout of a node.  By Scott.
   *
   * @param n node to print info for
   * @return String naming the node type, followed by the node
   * name in parentheses (the name is omitted for text nodes)
   */
  public static String nodeTypeString(Node n)
  {
    switch (n.getNodeType())
    {
    case Node.DOCUMENT_NODE :
      return "DOCUMENT(" + n.getNodeName() + ")";
    case Node.ELEMENT_NODE :
      return "ELEMENT(" + n.getNodeName() + ")";
    case Node.CDATA_SECTION_NODE :
      return "CDATA_SECTION(" + n.getNodeName() + ")";
    case Node.ENTITY_REFERENCE_NODE :
      return "ENTITY_REFERENCE(" + n.getNodeName() + ")";
    case Node.ATTRIBUTE_NODE :
      return "ATTRIBUTE(" + n.getNodeName() + ")";
    case Node.COMMENT_NODE :
      return "COMMENT(" + n.getNodeName() + ")";
    case Node.ENTITY_NODE :
      return "ENTITY(" + n.getNodeName() + ")";
    case Node.NOTATION_NODE :
      return "NOTATION(" + n.getNodeName() + ")";
    case Node.PROCESSING_INSTRUCTION_NODE :
      return "PROCESSING_INSTRUCTION(" + n.getNodeName() + ")";
    case Node.TEXT_NODE :
      return "TEXT()";  // #text is all that's ever printed out, so skip it
    default :
      return "UNKNOWN(" + n.getNodeName() + ")";
    }
  }  // end of nodeTypeString()


  /**
   * Cheap-o text printout of two different nodes.
   *
   * <p>Prints the gold value and then the test value, each
   * truncated to maxDisplayLen characters; a null value is
   * printed as the string "null".</p>
   *
   * @param goldNode or expected node to print info
   * @param testNode or actual node to print info
   * @param reporter PrintWriter to dump status info to
   */
  public void printNodeDiff(Node goldNode, Node testNode, PrintWriter reporter)
  {
    String goldValue = goldNode.getNodeValue();
    String testValue = testNode.getNodeValue();
    if (null == goldValue)
      goldValue = "null";
    if (null == testValue)
      testValue = "null";

    // Limit length we output to logs; extremely long values
    //  are more hassle than they're worth (at that point,
    //  it's either obvious what the problem is, or it's
    //  such a small problem that you'll need to manually
    //  compare the files separately
    if (goldValue.length() > maxDisplayLen)
      goldValue = goldValue.substring(0, maxDisplayLen);
    if (testValue.length() > maxDisplayLen)
      testValue = testValue.substring(0, maxDisplayLen);
    reporter.println(MISMATCH_VALUE_GOLD + nodeTypeString(goldNode) + SEPARATOR + "\n" + goldValue);
    reporter.println(MISMATCH_VALUE_TEXT + nodeTypeString(testNode) + SEPARATOR + "\n" + testValue);
  }


  /**
   * Simple worker method to parse filename to a Document.
   *
   * <p>Attempts XML parse, if that throws an exception, then
   * we attempt an HTML parse (using Tidy), if
   * that throws an exception, then we parse as text:
   * we construct a faux document element to hold it all,
   * and then parse by readLine() and put each line of
   * text into a &lt;line&gt; element.</p>
   *
   * <p>Each failed attempt is logged to the reporter, and the
   * parse type that was last attempted is printed at the end.</p>
   *
   * @param filename to parse as a local path
   * @param reporter PrintWriter to dump status info to
   * @param which either TEST or GOLD file being parsed
   * @param attributes name=value pairs to set on the
   * DocumentBuilderFactory that we use to parse
   *
   * @return Document object with contents of the file, or null
   * if the file could not be read by any of the three parse
   * methods (for example because it does not exist)
   */
  Document parse(String filename, PrintWriter reporter, String which, Properties attributes)
  {
    // Force filerefs to be URI's if needed: note this is independent of any other files
    String docURI = QetestUtils.filenameToURL(filename);

    DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
    // Always set namespaces on
    dfactory.setNamespaceAware(true);
    // Set other attributes here as needed
    applyAttributes(dfactory, attributes);

    // Local class: cheap non-printing ErrorHandler
    // This is used to suppress validation warnings which
    //  would otherwise clutter up the console
    ErrorHandler nullHandler = new ErrorHandler() {
      public void warning(SAXParseException e) throws SAXException {}
      public void error(SAXParseException e) throws SAXException {}
      public void fatalError(SAXParseException e) throws SAXException
      {
        throw e;
      }
    };

    String parseType = which + PARSE_TYPE + "[xml];";
    Document doc = null;
    try
    {
      // First, attempt to parse as XML (preferred)...
      DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
      docBuilder.setErrorHandler(nullHandler);
      doc = docBuilder.parse(new InputSource(docURI));
    }
    catch (Throwable se)
    {
      // ... if we couldn't parse as XML, attempt parse as HTML...
      reporter.println(WARNING + se.toString());
      parseType = which + PARSE_TYPE + "[html];";

      try
      {
        // Use the copy of Tidy that the XSLTC team has checked in
        // Submitted by: Gunnar Klauberg <gklauberg@yahoo.de>
        // Alternate by: Santiago.PericasGeertsen@sun.com
        Tidy tidy = new Tidy();
        tidy.setXHTML(true);
        tidy.setTidyMark(false);
        tidy.setShowWarnings(false);
        tidy.setShowErrors(0);
        tidy.setQuiet(true);

        // We opened this stream, so we must close it ourselves
        InputStream htmlStream = new URL(docURI).openStream();
        try
        {
          doc = tidy.parseDOM(htmlStream, null);
        }
        finally
        {
          closeQuietly(htmlStream);
        }
      }
      catch (Exception e)
      {
        // ... if we can't parse as HTML, then just parse the text
        try
        {
          reporter.println(WARNING + e.toString());
          parseType = which + PARSE_TYPE + "[text];";

          // Only assign doc once the whole file was read; a
          //  half-built (or empty) faux document must not be
          //  mistaken for a successfully parsed file
          doc = parseAsText(dfactory, filename);
        }
        catch (Throwable throwable)
        {
          doc = null;
          reporter.println(OTHER_ERROR + filename + SEPARATOR
                           + "threw:" + throwable.toString());
        }
      }
    }

    // Output a newline here for readability
    reporter.println(parseType);

    return doc;
  }  // end of parse()

  /**
   * Build a faux Document holding each line of a text file.
   *
   * <p>The document has a single root element named "out" with
   * one "line" child element per line read, each containing one
   * text node with that line's content.</p>
   *
   * @param dfactory factory used to create the empty Document
   * @param filename local path of the file to read
   *
   * @return the completed faux Document, never null
   * @throws ParserConfigurationException if no DocumentBuilder
   * can be created
   * @throws IOException if the file cannot be opened or read
   */
  private Document parseAsText(DocumentBuilderFactory dfactory, String filename)
          throws ParserConfigurationException, IOException
  {
    // First build a faux document with parent element
    DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
    Document textDoc = docBuilder.newDocument();
    Element outElem = textDoc.createElement("out");

    // Parse as text, line by line
    //  Since we already know it should be text, this should
    //  work better than parsing by bytes.
    BufferedReader br = new BufferedReader(new FileReader(filename));
    try
    {
      for (;;)
      {
        String tmp = br.readLine();

        if (tmp == null)
        {
          break;
        }
        // An additional thing we could do would
        //  be to put in the line number in the
        //  file in here somehow, so when users
        //  view reports, they get that info
        Element lineElem = textDoc.createElement("line");
        outElem.appendChild(lineElem);
        Text textNode = textDoc.createTextNode(tmp);
        lineElem.appendChild(textNode);
      }
    }
    finally
    {
      closeQuietly(br);
    }

    // Now stick the whole element into the document to return
    textDoc.appendChild(outElem);

    return textDoc;
  }  // end of parseAsText()

  /**
   * Close a resource, ignoring any IOException from close().
   *
   * @param c resource to close; null is ignored
   */
  private static void closeQuietly(Closeable c)
  {
    if (null == c)
      return;

    try
    {
      c.close();
    }
    catch (IOException ignored)
    {
      // Nothing useful to do: the data was already read (or the
      //  read already failed and that failure is being reported)
    }
  }

  /**
   * Pass applicable attributes onto our DocumentBuilderFactory.
   *
   * Only passes thru attributes we explicitly know about and
   * are constants from XHTFileCheckService.  Each value is
   * interpreted as a boolean: "true" (ignoring case) is true,
   * anything else is false.
   *
   * @param dfactory factory to attempt to set* onto; if null
   * nothing is done
   * @param attributes various attributes we should try to set;
   * if null nothing is done
   */
  protected void applyAttributes(DocumentBuilderFactory dfactory, Properties attributes)
  {
    if ((null == attributes) || (null == dfactory))
      return;

    String tmp = attributes.getProperty(XHTFileCheckService.SETVALIDATING);
    if (null != tmp)
    {
      dfactory.setValidating(Boolean.valueOf(tmp).booleanValue());
    }
    tmp = attributes.getProperty(XHTFileCheckService.SETIGNORINGELEMENTCONTENTWHITESPACE);
    if (null != tmp)
    {
      dfactory.setIgnoringElementContentWhitespace(Boolean.valueOf(tmp).booleanValue());
    }
    tmp = attributes.getProperty(XHTFileCheckService.SETEXPANDENTITYREFERENCES);
    if (null != tmp)
    {
      dfactory.setExpandEntityReferences(Boolean.valueOf(tmp).booleanValue());
    }
    tmp = attributes.getProperty(XHTFileCheckService.SETIGNORINGCOMMENTS);
    if (null != tmp)
    {
      dfactory.setIgnoringComments(Boolean.valueOf(tmp).booleanValue());
    }
    tmp = attributes.getProperty(XHTFileCheckService.SETCOALESCING);
    if (null != tmp)
    {
      dfactory.setCoalescing(Boolean.valueOf(tmp).booleanValue());
    }
    /* Unknown attributes are ignored! */
  }

}