1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: TreeWalker.java 468655 2006-10-28 07:12:06Z minchau $ 20 */ 21 package org.apache.xml.utils; 22 23 import java.io.File; 24 25 import org.w3c.dom.Comment; 26 import org.w3c.dom.Element; 27 import org.w3c.dom.EntityReference; 28 import org.w3c.dom.NamedNodeMap; 29 import org.w3c.dom.Node; 30 import org.w3c.dom.ProcessingInstruction; 31 import org.w3c.dom.Text; 32 33 import org.xml.sax.ContentHandler; 34 import org.xml.sax.Locator; 35 import org.xml.sax.ext.LexicalHandler; 36 import org.xml.sax.helpers.LocatorImpl; 37 38 /** 39 * This class does a pre-order walk of the DOM tree, calling a ContentHandler 40 * interface as it goes. 41 * @xsl.usage advanced 42 */ 43 44 public class TreeWalker 45 { 46 47 /** Local reference to a ContentHandler */ 48 private ContentHandler m_contentHandler = null; 49 50 // ARGHH!! JAXP Uses Xerces without setting the namespace processing to ON! 51 // DOM2Helper m_dh = new DOM2Helper(); 52 53 /** DomHelper for this TreeWalker */ 54 protected DOMHelper m_dh; 55 56 /** Locator object for this TreeWalker */ 57 private LocatorImpl m_locator = new LocatorImpl(); 58 59 /** 60 * Get the ContentHandler used for the tree walk. 61 * 62 * @return the ContentHandler used for the tree walk 63 */ getContentHandler()64 public ContentHandler getContentHandler() 65 { 66 return m_contentHandler; 67 } 68 69 /** 70 * Get the ContentHandler used for the tree walk. 71 * 72 * @return the ContentHandler used for the tree walk 73 */ setContentHandler(ContentHandler ch)74 public void setContentHandler(ContentHandler ch) 75 { 76 m_contentHandler = ch; 77 } 78 79 /** 80 * Constructor. 81 * @param contentHandler The implemention of the 82 * @param systemId System identifier for the document. 83 * contentHandler operation (toXMLString, digest, ...) 84 */ TreeWalker(ContentHandler contentHandler, DOMHelper dh, String systemId)85 public TreeWalker(ContentHandler contentHandler, DOMHelper dh, String systemId) 86 { 87 this.m_contentHandler = contentHandler; 88 m_contentHandler.setDocumentLocator(m_locator); 89 if (systemId != null) 90 m_locator.setSystemId(systemId); 91 else { 92 try { 93 // Bug see Bugzilla 26741 94 m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl"); 95 } 96 catch (SecurityException se) {// user.dir not accessible from applet 97 } 98 } 99 m_dh = dh; 100 } 101 102 /** 103 * Constructor. 104 * @param contentHandler The implemention of the 105 * contentHandler operation (toXMLString, digest, ...) 106 */ TreeWalker(ContentHandler contentHandler, DOMHelper dh)107 public TreeWalker(ContentHandler contentHandler, DOMHelper dh) 108 { 109 this.m_contentHandler = contentHandler; 110 m_contentHandler.setDocumentLocator(m_locator); 111 try { 112 // Bug see Bugzilla 26741 113 m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl"); 114 } 115 catch (SecurityException se){// user.dir not accessible from applet 116 } 117 m_dh = dh; 118 } 119 120 /** 121 * Constructor. 122 * @param contentHandler The implemention of the 123 * contentHandler operation (toXMLString, digest, ...) 124 */ TreeWalker(ContentHandler contentHandler)125 public TreeWalker(ContentHandler contentHandler) 126 { 127 this.m_contentHandler = contentHandler; 128 if (m_contentHandler != null) 129 m_contentHandler.setDocumentLocator(m_locator); 130 try { 131 // Bug see Bugzilla 26741 132 m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl"); 133 } 134 catch (SecurityException se){// user.dir not accessible from applet 135 136 } 137 m_dh = new DOM2Helper(); 138 } 139 140 /** 141 * Perform a pre-order traversal non-recursive style. 142 * 143 * Note that TreeWalker assumes that the subtree is intended to represent 144 * a complete (though not necessarily well-formed) document and, during a 145 * traversal, startDocument and endDocument will always be issued to the 146 * SAX listener. 147 * 148 * @param pos Node in the tree where to start traversal 149 * 150 * @throws TransformerException 151 */ traverse(Node pos)152 public void traverse(Node pos) throws org.xml.sax.SAXException 153 { 154 this.m_contentHandler.startDocument(); 155 156 traverseFragment(pos); 157 158 this.m_contentHandler.endDocument(); 159 } 160 161 /** 162 * Perform a pre-order traversal non-recursive style. 163 * 164 * In contrast to the traverse() method this method will not issue 165 * startDocument() and endDocument() events to the SAX listener. 166 * 167 * @param pos Node in the tree where to start traversal 168 * 169 * @throws TransformerException 170 */ traverseFragment(Node pos)171 public void traverseFragment(Node pos) throws org.xml.sax.SAXException 172 { 173 Node top = pos; 174 175 while (null != pos) 176 { 177 startNode(pos); 178 179 Node nextNode = pos.getFirstChild(); 180 181 while (null == nextNode) 182 { 183 endNode(pos); 184 185 if (top.equals(pos)) 186 break; 187 188 nextNode = pos.getNextSibling(); 189 190 if (null == nextNode) 191 { 192 pos = pos.getParentNode(); 193 194 if ((null == pos) || (top.equals(pos))) 195 { 196 if (null != pos) 197 endNode(pos); 198 199 nextNode = null; 200 201 break; 202 } 203 } 204 } 205 206 pos = nextNode; 207 } 208 } 209 210 /** 211 * Perform a pre-order traversal non-recursive style. 212 213 * Note that TreeWalker assumes that the subtree is intended to represent 214 * a complete (though not necessarily well-formed) document and, during a 215 * traversal, startDocument and endDocument will always be issued to the 216 * SAX listener. 217 * 218 * @param pos Node in the tree where to start traversal 219 * @param top Node in the tree where to end traversal 220 * 221 * @throws TransformerException 222 */ traverse(Node pos, Node top)223 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException 224 { 225 226 this.m_contentHandler.startDocument(); 227 228 while (null != pos) 229 { 230 startNode(pos); 231 232 Node nextNode = pos.getFirstChild(); 233 234 while (null == nextNode) 235 { 236 endNode(pos); 237 238 if ((null != top) && top.equals(pos)) 239 break; 240 241 nextNode = pos.getNextSibling(); 242 243 if (null == nextNode) 244 { 245 pos = pos.getParentNode(); 246 247 if ((null == pos) || ((null != top) && top.equals(pos))) 248 { 249 nextNode = null; 250 251 break; 252 } 253 } 254 } 255 256 pos = nextNode; 257 } 258 this.m_contentHandler.endDocument(); 259 } 260 261 /** Flag indicating whether following text to be processed is raw text */ 262 boolean nextIsRaw = false; 263 264 /** 265 * Optimized dispatch of characters. 266 */ dispatachChars(Node node)267 private final void dispatachChars(Node node) 268 throws org.xml.sax.SAXException 269 { 270 if(m_contentHandler instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) 271 { 272 ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)m_contentHandler).characters(node); 273 } 274 else 275 { 276 String data = ((Text) node).getData(); 277 this.m_contentHandler.characters(data.toCharArray(), 0, data.length()); 278 } 279 } 280 281 /** 282 * Start processing given node 283 * 284 * 285 * @param node Node to process 286 * 287 * @throws org.xml.sax.SAXException 288 */ startNode(Node node)289 protected void startNode(Node node) throws org.xml.sax.SAXException 290 { 291 292 if (m_contentHandler instanceof NodeConsumer) 293 { 294 ((NodeConsumer) m_contentHandler).setOriginatingNode(node); 295 } 296 297 if (node instanceof Locator) 298 { 299 Locator loc = (Locator)node; 300 m_locator.setColumnNumber(loc.getColumnNumber()); 301 m_locator.setLineNumber(loc.getLineNumber()); 302 m_locator.setPublicId(loc.getPublicId()); 303 m_locator.setSystemId(loc.getSystemId()); 304 } 305 else 306 { 307 m_locator.setColumnNumber(0); 308 m_locator.setLineNumber(0); 309 } 310 311 switch (node.getNodeType()) 312 { 313 case Node.COMMENT_NODE : 314 { 315 String data = ((Comment) node).getData(); 316 317 if (m_contentHandler instanceof LexicalHandler) 318 { 319 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 320 321 lh.comment(data.toCharArray(), 0, data.length()); 322 } 323 } 324 break; 325 case Node.DOCUMENT_FRAGMENT_NODE : 326 327 // ??; 328 break; 329 case Node.DOCUMENT_NODE : 330 331 break; 332 case Node.ELEMENT_NODE : 333 NamedNodeMap atts = ((Element) node).getAttributes(); 334 int nAttrs = atts.getLength(); 335 // System.out.println("TreeWalker#startNode: "+node.getNodeName()); 336 337 for (int i = 0; i < nAttrs; i++) 338 { 339 Node attr = atts.item(i); 340 String attrName = attr.getNodeName(); 341 342 // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue()); 343 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 344 { 345 // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue()); 346 int index; 347 // Use "" instead of null, as Xerces likes "" for the 348 // name of the default namespace. Fix attributed 349 // to "Steven Murray" <smurray@ebt.com>. 350 String prefix = (index = attrName.indexOf(":")) < 0 351 ? "" : attrName.substring(index + 1); 352 353 this.m_contentHandler.startPrefixMapping(prefix, 354 attr.getNodeValue()); 355 } 356 357 } 358 359 // System.out.println("m_dh.getNamespaceOfNode(node): "+m_dh.getNamespaceOfNode(node)); 360 // System.out.println("m_dh.getLocalNameOfNode(node): "+m_dh.getLocalNameOfNode(node)); 361 String ns = m_dh.getNamespaceOfNode(node); 362 if(null == ns) 363 ns = ""; 364 this.m_contentHandler.startElement(ns, 365 m_dh.getLocalNameOfNode(node), 366 node.getNodeName(), 367 new AttList(atts, m_dh)); 368 break; 369 case Node.PROCESSING_INSTRUCTION_NODE : 370 { 371 ProcessingInstruction pi = (ProcessingInstruction) node; 372 String name = pi.getNodeName(); 373 374 // String data = pi.getData(); 375 if (name.equals("xslt-next-is-raw")) 376 { 377 nextIsRaw = true; 378 } 379 else 380 { 381 this.m_contentHandler.processingInstruction(pi.getNodeName(), 382 pi.getData()); 383 } 384 } 385 break; 386 case Node.CDATA_SECTION_NODE : 387 { 388 boolean isLexH = (m_contentHandler instanceof LexicalHandler); 389 LexicalHandler lh = isLexH 390 ? ((LexicalHandler) this.m_contentHandler) : null; 391 392 if (isLexH) 393 { 394 lh.startCDATA(); 395 } 396 397 dispatachChars(node); 398 399 { 400 if (isLexH) 401 { 402 lh.endCDATA(); 403 } 404 } 405 } 406 break; 407 case Node.TEXT_NODE : 408 { 409 //String data = ((Text) node).getData(); 410 411 if (nextIsRaw) 412 { 413 nextIsRaw = false; 414 415 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, ""); 416 dispatachChars(node); 417 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, ""); 418 } 419 else 420 { 421 dispatachChars(node); 422 } 423 } 424 break; 425 case Node.ENTITY_REFERENCE_NODE : 426 { 427 EntityReference eref = (EntityReference) node; 428 429 if (m_contentHandler instanceof LexicalHandler) 430 { 431 ((LexicalHandler) this.m_contentHandler).startEntity( 432 eref.getNodeName()); 433 } 434 else 435 { 436 437 // warning("Can not output entity to a pure SAX ContentHandler"); 438 } 439 } 440 break; 441 default : 442 } 443 } 444 445 /** 446 * End processing of given node 447 * 448 * 449 * @param node Node we just finished processing 450 * 451 * @throws org.xml.sax.SAXException 452 */ 453 protected void endNode(Node node) throws org.xml.sax.SAXException 454 { 455 456 switch (node.getNodeType()) 457 { 458 case Node.DOCUMENT_NODE : 459 break; 460 461 case Node.ELEMENT_NODE : 462 String ns = m_dh.getNamespaceOfNode(node); 463 if(null == ns) 464 ns = ""; 465 this.m_contentHandler.endElement(ns, 466 m_dh.getLocalNameOfNode(node), 467 node.getNodeName()); 468 469 NamedNodeMap atts = ((Element) node).getAttributes(); 470 int nAttrs = atts.getLength(); 471 472 for (int i = 0; i < nAttrs; i++) 473 { 474 Node attr = atts.item(i); 475 String attrName = attr.getNodeName(); 476 477 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 478 { 479 int index; 480 // Use "" instead of null, as Xerces likes "" for the 481 // name of the default namespace. Fix attributed 482 // to "Steven Murray" <smurray@ebt.com>. 483 String prefix = (index = attrName.indexOf(":")) < 0 484 ? "" : attrName.substring(index + 1); 485 486 this.m_contentHandler.endPrefixMapping(prefix); 487 } 488 } 489 break; 490 case Node.CDATA_SECTION_NODE : 491 break; 492 case Node.ENTITY_REFERENCE_NODE : 493 { 494 EntityReference eref = (EntityReference) node; 495 496 if (m_contentHandler instanceof LexicalHandler) 497 { 498 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 499 500 lh.endEntity(eref.getNodeName()); 501 } 502 } 503 break; 504 default : 505 } 506 } 507 } //TreeWalker 508 509