1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: DOM2DTM.java 478671 2006-11-23 21:00:31Z minchau $ 20 */ 21 package org.apache.xml.dtm.ref.dom2dtm; 22 23 import java.util.Vector; 24 25 import javax.xml.transform.SourceLocator; 26 import javax.xml.transform.dom.DOMSource; 27 28 import org.apache.xml.dtm.DTM; 29 import org.apache.xml.dtm.DTMManager; 30 import org.apache.xml.dtm.DTMWSFilter; 31 import org.apache.xml.dtm.ref.DTMDefaultBaseIterators; 32 import org.apache.xml.dtm.ref.DTMManagerDefault; 33 import org.apache.xml.dtm.ref.ExpandedNameTable; 34 import org.apache.xml.dtm.ref.IncrementalSAXSource; 35 import org.apache.xml.res.XMLErrorResources; 36 import org.apache.xml.res.XMLMessages; 37 import org.apache.xml.utils.FastStringBuffer; 38 import org.apache.xml.utils.QName; 39 import org.apache.xml.utils.StringBufferPool; 40 import org.apache.xml.utils.TreeWalker; 41 import org.apache.xml.utils.XMLCharacterRecognizer; 42 import org.apache.xml.utils.XMLString; 43 import org.apache.xml.utils.XMLStringFactory; 44 import org.w3c.dom.Attr; 45 import org.w3c.dom.Document; 46 import org.w3c.dom.DocumentType; 47 import org.w3c.dom.Element; 48 import org.w3c.dom.Entity; 49 import org.w3c.dom.NamedNodeMap; 50 import org.w3c.dom.Node; 51 import org.xml.sax.ContentHandler; 52 53 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the 54 * DTM API. 55 * 56 * Note that it doesn't necessarily represent a full Document 57 * tree. You can wrap a DOM2DTM around a specific node and its subtree 58 * and the right things should happen. (I don't _think_ we currently 59 * support DocumentFrgment nodes as roots, though that might be worth 60 * considering.) 61 * 62 * Note too that we do not currently attempt to track document 63 * mutation. If you alter the DOM after wrapping DOM2DTM around it, 64 * all bets are off. 65 * */ 66 public class DOM2DTM extends DTMDefaultBaseIterators 67 { 68 static final boolean JJK_DEBUG=false; 69 static final boolean JJK_NEWCODE=true; 70 71 /** Manefest constant 72 */ 73 static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace"; 74 75 /** The current position in the DOM tree. Last node examined for 76 * possible copying to DTM. */ 77 transient private Node m_pos; 78 /** The current position in the DTM tree. Who children get appended to. */ 79 private int m_last_parent=0; 80 /** The current position in the DTM tree. Who children reference as their 81 * previous sib. */ 82 private int m_last_kid=NULL; 83 84 /** The top of the subtree. 85 * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.' 86 * */ 87 transient private Node m_root; 88 89 /** True iff the first element has been processed. This is used to control 90 synthesis of the implied xml: namespace declaration node. */ 91 boolean m_processedFirstElement=false; 92 93 /** true if ALL the nodes in the m_root subtree have been processed; 94 * false if our incremental build has not yet finished scanning the 95 * DOM tree. */ 96 transient private boolean m_nodesAreProcessed; 97 98 /** The node objects. The instance part of the handle indexes 99 * directly into this vector. Each DTM node may actually be 100 * composed of several DOM nodes (for example, if logically-adjacent 101 * Text/CDATASection nodes in the DOM have been coalesced into a 102 * single DTM Text node); this table points only to the first in 103 * that sequence. */ 104 protected Vector m_nodes = new Vector(); 105 106 /** 107 * Construct a DOM2DTM object from a DOM node. 108 * 109 * @param mgr The DTMManager who owns this DTM. 110 * @param domSource the DOM source that this DTM will wrap. 111 * @param dtmIdentity The DTM identity ID for this DTM. 112 * @param whiteSpaceFilter The white space filter for this DTM, which may 113 * be null. 114 * @param xstringfactory XMLString factory for creating character content. 115 * @param doIndexing true if the caller considers it worth it to use 116 * indexing schemes. 117 */ DOM2DTM(DTMManager mgr, DOMSource domSource, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing)118 public DOM2DTM(DTMManager mgr, DOMSource domSource, 119 int dtmIdentity, DTMWSFilter whiteSpaceFilter, 120 XMLStringFactory xstringfactory, 121 boolean doIndexing) 122 { 123 super(mgr, domSource, dtmIdentity, whiteSpaceFilter, 124 xstringfactory, doIndexing); 125 126 // Initialize DOM navigation 127 m_pos=m_root = domSource.getNode(); 128 // Initialize DTM navigation 129 m_last_parent=m_last_kid=NULL; 130 m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); 131 132 // Apparently the domSource root may not actually be the 133 // Document node. If it's an Element node, we need to immediately 134 // add its attributes. Adapted from nextNode(). 135 // %REVIEW% Move this logic into addNode and recurse? Cleaner! 136 // 137 // (If it's an EntityReference node, we're probably in 138 // seriously bad trouble. For now 139 // I'm just hoping nobody is ever quite that foolish... %REVIEW%) 140 // 141 // %ISSUE% What about inherited namespaces in this case? 142 // Do we need to special-case initialize them into the DTM model? 143 if(ELEMENT_NODE == m_root.getNodeType()) 144 { 145 NamedNodeMap attrs=m_root.getAttributes(); 146 int attrsize=(attrs==null) ? 0 : attrs.getLength(); 147 if(attrsize>0) 148 { 149 int attrIndex=NULL; // start with no previous sib 150 for(int i=0;i<attrsize;++i) 151 { 152 // No need to force nodetype in this case; 153 // addNode() will take care of switching it from 154 // Attr to Namespace if necessary. 155 attrIndex=addNode(attrs.item(i),0,attrIndex,NULL); 156 m_firstch.setElementAt(DTM.NULL,attrIndex); 157 } 158 // Terminate list of attrs, and make sure they aren't 159 // considered children of the element 160 m_nextsib.setElementAt(DTM.NULL,attrIndex); 161 162 // IMPORTANT: This does NOT change m_last_parent or m_last_kid! 163 } // if attrs exist 164 } //if(ELEMENT_NODE) 165 166 // Initialize DTM-completed status 167 m_nodesAreProcessed = false; 168 } 169 170 /** 171 * Construct the node map from the node. 172 * 173 * @param node The node that is to be added to the DTM. 174 * @param parentIndex The current parent index. 175 * @param previousSibling The previous sibling index. 176 * @param forceNodeType If not DTM.NULL, overrides the DOM node type. 177 * Used to force nodes to Text rather than CDATASection when their 178 * coalesced value includes ordinary Text nodes (current DTM behavior). 179 * 180 * @return The index identity of the node that was added. 181 */ addNode(Node node, int parentIndex, int previousSibling, int forceNodeType)182 protected int addNode(Node node, int parentIndex, 183 int previousSibling, int forceNodeType) 184 { 185 int nodeIndex = m_nodes.size(); 186 187 // Have we overflowed a DTM Identity's addressing range? 188 if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) 189 { 190 try 191 { 192 if(m_mgr==null) 193 throw new ClassCastException(); 194 195 // Handle as Extended Addressing 196 DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; 197 int id=mgrD.getFirstFreeDTMID(); 198 mgrD.addDTM(this,id,nodeIndex); 199 m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS); 200 } 201 catch(ClassCastException e) 202 { 203 // %REVIEW% Wrong error message, but I've been told we're trying 204 // not to add messages right not for I18N reasons. 205 // %REVIEW% Should this be a Fatal Error? 206 error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; 207 } 208 } 209 210 m_size++; 211 // ensureSize(nodeIndex); 212 213 int type; 214 if(NULL==forceNodeType) 215 type = node.getNodeType(); 216 else 217 type=forceNodeType; 218 219 // %REVIEW% The Namespace Spec currently says that Namespaces are 220 // processed in a non-namespace-aware manner, by matching the 221 // QName, even though there is in fact a namespace assigned to 222 // these nodes in the DOM. If and when that changes, we will have 223 // to consider whether we check the namespace-for-namespaces 224 // rather than the node name. 225 // 226 // %TBD% Note that the DOM does not necessarily explicitly declare 227 // all the namespaces it uses. DOM Level 3 will introduce a 228 // namespace-normalization operation which reconciles that, and we 229 // can request that users invoke it or otherwise ensure that the 230 // tree is namespace-well-formed before passing the DOM to Xalan. 231 // But if they don't, what should we do about it? We probably 232 // don't want to alter the source DOM (and may not be able to do 233 // so if it's read-only). The best available answer might be to 234 // synthesize additional DTM Namespace Nodes that don't correspond 235 // to DOM Attr Nodes. 236 if (Node.ATTRIBUTE_NODE == type) 237 { 238 String name = node.getNodeName(); 239 240 if (name.startsWith("xmlns:") || name.equals("xmlns")) 241 { 242 type = DTM.NAMESPACE_NODE; 243 } 244 } 245 246 m_nodes.addElement(node); 247 248 m_firstch.setElementAt(NOTPROCESSED,nodeIndex); 249 m_nextsib.setElementAt(NOTPROCESSED,nodeIndex); 250 m_prevsib.setElementAt(previousSibling,nodeIndex); 251 m_parent.setElementAt(parentIndex,nodeIndex); 252 253 if(DTM.NULL != parentIndex && 254 type != DTM.ATTRIBUTE_NODE && 255 type != DTM.NAMESPACE_NODE) 256 { 257 // If the DTM parent had no children, this becomes its first child. 258 if(NOTPROCESSED == m_firstch.elementAt(parentIndex)) 259 m_firstch.setElementAt(nodeIndex,parentIndex); 260 } 261 262 String nsURI = node.getNamespaceURI(); 263 264 // Deal with the difference between Namespace spec and XSLT 265 // definitions of local name. (The former says PIs don't have 266 // localnames; the latter says they do.) 267 String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? 268 node.getNodeName() : 269 node.getLocalName(); 270 271 // Hack to make DOM1 sort of work... 272 if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) 273 && null == localName) 274 localName = node.getNodeName(); // -sb 275 276 ExpandedNameTable exnt = m_expandedNameTable; 277 278 // %TBD% Nodes created with the old non-namespace-aware DOM 279 // calls createElement() and createAttribute() will never have a 280 // localname. That will cause their expandedNameID to be just the 281 // nodeType... which will keep them from being matched 282 // successfully by name. Since the DOM makes no promise that 283 // those will participate in namespace processing, this is 284 // officially accepted as Not Our Fault. But it might be nice to 285 // issue a diagnostic message! 286 if(node.getLocalName()==null && 287 (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE)) 288 { 289 // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM."); 290 } 291 292 int expandedNameID = (null != localName) 293 ? exnt.getExpandedTypeID(nsURI, localName, type) : 294 exnt.getExpandedTypeID(type); 295 296 m_exptype.setElementAt(expandedNameID,nodeIndex); 297 298 indexNode(expandedNameID, nodeIndex); 299 300 if (DTM.NULL != previousSibling) 301 m_nextsib.setElementAt(nodeIndex,previousSibling); 302 303 // This should be done after m_exptype has been set, and probably should 304 // always be the last thing we do 305 if (type == DTM.NAMESPACE_NODE) 306 declareNamespaceInContext(parentIndex,nodeIndex); 307 308 return nodeIndex; 309 } 310 311 /** 312 * Get the number of nodes that have been added. 313 */ getNumberOfNodes()314 public int getNumberOfNodes() 315 { 316 return m_nodes.size(); 317 } 318 319 /** 320 * This method iterates to the next node that will be added to the table. 321 * Each call to this method adds a new node to the table, unless the end 322 * is reached, in which case it returns null. 323 * 324 * @return The true if a next node is found or false if 325 * there are no more nodes. 326 */ nextNode()327 protected boolean nextNode() 328 { 329 // Non-recursive one-fetch-at-a-time depth-first traversal with 330 // attribute/namespace nodes and white-space stripping. 331 // Navigating the DOM is simple, navigating the DTM is simple; 332 // keeping track of both at once is a trifle baroque but at least 333 // we've avoided most of the special cases. 334 if (m_nodesAreProcessed) 335 return false; 336 337 // %REVIEW% Is this local copy Really Useful from a performance 338 // point of view? Or is this a false microoptimization? 339 Node pos=m_pos; 340 Node next=null; 341 int nexttype=NULL; 342 343 // Navigate DOM tree 344 do 345 { 346 // Look down to first child. 347 if (pos.hasChildNodes()) 348 { 349 next = pos.getFirstChild(); 350 351 // %REVIEW% There's probably a more elegant way to skip 352 // the doctype. (Just let it go and Suppress it? 353 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) 354 next=next.getNextSibling(); 355 356 // Push DTM context -- except for children of Entity References, 357 // which have no DTM equivalent and cause no DTM navigation. 358 if(ENTITY_REFERENCE_NODE!=pos.getNodeType()) 359 { 360 m_last_parent=m_last_kid; 361 m_last_kid=NULL; 362 // Whitespace-handler context stacking 363 if(null != m_wsfilter) 364 { 365 short wsv = 366 m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this); 367 boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) 368 ? getShouldStripWhitespace() 369 : (DTMWSFilter.STRIP == wsv); 370 pushShouldStripWhitespace(shouldStrip); 371 } // if(m_wsfilter) 372 } 373 } 374 375 // If that fails, look up and right (but not past root!) 376 else 377 { 378 if(m_last_kid!=NULL) 379 { 380 // Last node posted at this level had no more children 381 // If it has _no_ children, we need to record that. 382 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED) 383 m_firstch.setElementAt(NULL,m_last_kid); 384 } 385 386 while(m_last_parent != NULL) 387 { 388 // %REVIEW% There's probably a more elegant way to 389 // skip the doctype. (Just let it go and Suppress it? 390 next = pos.getNextSibling(); 391 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) 392 next=next.getNextSibling(); 393 394 if(next!=null) 395 break; // Found it! 396 397 // No next-sibling found. Pop the DOM. 398 pos=pos.getParentNode(); 399 if(pos==null) 400 { 401 // %TBD% Should never arise, but I want to be sure of that... 402 if(JJK_DEBUG) 403 { 404 System.out.println("***** DOM2DTM Pop Control Flow problem"); 405 for(;;); // Freeze right here! 406 } 407 } 408 409 // The only parents in the DTM are Elements. However, 410 // the DOM could contain EntityReferences. If we 411 // encounter one, pop it _without_ popping DTM. 412 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType()) 413 { 414 // Nothing needs doing 415 if(JJK_DEBUG) 416 System.out.println("***** DOM2DTM popping EntRef"); 417 } 418 else 419 { 420 popShouldStripWhitespace(); 421 // Fix and pop DTM 422 if(m_last_kid==NULL) 423 m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element 424 else 425 m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else 426 m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent); 427 } 428 } 429 if(m_last_parent==NULL) 430 next=null; 431 } 432 433 if(next!=null) 434 nexttype=next.getNodeType(); 435 436 // If it's an entity ref, advance past it. 437 // 438 // %REVIEW% Should we let this out the door and just suppress it? 439 // More work, but simpler code, more likely to be correct, and 440 // it doesn't happen very often. We'd get rid of the loop too. 441 if (ENTITY_REFERENCE_NODE == nexttype) 442 pos=next; 443 } 444 while (ENTITY_REFERENCE_NODE == nexttype); 445 446 // Did we run out of the tree? 447 if(next==null) 448 { 449 m_nextsib.setElementAt(NULL,0); 450 m_nodesAreProcessed = true; 451 m_pos=null; 452 453 if(JJK_DEBUG) 454 { 455 System.out.println("***** DOM2DTM Crosscheck:"); 456 for(int i=0;i<m_nodes.size();++i) 457 System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i)); 458 } 459 460 return false; 461 } 462 463 // Text needs some special handling: 464 // 465 // DTM may skip whitespace. This is handled by the suppressNode flag, which 466 // when true will keep the DTM node from being created. 467 // 468 // DTM only directly records the first DOM node of any logically-contiguous 469 // sequence. The lastTextNode value will be set to the last node in the 470 // contiguous sequence, and -- AFTER the DTM addNode -- can be used to 471 // advance next over this whole block. Should be simpler than special-casing 472 // the above loop for "Was the logically-preceeding sibling a text node". 473 // 474 // Finally, a DTM node should be considered a CDATASection only if all the 475 // contiguous text it covers is CDATASections. The first Text should 476 // force DTM to Text. 477 478 boolean suppressNode=false; 479 Node lastTextNode=null; 480 481 nexttype=next.getNodeType(); 482 483 // nexttype=pos.getNodeType(); 484 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) 485 { 486 // If filtering, initially assume we're going to suppress the node 487 suppressNode=((null != m_wsfilter) && getShouldStripWhitespace()); 488 489 // Scan logically contiguous text (siblings, plus "flattening" 490 // of entity reference boundaries). 491 Node n=next; 492 while(n!=null) 493 { 494 lastTextNode=n; 495 // Any Text node means DTM considers it all Text 496 if(TEXT_NODE == n.getNodeType()) 497 nexttype=TEXT_NODE; 498 // Any non-whitespace in this sequence blocks whitespace 499 // suppression 500 suppressNode &= 501 XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue()); 502 503 n=logicalNextDOMTextNode(n); 504 } 505 } 506 507 // Special handling for PIs: Some DOMs represent the XML 508 // Declaration as a PI. This is officially incorrect, per the DOM 509 // spec, but is considered a "wrong but tolerable" temporary 510 // workaround pending proper handling of these fields in DOM Level 511 // 3. We want to recognize and reject that case. 512 else if(PROCESSING_INSTRUCTION_NODE==nexttype) 513 { 514 suppressNode = (pos.getNodeName().toLowerCase().equals("xml")); 515 } 516 517 518 if(!suppressNode) 519 { 520 // Inserting next. NOTE that we force the node type; for 521 // coalesced Text, this records CDATASections adjacent to 522 // ordinary Text as Text. 523 int nextindex=addNode(next,m_last_parent,m_last_kid, 524 nexttype); 525 526 m_last_kid=nextindex; 527 528 if(ELEMENT_NODE == nexttype) 529 { 530 int attrIndex=NULL; // start with no previous sib 531 // Process attributes _now_, rather than waiting. 532 // Simpler control flow, makes NS cache available immediately. 533 NamedNodeMap attrs=next.getAttributes(); 534 int attrsize=(attrs==null) ? 0 : attrs.getLength(); 535 if(attrsize>0) 536 { 537 for(int i=0;i<attrsize;++i) 538 { 539 // No need to force nodetype in this case; 540 // addNode() will take care of switching it from 541 // Attr to Namespace if necessary. 542 attrIndex=addNode(attrs.item(i), 543 nextindex,attrIndex,NULL); 544 m_firstch.setElementAt(DTM.NULL,attrIndex); 545 546 // If the xml: prefix is explicitly declared 547 // we don't need to synthesize one. 548 // 549 // NOTE that XML Namespaces were not originally 550 // defined as being namespace-aware (grrr), and 551 // while the W3C is planning to fix this it's 552 // safer for now to test the QName and trust the 553 // parsers to prevent anyone from redefining the 554 // reserved xmlns: prefix 555 if(!m_processedFirstElement 556 && "xmlns:xml".equals(attrs.item(i).getNodeName())) 557 m_processedFirstElement=true; 558 } 559 // Terminate list of attrs, and make sure they aren't 560 // considered children of the element 561 } // if attrs exist 562 if(!m_processedFirstElement) 563 { 564 // The DOM might not have an explicit declaration for the 565 // implicit "xml:" prefix, but the XPath data model 566 // requires that this appear as a Namespace Node so we 567 // have to synthesize one. You can think of this as 568 // being a default attribute defined by the XML 569 // Namespaces spec rather than by the DTD. 570 attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode( 571 (Element)next,"xml",NAMESPACE_DECL_NS, 572 makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1) 573 ), 574 nextindex,attrIndex,NULL); 575 m_firstch.setElementAt(DTM.NULL,attrIndex); 576 m_processedFirstElement=true; 577 } 578 if(attrIndex!=NULL) 579 m_nextsib.setElementAt(DTM.NULL,attrIndex); 580 } //if(ELEMENT_NODE) 581 } // (if !suppressNode) 582 583 // Text postprocessing: Act on values stored above 584 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) 585 { 586 // %TBD% If nexttype was forced to TEXT, patch the DTM node 587 588 next=lastTextNode; // Advance the DOM cursor over contiguous text 589 } 590 591 // Remember where we left off. 592 m_pos=next; 593 return true; 594 } 595 596 597 /** 598 * Return an DOM node for the given node. 599 * 600 * @param nodeHandle The node ID. 601 * 602 * @return A node representation of the DTM node. 603 */ getNode(int nodeHandle)604 public Node getNode(int nodeHandle) 605 { 606 607 int identity = makeNodeIdentity(nodeHandle); 608 609 return (Node) m_nodes.elementAt(identity); 610 } 611 612 /** 613 * Get a Node from an identity index. 614 * 615 * NEEDSDOC @param nodeIdentity 616 * 617 * NEEDSDOC ($objectName$) @return 618 */ lookupNode(int nodeIdentity)619 protected Node lookupNode(int nodeIdentity) 620 { 621 return (Node) m_nodes.elementAt(nodeIdentity); 622 } 623 624 /** 625 * Get the next node identity value in the list, and call the iterator 626 * if it hasn't been added yet. 627 * 628 * @param identity The node identity (index). 629 * @return identity+1, or DTM.NULL. 630 */ getNextNodeIdentity(int identity)631 protected int getNextNodeIdentity(int identity) 632 { 633 634 identity += 1; 635 636 if (identity >= m_nodes.size()) 637 { 638 if (!nextNode()) 639 identity = DTM.NULL; 640 } 641 642 return identity; 643 } 644 645 /** 646 * Get the handle from a Node. 647 * <p>%OPT% This will be pretty slow.</p> 648 * 649 * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path; 650 * walk down DTM reconstructing path) might be considerably faster 651 * on later nodes in large documents. That might also imply improving 652 * this call to handle nodes which would be in this DTM but 653 * have not yet been built, which might or might not be a Good Thing.</p> 654 * 655 * %REVIEW% This relies on being able to test node-identity via 656 * object-identity. DTM2DOM proxying is a great example of a case where 657 * that doesn't work. DOM Level 3 will provide the isSameNode() method 658 * to fix that, but until then this is going to be flaky. 659 * 660 * @param node A node, which may be null. 661 * 662 * @return The node handle or <code>DTM.NULL</code>. 663 */ getHandleFromNode(Node node)664 private int getHandleFromNode(Node node) 665 { 666 if (null != node) 667 { 668 int len = m_nodes.size(); 669 boolean isMore; 670 int i = 0; 671 do 672 { 673 for (; i < len; i++) 674 { 675 if (m_nodes.elementAt(i) == node) 676 return makeNodeHandle(i); 677 } 678 679 isMore = nextNode(); 680 681 len = m_nodes.size(); 682 683 } 684 while(isMore || i < len); 685 } 686 687 return DTM.NULL; 688 } 689 690 /** Get the handle from a Node. This is a more robust version of 691 * getHandleFromNode, intended to be usable by the public. 692 * 693 * <p>%OPT% This will be pretty slow.</p> 694 * 695 * %REVIEW% This relies on being able to test node-identity via 696 * object-identity. DTM2DOM proxying is a great example of a case where 697 * that doesn't work. DOM Level 3 will provide the isSameNode() method 698 * to fix that, but until then this is going to be flaky. 699 * 700 * @param node A node, which may be null. 701 * 702 * @return The node handle or <code>DTM.NULL</code>. */ getHandleOfNode(Node node)703 public int getHandleOfNode(Node node) 704 { 705 if (null != node) 706 { 707 // Is Node actually within the same document? If not, don't search! 708 // This would be easier if m_root was always the Document node, but 709 // we decided to allow wrapping a DTM around a subtree. 710 if((m_root==node) || 711 (m_root.getNodeType()==DOCUMENT_NODE && 712 m_root==node.getOwnerDocument()) || 713 (m_root.getNodeType()!=DOCUMENT_NODE && 714 m_root.getOwnerDocument()==node.getOwnerDocument()) 715 ) 716 { 717 // If node _is_ in m_root's tree, find its handle 718 // 719 // %OPT% This check may be improved significantly when DOM 720 // Level 3 nodeKey and relative-order tests become 721 // available! 722 for(Node cursor=node; 723 cursor!=null; 724 cursor= 725 (cursor.getNodeType()!=ATTRIBUTE_NODE) 726 ? cursor.getParentNode() 727 : ((org.w3c.dom.Attr)cursor).getOwnerElement()) 728 { 729 if(cursor==m_root) 730 // We know this node; find its handle. 731 return getHandleFromNode(node); 732 } // for ancestors of node 733 } // if node and m_root in same Document 734 } // if node!=null 735 736 return DTM.NULL; 737 } 738 739 /** 740 * Retrieves an attribute node by by qualified name and namespace URI. 741 * 742 * @param nodeHandle int Handle of the node upon which to look up this attribute.. 743 * @param namespaceURI The namespace URI of the attribute to 744 * retrieve, or null. 745 * @param name The local name of the attribute to 746 * retrieve. 747 * @return The attribute node handle with the specified name ( 748 * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such 749 * attribute. 750 */ getAttributeNode(int nodeHandle, String namespaceURI, String name)751 public int getAttributeNode(int nodeHandle, String namespaceURI, 752 String name) 753 { 754 755 // %OPT% This is probably slower than it needs to be. 756 if (null == namespaceURI) 757 namespaceURI = ""; 758 759 int type = getNodeType(nodeHandle); 760 761 if (DTM.ELEMENT_NODE == type) 762 { 763 764 // Assume that attributes immediately follow the element. 765 int identity = makeNodeIdentity(nodeHandle); 766 767 while (DTM.NULL != (identity = getNextNodeIdentity(identity))) 768 { 769 // Assume this can not be null. 770 type = _type(identity); 771 772 // %REVIEW% 773 // Should namespace nodes be retrievable DOM-style as attrs? 774 // If not we need a separate function... which may be desirable 775 // architecturally, but which is ugly from a code point of view. 776 // (If we REALLY insist on it, this code should become a subroutine 777 // of both -- retrieve the node, then test if the type matches 778 // what you're looking for.) 779 if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE) 780 { 781 Node node = lookupNode(identity); 782 String nodeuri = node.getNamespaceURI(); 783 784 if (null == nodeuri) 785 nodeuri = ""; 786 787 String nodelocalname = node.getLocalName(); 788 789 if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) 790 return makeNodeHandle(identity); 791 } 792 793 else // if (DTM.NAMESPACE_NODE != type) 794 { 795 break; 796 } 797 } 798 } 799 800 return DTM.NULL; 801 } 802 803 /** 804 * Get the string-value of a node as a String object 805 * (see http://www.w3.org/TR/xpath#data-model 806 * for the definition of a node's string-value). 807 * 808 * @param nodeHandle The node ID. 809 * 810 * @return A string object that represents the string-value of the given node. 811 */ getStringValue(int nodeHandle)812 public XMLString getStringValue(int nodeHandle) 813 { 814 815 int type = getNodeType(nodeHandle); 816 Node node = getNode(nodeHandle); 817 // %TBD% If an element only has one text node, we should just use it 818 // directly. 819 if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type 820 || DTM.DOCUMENT_FRAGMENT_NODE == type) 821 { 822 FastStringBuffer buf = StringBufferPool.get(); 823 String s; 824 825 try 826 { 827 getNodeData(node, buf); 828 829 s = (buf.length() > 0) ? buf.toString() : ""; 830 } 831 finally 832 { 833 StringBufferPool.free(buf); 834 } 835 836 return m_xstrf.newstr( s ); 837 } 838 else if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 839 { 840 // If this is a DTM text node, it may be made of multiple DOM text 841 // nodes -- including navigating into Entity References. DOM2DTM 842 // records the first node in the sequence and requires that we 843 // pick up the others when we retrieve the DTM node's value. 844 // 845 // %REVIEW% DOM Level 3 is expected to add a "whole text" 846 // retrieval method which performs this function for us. 847 FastStringBuffer buf = StringBufferPool.get(); 848 while(node!=null) 849 { 850 buf.append(node.getNodeValue()); 851 node=logicalNextDOMTextNode(node); 852 } 853 String s=(buf.length() > 0) ? buf.toString() : ""; 854 StringBufferPool.free(buf); 855 return m_xstrf.newstr( s ); 856 } 857 else 858 return m_xstrf.newstr( node.getNodeValue() ); 859 } 860 861 /** 862 * Determine if the string-value of a node is whitespace 863 * 864 * @param nodeHandle The node Handle. 865 * 866 * @return Return true if the given node is whitespace. 867 */ isWhitespace(int nodeHandle)868 public boolean isWhitespace(int nodeHandle) 869 { 870 int type = getNodeType(nodeHandle); 871 Node node = getNode(nodeHandle); 872 if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 873 { 874 // If this is a DTM text node, it may be made of multiple DOM text 875 // nodes -- including navigating into Entity References. DOM2DTM 876 // records the first node in the sequence and requires that we 877 // pick up the others when we retrieve the DTM node's value. 878 // 879 // %REVIEW% DOM Level 3 is expected to add a "whole text" 880 // retrieval method which performs this function for us. 881 FastStringBuffer buf = StringBufferPool.get(); 882 while(node!=null) 883 { 884 buf.append(node.getNodeValue()); 885 node=logicalNextDOMTextNode(node); 886 } 887 boolean b = buf.isWhitespace(0, buf.length()); 888 StringBufferPool.free(buf); 889 return b; 890 } 891 return false; 892 } 893 894 /** 895 * Retrieve the text content of a DOM subtree, appending it into a 896 * user-supplied FastStringBuffer object. Note that attributes are 897 * not considered part of the content of an element. 898 * <p> 899 * There are open questions regarding whitespace stripping. 900 * Currently we make no special effort in that regard, since the standard 901 * DOM doesn't yet provide DTD-based information to distinguish 902 * whitespace-in-element-context from genuine #PCDATA. Note that we 903 * should probably also consider xml:space if/when we address this. 904 * DOM Level 3 may solve the problem for us. 905 * <p> 906 * %REVIEW% Actually, since this method operates on the DOM side of the 907 * fence rather than the DTM side, it SHOULDN'T do 908 * any special handling. The DOM does what the DOM does; if you want 909 * DTM-level abstractions, use DTM-level methods. 910 * 911 * @param node Node whose subtree is to be walked, gathering the 912 * contents of all Text or CDATASection nodes. 913 * @param buf FastStringBuffer into which the contents of the text 914 * nodes are to be concatenated. 915 */ getNodeData(Node node, FastStringBuffer buf)916 protected static void getNodeData(Node node, FastStringBuffer buf) 917 { 918 919 switch (node.getNodeType()) 920 { 921 case Node.DOCUMENT_FRAGMENT_NODE : 922 case Node.DOCUMENT_NODE : 923 case Node.ELEMENT_NODE : 924 { 925 for (Node child = node.getFirstChild(); null != child; 926 child = child.getNextSibling()) 927 { 928 getNodeData(child, buf); 929 } 930 } 931 break; 932 case Node.TEXT_NODE : 933 case Node.CDATA_SECTION_NODE : 934 case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node 935 buf.append(node.getNodeValue()); 936 break; 937 case Node.PROCESSING_INSTRUCTION_NODE : 938 // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); 939 break; 940 default : 941 // ignore 942 break; 943 } 944 } 945 946 /** 947 * Given a node handle, return its DOM-style node name. This will 948 * include names such as #text or #document. 949 * 950 * @param nodeHandle the id of the node. 951 * @return String Name of this node, which may be an empty string. 952 * %REVIEW% Document when empty string is possible... 953 * %REVIEW-COMMENT% It should never be empty, should it? 954 */ getNodeName(int nodeHandle)955 public String getNodeName(int nodeHandle) 956 { 957 958 Node node = getNode(nodeHandle); 959 960 // Assume non-null. 961 return node.getNodeName(); 962 } 963 964 /** 965 * Given a node handle, return the XPath node name. This should be 966 * the name as described by the XPath data model, NOT the DOM-style 967 * name. 968 * 969 * @param nodeHandle the id of the node. 970 * @return String Name of this node, which may be an empty string. 971 */ getNodeNameX(int nodeHandle)972 public String getNodeNameX(int nodeHandle) 973 { 974 975 String name; 976 short type = getNodeType(nodeHandle); 977 978 switch (type) 979 { 980 case DTM.NAMESPACE_NODE : 981 { 982 Node node = getNode(nodeHandle); 983 984 // assume not null. 985 name = node.getNodeName(); 986 if(name.startsWith("xmlns:")) 987 { 988 name = QName.getLocalPart(name); 989 } 990 else if(name.equals("xmlns")) 991 { 992 name = ""; 993 } 994 } 995 break; 996 case DTM.ATTRIBUTE_NODE : 997 case DTM.ELEMENT_NODE : 998 case DTM.ENTITY_REFERENCE_NODE : 999 case DTM.PROCESSING_INSTRUCTION_NODE : 1000 { 1001 Node node = getNode(nodeHandle); 1002 1003 // assume not null. 1004 name = node.getNodeName(); 1005 } 1006 break; 1007 default : 1008 name = ""; 1009 } 1010 1011 return name; 1012 } 1013 1014 /** 1015 * Given a node handle, return its XPath-style localname. 1016 * (As defined in Namespaces, this is the portion of the name after any 1017 * colon character). 1018 * 1019 * @param nodeHandle the id of the node. 1020 * @return String Local name of this node. 1021 */ getLocalName(int nodeHandle)1022 public String getLocalName(int nodeHandle) 1023 { 1024 if(JJK_NEWCODE) 1025 { 1026 int id=makeNodeIdentity(nodeHandle); 1027 if(NULL==id) return null; 1028 Node newnode=(Node)m_nodes.elementAt(id); 1029 String newname=newnode.getLocalName(); 1030 if (null == newname) 1031 { 1032 // XSLT treats PIs, and possibly other things, as having QNames. 1033 String qname = newnode.getNodeName(); 1034 if('#'==qname.charAt(0)) 1035 { 1036 // Match old default for this function 1037 // This conversion may or may not be necessary 1038 newname=""; 1039 } 1040 else 1041 { 1042 int index = qname.indexOf(':'); 1043 newname = (index < 0) ? qname : qname.substring(index + 1); 1044 } 1045 } 1046 return newname; 1047 } 1048 else 1049 { 1050 String name; 1051 short type = getNodeType(nodeHandle); 1052 switch (type) 1053 { 1054 case DTM.ATTRIBUTE_NODE : 1055 case DTM.ELEMENT_NODE : 1056 case DTM.ENTITY_REFERENCE_NODE : 1057 case DTM.NAMESPACE_NODE : 1058 case DTM.PROCESSING_INSTRUCTION_NODE : 1059 { 1060 Node node = getNode(nodeHandle); 1061 1062 // assume not null. 1063 name = node.getLocalName(); 1064 1065 if (null == name) 1066 { 1067 String qname = node.getNodeName(); 1068 int index = qname.indexOf(':'); 1069 1070 name = (index < 0) ? qname : qname.substring(index + 1); 1071 } 1072 } 1073 break; 1074 default : 1075 name = ""; 1076 } 1077 return name; 1078 } 1079 } 1080 1081 /** 1082 * Given a namespace handle, return the prefix that the namespace decl is 1083 * mapping. 1084 * Given a node handle, return the prefix used to map to the namespace. 1085 * 1086 * <p> %REVIEW% Are you sure you want "" for no prefix? </p> 1087 * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p> 1088 * 1089 * @param nodeHandle the id of the node. 1090 * @return String prefix of this node's name, or "" if no explicit 1091 * namespace prefix was given. 1092 */ getPrefix(int nodeHandle)1093 public String getPrefix(int nodeHandle) 1094 { 1095 1096 String prefix; 1097 short type = getNodeType(nodeHandle); 1098 1099 switch (type) 1100 { 1101 case DTM.NAMESPACE_NODE : 1102 { 1103 Node node = getNode(nodeHandle); 1104 1105 // assume not null. 1106 String qname = node.getNodeName(); 1107 int index = qname.indexOf(':'); 1108 1109 prefix = (index < 0) ? "" : qname.substring(index + 1); 1110 } 1111 break; 1112 case DTM.ATTRIBUTE_NODE : 1113 case DTM.ELEMENT_NODE : 1114 { 1115 Node node = getNode(nodeHandle); 1116 1117 // assume not null. 1118 String qname = node.getNodeName(); 1119 int index = qname.indexOf(':'); 1120 1121 prefix = (index < 0) ? "" : qname.substring(0, index); 1122 } 1123 break; 1124 default : 1125 prefix = ""; 1126 } 1127 1128 return prefix; 1129 } 1130 1131 /** 1132 * Given a node handle, return its DOM-style namespace URI 1133 * (As defined in Namespaces, this is the declared URI which this node's 1134 * prefix -- or default in lieu thereof -- was mapped to.) 1135 * 1136 * <p>%REVIEW% Null or ""? -sb</p> 1137 * 1138 * @param nodeHandle the id of the node. 1139 * @return String URI value of this node's namespace, or null if no 1140 * namespace was resolved. 1141 */ getNamespaceURI(int nodeHandle)1142 public String getNamespaceURI(int nodeHandle) 1143 { 1144 if(JJK_NEWCODE) 1145 { 1146 int id=makeNodeIdentity(nodeHandle); 1147 if(id==NULL) return null; 1148 Node node=(Node)m_nodes.elementAt(id); 1149 return node.getNamespaceURI(); 1150 } 1151 else 1152 { 1153 String nsuri; 1154 short type = getNodeType(nodeHandle); 1155 1156 switch (type) 1157 { 1158 case DTM.ATTRIBUTE_NODE : 1159 case DTM.ELEMENT_NODE : 1160 case DTM.ENTITY_REFERENCE_NODE : 1161 case DTM.NAMESPACE_NODE : 1162 case DTM.PROCESSING_INSTRUCTION_NODE : 1163 { 1164 Node node = getNode(nodeHandle); 1165 1166 // assume not null. 1167 nsuri = node.getNamespaceURI(); 1168 1169 // %TBD% Handle DOM1? 1170 } 1171 break; 1172 default : 1173 nsuri = null; 1174 } 1175 1176 return nsuri; 1177 } 1178 1179 } 1180 1181 /** Utility function: Given a DOM Text node, determine whether it is 1182 * logically followed by another Text or CDATASection node. This may 1183 * involve traversing into Entity References. 1184 * 1185 * %REVIEW% DOM Level 3 is expected to add functionality which may 1186 * allow us to retire this. 1187 */ logicalNextDOMTextNode(Node n)1188 private Node logicalNextDOMTextNode(Node n) 1189 { 1190 Node p=n.getNextSibling(); 1191 if(p==null) 1192 { 1193 // Walk out of any EntityReferenceNodes that ended with text 1194 for(n=n.getParentNode(); 1195 n!=null && ENTITY_REFERENCE_NODE == n.getNodeType(); 1196 n=n.getParentNode()) 1197 { 1198 p=n.getNextSibling(); 1199 if(p!=null) 1200 break; 1201 } 1202 } 1203 n=p; 1204 while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType()) 1205 { 1206 // Walk into any EntityReferenceNodes that start with text 1207 if(n.hasChildNodes()) 1208 n=n.getFirstChild(); 1209 else 1210 n=n.getNextSibling(); 1211 } 1212 if(n!=null) 1213 { 1214 // Found a logical next sibling. Is it text? 1215 int ntype=n.getNodeType(); 1216 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) 1217 n=null; 1218 } 1219 return n; 1220 } 1221 1222 /** 1223 * Given a node handle, return its node value. This is mostly 1224 * as defined by the DOM, but may ignore some conveniences. 1225 * <p> 1226 * 1227 * @param nodeHandle The node id. 1228 * @return String Value of this node, or null if not 1229 * meaningful for this node type. 1230 */ getNodeValue(int nodeHandle)1231 public String getNodeValue(int nodeHandle) 1232 { 1233 // The _type(nodeHandle) call was taking the lion's share of our 1234 // time, and was wrong anyway since it wasn't coverting handle to 1235 // identity. Inlined it. 1236 int type = _exptype(makeNodeIdentity(nodeHandle)); 1237 type=(NULL != type) ? getNodeType(nodeHandle) : NULL; 1238 1239 if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type) 1240 return getNode(nodeHandle).getNodeValue(); 1241 1242 // If this is a DTM text node, it may be made of multiple DOM text 1243 // nodes -- including navigating into Entity References. DOM2DTM 1244 // records the first node in the sequence and requires that we 1245 // pick up the others when we retrieve the DTM node's value. 1246 // 1247 // %REVIEW% DOM Level 3 is expected to add a "whole text" 1248 // retrieval method which performs this function for us. 1249 Node node = getNode(nodeHandle); 1250 Node n=logicalNextDOMTextNode(node); 1251 if(n==null) 1252 return node.getNodeValue(); 1253 1254 FastStringBuffer buf = StringBufferPool.get(); 1255 buf.append(node.getNodeValue()); 1256 while(n!=null) 1257 { 1258 buf.append(n.getNodeValue()); 1259 n=logicalNextDOMTextNode(n); 1260 } 1261 String s = (buf.length() > 0) ? buf.toString() : ""; 1262 StringBufferPool.free(buf); 1263 return s; 1264 } 1265 1266 /** 1267 * A document type declaration information item has the following properties: 1268 * 1269 * 1. [system identifier] The system identifier of the external subset, if 1270 * it exists. Otherwise this property has no value. 1271 * 1272 * @return the system identifier String object, or null if there is none. 1273 */ getDocumentTypeDeclarationSystemIdentifier()1274 public String getDocumentTypeDeclarationSystemIdentifier() 1275 { 1276 1277 Document doc; 1278 1279 if (m_root.getNodeType() == Node.DOCUMENT_NODE) 1280 doc = (Document) m_root; 1281 else 1282 doc = m_root.getOwnerDocument(); 1283 1284 if (null != doc) 1285 { 1286 DocumentType dtd = doc.getDoctype(); 1287 1288 if (null != dtd) 1289 { 1290 return dtd.getSystemId(); 1291 } 1292 } 1293 1294 return null; 1295 } 1296 1297 /** 1298 * Return the public identifier of the external subset, 1299 * normalized as described in 4.2.2 External Entities [XML]. If there is 1300 * no external subset or if it has no public identifier, this property 1301 * has no value. 1302 * 1303 * @return the public identifier String object, or null if there is none. 1304 */ getDocumentTypeDeclarationPublicIdentifier()1305 public String getDocumentTypeDeclarationPublicIdentifier() 1306 { 1307 1308 Document doc; 1309 1310 if (m_root.getNodeType() == Node.DOCUMENT_NODE) 1311 doc = (Document) m_root; 1312 else 1313 doc = m_root.getOwnerDocument(); 1314 1315 if (null != doc) 1316 { 1317 DocumentType dtd = doc.getDoctype(); 1318 1319 if (null != dtd) 1320 { 1321 return dtd.getPublicId(); 1322 } 1323 } 1324 1325 return null; 1326 } 1327 1328 /** 1329 * Returns the <code>Element</code> whose <code>ID</code> is given by 1330 * <code>elementId</code>. If no such element exists, returns 1331 * <code>DTM.NULL</code>. Behavior is not defined if more than one element 1332 * has this <code>ID</code>. Attributes (including those 1333 * with the name "ID") are not of type ID unless so defined by DTD/Schema 1334 * information available to the DTM implementation. 1335 * Implementations that do not know whether attributes are of type ID or 1336 * not are expected to return <code>DTM.NULL</code>. 1337 * 1338 * <p>%REVIEW% Presumably IDs are still scoped to a single document, 1339 * and this operation searches only within a single document, right? 1340 * Wouldn't want collisions between DTMs in the same process.</p> 1341 * 1342 * @param elementId The unique <code>id</code> value for an element. 1343 * @return The handle of the matching element. 1344 */ getElementById(String elementId)1345 public int getElementById(String elementId) 1346 { 1347 1348 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 1349 ? (Document) m_root : m_root.getOwnerDocument(); 1350 1351 if(null != doc) 1352 { 1353 Node elem = doc.getElementById(elementId); 1354 if(null != elem) 1355 { 1356 int elemHandle = getHandleFromNode(elem); 1357 1358 if(DTM.NULL == elemHandle) 1359 { 1360 int identity = m_nodes.size()-1; 1361 while (DTM.NULL != (identity = getNextNodeIdentity(identity))) 1362 { 1363 Node node = getNode(identity); 1364 if(node == elem) 1365 { 1366 elemHandle = getHandleFromNode(elem); 1367 break; 1368 } 1369 } 1370 } 1371 1372 return elemHandle; 1373 } 1374 1375 } 1376 return DTM.NULL; 1377 } 1378 1379 /** 1380 * The getUnparsedEntityURI function returns the URI of the unparsed 1381 * entity with the specified name in the same document as the context 1382 * node (see [3.3 Unparsed Entities]). It returns the empty string if 1383 * there is no such entity. 1384 * <p> 1385 * XML processors may choose to use the System Identifier (if one 1386 * is provided) to resolve the entity, rather than the URI in the 1387 * Public Identifier. The details are dependent on the processor, and 1388 * we would have to support some form of plug-in resolver to handle 1389 * this properly. Currently, we simply return the System Identifier if 1390 * present, and hope that it a usable URI or that our caller can 1391 * map it to one. 1392 * TODO: Resolve Public Identifiers... or consider changing function name. 1393 * <p> 1394 * If we find a relative URI 1395 * reference, XML expects it to be resolved in terms of the base URI 1396 * of the document. The DOM doesn't do that for us, and it isn't 1397 * entirely clear whether that should be done here; currently that's 1398 * pushed up to a higher level of our application. (Note that DOM Level 1399 * 1 didn't store the document's base URI.) 1400 * TODO: Consider resolving Relative URIs. 1401 * <p> 1402 * (The DOM's statement that "An XML processor may choose to 1403 * completely expand entities before the structure model is passed 1404 * to the DOM" refers only to parsed entities, not unparsed, and hence 1405 * doesn't affect this function.) 1406 * 1407 * @param name A string containing the Entity Name of the unparsed 1408 * entity. 1409 * 1410 * @return String containing the URI of the Unparsed Entity, or an 1411 * empty string if no such entity exists. 1412 */ getUnparsedEntityURI(String name)1413 public String getUnparsedEntityURI(String name) 1414 { 1415 1416 String url = ""; 1417 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 1418 ? (Document) m_root : m_root.getOwnerDocument(); 1419 1420 if (null != doc) 1421 { 1422 DocumentType doctype = doc.getDoctype(); 1423 1424 if (null != doctype) 1425 { 1426 NamedNodeMap entities = doctype.getEntities(); 1427 if(null == entities) 1428 return url; 1429 Entity entity = (Entity) entities.getNamedItem(name); 1430 if(null == entity) 1431 return url; 1432 1433 String notationName = entity.getNotationName(); 1434 1435 if (null != notationName) // then it's unparsed 1436 { 1437 // The draft says: "The XSLT processor may use the public 1438 // identifier to generate a URI for the entity instead of the URI 1439 // specified in the system identifier. If the XSLT processor does 1440 // not use the public identifier to generate the URI, it must use 1441 // the system identifier; if the system identifier is a relative 1442 // URI, it must be resolved into an absolute URI using the URI of 1443 // the resource containing the entity declaration as the base 1444 // URI [RFC2396]." 1445 // So I'm falling a bit short here. 1446 url = entity.getSystemId(); 1447 1448 if (null == url) 1449 { 1450 url = entity.getPublicId(); 1451 } 1452 else 1453 { 1454 // This should be resolved to an absolute URL, but that's hard 1455 // to do from here. 1456 } 1457 } 1458 } 1459 } 1460 1461 return url; 1462 } 1463 1464 /** 1465 * 5. [specified] A flag indicating whether this attribute was actually 1466 * specified in the start-tag of its element, or was defaulted from the 1467 * DTD. 1468 * 1469 * @param attributeHandle the attribute handle 1470 * @return <code>true</code> if the attribute was specified; 1471 * <code>false</code> if it was defaulted. 1472 */ isAttributeSpecified(int attributeHandle)1473 public boolean isAttributeSpecified(int attributeHandle) 1474 { 1475 int type = getNodeType(attributeHandle); 1476 1477 if (DTM.ATTRIBUTE_NODE == type) 1478 { 1479 Attr attr = (Attr)getNode(attributeHandle); 1480 return attr.getSpecified(); 1481 } 1482 return false; 1483 } 1484 1485 /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since 1486 * we're wrapped around an existing DOM. 1487 * 1488 * @param source The IncrementalSAXSource that we want to recieve events from 1489 * on demand. 1490 */ setIncrementalSAXSource(IncrementalSAXSource source)1491 public void setIncrementalSAXSource(IncrementalSAXSource source) 1492 { 1493 } 1494 1495 /** getContentHandler returns "our SAX builder" -- the thing that 1496 * someone else should send SAX events to in order to extend this 1497 * DTM model. 1498 * 1499 * @return null if this model doesn't respond to SAX events, 1500 * "this" if the DTM object has a built-in SAX ContentHandler, 1501 * the IncrmentalSAXSource if we're bound to one and should receive 1502 * the SAX stream via it for incremental build purposes... 1503 * */ getContentHandler()1504 public org.xml.sax.ContentHandler getContentHandler() 1505 { 1506 return null; 1507 } 1508 1509 /** 1510 * Return this DTM's lexical handler. 1511 * 1512 * %REVIEW% Should this return null if constrution already done/begun? 1513 * 1514 * @return null if this model doesn't respond to lexical SAX events, 1515 * "this" if the DTM object has a built-in SAX ContentHandler, 1516 * the IncrementalSAXSource if we're bound to one and should receive 1517 * the SAX stream via it for incremental build purposes... 1518 */ getLexicalHandler()1519 public org.xml.sax.ext.LexicalHandler getLexicalHandler() 1520 { 1521 1522 return null; 1523 } 1524 1525 1526 /** 1527 * Return this DTM's EntityResolver. 1528 * 1529 * @return null if this model doesn't respond to SAX entity ref events. 1530 */ getEntityResolver()1531 public org.xml.sax.EntityResolver getEntityResolver() 1532 { 1533 1534 return null; 1535 } 1536 1537 /** 1538 * Return this DTM's DTDHandler. 1539 * 1540 * @return null if this model doesn't respond to SAX dtd events. 1541 */ getDTDHandler()1542 public org.xml.sax.DTDHandler getDTDHandler() 1543 { 1544 1545 return null; 1546 } 1547 1548 /** 1549 * Return this DTM's ErrorHandler. 1550 * 1551 * @return null if this model doesn't respond to SAX error events. 1552 */ getErrorHandler()1553 public org.xml.sax.ErrorHandler getErrorHandler() 1554 { 1555 1556 return null; 1557 } 1558 1559 /** 1560 * Return this DTM's DeclHandler. 1561 * 1562 * @return null if this model doesn't respond to SAX Decl events. 1563 */ getDeclHandler()1564 public org.xml.sax.ext.DeclHandler getDeclHandler() 1565 { 1566 1567 return null; 1568 } 1569 1570 /** @return true iff we're building this model incrementally (eg 1571 * we're partnered with a IncrementalSAXSource) and thus require that the 1572 * transformation and the parse run simultaneously. Guidance to the 1573 * DTMManager. 1574 * */ needsTwoThreads()1575 public boolean needsTwoThreads() 1576 { 1577 return false; 1578 } 1579 1580 // ========== Direct SAX Dispatch, for optimization purposes ======== 1581 1582 /** 1583 * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition 1584 * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> 1585 * the definition of <CODE>S</CODE></A> for details. 1586 * @param ch Character to check as XML whitespace. 1587 * @return =true if <var>ch</var> is XML whitespace; otherwise =false. 1588 */ isSpace(char ch)1589 private static boolean isSpace(char ch) 1590 { 1591 return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. 1592 } 1593 1594 /** 1595 * Directly call the 1596 * characters method on the passed ContentHandler for the 1597 * string-value of the given node (see http://www.w3.org/TR/xpath#data-model 1598 * for the definition of a node's string-value). Multiple calls to the 1599 * ContentHandler's characters methods may well occur for a single call to 1600 * this method. 1601 * 1602 * @param nodeHandle The node ID. 1603 * @param ch A non-null reference to a ContentHandler. 1604 * 1605 * @throws org.xml.sax.SAXException 1606 */ dispatchCharactersEvents( int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize)1607 public void dispatchCharactersEvents( 1608 int nodeHandle, org.xml.sax.ContentHandler ch, 1609 boolean normalize) 1610 throws org.xml.sax.SAXException 1611 { 1612 if(normalize) 1613 { 1614 XMLString str = getStringValue(nodeHandle); 1615 str = str.fixWhiteSpace(true, true, false); 1616 str.dispatchCharactersEvents(ch); 1617 } 1618 else 1619 { 1620 int type = getNodeType(nodeHandle); 1621 Node node = getNode(nodeHandle); 1622 dispatchNodeData(node, ch, 0); 1623 // Text coalition -- a DTM text node may represent multiple 1624 // DOM nodes. 1625 if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 1626 { 1627 while( null != (node=logicalNextDOMTextNode(node)) ) 1628 { 1629 dispatchNodeData(node, ch, 0); 1630 } 1631 } 1632 } 1633 } 1634 1635 /** 1636 * Retrieve the text content of a DOM subtree, appending it into a 1637 * user-supplied FastStringBuffer object. Note that attributes are 1638 * not considered part of the content of an element. 1639 * <p> 1640 * There are open questions regarding whitespace stripping. 1641 * Currently we make no special effort in that regard, since the standard 1642 * DOM doesn't yet provide DTD-based information to distinguish 1643 * whitespace-in-element-context from genuine #PCDATA. Note that we 1644 * should probably also consider xml:space if/when we address this. 1645 * DOM Level 3 may solve the problem for us. 1646 * <p> 1647 * %REVIEW% Note that as a DOM-level operation, it can be argued that this 1648 * routine _shouldn't_ perform any processing beyond what the DOM already 1649 * does, and that whitespace stripping and so on belong at the DTM level. 1650 * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. 1651 * 1652 * @param node Node whose subtree is to be walked, gathering the 1653 * contents of all Text or CDATASection nodes. 1654 */ dispatchNodeData(Node node, org.xml.sax.ContentHandler ch, int depth)1655 protected static void dispatchNodeData(Node node, 1656 org.xml.sax.ContentHandler ch, 1657 int depth) 1658 throws org.xml.sax.SAXException 1659 { 1660 1661 switch (node.getNodeType()) 1662 { 1663 case Node.DOCUMENT_FRAGMENT_NODE : 1664 case Node.DOCUMENT_NODE : 1665 case Node.ELEMENT_NODE : 1666 { 1667 for (Node child = node.getFirstChild(); null != child; 1668 child = child.getNextSibling()) 1669 { 1670 dispatchNodeData(child, ch, depth+1); 1671 } 1672 } 1673 break; 1674 case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% 1675 case Node.COMMENT_NODE : 1676 if(0 != depth) 1677 break; 1678 // NOTE: Because this operation works in the DOM space, it does _not_ attempt 1679 // to perform Text Coalition. That should only be done in DTM space. 1680 case Node.TEXT_NODE : 1681 case Node.CDATA_SECTION_NODE : 1682 case Node.ATTRIBUTE_NODE : 1683 String str = node.getNodeValue(); 1684 if(ch instanceof CharacterNodeHandler) 1685 { 1686 ((CharacterNodeHandler)ch).characters(node); 1687 } 1688 else 1689 { 1690 ch.characters(str.toCharArray(), 0, str.length()); 1691 } 1692 break; 1693 // /* case Node.PROCESSING_INSTRUCTION_NODE : 1694 // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); 1695 // break; */ 1696 default : 1697 // ignore 1698 break; 1699 } 1700 } 1701 1702 TreeWalker m_walker = new TreeWalker(null); 1703 1704 /** 1705 * Directly create SAX parser events from a subtree. 1706 * 1707 * @param nodeHandle The node ID. 1708 * @param ch A non-null reference to a ContentHandler. 1709 * 1710 * @throws org.xml.sax.SAXException 1711 */ dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)1712 public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) 1713 throws org.xml.sax.SAXException 1714 { 1715 TreeWalker treeWalker = m_walker; 1716 ContentHandler prevCH = treeWalker.getContentHandler(); 1717 1718 if(null != prevCH) 1719 { 1720 treeWalker = new TreeWalker(null); 1721 } 1722 treeWalker.setContentHandler(ch); 1723 1724 try 1725 { 1726 Node node = getNode(nodeHandle); 1727 treeWalker.traverseFragment(node); 1728 } 1729 finally 1730 { 1731 treeWalker.setContentHandler(null); 1732 } 1733 } 1734 1735 public interface CharacterNodeHandler 1736 { characters(Node node)1737 public void characters(Node node) 1738 throws org.xml.sax.SAXException; 1739 } 1740 1741 /** 1742 * For the moment all the run time properties are ignored by this 1743 * class. 1744 * 1745 * @param property a <code>String</code> value 1746 * @param value an <code>Object</code> value 1747 */ setProperty(String property, Object value)1748 public void setProperty(String property, Object value) 1749 { 1750 } 1751 1752 /** 1753 * No source information is available for DOM2DTM, so return 1754 * <code>null</code> here. 1755 * 1756 * @param node an <code>int</code> value 1757 * @return null 1758 */ getSourceLocatorFor(int node)1759 public SourceLocator getSourceLocatorFor(int node) 1760 { 1761 return null; 1762 } 1763 1764 } 1765 1766 1767