1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $ 20 */ 21 package org.apache.xml.serializer; 22 23 import java.io.IOException; 24 25 import org.apache.xml.serializer.utils.MsgKey; 26 import org.apache.xml.serializer.utils.Utils; 27 import org.xml.sax.Attributes; 28 import org.xml.sax.SAXException; 29 30 /** 31 * This class is not a public API. 32 * It is only public because it is used in other packages. 33 * This class converts SAX or SAX-like calls to a 34 * serialized document for xsl:output method of "text". 35 * @xsl.usage internal 36 */ 37 public class ToTextStream extends ToStream 38 { 39 40 41 /** 42 * Default constructor. 43 */ ToTextStream()44 public ToTextStream() 45 { 46 super(); 47 } 48 49 50 51 /** 52 * Receive notification of the beginning of a document. 53 * 54 * <p>The SAX parser will invoke this method only once, before any 55 * other methods in this interface or in DTDHandler (except for 56 * setDocumentLocator).</p> 57 * 58 * @throws org.xml.sax.SAXException Any SAX exception, possibly 59 * wrapping another exception. 60 * 61 * @throws org.xml.sax.SAXException 62 */ startDocumentInternal()63 protected void startDocumentInternal() throws org.xml.sax.SAXException 64 { 65 super.startDocumentInternal(); 66 67 m_needToCallStartDocument = false; 68 69 // No action for the moment. 70 } 71 72 /** 73 * Receive notification of the end of a document. 74 * 75 * <p>The SAX parser will invoke this method only once, and it will 76 * be the last method invoked during the parse. The parser shall 77 * not invoke this method until it has either abandoned parsing 78 * (because of an unrecoverable error) or reached the end of 79 * input.</p> 80 * 81 * @throws org.xml.sax.SAXException Any SAX exception, possibly 82 * wrapping another exception. 83 * 84 * @throws org.xml.sax.SAXException 85 */ endDocument()86 public void endDocument() throws org.xml.sax.SAXException 87 { 88 flushPending(); 89 flushWriter(); 90 if (m_tracer != null) 91 super.fireEndDoc(); 92 } 93 94 /** 95 * Receive notification of the beginning of an element. 96 * 97 * <p>The Parser will invoke this method at the beginning of every 98 * element in the XML document; there will be a corresponding 99 * endElement() event for every startElement() event (even when the 100 * element is empty). All of the element's content will be 101 * reported, in order, before the corresponding endElement() 102 * event.</p> 103 * 104 * <p>If the element name has a namespace prefix, the prefix will 105 * still be attached. Note that the attribute list provided will 106 * contain only attributes with explicit values (specified or 107 * defaulted): #IMPLIED attributes will be omitted.</p> 108 * 109 * 110 * @param namespaceURI The Namespace URI, or the empty string if the 111 * element has no Namespace URI or if Namespace 112 * processing is not being performed. 113 * @param localName The local name (without prefix), or the 114 * empty string if Namespace processing is not being 115 * performed. 116 * @param name The qualified name (with prefix), or the 117 * empty string if qualified names are not available. 118 * @param atts The attributes attached to the element, if any. 119 * @throws org.xml.sax.SAXException Any SAX exception, possibly 120 * wrapping another exception. 121 * @see #endElement 122 * @see org.xml.sax.AttributeList 123 * 124 * @throws org.xml.sax.SAXException 125 */ startElement( String namespaceURI, String localName, String name, Attributes atts)126 public void startElement( 127 String namespaceURI, String localName, String name, Attributes atts) 128 throws org.xml.sax.SAXException 129 { 130 // time to fire off startElement event 131 if (m_tracer != null) { 132 super.fireStartElem(name); 133 this.firePseudoAttributes(); 134 } 135 return; 136 } 137 138 /** 139 * Receive notification of the end of an element. 140 * 141 * <p>The SAX parser will invoke this method at the end of every 142 * element in the XML document; there will be a corresponding 143 * startElement() event for every endElement() event (even when the 144 * element is empty).</p> 145 * 146 * <p>If the element name has a namespace prefix, the prefix will 147 * still be attached to the name.</p> 148 * 149 * 150 * @param namespaceURI The Namespace URI, or the empty string if the 151 * element has no Namespace URI or if Namespace 152 * processing is not being performed. 153 * @param localName The local name (without prefix), or the 154 * empty string if Namespace processing is not being 155 * performed. 156 * @param name The qualified name (with prefix), or the 157 * empty string if qualified names are not available. 158 * @throws org.xml.sax.SAXException Any SAX exception, possibly 159 * wrapping another exception. 160 * 161 * @throws org.xml.sax.SAXException 162 */ endElement(String namespaceURI, String localName, String name)163 public void endElement(String namespaceURI, String localName, String name) 164 throws org.xml.sax.SAXException 165 { 166 if (m_tracer != null) 167 super.fireEndElem(name); 168 } 169 170 /** 171 * Receive notification of character data. 172 * 173 * <p>The Parser will call this method to report each chunk of 174 * character data. SAX parsers may return all contiguous character 175 * data in a single chunk, or they may split it into several 176 * chunks; however, all of the characters in any single event 177 * must come from the same external entity, so that the Locator 178 * provides useful information.</p> 179 * 180 * <p>The application must not attempt to read from the array 181 * outside of the specified range.</p> 182 * 183 * <p>Note that some parsers will report whitespace using the 184 * ignorableWhitespace() method rather than this one (validating 185 * parsers must do so).</p> 186 * 187 * @param ch The characters from the XML document. 188 * @param start The start position in the array. 189 * @param length The number of characters to read from the array. 190 * @throws org.xml.sax.SAXException Any SAX exception, possibly 191 * wrapping another exception. 192 * @see #ignorableWhitespace 193 * @see org.xml.sax.Locator 194 */ characters(char ch[], int start, int length)195 public void characters(char ch[], int start, int length) 196 throws org.xml.sax.SAXException 197 { 198 199 flushPending(); 200 201 try 202 { 203 if (inTemporaryOutputState()) { 204 /* leave characters un-processed as we are 205 * creating temporary output, the output generated by 206 * this serializer will be input to a final serializer 207 * later on and it will do the processing in final 208 * output state (not temporary output state). 209 * 210 * A "temporary" ToTextStream serializer is used to 211 * evaluate attribute value templates (for example), 212 * and the result of evaluating such a thing 213 * is fed into a final serializer later on. 214 */ 215 m_writer.write(ch, start, length); 216 } 217 else { 218 // In final output state we do process the characters! 219 writeNormalizedChars(ch, start, length, m_lineSepUse); 220 } 221 222 if (m_tracer != null) 223 super.fireCharEvent(ch, start, length); 224 } 225 catch(IOException ioe) 226 { 227 throw new SAXException(ioe); 228 } 229 } 230 231 /** 232 * If available, when the disable-output-escaping attribute is used, 233 * output raw text without escaping. 234 * 235 * @param ch The characters from the XML document. 236 * @param start The start position in the array. 237 * @param length The number of characters to read from the array. 238 * 239 * @throws org.xml.sax.SAXException Any SAX exception, possibly 240 * wrapping another exception. 241 */ charactersRaw(char ch[], int start, int length)242 public void charactersRaw(char ch[], int start, int length) 243 throws org.xml.sax.SAXException 244 { 245 246 try 247 { 248 writeNormalizedChars(ch, start, length, m_lineSepUse); 249 } 250 catch(IOException ioe) 251 { 252 throw new SAXException(ioe); 253 } 254 } 255 256 /** 257 * Normalize the characters, but don't escape. Different from 258 * SerializerToXML#writeNormalizedChars because it does not attempt to do 259 * XML escaping at all. 260 * 261 * @param ch The characters from the XML document. 262 * @param start The start position in the array. 263 * @param length The number of characters to read from the array. 264 * @param useLineSep true if the operating systems 265 * end-of-line separator should be output rather than a new-line character. 266 * 267 * @throws IOException 268 * @throws org.xml.sax.SAXException 269 */ writeNormalizedChars( final char ch[], final int start, final int length, final boolean useLineSep)270 void writeNormalizedChars( 271 final char ch[], 272 final int start, 273 final int length, 274 final boolean useLineSep) 275 throws IOException, org.xml.sax.SAXException 276 { 277 final String encoding = getEncoding(); 278 final java.io.Writer writer = m_writer; 279 final int end = start + length; 280 281 /* copy a few "constants" before the loop for performance */ 282 final char S_LINEFEED = CharInfo.S_LINEFEED; 283 284 // This for() loop always increments i by one at the end 285 // of the loop. Additional increments of i adjust for when 286 // two input characters (a high/low UTF16 surrogate pair) 287 // are processed. 288 for (int i = start; i < end; i++) { 289 final char c = ch[i]; 290 291 if (S_LINEFEED == c && useLineSep) { 292 writer.write(m_lineSep, 0, m_lineSepLen); 293 // one input char processed 294 } else if (m_encodingInfo.isInEncoding(c)) { 295 writer.write(c); 296 // one input char processed 297 } else if (Encodings.isHighUTF16Surrogate(c)) { 298 final int codePoint = writeUTF16Surrogate(c, ch, i, end); 299 if (codePoint != 0) { 300 // I think we can just emit the message, 301 // not crash and burn. 302 final String integralValue = Integer.toString(codePoint); 303 final String msg = Utils.messages.createMessage( 304 MsgKey.ER_ILLEGAL_CHARACTER, 305 new Object[] { integralValue, encoding }); 306 307 //Older behavior was to throw the message, 308 //but newer gentler behavior is to write a message to System.err 309 //throw new SAXException(msg); 310 System.err.println(msg); 311 312 } 313 i++; // two input chars processed 314 } else { 315 // Don't know what to do with this char, it is 316 // not in the encoding and not a high char in 317 // a surrogate pair, so write out as an entity ref 318 if (encoding != null) { 319 /* The output encoding is known, 320 * so somthing is wrong. 321 */ 322 323 // not in the encoding, so write out a character reference 324 writer.write('&'); 325 writer.write('#'); 326 writer.write(Integer.toString(c)); 327 writer.write(';'); 328 329 // I think we can just emit the message, 330 // not crash and burn. 331 final String integralValue = Integer.toString(c); 332 final String msg = Utils.messages.createMessage( 333 MsgKey.ER_ILLEGAL_CHARACTER, 334 new Object[] { integralValue, encoding }); 335 336 //Older behavior was to throw the message, 337 //but newer gentler behavior is to write a message to System.err 338 //throw new SAXException(msg); 339 System.err.println(msg); 340 } else { 341 /* The output encoding is not known, 342 * so just write it out as-is. 343 */ 344 writer.write(c); 345 } 346 347 // one input char was processed 348 } 349 } 350 } 351 352 /** 353 * Receive notification of cdata. 354 * 355 * <p>The Parser will call this method to report each chunk of 356 * character data. SAX parsers may return all contiguous character 357 * data in a single chunk, or they may split it into several 358 * chunks; however, all of the characters in any single event 359 * must come from the same external entity, so that the Locator 360 * provides useful information.</p> 361 * 362 * <p>The application must not attempt to read from the array 363 * outside of the specified range.</p> 364 * 365 * <p>Note that some parsers will report whitespace using the 366 * ignorableWhitespace() method rather than this one (validating 367 * parsers must do so).</p> 368 * 369 * @param ch The characters from the XML document. 370 * @param start The start position in the array. 371 * @param length The number of characters to read from the array. 372 * @throws org.xml.sax.SAXException Any SAX exception, possibly 373 * wrapping another exception. 374 * @see #ignorableWhitespace 375 * @see org.xml.sax.Locator 376 */ cdata(char ch[], int start, int length)377 public void cdata(char ch[], int start, int length) 378 throws org.xml.sax.SAXException 379 { 380 try 381 { 382 writeNormalizedChars(ch, start, length, m_lineSepUse); 383 if (m_tracer != null) 384 super.fireCDATAEvent(ch, start, length); 385 } 386 catch(IOException ioe) 387 { 388 throw new SAXException(ioe); 389 } 390 } 391 392 /** 393 * Receive notification of ignorable whitespace in element content. 394 * 395 * <p>Validating Parsers must use this method to report each chunk 396 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 397 * section 2.10): non-validating parsers may also use this method 398 * if they are capable of parsing and using content models.</p> 399 * 400 * <p>SAX parsers may return all contiguous whitespace in a single 401 * chunk, or they may split it into several chunks; however, all of 402 * the characters in any single event must come from the same 403 * external entity, so that the Locator provides useful 404 * information.</p> 405 * 406 * <p>The application must not attempt to read from the array 407 * outside of the specified range.</p> 408 * 409 * @param ch The characters from the XML document. 410 * @param start The start position in the array. 411 * @param length The number of characters to read from the array. 412 * @throws org.xml.sax.SAXException Any SAX exception, possibly 413 * wrapping another exception. 414 * @see #characters 415 * 416 * @throws org.xml.sax.SAXException 417 */ ignorableWhitespace(char ch[], int start, int length)418 public void ignorableWhitespace(char ch[], int start, int length) 419 throws org.xml.sax.SAXException 420 { 421 422 try 423 { 424 writeNormalizedChars(ch, start, length, m_lineSepUse); 425 } 426 catch(IOException ioe) 427 { 428 throw new SAXException(ioe); 429 } 430 } 431 432 /** 433 * Receive notification of a processing instruction. 434 * 435 * <p>The Parser will invoke this method once for each processing 436 * instruction found: note that processing instructions may occur 437 * before or after the main document element.</p> 438 * 439 * <p>A SAX parser should never report an XML declaration (XML 1.0, 440 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 441 * using this method.</p> 442 * 443 * @param target The processing instruction target. 444 * @param data The processing instruction data, or null if 445 * none was supplied. 446 * @throws org.xml.sax.SAXException Any SAX exception, possibly 447 * wrapping another exception. 448 * 449 * @throws org.xml.sax.SAXException 450 */ processingInstruction(String target, String data)451 public void processingInstruction(String target, String data) 452 throws org.xml.sax.SAXException 453 { 454 // flush anything pending first 455 flushPending(); 456 457 if (m_tracer != null) 458 super.fireEscapingEvent(target, data); 459 } 460 461 /** 462 * Called when a Comment is to be constructed. 463 * Note that Xalan will normally invoke the other version of this method. 464 * %REVIEW% In fact, is this one ever needed, or was it a mistake? 465 * 466 * @param data The comment data. 467 * @throws org.xml.sax.SAXException Any SAX exception, possibly 468 * wrapping another exception. 469 */ comment(String data)470 public void comment(String data) throws org.xml.sax.SAXException 471 { 472 final int length = data.length(); 473 if (length > m_charsBuff.length) 474 { 475 m_charsBuff = new char[length*2 + 1]; 476 } 477 data.getChars(0, length, m_charsBuff, 0); 478 comment(m_charsBuff, 0, length); 479 } 480 481 /** 482 * Report an XML comment anywhere in the document. 483 * 484 * This callback will be used for comments inside or outside the 485 * document element, including comments in the external DTD 486 * subset (if read). 487 * 488 * @param ch An array holding the characters in the comment. 489 * @param start The starting position in the array. 490 * @param length The number of characters to use from the array. 491 * @throws org.xml.sax.SAXException The application may raise an exception. 492 */ comment(char ch[], int start, int length)493 public void comment(char ch[], int start, int length) 494 throws org.xml.sax.SAXException 495 { 496 497 flushPending(); 498 if (m_tracer != null) 499 super.fireCommentEvent(ch, start, length); 500 } 501 502 /** 503 * Receive notivication of a entityReference. 504 * 505 * @param name non-null reference to the name of the entity. 506 * 507 * @throws org.xml.sax.SAXException 508 */ entityReference(String name)509 public void entityReference(String name) throws org.xml.sax.SAXException 510 { 511 if (m_tracer != null) 512 super.fireEntityReference(name); 513 } 514 515 /** 516 * @see ExtendedContentHandler#addAttribute(String, String, String, String, String) 517 */ addAttribute( String uri, String localName, String rawName, String type, String value, boolean XSLAttribute)518 public void addAttribute( 519 String uri, 520 String localName, 521 String rawName, 522 String type, 523 String value, 524 boolean XSLAttribute) 525 { 526 // do nothing, just forget all about the attribute 527 } 528 529 /** 530 * @see org.xml.sax.ext.LexicalHandler#endCDATA() 531 */ endCDATA()532 public void endCDATA() throws SAXException 533 { 534 // do nothing 535 } 536 537 /** 538 * @see ExtendedContentHandler#endElement(String) 539 */ endElement(String elemName)540 public void endElement(String elemName) throws SAXException 541 { 542 if (m_tracer != null) 543 super.fireEndElem(elemName); 544 } 545 546 /** 547 * From XSLTC 548 */ startElement( String elementNamespaceURI, String elementLocalName, String elementName)549 public void startElement( 550 String elementNamespaceURI, 551 String elementLocalName, 552 String elementName) 553 throws SAXException 554 { 555 if (m_needToCallStartDocument) 556 startDocumentInternal(); 557 // time to fire off startlement event. 558 if (m_tracer != null) { 559 super.fireStartElem(elementName); 560 this.firePseudoAttributes(); 561 } 562 563 return; 564 } 565 566 567 /** 568 * From XSLTC 569 */ characters(String characters)570 public void characters(String characters) 571 throws SAXException 572 { 573 final int length = characters.length(); 574 if (length > m_charsBuff.length) 575 { 576 m_charsBuff = new char[length*2 + 1]; 577 } 578 characters.getChars(0, length, m_charsBuff, 0); 579 characters(m_charsBuff, 0, length); 580 } 581 582 583 /** 584 * From XSLTC 585 */ addAttribute(String name, String value)586 public void addAttribute(String name, String value) 587 { 588 // do nothing, forget about the attribute 589 } 590 591 /** 592 * Add a unique attribute 593 */ addUniqueAttribute(String qName, String value, int flags)594 public void addUniqueAttribute(String qName, String value, int flags) 595 throws SAXException 596 { 597 // do nothing, forget about the attribute 598 } 599 startPrefixMapping( String prefix, String uri, boolean shouldFlush)600 public boolean startPrefixMapping( 601 String prefix, 602 String uri, 603 boolean shouldFlush) 604 throws SAXException 605 { 606 // no namespace support for HTML 607 return false; 608 } 609 610 startPrefixMapping(String prefix, String uri)611 public void startPrefixMapping(String prefix, String uri) 612 throws org.xml.sax.SAXException 613 { 614 // no namespace support for HTML 615 } 616 617 namespaceAfterStartElement( final String prefix, final String uri)618 public void namespaceAfterStartElement( 619 final String prefix, 620 final String uri) 621 throws SAXException 622 { 623 // no namespace support for HTML 624 } 625 flushPending()626 public void flushPending() throws org.xml.sax.SAXException 627 { 628 if (m_needToCallStartDocument) 629 { 630 startDocumentInternal(); 631 m_needToCallStartDocument = false; 632 } 633 } 634 } 635