/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: ToHTMLStream.java 468654 2006-10-28 07:09:23Z minchau $
 */
package org.apache.xml.serializer;

import java.io.IOException;
import java.util.Properties;

import javax.xml.transform.Result;

import org.apache.xml.serializer.utils.MsgKey;
import org.apache.xml.serializer.utils.Utils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * This serializer takes a series of SAX or
 * SAX-like events and writes its output
 * to the given stream.
 *
 * This class is not a public API, it is public
 * because it is used from another package.
 *
 * @xsl.usage internal
 */
public class ToHTMLStream extends ToStream
{

    /** This flag is set while receiving events from the DTD */
    protected boolean m_inDTD = false;

    /**
     * Block-element tracking flag used by the indentation logic
     * (the original author noted: "seems like this needs to be a stack. -sb").
     *
     * NOTE(review): the original comment described this as "true if the
     * current element is a block element", but startElement() and endElement()
     * both assign it the negation of the element's ElemDesc.BLOCK flag, so as
     * written it is true after a non-block element. Confirm the intended
     * meaning before relying on the name.
     */
    private boolean m_inBlockElem = false;

    /**
     * Map that tells which XML characters should have special treatment, and it
     * provides character to entity name lookup.
*/
    private final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
    static final Trie m_elementFlags = new Trie();

    static {
        initTagReference(m_elementFlags);
    }

    /**
     * Fills the given trie with one ElemDesc per known HTML element and then
     * records, on the descriptions just stored, which attributes carry URLs
     * (ElemDesc.ATTRURL) and which may be written in minimized form
     * (ElemDesc.ATTREMPTY).
     *
     * Every element gets its own ElemDesc instance, because the attribute
     * pass below mutates the stored descriptions through setAttr().
     *
     * @param m_elementFlags the trie to populate; the parameter shadows the
     *        static field of the same name.
     */
    static void initTagReference(Trie m_elementFlags) {

        final Trie tags = m_elementFlags;

        // Flag combinations that recur in the table below.
        final int emptyBlock = ElemDesc.EMPTY | ElemDesc.BLOCK;
        final int blockFormFieldset =
            ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET;
        final int specialAspecial = ElemDesc.SPECIAL | ElemDesc.ASPECIAL;
        final int headBlock = ElemDesc.HEAD | ElemDesc.BLOCK;
        final int listBlock = ElemDesc.LIST | ElemDesc.BLOCK;
        final int formctrlInlineLabel = ElemDesc.FORMCTRL | ElemDesc.INLINELABEL;
        final int headMiscEmptyBlock =
            ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK;

        // HTML 4.0 loose DTD
        tags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
        tags.put("FRAME", new ElemDesc(emptyBlock));
        tags.put("FRAMESET", new ElemDesc(ElemDesc.BLOCK));
        tags.put("NOFRAMES", new ElemDesc(ElemDesc.BLOCK));
        tags.put("ISINDEX", new ElemDesc(emptyBlock));
        tags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
        tags.put("CENTER", new ElemDesc(ElemDesc.BLOCK));
        tags.put("DIR", new ElemDesc(ElemDesc.BLOCK));
        tags.put("MENU", new ElemDesc(ElemDesc.BLOCK));

        // HTML 4.0 strict DTD
        tags.put("TT", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("I", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("B", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("BIG", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("SMALL", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("EM", new ElemDesc(ElemDesc.PHRASE));
        tags.put("STRONG", new ElemDesc(ElemDesc.PHRASE));
        tags.put("DFN", new ElemDesc(ElemDesc.PHRASE));
        tags.put("CODE", new ElemDesc(ElemDesc.PHRASE));
        tags.put("SAMP", new ElemDesc(ElemDesc.PHRASE));
        tags.put("KBD", new ElemDesc(ElemDesc.PHRASE));
        tags.put("VAR", new ElemDesc(ElemDesc.PHRASE));
        tags.put("CITE", new ElemDesc(ElemDesc.PHRASE));
        tags.put("ABBR", new ElemDesc(ElemDesc.PHRASE));
        tags.put("ACRONYM", new ElemDesc(ElemDesc.PHRASE));
        tags.put("SUP", new ElemDesc(specialAspecial));
        tags.put("SUB", new ElemDesc(specialAspecial));
        tags.put("SPAN", new ElemDesc(specialAspecial));
        tags.put("BDO", new ElemDesc(specialAspecial));
        tags.put("BR", new ElemDesc(specialAspecial | ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
        tags.put("ADDRESS", new ElemDesc(blockFormFieldset));
        tags.put("DIV", new ElemDesc(blockFormFieldset));
        tags.put("A", new ElemDesc(ElemDesc.SPECIAL));
        tags.put("MAP", new ElemDesc(specialAspecial | ElemDesc.BLOCK));
        tags.put("AREA", new ElemDesc(emptyBlock));
        tags.put("LINK", new ElemDesc(headMiscEmptyBlock));
        tags.put(
            "IMG",
            new ElemDesc(specialAspecial | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE));
        tags.put(
            "OBJECT",
            new ElemDesc(
                specialAspecial | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE));
        tags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
        tags.put("HR", new ElemDesc(blockFormFieldset | ElemDesc.EMPTY));
        tags.put("P", new ElemDesc(blockFormFieldset));
        tags.put("H1", new ElemDesc(headBlock));
        tags.put("H2", new ElemDesc(headBlock));
        tags.put("H3", new ElemDesc(headBlock));
        tags.put("H4", new ElemDesc(headBlock));
        tags.put("H5", new ElemDesc(headBlock));
        tags.put("H6", new ElemDesc(headBlock));
        tags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
        tags.put("Q", new ElemDesc(specialAspecial));
        tags.put("BLOCKQUOTE", new ElemDesc(blockFormFieldset));
        tags.put("INS", new ElemDesc(0));
        tags.put("DEL", new ElemDesc(0));
        tags.put("DL", new ElemDesc(blockFormFieldset));
        tags.put("DT", new ElemDesc(ElemDesc.BLOCK));
        tags.put("DD", new ElemDesc(ElemDesc.BLOCK));
        tags.put("OL", new ElemDesc(listBlock));
        tags.put("UL", new ElemDesc(listBlock));
        tags.put("LI", new ElemDesc(ElemDesc.BLOCK));
        tags.put("FORM", new ElemDesc(ElemDesc.BLOCK));
        tags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
        tags.put("INPUT", new ElemDesc(formctrlInlineLabel | ElemDesc.EMPTY));
        tags.put("SELECT", new ElemDesc(formctrlInlineLabel));
        tags.put("OPTGROUP", new ElemDesc(0));
        tags.put("OPTION", new ElemDesc(0));
        tags.put("TEXTAREA", new ElemDesc(formctrlInlineLabel));
        tags.put("FIELDSET", new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
        tags.put("LEGEND", new ElemDesc(0));
        tags.put("BUTTON", new ElemDesc(formctrlInlineLabel));
        tags.put("TABLE", new ElemDesc(blockFormFieldset));
        tags.put("CAPTION", new ElemDesc(ElemDesc.BLOCK));
        tags.put("THEAD", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TFOOT", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TBODY", new ElemDesc(ElemDesc.BLOCK));
        tags.put("COLGROUP", new ElemDesc(ElemDesc.BLOCK));
        tags.put("COL", new ElemDesc(emptyBlock));
        tags.put("TR", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TH", new ElemDesc(0));
        tags.put("TD", new ElemDesc(0));
        tags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
        tags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
        tags.put("BASE", new ElemDesc(emptyBlock));
        tags.put("META", new ElemDesc(headMiscEmptyBlock));
        tags.put(
            "STYLE",
            new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
        tags.put(
            "SCRIPT",
            new ElemDesc(specialAspecial | ElemDesc.HEADMISC | ElemDesc.RAW));
        tags.put("NOSCRIPT", new ElemDesc(blockFormFieldset));
        tags.put("HTML", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HTMLELEM));

        // Transitional (loose) DTD additions contributed by John Ky.
        tags.put("FONT", new ElemDesc(ElemDesc.FONTSTYLE));

        // Presentation elements: S, STRIKE and U.
        tags.put("S", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("STRIKE", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("U", new ElemDesc(ElemDesc.FONTSTYLE));

        // Contributed by John Ky.
        tags.put("NOBR", new ElemDesc(ElemDesc.FONTSTYLE));

        // HTML 4.0, section 16.5
        tags.put("IFRAME", new ElemDesc(blockFormFieldset));

        // Netscape 4 extensions
        tags.put("LAYER", new ElemDesc(blockFormFieldset));
        tags.put("ILAYER", new ElemDesc(blockFormFieldset));

        // Attribute information: look each element up again (the lookups use
        // lower-case names) and flag its URL-valued and minimizable attributes.
        ElemDesc desc;

        desc = (ElemDesc) tags.get("a");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NAME", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("area");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("base");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("button");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("blockquote");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("del");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("dir");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("div");
        desc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) tags.get("dl");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("form");
        desc.setAttr("ACTION", ElemDesc.ATTRURL);

        // FRAME attributes contributed by Dimitry Voytenko.
        desc = (ElemDesc) tags.get("frame");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("head");
        desc.setAttr("PROFILE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("hr");
        desc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);

        // HTML 4.0, section 16.5
        desc = (ElemDesc) tags.get("iframe");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) tags.get("ilayer");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("img");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("input");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("ins");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) tags.get("layer");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("link");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("menu");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("object");
        desc.setAttr("CLASSID", ElemDesc.ATTRURL);
        desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
        desc.setAttr("DATA", ElemDesc.ATTRURL);
        desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("ol");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("optgroup");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("option");
        desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("q");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("script");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("FOR", ElemDesc.ATTRURL);
        desc.setAttr("DEFER", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("select");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("table");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) tags.get("td");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("textarea");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("th");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        // The nowrap attribute of a tr element is both
        // a Netscape and Internet-Explorer extension
        desc = (ElemDesc) tags.get("tr");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("ul");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    }

    /**
     * Fallback description handed out for element names that have no entry
     * in the element table.
     */
    private static final ElemDesc m_dummy = new ElemDesc(ElemDesc.BLOCK);

    /** True if URLs should be specially escaped with the %xx form. */
    private boolean m_specialEscapeURLs = true;

    /** True if the META tag should be omitted. */
    private boolean m_omitMetaTag = false;

    /**
     * Switches the special %xx escaping of URL-valued attributes on or off.
     *
     * @param bool True if URLs should be specially escaped with the %xx form.
     */
    public void setSpecialEscapeURLs(boolean bool)
    {
        m_specialEscapeURLs = bool;
    }

    /**
     * Tells if the formatter should omit the META tag.
     *
     * @param bool True if the META tag should be omitted.
544 */ setOmitMetaTag(boolean bool)545 public void setOmitMetaTag(boolean bool) 546 { 547 m_omitMetaTag = bool; 548 } 549 550 /** 551 * Specifies an output format for this serializer. It the 552 * serializer has already been associated with an output format, 553 * it will switch to the new format. This method should not be 554 * called while the serializer is in the process of serializing 555 * a document. 556 * 557 * This method can be called multiple times before starting 558 * the serialization of a particular result-tree. In principle 559 * all serialization parameters can be changed, with the exception 560 * of method="html" (it must be method="html" otherwise we 561 * shouldn't even have a ToHTMLStream object here!) 562 * 563 * @param format The output format or serialzation parameters 564 * to use. 565 */ setOutputFormat(Properties format)566 public void setOutputFormat(Properties format) 567 { 568 /* 569 * If "format" does not contain the property 570 * S_USE_URL_ESCAPING, then don't set this value at all, 571 * just leave as-is rather than explicitly setting it. 572 */ 573 String value; 574 value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING); 575 if (value != null) { 576 m_specialEscapeURLs = 577 OutputPropertyUtils.getBooleanProperty( 578 OutputPropertiesFactory.S_USE_URL_ESCAPING, 579 format); 580 } 581 582 /* 583 * If "format" does not contain the property 584 * S_OMIT_META_TAG, then don't set this value at all, 585 * just leave as-is rather than explicitly setting it. 586 */ 587 value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG); 588 if (value != null) { 589 m_omitMetaTag = 590 OutputPropertyUtils.getBooleanProperty( 591 OutputPropertiesFactory.S_OMIT_META_TAG, 592 format); 593 } 594 595 super.setOutputFormat(format); 596 } 597 598 /** 599 * Tells if the formatter should use special URL escaping. 600 * 601 * @return True if URLs should be specially escaped with the %xx form. 
602 */ getSpecialEscapeURLs()603 private final boolean getSpecialEscapeURLs() 604 { 605 return m_specialEscapeURLs; 606 } 607 608 /** 609 * Tells if the formatter should omit the META tag. 610 * 611 * @return True if the META tag should be omitted. 612 */ getOmitMetaTag()613 private final boolean getOmitMetaTag() 614 { 615 return m_omitMetaTag; 616 } 617 618 /** 619 * Get a description of the given element. 620 * 621 * @param name non-null name of element, case insensitive. 622 * 623 * @return non-null reference to ElemDesc, which may be m_dummy if no 624 * element description matches the given name. 625 */ getElemDesc(String name)626 public static final ElemDesc getElemDesc(String name) 627 { 628 /* this method used to return m_dummy when name was null 629 * but now it doesn't check and and requires non-null name. 630 */ 631 Object obj = m_elementFlags.get(name); 632 if (null != obj) 633 return (ElemDesc)obj; 634 return m_dummy; 635 } 636 637 638 /** 639 * A Trie that is just a copy of the "static" one. 640 * We need this one to be able to use the faster, but not thread-safe 641 * method Trie.get2(name) 642 */ 643 private Trie m_htmlInfo = new Trie(m_elementFlags); 644 /** 645 * Calls to this method could be replaced with calls to 646 * getElemDesc(name), but this one should be faster. 647 */ getElemDesc2(String name)648 private ElemDesc getElemDesc2(String name) 649 { 650 Object obj = m_htmlInfo.get2(name); 651 if (null != obj) 652 return (ElemDesc)obj; 653 return m_dummy; 654 } 655 656 /** 657 * Default constructor. 658 */ ToHTMLStream()659 public ToHTMLStream() 660 { 661 662 super(); 663 // we are just constructing this thing, no output properties 664 // have been used, so we will set the right default for 665 // indenting anyways 666 m_doIndent = true; 667 m_charInfo = m_htmlcharInfo; 668 // initialize namespaces 669 m_prefixMap = new NamespaceMappings(); 670 671 } 672 673 /** The name of the current element. 
*/ 674 // private String m_currentElementName = null; 675 676 /** 677 * Receive notification of the beginning of a document. 678 * 679 * @throws org.xml.sax.SAXException Any SAX exception, possibly 680 * wrapping another exception. 681 * 682 * @throws org.xml.sax.SAXException 683 */ startDocumentInternal()684 protected void startDocumentInternal() throws org.xml.sax.SAXException 685 { 686 super.startDocumentInternal(); 687 688 m_needToCallStartDocument = false; 689 m_needToOutputDocTypeDecl = true; 690 m_startNewLine = false; 691 setOmitXMLDeclaration(true); 692 } 693 694 /** 695 * This method should only get called once. 696 * If a DOCTYPE declaration needs to get written out, it will 697 * be written out. If it doesn't need to be written out, then 698 * the call to this method has no effect. 699 */ outputDocTypeDecl(String name)700 private void outputDocTypeDecl(String name) throws SAXException { 701 if (true == m_needToOutputDocTypeDecl) 702 { 703 String doctypeSystem = getDoctypeSystem(); 704 String doctypePublic = getDoctypePublic(); 705 if ((null != doctypeSystem) || (null != doctypePublic)) 706 { 707 final java.io.Writer writer = m_writer; 708 try 709 { 710 writer.write("<!DOCTYPE "); 711 writer.write(name); 712 713 if (null != doctypePublic) 714 { 715 writer.write(" PUBLIC \""); 716 writer.write(doctypePublic); 717 writer.write('"'); 718 } 719 720 if (null != doctypeSystem) 721 { 722 if (null == doctypePublic) 723 writer.write(" SYSTEM \""); 724 else 725 writer.write(" \""); 726 727 writer.write(doctypeSystem); 728 writer.write('"'); 729 } 730 731 writer.write('>'); 732 outputLineSep(); 733 } 734 catch(IOException e) 735 { 736 throw new SAXException(e); 737 } 738 } 739 } 740 741 m_needToOutputDocTypeDecl = false; 742 } 743 744 /** 745 * Receive notification of the end of a document. 746 * 747 * @throws org.xml.sax.SAXException Any SAX exception, possibly 748 * wrapping another exception. 
749 * 750 * @throws org.xml.sax.SAXException 751 */ endDocument()752 public final void endDocument() throws org.xml.sax.SAXException 753 { 754 755 flushPending(); 756 if (m_doIndent && !m_isprevtext) 757 { 758 try 759 { 760 outputLineSep(); 761 } 762 catch(IOException e) 763 { 764 throw new SAXException(e); 765 } 766 } 767 768 flushWriter(); 769 if (m_tracer != null) 770 super.fireEndDoc(); 771 } 772 773 /** 774 * Receive notification of the beginning of an element. 775 * 776 * 777 * @param namespaceURI 778 * @param localName 779 * @param name The element type name. 780 * @param atts The attributes attached to the element, if any. 781 * @throws org.xml.sax.SAXException Any SAX exception, possibly 782 * wrapping another exception. 783 * @see #endElement 784 * @see org.xml.sax.AttributeList 785 */ startElement( String namespaceURI, String localName, String name, Attributes atts)786 public void startElement( 787 String namespaceURI, 788 String localName, 789 String name, 790 Attributes atts) 791 throws org.xml.sax.SAXException 792 { 793 794 ElemContext elemContext = m_elemContext; 795 796 // clean up any pending things first 797 if (elemContext.m_startTagOpen) 798 { 799 closeStartTag(); 800 elemContext.m_startTagOpen = false; 801 } 802 else if (m_cdataTagOpen) 803 { 804 closeCDATA(); 805 m_cdataTagOpen = false; 806 } 807 else if (m_needToCallStartDocument) 808 { 809 startDocumentInternal(); 810 m_needToCallStartDocument = false; 811 } 812 813 if (m_needToOutputDocTypeDecl) { 814 String n = name; 815 if (n == null || n.length() == 0) { 816 // If the lexical QName is not given 817 // use the localName in the DOCTYPE 818 n = localName; 819 } 820 outputDocTypeDecl(n); 821 } 822 823 824 // if this element has a namespace then treat it like XML 825 if (null != namespaceURI && namespaceURI.length() > 0) 826 { 827 super.startElement(namespaceURI, localName, name, atts); 828 829 return; 830 } 831 832 try 833 { 834 // getElemDesc2(name) is faster than getElemDesc(name) 835 
ElemDesc elemDesc = getElemDesc2(name); 836 int elemFlags = elemDesc.getFlags(); 837 838 // deal with indentation issues first 839 if (m_doIndent) 840 { 841 842 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 843 if (m_ispreserve) 844 m_ispreserve = false; 845 else if ( 846 (null != elemContext.m_elementName) 847 && (!m_inBlockElem 848 || isBlockElement) /* && !isWhiteSpaceSensitive */ 849 ) 850 { 851 m_startNewLine = true; 852 853 indent(); 854 855 } 856 m_inBlockElem = !isBlockElement; 857 } 858 859 // save any attributes for later processing 860 if (atts != null) 861 addAttributes(atts); 862 863 m_isprevtext = false; 864 final java.io.Writer writer = m_writer; 865 writer.write('<'); 866 writer.write(name); 867 868 869 870 if (m_tracer != null) 871 firePseudoAttributes(); 872 873 if ((elemFlags & ElemDesc.EMPTY) != 0) 874 { 875 // an optimization for elements which are expected 876 // to be empty. 877 m_elemContext = elemContext.push(); 878 /* XSLTC sometimes calls namespaceAfterStartElement() 879 * so we need to remember the name 880 */ 881 m_elemContext.m_elementName = name; 882 m_elemContext.m_elementDesc = elemDesc; 883 return; 884 } 885 else 886 { 887 elemContext = elemContext.push(namespaceURI,localName,name); 888 m_elemContext = elemContext; 889 elemContext.m_elementDesc = elemDesc; 890 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 891 } 892 893 894 if ((elemFlags & ElemDesc.HEADELEM) != 0) 895 { 896 // This is the <HEAD> element, do some special processing 897 closeStartTag(); 898 elemContext.m_startTagOpen = false; 899 if (!m_omitMetaTag) 900 { 901 if (m_doIndent) 902 indent(); 903 writer.write( 904 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 905 String encoding = getEncoding(); 906 String encode = Encodings.getMimeEncoding(encoding); 907 writer.write(encode); 908 writer.write("\">"); 909 } 910 } 911 } 912 catch (IOException e) 913 { 914 throw new SAXException(e); 915 } 916 } 917 918 /** 919 * Receive 
notification of the end of an element. 920 * 921 * 922 * @param namespaceURI 923 * @param localName 924 * @param name The element type name 925 * @throws org.xml.sax.SAXException Any SAX exception, possibly 926 * wrapping another exception. 927 */ endElement( final String namespaceURI, final String localName, final String name)928 public final void endElement( 929 final String namespaceURI, 930 final String localName, 931 final String name) 932 throws org.xml.sax.SAXException 933 { 934 // deal with any pending issues 935 if (m_cdataTagOpen) 936 closeCDATA(); 937 938 // if the element has a namespace, treat it like XML, not HTML 939 if (null != namespaceURI && namespaceURI.length() > 0) 940 { 941 super.endElement(namespaceURI, localName, name); 942 943 return; 944 } 945 946 try 947 { 948 949 ElemContext elemContext = m_elemContext; 950 final ElemDesc elemDesc = elemContext.m_elementDesc; 951 final int elemFlags = elemDesc.getFlags(); 952 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 953 954 // deal with any indentation issues 955 if (m_doIndent) 956 { 957 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 958 boolean shouldIndent = false; 959 960 if (m_ispreserve) 961 { 962 m_ispreserve = false; 963 } 964 else if (m_doIndent && (!m_inBlockElem || isBlockElement)) 965 { 966 m_startNewLine = true; 967 shouldIndent = true; 968 } 969 if (!elemContext.m_startTagOpen && shouldIndent) 970 indent(elemContext.m_currentElemDepth - 1); 971 m_inBlockElem = !isBlockElement; 972 } 973 974 final java.io.Writer writer = m_writer; 975 if (!elemContext.m_startTagOpen) 976 { 977 writer.write("</"); 978 writer.write(name); 979 writer.write('>'); 980 } 981 else 982 { 983 // the start-tag open when this method was called, 984 // so we need to process it now. 
985 986 if (m_tracer != null) 987 super.fireStartElem(name); 988 989 // the starting tag was still open when we received this endElement() call 990 // so we need to process any gathered attributes NOW, before they go away. 991 int nAttrs = m_attributes.getLength(); 992 if (nAttrs > 0) 993 { 994 processAttributes(m_writer, nAttrs); 995 // clear attributes object for re-use with next element 996 m_attributes.clear(); 997 } 998 if (!elemEmpty) 999 { 1000 // As per Dave/Paul recommendation 12/06/2000 1001 // if (shouldIndent) 1002 // writer.write('>'); 1003 // indent(m_currentIndent); 1004 1005 writer.write("></"); 1006 writer.write(name); 1007 writer.write('>'); 1008 } 1009 else 1010 { 1011 writer.write('>'); 1012 } 1013 } 1014 1015 // clean up because the element has ended 1016 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) 1017 m_ispreserve = true; 1018 m_isprevtext = false; 1019 1020 // fire off the end element event 1021 if (m_tracer != null) 1022 super.fireEndElem(name); 1023 1024 // OPTIMIZE-EMPTY 1025 if (elemEmpty) 1026 { 1027 // a quick exit if the HTML element had no children. 1028 // This block of code can be removed if the corresponding block of code 1029 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 1030 m_elemContext = elemContext.m_prev; 1031 return; 1032 } 1033 1034 // some more clean because the element has ended. 1035 if (!elemContext.m_startTagOpen) 1036 { 1037 if (m_doIndent && !m_preserves.isEmpty()) 1038 m_preserves.pop(); 1039 } 1040 m_elemContext = elemContext.m_prev; 1041 // m_isRawStack.pop(); 1042 } 1043 catch (IOException e) 1044 { 1045 throw new SAXException(e); 1046 } 1047 } 1048 1049 /** 1050 * Process an attribute. 1051 * @param writer The writer to write the processed output to. 1052 * @param name The name of the attribute. 1053 * @param value The value of the attribute. 1054 * @param elemDesc The description of the HTML element 1055 * that has this attribute. 
1056 * 1057 * @throws org.xml.sax.SAXException 1058 */ processAttribute( java.io.Writer writer, String name, String value, ElemDesc elemDesc)1059 protected void processAttribute( 1060 java.io.Writer writer, 1061 String name, 1062 String value, 1063 ElemDesc elemDesc) 1064 throws IOException 1065 { 1066 writer.write(' '); 1067 1068 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1069 && elemDesc != null 1070 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1071 { 1072 writer.write(name); 1073 } 1074 else 1075 { 1076 // %REVIEW% %OPT% 1077 // Two calls to single-char write may NOT 1078 // be more efficient than one to string-write... 1079 writer.write(name); 1080 writer.write("=\""); 1081 if ( elemDesc != null 1082 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1083 writeAttrURI(writer, value, m_specialEscapeURLs); 1084 else 1085 writeAttrString(writer, value, this.getEncoding()); 1086 writer.write('"'); 1087 1088 } 1089 } 1090 1091 /** 1092 * Tell if a character is an ASCII digit. 1093 */ isASCIIDigit(char c)1094 private boolean isASCIIDigit(char c) 1095 { 1096 return (c >= '0' && c <= '9'); 1097 } 1098 1099 /** 1100 * Make an integer into an HH hex value. 1101 * Does no checking on the size of the input, since this 1102 * is only meant to be used locally by writeAttrURI. 1103 * 1104 * @param i must be a value less than 255. 1105 * 1106 * @return should be a two character string. 1107 */ makeHHString(int i)1108 private static String makeHHString(int i) 1109 { 1110 String s = Integer.toHexString(i).toUpperCase(); 1111 if (s.length() == 1) 1112 { 1113 s = "0" + s; 1114 } 1115 return s; 1116 } 1117 1118 /** 1119 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1120 * @param str must be 2 characters long 1121 * 1122 * @return true or false 1123 */ isHHSign(String str)1124 private boolean isHHSign(String str) 1125 { 1126 boolean sign = true; 1127 try 1128 { 1129 char r = (char) Integer.parseInt(str, 16); 1130 } 1131 catch (NumberFormatException e) 1132 { 1133 sign = false; 1134 } 1135 return sign; 1136 } 1137 1138 /** 1139 * Write the specified <var>string</var> after substituting non ASCII characters, 1140 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1141 * 1142 * @param string String to convert to XML format. 1143 * @param doURLEscaping True if we should try to encode as 1144 * per http://www.ietf.org/rfc/rfc2396.txt. 1145 * 1146 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1147 */ writeAttrURI( final java.io.Writer writer, String string, boolean doURLEscaping)1148 public void writeAttrURI( 1149 final java.io.Writer writer, String string, boolean doURLEscaping) 1150 throws IOException 1151 { 1152 // http://www.ietf.org/rfc/rfc2396.txt says: 1153 // A URI is always in an "escaped" form, since escaping or unescaping a 1154 // completed URI might change its semantics. Normally, the only time 1155 // escape encodings can safely be made is when the URI is being created 1156 // from its component parts; each component may have its own set of 1157 // characters that are reserved, so only the mechanism responsible for 1158 // generating or interpreting that component can determine whether or 1159 // not escaping a character will change its semantics. Likewise, a URI 1160 // must be separated into its components before the escaped characters 1161 // within those components can be safely decoded. 1162 // 1163 // ...So we do our best to do limited escaping of the URL, without 1164 // causing damage. If the URL is already properly escaped, in theory, this 1165 // function should not change the string value. 
1166 1167 final int end = string.length(); 1168 if (end > m_attrBuff.length) 1169 { 1170 m_attrBuff = new char[end*2 + 1]; 1171 } 1172 string.getChars(0,end, m_attrBuff, 0); 1173 final char[] chars = m_attrBuff; 1174 1175 int cleanStart = 0; 1176 int cleanLength = 0; 1177 1178 1179 char ch = 0; 1180 for (int i = 0; i < end; i++) 1181 { 1182 ch = chars[i]; 1183 1184 if ((ch < 32) || (ch > 126)) 1185 { 1186 if (cleanLength > 0) 1187 { 1188 writer.write(chars, cleanStart, cleanLength); 1189 cleanLength = 0; 1190 } 1191 if (doURLEscaping) 1192 { 1193 // Encode UTF16 to UTF8. 1194 // Reference is Unicode, A Primer, by Tony Graham. 1195 // Page 92. 1196 1197 // Note that Kay doesn't escape 0x20... 1198 // if(ch == 0x20) // Not sure about this... -sb 1199 // { 1200 // writer.write(ch); 1201 // } 1202 // else 1203 if (ch <= 0x7F) 1204 { 1205 writer.write('%'); 1206 writer.write(makeHHString(ch)); 1207 } 1208 else if (ch <= 0x7FF) 1209 { 1210 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1211 // and set two high bits. 1212 int high = (ch >> 6) | 0xC0; 1213 int low = (ch & 0x3F) | 0x80; 1214 // First 6 bits, + high bit 1215 writer.write('%'); 1216 writer.write(makeHHString(high)); 1217 writer.write('%'); 1218 writer.write(makeHHString(low)); 1219 } 1220 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1221 { 1222 // I'm sure this can be done in 3 instructions, but I choose 1223 // to try and do it exactly like it is done in the book, at least 1224 // until we are sure this is totally clean. I don't think performance 1225 // is a big issue with this particular function, though I could be 1226 // wrong. Also, the stuff below clearly does more masking than 1227 // it needs to do. 1228 1229 // Clear high 6 bits. 
1230 int highSurrogate = ((int) ch) & 0x03FF; 1231 1232 // Middle 4 bits (wwww) + 1 1233 // "Note that the value of wwww from the high surrogate bit pattern 1234 // is incremented to make the uuuuu bit pattern in the scalar value 1235 // so the surrogate pair don't address the BMP." 1236 int wwww = ((highSurrogate & 0x03C0) >> 6); 1237 int uuuuu = wwww + 1; 1238 1239 // next 4 bits 1240 int zzzz = (highSurrogate & 0x003C) >> 2; 1241 1242 // low 2 bits 1243 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1244 1245 // Get low surrogate character. 1246 ch = chars[++i]; 1247 1248 // Clear high 6 bits. 1249 int lowSurrogate = ((int) ch) & 0x03FF; 1250 1251 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1252 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1253 1254 // bottom 6 bits. 1255 int xxxxxx = (lowSurrogate & 0x003F); 1256 1257 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1258 int byte2 = 1259 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1260 int byte3 = 0x80 | yyyyyy; 1261 int byte4 = 0x80 | xxxxxx; 1262 1263 writer.write('%'); 1264 writer.write(makeHHString(byte1)); 1265 writer.write('%'); 1266 writer.write(makeHHString(byte2)); 1267 writer.write('%'); 1268 writer.write(makeHHString(byte3)); 1269 writer.write('%'); 1270 writer.write(makeHHString(byte4)); 1271 } 1272 else 1273 { 1274 int high = (ch >> 12) | 0xE0; // top 4 bits 1275 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1276 // middle 6 bits 1277 int low = (ch & 0x3F) | 0x80; 1278 // First 6 bits, + high bit 1279 writer.write('%'); 1280 writer.write(makeHHString(high)); 1281 writer.write('%'); 1282 writer.write(makeHHString(middle)); 1283 writer.write('%'); 1284 writer.write(makeHHString(low)); 1285 } 1286 1287 } 1288 else if (escapingNotNeeded(ch)) 1289 { 1290 writer.write(ch); 1291 } 1292 else 1293 { 1294 writer.write("&#"); 1295 writer.write(Integer.toString(ch)); 1296 writer.write(';'); 1297 } 1298 // In this character range we have first written out any previously 
accumulated 1299 // "clean" characters, then processed the current more complicated character, 1300 // which may have incremented "i". 1301 // We now we reset the next possible clean character. 1302 cleanStart = i + 1; 1303 } 1304 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1305 // not allowing quotes in the URI proper syntax, nor in the fragment 1306 // identifier, we believe that it's OK to double escape quotes. 1307 else if (ch == '"') 1308 { 1309 // If the character is a '%' number number, try to avoid double-escaping. 1310 // There is a question if this is legal behavior. 1311 1312 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1313 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1314 1315 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1316 1317 // We are no longer escaping '%' 1318 1319 if (cleanLength > 0) 1320 { 1321 writer.write(chars, cleanStart, cleanLength); 1322 cleanLength = 0; 1323 } 1324 1325 1326 // Mike Kay encodes this as ", so he may know something I don't? 1327 if (doURLEscaping) 1328 writer.write("%22"); 1329 else 1330 writer.write("""); // we have to escape this, I guess. 1331 1332 // We have written out any clean characters, then the escaped '%' and now we 1333 // We now we reset the next possible clean character. 1334 cleanStart = i + 1; 1335 } 1336 else if (ch == '&') 1337 { 1338 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1339 // instead of "&" to avoid confusion with the beginning of a character 1340 // reference (entity reference open delimiter). 
1341 if (cleanLength > 0) 1342 { 1343 writer.write(chars, cleanStart, cleanLength); 1344 cleanLength = 0; 1345 } 1346 writer.write("&"); 1347 cleanStart = i + 1; 1348 } 1349 else 1350 { 1351 // no processing for this character, just count how 1352 // many characters in a row that we have that need no processing 1353 cleanLength++; 1354 } 1355 } 1356 1357 // are there any clean characters at the end of the array 1358 // that we haven't processed yet? 1359 if (cleanLength > 1) 1360 { 1361 // if the whole string can be written out as-is do so 1362 // otherwise write out the clean chars at the end of the 1363 // array 1364 if (cleanStart == 0) 1365 writer.write(string); 1366 else 1367 writer.write(chars, cleanStart, cleanLength); 1368 } 1369 else if (cleanLength == 1) 1370 { 1371 // a little optimization for 1 clean character 1372 // (we could have let the previous if(...) handle them all) 1373 writer.write(ch); 1374 } 1375 } 1376 1377 /** 1378 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1379 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1380 * 1381 * @param string String to convert to XML format. 1382 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1383 * 1384 * @throws org.xml.sax.SAXException 1385 */ writeAttrString( final java.io.Writer writer, String string, String encoding)1386 public void writeAttrString( 1387 final java.io.Writer writer, String string, String encoding) 1388 throws IOException 1389 { 1390 final int end = string.length(); 1391 if (end > m_attrBuff.length) 1392 { 1393 m_attrBuff = new char[end * 2 + 1]; 1394 } 1395 string.getChars(0, end, m_attrBuff, 0); 1396 final char[] chars = m_attrBuff; 1397 1398 1399 1400 int cleanStart = 0; 1401 int cleanLength = 0; 1402 1403 char ch = 0; 1404 for (int i = 0; i < end; i++) 1405 { 1406 ch = chars[i]; 1407 1408 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1409 // System.out.println("ch: "+(int)ch); 1410 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1411 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1412 if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch))) 1413 { 1414 cleanLength++; 1415 } 1416 else if ('<' == ch || '>' == ch) 1417 { 1418 cleanLength++; // no escaping in this case, as specified in 15.2 1419 } 1420 else if ( 1421 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1422 { 1423 cleanLength++; // no escaping in this case, as specified in 15.2 1424 } 1425 else 1426 { 1427 if (cleanLength > 0) 1428 { 1429 writer.write(chars,cleanStart,cleanLength); 1430 cleanLength = 0; 1431 } 1432 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1433 1434 if (i != pos) 1435 { 1436 i = pos - 1; 1437 } 1438 else 1439 { 1440 if (Encodings.isHighUTF16Surrogate(ch)) 1441 { 1442 1443 writeUTF16Surrogate(ch, chars, i, end); 1444 i++; // two input characters processed 1445 // this increments by one and the for() 1446 // loop itself increments by another one. 1447 } 1448 1449 // The next is kind of a hack to keep from escaping in the case 1450 // of Shift_JIS and the like. 
1451 1452 /* 1453 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1454 && (ch != 160)) 1455 { 1456 writer.write(ch); // no escaping in this case 1457 } 1458 else 1459 */ 1460 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1461 if (null != outputStringForChar) 1462 { 1463 writer.write(outputStringForChar); 1464 } 1465 else if (escapingNotNeeded(ch)) 1466 { 1467 writer.write(ch); // no escaping in this case 1468 } 1469 else 1470 { 1471 writer.write("&#"); 1472 writer.write(Integer.toString(ch)); 1473 writer.write(';'); 1474 } 1475 } 1476 cleanStart = i + 1; 1477 } 1478 } // end of for() 1479 1480 // are there any clean characters at the end of the array 1481 // that we haven't processed yet? 1482 if (cleanLength > 1) 1483 { 1484 // if the whole string can be written out as-is do so 1485 // otherwise write out the clean chars at the end of the 1486 // array 1487 if (cleanStart == 0) 1488 writer.write(string); 1489 else 1490 writer.write(chars, cleanStart, cleanLength); 1491 } 1492 else if (cleanLength == 1) 1493 { 1494 // a little optimization for 1 clean character 1495 // (we could have let the previous if(...) handle them all) 1496 writer.write(ch); 1497 } 1498 } 1499 1500 1501 1502 /** 1503 * Receive notification of character data. 1504 * 1505 * <p>The Parser will call this method to report each chunk of 1506 * character data. 
SAX parsers may return all contiguous character 1507 * data in a single chunk, or they may split it into several 1508 * chunks; however, all of the characters in any single event 1509 * must come from the same external entity, so that the Locator 1510 * provides useful information.</p> 1511 * 1512 * <p>The application must not attempt to read from the array 1513 * outside of the specified range.</p> 1514 * 1515 * <p>Note that some parsers will report whitespace using the 1516 * ignorableWhitespace() method rather than this one (validating 1517 * parsers must do so).</p> 1518 * 1519 * @param chars The characters from the XML document. 1520 * @param start The start position in the array. 1521 * @param length The number of characters to read from the array. 1522 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1523 * wrapping another exception. 1524 * @see #ignorableWhitespace 1525 * @see org.xml.sax.Locator 1526 * 1527 * @throws org.xml.sax.SAXException 1528 */ characters(char chars[], int start, int length)1529 public final void characters(char chars[], int start, int length) 1530 throws org.xml.sax.SAXException 1531 { 1532 1533 if (m_elemContext.m_isRaw) 1534 { 1535 try 1536 { 1537 // Clean up some pending issues. 1538 if (m_elemContext.m_startTagOpen) 1539 { 1540 closeStartTag(); 1541 m_elemContext.m_startTagOpen = false; 1542 } 1543 1544 m_ispreserve = true; 1545 1546 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1547 1548 // time to generate characters event 1549 if (m_tracer != null) 1550 super.fireCharEvent(chars, start, length); 1551 1552 return; 1553 } 1554 catch (IOException ioe) 1555 { 1556 throw new org.xml.sax.SAXException( 1557 Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe); 1558 } 1559 } 1560 else 1561 { 1562 super.characters(chars, start, length); 1563 } 1564 } 1565 1566 /** 1567 * Receive notification of cdata. 1568 * 1569 * <p>The Parser will call this method to report each chunk of 1570 * character data. 
SAX parsers may return all contiguous character 1571 * data in a single chunk, or they may split it into several 1572 * chunks; however, all of the characters in any single event 1573 * must come from the same external entity, so that the Locator 1574 * provides useful information.</p> 1575 * 1576 * <p>The application must not attempt to read from the array 1577 * outside of the specified range.</p> 1578 * 1579 * <p>Note that some parsers will report whitespace using the 1580 * ignorableWhitespace() method rather than this one (validating 1581 * parsers must do so).</p> 1582 * 1583 * @param ch The characters from the XML document. 1584 * @param start The start position in the array. 1585 * @param length The number of characters to read from the array. 1586 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1587 * wrapping another exception. 1588 * @see #ignorableWhitespace 1589 * @see org.xml.sax.Locator 1590 * 1591 * @throws org.xml.sax.SAXException 1592 */ cdata(char ch[], int start, int length)1593 public final void cdata(char ch[], int start, int length) 1594 throws org.xml.sax.SAXException 1595 { 1596 1597 if ((null != m_elemContext.m_elementName) 1598 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1599 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1600 { 1601 try 1602 { 1603 if (m_elemContext.m_startTagOpen) 1604 { 1605 closeStartTag(); 1606 m_elemContext.m_startTagOpen = false; 1607 } 1608 1609 m_ispreserve = true; 1610 1611 if (shouldIndent()) 1612 indent(); 1613 1614 // writer.write(ch, start, length); 1615 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1616 } 1617 catch (IOException ioe) 1618 { 1619 throw new org.xml.sax.SAXException( 1620 Utils.messages.createMessage( 1621 MsgKey.ER_OIERROR, 1622 null), 1623 ioe); 1624 //"IO error", ioe); 1625 } 1626 } 1627 else 1628 { 1629 super.cdata(ch, start, length); 1630 } 1631 } 1632 1633 /** 1634 * Receive notification of a processing instruction. 
1635 * 1636 * @param target The processing instruction target. 1637 * @param data The processing instruction data, or null if 1638 * none was supplied. 1639 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1640 * wrapping another exception. 1641 * 1642 * @throws org.xml.sax.SAXException 1643 */ processingInstruction(String target, String data)1644 public void processingInstruction(String target, String data) 1645 throws org.xml.sax.SAXException 1646 { 1647 1648 // Process any pending starDocument and startElement first. 1649 flushPending(); 1650 1651 // Use a fairly nasty hack to tell if the next node is supposed to be 1652 // unescaped text. 1653 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1654 { 1655 startNonEscaping(); 1656 } 1657 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1658 { 1659 endNonEscaping(); 1660 } 1661 else 1662 { 1663 try 1664 { 1665 // clean up any pending things first 1666 if (m_elemContext.m_startTagOpen) 1667 { 1668 closeStartTag(); 1669 m_elemContext.m_startTagOpen = false; 1670 } 1671 else if (m_cdataTagOpen) 1672 { 1673 closeCDATA(); 1674 } 1675 else if (m_needToCallStartDocument) 1676 { 1677 startDocumentInternal(); 1678 } 1679 1680 1681 /* 1682 * Perhaps processing instructions can be written out in HTML before 1683 * the DOCTYPE, in which case this could be emitted with the 1684 * startElement call, that knows the name of the document element 1685 * doing it right. 1686 */ 1687 if (true == m_needToOutputDocTypeDecl) 1688 outputDocTypeDecl("html"); // best guess for the upcoming element 1689 1690 1691 if (shouldIndent()) 1692 indent(); 1693 1694 final java.io.Writer writer = m_writer; 1695 //writer.write("<?" 
+ target); 1696 writer.write("<?"); 1697 writer.write(target); 1698 1699 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1700 writer.write(' '); 1701 1702 //writer.write(data + ">"); // different from XML 1703 writer.write(data); // different from XML 1704 writer.write('>'); // different from XML 1705 1706 // Always output a newline char if not inside of an 1707 // element. The whitespace is not significant in that 1708 // case. 1709 if (m_elemContext.m_currentElemDepth <= 0) 1710 outputLineSep(); 1711 1712 m_startNewLine = true; 1713 } 1714 catch(IOException e) 1715 { 1716 throw new SAXException(e); 1717 } 1718 } 1719 1720 // now generate the PI event 1721 if (m_tracer != null) 1722 super.fireEscapingEvent(target, data); 1723 } 1724 1725 /** 1726 * Receive notivication of a entityReference. 1727 * 1728 * @param name non-null reference to entity name string. 1729 * 1730 * @throws org.xml.sax.SAXException 1731 */ entityReference(String name)1732 public final void entityReference(String name) 1733 throws org.xml.sax.SAXException 1734 { 1735 try 1736 { 1737 1738 final java.io.Writer writer = m_writer; 1739 writer.write('&'); 1740 writer.write(name); 1741 writer.write(';'); 1742 1743 } catch(IOException e) 1744 { 1745 throw new SAXException(e); 1746 } 1747 } 1748 /** 1749 * @see ExtendedContentHandler#endElement(String) 1750 */ endElement(String elemName)1751 public final void endElement(String elemName) throws SAXException 1752 { 1753 endElement(null, null, elemName); 1754 } 1755 1756 /** 1757 * Process the attributes, which means to write out the currently 1758 * collected attributes to the writer. The attributes are not 1759 * cleared by this method 1760 * 1761 * @param writer the writer to write processed attributes to. 
1762 * @param nAttrs the number of attributes in m_attributes 1763 * to be processed 1764 * 1765 * @throws org.xml.sax.SAXException 1766 */ processAttributes(java.io.Writer writer, int nAttrs)1767 public void processAttributes(java.io.Writer writer, int nAttrs) 1768 throws IOException,SAXException 1769 { 1770 /* 1771 * process the collected attributes 1772 */ 1773 for (int i = 0; i < nAttrs; i++) 1774 { 1775 processAttribute( 1776 writer, 1777 m_attributes.getQName(i), 1778 m_attributes.getValue(i), 1779 m_elemContext.m_elementDesc); 1780 } 1781 } 1782 1783 /** 1784 * For the enclosing elements starting tag write out out any attributes 1785 * followed by ">". At this point we also mark if this element is 1786 * a cdata-section-element. 1787 * 1788 *@throws org.xml.sax.SAXException 1789 */ closeStartTag()1790 protected void closeStartTag() throws SAXException 1791 { 1792 try 1793 { 1794 1795 // finish processing attributes, time to fire off the start element event 1796 if (m_tracer != null) 1797 super.fireStartElem(m_elemContext.m_elementName); 1798 1799 int nAttrs = m_attributes.getLength(); 1800 if (nAttrs>0) 1801 { 1802 processAttributes(m_writer, nAttrs); 1803 // clear attributes object for re-use with next element 1804 m_attributes.clear(); 1805 } 1806 1807 m_writer.write('>'); 1808 1809 /* At this point we have the prefix mappings now, so 1810 * lets determine if the current element is specified in the cdata- 1811 * section-elements list. 1812 */ 1813 if (m_CdataElems != null) // if there are any cdata sections 1814 m_elemContext.m_isCdataSection = isCdataSection(); 1815 if (m_doIndent) 1816 { 1817 m_isprevtext = false; 1818 m_preserves.push(m_ispreserve); 1819 } 1820 1821 } 1822 catch(IOException e) 1823 { 1824 throw new SAXException(e); 1825 } 1826 } 1827 1828 1829 1830 /** 1831 * This method is used when a prefix/uri namespace mapping 1832 * is indicated after the element was started with a 1833 * startElement() and before and endElement(). 
1834 * startPrefixMapping(prefix,uri) would be used before the 1835 * startElement() call. 1836 * @param uri the URI of the namespace 1837 * @param prefix the prefix associated with the given URI. 1838 * 1839 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1840 */ namespaceAfterStartElement(String prefix, String uri)1841 public void namespaceAfterStartElement(String prefix, String uri) 1842 throws SAXException 1843 { 1844 // hack for XSLTC with finding URI for default namespace 1845 if (m_elemContext.m_elementURI == null) 1846 { 1847 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1848 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1849 { 1850 // the elements URI is not known yet, and it 1851 // doesn't have a prefix, and we are currently 1852 // setting the uri for prefix "", so we have 1853 // the uri for the element... lets remember it 1854 m_elemContext.m_elementURI = uri; 1855 } 1856 } 1857 startPrefixMapping(prefix,uri,false); 1858 } 1859 startDTD(String name, String publicId, String systemId)1860 public void startDTD(String name, String publicId, String systemId) 1861 throws SAXException 1862 { 1863 m_inDTD = true; 1864 super.startDTD(name, publicId, systemId); 1865 } 1866 1867 /** 1868 * Report the end of DTD declarations. 1869 * @throws org.xml.sax.SAXException The application may raise an exception. 1870 * @see #startDTD 1871 */ endDTD()1872 public void endDTD() throws org.xml.sax.SAXException 1873 { 1874 m_inDTD = false; 1875 /* for ToHTMLStream the DOCTYPE is entirely output in the 1876 * startDocumentInternal() method, so don't do anything here 1877 */ 1878 } 1879 /** 1880 * This method does nothing. 
1881 */ attributeDecl( String eName, String aName, String type, String valueDefault, String value)1882 public void attributeDecl( 1883 String eName, 1884 String aName, 1885 String type, 1886 String valueDefault, 1887 String value) 1888 throws SAXException 1889 { 1890 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1891 } 1892 1893 /** 1894 * This method does nothing. 1895 */ elementDecl(String name, String model)1896 public void elementDecl(String name, String model) throws SAXException 1897 { 1898 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1899 } 1900 /** 1901 * This method does nothing. 1902 */ internalEntityDecl(String name, String value)1903 public void internalEntityDecl(String name, String value) 1904 throws SAXException 1905 { 1906 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1907 } 1908 /** 1909 * This method does nothing. 1910 */ externalEntityDecl( String name, String publicId, String systemId)1911 public void externalEntityDecl( 1912 String name, 1913 String publicId, 1914 String systemId) 1915 throws SAXException 1916 { 1917 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1918 } 1919 1920 /** 1921 * This method is used to add an attribute to the currently open element. 1922 * The caller has guaranted that this attribute is unique, which means that it 1923 * not been seen before and will not be seen again. 1924 * 1925 * @param name the qualified name of the attribute 1926 * @param value the value of the attribute which can contain only 1927 * ASCII printable characters characters in the range 32 to 127 inclusive. 1928 * @param flags the bit values of this integer give optimization information. 
1929 */ addUniqueAttribute(String name, String value, int flags)1930 public void addUniqueAttribute(String name, String value, int flags) 1931 throws SAXException 1932 { 1933 try 1934 { 1935 final java.io.Writer writer = m_writer; 1936 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1937 { 1938 // "flags" has indicated that the characters 1939 // '>' '<' '&' and '"' are not in the value and 1940 // m_htmlcharInfo has recorded that there are no other 1941 // entities in the range 0 to 127 so we write out the 1942 // value directly 1943 writer.write(' '); 1944 writer.write(name); 1945 writer.write("=\""); 1946 writer.write(value); 1947 writer.write('"'); 1948 } 1949 else if ( 1950 (flags & HTML_ATTREMPTY) > 0 1951 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1952 { 1953 writer.write(' '); 1954 writer.write(name); 1955 } 1956 else 1957 { 1958 writer.write(' '); 1959 writer.write(name); 1960 writer.write("=\""); 1961 if ((flags & HTML_ATTRURL) > 0) 1962 { 1963 writeAttrURI(writer, value, m_specialEscapeURLs); 1964 } 1965 else 1966 { 1967 writeAttrString(writer, value, this.getEncoding()); 1968 } 1969 writer.write('"'); 1970 } 1971 } catch (IOException e) { 1972 throw new SAXException(e); 1973 } 1974 } 1975 comment(char ch[], int start, int length)1976 public void comment(char ch[], int start, int length) 1977 throws SAXException 1978 { 1979 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1980 if (m_inDTD) 1981 return; 1982 1983 // Clean up some pending issues, just in case 1984 // this call is coming right after a startElement() 1985 // or we are in the middle of writing out CDATA 1986 // or if a startDocument() call was not received 1987 if (m_elemContext.m_startTagOpen) 1988 { 1989 closeStartTag(); 1990 m_elemContext.m_startTagOpen = false; 1991 } 1992 else if (m_cdataTagOpen) 1993 { 1994 closeCDATA(); 1995 } 1996 else if (m_needToCallStartDocument) 1997 { 1998 startDocumentInternal(); 1999 } 2000 2001 /* 
2002 * Perhaps comments can be written out in HTML before the DOCTYPE. 2003 * In this case we might delete this call to writeOutDOCTYPE, and 2004 * it would be handled within the startElement() call. 2005 */ 2006 if (m_needToOutputDocTypeDecl) 2007 outputDocTypeDecl("html"); // best guess for the upcoming element 2008 2009 super.comment(ch, start, length); 2010 } 2011 reset()2012 public boolean reset() 2013 { 2014 boolean ret = super.reset(); 2015 if (!ret) 2016 return false; 2017 resetToHTMLStream(); 2018 return true; 2019 } 2020 resetToHTMLStream()2021 private void resetToHTMLStream() 2022 { 2023 // m_htmlcharInfo remains unchanged 2024 // m_htmlInfo = null; // Don't reset 2025 m_inBlockElem = false; 2026 m_inDTD = false; 2027 m_omitMetaTag = false; 2028 m_specialEscapeURLs = true; 2029 } 2030 2031 static class Trie 2032 { 2033 /** 2034 * A digital search trie for 7-bit ASCII text 2035 * The API is a subset of java.util.Hashtable 2036 * The key must be a 7-bit ASCII string 2037 * The value may be any Java Object 2038 * One can get an object stored in a trie from its key, 2039 * but the search is either case sensitive or case 2040 * insensitive to the characters in the key, and this 2041 * choice of sensitivity or insensitivity is made when 2042 * the Trie is created, before any objects are put in it. 2043 * 2044 * This class is a copy of the one in org.apache.xml.utils. 2045 * It exists to cut the serializers dependancy on that package. 2046 * 2047 * @xsl.usage internal 2048 */ 2049 2050 /** Size of the m_nextChar array. */ 2051 public static final int ALPHA_SIZE = 128; 2052 2053 /** The root node of the tree. */ 2054 final Node m_Root; 2055 2056 /** helper buffer to convert Strings to char arrays */ 2057 private char[] m_charBuffer = new char[0]; 2058 2059 /** true if the search for an object is lower case only with the key */ 2060 private final boolean m_lowerCaseOnly; 2061 2062 /** 2063 * Construct the trie that has a case insensitive search. 
2064 */ Trie()2065 public Trie() 2066 { 2067 m_Root = new Node(); 2068 m_lowerCaseOnly = false; 2069 } 2070 2071 /** 2072 * Construct the trie given the desired case sensitivity with the key. 2073 * @param lowerCaseOnly true if the search keys are to be loser case only, 2074 * not case insensitive. 2075 */ Trie(boolean lowerCaseOnly)2076 public Trie(boolean lowerCaseOnly) 2077 { 2078 m_Root = new Node(); 2079 m_lowerCaseOnly = lowerCaseOnly; 2080 } 2081 2082 /** 2083 * Put an object into the trie for lookup. 2084 * 2085 * @param key must be a 7-bit ASCII string 2086 * @param value any java object. 2087 * 2088 * @return The old object that matched key, or null. 2089 */ put(String key, Object value)2090 public Object put(String key, Object value) 2091 { 2092 2093 final int len = key.length(); 2094 if (len > m_charBuffer.length) 2095 { 2096 // make the biggest buffer ever needed in get(String) 2097 m_charBuffer = new char[len]; 2098 } 2099 2100 Node node = m_Root; 2101 2102 for (int i = 0; i < len; i++) 2103 { 2104 Node nextNode = 2105 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2106 2107 if (nextNode != null) 2108 { 2109 node = nextNode; 2110 } 2111 else 2112 { 2113 for (; i < len; i++) 2114 { 2115 Node newNode = new Node(); 2116 if (m_lowerCaseOnly) 2117 { 2118 // put this value into the tree only with a lower case key 2119 node.m_nextChar[Character.toLowerCase( 2120 key.charAt(i))] = 2121 newNode; 2122 } 2123 else 2124 { 2125 // put this value into the tree with a case insensitive key 2126 node.m_nextChar[Character.toUpperCase( 2127 key.charAt(i))] = 2128 newNode; 2129 node.m_nextChar[Character.toLowerCase( 2130 key.charAt(i))] = 2131 newNode; 2132 } 2133 node = newNode; 2134 } 2135 break; 2136 } 2137 } 2138 2139 Object ret = node.m_Value; 2140 2141 node.m_Value = value; 2142 2143 return ret; 2144 } 2145 2146 /** 2147 * Get an object that matches the key. 
2148 * 2149 * @param key must be a 7-bit ASCII string 2150 * 2151 * @return The object that matches the key, or null. 2152 */ get(final String key)2153 public Object get(final String key) 2154 { 2155 2156 final int len = key.length(); 2157 2158 /* If the name is too long, we won't find it, this also keeps us 2159 * from overflowing m_charBuffer 2160 */ 2161 if (m_charBuffer.length < len) 2162 return null; 2163 2164 Node node = m_Root; 2165 switch (len) // optimize the look up based on the number of chars 2166 { 2167 // case 0 looks silly, but the generated bytecode runs 2168 // faster for lookup of elements of length 2 with this in 2169 // and a fair bit faster. Don't know why. 2170 case 0 : 2171 { 2172 return null; 2173 } 2174 2175 case 1 : 2176 { 2177 final char ch = key.charAt(0); 2178 if (ch < ALPHA_SIZE) 2179 { 2180 node = node.m_nextChar[ch]; 2181 if (node != null) 2182 return node.m_Value; 2183 } 2184 return null; 2185 } 2186 // comment out case 2 because the default is faster 2187 // case 2 : 2188 // { 2189 // final char ch0 = key.charAt(0); 2190 // final char ch1 = key.charAt(1); 2191 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2192 // { 2193 // node = node.m_nextChar[ch0]; 2194 // if (node != null) 2195 // { 2196 // 2197 // if (ch1 < ALPHA_SIZE) 2198 // { 2199 // node = node.m_nextChar[ch1]; 2200 // if (node != null) 2201 // return node.m_Value; 2202 // } 2203 // } 2204 // } 2205 // return null; 2206 // } 2207 default : 2208 { 2209 for (int i = 0; i < len; i++) 2210 { 2211 // A thread-safe way to loop over the characters 2212 final char ch = key.charAt(i); 2213 if (ALPHA_SIZE <= ch) 2214 { 2215 // the key is not 7-bit ASCII so we won't find it here 2216 return null; 2217 } 2218 2219 node = node.m_nextChar[ch]; 2220 if (node == null) 2221 return null; 2222 } 2223 2224 return node.m_Value; 2225 } 2226 } 2227 } 2228 2229 /** 2230 * The node representation for the trie. 
2231 * @xsl.usage internal 2232 */ 2233 private class Node 2234 { 2235 2236 /** 2237 * Constructor, creates a Node[ALPHA_SIZE]. 2238 */ Node()2239 Node() 2240 { 2241 m_nextChar = new Node[ALPHA_SIZE]; 2242 m_Value = null; 2243 } 2244 2245 /** The next nodes. */ 2246 final Node m_nextChar[]; 2247 2248 /** The value. */ 2249 Object m_Value; 2250 } 2251 /** 2252 * Construct the trie from another Trie. 2253 * Both the existing Trie and this new one share the same table for 2254 * lookup, and it is assumed that the table is fully populated and 2255 * not changing anymore. 2256 * 2257 * @param existingTrie the Trie that this one is a copy of. 2258 */ Trie(Trie existingTrie)2259 public Trie(Trie existingTrie) 2260 { 2261 // copy some fields from the existing Trie into this one. 2262 m_Root = existingTrie.m_Root; 2263 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2264 2265 // get a buffer just big enough to hold the longest key in the table. 2266 int max = existingTrie.getLongestKeyLength(); 2267 m_charBuffer = new char[max]; 2268 } 2269 2270 /** 2271 * Get an object that matches the key. 2272 * This method is faster than get(), but is not thread-safe. 2273 * 2274 * @param key must be a 7-bit ASCII string 2275 * 2276 * @return The object that matches the key, or null. 2277 */ get2(final String key)2278 public Object get2(final String key) 2279 { 2280 2281 final int len = key.length(); 2282 2283 /* If the name is too long, we won't find it, this also keeps us 2284 * from overflowing m_charBuffer 2285 */ 2286 if (m_charBuffer.length < len) 2287 return null; 2288 2289 Node node = m_Root; 2290 switch (len) // optimize the look up based on the number of chars 2291 { 2292 // case 0 looks silly, but the generated bytecode runs 2293 // faster for lookup of elements of length 2 with this in 2294 // and a fair bit faster. Don't know why. 
2295 case 0 : 2296 { 2297 return null; 2298 } 2299 2300 case 1 : 2301 { 2302 final char ch = key.charAt(0); 2303 if (ch < ALPHA_SIZE) 2304 { 2305 node = node.m_nextChar[ch]; 2306 if (node != null) 2307 return node.m_Value; 2308 } 2309 return null; 2310 } 2311 default : 2312 { 2313 /* Copy string into array. This is not thread-safe because 2314 * it modifies the contents of m_charBuffer. If multiple 2315 * threads were to use this Trie they all would be 2316 * using this same array (not good). So this 2317 * method is not thread-safe, but it is faster because 2318 * converting to a char[] and looping over elements of 2319 * the array is faster than a String's charAt(i). 2320 */ 2321 key.getChars(0, len, m_charBuffer, 0); 2322 2323 for (int i = 0; i < len; i++) 2324 { 2325 final char ch = m_charBuffer[i]; 2326 if (ALPHA_SIZE <= ch) 2327 { 2328 // the key is not 7-bit ASCII so we won't find it here 2329 return null; 2330 } 2331 2332 node = node.m_nextChar[ch]; 2333 if (node == null) 2334 return null; 2335 } 2336 2337 return node.m_Value; 2338 } 2339 } 2340 } 2341 2342 /** 2343 * Get the length of the longest key used in the table. 2344 */ getLongestKeyLength()2345 public int getLongestKeyLength() 2346 { 2347 return m_charBuffer.length; 2348 } 2349 } 2350 } 2351