• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $
20  */
21 package org.apache.xml.utils;
22 
23 /**
24  * Bare-bones, unsafe, fast string buffer. No thread-safety, no
25  * parameter range checking, exposed fields. Note that in typical
26  * applications, thread-safety of a StringBuffer is a somewhat
27  * dubious concept in any case.
28  * <p>
29  * Note that Stree and DTM used a single FastStringBuffer as a string pool,
30  * by recording start and length indices within this single buffer. This
31  * minimizes heap overhead, but of course requires more work when retrieving
32  * the data.
33  * <p>
34  * FastStringBuffer operates as a "chunked buffer". Doing so
35  * reduces the need to recopy existing information when an append
36  * exceeds the space available; we just allocate another chunk and
37  * flow across to it. (The array of chunks may need to grow,
38  * admittedly, but that's a much smaller object.) Some excess
39  * recopying may arise when we extract Strings which cross chunk
40  * boundaries; larger chunks make that less frequent.
41  * <p>
42  * The size values are parameterized, to allow tuning this code. In
43  * theory, Result Tree Fragments might want to be tuned differently
44  * from the main document's text.
45  * <p>
46  * %REVIEW% An experiment in self-tuning is
47  * included in the code (using nested FastStringBuffers to achieve
48  * variation in chunk sizes), but this implementation has proven to
49  * be problematic when data may be being copied from the FSB into itself.
50  * We should either re-architect that to make this safe (if possible)
51  * or remove that code and clean up for performance/maintainability reasons.
52  * <p>
53  */
54 public class FastStringBuffer
55 {
56   // If nonzero, forces the inial chunk size.
57   /**/static final int DEBUG_FORCE_INIT_BITS=0;
58 
59   	// %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
60   	// back into the same FSB (variable set from previous variable, for example)
61   	// and blocksize changes in mid-copy... there's risk of severe malfunction in
62   	// the read process, due to how the resizing code re-jiggers storage. Arggh.
63   	// If we want to retain the variable-size-block feature, we need to reconsider
64   	// that issue. For now, I have forced us into fixed-size mode.
65     static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
66 
67 	/** Manifest constant: Suppress leading whitespace.
68 	 * This should be used when normalize-to-SAX is called for the first chunk of a
69 	 * multi-chunk output, or one following unsuppressed whitespace in a previous
70 	 * chunk.
71 	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
72 	 */
73 	public static final int SUPPRESS_LEADING_WS=0x01;
74 
75 	/** Manifest constant: Suppress trailing whitespace.
76 	 * This should be used when normalize-to-SAX is called for the last chunk of a
77 	 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
78 	 */
79 	public static final int SUPPRESS_TRAILING_WS=0x02;
80 
81 	/** Manifest constant: Suppress both leading and trailing whitespace.
82 	 * This should be used when normalize-to-SAX is called for a complete string.
83 	 * (I'm not wild about the name of this one. Ideas welcome.)
84 	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
85 	 */
86 	public static final int SUPPRESS_BOTH
87 		= SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
88 
89 	/** Manifest constant: Carry trailing whitespace of one chunk as leading
90 	 * whitespace of the next chunk. Used internally; I don't see any reason
91 	 * to make it public right now.
92 	 */
93 	private static final int CARRY_WS=0x04;
94 
95 	/**
96    * Field m_chunkBits sets our chunking strategy, by saying how many
97    * bits of index can be used within a single chunk before flowing over
98    * to the next chunk. For example, if m_chunkbits is set to 15, each
99    * chunk can contain up to 2^15 (32K) characters
100    */
101   int m_chunkBits = 15;
102 
103   /**
104    * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
105    * the largest permissible chunk size is in this particular FastStringBuffer
106    * hierarchy.
107    */
108   int m_maxChunkBits = 15;
109 
110   /**
111    * Field m_rechunkBits affects our chunk-growth strategy, by saying how
112    * many chunks should be allocated at one size before we encapsulate them
113    * into the first chunk of the next size up. For example, if m_rechunkBits
114    * is set to 3, then after 8 chunks at a given size we will rebundle
115    * them as the first element of a FastStringBuffer using a chunk size
116    * 8 times larger (chunkBits shifted left three bits).
117    */
118   int m_rebundleBits = 2;
119 
120   /**
121    * Field m_chunkSize establishes the maximum size of one chunk of the array
122    * as 2**chunkbits characters.
123    * (Which may also be the minimum size if we aren't tuning for storage)
124    */
125   int m_chunkSize;  // =1<<(m_chunkBits-1);
126 
127   /**
128    * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
129    * worth of low-order '1' bits, useful for shift-and-mask addressing
130    * within the chunks.
131    */
132   int m_chunkMask;  // =m_chunkSize-1;
133 
134   /**
135    * Field m_array holds the string buffer's text contents, using an
136    * array-of-arrays. Note that this array, and the arrays it contains, may be
137    * reallocated when necessary in order to allow the buffer to grow;
138    * references to them should be considered to be invalidated after any
139    * append. However, the only time these arrays are directly exposed
140    * is in the sendSAXcharacters call.
141    */
142   char[][] m_array;
143 
144   /**
145    * Field m_lastChunk is an index into m_array[], pointing to the last
146    * chunk of the Chunked Array currently in use. Note that additional
147    * chunks may actually be allocated, eg if the FastStringBuffer had
148    * previously been truncated or if someone issued an ensureSpace request.
149    * <p>
150    * The insertion point for append operations is addressed by the combination
151    * of m_lastChunk and m_firstFree.
152    */
153   int m_lastChunk = 0;
154 
155   /**
156    * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
157    * the first character in the Chunked Array which is not part of the
158    * FastStringBuffer's current content. Since m_array[][] is zero-based,
159    * the length of that content can be calculated as
160    * (m_lastChunk<<m_chunkBits) + m_firstFree
161    */
162   int m_firstFree = 0;
163 
164   /**
165    * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
166    * length equals m_chunkSize, and which replaces m_array[0]. This allows
167    * building a hierarchy of FastStringBuffers, where early appends use
168    * a smaller chunkSize (for less wasted memory overhead) but later
169    * ones use a larger chunkSize (for less heap activity overhead).
170    */
171   FastStringBuffer m_innerFSB = null;
172 
173   /**
174    * Construct a FastStringBuffer, with allocation policy as per parameters.
175    * <p>
176    * For coding convenience, I've expressed both allocation sizes in terms of
177    * a number of bits. That's needed for the final size of a chunk,
178    * to permit fast and efficient shift-and-mask addressing. It's less critical
179    * for the inital size, and may be reconsidered.
180    * <p>
181    * An alternative would be to accept integer sizes and round to powers of two;
182    * that really doesn't seem to buy us much, if anything.
183    *
184    * @param initChunkBits Length in characters of the initial allocation
185    * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
186    * characters.) Later chunks will use larger allocation units, to trade off
187    * allocation speed of large document against storage efficiency of small
188    * ones.
189    * @param maxChunkBits Number of character-offset bits that should be used for
190    * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
191    * characters.
192    * @param rebundleBits Number of character-offset bits that addressing should
193    * advance before we attempt to take a step from initChunkBits to maxChunkBits
194    */
FastStringBuffer(int initChunkBits, int maxChunkBits, int rebundleBits)195   public FastStringBuffer(int initChunkBits, int maxChunkBits,
196                           int rebundleBits)
197   {
198     if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
199 
200     // %REVIEW%
201     // Should this force to larger value, or smaller? Smaller less efficient, but if
202     // someone requested variable mode it's because they care about storage space.
203     // On the other hand, given the other changes I'm making, odds are that we should
204     // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
205     // anyway; we need a permanant solution.
206     //
207     if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
208     //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
209 
210     m_array = new char[16][];
211 
212     // Don't bite off more than we're prepared to swallow!
213     if (initChunkBits > maxChunkBits)
214       initChunkBits = maxChunkBits;
215 
216     m_chunkBits = initChunkBits;
217     m_maxChunkBits = maxChunkBits;
218     m_rebundleBits = rebundleBits;
219     m_chunkSize = 1 << (initChunkBits);
220     m_chunkMask = m_chunkSize - 1;
221     m_array[0] = new char[m_chunkSize];
222   }
223 
224   /**
225    * Construct a FastStringBuffer, using a default rebundleBits value.
226    *
227    * NEEDSDOC @param initChunkBits
228    * NEEDSDOC @param maxChunkBits
229    */
FastStringBuffer(int initChunkBits, int maxChunkBits)230   public FastStringBuffer(int initChunkBits, int maxChunkBits)
231   {
232     this(initChunkBits, maxChunkBits, 2);
233   }
234 
235   /**
236    * Construct a FastStringBuffer, using default maxChunkBits and
237    * rebundleBits values.
238    * <p>
239    * ISSUE: Should this call assert initial size, or fixed size?
240    * Now configured as initial, with a default for fixed.
241    *
242    * NEEDSDOC @param initChunkBits
243    */
FastStringBuffer(int initChunkBits)244   public FastStringBuffer(int initChunkBits)
245   {
246     this(initChunkBits, 15, 2);
247   }
248 
249   /**
250    * Construct a FastStringBuffer, using a default allocation policy.
251    */
FastStringBuffer()252   public FastStringBuffer()
253   {
254 
255     // 10 bits is 1K. 15 bits is 32K. Remember that these are character
256     // counts, so actual memory allocation unit is doubled for UTF-16 chars.
257     //
258     // For reference: In the original FastStringBuffer, we simply
259     // overallocated by blocksize (default 1KB) on each buffer-growth.
260     this(10, 15, 2);
261   }
262 
263   /**
264    * Get the length of the list. Synonym for length().
265    *
266    * @return the number of characters in the FastStringBuffer's content.
267    */
size()268   public final int size()
269   {
270     return (m_lastChunk << m_chunkBits) + m_firstFree;
271   }
272 
273   /**
274    * Get the length of the list. Synonym for size().
275    *
276    * @return the number of characters in the FastStringBuffer's content.
277    */
length()278   public final int length()
279   {
280     return (m_lastChunk << m_chunkBits) + m_firstFree;
281   }
282 
283   /**
284    * Discard the content of the FastStringBuffer, and most of the memory
285    * that was allocated by it, restoring the initial state. Note that this
286    * may eventually be different from setLength(0), which see.
287    */
reset()288   public final void reset()
289   {
290 
291     m_lastChunk = 0;
292     m_firstFree = 0;
293 
294     // Recover the original chunk size
295     FastStringBuffer innermost = this;
296 
297     while (innermost.m_innerFSB != null)
298     {
299       innermost = innermost.m_innerFSB;
300     }
301 
302     m_chunkBits = innermost.m_chunkBits;
303     m_chunkSize = innermost.m_chunkSize;
304     m_chunkMask = innermost.m_chunkMask;
305 
306     // Discard the hierarchy
307     m_innerFSB = null;
308     m_array = new char[16][0];
309     m_array[0] = new char[m_chunkSize];
310   }
311 
312   /**
313    * Directly set how much of the FastStringBuffer's storage is to be
314    * considered part of its content. This is a fast but hazardous
315    * operation. It is not protected against negative values, or values
316    * greater than the amount of storage currently available... and even
317    * if additional storage does exist, its contents are unpredictable.
318    * The only safe use for our setLength() is to truncate the FastStringBuffer
319    * to a shorter string.
320    *
321    * @param l New length. If l<0 or l>=getLength(), this operation will
322    * not report an error but future operations will almost certainly fail.
323    */
setLength(int l)324   public final void setLength(int l)
325   {
326     m_lastChunk = l >>> m_chunkBits;
327 
328     if (m_lastChunk == 0 && m_innerFSB != null)
329     {
330       // Replace this FSB with the appropriate inner FSB, truncated
331       m_innerFSB.setLength(l, this);
332     }
333     else
334     {
335       m_firstFree = l & m_chunkMask;
336 
337 	  // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
338 	  // us pointing at the start of a chunk which has not yet been allocated. Rather than
339 	  // pay the cost of dealing with that in the append loops (more scattered and more
340 	  // inner-loop), we correct it here by moving to the safe side of that
341 	  // line -- as we would have left the indexes had we appended up to that point.
342       if(m_firstFree==0 && m_lastChunk>0)
343       {
344       	--m_lastChunk;
345       	m_firstFree=m_chunkSize;
346       }
347     }
348   }
349 
350   /**
351    * Subroutine for the public setLength() method. Deals with the fact
352    * that truncation may require restoring one of the innerFSBs
353    *
354    * NEEDSDOC @param l
355    * NEEDSDOC @param rootFSB
356    */
setLength(int l, FastStringBuffer rootFSB)357   private final void setLength(int l, FastStringBuffer rootFSB)
358   {
359 
360     m_lastChunk = l >>> m_chunkBits;
361 
362     if (m_lastChunk == 0 && m_innerFSB != null)
363     {
364       m_innerFSB.setLength(l, rootFSB);
365     }
366     else
367     {
368 
369       // Undo encapsulation -- pop the innerFSB data back up to root.
370       // Inefficient, but attempts to keep the code simple.
371       rootFSB.m_chunkBits = m_chunkBits;
372       rootFSB.m_maxChunkBits = m_maxChunkBits;
373       rootFSB.m_rebundleBits = m_rebundleBits;
374       rootFSB.m_chunkSize = m_chunkSize;
375       rootFSB.m_chunkMask = m_chunkMask;
376       rootFSB.m_array = m_array;
377       rootFSB.m_innerFSB = m_innerFSB;
378       rootFSB.m_lastChunk = m_lastChunk;
379 
380       // Finally, truncate this sucker.
381       rootFSB.m_firstFree = l & m_chunkMask;
382     }
383   }
384 
385   /**
386    * Note that this operation has been somewhat deoptimized by the shift to a
387    * chunked array, as there is no factory method to produce a String object
388    * directly from an array of arrays and hence a double copy is needed.
389    * By using ensureCapacity we hope to minimize the heap overhead of building
390    * the intermediate StringBuffer.
391    * <p>
392    * (It really is a pity that Java didn't design String as a final subclass
393    * of MutableString, rather than having StringBuffer be a separate hierarchy.
394    * We'd avoid a <strong>lot</strong> of double-buffering.)
395    *
396    * @return the contents of the FastStringBuffer as a standard Java string.
397    */
toString()398   public final String toString()
399   {
400 
401     int length = (m_lastChunk << m_chunkBits) + m_firstFree;
402 
403     return getString(new StringBuffer(length), 0, 0, length).toString();
404   }
405 
406   /**
407    * Append a single character onto the FastStringBuffer, growing the
408    * storage if necessary.
409    * <p>
410    * NOTE THAT after calling append(), previously obtained
411    * references to m_array[][] may no longer be valid....
412    * though in fact they should be in this instance.
413    *
414    * @param value character to be appended.
415    */
append(char value)416   public final void append(char value)
417   {
418 
419     char[] chunk;
420 
421     // We may have preallocated chunks. If so, all but last should
422     // be at full size.
423 
424     if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
425       chunk = m_array[m_lastChunk];
426     else
427     {
428 
429       // Extend array?
430       int i = m_array.length;
431 
432       if (m_lastChunk + 1 == i)
433       {
434         char[][] newarray = new char[i + 16][];
435 
436         System.arraycopy(m_array, 0, newarray, 0, i);
437 
438         m_array = newarray;
439       }
440 
441       // Advance one chunk
442       chunk = m_array[++m_lastChunk];
443 
444       if (chunk == null)
445       {
446 
447         // Hierarchical encapsulation
448         if (m_lastChunk == 1 << m_rebundleBits
449                 && m_chunkBits < m_maxChunkBits)
450         {
451 
452           // Should do all the work of both encapsulating
453           // existing data and establishing new sizes/offsets
454           m_innerFSB = new FastStringBuffer(this);
455         }
456 
457         // Add a chunk.
458         chunk = m_array[m_lastChunk] = new char[m_chunkSize];
459       }
460 
461       m_firstFree = 0;
462     }
463 
464     // Space exists in the chunk. Append the character.
465     chunk[m_firstFree++] = value;
466   }
467 
468   /**
469    * Append the contents of a String onto the FastStringBuffer,
470    * growing the storage if necessary.
471    * <p>
472    * NOTE THAT after calling append(), previously obtained
473    * references to m_array[] may no longer be valid.
474    *
475    * @param value String whose contents are to be appended.
476    */
append(String value)477   public final void append(String value)
478   {
479 
480     if (value == null)
481       return;
482     int strlen = value.length();
483 
484     if (0 == strlen)
485       return;
486 
487     int copyfrom = 0;
488     char[] chunk = m_array[m_lastChunk];
489     int available = m_chunkSize - m_firstFree;
490 
491     // Repeat while data remains to be copied
492     while (strlen > 0)
493     {
494 
495       // Copy what fits
496       if (available > strlen)
497         available = strlen;
498 
499       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
500                      m_firstFree);
501 
502       strlen -= available;
503       copyfrom += available;
504 
505       // If there's more left, allocate another chunk and continue
506       if (strlen > 0)
507       {
508 
509         // Extend array?
510         int i = m_array.length;
511 
512         if (m_lastChunk + 1 == i)
513         {
514           char[][] newarray = new char[i + 16][];
515 
516           System.arraycopy(m_array, 0, newarray, 0, i);
517 
518           m_array = newarray;
519         }
520 
521         // Advance one chunk
522         chunk = m_array[++m_lastChunk];
523 
524         if (chunk == null)
525         {
526 
527           // Hierarchical encapsulation
528           if (m_lastChunk == 1 << m_rebundleBits
529                   && m_chunkBits < m_maxChunkBits)
530           {
531 
532             // Should do all the work of both encapsulating
533             // existing data and establishing new sizes/offsets
534             m_innerFSB = new FastStringBuffer(this);
535           }
536 
537           // Add a chunk.
538           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
539         }
540 
541         available = m_chunkSize;
542         m_firstFree = 0;
543       }
544     }
545 
546     // Adjust the insert point in the last chunk, when we've reached it.
547     m_firstFree += available;
548   }
549 
550   /**
551    * Append the contents of a StringBuffer onto the FastStringBuffer,
552    * growing the storage if necessary.
553    * <p>
554    * NOTE THAT after calling append(), previously obtained
555    * references to m_array[] may no longer be valid.
556    *
557    * @param value StringBuffer whose contents are to be appended.
558    */
append(StringBuffer value)559   public final void append(StringBuffer value)
560   {
561 
562     if (value == null)
563       return;
564     int strlen = value.length();
565 
566     if (0 == strlen)
567       return;
568 
569     int copyfrom = 0;
570     char[] chunk = m_array[m_lastChunk];
571     int available = m_chunkSize - m_firstFree;
572 
573     // Repeat while data remains to be copied
574     while (strlen > 0)
575     {
576 
577       // Copy what fits
578       if (available > strlen)
579         available = strlen;
580 
581       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
582                      m_firstFree);
583 
584       strlen -= available;
585       copyfrom += available;
586 
587       // If there's more left, allocate another chunk and continue
588       if (strlen > 0)
589       {
590 
591         // Extend array?
592         int i = m_array.length;
593 
594         if (m_lastChunk + 1 == i)
595         {
596           char[][] newarray = new char[i + 16][];
597 
598           System.arraycopy(m_array, 0, newarray, 0, i);
599 
600           m_array = newarray;
601         }
602 
603         // Advance one chunk
604         chunk = m_array[++m_lastChunk];
605 
606         if (chunk == null)
607         {
608 
609           // Hierarchical encapsulation
610           if (m_lastChunk == 1 << m_rebundleBits
611                   && m_chunkBits < m_maxChunkBits)
612           {
613 
614             // Should do all the work of both encapsulating
615             // existing data and establishing new sizes/offsets
616             m_innerFSB = new FastStringBuffer(this);
617           }
618 
619           // Add a chunk.
620           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
621         }
622 
623         available = m_chunkSize;
624         m_firstFree = 0;
625       }
626     }
627 
628     // Adjust the insert point in the last chunk, when we've reached it.
629     m_firstFree += available;
630   }
631 
632   /**
633    * Append part of the contents of a Character Array onto the
634    * FastStringBuffer,  growing the storage if necessary.
635    * <p>
636    * NOTE THAT after calling append(), previously obtained
637    * references to m_array[] may no longer be valid.
638    *
639    * @param chars character array from which data is to be copied
640    * @param start offset in chars of first character to be copied,
641    * zero-based.
642    * @param length number of characters to be copied
643    */
append(char[] chars, int start, int length)644   public final void append(char[] chars, int start, int length)
645   {
646 
647     int strlen = length;
648 
649     if (0 == strlen)
650       return;
651 
652     int copyfrom = start;
653     char[] chunk = m_array[m_lastChunk];
654     int available = m_chunkSize - m_firstFree;
655 
656     // Repeat while data remains to be copied
657     while (strlen > 0)
658     {
659 
660       // Copy what fits
661       if (available > strlen)
662         available = strlen;
663 
664       System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
665                        available);
666 
667       strlen -= available;
668       copyfrom += available;
669 
670       // If there's more left, allocate another chunk and continue
671       if (strlen > 0)
672       {
673 
674         // Extend array?
675         int i = m_array.length;
676 
677         if (m_lastChunk + 1 == i)
678         {
679           char[][] newarray = new char[i + 16][];
680 
681           System.arraycopy(m_array, 0, newarray, 0, i);
682 
683           m_array = newarray;
684         }
685 
686         // Advance one chunk
687         chunk = m_array[++m_lastChunk];
688 
689         if (chunk == null)
690         {
691 
692           // Hierarchical encapsulation
693           if (m_lastChunk == 1 << m_rebundleBits
694                   && m_chunkBits < m_maxChunkBits)
695           {
696 
697             // Should do all the work of both encapsulating
698             // existing data and establishing new sizes/offsets
699             m_innerFSB = new FastStringBuffer(this);
700           }
701 
702           // Add a chunk.
703           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
704         }
705 
706         available = m_chunkSize;
707         m_firstFree = 0;
708       }
709     }
710 
711     // Adjust the insert point in the last chunk, when we've reached it.
712     m_firstFree += available;
713   }
714 
715   /**
716    * Append the contents of another FastStringBuffer onto
717    * this FastStringBuffer, growing the storage if necessary.
718    * <p>
719    * NOTE THAT after calling append(), previously obtained
720    * references to m_array[] may no longer be valid.
721    *
722    * @param value FastStringBuffer whose contents are
723    * to be appended.
724    */
append(FastStringBuffer value)725   public final void append(FastStringBuffer value)
726   {
727 
728     // Complicating factor here is that the two buffers may use
729     // different chunk sizes, and even if they're the same we're
730     // probably on a different alignment due to previously appended
731     // data. We have to work through the source in bite-sized chunks.
732     if (value == null)
733       return;
734     int strlen = value.length();
735 
736     if (0 == strlen)
737       return;
738 
739     int copyfrom = 0;
740     char[] chunk = m_array[m_lastChunk];
741     int available = m_chunkSize - m_firstFree;
742 
743     // Repeat while data remains to be copied
744     while (strlen > 0)
745     {
746 
747       // Copy what fits
748       if (available > strlen)
749         available = strlen;
750 
751       int sourcechunk = (copyfrom + value.m_chunkSize - 1)
752                         >>> value.m_chunkBits;
753       int sourcecolumn = copyfrom & value.m_chunkMask;
754       int runlength = value.m_chunkSize - sourcecolumn;
755 
756       if (runlength > available)
757         runlength = available;
758 
759       System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
760                        m_array[m_lastChunk], m_firstFree, runlength);
761 
762       if (runlength != available)
763         System.arraycopy(value.m_array[sourcechunk + 1], 0,
764                          m_array[m_lastChunk], m_firstFree + runlength,
765                          available - runlength);
766 
767       strlen -= available;
768       copyfrom += available;
769 
770       // If there's more left, allocate another chunk and continue
771       if (strlen > 0)
772       {
773 
774         // Extend array?
775         int i = m_array.length;
776 
777         if (m_lastChunk + 1 == i)
778         {
779           char[][] newarray = new char[i + 16][];
780 
781           System.arraycopy(m_array, 0, newarray, 0, i);
782 
783           m_array = newarray;
784         }
785 
786         // Advance one chunk
787         chunk = m_array[++m_lastChunk];
788 
789         if (chunk == null)
790         {
791 
792           // Hierarchical encapsulation
793           if (m_lastChunk == 1 << m_rebundleBits
794                   && m_chunkBits < m_maxChunkBits)
795           {
796 
797             // Should do all the work of both encapsulating
798             // existing data and establishing new sizes/offsets
799             m_innerFSB = new FastStringBuffer(this);
800           }
801 
802           // Add a chunk.
803           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
804         }
805 
806         available = m_chunkSize;
807         m_firstFree = 0;
808       }
809     }
810 
811     // Adjust the insert point in the last chunk, when we've reached it.
812     m_firstFree += available;
813   }
814 
815   /**
816    * @return true if the specified range of characters are all whitespace,
817    * as defined by XMLCharacterRecognizer.
818    * <p>
819    * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
820    *
821    * @param start Offset of first character in the range.
822    * @param length Number of characters to send.
823    */
isWhitespace(int start, int length)824   public boolean isWhitespace(int start, int length)
825   {
826 
827     int sourcechunk = start >>> m_chunkBits;
828     int sourcecolumn = start & m_chunkMask;
829     int available = m_chunkSize - sourcecolumn;
830     boolean chunkOK;
831 
832     while (length > 0)
833     {
834       int runlength = (length <= available) ? length : available;
835 
836       if (sourcechunk == 0 && m_innerFSB != null)
837         chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
838       else
839         chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
840           m_array[sourcechunk], sourcecolumn, runlength);
841 
842       if (!chunkOK)
843         return false;
844 
845       length -= runlength;
846 
847       ++sourcechunk;
848 
849       sourcecolumn = 0;
850       available = m_chunkSize;
851     }
852 
853     return true;
854   }
855 
856   /**
857    * @param start Offset of first character in the range.
858    * @param length Number of characters to send.
859    * @return a new String object initialized from the specified range of
860    * characters.
861    */
getString(int start, int length)862   public String getString(int start, int length)
863   {
864     int startColumn = start & m_chunkMask;
865     int startChunk = start >>> m_chunkBits;
866     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
867       return getOneChunkString(startChunk, startColumn, length);
868     }
869     return getString(new StringBuffer(length), startChunk, startColumn,
870                      length).toString();
871   }
872 
getOneChunkString(int startChunk, int startColumn, int length)873   protected String getOneChunkString(int startChunk, int startColumn,
874                                      int length) {
875     return new String(m_array[startChunk], startColumn, length);
876   }
877 
878   /**
879    * @param sb StringBuffer to be appended to
880    * @param start Offset of first character in the range.
881    * @param length Number of characters to send.
882    * @return sb with the requested text appended to it
883    */
getString(StringBuffer sb, int start, int length)884   StringBuffer getString(StringBuffer sb, int start, int length)
885   {
886     return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
887   }
888 
889   /**
890    * Internal support for toString() and getString().
891    * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
892    * and returns a StringBuffer supplied by the caller. This simplifies
893    * m_innerFSB support.
894    * <p>
895    * Note that this operation has been somewhat deoptimized by the shift to a
896    * chunked array, as there is no factory method to produce a String object
897    * directly from an array of arrays and hence a double copy is needed.
898    * By presetting length we hope to minimize the heap overhead of building
899    * the intermediate StringBuffer.
900    * <p>
901    * (It really is a pity that Java didn't design String as a final subclass
902    * of MutableString, rather than having StringBuffer be a separate hierarchy.
903    * We'd avoid a <strong>lot</strong> of double-buffering.)
904    *
905    *
906    * @param sb
907    * @param startChunk
908    * @param startColumn
909    * @param length
910    *
911    * @return the contents of the FastStringBuffer as a standard Java string.
912    */
getString(StringBuffer sb, int startChunk, int startColumn, int length)913   StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
914                          int length)
915   {
916 
917     int stop = (startChunk << m_chunkBits) + startColumn + length;
918     int stopChunk = stop >>> m_chunkBits;
919     int stopColumn = stop & m_chunkMask;
920 
921     // Factored out
922     //StringBuffer sb=new StringBuffer(length);
923     for (int i = startChunk; i < stopChunk; ++i)
924     {
925       if (i == 0 && m_innerFSB != null)
926         m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
927       else
928         sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
929 
930       startColumn = 0;  // after first chunk
931     }
932 
933     if (stopChunk == 0 && m_innerFSB != null)
934       m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
935     else if (stopColumn > startColumn)
936       sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
937 
938     return sb;
939   }
940 
941   /**
942    * Get a single character from the string buffer.
943    *
944    *
945    * @param pos character position requested.
946    * @return A character from the requested position.
947    */
charAt(int pos)948   public char charAt(int pos)
949   {
950     int startChunk = pos >>> m_chunkBits;
951 
952     if (startChunk == 0 && m_innerFSB != null)
953       return m_innerFSB.charAt(pos & m_chunkMask);
954     else
955       return m_array[startChunk][pos & m_chunkMask];
956   }
957 
958   /**
959    * Sends the specified range of characters as one or more SAX characters()
960    * events.
961    * Note that the buffer reference passed to the ContentHandler may be
962    * invalidated if the FastStringBuffer is edited; it's the user's
963    * responsibility to manage access to the FastStringBuffer to prevent this
964    * problem from arising.
965    * <p>
966    * Note too that there is no promise that the output will be sent as a
967    * single call. As is always true in SAX, one logical string may be split
968    * across multiple blocks of memory and hence delivered as several
969    * successive events.
970    *
971    * @param ch SAX ContentHandler object to receive the event.
972    * @param start Offset of first character in the range.
973    * @param length Number of characters to send.
974    * @exception org.xml.sax.SAXException may be thrown by handler's
975    * characters() method.
976    */
sendSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)977   public void sendSAXcharacters(
978           org.xml.sax.ContentHandler ch, int start, int length)
979             throws org.xml.sax.SAXException
980   {
981 
982     int startChunk = start >>> m_chunkBits;
983     int startColumn = start & m_chunkMask;
984     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
985         ch.characters(m_array[startChunk], startColumn, length);
986         return;
987     }
988 
989     int stop = start + length;
990     int stopChunk = stop >>> m_chunkBits;
991     int stopColumn = stop & m_chunkMask;
992 
993     for (int i = startChunk; i < stopChunk; ++i)
994     {
995       if (i == 0 && m_innerFSB != null)
996         m_innerFSB.sendSAXcharacters(ch, startColumn,
997                                      m_chunkSize - startColumn);
998       else
999         ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1000 
1001       startColumn = 0;  // after first chunk
1002     }
1003 
1004     // Last, or only, chunk
1005     if (stopChunk == 0 && m_innerFSB != null)
1006       m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1007     else if (stopColumn > startColumn)
1008     {
1009       ch.characters(m_array[stopChunk], startColumn,
1010                     stopColumn - startColumn);
1011     }
1012   }
1013 
1014   /**
1015    * Sends the specified range of characters as one or more SAX characters()
1016    * events, normalizing the characters according to XSLT rules.
1017    *
1018    * @param ch SAX ContentHandler object to receive the event.
1019    * @param start Offset of first character in the range.
1020    * @param length Number of characters to send.
1021    * @return normalization status to apply to next chunk (because we may
1022    * have been called recursively to process an inner FSB):
1023    * <dl>
1024    * <dt>0</dt>
1025    * <dd>if this output did not end in retained whitespace, and thus whitespace
1026    * at the start of the following chunk (if any) should be converted to a
1027    * single space.
1028    * <dt>SUPPRESS_LEADING_WS</dt>
1029    * <dd>if this output ended in retained whitespace, and thus whitespace
1030    * at the start of the following chunk (if any) should be completely
1031    * suppressed.</dd>
1032    * </dd>
1033    * </dl>
1034    * @exception org.xml.sax.SAXException may be thrown by handler's
1035    * characters() method.
1036    */
sendNormalizedSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)1037   public int sendNormalizedSAXcharacters(
1038           org.xml.sax.ContentHandler ch, int start, int length)
1039             throws org.xml.sax.SAXException
1040   {
1041 	// This call always starts at the beginning of the
1042     // string being written out, either because it was called directly or
1043     // because it was an m_innerFSB recursion. This is important since
1044 	// it gives us a well-known initial state for this flag:
1045 	int stateForNextChunk=SUPPRESS_LEADING_WS;
1046 
1047     int stop = start + length;
1048     int startChunk = start >>> m_chunkBits;
1049     int startColumn = start & m_chunkMask;
1050     int stopChunk = stop >>> m_chunkBits;
1051     int stopColumn = stop & m_chunkMask;
1052 
1053     for (int i = startChunk; i < stopChunk; ++i)
1054     {
1055       if (i == 0 && m_innerFSB != null)
1056 				stateForNextChunk=
1057         m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1058                                      m_chunkSize - startColumn);
1059       else
1060 				stateForNextChunk=
1061         sendNormalizedSAXcharacters(m_array[i], startColumn,
1062                                     m_chunkSize - startColumn,
1063 																		ch,stateForNextChunk);
1064 
1065       startColumn = 0;  // after first chunk
1066     }
1067 
1068     // Last, or only, chunk
1069     if (stopChunk == 0 && m_innerFSB != null)
1070 			stateForNextChunk= // %REVIEW% Is this update really needed?
1071       m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1072     else if (stopColumn > startColumn)
1073     {
1074 			stateForNextChunk= // %REVIEW% Is this update really needed?
1075       sendNormalizedSAXcharacters(m_array[stopChunk],
1076 																	startColumn, stopColumn - startColumn,
1077 																	ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1078     }
1079 		return stateForNextChunk;
1080   }
1081 
1082   static final char[] SINGLE_SPACE = {' '};
1083 
1084   /**
1085    * Internal method to directly normalize and dispatch the character array.
1086    * This version is aware of the fact that it may be called several times
1087    * in succession if the data is made up of multiple "chunks", and thus
1088    * must actively manage the handling of leading and trailing whitespace.
1089    *
1090    * Note: The recursion is due to the possible recursion of inner FSBs.
1091    *
1092    * @param ch The characters from the XML document.
1093    * @param start The start position in the array.
1094    * @param length The number of characters to read from the array.
1095    * @param handler SAX ContentHandler object to receive the event.
1096    * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1097    * This is a bitfield contining two flags, bitwise-ORed together:
1098    * <dl>
1099    * <dt>SUPPRESS_LEADING_WS</dt>
1100    * <dd>When false, causes leading whitespace to be converted to a single
1101    * space; when true, causes it to be discarded entirely.
1102    * Should be set TRUE for the first chunk, and (in multi-chunk output)
1103    * whenever the previous chunk ended in retained whitespace.</dd>
1104    * <dt>SUPPRESS_TRAILING_WS</dt>
1105    * <dd>When false, causes trailing whitespace to be converted to a single
1106    * space; when true, causes it to be discarded entirely.
1107    * Should be set TRUE for the last or only chunk.
1108    * </dd>
1109    * </dl>
1110    * @return normalization status, as in the edgeTreatmentFlags parameter:
1111    * <dl>
1112    * <dt>0</dt>
1113    * <dd>if this output did not end in retained whitespace, and thus whitespace
1114    * at the start of the following chunk (if any) should be converted to a
1115    * single space.
1116    * <dt>SUPPRESS_LEADING_WS</dt>
1117    * <dd>if this output ended in retained whitespace, and thus whitespace
1118    * at the start of the following chunk (if any) should be completely
1119    * suppressed.</dd>
1120    * </dd>
1121    * </dl>
1122    *
1123    *
1124    * @exception org.xml.sax.SAXException Any SAX exception, possibly
1125    *            wrapping another exception.
1126    */
sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler, int edgeTreatmentFlags)1127   static int sendNormalizedSAXcharacters(char ch[],
1128              int start, int length,
1129              org.xml.sax.ContentHandler handler,
1130 						 int edgeTreatmentFlags)
1131           throws org.xml.sax.SAXException
1132   {
1133      boolean processingLeadingWhitespace =
1134                        ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1135      boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1136      int currPos = start;
1137      int limit = start+length;
1138 
1139      // Strip any leading spaces first, if required
1140      if (processingLeadingWhitespace) {
1141          for (; currPos < limit
1142                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1143               currPos++) { }
1144 
1145          // If we've only encountered leading spaces, the
1146          // current state remains unchanged
1147          if (currPos == limit) {
1148              return edgeTreatmentFlags;
1149          }
1150      }
1151 
1152      // If we get here, there are no more leading spaces to strip
1153      while (currPos < limit) {
1154          int startNonWhitespace = currPos;
1155 
1156          // Grab a chunk of non-whitespace characters
1157          for (; currPos < limit
1158                 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1159               currPos++) { }
1160 
1161          // Non-whitespace seen - emit them, along with a single
1162          // space for any preceding whitespace characters
1163          if (startNonWhitespace != currPos) {
1164              if (seenWhitespace) {
1165                  handler.characters(SINGLE_SPACE, 0, 1);
1166                  seenWhitespace = false;
1167              }
1168              handler.characters(ch, startNonWhitespace,
1169                                 currPos - startNonWhitespace);
1170          }
1171 
1172          int startWhitespace = currPos;
1173 
1174          // Consume any whitespace characters
1175          for (; currPos < limit
1176                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1177               currPos++) { }
1178 
1179          if (startWhitespace != currPos) {
1180              seenWhitespace = true;
1181          }
1182      }
1183 
1184      return (seenWhitespace ? CARRY_WS : 0)
1185             | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1186   }
1187 
1188   /**
1189    * Directly normalize and dispatch the character array.
1190    *
1191    * @param ch The characters from the XML document.
1192    * @param start The start position in the array.
1193    * @param length The number of characters to read from the array.
1194    * @param handler SAX ContentHandler object to receive the event.
1195    * @exception org.xml.sax.SAXException Any SAX exception, possibly
1196    *            wrapping another exception.
1197    */
sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler)1198   public static void sendNormalizedSAXcharacters(char ch[],
1199              int start, int length,
1200              org.xml.sax.ContentHandler handler)
1201           throws org.xml.sax.SAXException
1202   {
1203 		sendNormalizedSAXcharacters(ch, start, length,
1204              handler, SUPPRESS_BOTH);
1205 	}
1206 
1207 	/**
1208    * Sends the specified range of characters as sax Comment.
1209    * <p>
1210    * Note that, unlike sendSAXcharacters, this has to be done as a single
1211    * call to LexicalHandler#comment.
1212    *
1213    * @param ch SAX LexicalHandler object to receive the event.
1214    * @param start Offset of first character in the range.
1215    * @param length Number of characters to send.
1216    * @exception org.xml.sax.SAXException may be thrown by handler's
1217    * characters() method.
1218    */
sendSAXComment( org.xml.sax.ext.LexicalHandler ch, int start, int length)1219   public void sendSAXComment(
1220           org.xml.sax.ext.LexicalHandler ch, int start, int length)
1221             throws org.xml.sax.SAXException
1222   {
1223 
1224     // %OPT% Do it this way for now...
1225     String comment = getString(start, length);
1226     ch.comment(comment.toCharArray(), 0, length);
1227   }
1228 
1229   /**
1230    * Copies characters from this string into the destination character
1231    * array.
1232    *
1233    * @param      srcBegin   index of the first character in the string
1234    *                        to copy.
1235    * @param      srcEnd     index after the last character in the string
1236    *                        to copy.
1237    * @param      dst        the destination array.
1238    * @param      dstBegin   the start offset in the destination array.
1239    * @exception IndexOutOfBoundsException If any of the following
1240    *            is true:
1241    *            <ul><li><code>srcBegin</code> is negative.
1242    *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1243    *            <li><code>srcEnd</code> is greater than the length of this
1244    *                string
1245    *            <li><code>dstBegin</code> is negative
1246    *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1247    *                <code>dst.length</code></ul>
1248    * @exception NullPointerException if <code>dst</code> is <code>null</code>
1249    */
getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)1250   private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1251   {
1252     // %TBD% Joe needs to write this function.  Make public when implemented.
1253   }
1254 
1255   /**
1256    * Encapsulation c'tor. After this is called, the source FastStringBuffer
1257    * will be reset to use the new object as its m_innerFSB, and will have
1258    * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1259    * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1260    *
1261    * NEEDSDOC @param source
1262    */
FastStringBuffer(FastStringBuffer source)1263   private FastStringBuffer(FastStringBuffer source)
1264   {
1265 
1266     // Copy existing information into new encapsulation
1267     m_chunkBits = source.m_chunkBits;
1268     m_maxChunkBits = source.m_maxChunkBits;
1269     m_rebundleBits = source.m_rebundleBits;
1270     m_chunkSize = source.m_chunkSize;
1271     m_chunkMask = source.m_chunkMask;
1272     m_array = source.m_array;
1273     m_innerFSB = source.m_innerFSB;
1274 
1275     // These have to be adjusted because we're calling just at the time
1276     // when we would be about to allocate another chunk
1277     m_lastChunk = source.m_lastChunk - 1;
1278     m_firstFree = source.m_chunkSize;
1279 
1280     // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1281     source.m_array = new char[16][];
1282     source.m_innerFSB = this;
1283 
1284     // Since we encapsulated just as we were about to append another
1285     // chunk, return ready to create the chunk after the innerFSB
1286     // -- 1, not 0.
1287     source.m_lastChunk = 1;
1288     source.m_firstFree = 0;
1289     source.m_chunkBits += m_rebundleBits;
1290     source.m_chunkSize = 1 << (source.m_chunkBits);
1291     source.m_chunkMask = source.m_chunkSize - 1;
1292   }
1293 }
1294