• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.io;
16 
17 import static com.google.common.base.Preconditions.checkNotNull;
18 
19 import com.google.common.annotations.Beta;
20 import com.google.common.annotations.GwtIncompatible;
21 import com.google.common.base.Ascii;
22 import com.google.common.base.Optional;
23 import com.google.common.base.Splitter;
24 import com.google.common.collect.AbstractIterator;
25 import com.google.common.collect.ImmutableList;
26 import com.google.common.collect.Lists;
27 import com.google.errorprone.annotations.CanIgnoreReturnValue;
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.Reader;
32 import java.io.StringReader;
33 import java.io.Writer;
34 import java.nio.charset.Charset;
35 import java.util.Iterator;
36 import java.util.List;
37 import javax.annotation.CheckForNull;
38 import org.checkerframework.checker.nullness.qual.Nullable;
39 
40 /**
41  * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
42  * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
43  * it is an immutable <i>supplier</i> of {@code Reader} instances.
44  *
45  * <p>{@code CharSource} provides two kinds of methods:
46  *
47  * <ul>
48  *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
49  *       instance each time they are called. The caller is responsible for ensuring that the
50  *       returned reader is closed.
51  *   <li><b>Convenience methods:</b> These are implementations of common operations that are
52  *       typically implemented by opening a reader using one of the methods in the first category,
53  *       doing something and finally closing the reader that was opened.
54  * </ul>
55  *
56  * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
57  * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
58  * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
59  * be an empty line at the end if the contents are terminated with a line separator.
60  *
61  * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
62  * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
63  *
64  * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources
65  * that provide readers that are:
66  *
67  * <ul>
68  *   <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either
69  *       block indefinitely or fail if the source creates an infinite reader.
70  *   <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the
71  *       source as it is read from. A source that provides such readers will not be reusable,
72  *       and operations that read from the stream (including {@link #length()}, in some
73  *       implementations) will prevent further operations from completing as expected.
74  * </ul>
75  *
76  * @since 14.0
77  * @author Colin Decker
78  */
79 @GwtIncompatible
80 @ElementTypesAreNonnullByDefault
81 public abstract class CharSource {
82 
83   /** Constructor for use by subclasses. */
CharSource()84   protected CharSource() {}
85 
86   /**
87    * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
88    * as bytes using the given {@link Charset}.
89    *
90    * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
91    * the default implementation of this method will ensure that the original {@code CharSource} is
92    * returned, rather than round-trip encoding. Subclasses that override this method should behave
93    * the same way.
94    *
95    * @since 20.0
96    */
97   @Beta
asByteSource(Charset charset)98   public ByteSource asByteSource(Charset charset) {
99     return new AsByteSource(charset);
100   }
101 
102   /**
103    * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
104    * reader each time it is called.
105    *
106    * <p>The caller is responsible for ensuring that the returned reader is closed.
107    *
108    * @throws IOException if an I/O error occurs while opening the reader
109    */
openStream()110   public abstract Reader openStream() throws IOException;
111 
112   /**
113    * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
114    * independent reader each time it is called.
115    *
116    * <p>The caller is responsible for ensuring that the returned reader is closed.
117    *
118    * @throws IOException if an I/O error occurs while of opening the reader
119    */
openBufferedStream()120   public BufferedReader openBufferedStream() throws IOException {
121     Reader reader = openStream();
122     return (reader instanceof BufferedReader)
123         ? (BufferedReader) reader
124         : new BufferedReader(reader);
125   }
126 
127   /**
128    * Returns the size of this source in chars, if the size can be easily determined without actually
129    * opening the data stream.
130    *
131    * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
132    * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
133    * that this method will return a different number of chars than would be returned by reading all
134    * of the chars.
135    *
136    * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
137    * return a different number of chars if the contents are changed.
138    *
139    * @since 19.0
140    */
141   @Beta
lengthIfKnown()142   public Optional<Long> lengthIfKnown() {
143     return Optional.absent();
144   }
145 
146   /**
147    * Returns the length of this source in chars, even if doing so requires opening and traversing an
148    * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
149    *
150    * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
151    * absent, it will fall back to a heavyweight operation that will open a stream, {@link
152    * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
153    * were skipped.
154    *
155    * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
156    * implementation, it is <i>possible</i> that this method will return a different number of chars
157    * than would be returned by reading all of the chars.
158    *
159    * <p>In either case, for mutable sources such as files, a subsequent read may return a different
160    * number of chars if the contents are changed.
161    *
162    * @throws IOException if an I/O error occurs while reading the length of this source
163    * @since 19.0
164    */
165   @Beta
length()166   public long length() throws IOException {
167     Optional<Long> lengthIfKnown = lengthIfKnown();
168     if (lengthIfKnown.isPresent()) {
169       return lengthIfKnown.get();
170     }
171 
172     Closer closer = Closer.create();
173     try {
174       Reader reader = closer.register(openStream());
175       return countBySkipping(reader);
176     } catch (Throwable e) {
177       throw closer.rethrow(e);
178     } finally {
179       closer.close();
180     }
181   }
182 
countBySkipping(Reader reader)183   private long countBySkipping(Reader reader) throws IOException {
184     long count = 0;
185     long read;
186     while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
187       count += read;
188     }
189     return count;
190   }
191 
192   /**
193    * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
194    * Does not close {@code appendable} if it is {@code Closeable}.
195    *
196    * @return the number of characters copied
197    * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
198    *     appendable}
199    */
200   @CanIgnoreReturnValue
copyTo(Appendable appendable)201   public long copyTo(Appendable appendable) throws IOException {
202     checkNotNull(appendable);
203 
204     Closer closer = Closer.create();
205     try {
206       Reader reader = closer.register(openStream());
207       return CharStreams.copy(reader, appendable);
208     } catch (Throwable e) {
209       throw closer.rethrow(e);
210     } finally {
211       closer.close();
212     }
213   }
214 
215   /**
216    * Copies the contents of this source to the given sink.
217    *
218    * @return the number of characters copied
219    * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
220    *     sink}
221    */
222   @CanIgnoreReturnValue
copyTo(CharSink sink)223   public long copyTo(CharSink sink) throws IOException {
224     checkNotNull(sink);
225 
226     Closer closer = Closer.create();
227     try {
228       Reader reader = closer.register(openStream());
229       Writer writer = closer.register(sink.openStream());
230       return CharStreams.copy(reader, writer);
231     } catch (Throwable e) {
232       throw closer.rethrow(e);
233     } finally {
234       closer.close();
235     }
236   }
237 
238   /**
239    * Reads the contents of this source as a string.
240    *
241    * @throws IOException if an I/O error occurs while reading from this source
242    */
read()243   public String read() throws IOException {
244     Closer closer = Closer.create();
245     try {
246       Reader reader = closer.register(openStream());
247       return CharStreams.toString(reader);
248     } catch (Throwable e) {
249       throw closer.rethrow(e);
250     } finally {
251       closer.close();
252     }
253   }
254 
255   /**
256    * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
257    *
258    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
259    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
260    * \n}. If the source's content does not end in a line termination sequence, it is treated as if
261    * it does.
262    *
263    * @throws IOException if an I/O error occurs while reading from this source
264    */
265   @CheckForNull
readFirstLine()266   public String readFirstLine() throws IOException {
267     Closer closer = Closer.create();
268     try {
269       BufferedReader reader = closer.register(openBufferedStream());
270       return reader.readLine();
271     } catch (Throwable e) {
272       throw closer.rethrow(e);
273     } finally {
274       closer.close();
275     }
276   }
277 
278   /**
279    * Reads all the lines of this source as a list of strings. The returned list will be empty if
280    * this source is empty.
281    *
282    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
283    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
284    * \n}. If the source's content does not end in a line termination sequence, it is treated as if
285    * it does.
286    *
287    * @throws IOException if an I/O error occurs while reading from this source
288    */
readLines()289   public ImmutableList<String> readLines() throws IOException {
290     Closer closer = Closer.create();
291     try {
292       BufferedReader reader = closer.register(openBufferedStream());
293       List<String> result = Lists.newArrayList();
294       String line;
295       while ((line = reader.readLine()) != null) {
296         result.add(line);
297       }
298       return ImmutableList.copyOf(result);
299     } catch (Throwable e) {
300       throw closer.rethrow(e);
301     } finally {
302       closer.close();
303     }
304   }
305 
306   /**
307    * Reads lines of text from this source, processing each line as it is read using the given {@link
308    * LineProcessor processor}. Stops when all lines have been processed or the processor returns
309    * {@code false} and returns the result produced by the processor.
310    *
311    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
312    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
313    * \n}. If the source's content does not end in a line termination sequence, it is treated as if
314    * it does.
315    *
316    * @throws IOException if an I/O error occurs while reading from this source or if {@code
317    *     processor} throws an {@code IOException}
318    * @since 16.0
319    */
320   @Beta
321   @CanIgnoreReturnValue // some processors won't return a useful result
322   @ParametricNullness
readLines(LineProcessor<T> processor)323   public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
324     checkNotNull(processor);
325 
326     Closer closer = Closer.create();
327     try {
328       Reader reader = closer.register(openStream());
329       return CharStreams.readLines(reader, processor);
330     } catch (Throwable e) {
331       throw closer.rethrow(e);
332     } finally {
333       closer.close();
334     }
335   }
336 
337   /**
338    * Returns whether the source has zero chars. The default implementation first checks {@link
339    * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
340    * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
341    *
342    * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
343    * chars are actually available for reading. This means that a source may return {@code true} from
344    * {@code isEmpty()} despite having readable content.
345    *
346    * @throws IOException if an I/O error occurs
347    * @since 15.0
348    */
isEmpty()349   public boolean isEmpty() throws IOException {
350     Optional<Long> lengthIfKnown = lengthIfKnown();
351     if (lengthIfKnown.isPresent()) {
352       return lengthIfKnown.get() == 0L;
353     }
354     Closer closer = Closer.create();
355     try {
356       Reader reader = closer.register(openStream());
357       return reader.read() == -1;
358     } catch (Throwable e) {
359       throw closer.rethrow(e);
360     } finally {
361       closer.close();
362     }
363   }
364 
365   /**
366    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
367    * the source will contain the concatenated data from the streams of the underlying sources.
368    *
369    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
370    * close the open underlying stream.
371    *
372    * @param sources the sources to concatenate
373    * @return a {@code CharSource} containing the concatenated data
374    * @since 15.0
375    */
concat(Iterable<? extends CharSource> sources)376   public static CharSource concat(Iterable<? extends CharSource> sources) {
377     return new ConcatenatedCharSource(sources);
378   }
379 
380   /**
381    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
382    * the source will contain the concatenated data from the streams of the underlying sources.
383    *
384    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
385    * close the open underlying stream.
386    *
387    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
388    * is called. This will fail if the iterator is infinite and may cause problems if the iterator
389    * eagerly fetches data for each source when iterated (rather than producing sources that only
390    * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
391    * possible.
392    *
393    * @param sources the sources to concatenate
394    * @return a {@code CharSource} containing the concatenated data
395    * @throws NullPointerException if any of {@code sources} is {@code null}
396    * @since 15.0
397    */
concat(Iterator<? extends CharSource> sources)398   public static CharSource concat(Iterator<? extends CharSource> sources) {
399     return concat(ImmutableList.copyOf(sources));
400   }
401 
402   /**
403    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
404    * the source will contain the concatenated data from the streams of the underlying sources.
405    *
406    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
407    * close the open underlying stream.
408    *
409    * @param sources the sources to concatenate
410    * @return a {@code CharSource} containing the concatenated data
411    * @throws NullPointerException if any of {@code sources} is {@code null}
412    * @since 15.0
413    */
concat(CharSource... sources)414   public static CharSource concat(CharSource... sources) {
415     return concat(ImmutableList.copyOf(sources));
416   }
417 
418   /**
419    * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
420    * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
421    * the {@code charSequence} is mutated while it is being read, so don't do that.
422    *
423    * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
424    */
wrap(CharSequence charSequence)425   public static CharSource wrap(CharSequence charSequence) {
426     return charSequence instanceof String
427         ? new StringCharSource((String) charSequence)
428         : new CharSequenceCharSource(charSequence);
429   }
430 
431   /**
432    * Returns an immutable {@link CharSource} that contains no characters.
433    *
434    * @since 15.0
435    */
empty()436   public static CharSource empty() {
437     return EmptyCharSource.INSTANCE;
438   }
439 
440   /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
441   private final class AsByteSource extends ByteSource {
442 
443     final Charset charset;
444 
AsByteSource(Charset charset)445     AsByteSource(Charset charset) {
446       this.charset = checkNotNull(charset);
447     }
448 
449     @Override
asCharSource(Charset charset)450     public CharSource asCharSource(Charset charset) {
451       if (charset.equals(this.charset)) {
452         return CharSource.this;
453       }
454       return super.asCharSource(charset);
455     }
456 
457     @Override
openStream()458     public InputStream openStream() throws IOException {
459       return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
460     }
461 
462     @Override
toString()463     public String toString() {
464       return CharSource.this.toString() + ".asByteSource(" + charset + ")";
465     }
466   }
467 
468   private static class CharSequenceCharSource extends CharSource {
469 
470     private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
471 
472     protected final CharSequence seq;
473 
CharSequenceCharSource(CharSequence seq)474     protected CharSequenceCharSource(CharSequence seq) {
475       this.seq = checkNotNull(seq);
476     }
477 
478     @Override
openStream()479     public Reader openStream() {
480       return new CharSequenceReader(seq);
481     }
482 
483     @Override
read()484     public String read() {
485       return seq.toString();
486     }
487 
488     @Override
isEmpty()489     public boolean isEmpty() {
490       return seq.length() == 0;
491     }
492 
493     @Override
length()494     public long length() {
495       return seq.length();
496     }
497 
498     @Override
lengthIfKnown()499     public Optional<Long> lengthIfKnown() {
500       return Optional.of((long) seq.length());
501     }
502 
503     /**
504      * Returns an iterator over the lines in the string. If the string ends in a newline, a final
505      * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
506      */
linesIterator()507     private Iterator<String> linesIterator() {
508       return new AbstractIterator<String>() {
509         Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
510 
511         @Override
512         @CheckForNull
513         protected String computeNext() {
514           if (lines.hasNext()) {
515             String next = lines.next();
516             // skip last line if it's empty
517             if (lines.hasNext() || !next.isEmpty()) {
518               return next;
519             }
520           }
521           return endOfData();
522         }
523       };
524     }
525 
526     @Override
527     @CheckForNull
readFirstLine()528     public String readFirstLine() {
529       Iterator<String> lines = linesIterator();
530       return lines.hasNext() ? lines.next() : null;
531     }
532 
533     @Override
readLines()534     public ImmutableList<String> readLines() {
535       return ImmutableList.copyOf(linesIterator());
536     }
537 
538     @Override
539     @ParametricNullness
readLines(LineProcessor<T> processor)540     public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
541       Iterator<String> lines = linesIterator();
542       while (lines.hasNext()) {
543         if (!processor.processLine(lines.next())) {
544           break;
545         }
546       }
547       return processor.getResult();
548     }
549 
550     @Override
toString()551     public String toString() {
552       return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
553     }
554   }
555 
556   /**
557    * Subclass specialized for string instances.
558    *
559    * <p>Since Strings are immutable and built into the jdk we can optimize some operations
560    *
561    * <ul>
562    *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
563    *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
564    *       one with {@link CharSequence#charAt(int)}.
565    *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
566    *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
567    *       can't change, and it is faster because many writers and appendables are optimized for
568    *       appending string instances.
569    * </ul>
570    */
571   private static class StringCharSource extends CharSequenceCharSource {
StringCharSource(String seq)572     protected StringCharSource(String seq) {
573       super(seq);
574     }
575 
576     @Override
openStream()577     public Reader openStream() {
578       return new StringReader((String) seq);
579     }
580 
581     @Override
copyTo(Appendable appendable)582     public long copyTo(Appendable appendable) throws IOException {
583       appendable.append(seq);
584       return seq.length();
585     }
586 
587     @Override
copyTo(CharSink sink)588     public long copyTo(CharSink sink) throws IOException {
589       checkNotNull(sink);
590       Closer closer = Closer.create();
591       try {
592         Writer writer = closer.register(sink.openStream());
593         writer.write((String) seq);
594         return seq.length();
595       } catch (Throwable e) {
596         throw closer.rethrow(e);
597       } finally {
598         closer.close();
599       }
600     }
601   }
602 
603   private static final class EmptyCharSource extends StringCharSource {
604 
605     private static final EmptyCharSource INSTANCE = new EmptyCharSource();
606 
EmptyCharSource()607     private EmptyCharSource() {
608       super("");
609     }
610 
611     @Override
toString()612     public String toString() {
613       return "CharSource.empty()";
614     }
615   }
616 
617   private static final class ConcatenatedCharSource extends CharSource {
618 
619     private final Iterable<? extends CharSource> sources;
620 
ConcatenatedCharSource(Iterable<? extends CharSource> sources)621     ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
622       this.sources = checkNotNull(sources);
623     }
624 
625     @Override
openStream()626     public Reader openStream() throws IOException {
627       return new MultiReader(sources.iterator());
628     }
629 
630     @Override
isEmpty()631     public boolean isEmpty() throws IOException {
632       for (CharSource source : sources) {
633         if (!source.isEmpty()) {
634           return false;
635         }
636       }
637       return true;
638     }
639 
640     @Override
lengthIfKnown()641     public Optional<Long> lengthIfKnown() {
642       long result = 0L;
643       for (CharSource source : sources) {
644         Optional<Long> lengthIfKnown = source.lengthIfKnown();
645         if (!lengthIfKnown.isPresent()) {
646           return Optional.absent();
647         }
648         result += lengthIfKnown.get();
649       }
650       return Optional.of(result);
651     }
652 
653     @Override
length()654     public long length() throws IOException {
655       long result = 0L;
656       for (CharSource source : sources) {
657         result += source.length();
658       }
659       return result;
660     }
661 
662     @Override
toString()663     public String toString() {
664       return "CharSource.concat(" + sources + ")";
665     }
666   }
667 }
668