• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2017 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
14 
15 package com.google.common.io;
16 
17 import com.google.caliper.BeforeExperiment;
18 import com.google.caliper.Benchmark;
19 import com.google.caliper.Param;
20 import com.google.caliper.api.VmOptions;
21 import com.google.common.base.Optional;
22 import java.io.IOException;
23 import java.io.InputStreamReader;
24 import java.nio.charset.Charset;
25 import java.util.Random;
26 
27 /**
28  * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}.
29  */
30 // These benchmarks allocate a lot of data so use a large heap
31 @VmOptions({"-Xms12g", "-Xmx12g", "-d64"})
32 public class ByteSourceAsCharSourceReadBenchmark {
33   enum ReadStrategy {
34     TO_BYTE_ARRAY_NEW_STRING {
35       @Override
read(ByteSource byteSource, Charset cs)36       String read(ByteSource byteSource, Charset cs) throws IOException {
37         return new String(byteSource.read(), cs);
38       }
39     },
40     USING_CHARSTREAMS_COPY {
41       @Override
read(ByteSource byteSource, Charset cs)42       String read(ByteSource byteSource, Charset cs) throws IOException {
43         StringBuilder sb = new StringBuilder();
44         try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
45           CharStreams.copy(reader, sb);
46         }
47         return sb.toString();
48       }
49     },
50     // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING.  But it just isn't
51     // my best guess is that the jdk authors have spent more time optimizing that callpath than this
52     // one. (StringCoding$StringDecoder vs. StreamDecoder).  StringCoding has a ton of special cases
53     // theoretically we could duplicate all that logic here to try to beat 'new String' or at least
54     // come close.
55     USING_DECODER_WITH_SIZE_HINT {
56       @Override
read(ByteSource byteSource, Charset cs)57       String read(ByteSource byteSource, Charset cs) throws IOException {
58         Optional<Long> size = byteSource.sizeIfKnown();
59         // if we know the size and it fits in an int
60         if (size.isPresent() && size.get().longValue() == size.get().intValue()) {
61           // otherwise try to presize a StringBuilder
62           // it is kind of lame that we need to construct a decoder to access this value.
63           // if this is a concern we could add special cases for some known charsets (like utf8)
64           // or we could avoid inputstreamreader and use the decoder api directly
65           // TODO(lukes): in a real implementation we would need to handle overflow conditions
66           int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte());
67           char[] buffer = new char[maxChars];
68           int bufIndex = 0;
69           int remaining = buffer.length;
70           try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
71             int nRead = 0;
72             while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) {
73               bufIndex += nRead;
74               remaining -= nRead;
75             }
76             if (nRead == -1) {
77               // we reached EOF
78               return new String(buffer, 0, bufIndex);
79             }
80             // otherwise we got the size wrong.  This can happen if the size changes between when
81             // we called sizeIfKnown and when we started reading the file (or i guess if
82             // maxCharsPerByte is wrong)
83             // Fallback to an incremental approach
84             StringBuilder builder = new StringBuilder(bufIndex + 32);
85             builder.append(buffer, 0, bufIndex);
86             buffer = null; // release for gc
87             CharStreams.copy(reader, builder);
88             return builder.toString();
89           }
90 
91         } else {
92           return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs);
93         }
94       }
95     };
96 
read(ByteSource byteSource, Charset cs)97     abstract String read(ByteSource byteSource, Charset cs) throws IOException;
98   }
99 
100   @Param({"UTF-8"})
101   String charsetName;
102 
103   @Param ReadStrategy strategy;
104 
105   @Param({"10", "1024", "1048576"})
106   int size;
107 
108   Charset charset;
109   ByteSource data;
110 
111   @BeforeExperiment
setUp()112   public void setUp() {
113     charset = Charset.forName(charsetName);
114     StringBuilder sb = new StringBuilder();
115     Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior
116     sb.ensureCapacity(size);
117     for (int k = 0; k < size; k++) {
118       // [9-127) includes all ascii non-control characters
119       sb.append((char) (random.nextInt(127 - 9) + 9));
120     }
121     String string = sb.toString();
122     sb.setLength(0);
123     data = ByteSource.wrap(string.getBytes(charset));
124   }
125 
126   @Benchmark
timeCopy(int reps)127   public int timeCopy(int reps) throws IOException {
128     int r = 0;
129     final Charset localCharset = charset;
130     final ByteSource localData = data;
131     final ReadStrategy localStrategy = strategy;
132     for (int i = 0; i < reps; i++) {
133       r += localStrategy.read(localData, localCharset).hashCode();
134     }
135     return r;
136   }
137 }
138