1 /* 2 * Copyright (C) 2017 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.io; 16 17 import com.google.caliper.BeforeExperiment; 18 import com.google.caliper.Benchmark; 19 import com.google.caliper.Param; 20 import com.google.caliper.api.VmOptions; 21 import com.google.common.base.Optional; 22 import java.io.IOException; 23 import java.io.InputStreamReader; 24 import java.nio.charset.Charset; 25 import java.util.Random; 26 27 /** 28 * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}. 29 */ 30 // These benchmarks allocate a lot of data so use a large heap 31 @VmOptions({"-Xms12g", "-Xmx12g", "-d64"}) 32 public class ByteSourceAsCharSourceReadBenchmark { 33 enum ReadStrategy { 34 TO_BYTE_ARRAY_NEW_STRING { 35 @Override read(ByteSource byteSource, Charset cs)36 String read(ByteSource byteSource, Charset cs) throws IOException { 37 return new String(byteSource.read(), cs); 38 } 39 }, 40 USING_CHARSTREAMS_COPY { 41 @Override read(ByteSource byteSource, Charset cs)42 String read(ByteSource byteSource, Charset cs) throws IOException { 43 StringBuilder sb = new StringBuilder(); 44 try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { 45 CharStreams.copy(reader, sb); 46 } 47 return sb.toString(); 48 } 49 }, 50 // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't 51 // my best guess is that the jdk authors have spent more time optimizing that callpath than this 52 // one. (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases 53 // theoretically we could duplicate all that logic here to try to beat 'new String' or at least 54 // come close. 55 USING_DECODER_WITH_SIZE_HINT { 56 @Override read(ByteSource byteSource, Charset cs)57 String read(ByteSource byteSource, Charset cs) throws IOException { 58 Optional<Long> size = byteSource.sizeIfKnown(); 59 // if we know the size and it fits in an int 60 if (size.isPresent() && size.get().longValue() == size.get().intValue()) { 61 // otherwise try to presize a StringBuilder 62 // it is kind of lame that we need to construct a decoder to access this value. 63 // if this is a concern we could add special cases for some known charsets (like utf8) 64 // or we could avoid inputstreamreader and use the decoder api directly 65 // TODO(lukes): in a real implementation we would need to handle overflow conditions 66 int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte()); 67 char[] buffer = new char[maxChars]; 68 int bufIndex = 0; 69 int remaining = buffer.length; 70 try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { 71 int nRead = 0; 72 while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) { 73 bufIndex += nRead; 74 remaining -= nRead; 75 } 76 if (nRead == -1) { 77 // we reached EOF 78 return new String(buffer, 0, bufIndex); 79 } 80 // otherwise we got the size wrong. This can happen if the size changes between when 81 // we called sizeIfKnown and when we started reading the file (or i guess if 82 // maxCharsPerByte is wrong) 83 // Fallback to an incremental approach 84 StringBuilder builder = new StringBuilder(bufIndex + 32); 85 builder.append(buffer, 0, bufIndex); 86 buffer = null; // release for gc 87 CharStreams.copy(reader, builder); 88 return builder.toString(); 89 } 90 91 } else { 92 return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs); 93 } 94 } 95 }; 96 read(ByteSource byteSource, Charset cs)97 abstract String read(ByteSource byteSource, Charset cs) throws IOException; 98 } 99 100 @Param({"UTF-8"}) 101 String charsetName; 102 103 @Param ReadStrategy strategy; 104 105 @Param({"10", "1024", "1048576"}) 106 int size; 107 108 Charset charset; 109 ByteSource data; 110 111 @BeforeExperiment setUp()112 public void setUp() { 113 charset = Charset.forName(charsetName); 114 StringBuilder sb = new StringBuilder(); 115 Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior 116 sb.ensureCapacity(size); 117 for (int k = 0; k < size; k++) { 118 // [9-127) includes all ascii non-control characters 119 sb.append((char) (random.nextInt(127 - 9) + 9)); 120 } 121 String string = sb.toString(); 122 sb.setLength(0); 123 data = ByteSource.wrap(string.getBytes(charset)); 124 } 125 126 @Benchmark timeCopy(int reps)127 public int timeCopy(int reps) throws IOException { 128 int r = 0; 129 final Charset localCharset = charset; 130 final ByteSource localData = data; 131 final ReadStrategy localStrategy = strategy; 132 for (int i = 0; i < reps; i++) { 133 r += localStrategy.read(localData, localCharset).hashCode(); 134 } 135 return r; 136 } 137 } 138