1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.hash; 16 17 import static com.google.common.base.Preconditions.checkArgument; 18 19 import com.google.errorprone.annotations.CanIgnoreReturnValue; 20 import java.nio.ByteBuffer; 21 import java.nio.ByteOrder; 22 23 /** 24 * A convenience base class for implementors of {@code Hasher}; handles accumulating data until an 25 * entire "chunk" (of implementation-dependent length) is ready to be hashed. 26 * 27 * @author Kevin Bourrillion 28 * @author Dimitris Andreou 29 */ 30 // TODO(kevinb): this class still needs some design-and-document-for-inheritance love 31 @CanIgnoreReturnValue 32 @ElementTypesAreNonnullByDefault 33 abstract class AbstractStreamingHasher extends AbstractHasher { 34 /** Buffer via which we pass data to the hash algorithm (the implementor) */ 35 private final ByteBuffer buffer; 36 37 /** Number of bytes to be filled before process() invocation(s). */ 38 private final int bufferSize; 39 40 /** Number of bytes processed per process() invocation. */ 41 private final int chunkSize; 42 43 /** 44 * Constructor for use by subclasses. This hasher instance will process chunks of the specified 45 * size. 46 * 47 * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation; 48 * must be at least 4 49 */ AbstractStreamingHasher(int chunkSize)50 protected AbstractStreamingHasher(int chunkSize) { 51 this(chunkSize, chunkSize); 52 } 53 54 /** 55 * Constructor for use by subclasses. This hasher instance will process chunks of the specified 56 * size, using an internal buffer of {@code bufferSize} size, which must be a multiple of {@code 57 * chunkSize}. 58 * 59 * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation; 60 * must be at least 4 61 * @param bufferSize the size of the internal buffer. Must be a multiple of chunkSize 62 */ AbstractStreamingHasher(int chunkSize, int bufferSize)63 protected AbstractStreamingHasher(int chunkSize, int bufferSize) { 64 // TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this is ever public 65 checkArgument(bufferSize % chunkSize == 0); 66 67 // TODO(user): benchmark performance difference with longer buffer 68 // always space for a single primitive 69 this.buffer = ByteBuffer.allocate(bufferSize + 7).order(ByteOrder.LITTLE_ENDIAN); 70 this.bufferSize = bufferSize; 71 this.chunkSize = chunkSize; 72 } 73 74 /** Processes the available bytes of the buffer (at most {@code chunk} bytes). */ process(ByteBuffer bb)75 protected abstract void process(ByteBuffer bb); 76 77 /** 78 * This is invoked for the last bytes of the input, which are not enough to fill a whole chunk. 79 * The passed {@code ByteBuffer} is guaranteed to be non-empty. 80 * 81 * <p>This implementation simply pads with zeros and delegates to {@link #process(ByteBuffer)}. 82 */ processRemaining(ByteBuffer bb)83 protected void processRemaining(ByteBuffer bb) { 84 Java8Compatibility.position(bb, bb.limit()); // move at the end 85 Java8Compatibility.limit(bb, chunkSize + 7); // get ready to pad with longs 86 while (bb.position() < chunkSize) { 87 bb.putLong(0); 88 } 89 Java8Compatibility.limit(bb, chunkSize); 90 Java8Compatibility.flip(bb); 91 process(bb); 92 } 93 94 @Override putBytes(byte[] bytes, int off, int len)95 public final Hasher putBytes(byte[] bytes, int off, int len) { 96 return putBytesInternal(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN)); 97 } 98 99 @Override putBytes(ByteBuffer readBuffer)100 public final Hasher putBytes(ByteBuffer readBuffer) { 101 ByteOrder order = readBuffer.order(); 102 try { 103 readBuffer.order(ByteOrder.LITTLE_ENDIAN); 104 return putBytesInternal(readBuffer); 105 } finally { 106 readBuffer.order(order); 107 } 108 } 109 putBytesInternal(ByteBuffer readBuffer)110 private Hasher putBytesInternal(ByteBuffer readBuffer) { 111 // If we have room for all of it, this is easy 112 if (readBuffer.remaining() <= buffer.remaining()) { 113 buffer.put(readBuffer); 114 munchIfFull(); 115 return this; 116 } 117 118 // First add just enough to fill buffer size, and munch that 119 int bytesToCopy = bufferSize - buffer.position(); 120 for (int i = 0; i < bytesToCopy; i++) { 121 buffer.put(readBuffer.get()); 122 } 123 munch(); // buffer becomes empty here, since chunkSize divides bufferSize 124 125 // Now process directly from the rest of the input buffer 126 while (readBuffer.remaining() >= chunkSize) { 127 process(readBuffer); 128 } 129 130 // Finally stick the remainder back in our usual buffer 131 buffer.put(readBuffer); 132 return this; 133 } 134 135 /* 136 * Note: hashString(CharSequence, Charset) is intentionally not overridden. 137 * 138 * While intuitively, using CharsetEncoder to encode the CharSequence directly to the buffer (or 139 * even to an intermediate buffer) should be considerably more efficient than potentially 140 * copying the CharSequence to a String and then calling getBytes(Charset) on that String, in 141 * reality there are optimizations that make the getBytes(Charset) approach considerably faster, 142 * at least for commonly used charsets like UTF-8. 143 */ 144 145 @Override putByte(byte b)146 public final Hasher putByte(byte b) { 147 buffer.put(b); 148 munchIfFull(); 149 return this; 150 } 151 152 @Override putShort(short s)153 public final Hasher putShort(short s) { 154 buffer.putShort(s); 155 munchIfFull(); 156 return this; 157 } 158 159 @Override putChar(char c)160 public final Hasher putChar(char c) { 161 buffer.putChar(c); 162 munchIfFull(); 163 return this; 164 } 165 166 @Override putInt(int i)167 public final Hasher putInt(int i) { 168 buffer.putInt(i); 169 munchIfFull(); 170 return this; 171 } 172 173 @Override putLong(long l)174 public final Hasher putLong(long l) { 175 buffer.putLong(l); 176 munchIfFull(); 177 return this; 178 } 179 180 @Override hash()181 public final HashCode hash() { 182 munch(); 183 Java8Compatibility.flip(buffer); 184 if (buffer.remaining() > 0) { 185 processRemaining(buffer); 186 Java8Compatibility.position(buffer, buffer.limit()); 187 } 188 return makeHash(); 189 } 190 191 /** 192 * Computes a hash code based on the data that have been provided to this hasher. This is called 193 * after all chunks are handled with {@link #process} and any leftover bytes that did not make a 194 * complete chunk are handled with {@link #processRemaining}. 195 */ makeHash()196 protected abstract HashCode makeHash(); 197 198 // Process pent-up data in chunks munchIfFull()199 private void munchIfFull() { 200 if (buffer.remaining() < 8) { 201 // buffer is full; not enough room for a primitive. We have at least one full chunk. 202 munch(); 203 } 204 } 205 munch()206 private void munch() { 207 Java8Compatibility.flip(buffer); 208 while (buffer.remaining() >= chunkSize) { 209 // we could limit the buffer to ensure process() does not read more than 210 // chunkSize number of bytes, but we trust the implementations 211 process(buffer); 212 } 213 buffer.compact(); // preserve any remaining data that do not make a full chunk 214 } 215 } 216