package perf; import java.nio.charset.Charset; import java.util.Arrays; /** * Trie container/wrapper, in this case implements Enum-value lookup. * Sample code to possibly use for streamlined-lookup by dictionary, using * UTF-8 bytes of {@link Enum#name()} as the key. */ public class EnumByBytesLookup> { private final static Charset UTF8 = Charset.forName("UTF-8"); private final Trie _root; private final int _size; private EnumByBytesLookup(Trie root, int size) { _root = root; _size = size; } public static > EnumByBytesLookup buildFor(Class enumClass) { Trie root = new Trie(null); int size = 0; for (EIN en : enumClass.getEnumConstants()) { byte[] key = en.name().getBytes(UTF8); root = root.with(en, key); ++size; } return new EnumByBytesLookup(root, size); } public E find(byte[] rawId) { return _root.find(rawId); } public int size() { return _size; } } /** * Trie nodes */ class Trie { private final static byte[] NO_BYTES = new byte[0]; private final static Trie[] NO_NODES = new Trie[0]; /** * For leaves, value matched by sequence */ private final T _match; private final byte[] _nextBytes; private final Trie[] nextNodes; private final int nextCount; @SuppressWarnings("unchecked") Trie(T match) { this(match, NO_BYTES, (Trie[]) NO_NODES); } private Trie(T match, byte[] nextBytes, Trie[] nextNodes) { this._match = match; this._nextBytes = nextBytes; this.nextNodes = nextNodes; nextCount = nextBytes.length; } private Trie(Trie base, T match) { // should we allow duplicate calls with same match? For now, let's not if (base._match != null) { throw new IllegalArgumentException("Trying to add same match multiple times"); } this._match = match; _nextBytes = base._nextBytes; nextNodes = base.nextNodes; nextCount = base.nextCount; } private Trie(Trie base, byte nextByte, Trie nextNode) { // should we allow duplicate calls with same match? For now, let's not if (base._match != null) { throw new IllegalArgumentException("Trying to add same match multiple times"); } _match = base._match; int size = base._nextBytes.length + 1; _nextBytes = Arrays.copyOf(base._nextBytes, size); _nextBytes[size-1] = nextByte; nextNodes = Arrays.copyOf(base.nextNodes, size); nextNodes[size-1] = nextNode; nextCount = size; } /** * Constructor used when an existing branch needs to be replaced due to addition */ private Trie(Trie base, int offset, Trie newNode) { _match = base._match; // can keep nextBytes, as they don't change _nextBytes = base._nextBytes; // but must create a copy of next nodes, to modify one entry nextNodes = Arrays.copyOf(base.nextNodes, base.nextNodes.length); nextNodes[offset] = newNode; nextCount = base.nextCount; } /** * "Mutant factory" method: constructs a modified Trie, with specified raw id * added. */ public Trie with(T match, byte[] rawId) { return with(match, rawId, 0, rawId.length); } private Trie with(T match, byte[] rawId, int start, int end) { if (start == end) { return new Trie(this, match); } // Ok: two choices; either we follow existing branch; or need to create new one final byte b = rawId[start++]; for (int i = 0; i < nextCount; ++i) { if (_nextBytes[i] == b) { // existing branch: good day for delegation... Trie old = nextNodes[i]; // to keep things truly immutable, copy underlying arrays, then return new Trie(this, i, old.with(match, rawId, start, end)); } } // simplest recursively, but for fun let's convert to iteration. Start with tail Trie curr = new Trie(match); for (int i = end-1; i >= start; --i) { curr = new Trie(this, rawId[i], curr); } return new Trie(this, b, curr); } public T find(byte[] id) { return find(id, 0, id.length); } public T find(byte[] id, int offset, int length) { Trie t = this; final int end = offset+length; for (; offset < end; ++offset) { byte b = id[offset]; t = t.next(b); if (t == null) { // NOTE: if using null-padding, would trim here /* if (b == (byte) 0) { break; } */ return null; } } return t._match; } private Trie next(int b) { for (int i = 0; i < nextCount; ++i) { if (_nextBytes[i] == b) { return nextNodes[i]; } } return null; } }