import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Produces a non-negative {@code long} ID for a string, derived from the first
 * eight bytes of the SHA-1 digest of the string's UTF-8 bytes. When used
 * properly, the odds of collision are so low that the ID can be used as a
 * proxy for the original string.
 *
 * <p>Every string passed to {@link #getId(CharSequence)} is remembered, so the
 * string can later be looked up again from its ID with
 * {@link #getStringFromId(long)} or {@link #getStringFromHexId(String)}.
 * Entries are never removed from these caches.
 *
 * <p>The caches are {@link ConcurrentHashMap}s and every use of the shared
 * {@link MessageDigest} is guarded by a lock on that instance.
 *
 * @author markdavis
 */
public final class StringId {
    /** Cache of already computed IDs, keyed by the string they were computed from. */
    private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<String, Long>();
    /** Reverse mapping from a computed ID to the string it was computed from. */
    private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<Long, String>();
    /** Shared SHA-1 instance; only used while holding its own monitor. */
    private static final MessageDigest digest;
    private static final Charset UTF_8 = Charset.forName("UTF-8");
    /** Number of retries after a failed attempt, i.e. RETRY_LIMIT + 1 attempts in total. */
    private static final int RETRY_LIMIT = 9;
    static {
        try {
            digest = MessageDigest.getInstance("SHA-1");
        } catch (Exception e) {
            throw new IllegalArgumentException(e); // darn'd checked exceptions
        }
    }

    /**
     * Get the ID for a string. The result is cached, and the string is
     * registered for later reverse lookup by ID.
     *
     * @param charSequence
     *            input string; its {@code toString()} value is what gets hashed.
     * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL.
     * @throws NullPointerException
     *             if {@code charSequence} is null.
     * @throws RuntimeException
     *             if hashing still fails after all retries; the exception of
     *             the last attempt is rethrown.
     */
    public static long getId(CharSequence charSequence) {
        String string = charSequence.toString();
        Long resultLong = STRING_TO_ID.get(string);
        if (resultLong != null) {
            return resultLong;
        }
        int retryCount = RETRY_LIMIT;
        while (true) {
            try {
                synchronized (digest) {
                    // Start from a clean state: if a previous attempt failed part-way,
                    // input it had already fed to the digest must not leak into this hash.
                    digest.reset();
                    byte[] hash = digest.digest(string.getBytes(UTF_8));
                    long result = 0;
                    for (int i = 0; i < 8; ++i) {
                        result <<= 8;
                        // NOTE: hash[i] is a signed byte and is sign-extended before the
                        // XOR, so a negative byte also flips all higher bits accumulated
                        // so far. Keep this exactly as is: changing it would change the
                        // IDs returned for existing strings.
                        result ^= hash[i];
                    }
                    // mash the top bit to make things easier
                    result &= 0x7FFFFFFFFFFFFFFFL;
                    STRING_TO_ID.put(string, result);
                    ID_TO_STRING.put(result, string);
                    return result;
                }
            } catch (RuntimeException e) {
                // Retry a limited number of times before giving up.
                if (--retryCount < 0) {
                    throw e;
                }
            }
        }
    }

    /**
     * Get the hex ID for a string.
     *
     * @param string
     *            input string.
     * @return the ID from {@link #getId(CharSequence)} formatted with
     *         {@link Long#toHexString(long)} (lower case, no leading zeros).
     */
    public static String getHexId(CharSequence string) {
        return Long.toHexString(getId(string));
    }

    /**
     * Get the string that a hex ID was generated from.
     *
     * @param string
     *            hex ID, as returned by {@link #getHexId(CharSequence)}.
     * @return the string previously used to generate that ID, or null if no
     *         such string has been registered through getId/getHexId.
     * @throws NumberFormatException
     *             if {@code string} cannot be parsed as a hexadecimal long.
     */
    public static String getStringFromHexId(String string) {
        return getStringFromId(Long.parseLong(string, 16));
    }

    /**
     * Returns string previously used to generate the longValue with getId.
     *
     * @param longValue
     *            ID, as returned by {@link #getId(CharSequence)}.
     * @return String previously used to generate the longValue with getId, or
     *         null if there is none.
     */
    public static String getStringFromId(long longValue) {
        return ID_TO_STRING.get(longValue);
    }
}