1 package org.unicode.cldr.util; 2 3 import java.nio.charset.StandardCharsets; 4 import java.security.MessageDigest; 5 import java.util.Map; 6 import java.util.concurrent.ConcurrentHashMap; 7 8 /** 9 * Produce an ID for a string based on a long hash. When used properly, the odds 10 * of collision are so low that the ID can be used as a proxy for the 11 * original string. The ID is non-negative. The algorithm uses SHA-1 over the 12 * UTF-8 bytes in the string. Also provides lookup for long previously generated for string. 13 * 14 * @author markdavis 15 */ 16 public final class StringId { 17 private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<>(); 18 private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<>(); 19 private static final MessageDigest digest; 20 private static final int RETRY_LIMIT = 9; 21 static { 22 try { 23 digest = MessageDigest.getInstance("SHA-1"); 24 } catch (Exception e) { 25 throw new IllegalArgumentException(e); // darn'd checked exceptions 26 } 27 } 28 29 /** 30 * Get the ID for a string. 31 * 32 * @param string 33 * input string. 34 * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. 35 */ getId(CharSequence charSequence)36 public static long getId(CharSequence charSequence) { 37 String string = charSequence.toString(); 38 Long resultLong = STRING_TO_ID.get(string); 39 if (resultLong != null) { 40 return resultLong; 41 } 42 int retryCount = RETRY_LIMIT; 43 while (true) { 44 try { 45 synchronized (digest) { 46 byte[] hash = digest.digest(string.getBytes(StandardCharsets.UTF_8)); 47 long result = 0; 48 for (int i = 0; i < 8; ++i) { 49 result <<= 8; 50 result ^= hash[i]; 51 } 52 // mash the top bit to make things easier 53 result &= 0x7FFFFFFFFFFFFFFFL; 54 STRING_TO_ID.put(string, result); 55 ID_TO_STRING.put(result, string); 56 return result; 57 } 58 } catch (RuntimeException e) { 59 if (--retryCount < 0) { 60 throw e; 61 } 62 } 63 } 64 } 65 66 /** 67 * Get the hex ID for a string. 68 * 69 * @param string 70 * input string. 71 * @return a string with the hex value 72 */ getHexId(CharSequence string)73 public static String getHexId(CharSequence string) { 74 return Long.toHexString(getId(string)); 75 } 76 77 /** 78 * Get the hex ID for a string. 79 * 80 * @param string 81 * input string. 82 * @return a string with the hex value 83 */ getStringFromHexId(String string)84 public static String getStringFromHexId(String string) { 85 return getStringFromId(Long.parseLong(string, 16)); 86 } 87 88 /** 89 * Returns string previously used to generate the longValue with getId. 90 * @param longValue 91 * @return String previously used to generate the longValue with getId. 92 */ getStringFromId(long longValue)93 public static String getStringFromId(long longValue) { 94 return ID_TO_STRING.get(longValue); 95 } 96 }