• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.nio.charset.StandardCharsets;
4 import java.security.MessageDigest;
5 import java.util.Map;
6 import java.util.concurrent.ConcurrentHashMap;
7 
/**
 * Produces a numeric ID for a string from a 64-bit hash, and remembers every
 * ID it has produced so that the original string can be looked up again.
 * When used properly, the odds of collision are so low that the ID can be used
 * as a proxy for the original string. The ID is non-negative. The algorithm
 * takes the SHA-1 digest of the string's UTF-8 bytes and folds the first eight
 * digest bytes into a long.
 *
 * <p>All state is held in static maps that only grow; entries are never removed.
 *
 * @author markdavis
 */
public final class StringId {
    /** Forward cache: string to the ID previously computed for it. */
    private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<>();
    /** Reverse lookup: ID to the string that produced it. */
    private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<>();
    /** Shared SHA-1 instance; every use is guarded by synchronizing on it. */
    private static final MessageDigest digest;
    /** Number of additional attempts made after a RuntimeException while hashing. */
    private static final int RETRY_LIMIT = 9;
    static {
        try {
            digest = MessageDigest.getInstance("SHA-1");
        } catch (Exception e) {
            throw new IllegalArgumentException(e); // darn'd checked exceptions
        }
    }

    /**
     * Get the ID for a string. The result is cached, so repeated calls with an
     * equal string return the stored value without hashing again.
     *
     * <p>If two different strings ever produce the same ID, the one hashed most
     * recently is the one returned by {@link #getStringFromId(long)}.
     *
     * @param charSequence
     *            input string; converted with {@code toString()}. Must not be null.
     * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL.
     * @throws RuntimeException
     *             if hashing keeps failing after the initial attempt and
     *             {@code RETRY_LIMIT} retries; the last failure is rethrown.
     */
    public static long getId(CharSequence charSequence) {
        String string = charSequence.toString();
        Long cached = STRING_TO_ID.get(string);
        if (cached != null) {
            return cached;
        }
        int retryCount = RETRY_LIMIT;
        while (true) {
            try {
                // The digest object is shared, so only one thread may use it at a time.
                synchronized (digest) {
                    byte[] hash = digest.digest(string.getBytes(StandardCharsets.UTF_8));
                    long result = 0;
                    for (int i = 0; i < 8; ++i) {
                        result <<= 8;
                        // hash[i] is a signed byte and is sign-extended here, so a
                        // byte >= 0x80 also flips all higher bits accumulated so far.
                        // This is kept as-is on purpose: masking with 0xFF would
                        // change the value of every ID this class generates.
                        result ^= hash[i];
                    }
                    // mash the top bit to make things easier
                    result &= 0x7FFFFFFFFFFFFFFFL;
                    // Publish the reverse mapping first. The fast path above reads
                    // STRING_TO_ID without holding the lock, so once an ID is visible
                    // there, getStringFromId must already be able to resolve it.
                    ID_TO_STRING.put(result, string);
                    STRING_TO_ID.put(string, result);
                    return result;
                }
            } catch (RuntimeException e) {
                if (--retryCount < 0) {
                    throw e;
                }
            }
        }
    }

    /**
     * Get the hex ID for a string.
     *
     * @param string
     *            input string.
     * @return the ID from {@link #getId(CharSequence)} as lowercase hex digits,
     *         without leading zeros
     */
    public static String getHexId(CharSequence string) {
        return Long.toHexString(getId(string));
    }

    /**
     * Get the string that a hex ID was generated from.
     *
     * @param string
     *            hex ID, as produced by {@link #getHexId(CharSequence)}.
     * @return the string previously used to generate that ID, or null if no
     *         such ID has been generated
     * @throws NumberFormatException
     *             if the argument cannot be parsed as a hexadecimal long
     */
    public static String getStringFromHexId(String string) {
        return getStringFromId(Long.parseLong(string, 16));
    }

    /**
     * Returns string previously used to generate the longValue with getId.
     *
     * @param longValue
     *            an ID, as returned by {@link #getId(CharSequence)}.
     * @return String previously used to generate the longValue with getId, or
     *         null if no such ID has been generated.
     */
    public static String getStringFromId(long longValue) {
        return ID_TO_STRING.get(longValue);
    }
}