1 package org.unicode.cldr.util; 2 3 public class CharUtilities { 4 5 /** 6 * Simple wrapper for CharSequence 7 * 8 * @author markdavis 9 * 10 */ 11 public static class CharSourceWrapper<T extends CharSequence> implements CharSource { 12 protected T source; 13 CharSourceWrapper(T source)14 public CharSourceWrapper(T source) { 15 this.source = source; 16 } 17 hasCharAt(int index)18 public boolean hasCharAt(int index) { 19 return index < source.length(); 20 } 21 charAt(int index)22 public char charAt(int index) { 23 return source.charAt(index); 24 } 25 toSourceOffset(int index)26 public int toSourceOffset(int index) { 27 return index; 28 } 29 sublist(int start, int end)30 public CharSource sublist(int start, int end) { 31 return new CharSourceWrapper<CharSequence>(source.subSequence(start, end)); 32 } 33 sublist(int start)34 public CharSource sublist(int start) { 35 return new CharSourceWrapper<CharSequence>(source.subSequence(start, source.length())); 36 } 37 getKnownLength()38 public int getKnownLength() { 39 return source.length(); 40 } 41 subSequence(int start, int end)42 public CharSequence subSequence(int start, int end) { 43 return source.subSequence(start, end); 44 } 45 46 @Override toString()47 public String toString() { 48 return source.toString(); 49 } 50 sourceSubSequence(int start, int end)51 public CharSequence sourceSubSequence(int start, int end) { 52 return source.subSequence(toSourceOffset(start), toSourceOffset(end)); 53 } 54 fromSourceOffset(int index)55 public int fromSourceOffset(int index) { 56 return index; 57 } 58 setStart(int index)59 public CharSource setStart(int index) { 60 return this; 61 } 62 getStart()63 public int getStart() { 64 return 0; 65 } 66 } 67 68 /** 69 * Return the code point order of two CharSequences. 70 * If the text has isolated surrogates, they will not sort correctly. 71 * 72 * @param text1 73 * @param text2 74 * @return 75 */ compare(CharSource text1, CharSource text2)76 public static int compare(CharSource text1, CharSource text2) { 77 int i1 = 0; 78 int i2 = 0; 79 80 while (true) { 81 // handle running out of room 82 if (!text1.hasCharAt(i1)) { 83 if (text2.hasCharAt(i2)) { 84 return 0; 85 } 86 return -1; 87 } else if (text2.hasCharAt(i2)) { 88 return 1; 89 } 90 int cp1 = text1.charAt(i1++); 91 int cp2 = text2.charAt(i2++); 92 // if they are different, do a fixup 93 94 if (cp1 != cp2) { 95 return (cp1 + utf16Fixup[cp1 >> 11]) - 96 (cp2 + utf16Fixup[cp2 >> 11]); 97 } 98 } 99 } 100 101 private static final char utf16Fixup[] = { 102 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 106 }; 107 108 /** 109 * Return the code point order of two CharSequences. 110 * If the text has isolated surrogates, they will not sort correctly. 111 * 112 * @param text1 113 * @param text2 114 * @return 115 */ compare(CharSequence text1, CharSequence text2)116 public static int compare(CharSequence text1, CharSequence text2) { 117 int i1 = 0; 118 int i2 = 0; 119 120 while (true) { 121 // handle running out of room 122 if (i1 >= text1.length()) { 123 if (i2 >= text2.length()) { 124 return 0; 125 } 126 return -1; 127 } else if (i2 >= text2.length()) { 128 return 1; 129 } 130 int cp1 = text1.charAt(i1++); 131 int cp2 = text2.charAt(i2++); 132 // if they are different, do a fixup 133 134 if (cp1 != cp2) { 135 return (cp1 + utf16Fixup[cp1 >> 11]) - 136 (cp2 + utf16Fixup[cp2 >> 11]); 137 } 138 } 139 } 140 141 }