1 package org.unicode.cldr.util; 2 3 public class CharUtilities { 4 5 /** 6 * Simple wrapper for CharSequence 7 * 8 * @author markdavis 9 * 10 */ 11 public static class CharSourceWrapper<T extends CharSequence> implements CharSource { 12 protected T source; 13 CharSourceWrapper(T source)14 public CharSourceWrapper(T source) { 15 this.source = source; 16 } 17 18 @Override hasCharAt(int index)19 public boolean hasCharAt(int index) { 20 return index < source.length(); 21 } 22 23 @Override charAt(int index)24 public char charAt(int index) { 25 return source.charAt(index); 26 } 27 28 @Override toSourceOffset(int index)29 public int toSourceOffset(int index) { 30 return index; 31 } 32 33 @Override sublist(int start, int end)34 public CharSource sublist(int start, int end) { 35 return new CharSourceWrapper<>(source.subSequence(start, end)); 36 } 37 38 @Override sublist(int start)39 public CharSource sublist(int start) { 40 return new CharSourceWrapper<>(source.subSequence(start, source.length())); 41 } 42 43 @Override getKnownLength()44 public int getKnownLength() { 45 return source.length(); 46 } 47 48 @Override subSequence(int start, int end)49 public CharSequence subSequence(int start, int end) { 50 return source.subSequence(start, end); 51 } 52 53 @Override toString()54 public String toString() { 55 return source.toString(); 56 } 57 sourceSubSequence(int start, int end)58 public CharSequence sourceSubSequence(int start, int end) { 59 return source.subSequence(toSourceOffset(start), toSourceOffset(end)); 60 } 61 62 @Override fromSourceOffset(int index)63 public int fromSourceOffset(int index) { 64 return index; 65 } 66 67 @Override setStart(int index)68 public CharSource setStart(int index) { 69 return this; 70 } 71 72 @Override getStart()73 public int getStart() { 74 return 0; 75 } 76 } 77 78 /** 79 * Return the code point order of two CharSequences. 80 * If the text has isolated surrogates, they will not sort correctly. 81 * 82 * @param text1 83 * @param text2 84 * @return 85 */ compare(CharSource text1, CharSource text2)86 public static int compare(CharSource text1, CharSource text2) { 87 int i1 = 0; 88 int i2 = 0; 89 90 while (true) { 91 // handle running out of room 92 if (!text1.hasCharAt(i1)) { 93 if (text2.hasCharAt(i2)) { 94 return 0; 95 } 96 return -1; 97 } else if (text2.hasCharAt(i2)) { 98 return 1; 99 } 100 int cp1 = text1.charAt(i1++); 101 int cp2 = text2.charAt(i2++); 102 // if they are different, do a fixup 103 104 if (cp1 != cp2) { 105 return (cp1 + utf16Fixup[cp1 >> 11]) - 106 (cp2 + utf16Fixup[cp2 >> 11]); 107 } 108 } 109 } 110 111 private static final char utf16Fixup[] = { 112 0, 0, 0, 0, 0, 0, 0, 0, 113 0, 0, 0, 0, 0, 0, 0, 0, 114 0, 0, 0, 0, 0, 0, 0, 0, 115 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 116 }; 117 118 /** 119 * Return the code point order of two CharSequences. 120 * If the text has isolated surrogates, they will not sort correctly. 121 * 122 * @param text1 123 * @param text2 124 * @return 125 */ compare(CharSequence text1, CharSequence text2)126 public static int compare(CharSequence text1, CharSequence text2) { 127 int i1 = 0; 128 int i2 = 0; 129 130 while (true) { 131 // handle running out of room 132 if (i1 >= text1.length()) { 133 if (i2 >= text2.length()) { 134 return 0; 135 } 136 return -1; 137 } else if (i2 >= text2.length()) { 138 return 1; 139 } 140 int cp1 = text1.charAt(i1++); 141 int cp2 = text2.charAt(i2++); 142 // if they are different, do a fixup 143 144 if (cp1 != cp2) { 145 return (cp1 + utf16Fixup[cp1 >> 11]) - 146 (cp2 + utf16Fixup[cp2 >> 11]); 147 } 148 } 149 } 150 151 }