// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;

import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;

import java.util.Arrays;
import java.util.Optional;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbValue;

import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;

/**
 * Tests for {@link BreakIteratorMapper}. Each test builds a small set of CLDR values by hand,
 * runs {@code BreakIteratorMapper.process(...)} into a fresh {@link IcuData} instance and then
 * checks the resource bundle paths and values that were written.
 */
@RunWith(JUnit4.class)
public class BreakIteratorMapperTest {
    /**
     * Values used for the {@code type} attribute of a {@code segmentation} element. The string
     * form is the upper-camel-case version of the constant name (e.g. {@code SENTENCE_BREAK}
     * becomes {@code "SentenceBreak"}).
     */
    enum SegmentationType {
        GRAPHEME_CLUSTER_BREAK, LINE_BREAK, SENTENCE_BREAK, WORD_BREAK;

        @Override public String toString() {
            return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name());
        }
    }

    /**
     * Element names used below {@code icu:boundaries}. The string form is the lower-case
     * constant name with an {@code "icu:"} prefix.
     */
    enum BoundaryType {
        GRAPHEME, WORD, LINE, SENTENCE, TITLE;

        // E.g. "icu:grapheme"
        @Override public String toString() {
            return "icu:" + Ascii.toLowerCase(name());
        }
    }

    // Several suppressions of a single segmentation type should end up as one array, with the
    // values in sort-index order.
    @Test
    public void testSingleSuppression() {
        // Java evaluates arguments left-to-right, so the suppressions get indices 1, 2, 3.
        int idx = 0;
        CldrData cldrData = cldrData(
            suppression(SENTENCE_BREAK, "L.P.", ++idx),
            suppression(SENTENCE_BREAK, "Alt.", ++idx),
            suppression(SENTENCE_BREAK, "Approx.", ++idx));

        IcuData icuData = new IcuData("xx", true);
        BreakIteratorMapper.process(icuData, cldrData, Optional.empty());

        assertThat(icuData).getPaths().hasSize(1);
        assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
    }

    // In real data, suppression is only a SentenceBreak thing, but we might as well test it for
    // other types.
    @Test
    public void testMultipleSupressionTypes() {
        // A single running index is shared by both segmentation types (1..6).
        int idx = 0;
        CldrData cldrData = cldrData(
            suppression(SENTENCE_BREAK, "L.P.", ++idx),
            suppression(SENTENCE_BREAK, "Alt.", ++idx),
            suppression(SENTENCE_BREAK, "Approx.", ++idx),
            suppression(LINE_BREAK, "Foo", ++idx),
            suppression(LINE_BREAK, "Bar", ++idx),
            suppression(LINE_BREAK, "Baz", ++idx));

        IcuData icuData = new IcuData("xx", true);
        BreakIteratorMapper.process(icuData, cldrData, Optional.empty());

        // One array path is expected per segmentation type.
        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
        assertThat(icuData).hasValuesFor("/exceptions/LineBreak:array",
            RbValue.of("Foo"),
            RbValue.of("Bar"),
            RbValue.of("Baz"));
    }

    // Dictionary entries are supplied only via the optional "specials" data; the main CLDR data
    // passed to the mapper is empty here.
    @Test
    public void testSpecials_dictionary() {
        CldrData specials = cldrData(
            dictionary("foo", "<foo deps>"),
            dictionary("bar", "<bar deps>"));

        IcuData icuData = new IcuData("xx", true);
        BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
        assertThat(icuData).hasValuesFor("/dictionaries/bar:process(dependency)", "<bar deps>");
    }

    // Boundary entries are also supplied only via the "specials" data. The second value carries
    // an "alt" attribute, which is expected to show up as a "_altName" suffix on the path.
    @Test
    public void testSpecials_boundaries() {
        CldrData specials = cldrData(
            boundaries(GRAPHEME, "<grapheme deps>", null),
            boundaries(SENTENCE, "<sentence deps>", "altName"));

        IcuData icuData = new IcuData("xx", true);
        BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));

        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData)
            .hasValuesFor("/boundaries/grapheme:process(dependency)", "<grapheme deps>");
        assertThat(icuData)
            .hasValuesFor("/boundaries/sentence_altName:process(dependency)", "<sentence deps>");
    }

    /**
     * Wraps the given values in a {@link CldrData} instance via
     * {@code CldrDataSupplier.forValues(...)}. Calling this with no arguments passes an empty
     * list.
     */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /**
     * Builds a suppression value whose path has the form
     * {@code //ldml/segmentations/segmentation[@type="<type>"]/suppressions[@type="standard"]/suppression#<index>}.
     *
     * @param type the segmentation type written into the {@code type} attribute.
     * @param value the suppression text (the value of the element).
     * @param index the sort index appended after {@code '#'} on the final path element.
     */
    private static CldrValue suppression(SegmentationType type, String value, int index) {
        StringBuilder cldrPath = new StringBuilder("//ldml/segmentations");
        appendAttribute(cldrPath.append("/segmentation"), "type", type);
        cldrPath.append("/suppressions[@type=\"standard\"]");
        // Suppression is an ordered element, so needs a sort index.
        cldrPath.append("/suppression#").append(index);
        return CldrValue.parseValue(cldrPath.toString(), value);
    }

    /**
     * Builds an {@code icu:dictionary} value under
     * {@code //ldml/special/icu:breakIteratorData/icu:dictionaries}. All information is carried
     * in the {@code type} and {@code icu:dependency} attributes; the element value is empty.
     */
    private static CldrValue dictionary(String type, String dependency) {
        StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
        cldrPath.append("/icu:dictionaries/icu:dictionary");
        appendAttribute(cldrPath, "type", type);
        appendAttribute(cldrPath, "icu:dependency", dependency);
        return CldrValue.parseValue(cldrPath.toString(), "");
    }

    /**
     * Builds a boundary value under {@code //ldml/special/icu:breakIteratorData/icu:boundaries},
     * using the string form of {@code type} (e.g. {@code "icu:grapheme"}) as the element name.
     * The element value is empty.
     *
     * @param type the boundary type, which provides the element name.
     * @param dependency the value of the {@code icu:dependency} attribute.
     * @param alt the value of the {@code alt} attribute, or null to omit the attribute.
     */
    private static CldrValue boundaries(BoundaryType type, String dependency, String alt) {
        StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
        cldrPath.append("/icu:boundaries/").append(type);
        appendAttribute(cldrPath, "icu:dependency", dependency);
        if (alt != null) {
            appendAttribute(cldrPath, "alt", alt);
        }
        return CldrValue.parseValue(cldrPath.toString(), "");
    }

    /**
     * Appends {@code [@k="v"]} to the given path, using the string form of {@code v}. No
     * escaping is applied to the value.
     */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        out.append(String.format("[@%s=\"%s\"]", k, v));
    }
}