• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.mapper;
4 
5 import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME;
6 import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE;
7 import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK;
8 import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK;
9 import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
10 
11 import java.util.Arrays;
12 import java.util.Optional;
13 
14 import org.junit.Test;
15 import org.junit.runner.RunWith;
16 import org.junit.runners.JUnit4;
17 import org.unicode.cldr.api.CldrData;
18 import org.unicode.cldr.api.CldrDataSupplier;
19 import org.unicode.cldr.api.CldrValue;
20 import org.unicode.icu.tool.cldrtoicu.IcuData;
21 import org.unicode.icu.tool.cldrtoicu.RbValue;
22 
23 import com.google.common.base.Ascii;
24 import com.google.common.base.CaseFormat;
25 
26 @RunWith(JUnit4.class)
27 public class BreakIteratorMapperTest {
28     enum SegmentationType {
29         GRAPHEME_CLUSTER_BREAK, LINE_BREAK, SENTENCE_BREAK, WORD_BREAK;
30 
toString()31         @Override public String toString() {
32             return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name());
33         }
34     }
35 
36     enum BoundaryType {
37         GRAPHEME, WORD, LINE, SENTENCE, TITLE;
38 
39         // E.g. "icu:grapheme"
toString()40         @Override public String toString() {
41             return "icu:" + Ascii.toLowerCase(name());
42         }
43     }
44 
45     @Test
testSingleSuppression()46     public void testSingleSuppression() {
47         int idx = 0;
48         CldrData cldrData = cldrData(
49             suppression(SENTENCE_BREAK, "L.P.", ++idx),
50             suppression(SENTENCE_BREAK, "Alt.", ++idx),
51             suppression(SENTENCE_BREAK, "Approx.", ++idx));
52 
53         IcuData icuData = new IcuData("xx", true);
54         BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
55 
56         assertThat(icuData).getPaths().hasSize(1);
57         assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
58             RbValue.of("L.P."),
59             RbValue.of("Alt."),
60             RbValue.of("Approx."));
61     }
62 
63     // In real data, suppression is only a SentenceBreak thing, but we might as well test it for
64     // other types.
65     @Test
testMultipleSupressionTypes()66     public void testMultipleSupressionTypes() {
67         int idx = 0;
68         CldrData cldrData = cldrData(
69             suppression(SENTENCE_BREAK, "L.P.", ++idx),
70             suppression(SENTENCE_BREAK, "Alt.", ++idx),
71             suppression(SENTENCE_BREAK, "Approx.", ++idx),
72             suppression(LINE_BREAK, "Foo", ++idx),
73             suppression(LINE_BREAK, "Bar", ++idx),
74             suppression(LINE_BREAK, "Baz", ++idx));
75 
76         IcuData icuData = new IcuData("xx", true);
77         BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
78 
79         assertThat(icuData).getPaths().hasSize(2);
80         assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
81             RbValue.of("L.P."),
82             RbValue.of("Alt."),
83             RbValue.of("Approx."));
84         assertThat(icuData).hasValuesFor("/exceptions/LineBreak:array",
85             RbValue.of("Foo"),
86             RbValue.of("Bar"),
87             RbValue.of("Baz"));
88     }
89 
90     @Test
testSpecials_dictionary()91     public void testSpecials_dictionary() {
92         CldrData specials = cldrData(
93             dictionary("foo", "<foo deps>"),
94             dictionary("bar", "<bar deps>"));
95 
96         IcuData icuData = new IcuData("xx", true);
97         BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
98 
99         assertThat(icuData).getPaths().hasSize(2);
100         assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
101         assertThat(icuData).hasValuesFor("/dictionaries/bar:process(dependency)", "<bar deps>");
102     }
103 
104     @Test
testSpecials_boundaries()105     public void testSpecials_boundaries() {
106         CldrData specials = cldrData(
107             boundaries(GRAPHEME, "<grapheme deps>", null),
108             boundaries(SENTENCE, "<sentence deps>", "altName"));
109 
110         IcuData icuData = new IcuData("xx", true);
111         BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
112 
113         assertThat(icuData).getPaths().hasSize(2);
114         assertThat(icuData)
115             .hasValuesFor("/boundaries/grapheme:process(dependency)", "<grapheme deps>");
116         assertThat(icuData)
117             .hasValuesFor("/boundaries/sentence_altName:process(dependency)", "<sentence deps>");
118     }
119 
cldrData(CldrValue... values)120     private static CldrData cldrData(CldrValue... values) {
121         return CldrDataSupplier.forValues(Arrays.asList(values));
122     }
123 
suppression(SegmentationType type, String value, int index)124     private static CldrValue suppression(SegmentationType type, String value, int index) {
125         StringBuilder cldrPath = new StringBuilder("//ldml/segmentations");
126         appendAttribute(cldrPath.append("/segmentation"), "type", type);
127         cldrPath.append("/suppressions[@type=\"standard\"]");
128         // Suppression is an ordered element, so needs a sort index.
129         cldrPath.append("/suppression#").append(index);
130         return CldrValue.parseValue(cldrPath.toString(), value);
131     }
132 
dictionary(String type, String dependency)133     private static CldrValue dictionary(String type, String dependency) {
134         StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
135         cldrPath.append("/icu:dictionaries/icu:dictionary");
136         appendAttribute(cldrPath, "type", type);
137         appendAttribute(cldrPath, "icu:dependency", dependency);
138         return CldrValue.parseValue(cldrPath.toString(), "");
139     }
140 
boundaries(BoundaryType type, String dependency, String alt)141     private static CldrValue boundaries(BoundaryType type, String dependency, String alt) {
142         StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData");
143         cldrPath.append("/icu:boundaries/").append(type);
144         appendAttribute(cldrPath, "icu:dependency", dependency);
145         if (alt != null) {
146             appendAttribute(cldrPath, "alt", alt);
147         }
148         return CldrValue.parseValue(cldrPath.toString(), "");
149     }
150 
appendAttribute(StringBuilder out, String k, Object v)151     private static void appendAttribute(StringBuilder out, String k, Object v) {
152         out.append(String.format("[@%s=\"%s\"]", k, v));
153     }
154 }