// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;

import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;

import java.util.Arrays;
import java.util.Optional;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;

import com.google.common.base.Joiner;

/**
 * Tests for {@code CollationMapper.process(...)}.
 *
 * <p>Each test builds a small in-memory {@link CldrData} instance, runs the mapper against a fresh
 * {@link IcuData} and then asserts on the resulting resource bundle paths and values.
 */
@RunWith(JUnit4.class)
public class CollationMapperTest {
    /** Arbitrary version string passed to the mapper and expected back in "Version" paths. */
    private static final String CLDR_VERSION = "1.23.4";

    /**
     * With no CLDR input a non-root locale produces no paths, whereas "root" still receives a
     * version and an empty sequence for the "standard" collation.
     */
    @Test
    public void testEmpty() {
        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData(), Optional.empty(), CLDR_VERSION);

        assertThat(icuData).hasName("xx");
        assertThat(icuData).hasFallback(true);
        assertThat(icuData).getPaths().isEmpty();

        // Root gets a couple of special paths added to it due to the need to work around a CLDR
        // data bug.
        IcuData rootData = new IcuData("root", true);
        CollationMapper.process(rootData, cldrData(), Optional.empty(), CLDR_VERSION);
        assertThat(rootData).hasName("root");
        assertThat(rootData).hasFallback(true);
        assertThat(rootData).getPaths().hasSize(2);
        assertThat(rootData).hasValuesFor("/collations/standard/Version", CLDR_VERSION);
        assertThat(rootData).hasEmptyValue("/collations/standard/Sequence");
    }

    /** The {@code defaultCollation} value is copied verbatim to {@code /collations/default}. */
    @Test
    public void testDefault() {
        CldrData cldrData =
            cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));

        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
        assertThat(icuData).getPaths().hasSize(1);
        assertThat(icuData).hasValuesFor("/collations/default", "any value");
    }

    // This tests legacy behaviour which mimics the original converter code. There's no promise
    // that it's semantically correct though.
    @Test
    public void testLastAltRuleOverridesExisting() {
        // Note that in DTD order (which is what the paths are processed in) the path with no "alt"
        // attribute comes after everything else, but the first "alt" path is overwritten by the
        // second. It's not even clear there should ever be two alt paths, or what the paths mean
        // (the original code seems to suggest it's looking for the "short" alternate form, but
        // the "alt" attribute can have more than the value "short"...)
        CldrData cldrData = cldrData(
            collationRule("foo", "alt1", "First alt rule"),
            collationRule("foo", "alt2", "Second alt rule"),
            collationRule("foo", null, "First rule"));

        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/collations/foo/Version", CLDR_VERSION);
        assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
    }

    /**
     * Comment lines (starting with '#'), empty lines and leading/trailing whitespace in a rule are
     * expected to be absent from the emitted sequence values.
     */
    @Test
    public void testCommentAndWhitespaceStripping() {
        CldrData cldrData = cldrData(
            collationRule("foo", null,
                "# Comments are stripped",
                "",
                " # As are empty lines and leading/trailing spaces",
                " Here is a value ",
                "# And more comments to be stripped",
                "And another value"));

        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
        assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
            "Here is a value",
            "And another value");
    }

    // Just in case anything weird happens with non-BMP char sequences:
    // <collation type='emoji'>
    //   <cr><![CDATA[
    //     # START AUTOGENERATED EMOJI ORDER
    //     & [last primary ignorable]<<*
    //     & [before 1]\uFDD1€
    //     <*
    //     <*☺
    //     <*
    //     ...
    // (The excerpt above omits the non-BMP characters that follow each '*'; the test body below
    // spells them out as surrogate-pair escapes.)
    @Test
    public void testEmoji() {
        CldrData cldrData = cldrData(
            collationRule("emoji", null,
                " # START AUTOGENERATED EMOJI ORDER",
                " & [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
                    + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
                " & [before 1]\uFDD1€",
                " <*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
                    + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
                    + "\uD83D\uDE07",
                " <*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
                    + "\uD83D\uDE19",
                " <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));

        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);

        // Same rule lines as above, minus the comment line and the leading whitespace.
        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("/collations/emoji/Version", CLDR_VERSION);
        assertThat(icuData).hasValuesFor("/collations/emoji/Sequence",
            "& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
                + "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
            "& [before 1]\uFDD1€",
            "<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
                + "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
                + "\uD83D\uDE07",
            "<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
                + "\uD83D\uDE19",
            "<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11");
    }

    /**
     * ICU "special" data is supplied separately from the main CLDR data (as the optional third
     * argument); its attribute values are expected under the two resource paths asserted below.
     */
    @Test
    public void testSpecials() {
        CldrData specials = cldrData(
            CldrValue.parseValue(
                "//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""),
            CldrValue.parseValue(
                "//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));

        IcuData icuData = new IcuData("xx", true);
        CollationMapper.process(icuData, cldrData(), Optional.of(specials), CLDR_VERSION);
        assertThat(icuData).getPaths().hasSize(2);
        assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
        assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");
    }

    /** Wraps the given values (possibly none) in a {@link CldrData} instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /**
     * Builds a collation rule value at {@code //ldml/collations/collation[@type=...]/cr}.
     *
     * @param type value of the "type" attribute on the {@code collation} element.
     * @param alt value of the "alt" attribute on the {@code cr} element, or {@code null} to omit
     *     the attribute entirely.
     * @param lines the rule text, one entry per line; entries are joined with '\n'.
     */
    private static CldrValue collationRule(String type, String alt, String... lines) {
        StringBuilder cldrPath = new StringBuilder("//ldml/collations");
        appendAttribute(cldrPath.append("/collation"), "type", type);
        cldrPath.append("/cr");
        if (alt != null) {
            appendAttribute(cldrPath, "alt", alt);
        }
        return CldrValue.parseValue(cldrPath.toString(), Joiner.on('\n').join(lines));
    }

    /** Appends {@code [@k="v"]} to the given path buffer. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        out.append(String.format("[@%s=\"%s\"]", k, v));
    }
}