package org.unicode.cldr.test;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.PatternCache;

import com.ibm.icu.text.UnicodeSet;

/**
 * Checks quotation-mark usage in two groups of paths:
 * <ul>
 * <li>values under {@code //ldml/units/} are flagged when they contain an ASCII
 * apostrophe or double quote;</li>
 * <li>values under {@code //ldml/delimiters/} are flagged when they are not one of
 * the code points in {@link #VALID_DELIMITERS}.</li>
 * </ul>
 */
public class CheckQuotes extends CheckCLDR {
    /** Page referenced by the invalid-delimiter message. */
    public static final String VALID_DELIMITER_URL = "https://cldr.unicode.org/translation/characters";

    /** Matches a single ASCII apostrophe or ASCII double quote. */
    private static final Pattern ASCII_QUOTES = PatternCache.get("[\'\"]");
    private static final Pattern UNITS = PatternCache.get("//ldml/units/.*");
    private static final Pattern DELIMITERS = PatternCache.get("//ldml/delimiters/.*");

    /**
     * Code points accepted as delimiter values. Frozen because the set is shared
     * static state that is never modified after construction.
     */
    private static final UnicodeSet VALID_DELIMITERS = new UnicodeSet()
        .add(0x2018, 0x201A) // ‘ ’ ‚
        .add(0x201C, 0x201E) // “ ” „
        .add(0x300C, 0x300F) // 「 」 『 』
        .add(0x2039, 0x203A) // ‹ ›
        .add(0x00AB) // «
        .add(0x00BB) // »
        .freeze();

    /**
     * Appends a {@link CheckStatus} to {@code result} for each quote problem found in
     * {@code value}.
     *
     * @param path     path of the item; decides which of the two checks apply
     * @param fullPath not used by this check
     * @param value    value to inspect; {@code null} is skipped without adding anything
     * @param options  not used by this check
     * @param result   list that receives any findings
     * @return this check
     */
    @Override
    public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
        List<CheckStatus> result) {
        if (value == null) {
            return this;
        }

        if (UNITS.matcher(path).matches()) {
            Matcher asciiQuote = ASCII_QUOTES.matcher(value);
            if (asciiQuote.find()) {
                // The severity only matters once a quote was found, so the locale is
                // looked up here: an error for "en", a warning for every other locale.
                boolean isEnglish = this.getCldrFileToCheck().getLocaleID().equals("en");
                CheckStatus.Type type = isEnglish ? CheckStatus.errorType : CheckStatus.warningType;
                result.add(new CheckStatus().setCause(this)
                    .setMainType(type)
                    .setSubtype(Subtype.asciiQuotesNotAllowed)
                    .setMessage("Use of ASCII quote marks (' \") is discouraged. Use primes for units (′ ″) and curly quotes for text (‘ ’ “ ” …)"));
            }
        }

        if (DELIMITERS.matcher(path).matches()) {
            // An empty value can never be a valid delimiter. It is rejected before the
            // set lookup because some ICU releases are believed to throw on zero-length
            // strings passed to UnicodeSet.contains -- TODO confirm against the ICU
            // version in use.
            if (value.isEmpty() || !VALID_DELIMITERS.contains(value)) {
                result.add(new CheckStatus().setCause(this)
                    .setMainType(CheckStatus.errorType)
                    .setSubtype(Subtype.invalidDelimiter)
                    .setMessage("Invalid delimiter. See " + VALID_DELIMITER_URL + " for a list of valid delimiters."));
            }
        }
        return this;
    }
}