/*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.i18n.phonenumbers.metadata.i18n;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.auto.value.AutoValue;
import java.util.regex.Pattern;

/**
 * A lightweight, type-safe wrapper around a BCP 47 language tag that consists of a language code
 * and, optionally, a script (e.g. "en" or "zh-Hant"). The only normalization applied to the value
 * it is given is rewriting the separator as a hyphen; no other canonicalization takes place.
 *
 * <p>{@code Locale} is not a good fit here because the JDK deliberately uses deprecated language
 * tags and would, for example, turn "id" (Indonesian) into "in", which conflicts with BCP 47. See
 * {@link java.util.Locale#forLanguageTag(String) forLanguageTag()} for more information.
 *
 * <p>The metadata tooling relies on language codes mostly as key values and makes only minimal
 * use of their semantics. It never canonicalizes or otherwise modifies them, so a language code
 * used for this data may end up being non-canonical. Any library which loads the metadata at
 * runtime is responsible for making sure its mappings to the data account for current
 * canonicalization.
 */
@AutoValue
public abstract class SimpleLanguageTag {
  // Accepts two or three lowercase ASCII letters, optionally followed by a '-' or '_' separator
  // and a four letter script subtag with a leading capital (e.g. "Hant").
  // This can be extended or modified to use Locale as necessary.
  private static final Pattern SIMPLE_TAG = Pattern.compile("[a-z]{2,3}(?:[-_][A-Z][a-z]{3})?");

  /**
   * Creates a language tag instance from the given string, applying only minimal structural
   * checks. A {@code '_'} between the language and the script is rewritten as {@code '-'}.
   *
   * @param lang the language tag, e.g. {@code "en"}, {@code "zh-Hant"} or {@code "zh_Hant"}
   * @return a tag instance holding the hyphen-separated form of {@code lang}
   * @throws IllegalArgumentException if {@code lang} does not have the expected structure
   */
  public static SimpleLanguageTag of(String lang) {
    boolean isWellFormed = SIMPLE_TAG.matcher(lang).matches();
    checkArgument(isWellFormed, "invalid language tag: %s", lang);
    String hyphenated = lang.replace('_', '-');
    return new AutoValue_SimpleLanguageTag(hyphenated);
  }

  // Visible for AutoValue only.
  abstract String lang();

  /** Returns the hyphen-separated tag string held by this instance. */
  @Override
  public final String toString() {
    String tag = lang();
    return tag;
  }
}