#!/bin/bash
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Every function below rewrites ICU resource text files in place with GNU sed
# (-r / -i). They read the path globals (dataroot, scriptdir, localedatapath,
# langdatapath) that the top-level code at the bottom of this script assigns
# before calling them.
#
# NOTE(review): the copy of this script that was reviewed had its whitespace
# collapsed, so the number of leading spaces inside the sed patterns could not
# be read from it. They were restored assuming the data files indent 4 spaces
# per nesting level (top-level keys at 4, their children at 8). Confirm
# against the actual *.txt files before landing.

set -e

# Remove entries currently not used in Chromium/V8.
# Globals: localedatapath (read) - directory whose *.txt files are rewritten.
function filter_locale_data {
  local langpath

  echo "Removing unncessary categories in ${localedatapath}"
  for langpath in "${localedatapath}"/*.txt
  do
    echo "Overwriting ${langpath} ..."
    # Both the one-line form (Key{...}) and the multi-line form (Key{ ... })
    # of each table are handled, hence the paired patterns.
    sed -r -i \
      '/^    characterLabel\{$/,/^    \}$/d
       /^    AuxExemplarCharacters\{.*\}$/d
       /^    AuxExemplarCharacters\{$/, /^    \}$/d
       /^    ExemplarCharacters\{.*\}$/d
       /^    ExemplarCharacters\{$/, /^    \}$/d
       /^    ExemplarCharactersNumbers\{.*\}$/d
       /^    ExemplarCharactersPunctuation\{.*\}$/d
       /^    ExemplarCharactersPunctuation\{$/, /^    \}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)(|-short|-narrow)\{$/, /^        \}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)(|-short|-narrow)\{.*\}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)-(short|narrow):alias\{.*\}$/d' \
      "${langpath}"
    # Delete empty blocks. Otherwise, locale fallback fails.
    # See crbug.com/v8/8414 .
    sed -r -i \
      '/^    fields\{$/ {
         N
         /^    fields\{\n    \}/ d
       }' "${langpath}"
  done
}

# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
# Globals: scriptdir (read)    - directory holding accept_lang.list.
#          langdatapath (read) - directory whose *.txt files are rewritten.
function filter_display_language_names {
  local lang langpath target OP
  # Start from an empty pattern so that a value inherited from the environment
  # or from an earlier call cannot leak into the regex.
  local ACCEPT_LANG_PATTERN=''

  # Word splitting of the grep output is intentional: one language per word.
  for lang in $(grep -v '^#' "${scriptdir}/accept_lang.list")
  do
    # Set $OP to '|' only if $ACCEPT_LANG_PATTERN is not empty.
    OP=${ACCEPT_LANG_PATTERN:+|}
    ACCEPT_LANG_PATTERN="${ACCEPT_LANG_PATTERN}${OP}${lang}"
  done
  # The trailing [^a-z] keeps a listed code from matching a longer code that
  # merely starts with it, while still accepting suffixes such as "_GB".
  ACCEPT_LANG_PATTERN="(${ACCEPT_LANG_PATTERN})[^a-z]"

  echo "Filtering out display names for non-A-L languages in ${langdatapath}"
  for langpath in "${langdatapath}"/*.txt
  do
    target=${langpath}
    echo "Overwriting ${target} ..."
    # Inside Languages{...} the wanted lines are printed explicitly and then
    # every line of the block is deleted, so only the printed copies survive.
    sed -r -i \
      '/^    Keys\{$/,/^    \}$/d
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${ACCEPT_LANG_PATTERN}"'/p
         /^    \}$/p
         d
       }
       /^    Types\{$/,/^    \}$/d
       /^    Types%short\{$/,/^    \}$/d
       /^    characterLabelPattern\{$/,/^    \}$/d
       /^    Variants\{$/,/^    \}$/d' "${target}"

    # Delete an empty "Languages" block. Otherwise, getting the display
    # name for all the language in a given locale (e.g. en_GB) would fail
    # when the above filtering sed command results in an empty "Languages"
    # block.
    sed -r -i \
      '/^    Languages\{$/ {
         N
         /^    Languages\{\n    \}/ d
       }' "${target}"
  done
}


# Keep only the minimum locale data for non-UI languages.
# Globals: scriptdir (read)      - directory holding chrome_ui_languages.list
#                                  and accept_lang.list.
#          localedatapath (read) - locale data directory.
#          langdatapath (read)   - language display name directory.
function abridge_locale_data_for_non_ui_languages {
  local lang target OP
  local UI_LANGUAGES=''
  local EXTRA_LANGUAGES

  for lang in $(grep -v '^#' "${scriptdir}/chrome_ui_languages.list")
  do
    # Set $OP to '|' only if $UI_LANGUAGES is not empty.
    OP=${UI_LANGUAGES:+|}
    UI_LANGUAGES="${UI_LANGUAGES}${OP}${lang}"
  done

  # Accept-language entries that are neither comments nor matched by a UI
  # language. The assignment is kept separate from the 'local' declaration so
  # that a grep failure is still caught by 'set -e'.
  EXTRA_LANGUAGES=$(grep -E -v -e '^#' -e "(${UI_LANGUAGES})" \
    "${scriptdir}/accept_lang.list")

  echo "Creating minimum locale data in ${localedatapath}"
  for lang in ${EXTRA_LANGUAGES}
  do
    target=${localedatapath}/${lang}.txt
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."

    # Do not include '%%Parent' line on purpose.
    # With -n, only the explicitly printed lines are kept: everything up to
    # the "<lang>{" header, the listed tables, and the final closing brace.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    (LocaleScript|layout)\{$/, /^    \}$/p
       /^    Version\{.*$/p
       /^\}$/p' "${target}"
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in ${EXTRA_LANGUAGES}
  do
    target=${langdatapath}/${lang}.txt
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."

    # Do not include '%%Parent' line on purpose.
    # Of the Languages table only the language's own display name is kept.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${lang}"'\{.*\}$/p
         /^    \}$/p
       }
       /^\}$/p' "${target}"
  done
}

# Keep only the currencies used by the largest 150 economies in terms of GDP.
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
# Globals: scriptdir (read) - directory holding currencies.list.
#          dataroot (read)  - data root; curr/*.txt below it is rewritten.
function filter_currency_data {
  local currency i locale OP
  local KEEPLIST=''

  for currency in $(grep -v '^#' "${scriptdir}/currencies.list")
  do
    OP=${KEEPLIST:+|}
    KEEPLIST=${KEEPLIST}${OP}${currency}
  done
  KEEPLIST="(${KEEPLIST})"

  for i in "${dataroot}"/curr/*.txt
  do
    # File name without directory and without the .txt suffix.
    locale=${i##*/}
    locale=${locale%.txt}
    [[ "${locale}" == 'supplementalData' ]] && continue
    echo "Overwriting $i for $locale"
    sed -n -r -i \
      '1, /^'"${locale}"'\{$/ p
       /^    "%%ALIAS"\{/ p
       /^    ___\{..\}$/ p
       /^    %%Parent\{/ p
       /^    Currencies\{$/, /^    \}$/ {
         /^    Currencies\{$/ p
         /^        '"${KEEPLIST}"'\{$/, /^        \}$/ p
         /^    \}$/ p
       }
       /^    Currencies%narrow\{$/, /^    \}$/ {
         /^    Currencies%narrow\{$/ p
         /^        '"${KEEPLIST}"'\{".*\}$/ p
         /^    \}$/ p
       }
       /^    CurrencyPlurals\{$/, /^    \}$/ {
         /^    CurrencyPlurals\{$/ p
         /^        '"${KEEPLIST}"'\{$/, /^        \}$/ p
         /^    \}$/ p
       }
       /^    [cC]urrency(Map|Meta|Spacing|UnitPatterns)\{$/, /^    \}$/ p
       /^    Version\{.*\}$/p
       /^\}$/p' "${i}"

    # Delete empty blocks. Otherwise, locale fallback fails.
    # See crbug.com/791318.
    sed -r -i \
      '/^    Currenc(ie.*|yPlurals)\{$/ {
         N
         /^    Currenc(ie.*|yPlurals)\{\n    \}/ d
       }' "${i}"
  done
}

# Remove the display names for numeric region codes other than
# 419 (Latin America) because we don't use them.
# Globals: dataroot (read) - data root; region/*.txt below it is rewritten.
function filter_region_data {
  # Deletes every line containing three digits followed by '{' unless the
  # first digit is 4.
  sed -i '/[0-35-9][0-9][0-9]{/ d' "${dataroot}"/region/*.txt
}

# This assumes that exemplar city ("ec") is only present in
# non-meta zones and that meta zones are listed after non-meta
# zones.
function remove_exemplar_cities {
  # Globals: dataroot (read) - data root; zone/*.txt below it is rewritten,
  # except root.txt.
  #
  # NOTE(review): the reviewed copy of this script had its whitespace
  # collapsed. The leading spaces inside the sed patterns in this function and
  # the ones below were restored assuming 4 spaces per nesting level in the
  # data files. Confirm against the actual *.txt files.
  local i

  for i in "${dataroot}"/zone/*.txt
  do
    # An explicit 'if' is used instead of '[ ... ] && sed': with the short
    # circuit form the loop, and therefore the function, ends with status 1
    # whenever the last file visited is root.txt, which aborts a caller
    # running under 'set -e'.
    if [[ "${i}" != "${dataroot}/zone/root.txt" ]]; then
      # From the zoneStrings line up to the first "meta:" line, keep only
      # those two boundary lines and delete everything in between.
      sed -i '/^    zoneStrings/, /^        "meta:/ {
                /^    zoneStrings/ p
                /^        "meta:/ p
                d
              }' "${i}"
    fi
  done
}

# Keep only duration and compound in units* sections.
# Globals: dataroot (read) - data root; unit/*.txt below it is rewritten.
function filter_unit_data {
  local i

  for i in "${dataroot}"/unit/*.txt
  do
    echo "Overwriting $i ..."
    # Inside each units* block the wanted lines are printed explicitly and
    # then every line of the block is deleted, so only the printed copies
    # survive.
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/, /^    \}$/ {
         /^    units(|Narrow|Short)\{$/ p
         /^        (duration|compound)\{$/, /^        \}$/ p
         /^    \}$/ p
         d
       }' "${i}"

    # Delete empty units,units{Narrow|Short} block. Otherwise, locale fallback
    # fails. See crbug.com/707515.
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/ {
         N
         /^    units(|Narrow|Short)\{\n    \}/ d
       }' "${i}"
  done
}

# big5han and gb2312han collation do not make any sense and nobody uses them.
# Globals: dataroot (read) - data root; coll/zh.txt below it is rewritten.
function remove_legacy_chinese_codepoint_collation {
  local target="${dataroot}/coll/zh.txt"

  echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale"
  echo "Overwriting ${target}"
  sed -r -i '/^        (uni|big5|gb2312)han\{$/,/^        \}$/ d' "${target}"
}

# All paths are resolved relative to the location of this script.
treeroot="$(dirname "$0")/.."
dataroot="${treeroot}/source/data"
scriptdir="${treeroot}/scripts"
localedatapath="${dataroot}/locales"
langdatapath="${dataroot}/lang"

filter_locale_data
filter_display_language_names
abridge_locale_data_for_non_ui_languages
filter_currency_data
filter_region_data
remove_legacy_chinese_codepoint_collation
filter_unit_data

# Chromium OS needs exemplar cities for timezones, but not Chromium.
# It'll save 400kB (uncompressed), but the size difference in
# 7z compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities