• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/bin/bash
2# Copyright (c) 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6set -e
7
# Remove entries currently not used in Chromium/V8.
#
# Globals:
#   localedatapath (read) - directory whose *.txt files are rewritten in place.
#
# Each file is edited twice with GNU sed (-r -i): the first pass drops the
# unused resources, the second pass drops a "fields" block left empty.
function filter_locale_data {
  local langpath

  echo "Removing unnecessary categories in ${localedatapath}"
  # Quote the directory so that a tree checked out under a path containing
  # whitespace or glob characters is still handled; only '*.txt' may expand.
  for langpath in "${localedatapath}"/*.txt
  do
    echo "Overwriting ${langpath} ..."
    # Single-line resources are matched as 'name{...}', multi-line resources
    # as a range from 'name{' to the next closing brace at the same depth.
    sed -r -i \
      '/^    characterLabel\{$/,/^    \}$/d
       /^    AuxExemplarCharacters\{.*\}$/d
       /^    AuxExemplarCharacters\{$/, /^    \}$/d
       /^    ExemplarCharacters\{.*\}$/d
       /^    ExemplarCharacters\{$/, /^    \}$/d
       /^    ExemplarCharactersNumbers\{.*\}$/d
       /^    ExemplarCharactersPunctuation\{.*\}$/d
       /^    ExemplarCharactersPunctuation\{$/, /^    \}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)(|-short|-narrow)\{$/, /^        \}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)(|-short|-narrow)\{.*\}$/d
       /^        (mon|tue|wed|thu|fri|sat|sun)-(short|narrow):alias\{.*\}$/d' \
      "${langpath}"
    # Delete empty blocks. Otherwise, locale fallback fails.
    # See crbug.com/v8/8414 .
    sed -r -i \
      '/^    fields\{$/ {
         N
         /^    fields\{\n    \}/ d
      }' "${langpath}"
  done
}
35
# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
#
# Globals:
#   scriptdir           (read)    - directory holding accept_lang.list.
#   langdatapath        (read)    - directory whose *.txt files are rewritten.
#   ACCEPT_LANG_PATTERN (written) - regex built from accept_lang.list.
function filter_display_language_names {
  local lang langpath target OP

  # Start from an empty pattern so that a value inherited from the environment
  # or left over from an earlier call cannot leak into the alternation.
  ACCEPT_LANG_PATTERN=""
  # One language code per line; word splitting of the grep output is intended.
  for lang in $(grep -v '^#' "${scriptdir}/accept_lang.list")
  do
    # Set $OP to '|' only if $ACCEPT_LANG_PATTERN is not empty.
    OP=${ACCEPT_LANG_PATTERN:+|}
    ACCEPT_LANG_PATTERN="${ACCEPT_LANG_PATTERN}${OP}${lang}"
  done
  # The trailing [^a-z] keeps a code from matching a longer code that merely
  # starts with it.
  ACCEPT_LANG_PATTERN="(${ACCEPT_LANG_PATTERN})[^a-z]"

  echo "Filtering out display names for non-A-L languages in ${langdatapath}"
  for langpath in "${langdatapath}"/*.txt
  do
    target=${langpath}
    echo "Overwriting ${target} ..."
    # Inside the "Languages" block only the header, the accepted languages and
    # the closing brace are printed before the line is deleted. The pattern is
    # spliced in double-quoted so its bracket expression is never globbed.
    sed -r -i \
    '/^    Keys\{$/,/^    \}$/d
     /^    Languages\{$/, /^    \}$/ {
       /^    Languages\{$/p
       /^        '"${ACCEPT_LANG_PATTERN}"'/p
       /^    \}$/p
       d
     }
     /^    Types\{$/,/^    \}$/d
     /^    Types%short\{$/,/^    \}$/d
     /^    characterLabelPattern\{$/,/^    \}$/d
     /^    Variants\{$/,/^    \}$/d' "${target}"

    # Delete an empty "Languages" block. Otherwise, getting the display
    # name for all the language in a given locale (e.g. en_GB) would fail
    # when the above filtering sed command results in an empty "Languages"
    # block.
    sed -r -i \
    '/^    Languages\{$/ {
       N
       /^    Languages\{\n    \}/ d
    }' "${target}"
  done
}
76
77
# Keep only the minimum locale data for non-UI languages.
#
# A "non-UI" language is a line of accept_lang.list that is neither a comment
# nor matched by any entry of chrome_ui_languages.list.
# NOTE(review): the UI-language match is unanchored, so an accept language that
# merely contains a UI code is treated as a UI language too - confirm intended.
#
# Globals:
#   scriptdir       (read)    - directory holding the two *.list files.
#   localedatapath  (read)    - locale data directory, rewritten in place.
#   langdatapath    (read)    - language-name data directory, rewritten in place.
#   UI_LANGUAGES    (written) - '|'-joined UI language codes.
#   EXTRA_LANGUAGES (written) - accept languages that are not UI languages.
function abridge_locale_data_for_non_ui_languages {
  local lang target OP

  # Start empty so a value inherited from the environment or from an earlier
  # call cannot leak into the alternation.
  UI_LANGUAGES=""
  for lang in $(grep -v '^#' "${scriptdir}/chrome_ui_languages.list")
  do
    # Set $OP to '|' only if $UI_LANGUAGES is not empty.
    OP=${UI_LANGUAGES:+|}
    UI_LANGUAGES="${UI_LANGUAGES}${OP}${lang}"
  done

  # 'grep -E' replaces the obsolescent 'egrep'. Exit status 1 only means that
  # no extra language exists, which must not abort a 'set -e' run; any other
  # failure still propagates.
  EXTRA_LANGUAGES=$(grep -E -v -e '^#' -e "(${UI_LANGUAGES})" \
                    "${scriptdir}/accept_lang.list") || [ $? -eq 1 ]

  # In both loops below the first sed rule prints from line 1 through the
  # '<lang>{' header; sed starts looking for that header on line 2.
  echo "Creating minimum locale data in ${localedatapath}"
  for lang in ${EXTRA_LANGUAGES}
  do
    target="${localedatapath}/${lang}.txt"
    [ -e "${target}" ] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."

    # Do not include '%%Parent' line on purpose.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    (LocaleScript|layout)\{$/, /^    \}$/p
       /^    Version\{.*$/p
       /^\}$/p' "${target}"
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in ${EXTRA_LANGUAGES}
  do
    target="${langdatapath}/${lang}.txt"
    [ -e "${target}" ] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."

    # Do not include '%%Parent' line on purpose.
    # Of the "Languages" block only the language's own display name is kept.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${lang}"'\{.*\}$/p
         /^    \}$/p
       }
       /^\}$/p' "${target}"
  done
}
125
# Keep only the currencies used by the largest 150 economies in terms of GDP.
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
#
# Globals:
#   scriptdir (read)    - directory holding currencies.list.
#   dataroot  (read)    - data root; every curr/*.txt below it except
#                         supplementalData.txt is rewritten in place.
#   KEEPLIST  (written) - '(A|B|...)' alternation of the currencies to keep.
function filter_currency_data {
  local currency i locale OP

  unset KEEPLIST
  # One currency code per line; word splitting of the grep output is intended.
  for currency in $(grep -v '^#' "${scriptdir}/currencies.list")
  do
    OP=${KEEPLIST:+|}
    KEEPLIST=${KEEPLIST}${OP}${currency}
  done
  KEEPLIST="(${KEEPLIST})"

  for i in "${dataroot}"/curr/*.txt
  do
    locale=$(basename "${i}" .txt)
    if [[ "${locale}" == 'supplementalData' ]]; then
      continue
    fi
    echo "Overwriting $i for $locale"
    # With -n only the listed resources survive. Within the three per-currency
    # tables only the header, the kept currencies and the closing brace are
    # printed. The first rule prints from line 1 through the '<locale>{'
    # header; sed starts looking for that header on line 2.
    sed -n -r -i \
      '1, /^'"${locale}"'\{$/ p
       /^    "%%ALIAS"\{/ p
       /^    ___\{..\}$/ p
       /^    %%Parent\{/ p
       /^    Currencies\{$/, /^    \}$/ {
         /^    Currencies\{$/ p
         /^        '"${KEEPLIST}"'\{$/, /^        \}$/ p
         /^    \}$/ p
       }
       /^    Currencies%narrow\{$/, /^    \}$/ {
         /^    Currencies%narrow\{$/ p
         /^        '"${KEEPLIST}"'\{".*\}$/ p
         /^    \}$/ p
       }
       /^    CurrencyPlurals\{$/, /^    \}$/ {
         /^    CurrencyPlurals\{$/ p
         /^        '"${KEEPLIST}"'\{$/, /^        \}$/ p
         /^    \}$/ p
       }
       /^    [cC]urrency(Map|Meta|Spacing|UnitPatterns)\{$/, /^    \}$/ p
       /^    Version\{.*\}$/p
       /^\}$/p' "${i}"

    # Delete empty blocks. Otherwise, locale fallback fails.
    # See crbug.com/791318.
    sed -r -i \
      '/^    Currenc(ie.*|yPlurals)\{$/ {
         N
         /^    Currenc(ie.*|yPlurals)\{\n    \}/ d
      }' "${i}"
  done
}
176
# Remove the display names for numeric region codes other than
# 419 (Latin America) because we don't use them.
#
# The pattern deletes every line containing three digits followed by '{'
# unless the first digit is 4, so any code starting with 4 is kept, not
# only 419.
#
# Globals:
#   dataroot (read) - data root; every region/*.txt below it is rewritten.
function filter_region_data {
  # The directory is quoted so a data root containing whitespace still works;
  # only the '*.txt' part is left to pathname expansion.
  sed -i  '/[0-35-9][0-9][0-9]{/ d' "${dataroot}"/region/*.txt
}
182
# Remove the non-meta zone entries (where exemplar cities live) from every
# zone file except root.txt.
#
# This assumes that exemplar city ("ec") is only present in
# non-meta zones and that meta zones are listed after non-meta
# zones.
#
# Globals:
#   dataroot (read) - data root; zone/*.txt below it is rewritten in place.
function remove_exemplar_cities {
  local i

  for i in "${dataroot}"/zone/*.txt
  do
    # An explicit 'if' is used instead of '[ ... ] && sed': with the latter the
    # function returns non-zero whenever root.txt is the last file visited,
    # which aborts a caller running under 'set -e'.
    if [[ "${i}" == "${dataroot}/zone/root.txt" ]]; then
      continue
    fi
    # From the "zoneStrings" header through the first "meta:" entry keep only
    # those two boundary lines and drop everything in between.
    sed -i '/^    zoneStrings/, /^        "meta:/ {
      /^    zoneStrings/ p
      /^        "meta:/ p
      d
    }' "${i}"
  done
}
197
# Keep only duration and compound in units* sections.
#
# Globals:
#   dataroot (read) - data root; every unit/*.txt below it is rewritten.
function filter_unit_data {
  local i

  # The directory is quoted so a data root containing whitespace still works.
  for i in "${dataroot}"/unit/*.txt
  do
    echo "Overwriting $i ..."
    # Within units, unitsNarrow and unitsShort print the header, the duration
    # and compound sub-blocks and the closing brace, then delete the line so
    # nothing else of the section survives.
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/, /^    \}$/ {
         /^    units(|Narrow|Short)\{$/ p
         /^        (duration|compound)\{$/, /^        \}$/ p
         /^    \}$/ p
         d
       }' "${i}"

    # Delete empty units,units{Narrow|Short} block. Otherwise, locale fallback
    # fails. See crbug.com/707515.
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/ {
         N
         /^    units(|Narrow|Short)\{\n    \}/ d
      }' "${i}"
  done
}
220
# big5han and gb2312han collation do not make any sense and nobody uses them.
# The unihan collation block is removed along with them.
#
# Globals:
#   dataroot (read) - data root; coll/zh.txt below it is rewritten in place.
function remove_legacy_chinese_codepoint_collation {
  local target

  echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale"
  target="${dataroot}/coll/zh.txt"
  echo "Overwriting ${target}"
  # Each block runs from its 'NAMEhan{' header to the next closing brace at
  # the same depth. The path is quoted so whitespace in it is harmless.
  sed -r -i '/^        (uni|big5|gb2312)han\{$/,/^        \}$/ d' "${target}"
}
228
# Every directory the filters work on is derived from the parent of the
# directory this script was started from.
treeroot=$(dirname "$0")/..
scriptdir=${treeroot}/scripts
dataroot=${treeroot}/source/data
langdatapath=${dataroot}/lang
localedatapath=${dataroot}/locales

# Run the filters one after another in this fixed order.
for step in \
  filter_locale_data \
  filter_display_language_names \
  abridge_locale_data_for_non_ui_languages \
  filter_currency_data \
  filter_region_data \
  remove_legacy_chinese_codepoint_collation \
  filter_unit_data
do
  "${step}"
done

# Chromium OS needs exemplar cities for timezones, but not Chromium.
# It'll save 400kB (uncompressed), but the size difference in
# 7z compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities
248