• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2017 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Utility methods associated with ICU source and builds."""
16
17from __future__ import print_function
18
19import filecmp
20import glob
21import os
22import pathlib
23import shutil
24import subprocess
25import sys
26
27import i18nutil
28import ziputil
29
30
# ICU data build filter passed to _MakeIcuDataFilesWithoutTimeZoneFiles() for the
# final ICU4C .dat build. It lists the time zone resources (metaZones,
# timezoneTypes, windowsZones, zoneinfo64) in the "misc" exclude list and only
# "jaml" in the "brkitr_adaboost" include list.
# See https://github.com/unicode-org/icu/blob/main/docs/userguide/icu_data/buildtool.md
# for the documentation of the filter format.
ICU_DATA_FILTERS = """{
  "featureFilters": {
    "misc": {
      "excludelist": [
        "metaZones",
        "timezoneTypes",
        "windowsZones",
        "zoneinfo64"
      ]
    },
    "brkitr_adaboost": {
      "includelist": [
        "jaml"
      ]
    }
  }
}
"""
51
# ICU data build filter passed to _MakeIcuDataFilesOnce() for the full build.
# Unlike ICU_DATA_FILTERS it has no "misc" exclude list; it only lists "jaml"
# in the "brkitr_adaboost" include list.
ICU_MLDATA_FILTERS = """{
  "featureFilters": {
    "brkitr_adaboost": {
      "includelist": [
        "jaml"
      ]
    }
  }
}
"""
62
63
def cldrDir():
  """Locates the CLDR checkout (external/cldr) in the Android source tree.

  Returns:
    The real path of <android root>/external/cldr, after it has been passed
    through i18nutil.CheckDirExists().
  """
  android_root = i18nutil.GetAndroidRootOrDie()
  resolved_path = os.path.realpath('%s/external/cldr' % android_root)
  i18nutil.CheckDirExists(resolved_path, 'external/cldr')
  return resolved_path
70
71
def icuDir():
  """Locates the ICU checkout (external/icu) in the Android source tree.

  Returns:
    The real path of <android root>/external/icu, after it has been passed
    through i18nutil.CheckDirExists().
  """
  android_root = i18nutil.GetAndroidRootOrDie()
  resolved_path = os.path.realpath('%s/external/icu' % android_root)
  i18nutil.CheckDirExists(resolved_path, 'external/icu')
  return resolved_path
78
79
def icu4cDir():
  """Locates the ICU4C sources (icu4c/source under icuDir()).

  Returns:
    The real path of the ICU4C source directory, after it has been passed
    through i18nutil.CheckDirExists().
  """
  icu_root = icuDir()
  source_dir = os.path.realpath('%s/icu4c/source' % icu_root)
  i18nutil.CheckDirExists(source_dir, 'external/icu/icu4c/source')
  return source_dir
85
86
def icu4jDir():
  """Locates the ICU4J directory (icu4j under icuDir()).

  Returns:
    The real path of the ICU4J directory, after it has been passed through
    i18nutil.CheckDirExists().
  """
  icu_root = icuDir()
  java_dir = os.path.realpath('%s/icu4j' % icu_root)
  i18nutil.CheckDirExists(java_dir, 'external/icu/icu4j')
  return java_dir
92
93
def datFile(icu_build_dir):
  """Returns the location of the ICU .dat file in the specified ICU build dir.

  Args:
    icu_build_dir: the ICU build directory to search.

  Returns:
    The path of the single file matching data/out/tmp/icudt??l.dat under
    icu_build_dir.

  Prints an error and exits the process with status 1 unless exactly one
  matching file exists.
  """
  dat_file_pattern = '%s/data/out/tmp/icudt??l.dat' % icu_build_dir
  dat_files = glob.glob(dat_file_pattern)
  if len(dat_files) != 1:
    print('ERROR: Unexpectedly found %d .dat files (%s). Halting.' % (len(dat_files), dat_files))
    sys.exit(1)
  return dat_files[0]
103
104
def PrepareIcuBuild(icu_build_dir, data_filters_json=None):
  """Sets up an ICU build in the specified directory.

  Creates the directory if needed and runs "runConfigureICU Linux" in it.
  The caller's working directory is left untouched.

  Args:
    icu_build_dir: directory in which the build is configured.
    data_filters_json: optional ICU data filter JSON text. When given, it is
      written to icu4c_data_filters.json in the build directory and handed to
      the configure step through the ICU_DATA_FILTER_FILE variable.

  Raises:
    subprocess.CalledProcessError: if runConfigureICU exits with a non-zero status.
  """
  # Resolve the build dir up front so the filter file path handed to configure
  # stays valid even though configure runs from inside the build dir.
  icu_build_dir = os.path.abspath(icu_build_dir)

  # Create a directory to run 'make' from.
  os.makedirs(icu_build_dir, exist_ok=True)

  # Build the ICU tools.
  print('Configuring ICU tools...')
  cmd = ['env']
  if data_filters_json is not None:
    json_file_path = os.path.join(icu_build_dir, "icu4c_data_filters.json")
    print("json path: %s" % json_file_path)
    writeFileContent(json_file_path, data_filters_json)
    cmd.append('ICU_DATA_FILTER_FILE=%s' % json_file_path)

  cmd += ['ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data',
          '%s/runConfigureICU' % icu4cDir(),
          'Linux']
  # Run configure inside the build dir via cwd= instead of os.chdir(), so a
  # failure cannot leave the process in a different working directory.
  subprocess.check_call(cmd, cwd=icu_build_dir)
133
def writeFileContent(file_path, file_content):
  """Writes the string file_content to file_path, replacing any existing content."""
  with open(file_path, mode="w") as out_file:
    out_file.write(file_content)
138
def MakeTzDataFiles(icu_build_dir, iana_tar_file):
  """Builds and runs the ICU tools in ${icu_build_dir}/tools/tzcode.

  The tools are run against the specified IANA tzdata .tar.gz.
  The resulting zoneinfo64.txt is copied into the src directories.

  Args:
    icu_build_dir: an ICU build directory.
    iana_tar_file: path of the IANA tzdata .tar.gz to process.

  Raises:
    subprocess.CalledProcessError: if make exits with a non-zero status.
  """
  tzcode_working_dir = '%s/tools/tzcode' % icu_build_dir

  # Fix missing files.
  # The tz2icu tool only picks up icuregions and icuzones if they are in the CWD
  for icu_data_file in ['icuregions', 'icuzones']:
    icu_data_file_source = '%s/tools/tzcode/%s' % (icu4cDir(), icu_data_file)
    icu_data_file_symlink = '%s/%s' % (tzcode_working_dir, icu_data_file)
    # Leave an existing entry alone (e.g. from a previous run on this build dir);
    # os.symlink() would otherwise raise FileExistsError.
    if not os.path.lexists(icu_data_file_symlink):
      os.symlink(icu_data_file_source, icu_data_file_symlink)

  iana_tar_filename = os.path.basename(iana_tar_file)
  working_iana_tar_file = '%s/%s' % (tzcode_working_dir, iana_tar_filename)
  shutil.copyfile(iana_tar_file, working_iana_tar_file)

  print('Making ICU tz data files...')
  # The Makefile assumes the existence of the bin directory.
  # exist_ok keeps a re-run on the same build dir from failing here.
  os.makedirs('%s/bin' % icu_build_dir, exist_ok=True)

  # -j1 is needed because the build is not parallelizable. http://b/109641429
  subprocess.check_call(['make', '-j1', '-C', tzcode_working_dir])

  # Copy the source file to its ultimate destination.
  zoneinfo_file = '%s/zoneinfo64.txt' % tzcode_working_dir
  icu_txt_data_dir = '%s/data/misc' % icu4cDir()
  print('Copying zoneinfo64.txt to %s ...' % icu_txt_data_dir)
  shutil.copy(zoneinfo_file, icu_txt_data_dir)
170
171
def MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=False):
  """Builds the ICU .dat and .jar files using the current src data.

  The files are copied back into the expected locations in the src tree.

  This is a low-level method.
  Please check :func:`GenerateIcuDataFiles()` for caveats.

  Args:
    icu_build_dir: a configured ICU build directory (see PrepareIcuBuild()).
    copy_icu4c_dat_file_only: when True, stop after copying the ICU4C .dat
      file; the ICU4J .jar files and the test data are neither built nor copied.

  Raises:
    subprocess.CalledProcessError: if a make invocation exits with a non-zero status.

  Exits the process with status 1 if the number of generated .jar files is not 3.
  """
  # Keep track of the original cwd so we can go back to it at the end.
  original_working_dir = os.getcwd()

  # Resolve before changing directory so a relative build dir keeps pointing
  # at the same place afterwards.
  icu_build_dir = os.path.abspath(icu_build_dir)

  os.chdir(icu_build_dir)
  try:
    # Regenerate the .dat file.
    subprocess.check_call(['make', '-j32'])

    # Copy the .dat file to its ultimate destination.
    icu_dat_data_dir = '%s/stubdata' % icu4cDir()
    dat_file = datFile(icu_build_dir)

    print('Copying %s to %s ...' % (dat_file, icu_dat_data_dir))
    shutil.copy(dat_file, icu_dat_data_dir)

    if copy_icu4c_dat_file_only:
      return

    # Generate the ICU4J .jar files
    subprocess.check_call(['make', '-j32', 'icu4j-data'])

    # Generate the test data in icu4c/source/test/testdata/out
    subprocess.check_call(['make', '-j32', 'tests'])

    # Copy the ICU4J .jar files to their ultimate destination.
    icu_jar_data_dir = '%s/main/shared/data' % icu4jDir()
    jarfiles = glob.glob('data/out/icu4j/*.jar')
    if len(jarfiles) != 3:
      print('ERROR: Unexpectedly found %d .jar files (%s). Halting.' % (len(jarfiles), jarfiles))
      sys.exit(1)
    for jarfile in jarfiles:
      icu_jarfile = os.path.join(icu_jar_data_dir, os.path.basename(jarfile))
      if ziputil.ZipCompare(jarfile, icu_jarfile):
        print('Ignoring %s which is identical to %s ...' % (jarfile, icu_jarfile))
      else:
        print('Copying %s to %s ...' % (jarfile, icu_jar_data_dir))
        shutil.copy(jarfile, icu_jar_data_dir)

    testdata_out_dir = '%s/test/testdata/out' % icu4cDir()
    print('Copying test data to %s ' % testdata_out_dir)
    if os.path.exists(testdata_out_dir):
      shutil.rmtree(testdata_out_dir)
    shutil.copytree('test/testdata/out', testdata_out_dir)
  finally:
    # Switch back to the original working cwd on every exit path, including
    # the early return above and any failure.
    os.chdir(original_working_dir)
225
226
def MakeAndCopyOverlayTzIcuData(icu_build_dir, dest_file):
  """Makes a .dat file containing just time-zone data.

  The overlay file can be used as an overlay of a full ICU .dat file
  to provide newer time zone data. Some strings like translated
  time zone names will be missing, but rules will be correct.

  Args:
    icu_build_dir: a configured ICU build directory.
    dest_file: path the generated overlay .dat file is copied to. A relative
      path is interpreted against the caller's working directory.

  Raises:
    subprocess.CalledProcessError: if make or pkgdata exits with a non-zero status.

  Exits the process with status 1 if the built data file name does not end with .dat.
  """

  # Keep track of the original cwd so we can go back to it at the end.
  original_working_dir = os.getcwd()

  # Resolve both paths before any chdir; otherwise relative values would be
  # re-interpreted against the build / staging directories below.
  icu_build_dir = os.path.abspath(icu_build_dir)
  dest_file = os.path.abspath(dest_file)

  os.chdir(icu_build_dir)
  try:
    # Regenerate the .res files.
    subprocess.check_call(['make', '-j32'])

    # The list of ICU resources needed for time zone data overlays.
    tz_res_names = [
        'metaZones.res',
        'timezoneTypes.res',
        'windowsZones.res',
        'zoneinfo64.res',
    ]

    dat_file = datFile(icu_build_dir)
    icu_package_dat = os.path.basename(dat_file)
    if not icu_package_dat.endswith('.dat'):
      print('%s does not end with .dat' % icu_package_dat)
      sys.exit(1)
    icu_package = icu_package_dat[:-4]

    # Create a staging directory to hold the files to go into the overlay .dat
    res_staging_dir = '%s/overlay_res' % icu_build_dir
    os.mkdir(res_staging_dir)

    # Copy all the .res files we need from, e.g. ./data/out/build/icudt55l, to the staging directory
    res_src_dir = '%s/data/out/build/%s' % (icu_build_dir, icu_package)
    for tz_res_name in tz_res_names:
      shutil.copy('%s/%s' % (res_src_dir, tz_res_name), res_staging_dir)

    # Create a .lst file to pass to pkgdata.
    tz_files_file = '%s/tzdata.lst' % res_staging_dir
    with open(tz_files_file, "w") as tz_files:
      for tz_res_name in tz_res_names:
        tz_files.write('%s\n' % tz_res_name)

    icu_lib_dir = '%s/lib' % icu_build_dir
    pkg_data_bin = '%s/bin/pkgdata' % icu_build_dir

    # Run pkgdata to create a .dat file.
    icu_env = os.environ.copy()
    icu_env["LD_LIBRARY_PATH"] = icu_lib_dir

    # pkgdata treats the .lst file it is given as relative to CWD, and the path also affects the
    # resource names in the .dat file produced so we change the CWD.
    os.chdir(res_staging_dir)

    # -F : force rebuilding all data
    # -m common : create a .dat
    # -v : verbose
    # -T . : use "." as a temp dir
    # -d . : use "." as the dest dir
    # -p <name> : Set the "data name"
    # check_call stops here on a pkgdata failure instead of carrying on to copy
    # a .dat file that was never produced.
    subprocess.check_call(
        [pkg_data_bin, '-F', '-m', 'common', '-v', '-T', '.', '-d', '.', '-p',
         icu_package, tz_files_file],
        env=icu_env)

    # Copy the .dat to the chosen place / name.
    generated_dat_file = '%s/%s' % (res_staging_dir, icu_package_dat)
    shutil.copyfile(generated_dat_file, dest_file)
    print('ICU overlay .dat can be found here: %s' % dest_file)
  finally:
    # Switch back to the original working cwd, also when a step above failed.
    os.chdir(original_working_dir)
304
def _MakeLangInfo():
  """Regenerates icu4c/source/data/misc/langInfo.txt.

  Builds ICU4J and its tools with ant, runs the LocaleDistanceBuilder tool and
  copies its output over langInfo.txt when the content differs.

  This is implementation detail, should not be called outside
  of this script.

  Returns:
    True if the file was changed and False otherwise.

  Raises:
    subprocess.CalledProcessError: if ant or java exits with a non-zero status.
  """

  # Generate icu4c/source/data/misc/langInfo.txt by a ICU4J tool
  langInfo_dst_path = os.path.join(icu4cDir(), 'data/misc/langInfo.txt')
  print('Building %s' % langInfo_dst_path)
  langInfo_out_path = '/tmp/langInfo.txt'  # path hardcoded in the LocaleDistanceBuilder tool
  # Remove stale output so a leftover file can never be mistaken for a fresh one.
  if os.path.exists(langInfo_out_path):
    os.remove(langInfo_out_path)

  # Each step runs in its directory via cwd=, so the working directory of this
  # process is not changed as a side effect.
  icu4j_dir = icu4jDir()
  subprocess.check_call(['ant', 'icu4jJar'], cwd=icu4j_dir)
  tools_misc_dir = os.path.join(icu4j_dir, 'tools', 'misc')
  subprocess.check_call(['ant', 'jar'], cwd=tools_misc_dir)
  # The classpath entries are relative to tools/misc.
  subprocess.check_call([
    'java',
    '-cp',
    'out/lib/icu4j-tools.jar:../../icu4j.jar',
    'com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder',
  ], cwd=tools_misc_dir)

  # shallow=False forces a content comparison rather than trusting os.stat() signatures.
  if filecmp.cmp(langInfo_dst_path, langInfo_out_path, shallow=False):
    print('The files {src} and {dst} are the same'.format(src=langInfo_out_path, dst=langInfo_dst_path))
    return False

  print('Copying {src} to {dst}'.format(src=langInfo_out_path, dst=langInfo_dst_path))
  shutil.copyfile(langInfo_out_path, langInfo_dst_path)
  return True
338
def GenerateIcuDataFiles():
  """Generates the ICU data files whose generation depends on ICU itself.

  ICU is built and the dependent files are re-generated repeatedly until they
  converge, i.e. until a subsequent build no longer changes these files.
  """
  # Whenever icu4c/source/data/misc/langInfo.txt is re-generated, the binary data files have
  # to be rebuilt as well. The cycle repeats until _MakeLangInfo() reports no change, because
  # the re-generation depends on icu4j, and icu4j depends on the binary data files.
  while True:
    newest_build_dir = _MakeIcuDataFilesOnce()
    if not _MakeLangInfo():
      break

  _MakeIcuDataFilesWithoutTimeZoneFiles(newest_build_dir)
353
def _MakeIcuDataFilesOnce():
  """Runs a single ICU build and copies the .dat and .jar files to expected places.

  The build happens in an "icu" directory under the working directory that
  i18nutil.SwitchToNewTemporaryDirectory() switches to. It is unlikely that
  you need to call this method outside of this script.

  This is a low-level method.
  Please check :func:`GenerateIcuDataFiles()` for caveats.

  Returns:
    The path of the ICU build directory that was used.
  """
  i18nutil.SwitchToNewTemporaryDirectory()
  build_dir = '%s/icu' % os.getcwd()

  PrepareIcuBuild(build_dir, data_filters_json=ICU_MLDATA_FILTERS)
  MakeAndCopyIcuDataFiles(build_dir)
  return build_dir
370
def _MakeIcuDataFilesWithoutTimeZoneFiles(icu_build_dir):
  """Rebuilds the .dat file without the timezone .res files to save ~200 KB file size.

  TODO (b/206956042): Move this to the first build when
  https://unicode-org.atlassian.net/browse/ICU-21769 is fixed.
  Until then another build is needed to build a new .dat file without the timezone files.
  """
  # GNUmake only rebuilds the .lst file once the existing one has been removed manually.
  stale_list_file = pathlib.Path(icu_build_dir) / 'data/out/tmp/icudata.lst'
  stale_list_file.unlink(missing_ok=True)

  PrepareIcuBuild(icu_build_dir, data_filters_json=ICU_DATA_FILTERS)

  # Only the ICU4C .dat file is copied (copy_icu4c_dat_file_only=True): the ICU4J data and
  # other files are skipped because the data files may be incomplete to be consumed for a
  # host tool. The ICU4J implementation on device doesn't use the ICU4J data files,
  # e.g. ./icu4j/main/shared/data/icudata.jar
  MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=True)
387
def CopyLicenseFiles(target_dir):
  """Copies the ICU LICENSE file found under icu4jDir() into target_dir."""
  icu4j_root = icu4jDir()
  source_license = '%s/main/shared/licenses/LICENSE' % icu4j_root

  print('Copying %s to %s ...' % (source_license, target_dir))
  shutil.copy(source_license, target_dir)
394
395