# Copyright 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility methods associated with ICU source and builds."""

from __future__ import print_function

import filecmp
import glob
import os
import pathlib
import shutil
import subprocess
import sys

import i18nutil
import ziputil


# See https://github.com/unicode-org/icu/blob/main/docs/userguide/icu_data/buildtool.md
# for the documentation.
ICU_DATA_FILTERS = """{
  "featureFilters": {
    "misc": {
      "excludelist": [
        "metaZones",
        "timezoneTypes",
        "windowsZones",
        "zoneinfo64"
      ]
    }
  }
}
"""


def cldrDir():
    """Returns the location of CLDR in the Android source tree.

    The path is <android root>/external/cldr with symlinks resolved. The
    directory is validated with i18nutil.CheckDirExists().
    """
    android_build_top = i18nutil.GetAndroidRootOrDie()
    cldr_dir = os.path.realpath('%s/external/cldr' % android_build_top)
    i18nutil.CheckDirExists(cldr_dir, 'external/cldr')
    return cldr_dir


def icuDir():
    """Returns the location of ICU in the Android source tree.

    The path is <android root>/external/icu with symlinks resolved. The
    directory is validated with i18nutil.CheckDirExists().
    """
    android_build_top = i18nutil.GetAndroidRootOrDie()
    icu_dir = os.path.realpath('%s/external/icu' % android_build_top)
    i18nutil.CheckDirExists(icu_dir, 'external/icu')
    return icu_dir


def icu4cDir():
    """Returns the location of ICU4C in the Android source tree.

    This is the icu4c/source directory below icuDir().
    """
    icu4c_dir = os.path.realpath('%s/icu4c/source' % icuDir())
    i18nutil.CheckDirExists(icu4c_dir, 'external/icu/icu4c/source')
    return icu4c_dir


def icu4jDir():
    """Returns the location of ICU4J in the Android source tree.

    This is the icu4j directory below icuDir().
    """
    icu4j_dir = os.path.realpath('%s/icu4j' % icuDir())
    i18nutil.CheckDirExists(icu4j_dir, 'external/icu/icu4j')
    return icu4j_dir


def datFile(icu_build_dir):
    """Returns the location of the ICU .dat file in the specified ICU build dir.

    Exactly one file matching data/out/tmp/icudt??l.dat is expected below
    icu_build_dir. If zero or several files match, an error is printed and the
    process exits with status 1.
    """
    dat_file_pattern = '%s/data/out/tmp/icudt??l.dat' % icu_build_dir
    dat_files = glob.glob(dat_file_pattern)
    if len(dat_files) != 1:
        print('ERROR: Unexpectedly found %d .dat files (%s). Halting.'
              % (len(dat_files), dat_files))
        sys.exit(1)
    return dat_files[0]


def PrepareIcuBuild(icu_build_dir, data_filters_json=None):
    """Sets up an ICU build in the specified directory.

    Creates the directory and runs "runConfigureICU Linux"

    Args:
      icu_build_dir: directory to configure the build in. It is created if it
        does not exist yet.
      data_filters_json: optional content of an ICU data filter file. When
        given, it is written to icu4c_data_filters.json in icu_build_dir and
        passed to the configure step through ICU_DATA_FILTER_FILE.

    Raises:
      subprocess.CalledProcessError: if runConfigureICU fails.
    """
    # Keep track of the original cwd so we can go back to it at the end.
    original_working_dir = os.getcwd()

    # Create a directory to run 'make' from.
    if not os.path.exists(icu_build_dir):
        os.mkdir(icu_build_dir)
    os.chdir(icu_build_dir)

    try:
        # Build the ICU tools.
        print('Configuring ICU tools...')
        cmd = ['env']
        if data_filters_json is not None:
            json_file_path = os.path.join(icu_build_dir, "icu4c_data_filters.json")
            print("json path: %s" % json_file_path)
            writeFileContent(json_file_path, data_filters_json)
            cmd.append('ICU_DATA_FILTER_FILE=%s' % json_file_path)

        cmd += ['ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data',
                '%s/runConfigureICU' % icu4cDir(),
                'Linux']
        subprocess.check_call(cmd)
    finally:
        # Restore the cwd even when the configure step fails.
        os.chdir(original_working_dir)


def writeFileContent(file_path, file_content):
    """Write a string into the file, replacing any previous content."""
    with open(file_path, "w") as out_file:
        out_file.write(file_content)


def MakeTzDataFiles(icu_build_dir, iana_tar_file):
    """Builds and runs the ICU tools in ${icu_build_dir}/tools/tzcode.

    The tools are run against the specified IANA tzdata .tar.gz.
    The resulting zoneinfo64.txt is copied into the src directories.
    """
    tzcode_working_dir = '%s/tools/tzcode' % icu_build_dir

    # Fix missing files.
    # The tz2icu tool only picks up icuregions and icuzones if they are in the CWD
    for icu_data_file in [ 'icuregions', 'icuzones']:
        icu_data_file_source = '%s/tools/tzcode/%s' % (icu4cDir(), icu_data_file)
        icu_data_file_symlink = '%s/%s' % (tzcode_working_dir, icu_data_file)
        os.symlink(icu_data_file_source, icu_data_file_symlink)

    iana_tar_filename = os.path.basename(iana_tar_file)
    working_iana_tar_file = '%s/%s' % (tzcode_working_dir, iana_tar_filename)
    shutil.copyfile(iana_tar_file, working_iana_tar_file)

    print('Making ICU tz data files...')
    # The Makefile assumes the existence of the bin directory.
    os.mkdir('%s/bin' % icu_build_dir)

    # -j1 is needed because the build is not parallelizable. http://b/109641429
    subprocess.check_call(['make', '-j1', '-C', tzcode_working_dir])

    # Copy the source file to its ultimate destination.
    zoneinfo_file = '%s/zoneinfo64.txt' % tzcode_working_dir
    icu_txt_data_dir = '%s/data/misc' % icu4cDir()
    print('Copying zoneinfo64.txt to %s ...' % icu_txt_data_dir)
    shutil.copy(zoneinfo_file, icu_txt_data_dir)


def MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=False):
    """Builds the ICU .dat and .jar files using the current src data.

    The files are copied back into the expected locations in the src tree.

    This is a low-level method.
    Please check :func:`GenerateIcuDataFiles()` for caveats.

    Args:
      icu_build_dir: a build directory already set up by PrepareIcuBuild().
      copy_icu4c_dat_file_only: when True, only the ICU4C .dat file is built
        and copied; the ICU4J .jar files and the test data are left untouched.
    """
    # Keep track of the original cwd so we can go back to it at the end.
    original_working_dir = os.getcwd()

    os.chdir(icu_build_dir)
    try:
        # Regenerate the .dat file.
        subprocess.check_call(['make', '-j32'])

        # Copy the .dat file to its ultimate destination.
        icu_dat_data_dir = '%s/stubdata' % icu4cDir()
        dat_file = datFile(icu_build_dir)

        print('Copying %s to %s ...' % (dat_file, icu_dat_data_dir))
        shutil.copy(dat_file, icu_dat_data_dir)

        if copy_icu4c_dat_file_only:
            return

        # Generate the ICU4J .jar files
        subprocess.check_call(['make', '-j32', 'icu4j-data'])

        # Generate the test data in icu4c/source/test/testdata/out
        subprocess.check_call(['make', '-j32', 'tests'])

        # Copy the ICU4J .jar files to their ultimate destination.
        icu_jar_data_dir = '%s/main/shared/data' % icu4jDir()
        jarfiles = glob.glob('data/out/icu4j/*.jar')
        if len(jarfiles) != 3:
            print('ERROR: Unexpectedly found %d .jar files (%s). Halting.'
                  % (len(jarfiles), jarfiles))
            sys.exit(1)
        for jarfile in jarfiles:
            icu_jarfile = os.path.join(icu_jar_data_dir, os.path.basename(jarfile))
            if ziputil.ZipCompare(jarfile, icu_jarfile):
                print('Ignoring %s which is identical to %s ...' % (jarfile, icu_jarfile))
            else:
                print('Copying %s to %s ...' % (jarfile, icu_jar_data_dir))
                shutil.copy(jarfile, icu_jar_data_dir)

        testdata_out_dir = '%s/test/testdata/out' % icu4cDir()
        print('Copying test data to %s ' % testdata_out_dir)
        if os.path.exists(testdata_out_dir):
            shutil.rmtree(testdata_out_dir)
        shutil.copytree('test/testdata/out', testdata_out_dir)
    finally:
        # Switch back to the original working cwd. Doing this in a finally
        # clause also covers the early return above and any build failure.
        os.chdir(original_working_dir)


def MakeAndCopyOverlayTzIcuData(icu_build_dir, dest_file):
    """Makes a .dat file containing just time-zone data.

    The overlay file can be used as an overlay of a full ICU .dat file
    to provide newer time zone data. Some strings like translated
    time zone names will be missing, but rules will be correct.

    Args:
      icu_build_dir: a build directory already set up by PrepareIcuBuild().
      dest_file: path the generated overlay .dat file is copied to.

    Exits the process with status 1 if the built .dat file name is unexpected
    or if pkgdata fails.
    """

    # Keep track of the original cwd so we can go back to it at the end.
    original_working_dir = os.getcwd()

    os.chdir(icu_build_dir)
    try:
        # Regenerate the .res files.
        subprocess.check_call(['make', '-j32'])

        # The list of ICU resources needed for time zone data overlays.
        tz_res_names = [
            'metaZones.res',
            'timezoneTypes.res',
            'windowsZones.res',
            'zoneinfo64.res',
        ]

        dat_file = datFile(icu_build_dir)
        icu_package_dat = os.path.basename(dat_file)
        if not icu_package_dat.endswith('.dat'):
            print('%s does not end with .dat' % icu_package_dat)
            sys.exit(1)
        icu_package = icu_package_dat[:-4]

        # Create a staging directory to hold the files to go into the overlay .dat
        res_staging_dir = '%s/overlay_res' % icu_build_dir
        os.mkdir(res_staging_dir)

        # Copy all the .res files we need from, e.g. ./data/out/build/icudt55l, to the
        # staging directory
        res_src_dir = '%s/data/out/build/%s' % (icu_build_dir, icu_package)
        for tz_res_name in tz_res_names:
            shutil.copy('%s/%s' % (res_src_dir, tz_res_name), res_staging_dir)

        # Create a .lst file to pass to pkgdata.
        tz_files_file = '%s/tzdata.lst' % res_staging_dir
        with open(tz_files_file, "a") as tz_files:
            for tz_res_name in tz_res_names:
                tz_files.write('%s\n' % tz_res_name)

        icu_lib_dir = '%s/lib' % icu_build_dir
        pkg_data_bin = '%s/bin/pkgdata' % icu_build_dir

        # Run pkgdata to create a .dat file.
        icu_env = os.environ.copy()
        icu_env["LD_LIBRARY_PATH"] = icu_lib_dir

        # pkgdata treats the .lst file it is given as relative to CWD, and the path also
        # affects the resource names in the .dat file produced so we change the CWD.
        os.chdir(res_staging_dir)

        # -F : force rebuilding all data
        # -m common : create a .dat
        # -v : verbose
        # -T . : use "." as a temp dir
        # -d . : use "." as the dest dir
        # -p <name> : Set the "data name"
        return_code = subprocess.call(
            [pkg_data_bin, '-F', '-m', 'common', '-v', '-T', '.', '-d', '.', '-p',
             icu_package, tz_files_file],
            env=icu_env)
        if return_code != 0:
            print('pkgdata failed with status code: %s' % return_code)
            # Without a successful pkgdata run there is no overlay .dat to copy.
            sys.exit(1)

        # Copy the .dat to the chosen place / name.
        generated_dat_file = '%s/%s' % (res_staging_dir, icu_package_dat)
        shutil.copyfile(generated_dat_file, dest_file)
        print('ICU overlay .dat can be found here: %s' % dest_file)
    finally:
        # Switch back to the original working cwd, also on failure.
        os.chdir(original_working_dir)


def _MakeLangInfo():
    """ Regenerates icu4c/source/data/misc/langInfo.txt.
    Returns true if the file was changed and false otherwise.

    This is implementation detail, should not be called outside
    of this script.

    Note: this function changes the current working directory to
    <icu4j>/tools/misc and does not restore it.
    """

    # Generate icu4c/source/data/misc/langInfo.txt by a ICU4J tool
    langInfo_dst_path = os.path.join(icu4cDir(), 'data/misc/langInfo.txt')
    print('Building %s' % langInfo_dst_path)
    langInfo_out_path = '/tmp/langInfo.txt'  # path hardcoded in the LocaleDistanceBuilder tool
    # Remove any stale output so that only a freshly generated file is compared below.
    if os.path.exists(langInfo_out_path):
        os.remove(langInfo_out_path)

    icu4j_dir = icu4jDir()
    os.chdir(icu4j_dir)
    subprocess.check_call(['ant', 'icu4jJar'])
    os.chdir(os.path.join(icu4j_dir, 'tools', 'misc'))
    subprocess.check_call(['ant', 'jar'])
    subprocess.check_call([
        'java',
        '-cp',
        'out/lib/icu4j-tools.jar:../../icu4j.jar',
        'com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder',
    ])
    # shallow=False forces a content comparison; the default may report two
    # files as equal based only on their os.stat() signatures.
    if filecmp.cmp(langInfo_dst_path, langInfo_out_path, shallow=False):
        print('The files {src} and {dst} are the same'.format(
            src=langInfo_out_path, dst=langInfo_dst_path))
        return False

    print('Copying {src} to {dst}'.format(src=langInfo_out_path, dst=langInfo_dst_path))
    shutil.copyfile(langInfo_out_path, langInfo_dst_path)
    return True


def GenerateIcuDataFiles():
    """ There are ICU files generation of which depends on ICU itself.
    This method repeatedly builds ICU and re-generates these files until they
    converge, i.e. subsequent builds do not change these files.
    """
    last_icu_build_dir = _MakeIcuDataFilesOnce()

    # If icu4c/source/data/misc/langInfo.txt is re-generated, the binary data files need to be
    # re-generated. _MakeIcuDataFilesOnce() is called until it converges because the
    # re-generation depends on icu4j, and icu4j depends on the binary data files.
    while _MakeLangInfo():
        last_icu_build_dir = _MakeIcuDataFilesOnce()

    _MakeIcuDataFilesWithoutTimeZoneFiles(last_icu_build_dir)


def _MakeIcuDataFilesOnce():
    """Builds ICU and copies .dat and .jar files to expected places.
    Build is invoked only once. It is unlikely that you need to call
    this method outside of this script.

    This is a low-level method.
    Please check :func:`GenerateIcuDataFiles()` for caveats.

    Returns the build directory used, i.e. "icu" below the cwd that is
    current after i18nutil.SwitchToNewTemporaryDirectory() has been called.
    """
    i18nutil.SwitchToNewTemporaryDirectory()
    icu_build_dir = '%s/icu' % os.getcwd()

    PrepareIcuBuild(icu_build_dir)

    MakeAndCopyIcuDataFiles(icu_build_dir)

    return icu_build_dir


def _MakeIcuDataFilesWithoutTimeZoneFiles(icu_build_dir):
    """
    Remove the timezone .res files from the .dat file in order to save ~200 KB file size.
    TODO (b/206956042): Move this to the first build when
    https://unicode-org.atlassian.net/browse/ICU-21769 is fixed.
    Now another build is needed to build a new .dat file without the timezone files.
    """
    # A manual removal of the .lst file is needed to force GNUmake to rebuild the .lst file
    list_file_path = pathlib.Path(icu_build_dir, 'data/out/tmp/icudata.lst')
    list_file_path.unlink(missing_ok=True)

    PrepareIcuBuild(icu_build_dir, data_filters_json=ICU_DATA_FILTERS)
    # copy_icu4c_dat_file_only is set to true to avoid copying the ICU4J data or other files
    # because the data files may be incomplete to be consumed for a host tool.
    # The ICU4J implementation on device doesn't use the ICU4J data files,
    # e.g. ./icu4j/main/shared/data/icudata.jar
    MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=True)


def CopyLicenseFiles(target_dir):
    """Copies ICU license files to the target_dir"""

    license_file = '%s/main/shared/licenses/LICENSE' % icu4jDir()
    print('Copying %s to %s ...' % (license_file, target_dir))
    shutil.copy(license_file, target_dir)