# Copyright 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility methods associated with ICU source and builds."""

from __future__ import print_function

import contextlib
import filecmp
import glob
import os
import pathlib
import shutil
import subprocess
import sys

import i18nutil
import ziputil


# See https://github.com/unicode-org/icu/blob/main/docs/userguide/icu_data/buildtool.md
# for the documentation.
ICU_DATA_FILTERS = """{
  "featureFilters": {
    "misc": {
      "excludelist": [
        "metaZones",
        "timezoneTypes",
        "windowsZones",
        "zoneinfo64"
      ]
    },
    "brkitr_adaboost": {
      "includelist": [
        "jaml"
      ]
    }
  }
}
"""

ICU_MLDATA_FILTERS = """{
  "featureFilters": {
    "brkitr_adaboost": {
      "includelist": [
        "jaml"
      ]
    }
  }
}
"""


@contextlib.contextmanager
def _WorkingDirectory(path):
    """Runs the enclosed block with `path` as cwd, then restores the previous cwd.

    The previous cwd is restored even when the block raises (including the
    SystemExit raised by sys.exit()).
    """
    original_working_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(original_working_dir)


def cldrDir():
    """Returns the location of CLDR in the Android source tree."""
    android_build_top = i18nutil.GetAndroidRootOrDie()
    cldr_dir = os.path.realpath('%s/external/cldr' % android_build_top)
    i18nutil.CheckDirExists(cldr_dir, 'external/cldr')
    return cldr_dir


def icuDir():
    """Returns the location of ICU in the Android source tree."""
    android_build_top = i18nutil.GetAndroidRootOrDie()
    icu_dir = os.path.realpath('%s/external/icu' % android_build_top)
    i18nutil.CheckDirExists(icu_dir, 'external/icu')
    return icu_dir


def icu4cDir():
    """Returns the location of ICU4C in the Android source tree."""
    icu4c_dir = os.path.realpath('%s/icu4c/source' % icuDir())
    i18nutil.CheckDirExists(icu4c_dir, 'external/icu/icu4c/source')
    return icu4c_dir


def icu4jDir():
    """Returns the location of ICU4J in the Android source tree."""
    icu4j_dir = os.path.realpath('%s/icu4j' % icuDir())
    i18nutil.CheckDirExists(icu4j_dir, 'external/icu/icu4j')
    return icu4j_dir


def datFile(icu_build_dir):
    """Returns the location of the ICU .dat file in the specified ICU build dir.

    Exactly one file matching data/out/tmp/icudt??l.dat is expected under
    icu_build_dir. If zero or several files match, an error is printed and
    the process exits with status 1.
    """
    dat_file_pattern = '%s/data/out/tmp/icudt??l.dat' % icu_build_dir
    dat_files = glob.glob(dat_file_pattern)
    if len(dat_files) != 1:
        print('ERROR: Unexpectedly found %d .dat files (%s). Halting.'
              % (len(dat_files), dat_files))
        sys.exit(1)
    return dat_files[0]


def PrepareIcuBuild(icu_build_dir, data_filters_json=None):
    """Sets up an ICU build in the specified directory.

    Creates the directory and runs "runConfigureICU Linux" from inside it.

    Args:
      icu_build_dir: directory to configure the build in; created if missing.
      data_filters_json: optional ICU data filter JSON text. When given, it is
        written to icu4c_data_filters.json in the build dir and passed to the
        configure step through ICU_DATA_FILTER_FILE.

    Raises:
      subprocess.CalledProcessError: if runConfigureICU fails.
    """
    # Create a directory to run 'make' from.
    os.makedirs(icu_build_dir, exist_ok=True)

    # The original cwd is restored on exit, even if configuration fails.
    with _WorkingDirectory(icu_build_dir):
        # Build the ICU tools.
        print('Configuring ICU tools...')
        cmd = ['env']
        if data_filters_json is not None:
            json_file_path = os.path.join(icu_build_dir, "icu4c_data_filters.json")
            print("json path: %s" % json_file_path)
            writeFileContent(json_file_path, data_filters_json)
            cmd.append('ICU_DATA_FILTER_FILE=%s' % json_file_path)

        cmd += ['ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data',
                '%s/runConfigureICU' % icu4cDir(),
                'Linux']
        subprocess.check_call(cmd)


def writeFileContent(file_path, file_content):
    """Writes the string file_content into file_path, replacing any old content."""
    with open(file_path, "w") as out_file:
        out_file.write(file_content)


def MakeTzDataFiles(icu_build_dir, iana_tar_file):
    """Builds and runs the ICU tools in ${icu_build_dir}/tools/tzcode.

    The tools are run against the specified IANA tzdata .tar.gz.
    The resulting zoneinfo64.txt is copied into the src directories.

    Raises:
      subprocess.CalledProcessError: if the tzcode make invocation fails.
    """
    tzcode_working_dir = '%s/tools/tzcode' % icu_build_dir

    # Fix missing files.
    # The tz2icu tool only picks up icuregions and icuzones if they are in the CWD
    for icu_data_file in ['icuregions', 'icuzones']:
        icu_data_file_source = '%s/tools/tzcode/%s' % (icu4cDir(), icu_data_file)
        icu_data_file_symlink = '%s/%s' % (tzcode_working_dir, icu_data_file)
        os.symlink(icu_data_file_source, icu_data_file_symlink)

    iana_tar_filename = os.path.basename(iana_tar_file)
    working_iana_tar_file = '%s/%s' % (tzcode_working_dir, iana_tar_filename)
    shutil.copyfile(iana_tar_file, working_iana_tar_file)

    print('Making ICU tz data files...')
    # The Makefile assumes the existence of the bin directory.
    os.makedirs('%s/bin' % icu_build_dir, exist_ok=True)

    # -j1 is needed because the build is not parallelizable. http://b/109641429
    subprocess.check_call(['make', '-j1', '-C', tzcode_working_dir])

    # Copy the source file to its ultimate destination.
    zoneinfo_file = '%s/zoneinfo64.txt' % tzcode_working_dir
    icu_txt_data_dir = '%s/data/misc' % icu4cDir()
    print('Copying zoneinfo64.txt to %s ...' % icu_txt_data_dir)
    shutil.copy(zoneinfo_file, icu_txt_data_dir)


def MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=False):
    """Builds the ICU .dat and .jar files using the current src data.

    The files are copied back into the expected locations in the src tree.

    This is a low-level method.
    Please check :func:`GenerateIcuDataFiles()` for caveats.

    Args:
      icu_build_dir: an ICU build directory prepared by PrepareIcuBuild().
      copy_icu4c_dat_file_only: when True, only the ICU4C .dat file is built
        and copied; the ICU4J .jar files and the test data are skipped.

    Raises:
      subprocess.CalledProcessError: if any make invocation fails.
    """
    # The relative paths below are resolved against the build dir. The original
    # cwd is restored on every exit path, including the early return.
    with _WorkingDirectory(icu_build_dir):
        # Regenerate the .dat file.
        subprocess.check_call(['make', '-j32'])

        # Copy the .dat file to its ultimate destination.
        icu_dat_data_dir = '%s/stubdata' % icu4cDir()
        dat_file = datFile(icu_build_dir)

        print('Copying %s to %s ...' % (dat_file, icu_dat_data_dir))
        shutil.copy(dat_file, icu_dat_data_dir)

        if copy_icu4c_dat_file_only:
            return

        # Generate the ICU4J .jar files
        subprocess.check_call(['make', '-j32', 'icu4j-data'])

        # Generate the test data in icu4c/source/test/testdata/out
        subprocess.check_call(['make', '-j32', 'tests'])

        # Copy the ICU4J .jar files to their ultimate destination.
        icu_jar_data_dir = '%s/main/shared/data' % icu4jDir()
        jarfiles = glob.glob('data/out/icu4j/*.jar')
        if len(jarfiles) != 3:
            print('ERROR: Unexpectedly found %d .jar files (%s). Halting.'
                  % (len(jarfiles), jarfiles))
            sys.exit(1)
        for jarfile in jarfiles:
            icu_jarfile = os.path.join(icu_jar_data_dir, os.path.basename(jarfile))
            if ziputil.ZipCompare(jarfile, icu_jarfile):
                print('Ignoring %s which is identical to %s ...' % (jarfile, icu_jarfile))
            else:
                print('Copying %s to %s ...' % (jarfile, icu_jar_data_dir))
                shutil.copy(jarfile, icu_jar_data_dir)

        # Replace the checked-in test data with the freshly built copy.
        testdata_out_dir = '%s/test/testdata/out' % icu4cDir()
        print('Copying test data to %s ' % testdata_out_dir)
        if os.path.exists(testdata_out_dir):
            shutil.rmtree(testdata_out_dir)
        shutil.copytree('test/testdata/out', testdata_out_dir)


def MakeAndCopyOverlayTzIcuData(icu_build_dir, dest_file):
    """Makes a .dat file containing just time-zone data.

    The overlay file can be used as an overlay of a full ICU .dat file
    to provide newer time zone data. Some strings like translated
    time zone names will be missing, but rules will be correct.

    Args:
      icu_build_dir: an ICU build directory prepared by PrepareIcuBuild().
      dest_file: path the generated overlay .dat file is copied to.
    """
    # The original cwd is restored on exit, even if a step fails.
    with _WorkingDirectory(icu_build_dir):
        # Regenerate the .res files.
        subprocess.check_call(['make', '-j32'])

        # The list of ICU resources needed for time zone data overlays.
        tz_res_names = [
            'metaZones.res',
            'timezoneTypes.res',
            'windowsZones.res',
            'zoneinfo64.res',
        ]

        dat_file = datFile(icu_build_dir)
        icu_package_dat = os.path.basename(dat_file)
        if not icu_package_dat.endswith('.dat'):
            print('%s does not end with .dat' % icu_package_dat)
            sys.exit(1)
        # Strip the ".dat" suffix to get the package name, e.g. icudt55l.
        icu_package = icu_package_dat[:-4]

        # Create a staging directory to hold the files to go into the overlay .dat
        res_staging_dir = '%s/overlay_res' % icu_build_dir
        os.mkdir(res_staging_dir)

        # Copy all the .res files we need from, e.g. ./data/out/build/icudt55l, to the
        # staging directory
        res_src_dir = '%s/data/out/build/%s' % (icu_build_dir, icu_package)
        for tz_res_name in tz_res_names:
            shutil.copy('%s/%s' % (res_src_dir, tz_res_name), res_staging_dir)

        # Create a .lst file to pass to pkgdata.
        tz_files_file = '%s/tzdata.lst' % res_staging_dir
        with open(tz_files_file, "a") as tz_files:
            for tz_res_name in tz_res_names:
                tz_files.write('%s\n' % tz_res_name)

        icu_lib_dir = '%s/lib' % icu_build_dir
        pkg_data_bin = '%s/bin/pkgdata' % icu_build_dir

        # Run pkgdata to create a .dat file.
        icu_env = os.environ.copy()
        icu_env["LD_LIBRARY_PATH"] = icu_lib_dir

        # pkgdata treats the .lst file it is given as relative to CWD, and the path also
        # affects the resource names in the .dat file produced so we change the CWD.
        os.chdir(res_staging_dir)

        # -F : force rebuilding all data
        # -m common : create a .dat
        # -v : verbose
        # -T . : use "." as a temp dir
        # -d . : use "." as the dest dir
        # -p <name> : Set the "data name"
        p = subprocess.Popen(
            [pkg_data_bin, '-F', '-m', 'common', '-v', '-T', '.', '-d', '.', '-p',
             icu_package, tz_files_file],
            env=icu_env)
        p.wait()
        if p.returncode != 0:
            # NOTE(review): a non-zero exit is only reported here; execution
            # still continues to the copy below.
            print('pkgdata failed with status code: %s' % p.returncode)

        # Copy the .dat to the chosen place / name.
        generated_dat_file = '%s/%s' % (res_staging_dir, icu_package_dat)
        shutil.copyfile(generated_dat_file, dest_file)
        print('ICU overlay .dat can be found here: %s' % dest_file)


def _MakeLangInfo():
    """Regenerates icu4c/source/data/misc/langInfo.txt.

    Returns True if the file was changed and False otherwise.

    This function changes the process cwd (to icu4j/tools/misc) and does not
    restore it.

    This is an implementation detail and should not be called outside
    of this script.
    """

    # Generate icu4c/source/data/misc/langInfo.txt by an ICU4J tool
    langInfo_dst_path = os.path.join(icu4cDir(), 'data/misc/langInfo.txt')
    print('Building %s' % langInfo_dst_path)
    langInfo_out_path = '/tmp/langInfo.txt'  # path hardcoded in the LocaleDistanceBuilder tool
    # Remove any stale output so a previous run's file cannot be picked up.
    if os.path.exists(langInfo_out_path):
        os.remove(langInfo_out_path)

    icu4j_dir = icu4jDir()
    os.chdir(icu4j_dir)
    subprocess.check_call(['ant', 'icu4jJar'])
    os.chdir(os.path.join(icu4j_dir, 'tools', 'misc'))
    subprocess.check_call(['ant', 'jar'])
    subprocess.check_call([
        'java',
        '-cp',
        'out/lib/icu4j-tools.jar:../../icu4j.jar',
        'com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder',
    ])
    # shallow=False forces a byte-by-byte comparison instead of trusting
    # matching os.stat() signatures.
    if filecmp.cmp(langInfo_dst_path, langInfo_out_path, shallow=False):
        print('The files {src} and {dst} are the same'.format(
            src=langInfo_out_path, dst=langInfo_dst_path))
        return False

    print('Copying {src} to {dst}'.format(src=langInfo_out_path, dst=langInfo_dst_path))
    shutil.copyfile(langInfo_out_path, langInfo_dst_path)
    return True


def GenerateIcuDataFiles():
    """Builds the ICU data files, repeating until self-dependent files converge.

    There are ICU files whose generation depends on ICU itself.
    This method repeatedly builds ICU and re-generates these files until they
    converge, i.e. subsequent builds do not change these files.
    """
    last_icu_build_dir = _MakeIcuDataFilesOnce()

    # If icu4c/source/data/misc/langInfo.txt is re-generated, the binary data files need to be
    # re-generated. _MakeIcuDataFilesOnce() is called until it converges because the
    # re-generation depends on icu4j, and icu4j depends on the binary data files.
    while _MakeLangInfo():
        last_icu_build_dir = _MakeIcuDataFilesOnce()

    _MakeIcuDataFilesWithoutTimeZoneFiles(last_icu_build_dir)


def _MakeIcuDataFilesOnce():
    """Builds ICU and copies .dat and .jar files to expected places.

    Build is invoked only once. It is unlikely that you need to call
    this method outside of this script.

    This is a low-level method.
    Please check :func:`GenerateIcuDataFiles()` for caveats.

    Returns:
      The path of the ICU build directory that was used.
    """
    i18nutil.SwitchToNewTemporaryDirectory()
    icu_build_dir = '%s/icu' % os.getcwd()

    PrepareIcuBuild(icu_build_dir, data_filters_json=ICU_MLDATA_FILTERS)

    MakeAndCopyIcuDataFiles(icu_build_dir)

    return icu_build_dir


def _MakeIcuDataFilesWithoutTimeZoneFiles(icu_build_dir):
    """Rebuilds the ICU4C .dat file without the timezone .res files.

    Remove the timezone .res files from the .dat file in order to save ~200 KB file size.
    TODO (b/206956042): Move this to the first build when
    https://unicode-org.atlassian.net/browse/ICU-21769 is fixed.
    Now another build is needed to build a new .dat file without the timezone files.
    """
    # A manual removal of the .lst file is needed to force GNUmake to rebuild the .lst file
    list_file_path = pathlib.Path(icu_build_dir, 'data/out/tmp/icudata.lst')
    list_file_path.unlink(missing_ok=True)

    PrepareIcuBuild(icu_build_dir, data_filters_json=ICU_DATA_FILTERS)
    # copy_icu4c_dat_file_only is set to true to avoid copying the ICU4J data or other files
    # because the data files may be incomplete to be consumed for a host tool.
    # The ICU4J implementation on device doesn't use the ICU4J data files,
    # e.g. ./icu4j/main/shared/data/icudata.jar
    MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=True)


def CopyLicenseFiles(target_dir):
    """Copies ICU license files to the target_dir"""

    license_file = '%s/main/shared/licenses/LICENSE' % icu4jDir()
    print('Copying %s to %s ...' % (license_file, target_dir))
    shutil.copy(license_file, target_dir)