1# Copyright (C) 2018 and later: Unicode, Inc. and others. 2# License & terms of use: http://www.unicode.org/copyright.html 3 4# Python 2/3 Compatibility (ICU-20299) 5# TODO(ICU-20301): Remove this. 6from __future__ import print_function 7 8from icutools.databuilder import * 9from icutools.databuilder import utils 10from icutools.databuilder.request_types import * 11 12import os 13import sys 14 15 16def generate(config, io, common_vars): 17 requests = [] 18 19 if len(io.glob("misc/*")) == 0: 20 print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr) 21 exit(1) 22 23 requests += generate_cnvalias(config, io, common_vars) 24 requests += generate_ulayout(config, io, common_vars) 25 requests += generate_confusables(config, io, common_vars) 26 requests += generate_conversion_mappings(config, io, common_vars) 27 requests += generate_brkitr_brk(config, io, common_vars) 28 requests += generate_stringprep(config, io, common_vars) 29 requests += generate_brkitr_dictionaries(config, io, common_vars) 30 requests += generate_normalization(config, io, common_vars) 31 requests += generate_coll_ucadata(config, io, common_vars) 32 requests += generate_full_unicore_data(config, io, common_vars) 33 requests += generate_unames(config, io, common_vars) 34 requests += generate_misc(config, io, common_vars) 35 requests += generate_curr_supplemental(config, io, common_vars) 36 requests += generate_translit(config, io, common_vars) 37 38 # Res Tree Files 39 # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files) 40 requests += generate_tree(config, io, common_vars, 41 "locales", 42 None, 43 config.use_pool_bundle, 44 []) 45 46 requests += generate_tree(config, io, common_vars, 47 "curr", 48 "curr", 49 config.use_pool_bundle, 50 []) 51 52 requests += generate_tree(config, io, common_vars, 53 "lang", 54 "lang", 55 config.use_pool_bundle, 56 []) 57 58 requests += generate_tree(config, io, common_vars, 59 "region", 60 "region", 61 config.use_pool_bundle, 62 []) 63 64 requests += generate_tree(config, io, common_vars, 65 "zone", 66 "zone", 67 config.use_pool_bundle, 68 []) 69 70 requests += generate_tree(config, io, common_vars, 71 "unit", 72 "unit", 73 config.use_pool_bundle, 74 []) 75 76 requests += generate_tree(config, io, common_vars, 77 "coll", 78 "coll", 79 # Never use pool bundle for coll, brkitr, or rbnf 80 False, 81 # Depends on timezoneTypes.res and keyTypeData.res. 82 # TODO: We should not need this dependency to build collation. 83 # TODO: Bake keyTypeData.res into the common library? 84 [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")]) 85 86 requests += generate_tree(config, io, common_vars, 87 "brkitr", 88 "brkitr", 89 # Never use pool bundle for coll, brkitr, or rbnf 90 False, 91 [DepTarget("brkitr_brk"), DepTarget("dictionaries")]) 92 93 requests += generate_tree(config, io, common_vars, 94 "rbnf", 95 "rbnf", 96 # Never use pool bundle for coll, brkitr, or rbnf 97 False, 98 []) 99 100 requests += [ 101 ListRequest( 102 name = "icudata_list", 103 variable_name = "icudata_all_output_files", 104 output_file = TmpFile("icudata.lst"), 105 include_tmp = False 106 ) 107 ] 108 109 return requests 110 111 112def generate_cnvalias(config, io, common_vars): 113 # UConv Name Aliases 114 input_file = InFile("mappings/convrtrs.txt") 115 output_file = OutFile("cnvalias.icu") 116 return [ 117 SingleExecutionRequest( 118 name = "cnvalias", 119 category = "cnvalias", 120 dep_targets = [], 121 input_files = [input_file], 122 output_files = [output_file], 123 tool = IcuTool("gencnval"), 124 args = "-s {IN_DIR} -d {OUT_DIR} " 125 "{INPUT_FILES[0]}", 126 format_with = {} 127 ) 128 ] 129 130 131def generate_confusables(config, io, common_vars): 132 # CONFUSABLES 133 txt1 = InFile("unidata/confusables.txt") 134 txt2 = InFile("unidata/confusablesWholeScript.txt") 135 cfu = OutFile("confusables.cfu") 136 return [ 137 SingleExecutionRequest( 138 name = "confusables", 139 category = "confusables", 140 dep_targets = [DepTarget("cnvalias")], 141 input_files = [txt1, txt2], 142 output_files = [cfu], 143 tool = IcuTool("gencfu"), 144 args = "-d {OUT_DIR} -i {OUT_DIR} " 145 "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} " 146 "-o {OUTPUT_FILES[0]}", 147 format_with = {} 148 ) 149 ] 150 151 152def generate_conversion_mappings(config, io, common_vars): 153 # UConv Conversion Table Files 154 input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")] 155 output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files] 156 # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv 157 return [ 158 RepeatedOrSingleExecutionRequest( 159 name = "conversion_mappings", 160 category = "conversion_mappings", 161 dep_targets = [], 162 input_files = input_files, 163 output_files = output_files, 164 tool = IcuTool("makeconv"), 165 # BEGIN android-changed 166 # args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}", 167 args = "-s {IN_DIR} -d {OUT_DIR} -c --small {INPUT_FILE_PLACEHOLDER}", 168 # END android-changed 169 format_with = {}, 170 repeat_with = { 171 "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(file.filename for file in input_files) 172 } 173 ) 174 ] 175 176 177def generate_brkitr_brk(config, io, common_vars): 178 # BRK Files 179 input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")] 180 output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files] 181 return [ 182 RepeatedExecutionRequest( 183 name = "brkitr_brk", 184 category = "brkitr_rules", 185 dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")], 186 input_files = input_files, 187 output_files = output_files, 188 tool = IcuTool("genbrk"), 189 args = "-d {OUT_DIR} -i {OUT_DIR} " 190 "-c -r {IN_DIR}/{INPUT_FILE} " 191 "-o {OUTPUT_FILE}", 192 format_with = {}, 193 repeat_with = {} 194 ) 195 ] 196 197 198def generate_stringprep(config, io, common_vars): 199 # SPP FILES 200 input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")] 201 output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files] 202 bundle_names = [v.filename[6:-4] for v in input_files] 203 return [ 204 RepeatedExecutionRequest( 205 name = "stringprep", 206 category = "stringprep", 207 dep_targets = [InFile("unidata/NormalizationCorrections.txt")], 208 input_files = input_files, 209 output_files = output_files, 210 tool = IcuTool("gensprep"), 211 args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} " 212 "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt", 213 format_with = {}, 214 repeat_with = { 215 "BUNDLE_NAME": bundle_names 216 } 217 ) 218 ] 219 220 221def generate_brkitr_dictionaries(config, io, common_vars): 222 # Dict Files 223 input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")] 224 output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files] 225 extra_options_map = { 226 "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000", 227 "brkitr/dictionaries/cjdict.txt": "--uchars", 228 "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780", 229 "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80", 230 "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00" 231 } 232 extra_optionses = [extra_options_map[v.filename] for v in input_files] 233 return [ 234 RepeatedExecutionRequest( 235 name = "dictionaries", 236 category = "brkitr_dictionaries", 237 dep_targets = [], 238 input_files = input_files, 239 output_files = output_files, 240 tool = IcuTool("gendict"), 241 args = "-i {OUT_DIR} " 242 "-c {EXTRA_OPTIONS} " 243 "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", 244 format_with = {}, 245 repeat_with = { 246 "EXTRA_OPTIONS": extra_optionses 247 } 248 ) 249 ] 250 251 252def generate_normalization(config, io, common_vars): 253 # NRM Files 254 input_files = [InFile(filename) for filename in io.glob("in/*.nrm")] 255 # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data 256 input_files.remove(InFile("in/nfc.nrm")) 257 output_files = [OutFile(v.filename[3:]) for v in input_files] 258 return [ 259 RepeatedExecutionRequest( 260 name = "normalization", 261 category = "normalization", 262 dep_targets = [], 263 input_files = input_files, 264 output_files = output_files, 265 tool = IcuTool("icupkg"), 266 args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", 267 format_with = {}, 268 repeat_with = {} 269 ) 270 ] 271 272 273def generate_coll_ucadata(config, io, common_vars): 274 # Collation Dependency File (ucadata.icu) 275 input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type) 276 output_file = OutFile("coll/ucadata.icu") 277 return [ 278 SingleExecutionRequest( 279 name = "coll_ucadata", 280 category = "coll_ucadata", 281 dep_targets = [], 282 input_files = [input_file], 283 output_files = [output_file], 284 tool = IcuTool("icupkg"), 285 args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", 286 format_with = {} 287 ) 288 ] 289 290 291def generate_full_unicore_data(config, io, common_vars): 292 # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu) 293 # are hardcoded in the common DLL and therefore not included in the data package any more. 294 # They are not built by default but need to be built for ICU4J data, 295 # both in the .jar and in the .dat file (if ICU4J uses the .dat file). 296 # See ICU-4497. 297 if not config.include_uni_core_data: 298 return [] 299 300 basenames = [ 301 "pnames.icu", 302 "uprops.icu", 303 "ucase.icu", 304 "ubidi.icu", 305 "nfc.nrm" 306 ] 307 input_files = [InFile("in/%s" % bn) for bn in basenames] 308 output_files = [OutFile(bn) for bn in basenames] 309 return [ 310 RepeatedExecutionRequest( 311 name = "unicore", 312 category = "unicore", 313 input_files = input_files, 314 output_files = output_files, 315 tool = IcuTool("icupkg"), 316 args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}" 317 ) 318 ] 319 320 321def generate_unames(config, io, common_vars): 322 # Unicode Character Names 323 input_file = InFile("in/unames.icu") 324 output_file = OutFile("unames.icu") 325 return [ 326 SingleExecutionRequest( 327 name = "unames", 328 category = "unames", 329 dep_targets = [], 330 input_files = [input_file], 331 output_files = [output_file], 332 tool = IcuTool("icupkg"), 333 args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", 334 format_with = {} 335 ) 336 ] 337 338 339def generate_ulayout(config, io, common_vars): 340 # Unicode text layout properties 341 basename = "ulayout" 342 input_file = InFile("in/%s.icu" % basename) 343 output_file = OutFile("%s.icu" % basename) 344 return [ 345 SingleExecutionRequest( 346 name = basename, 347 category = basename, 348 dep_targets = [], 349 input_files = [input_file], 350 output_files = [output_file], 351 tool = IcuTool("icupkg"), 352 args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", 353 format_with = {} 354 ) 355 ] 356 357 358def generate_misc(config, io, common_vars): 359 # Misc Data Res Files 360 input_files = [InFile(filename) for filename in io.glob("misc/*.txt")] 361 input_basenames = [v.filename[5:] for v in input_files] 362 output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames] 363 return [ 364 RepeatedExecutionRequest( 365 name = "misc_res", 366 category = "misc", 367 dep_targets = [], 368 input_files = input_files, 369 output_files = output_files, 370 tool = IcuTool("genrb"), 371 args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} " 372 "-k -q " 373 "{INPUT_BASENAME}", 374 format_with = {}, 375 repeat_with = { 376 "INPUT_BASENAME": input_basenames 377 } 378 ) 379 ] 380 381 382def generate_curr_supplemental(config, io, common_vars): 383 # Currency Supplemental Res File 384 input_file = InFile("curr/supplementalData.txt") 385 input_basename = "supplementalData.txt" 386 output_file = OutFile("curr/supplementalData.res") 387 return [ 388 SingleExecutionRequest( 389 name = "curr_supplemental_res", 390 category = "curr_supplemental", 391 dep_targets = [], 392 input_files = [input_file], 393 output_files = [output_file], 394 tool = IcuTool("genrb"), 395 args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} " 396 "-k " 397 "{INPUT_BASENAME}", 398 format_with = { 399 "INPUT_BASENAME": input_basename 400 } 401 ) 402 ] 403 404 405def generate_translit(config, io, common_vars): 406 input_files = [ 407 InFile("translit/root.txt"), 408 InFile("translit/en.txt"), 409 InFile("translit/el.txt") 410 ] 411 dep_files = set(InFile(filename) for filename in io.glob("translit/*.txt")) 412 dep_files -= set(input_files) 413 dep_files = list(sorted(dep_files)) 414 input_basenames = [v.filename[9:] for v in input_files] 415 output_files = [ 416 OutFile("translit/%s.res" % v[:-4]) 417 for v in input_basenames 418 ] 419 return [ 420 RepeatedOrSingleExecutionRequest( 421 name = "translit_res", 422 category = "translit", 423 dep_targets = dep_files, 424 input_files = input_files, 425 output_files = output_files, 426 tool = IcuTool("genrb"), 427 args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} " 428 "-k " 429 "{INPUT_BASENAME}", 430 format_with = { 431 }, 432 repeat_with = { 433 "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames) 434 } 435 ) 436 ] 437 438 439def generate_tree( 440 config, 441 io, 442 common_vars, 443 sub_dir, 444 out_sub_dir, 445 use_pool_bundle, 446 dep_targets): 447 requests = [] 448 category = "%s_tree" % sub_dir 449 out_prefix = "%s/" % out_sub_dir if out_sub_dir else "" 450 # TODO: Clean this up for curr 451 input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)] 452 if sub_dir == "curr": 453 input_files.remove(InFile("curr/supplementalData.txt")) 454 input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files] 455 output_files = [ 456 OutFile("%s%s.res" % (out_prefix, v[:-4])) 457 for v in input_basenames 458 ] 459 460 # Generate Pool Bundle 461 if use_pool_bundle: 462 input_pool_files = [OutFile("%spool.res" % out_prefix)] 463 pool_target_name = "%s_pool_write" % sub_dir 464 use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format( 465 OUT_PREFIX = out_prefix, 466 **common_vars 467 ) 468 requests += [ 469 SingleExecutionRequest( 470 name = pool_target_name, 471 category = category, 472 dep_targets = dep_targets, 473 input_files = input_files, 474 output_files = input_pool_files, 475 tool = IcuTool("genrb"), 476 args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " 477 "--writePoolBundle -k " 478 "{INPUT_BASENAMES_SPACED}", 479 format_with = { 480 "IN_SUB_DIR": sub_dir, 481 "OUT_PREFIX": out_prefix, 482 "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames) 483 } 484 ), 485 ] 486 dep_targets = dep_targets + [DepTarget(pool_target_name)] 487 else: 488 use_pool_bundle_option = "" 489 490 # Generate Res File Tree 491 requests += [ 492 RepeatedOrSingleExecutionRequest( 493 name = "%s_res" % sub_dir, 494 category = category, 495 dep_targets = dep_targets, 496 input_files = input_files, 497 output_files = output_files, 498 tool = IcuTool("genrb"), 499 # BEGIN android-changed 500 args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " + 501 ("--omitCollationRules " if sub_dir == "coll" else "") + 502 "{EXTRA_OPTION} -k " 503 "{INPUT_BASENAME}", 504 # END android-changed 505 format_with = { 506 "IN_SUB_DIR": sub_dir, 507 "OUT_PREFIX": out_prefix, 508 "EXTRA_OPTION": use_pool_bundle_option 509 }, 510 repeat_with = { 511 "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames) 512 } 513 ) 514 ] 515 516 # Generate res_index file 517 # Exclude the deprecated locale variants and root; see ICU-20628. This 518 # could be data-driven, but we do not want to perform I/O in this script 519 # (for example, we do not want to read from an XML file). 520 excluded_locales = set([ 521 "ja_JP_TRADITIONAL", 522 "th_TH_TRADITIONAL", 523 "de_", 524 "de__PHONEBOOK", 525 "es_", 526 "es__TRADITIONAL", 527 "root", 528 ]) 529 # Put alias locales in a separate structure; see ICU-20627 530 dependency_data = io.read_locale_deps(sub_dir) 531 if "aliases" in dependency_data: 532 alias_locales = set(dependency_data["aliases"].keys()) 533 else: 534 alias_locales = set() 535 alias_files = [] 536 installed_files = [] 537 for f in input_files: 538 file_stem = IndexRequest.locale_file_stem(f) 539 if file_stem in excluded_locales: 540 continue 541 destination = alias_files if file_stem in alias_locales else installed_files 542 destination.append(f) 543 cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None 544 index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format( 545 IN_SUB_DIR = sub_dir, 546 **common_vars 547 )) 548 index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format( 549 OUT_PREFIX = out_prefix, 550 **common_vars 551 )) 552 index_file_target_name = "%s_index_txt" % sub_dir 553 requests += [ 554 IndexRequest( 555 name = index_file_target_name, 556 category = category, 557 installed_files = installed_files, 558 alias_files = alias_files, 559 txt_file = index_file_txt, 560 output_file = index_res_file, 561 cldr_version = cldr_version, 562 args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " 563 "-k " 564 "{INDEX_NAME}.txt", 565 format_with = { 566 "IN_SUB_DIR": sub_dir, 567 "OUT_PREFIX": out_prefix 568 } 569 ) 570 ] 571 572 return requests 573