# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

from icutools.databuilder import *
from icutools.databuilder import utils
from icutools.databuilder.request_types import *

import os
import sys


def generate(config, io, common_vars):
    requests = []

    if len(io.glob("misc/*")) == 0:
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        sys.exit(1)

    requests += generate_cnvalias(config, io, common_vars)
    requests += generate_ulayout(config, io, common_vars)
    requests += generate_confusables(config, io, common_vars)
    requests += generate_conversion_mappings(config, io, common_vars)
    requests += generate_brkitr_brk(config, io, common_vars)
    requests += generate_stringprep(config, io, common_vars)
    requests += generate_brkitr_dictionaries(config, io, common_vars)
    requests += generate_normalization(config, io, common_vars)
    requests += generate_coll_ucadata(config, io, common_vars)
    requests += generate_full_unicore_data(config, io, common_vars)
    requests += generate_unames(config, io, common_vars)
    requests += generate_misc(config, io, common_vars)
    requests += generate_curr_supplemental(config, io, common_vars)
    requests += generate_zone_supplemental(config, io, common_vars)
    requests += generate_translit(config, io, common_vars)

    # Res Tree Files
    # (input dirname, output dirname, use pool bundle, dep targets)
    requests += generate_tree(config, io, common_vars,
        "locales",
        None,
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "curr",
        "curr",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "lang",
        "lang",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "region",
        "region",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "zone",
        "zone",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "unit",
        "unit",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "coll",
        "coll",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        # Depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])

    requests += generate_tree(config, io, common_vars,
        "brkitr",
        "brkitr",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [DepTarget("brkitr_brk"), DepTarget("dictionaries")])

    requests += generate_tree(config, io, common_vars,
        "rbnf",
        "rbnf",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [])

    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests


def generate_cnvalias(config, io, common_vars):
    # UConv Name Aliases
    input_file = InFile("mappings/convrtrs.txt")
    output_file = OutFile("cnvalias.icu")
    return [
        SingleExecutionRequest(
            name = "cnvalias",
            category = "cnvalias",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("gencnval"),
            args = "-s {IN_DIR} -d {OUT_DIR} "
                "{INPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_confusables(config, io, common_vars):
    # CONFUSABLES
    txt1 = InFile("unidata/confusables.txt")
    txt2 = InFile("unidata/confusablesWholeScript.txt")
    cfu = OutFile("confusables.cfu")
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            dep_targets = [DepTarget("cnvalias")],
            input_files = [txt1, txt2],
            output_files = [cfu],
            tool = IcuTool("gencfu"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
                "-o {OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_conversion_mappings(config, io, common_vars):
    # UConv Conversion Table Files
    input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")]
    output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("makeconv"),
            # BEGIN android-changed
            # args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            args = "-s {IN_DIR} -d {OUT_DIR} -c --small {INPUT_FILE_PLACEHOLDER}",
            # END android-changed
            format_with = {},
            repeat_with = {
                "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(file.filename for file in input_files)
            }
        )
    ]


def generate_brkitr_brk(config, io, common_vars):
    # BRK Files
    input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")]
    output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genbrk"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILE} "
                "-o {OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_stringprep(config, io, common_vars):
    # SPP FILES
    input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")]
    output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files]
    bundle_names = [v.filename[6:-4] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gensprep"),
            args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
                "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": bundle_names
            }
        )
    ]


def generate_brkitr_dictionaries(config, io, common_vars):
    # Dict Files
    input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")]
    output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files]
    extra_options_map = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    extra_optionses = [extra_options_map[v.filename] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gendict"),
            args = "-i {OUT_DIR} "
                "-c {EXTRA_OPTIONS} "
                "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": extra_optionses
            }
        )
    ]


def generate_normalization(config, io, common_vars):
    # NRM Files
    input_files = [InFile(filename) for filename in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    input_files.remove(InFile("in/nfc.nrm"))
    output_files = [OutFile(v.filename[3:]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_coll_ucadata(config, io, common_vars):
    # Collation Dependency File (ucadata.icu)
    input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type)
    output_file = OutFile("coll/ucadata.icu")
    return [
        SingleExecutionRequest(
            name = "coll_ucadata",
            category = "coll_ucadata",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_full_unicore_data(config, io, common_vars):
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "unicore",
            category = "unicore",
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
        )
    ]


def generate_unames(config, io, common_vars):
    # Unicode Character Names
    input_file = InFile("in/unames.icu")
    output_file = OutFile("unames.icu")
    return [
        SingleExecutionRequest(
            name = "unames",
            category = "unames",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_ulayout(config, io, common_vars):
    # Unicode text layout properties
    basename = "ulayout"
    input_file = InFile("in/%s.icu" % basename)
    output_file = OutFile("%s.icu" % basename)
    return [
        SingleExecutionRequest(
            name = basename,
            category = basename,
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_misc(config, io, common_vars):
    # Misc Data Res Files
    input_files = [InFile(filename) for filename in io.glob("misc/*.txt")]
    input_basenames = [v.filename[5:] for v in input_files]
    output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames]
    return [
        RepeatedExecutionRequest(
            name = "misc_res",
            category = "misc",
            dep_targets = [DepTarget("cnvalias")], # ICU-21175
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
                "-k -q "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": input_basenames
            }
        )
    ]


def generate_curr_supplemental(config, io, common_vars):
    # Currency Supplemental Res File
    input_file = InFile("curr/supplementalData.txt")
    input_basename = "supplementalData.txt"
    output_file = OutFile("curr/supplementalData.res")
    return [
        SingleExecutionRequest(
            name = "curr_supplemental_res",
            category = "curr_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_zone_supplemental(config, io, common_vars):
    # tzdbNames Res File
    input_file = InFile("zone/tzdbNames.txt")
    input_basename = "tzdbNames.txt"
    output_file = OutFile("zone/tzdbNames.res")
    return [
        SingleExecutionRequest(
            name = "zone_supplemental_res",
            category = "zone_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_translit(config, io, common_vars):
    # Transliteration Rule Res Files (root + aliases; other translit sources are deps only)
    input_files = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
    dep_files = set(InFile(filename) for filename in io.glob("translit/*.txt"))
    dep_files -= set(input_files)
    dep_files = list(sorted(dep_files))
    input_basenames = [v.filename[9:] for v in input_files]
    output_files = [
        OutFile("translit/%s.res" % v[:-4])
        for v in input_basenames
    ]
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = dep_files,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]


def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    requests = []
    category = "%s_tree" % sub_dir
    out_prefix = "%s/" % out_sub_dir if out_sub_dir else ""
    input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)]
    if sub_dir == "curr":
        input_files.remove(InFile("curr/supplementalData.txt"))
    if sub_dir == "zone":
        input_files.remove(InFile("zone/tzdbNames.txt"))
    input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files]
    output_files = [
        OutFile("%s%s.res" % (out_prefix, v[:-4]))
        for v in input_basenames
    ]

    # Generate Pool Bundle
    if use_pool_bundle:
        input_pool_files = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        requests += [
            SingleExecutionRequest(
                name = pool_target_name,
                category = category,
                dep_targets = dep_targets,
                input_files = input_files,
                output_files = input_pool_files,
                tool = IcuTool("genrb"),
                args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                    "--writePoolBundle -k "
                    "{INPUT_BASENAMES_SPACED}",
                format_with = {
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames)
                }
            ),
        ]
        dep_targets = dep_targets + [DepTarget(pool_target_name)]
    else:
        use_pool_bundle_option = ""

    # Generate Res File Tree
    requests += [
        RepeatedOrSingleExecutionRequest(
            name = "%s_res" % sub_dir,
            category = category,
            dep_targets = dep_targets,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            # BEGIN android-changed
            args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " +
                ("--omitCollationRules " if sub_dir == "coll" else "") +
                "{EXTRA_OPTION} -k "
                "{INPUT_BASENAME}",
            # END android-changed
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": use_pool_bundle_option
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = set([
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    ])
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    if "aliases" in dependency_data:
        alias_locales = set(dependency_data["aliases"].keys())
    else:
        alias_locales = set()
    alias_files = []
    installed_files = []
    for f in input_files:
        file_stem = IndexRequest.locale_file_stem(f)
        if file_stem in excluded_locales:
            continue
        destination = alias_files if file_stem in alias_locales else installed_files
        destination.append(f)
    cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None
    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_file_target_name = "%s_index_txt" % sub_dir
    requests += [
        IndexRequest(
            name = index_file_target_name,
            category = category,
            installed_files = installed_files,
            alias_files = alias_files,
            txt_file = index_file_txt,
            output_file = index_res_file,
            cldr_version = cldr_version,
            args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "-k "
                "{INDEX_NAME}.txt",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix
            }
        )
    ]

    return requests
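
# Illustrative sketch (not part of the build): registering an additional
# resource tree would follow the same pattern as the calls in generate()
# above. The "myext" directory name below is hypothetical; generate_tree()
# only needs the source subdirectory, the output subdirectory (or None to
# write directly to the data root), whether to write a pool bundle, and any
# extra dep targets.
#
#   requests += generate_tree(config, io, common_vars,
#       "myext",                  # sub_dir: source dir containing myext/*.txt
#       "myext",                  # out_sub_dir: output dir for myext/*.res
#       config.use_pool_bundle,   # write pool.res first, then reference it
#       [])                       # dep_targets: extra build dependencies
#
# When use_pool_bundle is True, generate_tree() emits a "<sub_dir>_pool_write"
# request that runs genrb with --writePoolBundle, then builds the individual
# bundles with --usePoolBundle pointing at the generated pool.res.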