# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

from icutools.databuilder import *
from icutools.databuilder import utils
from icutools.databuilder.request_types import *

import os
import sys


def generate(config, io, common_vars):
    """Return the complete, ordered list of data build requests.

    The result is the concatenation of the requests produced by each
    ``generate_*`` helper in this file, followed by one request per resource
    tree, followed by a final ``ListRequest`` named ``icudata_list``.

    Args:
        config: Build configuration. This file reads ``use_pool_bundle``,
            ``coll_han_type`` and ``include_uni_core_data`` from it.
        io: Object providing ``glob(pattern)`` and
            ``read_locale_deps(sub_dir)``.
        common_vars: Mapping expanded with ``**`` into ``str.format`` calls in
            ``generate_tree``; it must supply at least ``OUT_DIR`` and
            ``INDEX_NAME``.

    Returns:
        A list of request objects.

    Exits the process with status 1 (after printing to stderr) when nothing
    matches ``misc/*``, i.e. the source data directory was not found.
    """
    requests = []

    if not io.glob("misc/*"):
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module and is not available in every interpreter configuration.
        sys.exit(1)

    # Order matters for the resulting request list; keep it stable.
    simple_generators = (
        generate_cnvalias,
        generate_ulayout,
        generate_confusables,
        generate_conversion_mappings,
        generate_brkitr_brk,
        generate_stringprep,
        generate_brkitr_dictionaries,
        generate_normalization,
        generate_coll_ucadata,
        generate_full_unicore_data,
        generate_unames,
        generate_misc,
        generate_curr_supplemental,
        generate_zone_supplemental,
        generate_translit,
    )
    for generator in simple_generators:
        requests += generator(config, io, common_vars)

    # Res Tree Files
    # (input dirname, output dirname, use pool bundle, dep targets)
    tree_specs = [
        ("locales", None, config.use_pool_bundle, []),
        ("curr", "curr", config.use_pool_bundle, []),
        ("lang", "lang", config.use_pool_bundle, []),
        ("region", "region", config.use_pool_bundle, []),
        ("zone", "zone", config.use_pool_bundle, []),
        ("unit", "unit", config.use_pool_bundle, []),
        (
            "coll",
            "coll",
            # Never use pool bundle for coll, brkitr, or rbnf
            False,
            # Depends on timezoneTypes.res and keyTypeData.res.
            # TODO: We should not need this dependency to build collation.
            # TODO: Bake keyTypeData.res into the common library?
            [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")],
        ),
        (
            "brkitr",
            "brkitr",
            # Never use pool bundle for coll, brkitr, or rbnf
            False,
            [DepTarget("brkitr_brk"), DepTarget("dictionaries")],
        ),
        (
            "rbnf",
            "rbnf",
            # Never use pool bundle for coll, brkitr, or rbnf
            False,
            [],
        ),
    ]
    for sub_dir, out_sub_dir, use_pool_bundle, dep_targets in tree_specs:
        requests += generate_tree(
            config, io, common_vars,
            sub_dir,
            out_sub_dir,
            use_pool_bundle,
            dep_targets)

    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests


def generate_cnvalias(config, io, common_vars):
    """Return the request that runs gencnval on mappings/convrtrs.txt."""
    # UConv Name Aliases
    input_file = InFile("mappings/convrtrs.txt")
    output_file = OutFile("cnvalias.icu")
    return [
        SingleExecutionRequest(
            name = "cnvalias",
            category = "cnvalias",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("gencnval"),
            args = "-s {IN_DIR} -d {OUT_DIR} "
                "{INPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_confusables(config, io, common_vars):
    """Return the request that runs gencfu to produce confusables.cfu."""
    # CONFUSABLES
    txt1 = InFile("unidata/confusables.txt")
    txt2 = InFile("unidata/confusablesWholeScript.txt")
    cfu = OutFile("confusables.cfu")
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            dep_targets = [DepTarget("cnvalias")],
            input_files = [txt1, txt2],
            output_files = [cfu],
            tool = IcuTool("gencfu"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
                "-o {OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_conversion_mappings(config, io, common_vars):
    """Return the makeconv request covering every mappings/*.ucm file.

    Each ``mappings/<name>.ucm`` input maps to a ``<name>.cnv`` output.
    """
    # UConv Conversion Table Files
    prefix_len = len("mappings/")
    suffix_len = len(".ucm")
    input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")]
    output_files = [
        OutFile("%s.cnv" % v.filename[prefix_len:-suffix_len])
        for v in input_files
    ]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("makeconv"),
            args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            format_with = {},
            repeat_with = {
                "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(f.filename for f in input_files)
            }
        )
    ]


def generate_brkitr_brk(config, io, common_vars):
    """Return the genbrk request covering every brkitr/rules/*.txt file.

    Each ``brkitr/rules/<name>.txt`` input maps to ``brkitr/<name>.brk``.
    """
    # BRK Files
    prefix_len = len("brkitr/rules/")
    suffix_len = len(".txt")
    input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")]
    output_files = [
        OutFile("brkitr/%s.brk" % v.filename[prefix_len:-suffix_len])
        for v in input_files
    ]
    return [
        RepeatedExecutionRequest(
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genbrk"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILE} "
                "-o {OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_stringprep(config, io, common_vars):
    """Return the gensprep request covering every sprep/*.txt file.

    Each ``sprep/<bundle>.txt`` input maps to a ``<bundle>.spp`` output, and
    ``<bundle>`` is also passed to the tool as ``{BUNDLE_NAME}``.
    """
    # SPP FILES
    prefix_len = len("sprep/")
    suffix_len = len(".txt")
    input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")]
    bundle_names = [v.filename[prefix_len:-suffix_len] for v in input_files]
    output_files = [OutFile("%s.spp" % bundle) for bundle in bundle_names]
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gensprep"),
            args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
                "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": bundle_names
            }
        )
    ]


def generate_brkitr_dictionaries(config, io, common_vars):
    """Return the gendict request covering brkitr/dictionaries/*.txt.

    Each ``brkitr/dictionaries/<name>.txt`` input maps to
    ``brkitr/<name>.dict``.

    Raises:
        KeyError: if a globbed dictionary file has no entry in the
            per-file options table below.
    """
    # Dict Files
    prefix_len = len("brkitr/dictionaries/")
    suffix_len = len(".txt")
    input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")]
    output_files = [
        OutFile("brkitr/%s.dict" % v.filename[prefix_len:-suffix_len])
        for v in input_files
    ]
    # Per-file gendict options. Every globbed dictionary must be listed here;
    # a missing entry fails loudly with KeyError instead of guessing.
    extra_options_map = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00",
        "brkitr/dictionaries/zawgyidict.txt": "--bytes --transform offset-0x1000"
    }
    extra_optionses = [extra_options_map[v.filename] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gendict"),
            args = "-i {OUT_DIR} "
                "-c {EXTRA_OPTIONS} "
                "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": extra_optionses
            }
        )
    ]


def generate_normalization(config, io, common_vars):
    """Return the icupkg request covering in/*.nrm, except in/nfc.nrm.

    Each ``in/<name>.nrm`` input maps to a ``<name>.nrm`` output.

    Raises:
        ValueError: if ``in/nfc.nrm`` is not among the globbed files
            (raised by ``list.remove``).
    """
    # NRM Files
    prefix_len = len("in/")
    input_files = [InFile(filename) for filename in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    input_files.remove(InFile("in/nfc.nrm"))
    output_files = [OutFile(v.filename[prefix_len:]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_coll_ucadata(config, io, common_vars):
    """Return the icupkg request that produces coll/ucadata.icu.

    The input file name is selected by ``config.coll_han_type``.
    """
    # Collation Dependency File (ucadata.icu)
    input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type)
    output_file = OutFile("coll/ucadata.icu")
    return [
        SingleExecutionRequest(
            name = "coll_ucadata",
            category = "coll_ucadata",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_full_unicore_data(config, io, common_vars):
    """Return the icupkg request for the core Unicode property files.

    Returns an empty list unless ``config.include_uni_core_data`` is truthy.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "unicore",
            category = "unicore",
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
        )
    ]


def generate_unames(config, io, common_vars):
    """Return the icupkg request that produces unames.icu."""
    # Unicode Character Names
    input_file = InFile("in/unames.icu")
    output_file = OutFile("unames.icu")
    return [
        SingleExecutionRequest(
            name = "unames",
            category = "unames",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_ulayout(config, io, common_vars):
    """Return the icupkg request that produces ulayout.icu."""
    # Unicode text layout properties
    basename = "ulayout"
    input_file = InFile("in/%s.icu" % basename)
    output_file = OutFile("%s.icu" % basename)
    return [
        SingleExecutionRequest(
            name = basename,
            category = basename,
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_misc(config, io, common_vars):
    """Return the genrb request covering every misc/*.txt file.

    Each ``misc/<name>.txt`` input maps to a ``<name>.res`` output.
    """
    # Misc Data Res Files
    prefix_len = len("misc/")
    suffix_len = len(".txt")
    input_files = [InFile(filename) for filename in io.glob("misc/*.txt")]
    input_basenames = [v.filename[prefix_len:] for v in input_files]
    output_files = [OutFile("%s.res" % v[:-suffix_len]) for v in input_basenames]
    return [
        RepeatedExecutionRequest(
            name = "misc_res",
            category = "misc",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
                "-k -q "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": input_basenames
            }
        )
    ]


def generate_curr_supplemental(config, io, common_vars):
    """Return the genrb request that produces curr/supplementalData.res."""
    # Currency Supplemental Res File
    input_file = InFile("curr/supplementalData.txt")
    input_basename = "supplementalData.txt"
    output_file = OutFile("curr/supplementalData.res")
    return [
        SingleExecutionRequest(
            name = "curr_supplemental_res",
            category = "curr_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_zone_supplemental(config, io, common_vars):
    """Return the genrb request that produces zone/tzdbNames.res."""
    # tzdbNames Res File
    input_file = InFile("zone/tzdbNames.txt")
    input_basename = "tzdbNames.txt"
    output_file = OutFile("zone/tzdbNames.res")
    return [
        SingleExecutionRequest(
            name = "zone_supplemental_res",
            category = "zone_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_translit(config, io, common_vars):
    """Return the genrb request for the transliteration resource bundles.

    Only root.txt, en.txt and el.txt are compiled as inputs; every other
    ``translit/*.txt`` file is passed as a dependency, in sorted order.
    """
    prefix_len = len("translit/")
    suffix_len = len(".txt")
    input_files = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
    all_txt_files = set(InFile(filename) for filename in io.glob("translit/*.txt"))
    dep_files = sorted(all_txt_files - set(input_files))
    input_basenames = [v.filename[prefix_len:] for v in input_files]
    output_files = [
        OutFile("translit/%s.res" % v[:-suffix_len])
        for v in input_basenames
    ]
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = dep_files,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]


def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource-bundle tree.

    The returned list contains, in order: an optional pool-bundle request
    (only when ``use_pool_bundle`` is truthy), the request compiling every
    ``<sub_dir>/*.txt`` file with genrb, and an ``IndexRequest`` for the
    tree's index file.

    Args:
        config: Build configuration (not read directly here).
        io: Object providing ``glob(pattern)`` and
            ``read_locale_deps(sub_dir)``.
        common_vars: Mapping expanded into ``str.format``; must supply
            ``OUT_DIR`` and ``INDEX_NAME``.
        sub_dir: Input directory name, e.g. ``"locales"``.
        out_sub_dir: Output directory name, or a falsy value to write the
            outputs without a directory prefix.
        use_pool_bundle: Whether to write and then use a pool bundle.
        dep_targets: Dependencies for the tree's requests. The caller's list
            is not mutated.

    Returns:
        A list of request objects.

    Raises:
        ValueError: for ``sub_dir`` ``"curr"`` / ``"zone"`` when the
            separately-built supplemental file is not among the globbed
            inputs (raised by ``list.remove``).
    """
    requests = []
    category = "%s_tree" % sub_dir
    out_prefix = "%s/" % out_sub_dir if out_sub_dir else ""
    input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)]
    # These two files are built by their own requests
    # (generate_curr_supplemental / generate_zone_supplemental).
    if sub_dir == "curr":
        input_files.remove(InFile("curr/supplementalData.txt"))
    if sub_dir == "zone":
        input_files.remove(InFile("zone/tzdbNames.txt"))
    prefix_len = len(sub_dir) + 1
    suffix_len = len(".txt")
    input_basenames = [v.filename[prefix_len:] for v in input_files]
    output_files = [
        OutFile("%s%s.res" % (out_prefix, v[:-suffix_len]))
        for v in input_basenames
    ]

    # Generate Pool Bundle
    if use_pool_bundle:
        input_pool_files = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        requests += [
            SingleExecutionRequest(
                name = pool_target_name,
                category = category,
                dep_targets = dep_targets,
                input_files = input_files,
                output_files = input_pool_files,
                tool = IcuTool("genrb"),
                args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                    "--writePoolBundle -k "
                    "{INPUT_BASENAMES_SPACED}",
                format_with = {
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames)
                }
            ),
        ]
        # Build a new list so the caller's dep_targets is left untouched.
        dep_targets = dep_targets + [DepTarget(pool_target_name)]
    else:
        use_pool_bundle_option = ""

    # Generate Res File Tree
    requests += [
        RepeatedOrSingleExecutionRequest(
            name = "%s_res" % sub_dir,
            category = category,
            dep_targets = dep_targets,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "{EXTRA_OPTION} -k "
                "{INPUT_BASENAME}",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": use_pool_bundle_option
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = {
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    }
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    if "aliases" in dependency_data:
        alias_locales = set(dependency_data["aliases"].keys())
    else:
        alias_locales = set()
    alias_files = []
    installed_files = []
    for f in input_files:
        file_stem = IndexRequest.locale_file_stem(f)
        if file_stem in excluded_locales:
            continue
        if file_stem in alias_locales:
            alias_files.append(f)
        else:
            installed_files.append(f)
    cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None
    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_file_target_name = "%s_index_txt" % sub_dir
    requests += [
        IndexRequest(
            name = index_file_target_name,
            category = category,
            installed_files = installed_files,
            alias_files = alias_files,
            txt_file = index_file_txt,
            output_file = index_res_file,
            cldr_version = cldr_version,
            args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "-k "
                "{INDEX_NAME}.txt",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix
            }
        )
    ]

    return requests