• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2018 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3
4# Python 2/3 Compatibility (ICU-20299)
5# TODO(ICU-20301): Remove this.
6from __future__ import print_function
7
8from icutools.databuilder import *
9from icutools.databuilder import utils
10from icutools.databuilder.request_types import *
11
12import os
13import sys
14
15
def generate(config, io, common_vars):
    """Assemble the complete, ordered list of data build requests.

    Args:
        config: Build configuration. ``config.use_pool_bundle`` is read here;
            the object is also handed to every sub-generator.
        io: Source-tree accessor. ``io.glob`` is used here to verify that the
            data directory is present; the object is also handed to every
            sub-generator.
        common_vars: Shared format variables, passed through unchanged.

    Returns:
        A list of request objects in the order the sub-generators are called,
        followed by the ``icudata_list`` ListRequest.

    Raises:
        SystemExit: With status 1 if ``io.glob("misc/*")`` finds nothing.
    """
    if len(io.glob("misc/*")) == 0:
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # Use sys.exit rather than the bare exit() helper: the latter is only
        # installed by the site module and is meant for interactive use.
        sys.exit(1)

    requests = []

    # Generators that take only the three standard arguments, in build order.
    simple_generators = (
        generate_cnvalias,
        generate_ulayout,
        generate_uemoji,
        generate_confusables,
        generate_conversion_mappings,
        generate_brkitr_brk,
        generate_brkitr_lstm,
        generate_stringprep,
        generate_brkitr_dictionaries,
        generate_normalization,
        generate_coll_ucadata,
        generate_full_unicore_data,
        generate_unames,
        generate_misc,
        generate_curr_supplemental,
        generate_zone_supplemental,
        generate_translit,
    )
    for build_requests in simple_generators:
        requests += build_requests(config, io, common_vars)

    # Res Tree Files
    # Arguments after common_vars:
    # (input dirname, output dirname, use pool bundle, dep targets)

    # Trees that honor the configured pool bundle setting and have no
    # extra dependencies.
    pooled_trees = (
        ("locales", None),
        ("curr", "curr"),
        ("lang", "lang"),
        ("region", "region"),
        ("zone", "zone"),
        ("unit", "unit"),
    )
    for sub_dir, out_sub_dir in pooled_trees:
        requests += generate_tree(config, io, common_vars,
            sub_dir,
            out_sub_dir,
            config.use_pool_bundle,
            [])

    requests += generate_tree(config, io, common_vars,
        "coll",
        "coll",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        # Depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])

    requests += generate_tree(config, io, common_vars,
        "brkitr",
        "brkitr",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [DepTarget("brkitr_brk"), DepTarget("dictionaries")])

    requests += generate_tree(config, io, common_vars,
        "rbnf",
        "rbnf",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [])

    # Final request: the list of all output files, written to icudata.lst.
    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests
113
114
def generate_cnvalias(config, io, common_vars):
    """Return the single request that builds the converter alias table.

    The request runs the ``gencnval`` tool on mappings/convrtrs.txt and
    declares cnvalias.icu as its output.
    """
    # UConv Name Aliases
    aliases_txt = InFile("mappings/convrtrs.txt")
    aliases_icu = OutFile("cnvalias.icu")
    command_args = (
        "-s {IN_DIR} -d {OUT_DIR} "
        "{INPUT_FILES[0]}"
    )
    request = SingleExecutionRequest(
        name="cnvalias",
        category="cnvalias",
        dep_targets=[],
        input_files=[aliases_txt],
        output_files=[aliases_icu],
        tool=IcuTool("gencnval"),
        args=command_args,
        format_with={},
    )
    return [request]
132
133
def generate_confusables(config, io, common_vars):
    """Return the single request that builds confusables.cfu.

    The request runs ``gencfu`` on the two confusables text files under
    unidata/ and depends on the ``cnvalias`` target.
    """
    # CONFUSABLES
    sources = [
        InFile("unidata/confusables.txt"),
        InFile("unidata/confusablesWholeScript.txt"),
    ]
    compiled = OutFile("confusables.cfu")
    command_args = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
        "-o {OUTPUT_FILES[0]}"
    )
    request = SingleExecutionRequest(
        name="confusables",
        category="confusables",
        dep_targets=[DepTarget("cnvalias")],
        input_files=sources,
        output_files=[compiled],
        tool=IcuTool("gencfu"),
        args=command_args,
        format_with={},
    )
    return [request]
153
154
def generate_conversion_mappings(config, io, common_vars):
    """Return the request that compiles every mappings/*.ucm file to .cnv.

    Each output keeps the input's base name, with the "mappings/" directory
    dropped and the ".ucm" extension replaced by ".cnv". The tool used is
    ``makeconv``.
    """
    # UConv Conversion Table Files
    dir_len = len("mappings/")
    ext_len = len(".ucm")
    ucm_files = [InFile(path) for path in io.glob("mappings/*.ucm")]
    cnv_files = [
        OutFile("%s.cnv" % ucm.filename[dir_len:-ext_len])
        for ucm in ucm_files
    ]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    placeholder_values = utils.SpaceSeparatedList(ucm.filename for ucm in ucm_files)
    request = RepeatedOrSingleExecutionRequest(
        name="conversion_mappings",
        category="conversion_mappings",
        dep_targets=[],
        input_files=ucm_files,
        output_files=cnv_files,
        tool=IcuTool("makeconv"),
        # BEGIN android-changed
        # args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
        args="-s {IN_DIR} -d {OUT_DIR} -c --small {INPUT_FILE_PLACEHOLDER}",
        # END android-changed
        format_with={},
        repeat_with={
            "INPUT_FILE_PLACEHOLDER": placeholder_values,
        },
    )
    return [request]
178
179
def generate_brkitr_brk(config, io, common_vars):
    """Return the request that compiles break-iterator rule files.

    Every brkitr/rules/*.txt input maps to brkitr/<name>.brk, built with
    ``genbrk``. The request depends on the cnvalias, ulayout, uemoji and
    lstm_res targets.
    """
    # BRK Files
    dir_len = len("brkitr/rules/")
    ext_len = len(".txt")
    rule_files = [InFile(path) for path in io.glob("brkitr/rules/*.txt")]
    brk_files = [
        OutFile("brkitr/%s.brk" % rules.filename[dir_len:-ext_len])
        for rules in rule_files
    ]
    prerequisites = [
        DepTarget("cnvalias"),
        DepTarget("ulayout"),
        DepTarget("uemoji"),
        DepTarget("lstm_res"),
    ]
    command_args = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILE} "
        "-o {OUTPUT_FILE}"
    )
    request = RepeatedExecutionRequest(
        name="brkitr_brk",
        category="brkitr_rules",
        dep_targets=prerequisites,
        input_files=rule_files,
        output_files=brk_files,
        tool=IcuTool("genbrk"),
        args=command_args,
        format_with={},
        repeat_with={},
    )
    return [request]
201
202
def generate_stringprep(config, io, common_vars):
    """Return the request that compiles StringPrep profiles.

    Every sprep/*.txt input maps to <bundle>.spp, built with ``gensprep``.
    The bundle name (file name without directory and extension) is supplied
    to the command line once per input via ``BUNDLE_NAME``.
    """
    # SPP FILES
    dir_len = len("sprep/")
    ext_len = len(".txt")
    profile_files = [InFile(path) for path in io.glob("sprep/*.txt")]
    spp_files = [
        OutFile("%s.spp" % profile.filename[dir_len:-ext_len])
        for profile in profile_files
    ]
    bundle_names = [profile.filename[dir_len:-ext_len] for profile in profile_files]
    command_args = (
        "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
        "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt"
    )
    request = RepeatedExecutionRequest(
        name="stringprep",
        category="stringprep",
        dep_targets=[InFile("unidata/NormalizationCorrections.txt")],
        input_files=profile_files,
        output_files=spp_files,
        tool=IcuTool("gensprep"),
        args=command_args,
        format_with={},
        repeat_with={
            "BUNDLE_NAME": bundle_names,
        },
    )
    return [request]
224
225
def generate_brkitr_dictionaries(config, io, common_vars):
    """Return the request that compiles break-iterator dictionaries.

    Every brkitr/dictionaries/*.txt input maps to brkitr/<name>.dict, built
    with ``gendict``. Each input must have an entry in the per-file options
    table below; a file without one raises KeyError.
    """
    # Dict Files
    dir_len = len("brkitr/dictionaries/")
    ext_len = len(".txt")
    dict_sources = [InFile(path) for path in io.glob("brkitr/dictionaries/*.txt")]
    dict_outputs = [
        OutFile("brkitr/%s.dict" % source.filename[dir_len:-ext_len])
        for source in dict_sources
    ]
    # gendict command-line options, keyed by input file name.
    options_by_file = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00",
    }
    per_file_options = [options_by_file[source.filename] for source in dict_sources]
    command_args = (
        "-i {OUT_DIR} "
        "-c {EXTRA_OPTIONS} "
        "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
    )
    request = RepeatedExecutionRequest(
        name="dictionaries",
        category="brkitr_dictionaries",
        dep_targets=[],
        input_files=dict_sources,
        output_files=dict_outputs,
        tool=IcuTool("gendict"),
        args=command_args,
        format_with={},
        repeat_with={
            "EXTRA_OPTIONS": per_file_options,
        },
    )
    return [request]
255
256
def generate_normalization(config, io, common_vars):
    """Return the request that converts the in/*.nrm normalization files.

    in/nfc.nrm is removed from the input list before the request is built
    (the list must contain it). Each remaining input in/<name> maps to an
    output <name>, converted with ``icupkg``.
    """
    # NRM Files
    nrm_sources = [InFile(path) for path in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    nrm_sources.remove(InFile("in/nfc.nrm"))
    dir_len = len("in/")
    nrm_outputs = [OutFile(source.filename[dir_len:]) for source in nrm_sources]
    request = RepeatedExecutionRequest(
        name="normalization",
        category="normalization",
        dep_targets=[],
        input_files=nrm_sources,
        output_files=nrm_outputs,
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        format_with={},
        repeat_with={},
    )
    return [request]
276
277
def generate_coll_ucadata(config, io, common_vars):
    """Return the single request that produces coll/ucadata.icu.

    The input file name is selected by ``config.coll_han_type``
    (in/coll/ucadata-<type>.icu) and is converted with ``icupkg``.
    """
    # Collation Dependency File (ucadata.icu)
    source_name = "in/coll/ucadata-%s.icu" % config.coll_han_type
    source = InFile(source_name)
    target = OutFile("coll/ucadata.icu")
    request = SingleExecutionRequest(
        name="coll_ucadata",
        category="coll_ucadata",
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
294
295
def generate_full_unicore_data(config, io, common_vars):
    """Return the request for the core Unicode property files, if enabled.

    Returns an empty list unless ``config.include_uni_core_data`` is truthy.
    Otherwise returns one request that converts each in/<name> file listed
    below to <name> with ``icupkg``.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    core_names = (
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm",
    )
    sources = [InFile("in/%s" % name) for name in core_names]
    targets = [OutFile(name) for name in core_names]
    request = RepeatedExecutionRequest(
        name="unicore",
        category="unicore",
        input_files=sources,
        output_files=targets,
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
    )
    return [request]
324
325
def generate_unames(config, io, common_vars):
    """Return the single request that converts in/unames.icu to unames.icu.

    The conversion is done with ``icupkg``.
    """
    # Unicode Character Names
    source = InFile("in/unames.icu")
    target = OutFile("unames.icu")
    request = SingleExecutionRequest(
        name="unames",
        category="unames",
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
342
343
def generate_ulayout(config, io, common_vars):
    """Return the single request that converts in/ulayout.icu to ulayout.icu.

    The request's name and category are both "ulayout"; the conversion is
    done with ``icupkg``.
    """
    # Unicode text layout properties
    target_name = "ulayout"
    source = InFile("in/%s.icu" % target_name)
    target = OutFile("%s.icu" % target_name)
    request = SingleExecutionRequest(
        name=target_name,
        category=target_name,
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
361
362
def generate_uemoji(config, io, common_vars):
    """Return the single request that converts in/uemoji.icu to uemoji.icu.

    The request's name and category are both "uemoji"; the conversion is
    done with ``icupkg``.
    """
    # Unicode emoji properties
    target_name = "uemoji"
    source = InFile("in/%s.icu" % target_name)
    target = OutFile("%s.icu" % target_name)
    request = SingleExecutionRequest(
        name=target_name,
        category=target_name,
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
380
381
def generate_misc(config, io, common_vars):
    """Return the request that compiles the misc/*.txt resource bundles.

    Every misc/<name>.txt input maps to <name>.res, built with ``genrb``.
    The request is named ``misc_res`` and depends on the ``cnvalias`` target.
    """
    # Misc Data Res Files
    dir_len = len("misc/")
    ext_len = len(".txt")
    txt_files = [InFile(path) for path in io.glob("misc/*.txt")]
    txt_basenames = [txt.filename[dir_len:] for txt in txt_files]
    res_files = [OutFile("%s.res" % base[:-ext_len]) for base in txt_basenames]
    command_args = (
        "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
        "-k -q "
        "{INPUT_BASENAME}"
    )
    request = RepeatedExecutionRequest(
        name="misc_res",
        category="misc",
        dep_targets=[DepTarget("cnvalias")], # ICU-21175
        input_files=txt_files,
        output_files=res_files,
        tool=IcuTool("genrb"),
        args=command_args,
        format_with={},
        repeat_with={
            "INPUT_BASENAME": txt_basenames,
        },
    )
    return [request]
404
405
def generate_curr_supplemental(config, io, common_vars):
    """Return the single request that compiles curr/supplementalData.txt.

    The output is curr/supplementalData.res, built with ``genrb``.
    """
    # Currency Supplemental Res File
    source = InFile("curr/supplementalData.txt")
    target = OutFile("curr/supplementalData.res")
    command_args = (
        "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    request = SingleExecutionRequest(
        name="curr_supplemental_res",
        category="curr_supplemental",
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("genrb"),
        args=command_args,
        format_with={
            "INPUT_BASENAME": "supplementalData.txt",
        },
    )
    return [request]
427
428
def generate_zone_supplemental(config, io, common_vars):
    """Return the single request that compiles zone/tzdbNames.txt.

    The output is zone/tzdbNames.res, built with ``genrb``.
    """
    # tzdbNames Res File
    source = InFile("zone/tzdbNames.txt")
    target = OutFile("zone/tzdbNames.res")
    command_args = (
        "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    request = SingleExecutionRequest(
        name="zone_supplemental_res",
        category="zone_supplemental",
        dep_targets=[],
        input_files=[source],
        output_files=[target],
        tool=IcuTool("genrb"),
        args=command_args,
        format_with={
            "INPUT_BASENAME": "tzdbNames.txt",
        },
    )
    return [request]
450
451
def generate_translit(config, io, common_vars):
    """Return the request that compiles the transliteration bundles.

    Only root.txt, en.txt and el.txt under translit/ are compiled (to
    translit/<name>.res, with ``genrb``). Every other translit/*.txt file
    is passed, sorted, as a dependency of the request.
    """
    compiled_sources = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt"),
    ]
    # All remaining translit/*.txt files become dependencies, in sorted order.
    all_sources = set(InFile(path) for path in io.glob("translit/*.txt"))
    other_sources = sorted(all_sources - set(compiled_sources))
    dir_len = len("translit/")
    ext_len = len(".txt")
    source_basenames = [src.filename[dir_len:] for src in compiled_sources]
    res_files = [
        OutFile("translit/%s.res" % base[:-ext_len])
        for base in source_basenames
    ]
    command_args = (
        "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    request = RepeatedOrSingleExecutionRequest(
        name="translit_res",
        category="translit",
        dep_targets=other_sources,
        input_files=compiled_sources,
        output_files=res_files,
        tool=IcuTool("genrb"),
        args=command_args,
        format_with={},
        repeat_with={
            "INPUT_BASENAME": utils.SpaceSeparatedList(source_basenames),
        },
    )
    return [request]
484
485
def generate_brkitr_lstm(config, io, common_vars):
    """Return the request that compiles the break-iterator LSTM bundles.

    Every brkitr/lstm/<name>.txt input maps to brkitr/<name>.res, built with
    ``genrb``. The request is named ``lstm_res``.
    """
    dir_len = len("brkitr/lstm/")
    ext_len = len(".txt")
    model_files = [InFile(path) for path in io.glob("brkitr/lstm/*.txt")]
    model_basenames = [model.filename[dir_len:] for model in model_files]
    res_files = [
        OutFile("brkitr/%s.res" % base[:-ext_len])
        for base in model_basenames
    ]
    command_args = (
        "-s {IN_DIR}/brkitr/lstm -d {OUT_DIR}/brkitr -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    request = RepeatedOrSingleExecutionRequest(
        name="lstm_res",
        category="brkitr_lstm",
        dep_targets=[],
        input_files=model_files,
        output_files=res_files,
        tool=IcuTool("genrb"),
        args=command_args,
        format_with={},
        repeat_with={
            "INPUT_BASENAME": utils.SpaceSeparatedList(model_basenames),
        },
    )
    return [request]

511
def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource-bundle tree.

    Args:
        config: Build configuration (not read directly in this function).
        io: Source-tree accessor; ``glob`` and ``read_locale_deps`` are used.
        common_vars: Format variables expanded into the pool bundle option
            and the index file names.
        sub_dir: Input directory name; also used to derive the category and
            the request names.
        out_sub_dir: Output directory name, or a falsy value to write to the
            top of the output directory.
        use_pool_bundle: Whether to add a pool bundle request and make the
            tree request use it.
        dep_targets: Dependencies applied to the pool and tree requests.
            The list passed in is not modified.

    Returns:
        A list holding, in order: the optional ``<sub_dir>_pool_write``
        request, the ``<sub_dir>_res`` request and the
        ``<sub_dir>_index_txt`` request.
    """
    requests = []
    category = "%s_tree" % sub_dir
    if out_sub_dir:
        out_prefix = "%s/" % out_sub_dir
    else:
        out_prefix = ""

    input_files = [InFile(path) for path in io.glob("%s/*.txt" % sub_dir)]
    # These files live in the tree's directory but are compiled by
    # generate_curr_supplemental / generate_zone_supplemental instead.
    separately_built = {
        "curr": "curr/supplementalData.txt",
        "zone": "zone/tzdbNames.txt",
    }
    if sub_dir in separately_built:
        input_files.remove(InFile(separately_built[sub_dir]))

    dir_len = len(sub_dir) + 1
    input_basenames = [txt.filename[dir_len:] for txt in input_files]
    output_files = [
        OutFile("%s%s.res" % (out_prefix, base[:-4]))
        for base in input_basenames
    ]

    # Generate Pool Bundle
    res_dep_targets = dep_targets
    pool_option = ""
    if use_pool_bundle:
        pool_outputs = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        pool_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        pool_args = (
            "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
            "--writePoolBundle -k "
            "{INPUT_BASENAMES_SPACED}"
        )
        requests.append(
            SingleExecutionRequest(
                name=pool_target_name,
                category=category,
                dep_targets=dep_targets,
                input_files=input_files,
                output_files=pool_outputs,
                tool=IcuTool("genrb"),
                args=pool_args,
                format_with={
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames),
                },
            )
        )
        # Build a new list so the caller's dep_targets stays untouched.
        res_dep_targets = dep_targets + [DepTarget(pool_target_name)]

    # Generate Res File Tree
    # BEGIN android-changed
    collation_flag = "--omitCollationRules " if sub_dir == "coll" else ""
    res_args = (
        "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
        + collation_flag
        + "{EXTRA_OPTION} -k "
        "{INPUT_BASENAME}"
    )
    # END android-changed
    requests.append(
        RepeatedOrSingleExecutionRequest(
            name="%s_res" % sub_dir,
            category=category,
            dep_targets=res_dep_targets,
            input_files=input_files,
            output_files=output_files,
            tool=IcuTool("genrb"),
            args=res_args,
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": pool_option,
            },
            repeat_with={
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames),
            },
        )
    )

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = {
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    }
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    alias_locales = set()
    if "aliases" in dependency_data:
        alias_locales = set(dependency_data["aliases"].keys())

    alias_files = []
    installed_files = []
    for txt in input_files:
        stem = IndexRequest.locale_file_stem(txt)
        if stem in excluded_locales:
            continue
        if stem in alias_locales:
            alias_files.append(txt)
        else:
            installed_files.append(txt)

    # Only the "locales" tree records the CLDR version in its index.
    if sub_dir == "locales":
        cldr_version = dependency_data["cldrVersion"]
    else:
        cldr_version = None

    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_args = (
        "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
        "-k "
        "{INDEX_NAME}.txt"
    )
    requests.append(
        IndexRequest(
            name="%s_index_txt" % sub_dir,
            category=category,
            installed_files=installed_files,
            alias_files=alias_files,
            txt_file=index_file_txt,
            output_file=index_res_file,
            cldr_version=cldr_version,
            args=index_args,
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
            },
        )
    )

    return requests
647