# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

from icutools.databuilder import *
from icutools.databuilder import utils
from icutools.databuilder.request_types import *

import os
import sys


def generate(config, io, common_vars):
    requests = []

    if len(io.glob("misc/*")) == 0:
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        sys.exit(1)

    requests += generate_cnvalias(config, io, common_vars)
    requests += generate_ulayout(config, io, common_vars)
    requests += generate_confusables(config, io, common_vars)
    requests += generate_conversion_mappings(config, io, common_vars)
    requests += generate_brkitr_brk(config, io, common_vars)
    requests += generate_stringprep(config, io, common_vars)
    requests += generate_brkitr_dictionaries(config, io, common_vars)
    requests += generate_normalization(config, io, common_vars)
    requests += generate_coll_ucadata(config, io, common_vars)
    requests += generate_full_unicore_data(config, io, common_vars)
    requests += generate_unames(config, io, common_vars)
    requests += generate_misc(config, io, common_vars)
    requests += generate_curr_supplemental(config, io, common_vars)
    requests += generate_zone_supplemental(config, io, common_vars)
    requests += generate_translit(config, io, common_vars)

    # Res Tree Files
    # (input dirname, output dirname, use pool bundle, dep files)
    requests += generate_tree(config, io, common_vars,
        "locales",
        None,
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "curr",
        "curr",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "lang",
        "lang",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "region",
        "region",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "zone",
        "zone",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "unit",
        "unit",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "coll",
        "coll",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        # Depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])

    requests += generate_tree(config, io, common_vars,
        "brkitr",
        "brkitr",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [DepTarget("brkitr_brk"), DepTarget("dictionaries")])

    requests += generate_tree(config, io, common_vars,
        "rbnf",
        "rbnf",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [])

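    # Record the complete list of output data files in icudata.lst.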
    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests


def generate_cnvalias(config, io, common_vars):
    # UConv Name Aliases
    input_file = InFile("mappings/convrtrs.txt")
    output_file = OutFile("cnvalias.icu")
    return [
        SingleExecutionRequest(
            name = "cnvalias",
            category = "cnvalias",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("gencnval"),
            args = "-s {IN_DIR} -d {OUT_DIR} "
                "{INPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_confusables(config, io, common_vars):
    # CONFUSABLES
    txt1 = InFile("unidata/confusables.txt")
    txt2 = InFile("unidata/confusablesWholeScript.txt")
    cfu = OutFile("confusables.cfu")
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            dep_targets = [DepTarget("cnvalias")],
            input_files = [txt1, txt2],
            output_files = [cfu],
            tool = IcuTool("gencfu"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
                "-o {OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_conversion_mappings(config, io, common_vars):
    # UConv Conversion Table Files
    input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")]
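    # e.g., "mappings/foo.ucm" -> "foo.cnv"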
    output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("makeconv"),
            args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            format_with = {},
            repeat_with = {
                "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(file.filename for file in input_files)
            }
        )
    ]


def generate_brkitr_brk(config, io, common_vars):
    # BRK Files
    input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")]
    output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genbrk"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILE} "
                "-o {OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_stringprep(config, io, common_vars):
    # SPP FILES
    input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")]
    output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files]
    bundle_names = [v.filename[6:-4] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gensprep"),
            args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
                "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": bundle_names
            }
        )
    ]


def generate_brkitr_dictionaries(config, io, common_vars):
    # Dict Files
    input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")]
    output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files]
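    # Per-dictionary gendict options: trie value width and code point offset transform.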
    extra_options_map = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    extra_optionses = [extra_options_map[v.filename] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gendict"),
            args = "-i {OUT_DIR} "
                "-c {EXTRA_OPTIONS} "
                "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": extra_optionses
            }
        )
    ]


def generate_normalization(config, io, common_vars):
    # NRM Files
    input_files = [InFile(filename) for filename in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    input_files.remove(InFile("in/nfc.nrm"))
    output_files = [OutFile(v.filename[3:]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]


def generate_coll_ucadata(config, io, common_vars):
    # Collation Dependency File (ucadata.icu)
    input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type)
    output_file = OutFile("coll/ucadata.icu")
    return [
        SingleExecutionRequest(
            name = "coll_ucadata",
            category = "coll_ucadata",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_full_unicore_data(config, io, common_vars):
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "unicore",
            category = "unicore",
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
        )
    ]


def generate_unames(config, io, common_vars):
    # Unicode Character Names
    input_file = InFile("in/unames.icu")
    output_file = OutFile("unames.icu")
    return [
        SingleExecutionRequest(
            name = "unames",
            category = "unames",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_ulayout(config, io, common_vars):
    # Unicode text layout properties
    basename = "ulayout"
    input_file = InFile("in/%s.icu" % basename)
    output_file = OutFile("%s.icu" % basename)
    return [
        SingleExecutionRequest(
            name = basename,
            category = basename,
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]


def generate_misc(config, io, common_vars):
    # Misc Data Res Files
    input_files = [InFile(filename) for filename in io.glob("misc/*.txt")]
    input_basenames = [v.filename[5:] for v in input_files]
    output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames]
    return [
        RepeatedExecutionRequest(
            name = "misc_res",
            category = "misc",
            dep_targets = [DepTarget("cnvalias")], # ICU-21175
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
                "-k -q "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": input_basenames
            }
        )
    ]


def generate_curr_supplemental(config, io, common_vars):
    # Currency Supplemental Res File
    input_file = InFile("curr/supplementalData.txt")
    input_basename = "supplementalData.txt"
    output_file = OutFile("curr/supplementalData.res")
    return [
        SingleExecutionRequest(
            name = "curr_supplemental_res",
            category = "curr_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_zone_supplemental(config, io, common_vars):
    # tzdbNames Res File
    input_file = InFile("zone/tzdbNames.txt")
    input_basename = "tzdbNames.txt"
    output_file = OutFile("zone/tzdbNames.res")
    return [
        SingleExecutionRequest(
            name = "zone_supplemental_res",
            category = "zone_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]


def generate_translit(config, io, common_vars):
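    # Transliteration Rule Res Files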
    input_files = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
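    # The remaining translit rule files are declared as dependencies rather than direct inputs.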
    dep_files = set(InFile(filename) for filename in io.glob("translit/*.txt"))
    dep_files -= set(input_files)
    dep_files = list(sorted(dep_files))
    input_basenames = [v.filename[9:] for v in input_files]
    output_files = [
        OutFile("translit/%s.res" % v[:-4])
        for v in input_basenames
    ]
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = dep_files,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]


def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
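    # Requests for one res tree: optional pool bundle, the res files, and the res_index.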
    requests = []
    category = "%s_tree" % sub_dir
    out_prefix = "%s/" % out_sub_dir if out_sub_dir else ""
    input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)]
    if sub_dir == "curr":
        input_files.remove(InFile("curr/supplementalData.txt"))
    if sub_dir == "zone":
        input_files.remove(InFile("zone/tzdbNames.txt"))
    input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files]
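    # e.g., "curr/en.txt" -> "curr/en.res" (out_prefix is empty for the locales tree)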
    output_files = [
        OutFile("%s%s.res" % (out_prefix, v[:-4]))
        for v in input_basenames
    ]

    # Generate Pool Bundle
    if use_pool_bundle:
        input_pool_files = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        requests += [
            SingleExecutionRequest(
                name = pool_target_name,
                category = category,
                dep_targets = dep_targets,
                input_files = input_files,
                output_files = input_pool_files,
                tool = IcuTool("genrb"),
                args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                    "--writePoolBundle -k "
                    "{INPUT_BASENAMES_SPACED}",
                format_with = {
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames)
                }
            ),
        ]
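        # The res files in this tree depend on the pool bundle being written first.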
        dep_targets = dep_targets + [DepTarget(pool_target_name)]
    else:
        use_pool_bundle_option = ""

    # Generate Res File Tree
    requests += [
        RepeatedOrSingleExecutionRequest(
            name = "%s_res" % sub_dir,
            category = category,
            dep_targets = dep_targets,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "{EXTRA_OPTION} -k "
                "{INPUT_BASENAME}",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": use_pool_bundle_option
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = set([
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    ])
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    if "aliases" in dependency_data:
        alias_locales = set(dependency_data["aliases"].keys())
    else:
        alias_locales = set()
    alias_files = []
    installed_files = []
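    # Partition the inputs into alias and installed locales, skipping excluded ones.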
    for f in input_files:
        file_stem = IndexRequest.locale_file_stem(f)
        if file_stem in excluded_locales:
            continue
        destination = alias_files if file_stem in alias_locales else installed_files
        destination.append(f)
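    # Only the locales tree records the CLDR version in its res_index.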
    cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None
    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_file_target_name = "%s_index_txt" % sub_dir
    requests += [
        IndexRequest(
            name = index_file_target_name,
            category = category,
            installed_files = installed_files,
            alias_files = alias_files,
            txt_file = index_file_txt,
            output_file = index_res_file,
            cldr_version = cldr_version,
            args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "-k "
                "{INDEX_NAME}.txt",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix
            }
        )
    ]

    return requests