• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2018 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3
4# Python 2/3 Compatibility (ICU-20299)
5# TODO(ICU-20301): Remove this.
6from __future__ import print_function
7
8from icutools.databuilder import *
9from icutools.databuilder import utils
10from icutools.databuilder.request_types import *
11
12import os
13import sys
14
15
def generate(config, io, common_vars):
    """Build the complete list of ICU data build requests.

    Args:
        config: Build configuration. ``config.use_pool_bundle`` is read here;
            the object is also handed to every helper generator.
        io: Source-directory accessor. ``io.glob`` is used here to verify
            that the data directory is present; the object is also handed
            to every helper generator.
        common_vars: Dict of common substitution variables, forwarded
            unchanged to the helper generators.

    Returns:
        A list of request objects. The final entry is the ``icudata_list``
        ListRequest.

    Raises:
        SystemExit: With status 1 when nothing matches ``misc/*``, i.e. the
            data directory could not be found.
    """
    # Nothing under misc/ means the source directory is missing or wrong.
    if not io.glob("misc/*"):
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # Use sys.exit() rather than the builtin exit(): the latter is
        # installed by the site module and is not guaranteed to exist.
        sys.exit(1)

    requests = []

    requests += generate_cnvalias(config, io, common_vars)
    requests += generate_ulayout(config, io, common_vars)
    requests += generate_confusables(config, io, common_vars)
    requests += generate_conversion_mappings(config, io, common_vars)
    requests += generate_brkitr_brk(config, io, common_vars)
    requests += generate_stringprep(config, io, common_vars)
    requests += generate_brkitr_dictionaries(config, io, common_vars)
    requests += generate_normalization(config, io, common_vars)
    requests += generate_coll_ucadata(config, io, common_vars)
    requests += generate_full_unicore_data(config, io, common_vars)
    requests += generate_unames(config, io, common_vars)
    requests += generate_misc(config, io, common_vars)
    requests += generate_curr_supplemental(config, io, common_vars)
    requests += generate_translit(config, io, common_vars)

    # Res Tree Files
    # generate_tree arguments after common_vars:
    # (input dirname, output dirname, use pool bundle, dep targets)

    # Trees that follow the configured pool bundle setting and have no extra
    # dependencies. "locales" has no output sub-directory (None).
    pooled_trees = [
        ("locales", None),
        ("curr", "curr"),
        ("lang", "lang"),
        ("region", "region"),
        ("zone", "zone"),
        ("unit", "unit"),
    ]
    for sub_dir, out_sub_dir in pooled_trees:
        requests += generate_tree(config, io, common_vars,
            sub_dir,
            out_sub_dir,
            config.use_pool_bundle,
            [])

    requests += generate_tree(config, io, common_vars,
        "coll",
        "coll",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        # Depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])

    requests += generate_tree(config, io, common_vars,
        "brkitr",
        "brkitr",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [DepTarget("brkitr_brk"), DepTarget("dictionaries")])

    requests += generate_tree(config, io, common_vars,
        "rbnf",
        "rbnf",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [])

    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests
110
111
def generate_cnvalias(config, io, common_vars):
    """Return the request that builds the converter name alias table.

    The ``gencnval`` tool is run on ``mappings/convrtrs.txt`` to produce
    ``cnvalias.icu``. None of the three arguments is consulted; they are
    accepted so that all generators share one signature.

    Returns:
        A one-element list holding the ``cnvalias`` SingleExecutionRequest.
    """
    # UConv Name Aliases
    aliases_txt = InFile("mappings/convrtrs.txt")
    aliases_icu = OutFile("cnvalias.icu")
    request = SingleExecutionRequest(
        name="cnvalias",
        category="cnvalias",
        dep_targets=[],
        input_files=[aliases_txt],
        output_files=[aliases_icu],
        tool=IcuTool("gencnval"),
        args="-s {IN_DIR} -d {OUT_DIR} {INPUT_FILES[0]}",
        format_with={},
    )
    return [request]
129
130
def generate_confusables(config, io, common_vars):
    """Return the request that builds ``confusables.cfu``.

    The ``gencfu`` tool is given ``unidata/confusables.txt`` (``-r``) and
    ``unidata/confusablesWholeScript.txt`` (``-w``). The request depends on
    the ``cnvalias`` target. None of the three arguments is consulted.

    Returns:
        A one-element list holding the ``confusables`` SingleExecutionRequest.
    """
    # CONFUSABLES
    sources = [
        InFile("unidata/confusables.txt"),
        InFile("unidata/confusablesWholeScript.txt"),
    ]
    compiled = OutFile("confusables.cfu")
    command_args = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c "
        "-r {IN_DIR}/{INPUT_FILES[0]} "
        "-w {IN_DIR}/{INPUT_FILES[1]} "
        "-o {OUTPUT_FILES[0]}"
    )
    request = SingleExecutionRequest(
        name="confusables",
        category="confusables",
        dep_targets=[DepTarget("cnvalias")],
        input_files=sources,
        output_files=[compiled],
        tool=IcuTool("gencfu"),
        args=command_args,
        format_with={},
    )
    return [request]
150
151
def generate_conversion_mappings(config, io, common_vars):
    """Return the request that compiles every ``mappings/*.ucm`` file.

    Each ``mappings/<name>.ucm`` found through ``io.glob`` is paired with an
    output file ``<name>.cnv`` and compiled with ``makeconv``.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the .ucm files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``conversion_mappings``
        RepeatedOrSingleExecutionRequest.
    """
    # UConv Conversion Table Files
    dir_prefix_len = len("mappings/")
    extension_len = len(".ucm")
    ucm_files = [InFile(path) for path in io.glob("mappings/*.ucm")]
    cnv_files = [
        OutFile("%s.cnv" % ucm.filename[dir_prefix_len:-extension_len])
        for ucm in ucm_files
    ]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    request = RepeatedOrSingleExecutionRequest(
        name="conversion_mappings",
        category="conversion_mappings",
        dep_targets=[],
        input_files=ucm_files,
        output_files=cnv_files,
        tool=IcuTool("makeconv"),
        # BEGIN android-changed
        # args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
        args="-s {IN_DIR} -d {OUT_DIR} -c --small {INPUT_FILE_PLACEHOLDER}",
        # END android-changed
        format_with={},
        repeat_with={
            "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(ucm.filename for ucm in ucm_files)
        },
    )
    return [request]
175
176
def generate_brkitr_brk(config, io, common_vars):
    """Return the request that compiles the break iterator rule files.

    Each ``brkitr/rules/<name>.txt`` found through ``io.glob`` is compiled by
    ``genbrk`` into ``brkitr/<name>.brk``. The request depends on the
    ``cnvalias`` and ``ulayout`` targets.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the rule files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``brkitr_brk`` RepeatedExecutionRequest.
    """
    # BRK Files
    dir_prefix_len = len("brkitr/rules/")
    extension_len = len(".txt")
    rule_files = [InFile(path) for path in io.glob("brkitr/rules/*.txt")]
    brk_files = [
        OutFile("brkitr/%s.brk" % rule.filename[dir_prefix_len:-extension_len])
        for rule in rule_files
    ]
    request = RepeatedExecutionRequest(
        name="brkitr_brk",
        category="brkitr_rules",
        dep_targets=[DepTarget("cnvalias"), DepTarget("ulayout")],
        input_files=rule_files,
        output_files=brk_files,
        tool=IcuTool("genbrk"),
        args=(
            "-d {OUT_DIR} -i {OUT_DIR} "
            "-c -r {IN_DIR}/{INPUT_FILE} "
            "-o {OUTPUT_FILE}"
        ),
        format_with={},
        repeat_with={},
    )
    return [request]
196
197
def generate_stringprep(config, io, common_vars):
    """Return the request that compiles the StringPrep profile files.

    Each ``sprep/<bundle>.txt`` found through ``io.glob`` is compiled by
    ``gensprep`` into ``<bundle>.spp``. The bundle name is substituted into
    the command line once per input file.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the profile files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``stringprep`` RepeatedExecutionRequest.
    """
    # SPP FILES
    dir_prefix_len = len("sprep/")
    extension_len = len(".txt")
    profile_files = [InFile(path) for path in io.glob("sprep/*.txt")]
    bundle_names = [
        profile.filename[dir_prefix_len:-extension_len]
        for profile in profile_files
    ]
    spp_files = [OutFile("%s.spp" % bundle) for bundle in bundle_names]
    request = RepeatedExecutionRequest(
        name="stringprep",
        category="stringprep",
        # The tool is pointed at {IN_DIR}/unidata via -m; this file is
        # declared as a dependency of the request.
        dep_targets=[InFile("unidata/NormalizationCorrections.txt")],
        input_files=profile_files,
        output_files=spp_files,
        tool=IcuTool("gensprep"),
        args=(
            "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
            "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt"
        ),
        format_with={},
        repeat_with={"BUNDLE_NAME": bundle_names},
    )
    return [request]
219
220
def generate_brkitr_dictionaries(config, io, common_vars):
    """Return the request that compiles the break iterator dictionaries.

    Each ``brkitr/dictionaries/<name>.txt`` found through ``io.glob`` is
    compiled by ``gendict`` into ``brkitr/<name>.dict``. Every dictionary
    needs its own ``gendict`` options, looked up by source path.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the dictionaries.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``dictionaries``
        RepeatedExecutionRequest.

    Raises:
        KeyError: If a globbed dictionary has no entry in the options table.
    """
    # Dict Files
    dir_prefix_len = len("brkitr/dictionaries/")
    extension_len = len(".txt")
    dict_sources = [InFile(path) for path in io.glob("brkitr/dictionaries/*.txt")]
    dict_outputs = [
        OutFile("brkitr/%s.dict" % src.filename[dir_prefix_len:-extension_len])
        for src in dict_sources
    ]
    # Per-dictionary gendict options, keyed by source path.
    options_by_path = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00",
    }
    # One options string per input file, in the same order as dict_sources.
    per_file_options = []
    for src in dict_sources:
        per_file_options.append(options_by_path[src.filename])
    request = RepeatedExecutionRequest(
        name="dictionaries",
        category="brkitr_dictionaries",
        dep_targets=[],
        input_files=dict_sources,
        output_files=dict_outputs,
        tool=IcuTool("gendict"),
        args=(
            "-i {OUT_DIR} "
            "-c {EXTRA_OPTIONS} "
            "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
        ),
        format_with={},
        repeat_with={"EXTRA_OPTIONS": per_file_options},
    )
    return [request]
250
251
def generate_normalization(config, io, common_vars):
    """Return the request that packages the normalization data files.

    Every ``in/<name>.nrm`` found through ``io.glob`` except ``in/nfc.nrm``
    is passed through ``icupkg`` and written out as ``<name>.nrm``.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the .nrm files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``normalization``
        RepeatedExecutionRequest.

    Raises:
        ValueError: If ``in/nfc.nrm`` is not among the globbed files.
    """
    # NRM Files
    nrm_sources = [InFile(path) for path in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    nrm_sources.remove(InFile("in/nfc.nrm"))
    dir_prefix_len = len("in/")
    nrm_outputs = [OutFile(src.filename[dir_prefix_len:]) for src in nrm_sources]
    request = RepeatedExecutionRequest(
        name="normalization",
        category="normalization",
        dep_targets=[],
        input_files=nrm_sources,
        output_files=nrm_outputs,
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        format_with={},
        repeat_with={},
    )
    return [request]
271
272
def generate_coll_ucadata(config, io, common_vars):
    """Return the request that packages the collation root data file.

    The input is ``in/coll/ucadata-<type>.icu``, where ``<type>`` comes from
    ``config.coll_han_type``; ``icupkg`` writes it out as ``coll/ucadata.icu``.

    Args:
        config: Build configuration; ``coll_han_type`` selects the input file.
        io: Unused.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``coll_ucadata`` SingleExecutionRequest.
    """
    # Collation Dependency File (ucadata.icu)
    source_path = "in/coll/ucadata-%s.icu" % config.coll_han_type
    uca_source = InFile(source_path)
    uca_output = OutFile("coll/ucadata.icu")
    request = SingleExecutionRequest(
        name="coll_ucadata",
        category="coll_ucadata",
        dep_targets=[],
        input_files=[uca_source],
        output_files=[uca_output],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
289
290
def generate_full_unicore_data(config, io, common_vars):
    """Return the request that packages the core Unicode property files.

    Args:
        config: Build configuration; nothing is generated unless
            ``config.include_uni_core_data`` is truthy.
        io: Unused.
        common_vars: Unused.

    Returns:
        An empty list when core data is not requested, otherwise a
        one-element list holding the ``unicore`` RepeatedExecutionRequest.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if config.include_uni_core_data:
        core_names = ("pnames.icu", "uprops.icu", "ucase.icu", "ubidi.icu", "nfc.nrm")
        # Each file is read from in/ and written under the same basename.
        core_sources = [InFile("in/%s" % name) for name in core_names]
        core_outputs = [OutFile(name) for name in core_names]
        request = RepeatedExecutionRequest(
            name="unicore",
            category="unicore",
            input_files=core_sources,
            output_files=core_outputs,
            tool=IcuTool("icupkg"),
            args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        )
        return [request]

    return []
319
320
def generate_unames(config, io, common_vars):
    """Return the request that packages the Unicode character names file.

    ``icupkg`` reads ``in/unames.icu`` and writes ``unames.icu``. None of the
    three arguments is consulted.

    Returns:
        A one-element list holding the ``unames`` SingleExecutionRequest.
    """
    # Unicode Character Names
    names_source = InFile("in/unames.icu")
    names_output = OutFile("unames.icu")
    request = SingleExecutionRequest(
        name="unames",
        category="unames",
        dep_targets=[],
        input_files=[names_source],
        output_files=[names_output],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
337
338
def generate_ulayout(config, io, common_vars):
    """Return the request that packages the text layout properties file.

    ``icupkg`` reads ``in/ulayout.icu`` and writes ``ulayout.icu``. The
    request name and category are both ``ulayout``. None of the three
    arguments is consulted.

    Returns:
        A one-element list holding the ``ulayout`` SingleExecutionRequest.
    """
    # Unicode text layout properties
    target = "ulayout"
    icu_name = target + ".icu"
    layout_source = InFile("in/" + icu_name)
    layout_output = OutFile(icu_name)
    request = SingleExecutionRequest(
        name=target,
        category=target,
        dep_targets=[],
        input_files=[layout_source],
        output_files=[layout_output],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
356
357
def generate_misc(config, io, common_vars):
    """Return the request that compiles the miscellaneous resource bundles.

    Each ``misc/<name>.txt`` found through ``io.glob`` is compiled by
    ``genrb`` into ``<name>.res``. The file's basename is substituted into
    the command line once per input file.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the source files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``misc_res`` RepeatedExecutionRequest.
    """
    # Misc Data Res Files
    dir_prefix_len = len("misc/")
    extension_len = len(".txt")
    txt_sources = [InFile(path) for path in io.glob("misc/*.txt")]
    txt_basenames = [src.filename[dir_prefix_len:] for src in txt_sources]
    res_outputs = [
        OutFile("%s.res" % basename[:-extension_len])
        for basename in txt_basenames
    ]
    request = RepeatedExecutionRequest(
        name="misc_res",
        category="misc",
        dep_targets=[],
        input_files=txt_sources,
        output_files=res_outputs,
        tool=IcuTool("genrb"),
        args=(
            "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
            "-k -q "
            "{INPUT_BASENAME}"
        ),
        format_with={},
        repeat_with={"INPUT_BASENAME": txt_basenames},
    )
    return [request]
380
381
def generate_curr_supplemental(config, io, common_vars):
    """Return the request that compiles the currency supplemental bundle.

    ``genrb`` compiles ``curr/supplementalData.txt`` into
    ``curr/supplementalData.res``. None of the three arguments is consulted.

    Returns:
        A one-element list holding the ``curr_supplemental_res``
        SingleExecutionRequest.
    """
    # Currency Supplemental Res File
    basename = "supplementalData.txt"
    supplemental_source = InFile("curr/supplementalData.txt")
    supplemental_output = OutFile("curr/supplementalData.res")
    request = SingleExecutionRequest(
        name="curr_supplemental_res",
        category="curr_supplemental",
        dep_targets=[],
        input_files=[supplemental_source],
        output_files=[supplemental_output],
        tool=IcuTool("genrb"),
        args=(
            "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
            "-k "
            "{INPUT_BASENAME}"
        ),
        format_with={"INPUT_BASENAME": basename},
    )
    return [request]
403
404
def generate_translit(config, io, common_vars):
    """Return the request that compiles the transliteration bundles.

    Only ``root.txt``, ``en.txt`` and ``el.txt`` from ``translit/`` are
    compiled by ``genrb``. Every other ``translit/*.txt`` file found through
    ``io.glob`` is listed, sorted, as a dependency of the request.

    Args:
        config: Unused.
        io: Source-directory accessor; ``io.glob`` lists the translit files.
        common_vars: Unused.

    Returns:
        A one-element list holding the ``translit_res``
        RepeatedOrSingleExecutionRequest.
    """
    compiled_sources = [
        InFile("translit/%s.txt" % stem)
        for stem in ("root", "en", "el")
    ]
    # Remaining translit files are dependencies only, not direct inputs.
    all_sources = set(InFile(path) for path in io.glob("translit/*.txt"))
    dependency_files = sorted(all_sources.difference(compiled_sources))

    dir_prefix_len = len("translit/")
    extension_len = len(".txt")
    basenames = [src.filename[dir_prefix_len:] for src in compiled_sources]
    res_outputs = []
    for basename in basenames:
        res_outputs.append(OutFile("translit/%s.res" % basename[:-extension_len]))

    request = RepeatedOrSingleExecutionRequest(
        name="translit_res",
        category="translit",
        dep_targets=dependency_files,
        input_files=compiled_sources,
        output_files=res_outputs,
        tool=IcuTool("genrb"),
        args=(
            "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
            "-k "
            "{INPUT_BASENAME}"
        ),
        format_with={},
        repeat_with={
            "INPUT_BASENAME": utils.SpaceSeparatedList(basenames)
        },
    )
    return [request]
437
438
def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource bundle tree.

    Up to three requests are produced, in this order:

    1. ``<sub_dir>_pool_write`` -- only when ``use_pool_bundle`` is truthy;
       runs ``genrb --writePoolBundle`` over all inputs to write ``pool.res``.
    2. ``<sub_dir>_res`` -- compiles every ``<sub_dir>/*.txt`` to ``.res``.
    3. ``<sub_dir>_index_txt`` -- an IndexRequest for the tree's index file.

    Args:
        config: Unused in this function.
        io: Source-directory accessor; ``io.glob`` lists the tree's source
            files and ``io.read_locale_deps`` supplies alias and version data.
        common_vars: Dict of substitution variables, expanded with
            ``str.format`` into the pool bundle option and index file names.
        sub_dir: Input sub-directory; also used to derive target names.
        out_sub_dir: Output sub-directory, or a falsy value to write to the
            top-level output directory.
        use_pool_bundle: Whether to write and use a pool bundle.
        dep_targets: Dependencies for the pool and res requests. The list is
            not modified.

    Returns:
        The list of generated requests.
    """
    tree_category = "%s_tree" % sub_dir
    if out_sub_dir:
        out_prefix = "%s/" % out_sub_dir
    else:
        out_prefix = ""

    # TODO: Clean this up for curr
    txt_sources = [InFile(path) for path in io.glob("%s/*.txt" % sub_dir)]
    if sub_dir == "curr":
        # supplementalData.txt is excluded from the curr tree.
        txt_sources.remove(InFile("curr/supplementalData.txt"))
    # Drop the "<sub_dir>/" prefix to get names relative to the tree.
    dir_prefix_len = len(sub_dir) + 1
    extension_len = len(".txt")
    txt_basenames = [src.filename[dir_prefix_len:] for src in txt_sources]
    res_outputs = []
    for basename in txt_basenames:
        res_outputs.append(
            OutFile("%s%s.res" % (out_prefix, basename[:-extension_len]))
        )

    generated = []

    # Generate Pool Bundle
    if not use_pool_bundle:
        pool_option = ""
    else:
        pool_outputs = [OutFile("%spool.res" % out_prefix)]
        pool_target = "%s_pool_write" % sub_dir
        pool_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        generated.append(
            SingleExecutionRequest(
                name=pool_target,
                category=tree_category,
                dep_targets=dep_targets,
                input_files=txt_sources,
                output_files=pool_outputs,
                tool=IcuTool("genrb"),
                args=(
                    "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                    "--writePoolBundle -k "
                    "{INPUT_BASENAMES_SPACED}"
                ),
                format_with={
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(txt_basenames),
                },
            )
        )
        # Build a new list so the caller's dep_targets is left untouched.
        dep_targets = dep_targets + [DepTarget(pool_target)]

    # Generate Res File Tree
    # BEGIN android-changed
    if sub_dir == "coll":
        collation_flag = "--omitCollationRules "
    else:
        collation_flag = ""
    res_args = (
        "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
        + collation_flag
        + "{EXTRA_OPTION} -k {INPUT_BASENAME}"
    )
    # END android-changed
    generated.append(
        RepeatedOrSingleExecutionRequest(
            name="%s_res" % sub_dir,
            category=tree_category,
            dep_targets=dep_targets,
            input_files=txt_sources,
            output_files=res_outputs,
            tool=IcuTool("genrb"),
            args=res_args,
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": pool_option,
            },
            repeat_with={
                "INPUT_BASENAME": utils.SpaceSeparatedList(txt_basenames)
            },
        )
    )

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    skipped_stems = {
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    }
    # Put alias locales in a separate structure; see ICU-20627
    locale_deps = io.read_locale_deps(sub_dir)
    alias_stems = (
        set(locale_deps["aliases"].keys())
        if "aliases" in locale_deps
        else set()
    )
    alias_sources = []
    installed_sources = []
    for src in txt_sources:
        stem = IndexRequest.locale_file_stem(src)
        if stem in skipped_stems:
            continue
        if stem in alias_stems:
            alias_sources.append(src)
        else:
            installed_sources.append(src)

    # Only the "locales" tree carries a CLDR version.
    if sub_dir == "locales":
        cldr_version = locale_deps["cldrVersion"]
    else:
        cldr_version = None

    index_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    generated.append(
        IndexRequest(
            name="%s_index_txt" % sub_dir,
            category=tree_category,
            installed_files=installed_sources,
            alias_files=alias_sources,
            txt_file=index_txt,
            output_file=index_res,
            cldr_version=cldr_version,
            args=(
                "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "-k "
                "{INDEX_NAME}.txt"
            ),
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
            },
        )
    )

    return generated
573