1#!/usr/bin/env python 2from __future__ import print_function 3import optparse 4import os 5import re 6import sys 7import shutil 8import bz2 9 10parser = optparse.OptionParser() 11 12parser.add_option('--icudst', 13 action='store', 14 dest='icudst', 15 default='deps/icu-small', 16 help='path to target ICU directory. Will be deleted.') 17 18parser.add_option('--icu-src', 19 action='store', 20 dest='icusrc', 21 default='deps/icu', 22 help='path to source ICU directory.') 23 24parser.add_option('--icutmp', 25 action='store', 26 dest='icutmp', 27 default='out/Release/obj/gen/icutmp', 28 help='path to icutmp dir.') 29 30(options, args) = parser.parse_args() 31 32if os.path.isdir(options.icudst): 33 print('Deleting existing icudst %s' % (options.icudst)) 34 shutil.rmtree(options.icudst) 35 36if not os.path.isdir(options.icusrc): 37 print('Missing source ICU dir --icusrc=%s' % (options.icusrc)) 38 sys.exit(1) 39 40# compression stuff. Keep the suffix and the compression function in sync. 41compression_suffix = '.bz2' 42def compress_data(infp, outfp): 43 with open(infp, 'rb') as inf: 44 with bz2.BZ2File(outfp, 'wb') as outf: 45 shutil.copyfileobj(inf, outf) 46 47def print_size(fn): 48 size = (os.stat(fn).st_size) / 1024000 49 print('%dM\t%s' % (size, fn)) 50 51ignore_regex = re.compile('^.*\.(vcxproj|filters|nrm|icu|dat|xml|txt|ac|guess|m4|in|sub|py|mak)$') 52 53def icu_ignore(dir, files): 54 subdir = dir[len(options.icusrc)+1::] 55 ign = [] 56 if len(subdir) == 0: 57 # remove all files at root level 58 ign = ign + files 59 # except... 60 ign.remove('source') 61 if 'LICENSE' in ign: 62 ign.remove('LICENSE') 63 # license.html will be removed (it's obviated by LICENSE) 64 elif 'license.html' in ign: 65 ign.remove('license.html') 66 elif subdir == 'source': 67 ign = ign + ['layout','samples','test','extra','config','layoutex','allinone','data'] 68 ign = ign + ['runConfigureICU','install-sh','mkinstalldirs','configure'] 69 ign = ign + ['io'] 70 elif subdir == 'source/tools': 71 ign = ign + ['tzcode','ctestfw','gensprep','gennorm2','gendict','icuswap', 72 'genbrk','gencfu','gencolusb','genren','memcheck','makeconv','gencnval','icuinfo','gentest'] 73 ign = ign + ['.DS_Store', 'Makefile', 'Makefile.in'] 74 75 for file in files: 76 if ignore_regex.match(file): 77 ign = ign + [file] 78 79 # print '>%s< [%s]' % (subdir, ign) 80 return ign 81 82# copied from configure 83def icu_info(icu_full_path): 84 uvernum_h = os.path.join(icu_full_path, 'source/common/unicode/uvernum.h') 85 if not os.path.isfile(uvernum_h): 86 print(' Error: could not load %s - is ICU installed?' % uvernum_h) 87 sys.exit(1) 88 icu_ver_major = None 89 matchVerExp = r'^\s*#define\s+U_ICU_VERSION_SHORT\s+"([^"]*)".*' 90 match_version = re.compile(matchVerExp) 91 for line in open(uvernum_h).readlines(): 92 m = match_version.match(line) 93 if m: 94 icu_ver_major = m.group(1) 95 if not icu_ver_major: 96 print(' Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h) 97 sys.exit(1) 98 icu_endianness = sys.byteorder[0] # TODO(srl295): EBCDIC should be 'e' 99 return (icu_ver_major, icu_endianness) 100 101(icu_ver_major, icu_endianness) = icu_info(options.icusrc) 102print("Data file root: icudt%s%s" % (icu_ver_major, icu_endianness)) 103dst_datafile = os.path.join(options.icudst, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness)) 104 105src_datafile = os.path.join(options.icusrc, "source/data/in/icudt%sl.dat" % (icu_ver_major)) 106dst_cmp_datafile = "%s%s" % (dst_datafile, compression_suffix) 107 108if not os.path.isfile(src_datafile): 109 print("Error: icu data file not found: %s" % src_datafile) 110 exit(1) 111 112print("will use datafile %s" % (src_datafile)) 113 114print('%s --> %s' % (options.icusrc, options.icudst)) 115shutil.copytree(options.icusrc, options.icudst, ignore=icu_ignore) 116 117# now, make the data dir (since we ignored it) 118icudst_data = os.path.join(options.icudst, "source", "data") 119icudst_in = os.path.join(icudst_data, "in") 120os.mkdir(icudst_data) 121os.mkdir(icudst_in) 122 123print_size(src_datafile) 124 125print('%s --compress-> %s' % (src_datafile, dst_cmp_datafile)) 126compress_data(src_datafile, dst_cmp_datafile) 127print_size(dst_cmp_datafile) 128readme_name = os.path.join(options.icudst, "README-FULL-ICU.txt" ) 129 130# Now, print a short notice 131msg_fmt = """\ 132ICU sources - auto generated by shrink-icu-src.py\n 133This directory contains the ICU subset used by --with-intl=full-icu 134It is a strict subset of ICU {} source files with the following exception(s): 135* {} : compressed data file\n\n 136To rebuild this directory, see ../../tools/icu/README.md\n""" 137 138with open(readme_name, 'w') as out_file: 139 print(msg_fmt.format(icu_ver_major, dst_cmp_datafile), file=out_file) 140