#!/usr/bin/env python3 # coding=UTF-8 # # Copyright 2016 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Create a curated subset of Noto CJK for Android.""" import argparse import logging import os from pathlib import Path from fontTools import ttLib from nototools import font_data from nototools import tool_utils from nototools import ttc_utils # Characters supported in Noto CJK fonts that UTR #51 recommends default to # emoji-style. EMOJI_IN_CJK = { 0x26BD, # ⚽ SOCCER BALL 0x26BE, # ⚾ BASEBALL 0x1F18E, # 🆎 NEGATIVE SQUARED AB 0x1F191, # 🆑 SQUARED CL 0x1F192, # 🆒 SQUARED COOL 0x1F193, # 🆓 SQUARED FREE 0x1F194, # 🆔 SQUARED ID 0x1F195, # 🆕 SQUARED NEW 0x1F196, # 🆖 SQUARED NG 0x1F197, # 🆗 SQUARED OK 0x1F198, # 🆘 SQUARED SOS 0x1F199, # 🆙 SQUARED UP WITH EXCLAMATION MARK 0x1F19A, # 🆚 SQUARED VS 0x1F201, # 🈁 SQUARED KATAKANA KOKO 0x1F21A, # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121 0x1F22F, # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307 0x1F232, # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981 0x1F233, # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A 0x1F234, # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408 0x1F235, # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80 0x1F236, # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709 0x1F238, # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533 0x1F239, # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272 0x1F23A, # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6 0x1F250, # 🉐 CIRCLED IDEOGRAPH ADVANTAGE 0x1F251, # 🉑 CIRCLED IDEOGRAPH ACCEPT } # Characters we have decided we are doing as emoji-style in Android, # despite UTR #51's recommendation ANDROID_EMOJI = { 0x2600, # ☀ BLACK SUN WITH RAYS 0x2601, # ☁ CLOUD 0X260E, # ☎ BLACK TELEPHONE 0x261D, # ☝ WHITE UP POINTING INDEX 0x263A, # ☺ WHITE SMILING FACE 0x2660, # ♠ BLACK SPADE SUIT 0x2663, # ♣ BLACK CLUB SUIT 0x2665, # ♥ BLACK HEART SUIT 0x2666, # ♦ BLACK DIAMOND SUIT 0x270C, # ✌ VICTORY HAND 0x2744, # ❄ SNOWFLAKE 0x2764, # ❤ HEAVY BLACK HEART } # We don't want support for ASCII control chars. CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F') EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS) TTC_NAMES = ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc') def remove_from_cmap(infile, outfile, exclude=frozenset()): """Removes a set of characters from a font file's cmap table.""" font = ttLib.TTFont(infile) font_data.delete_from_cmap(font, exclude) font.save(outfile) def remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir): otf_names = ttc_utils.ttcfile_extract(ttc_name, out_dir) with tool_utils.temp_chdir(out_dir): for index, otf_name in enumerate(otf_names): logging.info('Subsetting %s...', otf_name) remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS) ttc_utils.ttcfile_build(ttc_name, otf_names) for f in otf_names: os.remove(f) def remove_codepoints_from_ttc(ttc_path, out_dir): """Removes a set of characters from a TTC font file's cmap table.""" logging.info('Loading %s', ttc_path) ttc = ttLib.ttCollection.TTCollection(ttc_path) logging.info('Subsetting %d fonts in the collection', len(ttc)) for font in ttc: font_data.delete_from_cmap(font, EXCLUDED_CODEPOINTS) out_path = out_dir / ttc_path.name logging.info('Saving to %s', out_path) ttc.save(out_path) logging.info('Size: %d --> %d, delta=%d', ttc_path.stat().st_size, out_path.stat().st_size, out_path.stat().st_size - ttc_path.stat().st_size) def main(): parser = argparse.ArgumentParser() parser.add_argument('input', default='.', nargs='?') parser.add_argument('-o', '--output', default='subsetted') parser.add_argument('--use-ttc-utils', action='store_true') parser.add_argument('-v', '--verbose', action='count') args = parser.parse_args() if args.verbose: if args.verbose > 1: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) in_dir = Path(args.input) out_dir = Path(args.output) out_dir.mkdir(parents=True, exist_ok=True) for ttc_name in TTC_NAMES: if args.use_ttc_utils: remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir) else: remove_codepoints_from_ttc(in_dir / ttc_name, out_dir) if __name__ == "__main__": main()