#!/usr/bin/python
# coding=UTF-8
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Create a curated subset of Noto CJK for Android."""

import os

from fontTools import ttLib
from nototools import font_data
from nototools import tool_utils
from nototools import ttc_utils

# Characters supported in Noto CJK fonts that UTR #51 recommends default to
# emoji-style.
EMOJI_IN_CJK = {
    0x26BD,  # ⚽ SOCCER BALL
    0x26BE,  # ⚾ BASEBALL
    0x1F18E,  # NEGATIVE SQUARED AB
    0x1F191,  # SQUARED CL
    0x1F192,  # SQUARED COOL
    0x1F193,  # SQUARED FREE
    0x1F194,  # SQUARED ID
    0x1F195,  # SQUARED NEW
    0x1F196,  # SQUARED NG
    0x1F197,  # SQUARED OK
    0x1F198,  # SQUARED SOS
    0x1F199,  # SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # SQUARED VS
    0x1F201,  # SQUARED KATAKANA KOKO
    0x1F21A,  # SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250,  # CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # CIRCLED IDEOGRAPH ACCEPT
}

# Characters we have decided to render emoji-style in Android, despite
# UTR #51's recommendation.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# We don't want support for ASCII control chars.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every code point that gets stripped from the cmap of each subsetted font.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)


def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table.

    Args:
      infile: Path of the font file to read.
      outfile: Path the modified font is saved to; may be the same as infile.
      exclude: Iterable of code points to delete from the cmap. The default
        is empty, so by default the font is re-saved with its cmap untouched.
    """
    font = ttLib.TTFont(infile)
    font_data.delete_from_cmap(font, exclude)
    font.save(outfile)


# Directory the fonts of a TTC are extracted into and rebuilt in.
TEMP_DIR = 'subsetted'


def remove_codepoints_from_ttc(ttc_name):
    """Strips EXCLUDED_CODEPOINTS from every font inside a TTC file.

    The fonts in ttc_name are extracted into TEMP_DIR, each one is rewritten
    in place with the excluded code points removed from its cmap, and a
    collection named ttc_name is then built from them. The rebuild runs while
    the working directory is TEMP_DIR. The extracted per-font files are
    deleted afterwards, including when subsetting or rebuilding fails.

    Args:
      ttc_name: File name of the TTC to process.
    """
    otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)

    with tool_utils.temp_chdir(TEMP_DIR):
        try:
            for otf_name in otf_names:
                # Parenthesised single-argument form prints identically on
                # Python 2 and Python 3.
                print('Subsetting %s...' % otf_name)
                remove_from_cmap(
                    otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)
            ttc_utils.ttcfile_build(ttc_name, otf_names)
        finally:
            # Always clean up the intermediate fonts so a failed run does not
            # leave stale files behind in TEMP_DIR.
            for f in otf_names:
                os.remove(f)


remove_codepoints_from_ttc('NotoSansCJK-Regular.ttc')
remove_codepoints_from_ttc('NotoSerifCJK-Regular.ttc')