#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

The following inputs are used to create the final font:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework and list information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt and additions/emoji-sequences.txt: Optional files that include
emojis that are not defined in Unicode files, but are in the Android font. They are looked up
under the unicode directory given on the command line.

- additions/emoji-data.txt: Codepoints that are rendered with emoji style by default even though
the Unicode emoji-data.txt does not define them so. Looked up under the unicode directory.

- data/emoji_metadata.txt: The file that includes the id, the first Android OS version that the
emoji was added (sdkAdded), the first EmojiCompat font version that the emoji was added
(compatAdded), and finally the codepoints. Updated when the script is executed.

- ../emoji-compat-flatbuffers/data/emoji_metadata.fbs: The flatbuffer schema file.
See http://google.github.io/flatbuffers/.

After execution the following files are generated if they don't exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- ../emoji-compat-flatbuffers/src/java/androidx/text/emoji/flatbuffer/*
"""

import contextlib
import csv
import hashlib
import itertools
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from fontTools import ttLib
from fontTools.ttLib.tables import otTables
from nototools import font_data

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 30
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 7

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
# relative java source root, joined onto the flatbuffer module directory below
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text: id, sdkAdded, compatAdded, codepoints)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in emoji-data.txt.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file; the previous run's output is read back as this run's input
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package; the trailing '' keeps a trailing separator
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# relative path (joined onto the temporary directory) that contains flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Property names as they appear in the (upper-cased) lines of emoji-data.txt
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# Marker used to pick the emoji style lines of emoji-variation-sequences.txt
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First emoji id that is handed out; start of Supplemental Private Use Area-A
DEFAULT_EMOJI_ID = 0xF0001
# Emoji style variation selector; it is stripped from sequences before they are used as keys
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling metrics of watermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'


def to_hex_str(value):
    """Converts given int value to upper case hex without the 0x prefix"""
    return format(value, 'X')


def hex_str_to_int(string):
    """Convert a hex string into int"""
    return int(string, 16)


def codepoint_to_string(codepoints):
    """Converts an iterable of codepoints into a string of hex values separated with space."""
    return ' '.join(to_hex_str(x) for x in codepoints)


def _parse_sequence_codepoints(line):
    """Returns the codepoints (as ints) in the first ';' separated field of a sequence line.
    i.e. '1F1E6 1F1E8 ; Name ; [...]' returns [0x1F1E6, 0x1F1E8]"""
    return [hex_str_to_int(x) for x in line.split(';')[0].split()]


def prepend_header_to_file(file_path, header_path):
    """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    and annotations.
    :param file_path: file that is rewritten in place
    :param header_path: file whose content is put (followed by a new line) before the original
    content
    """
    with open(file_path, "r+") as original_file:
        with open(header_path, "r") as copyright_file:
            original_content = original_file.read()
            # the new content is always longer than the old one, therefore rewinding without
            # truncating is enough
            original_file.seek(0)
            original_file.write(copyright_file.read() + "\n" + original_content)


def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination
    :param flatbuffer_java_dir: directory that contains the java files generated by flatc
    :param header_dir: path of the header file that is prepended to the java files
    :param target_dir: directory that the files are copied into, created if it does not exist
    """
    tmp_metadata_list = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_LIST_JAVA)
    tmp_metadata_item = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_ITEM_JAVA)
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    os.makedirs(target_dir, exist_ok=True)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))


def create_test_data(unicode_path):
    """Read all the emojis in the unicode files and update the test file.
    The sorted, de-duplicated emojis are written one per line into TEST_DATA_PATH.
    :param unicode_path: path to directory that contains unicode files
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # standardized variants contains a huge list of sequences, only read the ones that are emojis
    # and also the ones with FE0F (emoji style)
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    lines += [line for line in standardized_variants_lines if STD_VARIANTS_EMOJI_STYLE in line]

    emojis_set = set()
    for line in lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        emojis_set.add(codepoint_to_string(_parse_sequence_codepoints(line)).upper())

    # single codepoints are only added when they are rendered with emoji style by default
    for line in read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property == EMOJI_PRESENTATION_STR:
            emojis_set.update(to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range))

    # finally add the android default emoji exceptions
    emojis_set.update(to_hex_str(x) for x in get_emoji_style_exceptions(unicode_path))

    with open(TEST_DATA_PATH, "w") as test_file:
        for line in sorted(emojis_set):
            test_file.write("%s\n" % line)


class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that no id is assigned yet, see EmojiFontCreator.update_emoji_data
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        # defaults for an emoji that is new in this run, overwritten by update() for the emojis
        # that are already in the previous metadata file
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics.
        :param metrics: an object that has width and height attributes.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the json representation of EmojiData."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Updates current EmojiData with the values read from the previous metadata file"""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and comments
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        # the unicode files contain the emojis themselves in the comments, therefore do not rely
        # on the locale for decoding
        with open(file_path, encoding='utf-8') as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        if not optional:
            raise

    return result


def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers.
    Each line is expected to define a single codepoint, not a range."""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    return [hex_str_to_int(codepoints_and_emoji_prop(line)[0]) for line in lines]


def codepoints_for_emojirange(codepoints_range):
    """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
    such as XYZ..UVT (both ends are included)
    """
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        return list(range(hex_str_to_int(range_start), hex_str_to_int(range_end) + 1))
    return [hex_str_to_int(codepoints_range)]


def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    :raises ValueError: if the line does not contain a '#' character
    """
    line = line.strip()
    if '#' not in line:
        raise ValueError("Line is expected to have # in it")
    fields = line[:line.index('#')].split(';')
    codepoints_range = fields[0].strip()
    emoji_property = fields[1].strip()

    return codepoints_range, emoji_property


def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: path of the unicode emoji data file
    :param emoji_style_exceptions: codepoints that are emoji style regardless of the file content
    """
    # the membership is checked for every codepoint, a set keeps it cheap
    style_exceptions = set(emoji_style_exceptions)

    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in style_exceptions
            if key in emoji_data_map:
                # since there are multiple definitions of emojis, only update when emoji style is
                # True
                if codepoint_is_emoji_style:
                    emoji_data_map[key].emoji_style = True
            else:
                emoji_data_map[key] = _EmojiData([codepoint], codepoint_is_emoji_style)


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map. Sequences that are already in the map are left untouched.
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: path of the sequence file
    :param optional: if True a file that cannot be read is silently ignored
    """
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in read_emoji_lines(file_path, optional):
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        # the emoji style variation selector is not part of the key
        codepoints = [x for x in _parse_sequence_codepoints(line) if x != EMOJI_STYLE_VS]
        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(codepoints, False)


def load_emoji_data_map(unicode_path):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :param unicode_path: path to directory that contains unicode files
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
    in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
    emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
    exist, or contains no emojis defined returns DEFAULT_EMOJI_ID"""
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    # newline='' is what the csv module asks for, it takes care of the line endings itself
    with open(INPUT_META_FILE, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            # skip the blank lines (read as an empty row) and the comment/header lines
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
            # emojis that are not in the unicode files anymore are dropped
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= current_emoji_id:
                    current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # the replaced sort stays installed for the module, do not fail for a font that has no
        # meta table
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """inject metadata binary into font
    :param ttf: font object, a meta table is created when the font does not have one
    :param flatbuffer_bin_filename: path of the binary file that is stored under
    EMOJI_META_TAG_NAME in the meta table
    """
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of font file, the unicode files and the flatbuffer files
    :raises ValueError: if one of the required files or directories does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
                       os.path.join(unicode_path, EMOJI_ZWJ_FILE),
                       os.path.join(unicode_path, EMOJI_SEQ_FILE)]
    for emoji_filename in emoji_filenames:
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)


def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into the given hash object in 8192 byte chunks"""
    with open(file_path, 'rb') as input_file:
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)


def create_sha_from_source_files(font_paths):
    """Creates a SHA-256 hex digest from the content of the given files, in the given order"""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        self.emoji_data_map = {}
        # emoji id to glyph name, added into cmap 12 so that every emoji has a single codepoint
        self.remapped_codepoints = {}
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored."""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map.
        :return: the first cmap table with format 12, platformID 3 and platEncID 10
        :raises ValueError: if the font does not have such a table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB. Context substitutions (lookup type 5) are
        processed before the ligature substitutions (lookup type 4)."""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first element that contains a valid glyph will be used
                # as the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    self.update_emoji_data(codepoints, nonempty_outputs[0])

    def get_substitutions(self, lookup_list, index):
        """Returns the mappings of all subtables of the lookup at the given index as a list of
        {"input": glyph, "output": glyph} dictionaries"""
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Ligature Substitution"""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis, sorted by emoji id, into a json file
        :return: number of emojis written
        """
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
        output_json = {
            'version': METADATA_VERSION,
            'sourceSha': create_sha_from_source_files(
                [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA]),
            'list': [emoji_data.create_json_element() for emoji_data in emoji_data_list],
        }

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata, sorted by emoji id, into space separated file"""
        # newline='' is what the csv module asks for, otherwise the line ending written by the
        # writer is translated once more on the platforms that use \r\n
        with open(OUTPUT_META_FILE, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
        """Adds a watermark glyph that is substituted with the glyphs of the font version.
        Used for manual verification of the font.
        :raises ValueError: if the font has no proper version string or no ccmp feature
        """
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string
        m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
        new_lookup.SubTable = [ new_subtable ]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature. next() needs the default, otherwise a missing feature ends up as
        # StopIteration instead of the error below
        feature = next(
            (x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp'), None)
        if not feature:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font from the font and the unicode files that are given to the
        constructor. Also updates the metadata file, the flatbuffer java files and the test data.
        """

        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
            # returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences under gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes
                flatc_command = ['flatc',
                                 '-o',
                                 tmp_dir,
                                 '-b',
                                 '-j',
                                 FLATBUFFER_SCHEMA,
                                 output_json_file]
                subprocess.check_output(flatc_command)

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # add watermark glyph for manual verification.
                self.add_watermark(ttf)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                ttf.save(FONT_PATH)

                update_flatbuffer_java_files(flatbuffer_java_dir,  # tmp dir
                                             FLATBUFFER_HEADER,
                                             FLATBUFFER_JAVA_TARGET)

                create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")


def parse_args(argv):
    """Returns the font path and the unicode directory path given in argv. Prints the usage and
    exits with status 1 if they are not given.
    :param argv: argument list in sys.argv layout, i.e. the script name is the first element
    :return: tuple of (font path, unicode directory path)
    """
    # parse manually to avoid any extra dependencies
    if len(argv) < 3:
        print_usage()
        sys.exit(1)
    # use the list that was validated above, not sys.argv
    return (argv[1], argv[2])


def main():
    """Entry point of the script, creates the font using the command line arguments"""
    font_file, unicode_dir = parse_args(sys.argv)
    EmojiFontCreator(font_file, unicode_dir).create_font()


if __name__ == '__main__':
    main()