• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
19under a meta tag with name 'Emji'.
20
In order to create the final font the following are used as inputs:
22
23- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
24external/noto-fonts/emoji/NotoColorEmoji.ttf
25
26- Unicode files: Unicode files that are in the framework, and lists information about all the
27emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
28and emoji-variation-sequences.txt. Currently at external/unicode/.
29
30- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
31in the Android font. Resides in framework and currently under external/unicode/.
32
33- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
34Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
35version that the emoji was added (compatAdded). Updated when the script is executed.
36
- ../emoji-compat-flatbuffers/data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
38
After execution the following files are generated if they don't exist; otherwise they are updated:
40- font/NotoColorEmojiCompat.ttf
41- supported-emojis/emojis.txt
42- data/emoji_metadata.txt
- ../emoji-compat-flatbuffers/src/java/androidx/text/emoji/flatbuffer/*
44"""
45
46import contextlib
47import csv
48import hashlib
49import itertools
50import json
51import os
52import re
53import shutil
54import subprocess
55import sys
56import tempfile
57from fontTools import ttLib
58from fontTools.ttLib.tables import otTables
59from nototools import font_data
60
########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version. Used as the default sdkAdded value of an emoji that has no entry in
# the previous metadata file.
SDK_VERSION = 30
# Metadata version that will be embedded into the font. If there are updates to the font that
# would cause data/emoji_metadata.txt to change, this integer number should be incremented. This
# number defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 7

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
# relative java source directory, joined with FLATBUFFER_MODULE_DIR below
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text: id, sdkAdded, compatAdded, codepoints)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files. Both are read as optional.
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in emoji-data.txt.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file. Same file as the output: it is read first and rewritten afterwards.
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package. The trailing empty component makes the
# path end with a separator.
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# relative path, under the temporary directory, that contains the flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Property names as they appear after the unicode file lines are converted to uppercase.
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# marker of the emoji style lines in emoji-variation-sequences.txt
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# first emoji id to assign, the start of Supplemental Private Use Area-A
DEFAULT_EMOJI_ID = 0xF0001
# variation selector that requests emoji style
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling metrics of watermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'
131
def to_hex_str(value):
    """Returns the uppercase hexadecimal form of the given int, without the 0x prefix."""
    return '{:X}'.format(value)
135
def hex_str_to_int(string):
    """Parses the given base 16 string and returns its value as an int."""
    return int(string, base=16)
139
def codepoint_to_string(codepoints):
    """Returns the hex form of every codepoint joined with a single space,
    e.g. [0x1F468, 0x200D] becomes '1F468 200D'."""
    hex_values = map(to_hex_str, codepoints)
    return ' '.join(hex_values)
143
def prepend_header_to_file(file_path, header_path):
    """Rewrites file_path in place so that it starts with the content of header_path, followed
    by a newline and the previous content of the file. Used to update flatbuffer java files
    with header, comments and annotations."""
    with open(file_path, "r+") as target_file, open(header_path, "r") as header_file:
        previous_content = target_file.read()
        # rewind so that the write below starts at the beginning of the file; the new content
        # is longer than the previous one, therefore nothing has to be truncated
        target_file.seek(0)
        new_content = header_file.read() + "\n" + previous_content
        target_file.write(new_content)
152
153
def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination
    :param flatbuffer_java_dir: directory that contains the generated MetadataList.java and
    MetadataItem.java files
    :param header_dir: path of the header file (a file, despite the parameter name) whose
    content is prepended to both java files
    :param target_dir: directory that the updated java files are copied into. It is created if
    it does not exist.
    """
    # os.path.join gives the same result as plain concatenation when the directory ends with a
    # separator, and also works when it does not.
    tmp_metadata_list = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_LIST_JAVA)
    tmp_metadata_item = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_ITEM_JAVA)
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))
166
def create_test_data(unicode_path):
    """Collects every emoji listed in the unicode files and rewrites the test file with them.
    Each emoji is written to TEST_DATA_PATH as one line of space separated uppercase hex
    codepoints, sorted as strings.
    :param unicode_path: path to directory that contains unicode files
    """
    # (file name, optional) pairs of the files that list one emoji sequence per line
    sequence_sources = (
        (EMOJI_ZWJ_FILE, False),
        (EMOJI_SEQ_FILE, False),
        (ANDROID_EMOJI_ZWJ_SEQ_FILE, True),
        (ANDROID_EMOJIS_SEQ_FILE, True),
    )
    sequence_lines = []
    for file_name, is_optional in sequence_sources:
        sequence_lines.extend(
            read_emoji_lines(os.path.join(unicode_path, file_name), optional=is_optional))

    # standardized variants contains a huge list of sequences, only keep the ones that are
    # marked as emoji style (the ones with FE0F)
    variation_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    sequence_lines.extend(x for x in variation_lines if STD_VARIANTS_EMOJI_STYLE in x)

    supported_emojis = set()
    for sequence_line in sequence_lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. These lines are
        # skipped since the emoji presentations are already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in sequence_line:
            continue
        hex_values = sequence_line.split(';')[0].strip().split(' ')
        codepoints = [hex_str_to_int(x) for x in hex_values]
        supported_emojis.add(codepoint_to_string(codepoints).upper())

    # single codepoints are only added when they have emoji presentation by default
    for data_line in read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(data_line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        if emoji_property != EMOJI_PRESENTATION_STR:
            continue
        supported_emojis.update(
            [to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range)])

    # finally add the android default emoji exceptions
    supported_emojis.update(
        [to_hex_str(x) for x in get_emoji_style_exceptions(unicode_path)])

    with open(TEST_DATA_PATH, "w") as test_file:
        for emoji in sorted(supported_emojis):
            test_file.write("%s\n" % emoji)
213
214class _EmojiData(object):
215    """Holds the information about a single emoji."""
216
217    def __init__(self, codepoints, is_emoji_style):
218        self.codepoints = codepoints
219        self.emoji_style = is_emoji_style
220        self.emoji_id = 0
221        self.width = 0
222        self.height = 0
223        self.sdk_added = SDK_VERSION
224        self.compat_added = METADATA_VERSION
225
226    def update_metrics(self, metrics):
227        """Updates width/height instance variables with the values given in metrics dictionary.
228        :param metrics: a dictionary object that has width and height values.
229        """
230        self.width = metrics.width
231        self.height = metrics.height
232
233    def __repr__(self):
234        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
235                                              codepoint_to_string(self.codepoints))
236
237    def create_json_element(self):
238        """Creates the json representation of EmojiData."""
239        json_element = {}
240        json_element['id'] = self.emoji_id
241        json_element['emojiStyle'] = self.emoji_style
242        json_element['sdkAdded'] = self.sdk_added
243        json_element['compatAdded'] = self.compat_added
244        json_element['width'] = self.width
245        json_element['height'] = self.height
246        json_element['codepoints'] = self.codepoints
247        return json_element
248
249    def create_txt_row(self):
250        """Creates array of values for CSV of EmojiData."""
251        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
252        row += [to_hex_str(x) for x in self.codepoints]
253        return row
254
255    def update(self, emoji_id, sdk_added, compat_added):
256        """Updates current EmojiData with the values in a json element"""
257        self.emoji_id = emoji_id
258        self.sdk_added = sdk_added
259        self.compat_added = compat_added
260
261
def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and the lines that start with a comment. Trailing comments on a data line are kept.
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of stripped, uppercase strings
    :raises IOError: when the file cannot be read and optional is False
    """
    result = []
    try:
        # The Unicode emoji data files are UTF-8 encoded. Decode them explicitly instead of
        # relying on the locale default, which fails on systems with a different encoding.
        with open(file_path, encoding='utf-8') as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        # a file that cannot be read is only tolerated for optional files
        if not optional:
            raise

    return result
283
def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers.
    A line may define a single codepoint or a range such as 231A..231B; ranges are expanded
    the same way as for the other emoji data files.
    :param unicode_path: path to directory that contains unicode files
    :return: list of int codepoints in file order
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        # only the codepoint column is used; the property column is ignored
        codepoints_range = codepoints_and_emoji_prop(line)[0]
        exceptions.extend(codepoints_for_emojirange(codepoints_range))
    return exceptions
292
def codepoints_for_emojirange(codepoints_range):
    """Returns the list of int codepoints described by the first column of an emoji file line.
    A single hex value results in a one element list. A range such as 1F93C..1F93E is expanded
    into every codepoint between the two values, both ends included.
    """
    if '..' not in codepoints_range:
        return [hex_str_to_int(codepoints_range)]

    first_hex, last_hex = codepoints_range.split('..')
    first = hex_str_to_int(first_hex)
    last = hex_str_to_int(last_hex)
    return list(range(first, last + 1))
306
def codepoints_and_emoji_prop(line):
    """Splits an emoji file line into its codepoints column and its emoji property column.
    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    :return: tuple of (codepoints or codepoint range as string, emoji property as string)
    :raises ValueError: if the line does not contain a # character
    """
    # everything starting from the first # is a comment
    data, hash_sign, _ = line.strip().partition('#')
    if not hash_sign:
        raise ValueError("Line is expected to have # in it")

    columns = data.split(';')
    return columns[0].strip(), columns[1].strip()
321
def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Reads an unicode emoji file in which each line describes a codepoint or a codepoint
    interval, and registers one EmojiData per codepoint in emoji_data_map. Only the lines whose
    property is in ACCEPTED_EMOJI_PROPERTIES are used. Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: unicode emoji file path
    :param emoji_style_exceptions: codepoints that are always treated as emoji style
    """
    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        line_is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            emoji_style = line_is_emoji_style or codepoint in emoji_style_exceptions
            if key not in emoji_data_map:
                emoji_data_map[key] = _EmojiData([codepoint], emoji_style)
            elif emoji_style:
                # the same codepoint is defined on multiple lines; emoji style is only ever
                # switched on, never back off
                emoji_data_map[key].emoji_style = True
348
349
def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads a file that defines one emoji sequence per line and adds an EmojiData, with emoji
    style set to False, to emoji_data_map for every sequence that is not in the map yet.
    Expected line format: 1F1E6 1F1E8 ; Name ; [...]
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: emoji sequence file path
    :param optional: if True a file that cannot be read is treated as empty
    """
    for line in read_emoji_lines(file_path, optional):
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. These lines are
        # skipped since the emoji presentations are already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue

        sequence_column = line.split(';')[0].strip()
        parsed_codepoints = [hex_str_to_int(x) for x in sequence_column.split(' ')]
        # the emoji style variation selector is not part of the key
        codepoints = [x for x in parsed_codepoints if x != EMOJI_STYLE_VS]

        key = codepoint_to_string(codepoints)
        if key in emoji_data_map:
            continue
        emoji_data_map[key] = _EmojiData(codepoints, False)
367
368
def load_emoji_data_map(unicode_path):
    """Builds the map of emojis from the unicode files: single codepoints from the emoji data
    file first, then the sequences from the sequence files.
    :param unicode_path: path to directory that contains unicode files
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}

    style_exceptions = get_emoji_style_exceptions(unicode_path)
    data_file_path = os.path.join(unicode_path, EMOJI_DATA_FILE)
    read_emoji_intervals(emoji_data_map, data_file_path, style_exceptions)

    # the unicode sequence files are mandatory, the android additions are used only if they
    # exist
    sequence_files = (
        (EMOJI_ZWJ_FILE, False),
        (EMOJI_SEQ_FILE, False),
        (ANDROID_EMOJI_ZWJ_SEQ_FILE, True),
        (ANDROID_EMOJIS_SEQ_FILE, True),
    )
    for file_name, is_optional in sequence_files:
        sequence_file_path = os.path.join(unicode_path, file_name)
        read_emoji_sequences(emoji_data_map, sequence_file_path, optional=is_optional)

    return emoji_data_map
388
389
def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
       in emoji_metadata.txt. Returns the next emoji id to use, i.e. one greater than the largest
       id among the rows that matched an emoji in emoji_data_map. If emoji_metadata.txt does not
       exist, or none of its rows match, returns DEFAULT_EMOJI_ID.
       Rows that start with # and empty rows are skipped.
       :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
       :return: the next emoji id to use"""
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    with open(INPUT_META_FILE) as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        for row in reader:
            # csv.reader returns an empty list for a blank line, which has no row[0]
            if not row or row[0].startswith('#'):
                continue
            # row format: id(hex) sdkAdded compatAdded codepoint(hex)...
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string([hex_str_to_int(x) for x in row[3:]])
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= current_emoji_id:
                    current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id
413
414
def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first.

    The replacement stays installed on the ttLib module after this function returns, therefore
    it also has to work for tag lists that do not contain a meta table.
    """
    # NOTE(review): this replaces the package level ttLib.sortedTagList; confirm that the
    # fontTools version in use resolves the sort function from there when a font is saved.
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # list.remove raises ValueError for a missing element, only move the tag if it exists
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort
427
428
def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Stores the content of the flatbuffer binary file in the meta table of the font under
    the EMOJI_META_TAG_NAME tag. The meta table is created if the font does not have one.
    :param ttf: the font to update
    :param flatbuffer_bin_filename: path of the binary metadata file
    """
    if 'meta' not in ttf:
        meta_table_class = ttLib.getTableClass('meta')
        ttf['meta'] = meta_table_class()
    meta_table = ttf['meta']

    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        metadata_bytes = flatbuffer_bin_file.read()
        meta_table.data[EMOJI_META_TAG_NAME] = metadata_bytes

    # make the meta table the first table for faster access
    update_ttlib_orig_sort()
439
440
def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of the font file, the unicode files and the flatbuffer files.
    Every file that is read as mandatory during font creation is checked here so that a missing
    input is reported before any output file is written.
    :param font_path: path to the emoji font file
    :param unicode_path: path to directory that contains unicode files
    :param flatbuffer_path: path to the flatbuffer module directory
    :raises ValueError: if any of the inputs does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    # the optional android addition files (ANDROID_EMOJI_ZWJ_SEQ_FILE and
    # ANDROID_EMOJIS_SEQ_FILE) are intentionally not listed
    required_unicode_files = (EMOJI_DATA_FILE,
                              EMOJI_ZWJ_FILE,
                              EMOJI_SEQ_FILE,
                              EMOJI_VARIATION_SEQ_FILE,
                              EMOJI_STYLE_OVERRIDE_FILE)
    for required_file in required_unicode_files:
        emoji_filename = os.path.join(unicode_path, required_file)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    # os.path.join returns its last absolute component, so flatbuffer_path only has an effect
    # on these two paths if FLATBUFFER_SCHEMA or FLATBUFFER_HEADER are relative
    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)
466
467
def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into the given hash object, 8192 bytes at a time.
    :param sha_algo: hash object that is updated in place
    :param file_path: path of the file to read in binary mode
    """
    chunk_size = 8192
    with open(file_path, 'rb') as input_file:
        while True:
            chunk = input_file.read(chunk_size)
            if not chunk:
                # an empty read marks the end of the file
                break
            sha_algo.update(chunk)
472
def create_sha_from_source_files(font_paths):
    """Returns the hex SHA-256 digest of the content of the given files, hashed one after the
    other in the given order.
    :param font_paths: paths of the files to hash
    """
    digest = hashlib.sha256()
    for source_path in font_paths:
        add_file_to_sha(digest, source_path)
    hex_digest = digest.hexdigest()
    return hex_digest
479
480
class EmojiFontCreator(object):
    """Creates the EmojiCompat font.

    Reads the emoji definitions from the unicode files, matches them with the glyphs that the
    source font defines in cmap format 12 and GSUB, gives an emoji id to each matched emoji and
    embeds the resulting metadata into the meta table of the font.
    """

    def __init__(self, font_path, unicode_path):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        # map of space separated codepoints to EmojiData
        self.emoji_data_map = {}
        # map of emoji id to glyph name, added to cmap format 12 table of the font
        self.remapped_codepoints = {}
        # map of glyph name to the image metrics read from CBDT
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored.
        :raises KeyError: if there are no image metrics for glyph_name"""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            # an id of 0 means that the emoji was not in the previous metadata
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map
        and the related EmojiData.
        :return: the cmap table with format:12, platformID:3 and platEncID:10
        :raises ValueError: if the font does not contain such a table"""
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB and updates the related EmojiData. Context
        subtables are processed before ligature subtables."""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations are filtered by
                # emoji_data_map. the first non empty output glyph of a combination is used as
                # the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                                + str(glyph_names) + ", will use the first one")

                    self.update_emoji_data(codepoints, nonempty_outputs[0])

    def get_substitutions(self, lookup_list, index):
        """Returns the substitutions of all subtables of the lookup at the given index.
        :return: list of dictionaries with "input" and "output" keys, one per mapping entry"""
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as ligatures. The sequence of a ligature is its first glyph
        followed by its components."""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis, sorted by emoji id, into a json file
        :return: the number of emojis that are written"""
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = [x.create_json_element() for x in emoji_data_list]

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata, sorted by emoji id, into space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
        """Adds a watermark glyph that is substituted with the glyphs of the font version.

        A new glyph is mapped to WATERMARK_NEW_CODE_POINT and a multiple substitution lookup,
        registered to the first ccmp feature, replaces it with the glyphs of
        "<major> <minor>" of the font version.
        :raises ValueError: if the font has no proper version string or no ccmp feature
        """
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string
        m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
        new_lookup.SubTable = [ new_subtable ]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature. Without the default value next() raises StopIteration when there is no
        # ccmp feature and the error below is never reported.
        feature = next((x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp'),
                       None)
        if feature is None:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font from the font and unicode files given to the constructor.
        Writes the font, the metadata text file, the flatbuffer java files and the test data
        file. Intermediate files are created in a temporary directory which is removed when the
        method returns, also when the creation fails.
        """
        tmp_dir = tempfile.mkdtemp()
        try:
            total_emoji_count = self._create_font_files(tmp_dir)
        finally:
            # clear the tmp output directory
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))

    def _create_font_files(self, tmp_dir):
        """Generates all output files, using tmp_dir for the intermediate files.
        :return: the number of emojis written into the metadata"""
        # create emoji codepoints to EmojiData map
        self.emoji_data_map = load_emoji_data_map(self.unicode_path)

        # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
        # returned is either default or 1 greater than the largest id in previous data
        self.emoji_id = load_previous_metadata(self.emoji_data_map)

        # recalcTimestamp parameter will keep the modified field same as the original font. Changing
        # the modified field in the font causes the font ttf file to change, which makes it harder
        # to understand if something really changed in the font.
        with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
            # read image size data
            self.read_cbdt(ttf)

            # glyph name to codepoint map
            glyph_to_codepoint_map = {}

            # read single codepoint emojis under cmap12
            cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

            # read emoji sequences under gsub
            self.read_gsub(ttf, glyph_to_codepoint_map)

            # add all new codepoint to glyph mappings
            cmap12_table.cmap.update(self.remapped_codepoints)

            # final metadata csv will be used to generate the sha, therefore write it before
            # metadata json is written.
            self.write_metadata_csv()

            output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
            flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
            flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

            total_emoji_count = self.write_metadata_json(output_json_file)

            # create the flatbuffers binary and java classes
            flatc_command = ['flatc',
                             '-o',
                             tmp_dir,
                             '-b',
                             '-j',
                             FLATBUFFER_SCHEMA,
                             output_json_file]
            subprocess.check_output(flatc_command)

            # inject metadata binary into font
            inject_meta_into_font(ttf, flatbuffer_bin_file)

            # add watermark glyph for manual verification.
            self.add_watermark(ttf)

            # update CBDT and CBLC versions since older android versions cannot read > 2.0
            ttf['CBDT'].version = 2.0
            ttf['CBLC'].version = 2.0

            # save the new font
            ttf.save(FONT_PATH)

            update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir
                                         FLATBUFFER_HEADER,
                                         FLATBUFFER_JAVA_TARGET)

            create_test_data(self.unicode_path)

        return total_emoji_count
744
745
def print_usage():
    """Writes the command line usage of the script to the standard output."""
    usage_message = ("Please specify a path to font and unicode files.\n"
                     "usage: createfont.py noto-color-emoji-path unicode-dir-path")
    print(usage_message)
750
def parse_args(argv):
    """Returns the font path and the unicode directory path given on the command line.
    Prints the usage and exits with status 1 if there are not enough arguments.
    :param argv: argument list in sys.argv format, argv[0] is the script name
    :return: tuple of (font path, unicode directory path)
    """
    # parse manually to avoid any extra dependencies
    if len(argv) < 3:
        print_usage()
        sys.exit(1)
    # read from the argv parameter, not from sys.argv, so that the given list is honored
    return (argv[1], argv[2])
757
def main():
    """Script entry point: parses the command line and creates the EmojiCompat font."""
    font_file, unicode_dir = parse_args(sys.argv)
    font_creator = EmojiFontCreator(font_file, unicode_dir)
    font_creator.create_font()


# run only when executed as a script, not when imported
if __name__ == '__main__':
    main()
765