android-12.0.0_r34/s

#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

In order to create the final font the followings are used as inputs:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework, and lists information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
in the Android font. Resides in framework and currently under external/unicode/.

- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
version that the emoji was added (compatAdded). Updated when the script is executed.

- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

After execution the following files are generated if they don't exist otherwise, they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/android/support/text/emoji/flatbuffer/*
"""

import contextlib
import csv
import hashlib
import itertools
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from fontTools import ttLib
from fontTools.ttLib.tables import otTables
from nototools import font_data

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 30
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 7

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata json output file
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji file for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in <code>emoji-data.txt</code>.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

DEFAULT_EMOJI_ID = 0xF0001
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling metrics of wartermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'

def to_hex_str(value):
    """Converts given int value to hex without the 0x prefix"""
    return format(value, 'X')

def hex_str_to_int(string):
    """Convert a hex string into int"""
    return int(string, 16)

def codepoint_to_string(codepoints):
    """Converts a list of codepoints into a string separated with space."""
    return ' '.join([to_hex_str(x) for x in codepoints])

def prepend_header_to_file(file_path, header_path):
    """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    and annotations."""
    with open(file_path, "r+") as original_file:
        with open(header_path, "r") as copyright_file:
            original_content = original_file.read()
            original_file.seek(0)
            original_file.write(copyright_file.read() + "\n" + original_content)


def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination"""
    tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA
    tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))

def create_test_data(unicode_path):
    """Read all the emojis in the unicode files and update the test file"""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # standardized variants contains a huge list of sequences, only read the ones that are emojis
    # and also the ones with FE0F (emoji style)
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    for line in standardized_variants_lines:
        if STD_VARIANTS_EMOJI_STYLE in line:
            lines.append(line)

    emojis_set = set()
    for line in lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        emojis_set.add(codepoint_to_string(codepoints).upper())

    emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
    for line in emoji_data_lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        if is_emoji_style:
            codepoints = [to_hex_str(x) for x in
                          codepoints_for_emojirange(codepoints_range)]
            emojis_set.update(codepoints)

    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    #  finally add the android default emoji exceptions
    emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])

    emojis_list = list(emojis_set)
    emojis_list.sort()
    with open(TEST_DATA_PATH, "w") as test_file:
        for line in emojis_list:
            test_file.write("%s\n" % line)

class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics dictionary.
        :param metrics: a dictionary object that has width and height values.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the json representation of EmojiData."""
        json_element = {}
        json_element['id'] = self.emoji_id
        json_element['emojiStyle'] = self.emoji_style
        json_element['sdkAdded'] = self.sdk_added
        json_element['compatAdded'] = self.compat_added
        json_element['width'] = self.width
        json_element['height'] = self.height
        json_element['codepoints'] = self.codepoints
        return json_element

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Updates current EmojiData with the values in a json element"""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and comments
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        with open(file_path) as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        if optional:
            pass
        else:
            raise

    return result

def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers"""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0])
        exceptions.append(codepoint)
    return exceptions

def codepoints_for_emojirange(codepoints_range):
    """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
    such as XYZ ... UVT
    """
    codepoints = []
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        codepoints_range = range(hex_str_to_int(range_start),
                                 hex_str_to_int(range_end) + 1)
        codepoints.extend(codepoints_range)
    else:
        codepoints.append(hex_str_to_int(codepoints_range))
    return codepoints

def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]"""
    line = line.strip()
    if '#' in line:
        line = line[:line.index('#')]
    else:
        raise ValueError("Line is expected to have # in it")
    line = line.split(';')
    codepoints_range = line[0].strip()
    emoji_property = line[1].strip()

    return codepoints_range, emoji_property

def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]"""
    lines = read_emoji_lines(file_path)

    for line in lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        codepoints = codepoints_for_emojirange(codepoints_range)

        for codepoint in codepoints:
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
            if key in emoji_data_map:
                # since there are multiple definitions of emojis, only update when emoji style is
                # True
                if codepoint_is_emoji_style:
                    emoji_data_map[key].emoji_style = True
            else:
                emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style)
                emoji_data_map[key] = emoji_data


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map."""
    lines = read_emoji_lines(file_path, optional)
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS]
        key = codepoint_to_string(codepoints)
        if not key in emoji_data_map:
            emoji_data = _EmojiData(codepoints, False)
            emoji_data_map[key] = emoji_data


def load_emoji_data_map(unicode_path):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
       in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
       emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
       exist, or contains no emojis defined returns DEFAULT_EMOJI_ID"""
    current_emoji_id = DEFAULT_EMOJI_ID
    if os.path.isfile(INPUT_META_FILE):
        with open(INPUT_META_FILE) as csvfile:
            reader = csv.reader(csvfile, delimiter=' ')
            for row in reader:
                if row[0].startswith('#'):
                    continue
                emoji_id = hex_str_to_int(row[0])
                sdk_added = int(row[1])
                compat_added = int(row[2])
                key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
                if key in emoji_data_map:
                    emoji_data = emoji_data_map[key]
                    emoji_data.update(emoji_id, sdk_added, compat_added)
                    if emoji_data.emoji_id >= current_emoji_id:
                        current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        tag_list.remove('meta')
        tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """inject metadata binary into font"""
    if not 'meta' in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of font file and the unicode files"""
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
                       os.path.join(unicode_path, EMOJI_ZWJ_FILE),
                       os.path.join(unicode_path, EMOJI_SEQ_FILE)]
    for emoji_filename in emoji_filenames:
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)


def add_file_to_sha(sha_algo, file_path):
    with open(file_path, 'rb') as input_file:
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)

def create_sha_from_source_files(font_paths):
    """Creates a SHA from the given font files"""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path):
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        self.emoji_data_map = {}
        self.remapped_codepoints = {}
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height"""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
        finally clears all elements in CMAP 12"""
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB and clear all elements under GSUB"""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if sub_class_set:
                for sub_class_rule in sub_class_set.SubClassRule:
                    # prepare holder for substitution list. each rule will have a list that is added
                    # to the subs_list.
                    subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                    for record in sub_class_rule.SubstLookupRecord:
                        subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
                                                                            record.LookupListIndex)
                    # create combinations or all lists. the combinations will be filtered by
                    # emoji_data_map. the first element that contain as a valid glyph will be used
                    # as the final glyph
                    combinations = list(itertools.product(*subs_list))
                    for seq in combinations:
                        glyph_names = [x["input"] for x in seq]
                        codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                        outputs = [x["output"] for x in seq if x["output"]]
                        nonempty_outputs = list(filter(lambda x: x.strip() , outputs))
                        if len(nonempty_outputs) == 0:
                            print("Warning: no output glyph is set for " + str(glyph_names))
                            continue
                        elif len(nonempty_outputs) > 1:
                            print(
                                "Warning: multiple glyph is set for "
                                    + str(glyph_names) + ", will use the first one")

                        glyph = nonempty_outputs[0]
                        self.update_emoji_data(codepoints, glyph)

    def get_substitutions(self, lookup_list, index):
        result = []
        for x in lookup_list.Lookup[index].SubTable:
            for input, output in x.mapping.items():
                result.append({"input": input, "output": output})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file"""
        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = []

        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        total_emoji_count = 0
        for emoji_data in emoji_data_list:
            element = emoji_data.create_json_element()
            output_json['list'].append(element)
            total_emoji_count = total_emoji_count + 1

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return total_emoji_count

    def write_metadata_csv(self):
        """Writes emoji metadata into space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string
        m = re.search('^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
        new_lookup.SubTable = [ new_subtable ]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature
        feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp')
        if not feature:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font.
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        """

        tmp_dir = tempfile.mkdtemp()

        # create emoji codepoints to EmojiData map
        self.emoji_data_map = load_emoji_data_map(self.unicode_path)

        # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
        # returned is either default or 1 greater than the largest id in previous data
        self.emoji_id = load_previous_metadata(self.emoji_data_map)

        # recalcTimestamp parameter will keep the modified field same as the original font. Changing
        # the modified field in the font causes the font ttf file to change, which makes it harder
        # to understand if something really changed in the font.
        with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
            # read image size data
            self.read_cbdt(ttf)

            # glyph name to codepoint map
            glyph_to_codepoint_map = {}

            # read single codepoint emojis under cmap12 and clear the table contents
            cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

            # read emoji sequences gsub and clear the table contents
            self.read_gsub(ttf, glyph_to_codepoint_map)

            # add all new codepoint to glyph mappings
            cmap12_table.cmap.update(self.remapped_codepoints)

            # final metadata csv will be used to generate the sha, therefore write it before
            # metadata json is written.
            self.write_metadata_csv()

            output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
            flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
            flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

            total_emoji_count = self.write_metadata_json(output_json_file)

            # create the flatbuffers binary and java classes
            flatc_command = ['flatc',
                             '-o',
                             tmp_dir,
                             '-b',
                             '-j',
                             FLATBUFFER_SCHEMA,
                             output_json_file]
            subprocess.check_output(flatc_command)

            # inject metadata binary into font
            inject_meta_into_font(ttf, flatbuffer_bin_file)

            # add wartermark glyph for manual verification.
            self.add_watermark(ttf)

            # update CBDT and CBLC versions since older android versions cannot read > 2.0
            ttf['CBDT'].version = 2.0
            ttf['CBLC'].version = 2.0

            # save the new font
            ttf.save(FONT_PATH)

            update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir
                                         FLATBUFFER_HEADER,
                                         FLATBUFFER_JAVA_TARGET)

            create_test_data(self.unicode_path)

            # clear the tmp output directory
            shutil.rmtree(tmp_dir, ignore_errors=True)

            print(
                "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")

def parse_args(argv):
    # parse manually to avoid any extra dependencies
    if len(argv) < 3:
        print_usage()
        sys.exit(1)
    return (sys.argv[1], sys.argv[2])

def main():
    font_file, unicode_dir = parse_args(sys.argv)
    EmojiFontCreator(font_file, unicode_dir).create_font()


if __name__ == '__main__':
    main()