Update Minikin to use ICU's emoji data
Certain differences are still needed, since ICU appears to support Emoji 4.0 only, while we need Emoji 5.0. But the bulk of the data is now carried by ICU. We no longer need the script that generates the tables, so that's also removed. Test: Comprehensive unit tests added. Bug: 27365282 Bug: 30874706 Change-Id: I011443fbca9bb202deff7fffb40043f89e1f1fb1
This commit is contained in:
@@ -15,18 +15,6 @@
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
# Generate unicode emoji data from UCD.
|
||||
UNICODE_EMOJI_H_GEN_PY := $(LOCAL_PATH)/unicode_emoji_h_gen.py
|
||||
UNICODE_EMOJI_DATA := $(TOP)/external/unicode/emoji-data.txt
|
||||
|
||||
UNICODE_EMOJI_H := $(intermediates)/generated/UnicodeData.h
|
||||
$(UNICODE_EMOJI_H): $(UNICODE_EMOJI_H_GEN_PY) $(UNICODE_EMOJI_DATA)
|
||||
$(LOCAL_PATH)/MinikinInternal.cpp: $(UNICODE_EMOJI_H)
|
||||
$(UNICODE_EMOJI_H): PRIVATE_CUSTOM_TOOL := python $(UNICODE_EMOJI_H_GEN_PY) \
|
||||
-i $(UNICODE_EMOJI_DATA) \
|
||||
-o $(UNICODE_EMOJI_H)
|
||||
$(UNICODE_EMOJI_H):
|
||||
$(transform-generated-source)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
minikin_src_files := \
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
|
||||
#include "MinikinInternal.h"
|
||||
#include "HbFontCache.h"
|
||||
#include "generated/UnicodeData.h"
|
||||
|
||||
#include <unicode/uchar.h>
|
||||
#include <log/log.h>
|
||||
|
||||
namespace minikin {
|
||||
@@ -36,7 +36,7 @@ void assertMinikinLocked() {
|
||||
bool isEmoji(uint32_t c) {
|
||||
// Emoji characters new in Unicode emoji 5.0.
|
||||
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
// TODO: Remove once emoji-data.txt 5.0 is in the tree.
|
||||
// TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
|
||||
if ((0x1F6F7 <= c && c <= 0x1F6F8)
|
||||
|| c == 0x1F91F
|
||||
|| (0x1F928 <= c && c <= 0x1F92F)
|
||||
@@ -47,54 +47,31 @@ bool isEmoji(uint32_t c) {
|
||||
|| (0x1F9D0 <= c && c <= 0x1F9E6)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
|
||||
return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
|
||||
return u_hasBinaryProperty(c, UCHAR_EMOJI);
|
||||
}
|
||||
|
||||
// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
bool isEmojiModifier(uint32_t c) {
|
||||
return (0x1F3FB <= c && c <= 0x1F3FF);
|
||||
// Emoji modifiers are not expected to change, so there's a small chance we need to customize
|
||||
// this.
|
||||
return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER);
|
||||
}
|
||||
|
||||
// Based on Emoji_Modifier_Base from
|
||||
// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
bool isEmojiBase(uint32_t c) {
|
||||
if (0x261D <= c && c <= 0x270D) {
|
||||
return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
|
||||
} else if (0x1F385 <= c && c <= 0x1F93E) {
|
||||
return (c == 0x1F385
|
||||
|| (0x1F3C2 <= c && c <= 0x1F3C4)
|
||||
|| c == 0x1F3C7
|
||||
|| (0x1F3CA <= c && c <= 0x1F3CC)
|
||||
|| (0x1F442 <= c && c <= 0x1F443)
|
||||
|| (0x1F446 <= c && c <= 0x1F450)
|
||||
|| (0x1F466 <= c && c <= 0x1F469)
|
||||
|| c == 0x1F46E
|
||||
|| (0x1F470 <= c && c <= 0x1F478)
|
||||
|| c == 0x1F47C
|
||||
|| (0x1F481 <= c && c <= 0x1F483)
|
||||
|| (0x1F485 <= c && c <= 0x1F487)
|
||||
|| c == 0x1F4AA
|
||||
|| (0x1F574 <= c && c <= 0x1F575)
|
||||
|| c == 0x1F57A
|
||||
|| c == 0x1F590
|
||||
|| (0x1F595 <= c && c <= 0x1F596)
|
||||
|| (0x1F645 <= c && c <= 0x1F647)
|
||||
|| (0x1F64B <= c && c <= 0x1F64F)
|
||||
|| c == 0x1F6A3
|
||||
|| (0x1F6B4 <= c && c <= 0x1F6B6)
|
||||
|| c == 0x1F6C0
|
||||
|| c == 0x1F6CC
|
||||
|| (0x1F918 <= c && c <= 0x1F91C)
|
||||
|| (0x1F91E <= c && c <= 0x1F91F)
|
||||
|| c == 0x1F926
|
||||
|| (0x1F930 <= c && c <= 0x1F939)
|
||||
|| (0x1F93D <= c && c <= 0x1F93E)
|
||||
|| (0x1F9D1 <= c && c <= 0x1F9DD));
|
||||
} else {
|
||||
return false;
|
||||
// These two characters were removed from Emoji_Modifier_Base in Emoji 4.0, but we need to keep
|
||||
// them as emoji modifier bases since there are fonts and user-generated text out there that
|
||||
// treats these as potential emoji bases.
|
||||
if (c == 0x1F91D || c == 0x1F93C) {
|
||||
return true;
|
||||
}
|
||||
// Emoji Modifier Base characters new in Unicode emoji 5.0.
|
||||
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
// TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
|
||||
if (c == 0x1F91F
|
||||
|| (0x1F931 <= c && c <= 0x1F932)
|
||||
|| (0x1F9D1 <= c && c <= 0x1F9DD)) {
|
||||
return true;
|
||||
}
|
||||
return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE);
|
||||
}
|
||||
|
||||
hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag) {
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2016 The Android Open Source Project
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
"""Generate header file for unicode data."""
|
||||
|
||||
import optparse
|
||||
import sys
|
||||
|
||||
|
||||
# Skeleton of the generated C++ header. _generate_header_contents() substitutes
# the @@@EMOJI_DATA@@@ placeholder with the formatted list of code points.
UNICODE_EMOJI_TEMPLATE="""
|
||||
/* file generated by frameworks/minikin/lib/minikin/Android.mk */
|
||||
#ifndef MINIKIN_UNICODE_EMOJI_H
|
||||
#define MINIKIN_UNICODE_EMOJI_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace minikin {
|
||||
namespace generated {
|
||||
|
||||
int32_t EMOJI_LIST[] = {
|
||||
@@@EMOJI_DATA@@@
|
||||
};
|
||||
|
||||
} // namespace generated
|
||||
} // namespace minikin
|
||||
|
||||
#endif // MINIKIN_UNICODE_EMOJI_H
|
||||
"""
|
||||
|
||||
|
||||
def _create_opt_parser():
|
||||
parser = optparse.OptionParser()
|
||||
parser.add_option('-i', '--input', type='str', action='store',
|
||||
help='path to input emoji-data.txt')
|
||||
parser.add_option('-o', '--output', type='str', action='store',
|
||||
help='path to output UnicodeEmoji.h')
|
||||
return parser
|
||||
|
||||
|
||||
def _read_emoji_data(emoji_data_file_path):
|
||||
result = []
|
||||
with open(emoji_data_file_path) as emoji_data_file:
|
||||
for line in emoji_data_file:
|
||||
if '#' in line:
|
||||
line = line[:line.index('#')] # Drop comments.
|
||||
if not line.strip():
|
||||
continue # Skip empty line.
|
||||
|
||||
code_points, prop = line.split(';')
|
||||
code_points = code_points.strip()
|
||||
prop = prop.strip()
|
||||
if prop != 'Emoji':
|
||||
break # Only collect Emoji property code points
|
||||
|
||||
if '..' in code_points: # code point range
|
||||
cp_start, cp_end = code_points.split('..')
|
||||
result.extend(xrange(int(cp_start, 16), int(cp_end, 16) + 1))
|
||||
else:
|
||||
code_point = int(code_points, 16)
|
||||
result.append(code_point)
|
||||
return result
|
||||
|
||||
|
||||
def _generate_header_contents(emoji_list):
    """Render the header text for the given list of code points.

    The code points are formatted as 0x-prefixed upper-case hex, packed into
    indented, comma-separated rows kept under 100 columns, and substituted for
    the @@@EMOJI_DATA@@@ placeholder in UNICODE_EMOJI_TEMPLATE.
    """
    indent = ' ' * 4
    separator = ', '
    width_limit = 100

    hex_codes = ['0x%04X' % code_point for code_point in emoji_list]
    rows = []
    current_row = indent + hex_codes[0]
    for hex_code in hex_codes[1:]:
        if len(current_row) + len(separator) + len(hex_code) < width_limit:
            current_row = separator.join((current_row, hex_code))
        else:
            # Row is full: terminate it with a comma and start the next one.
            rows.append(current_row + ',')
            current_row = indent + hex_code
    rows.append(current_row)

    return UNICODE_EMOJI_TEMPLATE.replace('@@@EMOJI_DATA@@@', '\n'.join(rows))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: parse -i/-o, read the emoji data and write the header file.
    opt_parser = _create_opt_parser()
    opts, _ = opt_parser.parse_args()
    if not opts.input or not opts.output:
        # Without this check a missing option reaches open(None) and fails
        # with a TypeError instead of a usage message.
        opt_parser.error('both --input and --output are required')

    emoji_list = _read_emoji_data(opts.input)
    header = _generate_header_contents(emoji_list)
    with open(opts.output, 'w') as header_file:
        header_file.write(header)
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <unicode/uchar.h>
|
||||
|
||||
#include "MinikinInternal.h"
|
||||
|
||||
namespace minikin {
|
||||
@@ -23,12 +25,56 @@ namespace minikin {
|
||||
// Checks isEmoji() on sample code points: the EXPECT_TRUE group must be
// reported as emoji, the EXPECT_FALSE group must not.
TEST(MinikinInternalTest, isEmojiTest) {
|
||||
EXPECT_TRUE(isEmoji(0x0023)); // NUMBER SIGN
|
||||
EXPECT_TRUE(isEmoji(0x0035)); // DIGIT FIVE
|
||||
EXPECT_TRUE(isEmoji(0x2640)); // FEMALE SIGN
|
||||
EXPECT_TRUE(isEmoji(0x2642)); // MALE SIGN
|
||||
EXPECT_TRUE(isEmoji(0x2695)); // STAFF OF AESCULAPIUS
|
||||
EXPECT_TRUE(isEmoji(0x1F0CF)); // PLAYING CARD BLACK JOKER
|
||||
EXPECT_TRUE(isEmoji(0x1F1E9)); // REGIONAL INDICATOR SYMBOL LETTER D
|
||||
EXPECT_TRUE(isEmoji(0x1F6F7)); // SLED
|
||||
EXPECT_TRUE(isEmoji(0x1F9E6)); // SOCKS
|
||||
|
||||
EXPECT_FALSE(isEmoji(0x0000)); // <control>
|
||||
EXPECT_FALSE(isEmoji(0x0061)); // LATIN SMALL LETTER A
|
||||
EXPECT_FALSE(isEmoji(0x1F93B)); // MODERN PENTATHLON
|
||||
EXPECT_FALSE(isEmoji(0x1F946)); // RIFLE
|
||||
EXPECT_FALSE(isEmoji(0x29E3D)); // A han character.
|
||||
}
|
||||
|
||||
// Checks isEmojiModifier(): U+1F3FB..U+1F3FF must be accepted, while the code
// points immediately before and after that range and other samples must not.
TEST(MinikinInternalTest, isEmojiModifierTest) {
|
||||
EXPECT_TRUE(isEmojiModifier(0x1F3FB)); // EMOJI MODIFIER FITZPATRICK TYPE-1-2
|
||||
EXPECT_TRUE(isEmojiModifier(0x1F3FC)); // EMOJI MODIFIER FITZPATRICK TYPE-3
|
||||
EXPECT_TRUE(isEmojiModifier(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4
|
||||
EXPECT_TRUE(isEmojiModifier(0x1F3FE)); // EMOJI MODIFIER FITZPATRICK TYPE-5
|
||||
EXPECT_TRUE(isEmojiModifier(0x1F3FF)); // EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
|
||||
EXPECT_FALSE(isEmojiModifier(0x0000)); // <control>
|
||||
EXPECT_FALSE(isEmojiModifier(0x1F3FA)); // AMPHORA
|
||||
EXPECT_FALSE(isEmojiModifier(0x1F400)); // RAT
|
||||
EXPECT_FALSE(isEmojiModifier(0x29E3D)); // A han character.
|
||||
}
|
||||
|
||||
// Checks isEmojiBase() on sample code points. The accepted group includes
// U+1F91D and U+1F93C, which the inline notes mark as removed in Emoji 4.0 but
// still required to be treated as modifier bases.
TEST(MinikinInternalTest, isEmojiBaseTest) {
|
||||
EXPECT_TRUE(isEmojiBase(0x261D)); // WHITE UP POINTING INDEX
|
||||
EXPECT_TRUE(isEmojiBase(0x270D)); // WRITING HAND
|
||||
EXPECT_TRUE(isEmojiBase(0x1F385)); // FATHER CHRISTMAS
|
||||
EXPECT_TRUE(isEmojiBase(0x1F3C2)); // SNOWBOARDER
|
||||
EXPECT_TRUE(isEmojiBase(0x1F3C7)); // HORSE RACING
|
||||
EXPECT_TRUE(isEmojiBase(0x1F3CC)); // GOLFER
|
||||
EXPECT_TRUE(isEmojiBase(0x1F574)); // MAN IN BUSINESS SUIT LEVITATING
|
||||
EXPECT_TRUE(isEmojiBase(0x1F6CC)); // SLEEPING ACCOMMODATION
|
||||
EXPECT_TRUE(isEmojiBase(0x1F91D)); // HANDSHAKE (removed from Emoji 4.0, but we need it)
|
||||
EXPECT_TRUE(isEmojiBase(0x1F91F)); // I LOVE YOU HAND SIGN
|
||||
EXPECT_TRUE(isEmojiBase(0x1F931)); // BREAST-FEEDING
|
||||
EXPECT_TRUE(isEmojiBase(0x1F932)); // PALMS UP TOGETHER
|
||||
EXPECT_TRUE(isEmojiBase(0x1F93C)); // WRESTLERS (removed from Emoji 4.0, but we need it)
|
||||
EXPECT_TRUE(isEmojiBase(0x1F9D1)); // ADULT
|
||||
EXPECT_TRUE(isEmojiBase(0x1F9DD)); // ELF
|
||||
|
||||
EXPECT_FALSE(isEmojiBase(0x0000)); // <control>
|
||||
EXPECT_FALSE(isEmojiBase(0x261C)); // WHITE LEFT POINTING INDEX
|
||||
EXPECT_FALSE(isEmojiBase(0x1F384)); // CHRISTMAS TREE
|
||||
EXPECT_FALSE(isEmojiBase(0x1F9DE)); // GENIE
|
||||
EXPECT_FALSE(isEmojiBase(0x29E3D)); // A han character.
|
||||
}
|
||||
|
||||
} // namespace minikin
|
||||
|
||||
Reference in New Issue
Block a user