forked from firka/flutter
Do not break before and after ZWJ.
am: 47932fa
* commit '47932fa53b75744a30034467cfae6333468f54bb':
Do not break before and after ZWJ.
Change-Id: Ic476d9048b44b84c69fd185b76108c961d8e8545
This commit is contained in:
@@ -15,7 +15,20 @@
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
# Generate unicode emoji data from UCD.
|
||||
UNICODE_EMOJI_H_GEN_PY := $(LOCAL_PATH)/unicode_emoji_h_gen.py
|
||||
UNICODE_EMOJI_DATA := $(TOP)/external/unicode/emoji-data.txt
|
||||
|
||||
UNICODE_EMOJI_H := $(intermediates)/generated/UnicodeData.h
|
||||
$(UNICODE_EMOJI_H): $(UNICODE_EMOJI_H_GEN_PY) $(UNICODE_EMOJI_DATA)
|
||||
$(LOCAL_PATH)/MinikinInternal.cpp: $(UNICODE_EMOJI_H)
|
||||
$(UNICODE_EMOJI_H): PRIVATE_CUSTOM_TOOL := python $(UNICODE_EMOJI_H_GEN_PY) \
|
||||
-i $(UNICODE_EMOJI_DATA) \
|
||||
-o $(UNICODE_EMOJI_H)
|
||||
$(UNICODE_EMOJI_H):
|
||||
$(transform-generated-source)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
minikin_src_files := \
|
||||
AnalyzeStyle.cpp \
|
||||
CmapCoverage.cpp \
|
||||
@@ -40,7 +53,8 @@ minikin_src_files := \
|
||||
minikin_c_includes := \
|
||||
external/harfbuzz_ng/src \
|
||||
external/freetype/include \
|
||||
frameworks/minikin/include
|
||||
frameworks/minikin/include \
|
||||
$(intermediates)
|
||||
|
||||
minikin_shared_libraries := \
|
||||
libharfbuzz_ng \
|
||||
|
||||
@@ -66,19 +66,6 @@ bool isPureKiller(uint32_t c) {
|
||||
|| c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B);
|
||||
}
|
||||
|
||||
// Reports whether |c| belongs to the hard-coded set of code points that may stand directly before
// or after a ZWJ (U+200D) inside an emoji ZWJ sequence. See
// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
bool isZwjEmoji(uint32_t c) {
    switch (c) {
        case 0x2764:   // HEAVY BLACK HEART
        case 0x1F441:  // EYE
        case 0x1F466:  // BOY
        case 0x1F467:  // GIRL
        case 0x1F468:  // MAN
        case 0x1F469:  // WOMAN
        case 0x1F48B:  // KISS MARK
        case 0x1F5E8:  // LEFT SPEECH BUBBLE
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
|
||||
size_t offset) {
|
||||
// This implementation closely follows Unicode Standard Annex #29 on
|
||||
@@ -163,7 +150,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
|
||||
return false;
|
||||
}
|
||||
// Tailoring: make emoji sequences with ZWJ a single grapheme cluster
|
||||
if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) {
|
||||
if (c1 == 0x200D && isEmoji(c2) && offset_back > start) {
|
||||
// look at character before ZWJ to see that both can participate in an emoji zwj sequence
|
||||
uint32_t c0 = 0;
|
||||
U16_PREV(buf, start, offset_back, c0);
|
||||
@@ -171,7 +158,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
|
||||
// skip over emoji variation selector
|
||||
U16_PREV(buf, start, offset_back, c0);
|
||||
}
|
||||
if (isZwjEmoji(c0)) {
|
||||
if (isEmoji(c0)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
#include "MinikinInternal.h"
|
||||
#include "HbFontCache.h"
|
||||
#include "generated/UnicodeData.h"
|
||||
|
||||
#include <cutils/log.h>
|
||||
|
||||
@@ -31,6 +32,11 @@ void assertMinikinLocked() {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool isEmoji(uint32_t c) {
|
||||
const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
|
||||
return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
|
||||
}
|
||||
|
||||
// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
|
||||
bool isEmojiModifier(uint32_t c) {
|
||||
return (0x1F3FB <= c && c <= 0x1F3FF);
|
||||
|
||||
@@ -36,6 +36,9 @@ extern Mutex gMinikinLock;
|
||||
// Aborts if gMinikinLock is not acquired. Do nothing on the release build.
|
||||
void assertMinikinLocked();
|
||||
|
||||
// Returns true if c is emoji.
|
||||
bool isEmoji(uint32_t c);
|
||||
|
||||
// Returns true if c is emoji modifier base.
|
||||
bool isEmojiBase(uint32_t c);
|
||||
|
||||
|
||||
@@ -90,18 +90,9 @@ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) {
|
||||
}
|
||||
}
|
||||
|
||||
// Known emoji ZWJ sequences
|
||||
if (codePoint == CHAR_ZWJ) {
|
||||
// Possible emoji ZWJ sequence
|
||||
if (next_codepoint == 0x2764 || // HEAVY BLACK HEART
|
||||
next_codepoint == 0x1F466 || // BOY
|
||||
next_codepoint == 0x1F467 || // GIRL
|
||||
next_codepoint == 0x1F468 || // MAN
|
||||
next_codepoint == 0x1F469 || // WOMAN
|
||||
next_codepoint == 0x1F48B || // KISS MARK
|
||||
next_codepoint == 0x1F5E8) { // LEFT SPEECH BUBBLE
|
||||
return false;
|
||||
}
|
||||
// Emoji ZWJ sequences.
|
||||
if (codePoint == CHAR_ZWJ && isEmoji(next_codepoint)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
|
||||
|
||||
105
engine/src/flutter/libs/minikin/unicode_emoji_h_gen.py
Normal file
105
engine/src/flutter/libs/minikin/unicode_emoji_h_gen.py
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2016 The Android Open Source Project
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
"""Generate header file for unicode data."""
|
||||
|
||||
import optparse
|
||||
import sys
|
||||
|
||||
|
||||
# Skeleton of the generated C++ header. The '@@@EMOJI_DATA@@@' marker is replaced with the
# comma-separated array initializer by _generate_header_contents().
#
# The table is emitted as 'const uint32_t': a namespace-scope const array has internal linkage
# in C++, so the header can be included from more than one translation unit without producing
# duplicate definitions, and the element type matches the uint32_t code points it is searched
# with.
UNICODE_EMOJI_TEMPLATE="""
/* file generated by frameworks/minikin/lib/minikin/Android.mk */
#ifndef MINIKIN_UNICODE_EMOJI_H
#define MINIKIN_UNICODE_EMOJI_H

#include <stdint.h>

namespace android {
namespace generated {

const uint32_t EMOJI_LIST[] = {
@@@EMOJI_DATA@@@
};

} // namespace generated
} // namespace android

#endif // MINIKIN_UNICODE_EMOJI_H
"""
|
||||
|
||||
|
||||
def _create_opt_parser():
|
||||
parser = optparse.OptionParser()
|
||||
parser.add_option('-i', '--input', type='str', action='store',
|
||||
help='path to input emoji-data.txt')
|
||||
parser.add_option('-o', '--output', type='str', action='store',
|
||||
help='path to output UnicodeEmoji.h')
|
||||
return parser
|
||||
|
||||
|
||||
def _read_emoji_data(emoji_data_file_path):
|
||||
result = []
|
||||
with open(emoji_data_file_path) as emoji_data_file:
|
||||
for line in emoji_data_file:
|
||||
if '#' in line:
|
||||
line = line[:line.index('#')] # Drop comments.
|
||||
if not line.strip():
|
||||
continue # Skip empty line.
|
||||
|
||||
code_points, prop = line.split(';')
|
||||
code_points = code_points.strip()
|
||||
prop = prop.strip()
|
||||
if prop != 'Emoji':
|
||||
break # Only collect Emoji property code points
|
||||
|
||||
if '..' in code_points: # code point range
|
||||
cp_start, cp_end = code_points.split('..')
|
||||
result.extend(xrange(int(cp_start, 16), int(cp_end, 16) + 1))
|
||||
else:
|
||||
code_point = int(code_points, 16)
|
||||
result.append(code_point)
|
||||
return result
|
||||
|
||||
|
||||
def _generate_header_contents(emoji_list):
|
||||
INDENT = ' ' * 4
|
||||
JOINER = ', '
|
||||
|
||||
hex_list = ['0x%04X' % x for x in emoji_list]
|
||||
lines = []
|
||||
tmp_line = '%s%s' % (INDENT, hex_list[0])
|
||||
for hex_str in hex_list[1:]:
|
||||
if len(tmp_line) + len(JOINER) + len(hex_str) >= 100:
|
||||
lines.append(tmp_line + ',')
|
||||
tmp_line = '%s%s' % (INDENT, hex_str)
|
||||
else:
|
||||
tmp_line = '%s%s%s' % (tmp_line, JOINER, hex_str)
|
||||
lines.append(tmp_line)
|
||||
|
||||
template = UNICODE_EMOJI_TEMPLATE
|
||||
template = template.replace('@@@EMOJI_DATA@@@', '\n'.join(lines))
|
||||
return template
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the options, read the emoji data and write the header.
    opt_parser = _create_opt_parser()
    opts, _ = opt_parser.parse_args()

    # optparse does not enforce mandatory options; report a usage error instead of passing
    # None to open() below.
    if not opts.input or not opts.output:
        opt_parser.error('both --input and --output are required')

    emoji_list = _read_emoji_data(opts.input)
    header = _generate_header_contents(emoji_list)
    with open(opts.output, 'w') as header_file:
        header_file.write(header)
|
||||
|
||||
@@ -77,6 +77,7 @@ LOCAL_SRC_FILES += \
|
||||
FontTestUtils.cpp \
|
||||
HbFontCacheTest.cpp \
|
||||
MinikinFontForTest.cpp \
|
||||
MinikinInternalTest.cpp \
|
||||
GraphemeBreakTests.cpp \
|
||||
LayoutUtilsTest.cpp \
|
||||
UnicodeUtils.cpp \
|
||||
|
||||
@@ -148,6 +148,10 @@ TEST(GraphemeBreak, tailoring) {
|
||||
EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
|
||||
EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
|
||||
|
||||
// Do not break before and after zwj with all kinds of emoji characters.
|
||||
EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
|
||||
EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
|
||||
|
||||
// ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
|
||||
EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
|
||||
}
|
||||
|
||||
34
engine/src/flutter/tests/MinikinInternalTest.cpp
Normal file
34
engine/src/flutter/tests/MinikinInternalTest.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2016 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "MinikinInternal.h"
|
||||
|
||||
namespace android {
|
||||
|
||||
// Spot-checks isEmoji() with a handful of code points: four for which it must return true and
// three for which it must return false.
TEST(MinikinInternalTest, isEmojiTest) {
|
||||
// Code points for which isEmoji() is expected to return true.
EXPECT_TRUE(isEmoji(0x0023)); // NUMBER SIGN
|
||||
EXPECT_TRUE(isEmoji(0x0035)); // DIGIT FIVE
|
||||
EXPECT_TRUE(isEmoji(0x1F0CF)); // PLAYING CARD BLACK JOKER
|
||||
EXPECT_TRUE(isEmoji(0x1F1E9)); // REGIONAL INDICATOR SYMBOL LETTER D
|
||||
|
||||
// Code points for which isEmoji() is expected to return false.
EXPECT_FALSE(isEmoji(0x0000)); // <control>
|
||||
EXPECT_FALSE(isEmoji(0x0061)); // LATIN SMALL LETTER A
|
||||
EXPECT_FALSE(isEmoji(0x29E3D)); // A han character.
|
||||
}
|
||||
|
||||
} // namespace android
|
||||
@@ -93,6 +93,8 @@ TEST_F(WordBreakerTest, zwjEmojiSequences) {
|
||||
UTF16(0x1F469), 0x200D, 0x2764, 0x200D, UTF16(0x1F48B), 0x200D, UTF16(0x1F469),
|
||||
// eye + zwj + left speech bubble
|
||||
UTF16(0x1F441), 0x200D, UTF16(0x1F5E8),
|
||||
// CAT FACE + zwj + BUST IN SILHOUETTE
|
||||
UTF16(0x1F431), 0x200D, UTF16(0x1F464),
|
||||
};
|
||||
WordBreaker breaker;
|
||||
breaker.setLocale(icu::Locale::getEnglish());
|
||||
@@ -104,9 +106,12 @@ TEST_F(WordBreakerTest, zwjEmojiSequences) {
|
||||
EXPECT_EQ(17, breaker.next()); // after woman + zwj + heart + zwj + woman
|
||||
EXPECT_EQ(7, breaker.wordStart());
|
||||
EXPECT_EQ(17, breaker.wordEnd());
|
||||
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
|
||||
EXPECT_EQ(22, breaker.next()); // after eye + zwj + left speech bubble
|
||||
EXPECT_EQ(17, breaker.wordStart());
|
||||
EXPECT_EQ(22, breaker.wordEnd());
|
||||
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
|
||||
EXPECT_EQ(22, breaker.wordStart());
|
||||
EXPECT_EQ(27, breaker.wordEnd());
|
||||
}
|
||||
|
||||
TEST_F(WordBreakerTest, emojiWithModifier) {
|
||||
|
||||
Reference in New Issue
Block a user