forked from firka/flutter
Merge "Update emoji character data in Minikin"
This commit is contained in:
committed by
Android (Google) Code Review
commit
e4e4ea1ede
@@ -34,29 +34,39 @@ void assertMinikinLocked() {
|
||||
}
|
||||
|
||||
// Returns true when the code point c is treated as an emoji: a set of hard-coded
// code points/ranges is tested first, and anything else is looked up in
// generated::EMOJI_LIST.
bool isEmoji(uint32_t c) {
|
||||
// U+2695 U+2640 U+2642 are not in emoji category in Unicode 9 but they are now emoji category.
|
||||
// TODO: remove once emoji database is updated.
|
||||
if (c == 0x2695 || c == 0x2640 || c == 0x2642) {
|
||||
// Emoji characters new in Unicode emoji 5.0.
|
||||
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
// TODO: Remove once emoji-data.txt 5.0 is in the tree.
|
||||
if ((0x1F6F7 <= c && c <= 0x1F6F8)
|
||||
|| c == 0x1F91F
|
||||
|| (0x1F928 <= c && c <= 0x1F92F)
|
||||
|| (0x1F931 <= c && c <= 0x1F932)
|
||||
|| c == 0x1F94C
|
||||
|| (0x1F95F <= c && c <= 0x1F96B)
|
||||
|| (0x1F992 <= c && c <= 0x1F997)
|
||||
|| (0x1F9D0 <= c && c <= 0x1F9E6)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Number of entries in the generated table, derived from the array size.
// NOTE(review): std::binary_search requires the range to be sorted; EMOJI_LIST is
// defined elsewhere, so this presumably holds for the generated data - confirm.
const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
|
||||
return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
|
||||
}
|
||||
|
||||
// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
|
||||
// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
// Returns true when c lies in the inclusive code point range U+1F3FB..U+1F3FF.
bool isEmojiModifier(uint32_t c) {
|
||||
return (0x1F3FB <= c && c <= 0x1F3FF);
|
||||
}
|
||||
|
||||
// Based on Emoji_Modifier_Base from
|
||||
// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
|
||||
// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
|
||||
bool isEmojiBase(uint32_t c) {
|
||||
if (0x261D <= c && c <= 0x270D) {
|
||||
return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
|
||||
} else if (0x1F385 <= c && c <= 0x1F93E) {
|
||||
return (c == 0x1F385
|
||||
|| (0x1F3C3 <= c && c <= 0x1F3C4)
|
||||
|| (0x1F3CA <= c && c <= 0x1F3CB)
|
||||
|| (0x1F3C2 <= c && c <= 0x1F3C4)
|
||||
|| c == 0x1F3C7
|
||||
|| (0x1F3CA <= c && c <= 0x1F3CC)
|
||||
|| (0x1F442 <= c && c <= 0x1F443)
|
||||
|| (0x1F446 <= c && c <= 0x1F450)
|
||||
|| (0x1F466 <= c && c <= 0x1F469)
|
||||
@@ -66,7 +76,7 @@ bool isEmojiBase(uint32_t c) {
|
||||
|| (0x1F481 <= c && c <= 0x1F483)
|
||||
|| (0x1F485 <= c && c <= 0x1F487)
|
||||
|| c == 0x1F4AA
|
||||
|| c == 0x1F575
|
||||
|| (0x1F574 <= c && c <= 0x1F575)
|
||||
|| c == 0x1F57A
|
||||
|| c == 0x1F590
|
||||
|| (0x1F595 <= c && c <= 0x1F596)
|
||||
@@ -75,11 +85,13 @@ bool isEmojiBase(uint32_t c) {
|
||||
|| c == 0x1F6A3
|
||||
|| (0x1F6B4 <= c && c <= 0x1F6B6)
|
||||
|| c == 0x1F6C0
|
||||
|| (0x1F918 <= c && c <= 0x1F91E)
|
||||
|| c == 0x1F6CC
|
||||
|| (0x1F918 <= c && c <= 0x1F91C)
|
||||
|| (0x1F91E <= c && c <= 0x1F91F)
|
||||
|| c == 0x1F926
|
||||
|| c == 0x1F930
|
||||
|| (0x1F933 <= c && c <= 0x1F939)
|
||||
|| (0x1F93B <= c && c <= 0x1F93E));
|
||||
|| (0x1F930 <= c && c <= 0x1F939)
|
||||
|| (0x1F93D <= c && c <= 0x1F93E)
|
||||
|| (0x1F9D1 <= c && c <= 0x1F9DD));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ TEST_F(WordBreakerTest, postfixAndPrefix) {
|
||||
EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd());
|
||||
}
|
||||
|
||||
TEST_F(WordBreakerTest, MyanmarKinzi) {
|
||||
TEST_F(WordBreakerTest, myanmarKinzi) {
|
||||
uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C}; // NGA, ASAT, VIRAMA, KA, AA
|
||||
WordBreaker breaker;
|
||||
icu::Locale burmese("my");
|
||||
@@ -158,6 +158,55 @@ TEST_F(WordBreakerTest, emojiWithModifier) {
|
||||
EXPECT_EQ(8, breaker.wordEnd());
|
||||
}
|
||||
|
||||
// Checks the boundaries WordBreaker reports for text made of U+1F6F7 (SLED) and
// U+263A (WHITE SMILING FACE), with and without variation selectors: each call to
// next() is expected to stop after one emoji plus any variation selector following it.
TEST_F(WordBreakerTest, unicode10Emoji) {
|
||||
// Should break between emojis.
|
||||
// NOTE(review): the offsets asserted below imply that each UTF16(...) entry
// occupies two uint16_t units (presumably a surrogate pair); the macro is
// defined elsewhere - confirm.
uint16_t buf[] = {
|
||||
// SLED + SLED
|
||||
UTF16(0x1F6F7), UTF16(0x1F6F7),
|
||||
// SLED + VS15 + SLED
|
||||
UTF16(0x1F6F7), 0xFE0E, UTF16(0x1F6F7),
|
||||
// WHITE SMILING FACE + SLED
|
||||
0x263A, UTF16(0x1F6F7),
|
||||
// WHITE SMILING FACE + VS16 + SLED
|
||||
0x263A, 0xFE0F, UTF16(0x1F6F7),
|
||||
};
|
||||
WordBreaker breaker;
|
||||
breaker.setLocale(icu::Locale::getEnglish());
|
||||
breaker.setText(buf, NELEM(buf));
|
||||
EXPECT_EQ(0, breaker.current());
|
||||
// Units [0, 2): first SLED of "SLED + SLED".
EXPECT_EQ(2, breaker.next());
|
||||
EXPECT_EQ(0, breaker.wordStart());
|
||||
EXPECT_EQ(2, breaker.wordEnd());
|
||||
|
||||
// Units [2, 4): second SLED of "SLED + SLED".
EXPECT_EQ(4, breaker.next());
|
||||
EXPECT_EQ(2, breaker.wordStart());
|
||||
EXPECT_EQ(4, breaker.wordEnd());
|
||||
|
||||
// Units [4, 7): SLED followed by VS15 (0xFE0E); the selector stays with the SLED.
EXPECT_EQ(7, breaker.next());
|
||||
EXPECT_EQ(4, breaker.wordStart());
|
||||
EXPECT_EQ(7, breaker.wordEnd());
|
||||
|
||||
// Units [7, 9): the SLED after VS15.
EXPECT_EQ(9, breaker.next());
|
||||
EXPECT_EQ(7, breaker.wordStart());
|
||||
EXPECT_EQ(9, breaker.wordEnd());
|
||||
|
||||
// Units [9, 10): WHITE SMILING FACE (0x263A) on its own.
EXPECT_EQ(10, breaker.next());
|
||||
EXPECT_EQ(9, breaker.wordStart());
|
||||
EXPECT_EQ(10, breaker.wordEnd());
|
||||
|
||||
// Units [10, 12): the SLED after WHITE SMILING FACE.
EXPECT_EQ(12, breaker.next());
|
||||
EXPECT_EQ(10, breaker.wordStart());
|
||||
EXPECT_EQ(12, breaker.wordEnd());
|
||||
|
||||
// Units [12, 14): WHITE SMILING FACE followed by VS16 (0xFE0F).
EXPECT_EQ(14, breaker.next());
|
||||
EXPECT_EQ(12, breaker.wordStart());
|
||||
EXPECT_EQ(14, breaker.wordEnd());
|
||||
|
||||
// Units [14, 16): the final SLED; 16 is the end of buf.
EXPECT_EQ(16, breaker.next());
|
||||
EXPECT_EQ(14, breaker.wordStart());
|
||||
EXPECT_EQ(16, breaker.wordEnd());
|
||||
}
|
||||
|
||||
TEST_F(WordBreakerTest, flagsSequenceSingleFlag) {
|
||||
const std::string kFlag = "U+1F3F4";
|
||||
const std::string flags = kFlag + " " + kFlag;
|
||||
|
||||
Reference in New Issue
Block a user