Merge "Update emoji character data in Minikin"

This commit is contained in:
TreeHugger Robot
2017-03-17 03:20:24 +00:00
committed by Android (Google) Code Review
2 changed files with 74 additions and 13 deletions

View File

@@ -34,29 +34,39 @@ void assertMinikinLocked() {
}
bool isEmoji(uint32_t c) {
// U+2695 U+2640 U+2642 are not in emoji category in Unicode 9 but they are now emoji category.
// TODO: remove once emoji database is updated.
if (c == 0x2695 || c == 0x2640 || c == 0x2642) {
// Emoji characters new in Unicode emoji 5.0.
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
// TODO: Remove once emoji-data.txt 5.0 is in the tree.
if ((0x1F6F7 <= c && c <= 0x1F6F8)
|| c == 0x1F91F
|| (0x1F928 <= c && c <= 0x1F92F)
|| (0x1F931 <= c && c <= 0x1F932)
|| c == 0x1F94C
|| (0x1F95F <= c && c <= 0x1F96B)
|| (0x1F992 <= c && c <= 0x1F997)
|| (0x1F9D0 <= c && c <= 0x1F9E6)) {
return true;
}
const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
}
// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
bool isEmojiModifier(uint32_t c) {
    // Inclusive bounds of the single contiguous Emoji_Modifier range, U+1F3FB..U+1F3FF.
    constexpr uint32_t kFirstModifier = 0x1F3FB;
    constexpr uint32_t kLastModifier = 0x1F3FF;
    return c >= kFirstModifier && c <= kLastModifier;
}
// Based on Emoji_Modifier_Base from
// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
bool isEmojiBase(uint32_t c) {
if (0x261D <= c && c <= 0x270D) {
return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
} else if (0x1F385 <= c && c <= 0x1F93E) {
return (c == 0x1F385
|| (0x1F3C3 <= c && c <= 0x1F3C4)
|| (0x1F3CA <= c && c <= 0x1F3CB)
|| (0x1F3C2 <= c && c <= 0x1F3C4)
|| c == 0x1F3C7
|| (0x1F3CA <= c && c <= 0x1F3CC)
|| (0x1F442 <= c && c <= 0x1F443)
|| (0x1F446 <= c && c <= 0x1F450)
|| (0x1F466 <= c && c <= 0x1F469)
@@ -66,7 +76,7 @@ bool isEmojiBase(uint32_t c) {
|| (0x1F481 <= c && c <= 0x1F483)
|| (0x1F485 <= c && c <= 0x1F487)
|| c == 0x1F4AA
|| c == 0x1F575
|| (0x1F574 <= c && c <= 0x1F575)
|| c == 0x1F57A
|| c == 0x1F590
|| (0x1F595 <= c && c <= 0x1F596)
@@ -75,11 +85,13 @@ bool isEmojiBase(uint32_t c) {
|| c == 0x1F6A3
|| (0x1F6B4 <= c && c <= 0x1F6B6)
|| c == 0x1F6C0
|| (0x1F918 <= c && c <= 0x1F91E)
|| c == 0x1F6CC
|| (0x1F918 <= c && c <= 0x1F91C)
|| (0x1F91E <= c && c <= 0x1F91F)
|| c == 0x1F926
|| c == 0x1F930
|| (0x1F933 <= c && c <= 0x1F939)
|| (0x1F93B <= c && c <= 0x1F93E));
|| (0x1F930 <= c && c <= 0x1F939)
|| (0x1F93D <= c && c <= 0x1F93E)
|| (0x1F9D1 <= c && c <= 0x1F9DD));
} else {
return false;
}

View File

@@ -99,7 +99,7 @@ TEST_F(WordBreakerTest, postfixAndPrefix) {
EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd());
}
TEST_F(WordBreakerTest, MyanmarKinzi) {
TEST_F(WordBreakerTest, myanmarKinzi) {
uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C}; // NGA, ASAT, VIRAMA, KA, UU
WordBreaker breaker;
icu::Locale burmese("my");
@@ -158,6 +158,55 @@ TEST_F(WordBreakerTest, emojiWithModifier) {
EXPECT_EQ(8, breaker.wordEnd());
}
// Checks that the word breaker reports a boundary after every emoji in a run of
// adjacent emoji, where an emoji may be followed by a variation selector
// (VS15 = 0xFE0E, VS16 = 0xFE0F) that stays attached to it.
// All offsets below are indices into buf, i.e. UTF-16 code units.
TEST_F(WordBreakerTest, unicode10Emoji) {
// Should break between emojis.
// NOTE(review): the expected offsets imply that each UTF16(0x1F6F7) contributes two
// code units to buf (presumably a surrogate pair), giving 16 units in total -- confirm
// against the UTF16 macro definition.
uint16_t buf[] = {
// SLED + SLED
UTF16(0x1F6F7), UTF16(0x1F6F7),
// SLED + VS15 + SLED
UTF16(0x1F6F7), 0xFE0E, UTF16(0x1F6F7),
// WHITE SMILING FACE + SLED
0x263A, UTF16(0x1F6F7),
// WHITE SMILING FACE + VS16 + SLED
0x263A, 0xFE0F, UTF16(0x1F6F7),
};
WordBreaker breaker;
breaker.setLocale(icu::Locale::getEnglish());
breaker.setText(buf, NELEM(buf));
// Before any next() call the breaker sits at the start of the text.
EXPECT_EQ(0, breaker.current());
// Each group below advances once with next() and then checks the word range
// reported for the segment that was just passed.
// First SLED: [0, 2).
EXPECT_EQ(2, breaker.next());
EXPECT_EQ(0, breaker.wordStart());
EXPECT_EQ(2, breaker.wordEnd());
// Second SLED: [2, 4).
EXPECT_EQ(4, breaker.next());
EXPECT_EQ(2, breaker.wordStart());
EXPECT_EQ(4, breaker.wordEnd());
// SLED + VS15 stay together: [4, 7).
EXPECT_EQ(7, breaker.next());
EXPECT_EQ(4, breaker.wordStart());
EXPECT_EQ(7, breaker.wordEnd());
// SLED following the VS15 sequence: [7, 9).
EXPECT_EQ(9, breaker.next());
EXPECT_EQ(7, breaker.wordStart());
EXPECT_EQ(9, breaker.wordEnd());
// WHITE SMILING FACE without a variation selector: [9, 10).
EXPECT_EQ(10, breaker.next());
EXPECT_EQ(9, breaker.wordStart());
EXPECT_EQ(10, breaker.wordEnd());
// SLED following the bare WHITE SMILING FACE: [10, 12).
EXPECT_EQ(12, breaker.next());
EXPECT_EQ(10, breaker.wordStart());
EXPECT_EQ(12, breaker.wordEnd());
// WHITE SMILING FACE + VS16 stay together: [12, 14).
EXPECT_EQ(14, breaker.next());
EXPECT_EQ(12, breaker.wordStart());
EXPECT_EQ(14, breaker.wordEnd());
// Final SLED, ending at the last offset checked by this test: [14, 16).
EXPECT_EQ(16, breaker.next());
EXPECT_EQ(14, breaker.wordStart());
EXPECT_EQ(16, breaker.wordEnd());
}
TEST_F(WordBreakerTest, flagsSequenceSingleFlag) {
const std::string kFlag = "U+1F3F4";
const std::string flags = kFlag + " " + kFlag;