From 3d10a1ed4f69df9dbadefa0d28a2de6f99eb0e1c Mon Sep 17 00:00:00 2001 From: Roozbeh Pournader Date: Thu, 16 Mar 2017 14:18:59 -0700 Subject: [PATCH] Update emoji character data in Minikin Update emoji character data to Unicode 10.0 / Emoji 5.0 (which also removes U+1F93B MODERN PENTATHLON from the emoji base letters). Also add unit tests for line breaking for new characters (based on earlier work by Seigo Nonaka). Test: All new and existing unit tests pass; Test: Manually tested line breaking of new emojis in TextView. Bug: 28364892 Bug: 28678294 Bug: 30874706 Change-Id: I367cdab09187dc08a66a3112a5181a2b7fb338a5 --- .../flutter/libs/minikin/MinikinInternal.cpp | 36 ++++++++----- .../tests/unittest/WordBreakerTests.cpp | 51 ++++++++++++++++++- 2 files changed, 74 insertions(+), 13 deletions(-) diff --git a/engine/src/flutter/libs/minikin/MinikinInternal.cpp b/engine/src/flutter/libs/minikin/MinikinInternal.cpp index e766dce992..60fa9636f8 100644 --- a/engine/src/flutter/libs/minikin/MinikinInternal.cpp +++ b/engine/src/flutter/libs/minikin/MinikinInternal.cpp @@ -34,29 +34,39 @@ void assertMinikinLocked() { } bool isEmoji(uint32_t c) { - // U+2695 U+2640 U+2642 are not in emoji category in Unicode 9 but they are now emoji category. - // TODO: remove once emoji database is updated. - if (c == 0x2695 || c == 0x2640 || c == 0x2642) { + // Emoji characters new in Unicode emoji 5.0. + // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt + // TODO: Remove once emoji-data.text 5.0 is in the tree. 
+ if ((0x1F6F7 <= c && c <= 0x1F6F8) + || c == 0x1F91F + || (0x1F928 <= c && c <= 0x1F92F) + || (0x1F931 <= c && c <= 0x1F932) + || c == 0x1F94C + || (0x1F95F <= c && c <= 0x1F96B) + || (0x1F992 <= c && c <= 0x1F997) + || (0x1F9D0 <= c && c <= 0x1F9E6)) { return true; } + const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]); return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c); } -// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt +// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt bool isEmojiModifier(uint32_t c) { return (0x1F3FB <= c && c <= 0x1F3FF); } // Based on Emoji_Modifier_Base from -// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt +// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt bool isEmojiBase(uint32_t c) { if (0x261D <= c && c <= 0x270D) { return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D)); } else if (0x1F385 <= c && c <= 0x1F93E) { return (c == 0x1F385 - || (0x1F3C3 <= c && c <= 0x1F3C4) - || (0x1F3CA <= c && c <= 0x1F3CB) + || (0x1F3C2 <= c && c <= 0x1F3C4) + || c == 0x1F3C7 + || (0x1F3CA <= c && c <= 0x1F3CC) || (0x1F442 <= c && c <= 0x1F443) || (0x1F446 <= c && c <= 0x1F450) || (0x1F466 <= c && c <= 0x1F469) @@ -66,7 +76,7 @@ bool isEmojiBase(uint32_t c) { || (0x1F481 <= c && c <= 0x1F483) || (0x1F485 <= c && c <= 0x1F487) || c == 0x1F4AA - || c == 0x1F575 + || (0x1F574 <= c && c <= 0x1F575) || c == 0x1F57A || c == 0x1F590 || (0x1F595 <= c && c <= 0x1F596) @@ -75,11 +85,13 @@ bool isEmojiBase(uint32_t c) { || c == 0x1F6A3 || (0x1F6B4 <= c && c <= 0x1F6B6) || c == 0x1F6C0 - || (0x1F918 <= c && c <= 0x1F91E) + || c == 0x1F6CC + || (0x1F918 <= c && c <= 0x1F91C) + || (0x1F91E <= c && c <= 0x1F91F) || c == 0x1F926 - || c == 0x1F930 - || (0x1F933 <= c && c <= 0x1F939) - || (0x1F93B <= c && c <= 0x1F93E)); + || (0x1F930 <= c && c <= 0x1F939) + || (0x1F93D <= c && c <= 0x1F93E) + || (0x1F9D1 
<= c && c <= 0x1F9DD)); } else { return false; } diff --git a/engine/src/flutter/tests/unittest/WordBreakerTests.cpp b/engine/src/flutter/tests/unittest/WordBreakerTests.cpp index 7971b49070..13e0420c8a 100644 --- a/engine/src/flutter/tests/unittest/WordBreakerTests.cpp +++ b/engine/src/flutter/tests/unittest/WordBreakerTests.cpp @@ -99,7 +99,7 @@ TEST_F(WordBreakerTest, postfixAndPrefix) { EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); } -TEST_F(WordBreakerTest, MyanmarKinzi) { +TEST_F(WordBreakerTest, myanmarKinzi) { uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C}; // NGA, ASAT, VIRAMA, KA, UU WordBreaker breaker; icu::Locale burmese("my"); @@ -158,6 +158,55 @@ TEST_F(WordBreakerTest, emojiWithModifier) { EXPECT_EQ(8, breaker.wordEnd()); } +TEST_F(WordBreakerTest, unicode10Emoji) { + // Should break between emojis. + uint16_t buf[] = { + // SLED + SLED + UTF16(0x1F6F7), UTF16(0x1F6F7), + // SLED + VS15 + SLED + UTF16(0x1F6F7), 0xFE0E, UTF16(0x1F6F7), + // WHITE SMILING FACE + SLED + 0x263A, UTF16(0x1F6F7), + // WHITE SMILING FACE + VS16 + SLED + 0x263A, 0xFE0F, UTF16(0x1F6F7), + }; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(2, breaker.next()); + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(2, breaker.wordEnd()); + + EXPECT_EQ(4, breaker.next()); + EXPECT_EQ(2, breaker.wordStart()); + EXPECT_EQ(4, breaker.wordEnd()); + + EXPECT_EQ(7, breaker.next()); + EXPECT_EQ(4, breaker.wordStart()); + EXPECT_EQ(7, breaker.wordEnd()); + + EXPECT_EQ(9, breaker.next()); + EXPECT_EQ(7, breaker.wordStart()); + EXPECT_EQ(9, breaker.wordEnd()); + + EXPECT_EQ(10, breaker.next()); + EXPECT_EQ(9, breaker.wordStart()); + EXPECT_EQ(10, breaker.wordEnd()); + + EXPECT_EQ(12, breaker.next()); + EXPECT_EQ(10, breaker.wordStart()); + EXPECT_EQ(12, breaker.wordEnd()); + + EXPECT_EQ(14, breaker.next()); + EXPECT_EQ(12, breaker.wordStart()); + EXPECT_EQ(14, 
breaker.wordEnd()); + + EXPECT_EQ(16, breaker.next()); + EXPECT_EQ(14, breaker.wordStart()); + EXPECT_EQ(16, breaker.wordEnd()); +} + TEST_F(WordBreakerTest, flagsSequenceSingleFlag) { const std::string kFlag = "U+1F3F4"; const std::string flags = kFlag + " " + kFlag;