diff --git a/engine/src/flutter/libs/minikin/WordBreaker.cpp b/engine/src/flutter/libs/minikin/WordBreaker.cpp index ec84c39f9a..721c5bf0eb 100644 --- a/engine/src/flutter/libs/minikin/WordBreaker.cpp +++ b/engine/src/flutter/libs/minikin/WordBreaker.cpp @@ -17,7 +17,8 @@ #define LOG_TAG "Minikin" #include -#include "minikin/WordBreaker.h" +#include +#include "MinikinInternal.h" #include #include @@ -25,7 +26,7 @@ namespace android { const uint32_t CHAR_SOFT_HYPHEN = 0x00AD; -const uint16_t CHAR_ZWJ = 0x200D; +const uint32_t CHAR_ZWJ = 0x200D; void WordBreaker::setLocale(const icu::Locale& locale) { UErrorCode status = U_ZERO_ERROR; @@ -68,14 +69,18 @@ enum ScanState { * represents customization beyond the ICU behavior, because plain ICU provides some * line break opportunities that we don't want. **/ -static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, size_t i) { - if (codeUnit == CHAR_SOFT_HYPHEN) { +static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) { + uint32_t codePoint; + size_t prev_offset = i; + U16_PREV(buf, 0, prev_offset, codePoint); + if (codePoint == CHAR_SOFT_HYPHEN) { return false; } - if (codeUnit == CHAR_ZWJ) { + uint32_t next_codepoint; + size_t next_offset = i; + U16_NEXT(buf, next_offset, bufEnd, next_codepoint); + if (codePoint == CHAR_ZWJ) { // Possible emoji ZWJ sequence - uint32_t next_codepoint; - U16_NEXT(buf, i, bufEnd, next_codepoint); if (next_codepoint == 0x2764 || // HEAVY BLACK HEART next_codepoint == 0x1F466 || // BOY next_codepoint == 0x1F467 || // GIRL @@ -86,6 +91,17 @@ static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, return false; } } + // Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf + // EB x EM + if (isEmojiModifier(next_codepoint)) { + if (codePoint == 0xFE0F && prev_offset > 0) { + // skip over emoji variation selector + U16_PREV(buf, 0, prev_offset, codePoint); + } + if (isEmojiBase(codePoint)) { + return 
false; + } + } return true; } @@ -176,7 +192,7 @@ ssize_t WordBreaker::next() { result = mBreakIterator->next(); } } while (result != icu::BreakIterator::DONE && (size_t)result != mTextSize - && !isBreakValid(mText[result - 1], mText, mTextSize, result)); + && !isBreakValid(mText, mTextSize, result)); mCurrent = (ssize_t)result; return mCurrent; } diff --git a/engine/src/flutter/tests/WordBreakerTests.cpp b/engine/src/flutter/tests/WordBreakerTests.cpp index 6c5e4795c8..cb12722562 100644 --- a/engine/src/flutter/tests/WordBreakerTests.cpp +++ b/engine/src/flutter/tests/WordBreakerTests.cpp @@ -29,6 +29,8 @@ #define NELEM(x) ((sizeof(x) / sizeof((x)[0]))) #endif +#define UTF16(codepoint) U16_LEAD(codepoint), U16_TRAIL(codepoint) + using namespace android; typedef ICUTestBase WordBreakerTest; @@ -70,11 +72,11 @@ TEST_F(WordBreakerTest, softHyphen) { TEST_F(WordBreakerTest, zwjEmojiSequences) { uint16_t buf[] = { // man + zwj + heart + zwj + man - 0xD83D, 0xDC68, 0x200D, 0x2764, 0x200D, 0xD83D, 0xDC68, - // woman + zwj + heart + zwj + woman - 0xD83D, 0xDC69, 0x200D, 0x2764, 0x200D, 0xD83D, 0xDC8B, 0x200D, 0xD83D, 0xDC69, + UTF16(0x1F468), 0x200D, 0x2764, 0x200D, UTF16(0x1F468), + // woman + zwj + heart + zwj + kiss mark + zwj + woman + UTF16(0x1F469), 0x200D, 0x2764, 0x200D, UTF16(0x1F48B), 0x200D, UTF16(0x1F469), // eye + zwj + left speech bubble - 0xD83D, 0xDC41, 0x200D, 0xD83D, 0xDDE8, + UTF16(0x1F441), 0x200D, UTF16(0x1F5E8), }; WordBreaker breaker; breaker.setLocale(icu::Locale::getEnglish()); @@ -91,6 +93,23 @@ TEST_F(WordBreakerTest, zwjEmojiSequences) { EXPECT_EQ(22, breaker.wordEnd()); } +TEST_F(WordBreakerTest, emojiWithModifier) { + uint16_t buf[] = { + UTF16(0x1F466), UTF16(0x1F3FB), // boy + type 1-2 fitzpatrick modifier + 0x270C, 0xFE0F, UTF16(0x1F3FF) // victory hand + emoji style + type 6 fitzpatrick modifier + }; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, 
breaker.current()); + EXPECT_EQ(4, breaker.next()); // after boy + type 1-2 fitzpatrick modifier + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(4, breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(4, breaker.wordStart()); + EXPECT_EQ(8, breaker.wordEnd()); +} + TEST_F(WordBreakerTest, punct) { uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l' ,'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '!'};