// Returns true if |c| is an emoji modifier, i.e. lies in U+1F3FB..U+1F3FF.
// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
bool isEmojiModifier(uint32_t c) {
    return (0x1F3FB <= c && c <= 0x1F3FF);
}

// Returns true if |c| is a code point that may be followed by an emoji
// modifier to form a single grapheme cluster (proposed Rule GB9c).
// Based on Emoji_Modifier_Base from
// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
//
// The two outer range checks are only a fast reject; the inner expression
// lists the individual code points and inclusive ranges that qualify.
bool isEmojiBase(uint32_t c) {
    if (0x261D <= c && c <= 0x270D) {
        return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
    } else if (0x1F385 <= c && c <= 0x1F93E) {
        // Both bounds of every range must be joined with &&. Joining them
        // with || is true for every c and would classify the whole
        // 0x1F385..0x1F93E span (including the modifiers themselves) as bases.
        return (c == 0x1F385
                || (0x1F3C3 <= c && c <= 0x1F3C4)
                || (0x1F3CA <= c && c <= 0x1F3CB)
                || (0x1F442 <= c && c <= 0x1F443)
                || (0x1F446 <= c && c <= 0x1F450)
                || (0x1F466 <= c && c <= 0x1F469)
                || c == 0x1F46E
                || (0x1F470 <= c && c <= 0x1F478)
                || c == 0x1F47C
                || (0x1F481 <= c && c <= 0x1F483)
                || (0x1F485 <= c && c <= 0x1F487)
                || c == 0x1F4AA
                || c == 0x1F575
                || c == 0x1F57A
                || c == 0x1F590
                || (0x1F595 <= c && c <= 0x1F596)
                || (0x1F645 <= c && c <= 0x1F647)
                || (0x1F64B <= c && c <= 0x1F64F)
                || c == 0x1F6A3
                || (0x1F6B4 <= c && c <= 0x1F6B6)
                || c == 0x1F6C0
                || (0x1F918 <= c && c <= 0x1F91E)
                || c == 0x1F926
                || c == 0x1F930
                || (0x1F933 <= c && c <= 0x1F939)
                || (0x1F93B <= c && c <= 0x1F93E));
    } else {
        return false;
    }
}
sign of the horns + modifier + EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF")); // selfie (Unicode 9) + modifier + + // adding emoji style variation selector doesn't affect grapheme cluster + EXPECT_TRUE(IsBreak("U+270C U+FE0E | U+1F3FB")); // victory hand + text style + modifier + EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB")); // victory hand + emoji style + modifier + + // heart is not an emoji base + EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB")); // heart + modifier + EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB")); // heart + text style + modifier + EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB")); // heart + emoji style + modifier + + // rat is not an emoji modifier + EXPECT_TRUE(IsBreak("U+1F466 | U+1F400")); // boy + rat +} + TEST(GraphemeBreak, offsets) { uint16_t string[] = { 0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301 }; EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 2));