From 0eaf80b016b6780250227b8273692d2b8978816f Mon Sep 17 00:00:00 2001 From: Roozbeh Pournader Date: Wed, 16 Mar 2016 15:23:20 -0700 Subject: [PATCH] Do not allow line breaks before currency symbols Implement the change proposed in UTC document L2/16-043R (http://www.unicode.org/L2/L2016/16043r-line-break-pr-po.txt) to make sure we do not break between letters and currency symbols. Bug: 24959657 Change-Id: Ia29d0e5625f84870bd910d0c6e19036d17206704 --- engine/src/flutter/libs/minikin/WordBreaker.cpp | 13 +++++++++++++ engine/src/flutter/tests/WordBreakerTests.cpp | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/engine/src/flutter/libs/minikin/WordBreaker.cpp b/engine/src/flutter/libs/minikin/WordBreaker.cpp index 721c5bf0eb..d420a6a0a8 100644 --- a/engine/src/flutter/libs/minikin/WordBreaker.cpp +++ b/engine/src/flutter/libs/minikin/WordBreaker.cpp @@ -79,6 +79,18 @@ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) { uint32_t next_codepoint; size_t next_offset = i; U16_NEXT(buf, next_offset, bufEnd, next_codepoint); + + // Proposed change to LB24 from http://www.unicode.org/L2/L2016/16043r-line-break-pr-po.txt + //(AL | HL) × (PR | PO) + int32_t lineBreak = u_getIntPropertyValue(codePoint, UCHAR_LINE_BREAK); + if (lineBreak == U_LB_ALPHABETIC || lineBreak == U_LB_HEBREW_LETTER) { + lineBreak = u_getIntPropertyValue(next_codepoint, UCHAR_LINE_BREAK); + if (lineBreak == U_LB_PREFIX_NUMERIC || lineBreak == U_LB_POSTFIX_NUMERIC) { + return false; + } + } + + // Known emoji ZWJ sequences if (codePoint == CHAR_ZWJ) { // Possible emoji ZWJ sequence if (next_codepoint == 0x2764 || // HEAVY BLACK HEART @@ -91,6 +103,7 @@ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) { return false; } } + // Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf // EB x EM if (isEmojiModifier(next_codepoint)) { diff --git a/engine/src/flutter/tests/WordBreakerTests.cpp 
b/engine/src/flutter/tests/WordBreakerTests.cpp index cb12722562..480c57da96 100644 --- a/engine/src/flutter/tests/WordBreakerTests.cpp +++ b/engine/src/flutter/tests/WordBreakerTests.cpp @@ -69,6 +69,22 @@ TEST_F(WordBreakerTest, softHyphen) { EXPECT_EQ(0, breaker.breakBadness()); } +TEST_F(WordBreakerTest, postfixAndPrefix) { + uint16_t buf[] = {'U', 'S', 0x00A2, ' ', 'J', 'P', 0x00A5}; // US¢ JP¥ + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + + EXPECT_EQ(4, breaker.next()); // after CENT SIGN and the following space + EXPECT_EQ(0, breaker.wordStart()); // "US¢" + EXPECT_EQ(3, breaker.wordEnd()); + + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string + EXPECT_EQ(4, breaker.wordStart()); // "JP¥" + EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); +} + TEST_F(WordBreakerTest, zwjEmojiSequences) { uint16_t buf[] = { // man + zwj + heart + zwj + man