// Big-endian integer readers for raw font table bytes.
// Neither helper performs bounds checking: the caller is responsible for making
// sure every byte touched (offset .. offset + width - 1) lies inside the buffer.
// These could perhaps be optimized to use __builtin_bswap16 and friends.

// Reads the two bytes at data[offset] and data[offset + 1] as a big-endian
// 16-bit quantity and returns it widened to uint32_t.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    // Widen before shifting so the arithmetic is done on unsigned 32-bit values.
    const uint32_t hi = static_cast<uint32_t>(data[offset]);
    const uint32_t lo = static_cast<uint32_t>(data[offset + 1]);
    return (hi << 8) | lo;
}

// Reads the four bytes starting at data[offset] as a big-endian 32-bit quantity.
static uint32_t readU32(const uint8_t* data, size_t offset) {
    // Each byte is converted to uint32_t before the shift; shifting a promoted
    // (signed) int left by 24 could otherwise overflow for bytes >= 0x80.
    uint32_t value = 0;
    for (size_t k = 0; k < 4; k++) {
        value = (value << 8) | static_cast<uint32_t>(data[offset + k]);
    }
    return value;
}
// Reports whether c falls inside one of the two variation selector ranges
// checked here: U+FE00..U+FE0F or U+E0100..U+E01EF (both ends inclusive).
static bool isVariationSelector(uint32_t c) {
    if (c >= 0xFE00 && c <= 0xFE0F) {
        return true;
    }
    return c >= 0xE0100 && c <= 0xE01EF;
}
// See http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
// Reports whether c is one of the code points listed below, i.e. a character
// that appears before or after ZWJ (U+200D) in an emoji ZWJ sequence.
bool isZwjEmoji(uint32_t c) {
    switch (c) {
        case 0x2764:   // HEAVY BLACK HEART
        case 0x1F468:  // MAN
        case 0x1F469:  // WOMAN
        case 0x1F48B:  // KISS MARK
        case 0x1F466:  // BOY
        case 0x1F467:  // GIRL
        case 0x1F441:  // EYE
        case 0x1F5E8:  // LEFT SPEECH BUBBLE
            return true;
        default:
            return false;
    }
}
@@ -123,6 +125,32 @@ static bool isLineBreakingHyphen(uint16_t c) { c == 0x2E40); // DOUBLE HYPHEN } +/** + * Determine whether a line break at position i within the buffer buf is valid. This + * represents customization beyond the ICU behavior, because plain ICU provides some + * line break opportunities that we don't want. + **/ +static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, size_t i) { + if (codeUnit == CHAR_SOFT_HYPHEN) { + return false; + } + if (codeUnit == CHAR_ZWJ) { + // Possible emoji ZWJ sequence + uint32_t next_codepoint; + U16_NEXT(buf, i, bufEnd, next_codepoint); + if (next_codepoint == 0x2764 || // HEAVY BLACK HEART + next_codepoint == 0x1F466 || // BOY + next_codepoint == 0x1F467 || // GIRL + next_codepoint == 0x1F468 || // MAN + next_codepoint == 0x1F469 || // WOMAN + next_codepoint == 0x1F48B || // KISS MARK + next_codepoint == 0x1F5E8) { // LEFT SPEECH BUBBLE + return false; + } + } + return true; +} + // Ordinarily, this method measures the text in the range given. However, when paint // is nullptr, it assumes the widths have already been calculated and stored in the // width buffer. @@ -175,8 +203,9 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa } if (i + 1 == current) { // Override ICU's treatment of soft hyphen as a break opportunity, because we want it - // to be a hyphen break, with penalty and drawing behavior. - if (c != CHAR_SOFT_HYPHEN) { + // to be a hyphen break, with penalty and drawing behavior. Also, suppress line + // breaks within emoji ZWJ sequences. + if (isBreakValid(c, mTextBuf.data(), end, i + 1)) { // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so // we can pass the whole word down to Hyphenator like the soft hyphen case. bool wordEndsInHyphen = isLineBreakingHyphen(c);