From 34c39bcde69c8505fd72c460ee601dff1ad4c8c9 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 29 Oct 2015 11:39:58 -0700 Subject: [PATCH 1/5] Accept variation selector in emoji sequences - DO NOT MERGE This patch basically ignores variation selectors for the purpose of itemization into font runs. This allows GSUB to be applied when input sequences contain variation selectors. Bug: 25368653 Change-Id: I9c1d325ae0cd322c21b7e850d0ec4d73551b2372 --- engine/src/flutter/libs/minikin/FontCollection.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/engine/src/flutter/libs/minikin/FontCollection.cpp b/engine/src/flutter/libs/minikin/FontCollection.cpp index b4bfe313ba..2bcbc03779 100644 --- a/engine/src/flutter/libs/minikin/FontCollection.cpp +++ b/engine/src/flutter/libs/minikin/FontCollection.cpp @@ -167,6 +167,10 @@ static bool isStickyWhitelisted(uint32_t c) { return false; } +static bool isVariationSelector(uint32_t c) { + return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF); +} + void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, vector* result) const { FontLanguage lang = style.getLanguage(); @@ -184,9 +188,11 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty nShorts = 2; } } - // Continue using existing font as long as it has coverage and is whitelisted + // Continue using existing font as long as it has coverage and is whitelisted; + // also variation sequences continue existing run. if (lastFamily == NULL - || !(isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))) { + || !((isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch)) + || isVariationSelector(ch))) { FontFamily* family = getFamilyForChar(ch, lang, variant); if (i == 0 || family != lastFamily) { size_t start = i; From 5001f2ae481c3b77ffcb2b64803cc9e96e5cfeb9 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 2 Nov 2015 17:17:24 -0800 Subject: [PATCH 2/5] Suppress linebreaks in emoji ZWJ sequences - DO NOT MERGE Due to the way emoji ZWJ sequences are defined, the ICU line breaking algorithm determines that there are valid line breaks inside the sequence. This patch suppresses these line breaks. Bug: 25433289 Change-Id: I225ebebc0f4186e4b8f48fee399c4a62b3f0218a --- .../src/flutter/libs/minikin/LineBreaker.cpp | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/engine/src/flutter/libs/minikin/LineBreaker.cpp b/engine/src/flutter/libs/minikin/LineBreaker.cpp index a832ca20e2..77374feaa0 100644 --- a/engine/src/flutter/libs/minikin/LineBreaker.cpp +++ b/engine/src/flutter/libs/minikin/LineBreaker.cpp @@ -17,6 +17,7 @@ #define VERBOSE_DEBUG 0 #include +#include #define LOG_TAG "Minikin" #include @@ -30,6 +31,7 @@ namespace android { const int CHAR_TAB = 0x0009; const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; +const uint16_t CHAR_ZWJ = 0x200D; // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these // constants are larger than any reasonable actual width score. @@ -123,6 +125,32 @@ static bool isLineBreakingHyphen(uint16_t c) { c == 0x2E40); // DOUBLE HYPHEN } +/** + * Determine whether a line break at position i within the buffer buf is valid. This + * represents customization beyond the ICU behavior, because plain ICU provides some + * line break opportunities that we don't want. + **/ +static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, size_t i) { + if (codeUnit == CHAR_SOFT_HYPHEN) { + return false; + } + if (codeUnit == CHAR_ZWJ) { + // Possible emoji ZWJ sequence + uint32_t next_codepoint; + U16_NEXT(buf, i, bufEnd, next_codepoint); + if (next_codepoint == 0x2764 || // HEAVY BLACK HEART + next_codepoint == 0x1F466 || // BOY + next_codepoint == 0x1F467 || // GIRL + next_codepoint == 0x1F468 || // MAN + next_codepoint == 0x1F469 || // WOMAN + next_codepoint == 0x1F48B || // KISS MARK + next_codepoint == 0x1F5E8) { // LEFT SPEECH BUBBLE + return false; + } + } + return true; +} + // Ordinarily, this method measures the text in the range given. However, when paint // is nullptr, it assumes the widths have already been calculated and stored in the // width buffer. @@ -175,8 +203,9 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa } if (i + 1 == current) { // Override ICU's treatment of soft hyphen as a break opportunity, because we want it - // to be a hyphen break, with penalty and drawing behavior. - if (c != CHAR_SOFT_HYPHEN) { + // to be a hyphen break, with penalty and drawing behavior. Also, suppress line + // breaks within emoji ZWJ sequences. + if (isBreakValid(c, mTextBuf.data(), end, i + 1)) { // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so // we can pass the whole word down to Hyphenator like the soft hyphen case. bool wordEndsInHyphen = isLineBreakingHyphen(c); From 1fd1c390217da22a54b49e9a91bc79b0323e6a73 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 30 Nov 2015 15:04:59 -0800 Subject: [PATCH 3/5] Avoid integer overflows in parsing fonts A malformed TTF can cause size calculations to overflow. This patch checks the maximum reasonable value so that the total size fits in 32 bits. It also adds some explicit casting to avoid possible technical undefined behavior when parsing sized unsigned values. Bug: 25645298 Change-Id: Id4716132041a6f4f1fbb73ec4e445391cf7d9616 (cherry picked from commit 371e5dbb3f61fbfb653eed4fa2e452f8978419ff) --- engine/src/flutter/libs/minikin/CmapCoverage.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/engine/src/flutter/libs/minikin/CmapCoverage.cpp b/engine/src/flutter/libs/minikin/CmapCoverage.cpp index 4156d69d5a..8be45d173c 100644 --- a/engine/src/flutter/libs/minikin/CmapCoverage.cpp +++ b/engine/src/flutter/libs/minikin/CmapCoverage.cpp @@ -30,11 +30,12 @@ namespace android { // These could perhaps be optimized to use __builtin_bswap16 and friends. static uint32_t readU16(const uint8_t* data, size_t offset) { - return data[offset] << 8 | data[offset + 1]; + return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]); } static uint32_t readU32(const uint8_t* data, size_t offset) { - return data[offset] << 24 | data[offset + 1] << 16 | data[offset + 2] << 8 | data[offset + 3]; + return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | + ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); } static void addRange(vector &coverage, uint32_t start, uint32_t end) { @@ -101,11 +102,13 @@ static bool getCoverageFormat12(vector& coverage, const uint8_t* data, const size_t kGroupSize = 12; const size_t kStartCharCodeOffset = 0; const size_t kEndCharCodeOffset = 4; + const size_t kMaxNGroups = 0xfffffff0 / kGroupSize; // protection against overflow + // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits. if (kFirstGroupOffset > size) { return false; } uint32_t nGroups = readU32(data, kNGroupsOffset); - if (kFirstGroupOffset + nGroups * kGroupSize > size) { + if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) { return false; } for (uint32_t i = 0; i < nGroups; i++) { From f5d2fa97bbdbf5075e293fb641cd2e5dbaa29cfa Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Wed, 6 Jan 2016 14:31:23 -0800 Subject: [PATCH 4/5] Reject fonts with invalid ranges in cmap A corrupt or malicious font may have a negative size in its cmap range, which in turn could lead to memory corruption. This patch detects the case and rejects the font, and also includes an assertion in the sparse bit set implementation if we missed any such case. External issue: https://code.google.com/p/android/issues/detail?id=192618 Bug: 26413177 Change-Id: Icc0c80e4ef389abba0964495b89aa0fae3e9f4b2 --- .../src/flutter/libs/minikin/CmapCoverage.cpp | 41 +++++++++++-------- .../src/flutter/libs/minikin/SparseBitSet.cpp | 2 + 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/engine/src/flutter/libs/minikin/CmapCoverage.cpp b/engine/src/flutter/libs/minikin/CmapCoverage.cpp index 8be45d173c..4c9643a1d4 100644 --- a/engine/src/flutter/libs/minikin/CmapCoverage.cpp +++ b/engine/src/flutter/libs/minikin/CmapCoverage.cpp @@ -50,7 +50,7 @@ static void addRange(vector &coverage, uint32_t start, uint32_t end) { } } -// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector +// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector static bool getCoverageFormat4(vector& coverage, const uint8_t* data, size_t size) { const size_t kSegCountOffset = 6; const size_t kEndCountOffset = 14; @@ -64,28 +64,32 @@ static bool getCoverageFormat4(vector& coverage, const uint8_t* data, return false; } for (size_t i = 0; i < segCount; i++) { - int end = readU16(data, kEndCountOffset + 2 * i); - int start = readU16(data, kHeaderSize + 2 * (segCount + i)); - int rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); + uint32_t end = readU16(data, kEndCountOffset + 2 * i); + uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i)); + if (end < start) { + // invalid segment range: size must be positive + return false; + } + uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); if (rangeOffset == 0) { - int delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i)); + uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i)); if (((end + delta) & 0xffff) > end - start) { addRange(coverage, start, end + 1); } else { - for (int j = start; j < end + 1; j++) { + for (uint32_t j = start; j < end + 1; j++) { if (((j + delta) & 0xffff) != 0) { addRange(coverage, j, j + 1); } } } } else { - for (int j = start; j < end + 1; j++) { + for (uint32_t j = start; j < end + 1; j++) { uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset + (i + j - start) * 2; if (actualRangeOffset + 2 > size) { return false; } - int glyphId = readU16(data, actualRangeOffset); + uint32_t glyphId = readU16(data, actualRangeOffset); if (glyphId != 0) { addRange(coverage, j, j + 1); } @@ -115,6 +119,10 @@ static bool getCoverageFormat12(vector& coverage, const uint8_t* data, uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize; uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset); uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset); + if (end < start) { + // invalid group range: size must be positive + return false; + } addRange(coverage, start, end + 1); // file is inclusive, vector is exclusive } return true; @@ -128,18 +136,19 @@ bool CmapCoverage::getCoverage(SparseBitSet& coverage, const uint8_t* cmap_data, const size_t kPlatformIdOffset = 0; const size_t kEncodingIdOffset = 2; const size_t kOffsetOffset = 4; - const int kMicrosoftPlatformId = 3; - const int kUnicodeBmpEncodingId = 1; - const int kUnicodeUcs4EncodingId = 10; + const uint16_t kMicrosoftPlatformId = 3; + const uint16_t kUnicodeBmpEncodingId = 1; + const uint16_t kUnicodeUcs4EncodingId = 10; + const uint32_t kNoTable = UINT32_MAX; if (kHeaderSize > cmap_size) { return false; } - int numTables = readU16(cmap_data, kNumTablesOffset); + uint32_t numTables = readU16(cmap_data, kNumTablesOffset); if (kHeaderSize + numTables * kTableSize > cmap_size) { return false; } - int bestTable = -1; - for (int i = 0; i < numTables; i++) { + uint32_t bestTable = kNoTable; + for (uint32_t i = 0; i < numTables; i++) { uint16_t platformId = readU16(cmap_data, kHeaderSize + i * kTableSize + kPlatformIdOffset); uint16_t encodingId = readU16(cmap_data, kHeaderSize + i * kTableSize + kEncodingIdOffset); if (platformId == kMicrosoftPlatformId && encodingId == kUnicodeUcs4EncodingId) { @@ -152,11 +161,11 @@ bool CmapCoverage::getCoverage(SparseBitSet& coverage, const uint8_t* cmap_data, #ifdef PRINTF_DEBUG printf("best table = %d\n", bestTable); #endif - if (bestTable < 0) { + if (bestTable == kNoTable) { return false; } uint32_t offset = readU32(cmap_data, kHeaderSize + bestTable * kTableSize + kOffsetOffset); - if (offset + 2 > cmap_size) { + if (offset > cmap_size - 2) { return false; } uint16_t format = readU16(cmap_data, offset); diff --git a/engine/src/flutter/libs/minikin/SparseBitSet.cpp b/engine/src/flutter/libs/minikin/SparseBitSet.cpp index 7acb7ba345..2265ff2bb2 100644 --- a/engine/src/flutter/libs/minikin/SparseBitSet.cpp +++ b/engine/src/flutter/libs/minikin/SparseBitSet.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -71,6 +72,7 @@ void SparseBitSet::initFromRanges(const uint32_t* ranges, size_t nRanges) { for (size_t i = 0; i < nRanges; i++) { uint32_t start = ranges[i * 2]; uint32_t end = ranges[i * 2 + 1]; + LOG_ALWAYS_FATAL_IF(end < start); // make sure range size is nonnegative uint32_t startPage = start >> kLogValuesPerPage; uint32_t endPage = (end - 1) >> kLogValuesPerPage; if (startPage >= nonzeroPageEnd) { From c4e24421ecb8b4532bde4e759625107367cd60e3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 29 Oct 2015 14:06:07 -0700 Subject: [PATCH 5/5] Tailor grapheme boundaries so sequence emoji are one grapheme - DO NOT MERGE Make it so it's not possible to position the cursor inside an emoji formed by a sequence including zero-width joiners. Bug: 25368653 Change-Id: I67ec0874cd1505f3c82ab91492ffc3d39a52fae6 --- .../flutter/libs/minikin/GraphemeBreak.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/engine/src/flutter/libs/minikin/GraphemeBreak.cpp b/engine/src/flutter/libs/minikin/GraphemeBreak.cpp index f8f386c0de..56d5b238d3 100644 --- a/engine/src/flutter/libs/minikin/GraphemeBreak.cpp +++ b/engine/src/flutter/libs/minikin/GraphemeBreak.cpp @@ -22,6 +22,19 @@ namespace android { +// Returns true if the character appears before or after zwj in a zwj emoji sequence. See +// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html +bool isZwjEmoji(uint32_t c) { + return (c == 0x2764 // HEAVY BLACK HEART + || c == 0x1F468 // MAN + || c == 0x1F469 // WOMAN + || c == 0x1F48B // KISS MARK + || c == 0x1F466 // BOY + || c == 0x1F467 // GIRL + || c == 0x1F441 // EYE + || c == 0x1F5E8); // LEFT SPEECH BUBBLE +} + bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset) { // This implementation closely follows Unicode Standard Annex #29 on @@ -93,6 +106,19 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { return false; } + // Tailoring: make emoji sequences with ZWJ a single grapheme cluster + if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) { + // look at character before ZWJ to see that both can participate in an emoji zwj sequence + uint32_t c0 = 0; + U16_PREV(buf, start, offset_back, c0); + if (c0 == 0xFE0F && offset_back > start) { + // skip over emoji variation selector + U16_PREV(buf, start, offset_back, c0); + } + if (isZwjEmoji(c0)) { + return false; + } + } // Rule GB10, Any / Any return true; }