Merge commit '408d44a0834d88a9ed4060dbeb8223fcd321f5d0' into HEAD

This commit is contained in:
Bill Yi
2016-02-17 09:50:45 -08:00
4 changed files with 71 additions and 7 deletions

View File

@@ -29,11 +29,12 @@ namespace android {
// These could perhaps be optimized to use __builtin_bswap16 and friends.
// Reads an unsigned 16-bit value stored most-significant byte first at
// data[offset] and data[offset + 1]. No bounds checking is performed here; the
// caller must make sure both bytes are readable.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    // Widen each byte to uint32_t before shifting so that the whole expression
    // is evaluated in unsigned arithmetic rather than in promoted (signed) int.
    const uint32_t high = static_cast<uint32_t>(data[offset]);
    const uint32_t low = static_cast<uint32_t>(data[offset + 1]);
    return high << 8 | low;
}
// Reads an unsigned 32-bit value stored most-significant byte first in the four
// bytes starting at data[offset]. No bounds checking is performed here; the
// caller must make sure all four bytes are readable.
static uint32_t readU32(const uint8_t* data, size_t offset) {
    // Each byte is widened to uint32_t before shifting. Without the cast the
    // byte is promoted to (signed) int, and shifting a value >= 0x80 left by 24
    // overflows int, which is not a well-defined operation before C++20.
    return static_cast<uint32_t>(data[offset]) << 24 |
           static_cast<uint32_t>(data[offset + 1]) << 16 |
           static_cast<uint32_t>(data[offset + 2]) << 8 |
           static_cast<uint32_t>(data[offset + 3]);
}
static void addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) {
@@ -101,11 +102,13 @@ static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data,
const size_t kGroupSize = 12;
const size_t kStartCharCodeOffset = 0;
const size_t kEndCharCodeOffset = 4;
const size_t kMaxNGroups = 0xfffffff0 / kGroupSize; // protection against overflow
// For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
if (kFirstGroupOffset > size) {
return false;
}
uint32_t nGroups = readU32(data, kNGroupsOffset);
if (kFirstGroupOffset + nGroups * kGroupSize > size) {
if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
return false;
}
for (uint32_t i = 0; i < nGroups; i++) {

View File

@@ -167,6 +167,10 @@ static bool isStickyWhitelisted(uint32_t c) {
return false;
}
// Reports whether c falls in one of the two inclusive code point ranges that
// this file treats as variation selectors: U+FE00..U+FE0F or U+E0100..U+E01EF.
static bool isVariationSelector(uint32_t c) {
    if (c >= 0xFE00 && c <= 0xFE0F) {
        return true;
    }
    return c >= 0xE0100 && c <= 0xE01EF;
}
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
vector<Run>* result) const {
FontLanguage lang = style.getLanguage();
@@ -184,9 +188,11 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
nShorts = 2;
}
}
// Continue using existing font as long as it has coverage and is whitelisted
// Continue using existing font as long as it has coverage and is whitelisted;
// also variation sequences continue existing run.
if (lastFamily == NULL
|| !(isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))) {
|| !((isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))
|| isVariationSelector(ch))) {
FontFamily* family = getFamilyForChar(ch, lang, variant);
if (i == 0 || family != lastFamily) {
size_t start = i;

View File

@@ -22,6 +22,19 @@
namespace android {
// Tells whether c is one of the code points that can sit directly before or
// after a ZWJ inside an emoji ZWJ sequence. The accepted set is the fixed list
// below; see http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
bool isZwjEmoji(uint32_t c) {
    switch (c) {
        case 0x2764:   // HEAVY BLACK HEART
        case 0x1F441:  // EYE
        case 0x1F466:  // BOY
        case 0x1F467:  // GIRL
        case 0x1F468:  // MAN
        case 0x1F469:  // WOMAN
        case 0x1F48B:  // KISS MARK
        case 0x1F5E8:  // LEFT SPEECH BUBBLE
            return true;
        default:
            return false;
    }
}
bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
size_t offset) {
// This implementation closely follows Unicode Standard Annex #29 on
@@ -93,6 +106,19 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
&& u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
return false;
}
// Tailoring: make emoji sequences with ZWJ a single grapheme cluster
if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) {
// look at character before ZWJ to see that both can participate in an emoji zwj sequence
uint32_t c0 = 0;
U16_PREV(buf, start, offset_back, c0);
if (c0 == 0xFE0F && offset_back > start) {
// skip over emoji variation selector
U16_PREV(buf, start, offset_back, c0);
}
if (isZwjEmoji(c0)) {
return false;
}
}
// Rule GB10, Any / Any
return true;
}

View File

@@ -17,6 +17,7 @@
#define VERBOSE_DEBUG 0
#include <limits>
#include <unicode/utf16.h>
#define LOG_TAG "Minikin"
#include <cutils/log.h>
@@ -30,6 +31,7 @@ namespace android {
// U+0009 CHARACTER TABULATION (horizontal tab).
const int CHAR_TAB = 0x0009;
// U+00AD SOFT HYPHEN.
const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
// U+200D ZERO WIDTH JOINER.
const uint16_t CHAR_ZWJ = 0x200D;
// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
// constants are larger than any reasonable actual width score.
@@ -123,6 +125,32 @@ static bool isLineBreakingHyphen(uint16_t c) {
c == 0x2E40); // DOUBLE HYPHEN
}
/**
 * Determine whether a line break at position i within the buffer buf is valid. This
 * represents customization beyond the ICU behavior, because plain ICU provides some
 * line break opportunities that we don't want.
 *
 * codeUnit is the code unit the caller associates with the candidate break: a soft
 * hyphen always rejects the break, and a ZWJ triggers a look-ahead at the code point
 * starting at buf[i]. bufEnd is the exclusive upper bound used when decoding that code
 * point. Returns false when the break must be suppressed, true otherwise.
 **/
static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, size_t i) {
    if (codeUnit == CHAR_SOFT_HYPHEN) {
        return false;
    }
    // U16_NEXT requires i < bufEnd. When the ZWJ is the last code unit before bufEnd
    // there is nothing to look ahead at, so do not read past the end of the range.
    if (codeUnit == CHAR_ZWJ && i < bufEnd) {
        // Possible emoji ZWJ sequence
        uint32_t next_codepoint;
        U16_NEXT(buf, i, bufEnd, next_codepoint);
        switch (next_codepoint) {
            case 0x2764:   // HEAVY BLACK HEART
            case 0x1F466:  // BOY
            case 0x1F467:  // GIRL
            case 0x1F468:  // MAN
            case 0x1F469:  // WOMAN
            case 0x1F48B:  // KISS MARK
            case 0x1F5E8:  // LEFT SPEECH BUBBLE
                return false;
            default:
                break;
        }
    }
    return true;
}
// Ordinarily, this method measures the text in the range given. However, when paint
// is nullptr, it assumes the widths have already been calculated and stored in the
// width buffer.
@@ -175,8 +203,9 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa
}
if (i + 1 == current) {
// Override ICU's treatment of soft hyphen as a break opportunity, because we want it
// to be a hyphen break, with penalty and drawing behavior.
if (c != CHAR_SOFT_HYPHEN) {
// to be a hyphen break, with penalty and drawing behavior. Also, suppress line
// breaks within emoji ZWJ sequences.
if (isBreakValid(c, mTextBuf.data(), end, i + 1)) {
// TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so
// we can pass the whole word down to Hyphenator like the soft hyphen case.
bool wordEndsInHyphen = isLineBreakingHyphen(c);