Language and variant selection

This patch adds a "lang" pseudo-CSS property and uses it both to select
an appropriate font and to control the "locl" OpenType feature to get the
most appropriate rendering for the language and script.  In addition,
the "-minikin-variant" property selects between "compact" and "elegant"
variants of a font, as the former is needed for vertically cramped
spaces.

This is part of the fix for bug 15179652 "Japanese font isn't shown on
LMP".

Change-Id: I7fab23c12d4c797a6d339a16e497b79a3afe9df1
This commit is contained in:
Raph Levien
2014-05-27 08:05:51 -07:00
parent e1a0422aae
commit 7d4090fbe9
7 changed files with 214 additions and 22 deletions

View File

@@ -25,26 +25,31 @@ namespace android {
enum CssTag { enum CssTag {
unknown, unknown,
fontSize, fontSize,
fontWeight,
fontStyle, fontStyle,
minikinHinting, fontWeight,
cssLang,
minikinBidi, minikinBidi,
minikinHinting,
minikinVariant,
}; };
const std::string cssTagNames[] = { const std::string cssTagNames[] = {
"unknown", "unknown",
"font-size", "font-size",
"font-weight",
"font-style", "font-style",
"-minikin-hinting", "font-weight",
"lang",
"-minikin-bidi", "-minikin-bidi",
"-minikin-hinting",
"-minikin-variant",
}; };
class CssValue { class CssValue {
public: public:
enum Type { enum Type {
UNKNOWN, UNKNOWN,
FLOAT FLOAT,
STRING
}; };
enum Units { enum Units {
SCALAR, SCALAR,
@@ -58,14 +63,20 @@ public:
Type getType() const { return mType; } Type getType() const { return mType; }
double getFloatValue() const { return floatValue; } double getFloatValue() const { return floatValue; }
int getIntValue() const { return floatValue; } int getIntValue() const { return floatValue; }
std::string getStringValue() const { return stringValue; }
std::string toString(CssTag tag) const; std::string toString(CssTag tag) const;
void setFloatValue(double v) { void setFloatValue(double v) {
mType = FLOAT; mType = FLOAT;
floatValue = v; floatValue = v;
} }
void setStringValue(const std::string& v) {
mType = STRING;
stringValue = v;
}
private: private:
Type mType; Type mType;
double floatValue; double floatValue;
std::string stringValue;
Units mUnits; Units mUnits;
}; };
@@ -85,4 +96,4 @@ private:
} // namespace android } // namespace android
#endif // MINIKIN_CSS_PARSE_H #endif // MINIKIN_CSS_PARSE_H

View File

@@ -32,7 +32,8 @@ public:
~FontCollection(); ~FontCollection();
const FontFamily* getFamilyForChar(uint32_t ch) const; const FontFamily* getFamilyForChar(uint32_t ch, FontLanguage lang, int variant) const;
class Run { class Run {
public: public:
// Do copy constructor, assignment, destructor so it can be used in vectors // Do copy constructor, assignment, destructor so it can be used in vectors

View File

@@ -25,6 +25,36 @@
namespace android { namespace android {
class MinikinFont;
// FontLanguage is a compact representation of a bcp-47 language tag. It
// does not capture all possible information, only what directly affects
// font rendering.
//
// The tag is packed into one 32-bit word: the low 16 bits carry the base
// language, and bits 16 and 17 flag the Hans and Hant scripts respectively.
class FontLanguage {
    friend class FontStyle;

public:
    // Empty language: every bit is clear.
    FontLanguage() : mBits(0) {}

    // Parse from string
    FontLanguage(const char* buf, size_t size);

    // Two languages are equal when their packed representations are equal.
    bool operator==(const FontLanguage other) const {
        return other.mBits == mBits;
    }

    // 0 = no match, 1 = language matches, 2 = language and script match
    int match(const FontLanguage other) const;

private:
    // Rebuild a language from its packed representation.
    explicit FontLanguage(uint32_t bits) : mBits(bits) {}

    // Raw packed representation.
    uint32_t bits() const { return mBits; }

    static const uint32_t kBaseLangMask = 0xffff;
    static const uint32_t kHansFlag = 1 << 16;
    static const uint32_t kHantFlag = 1 << 17;
    // All script flag bits together (bits 16 and 17).
    static const uint32_t kScriptMask = kHansFlag | kHantFlag;

    uint32_t mBits;
};
// FontStyle represents all style information needed to select an actual font // FontStyle represents all style information needed to select an actual font
// from a collection. The implementation is packed into a single 32-bit word // from a collection. The implementation is packed into a single 32-bit word
// so it can be efficiently copied, embedded in other objects, etc. // so it can be efficiently copied, embedded in other objects, etc.
@@ -33,24 +63,44 @@ public:
FontStyle(int weight = 4, bool italic = false) { FontStyle(int weight = 4, bool italic = false) {
bits = (weight & kWeightMask) | (italic ? kItalicMask : 0); bits = (weight & kWeightMask) | (italic ? kItalicMask : 0);
} }
FontStyle(FontLanguage lang, int variant = 0, int weight = 4, bool italic = false) {
bits = (weight & kWeightMask) | (italic ? kItalicMask : 0)
| (variant << kVariantShift) | (lang.bits() << kLangShift);
}
int getWeight() const { return bits & kWeightMask; } int getWeight() const { return bits & kWeightMask; }
bool getItalic() const { return (bits & kItalicMask) != 0; } bool getItalic() const { return (bits & kItalicMask) != 0; }
int getVariant() const { return (bits >> kVariantShift) & kVariantMask; }
FontLanguage getLanguage() const { return FontLanguage(bits >> kLangShift); }
bool operator==(const FontStyle other) const { return bits == other.bits; } bool operator==(const FontStyle other) const { return bits == other.bits; }
// TODO: language, variant
hash_t hash() const { return bits; } hash_t hash() const { return bits; }
private: private:
static const int kWeightMask = 0xf; static const uint32_t kWeightMask = (1 << 4) - 1;
static const int kItalicMask = 16; static const uint32_t kItalicMask = 1 << 4;
static const int kVariantShift = 5;
static const uint32_t kVariantMask = (1 << 2) - 1;
static const int kLangShift = 7;
uint32_t bits; uint32_t bits;
}; };
// Font variant selected by the "-minikin-variant" property. The numeric
// values are spelled out because a variant is carried around as a plain
// integer and compared directly against these constants.
enum FontVariant {
    VARIANT_DEFAULT = 0,  // no variant requested
    VARIANT_COMPACT = 1,  // "compact"
    VARIANT_ELEGANT = 2   // "elegant"
};
inline hash_t hash_type(const FontStyle &style) { inline hash_t hash_type(const FontStyle &style) {
return style.hash(); return style.hash();
} }
class FontFamily : public MinikinRefCounted { class FontFamily : public MinikinRefCounted {
public: public:
FontFamily() { }
FontFamily(FontLanguage lang, int variant) : mLang(lang), mVariant(variant) {
}
~FontFamily(); ~FontFamily();
// Add font to family, extracting style information from the font // Add font to family, extracting style information from the font
@@ -59,6 +109,9 @@ public:
void addFont(MinikinFont* typeface, FontStyle style); void addFont(MinikinFont* typeface, FontStyle style);
MinikinFont* getClosestMatch(FontStyle style) const; MinikinFont* getClosestMatch(FontStyle style) const;
FontLanguage lang() const { return mLang; }
int variant() const { return mVariant; }
// API's for enumerating the fonts in a family. These don't guarantee any particular order // API's for enumerating the fonts in a family. These don't guarantee any particular order
size_t getNumFonts() const; size_t getNumFonts() const;
MinikinFont* getFont(size_t index) const; MinikinFont* getFont(size_t index) const;
@@ -73,6 +126,8 @@ private:
MinikinFont* typeface; MinikinFont* typeface;
FontStyle style; FontStyle style;
}; };
FontLanguage mLang;
int mVariant;
std::vector<Font> mFonts; std::vector<Font> mFonts;
}; };

View File

@@ -20,6 +20,7 @@
#include <cstdio> // for sprintf - for debugging #include <cstdio> // for sprintf - for debugging
#include <minikin/CssParse.h> #include <minikin/CssParse.h>
#include <minikin/FontFamily.h>
using std::map; using std::map;
using std::pair; using std::pair;
@@ -27,27 +28,58 @@ using std::string;
namespace android { namespace android {
bool strEqC(const string str, size_t off, size_t len, const char* str2) { static bool strEqC(const string str, size_t off, size_t len, const char* str2) {
if (len != strlen(str2)) return false; if (len != strlen(str2)) return false;
return !memcmp(str.data() + off, str2, len); return !memcmp(str.data() + off, str2, len);
} }
CssTag parseTag(const string str, size_t off, size_t len) { static CssTag parseTag(const string str, size_t off, size_t len) {
if (len == 0) return unknown; if (len == 0) return unknown;
char c = str[off]; char c = str[off];
if (c == 'f') { if (c == 'f') {
if (strEqC(str, off, len, "font-size")) return fontSize; if (strEqC(str, off, len, "font-size")) return fontSize;
if (strEqC(str, off, len, "font-weight")) return fontWeight; if (strEqC(str, off, len, "font-weight")) return fontWeight;
if (strEqC(str, off, len, "font-style")) return fontStyle; if (strEqC(str, off, len, "font-style")) return fontStyle;
} else if (c == 'l') {
if (strEqC(str, off, len, "lang")) return cssLang;
} else if (c == '-') { } else if (c == '-') {
if (strEqC(str, off, len, "-minikin-hinting")) return minikinHinting;
if (strEqC(str, off, len, "-minikin-bidi")) return minikinBidi; if (strEqC(str, off, len, "-minikin-bidi")) return minikinBidi;
if (strEqC(str, off, len, "-minikin-hinting")) return minikinHinting;
if (strEqC(str, off, len, "-minikin-variant")) return minikinVariant;
} }
return unknown; return unknown;
} }
bool parseValue(const string str, size_t *off, size_t len, CssTag tag, static bool parseStringValue(const string& str, size_t* off, size_t len, CssTag tag, CssValue* v) {
CssValue* v) { const char* data = str.data();
size_t beg = *off;
if (beg == len) return false;
char first = data[beg];
bool quoted = false;
if (first == '\'' || first == '\"') {
quoted = true;
beg++;
}
size_t end;
for (end = beg; end < len; end++) {
char c = data[end];
if (quoted && c == first) {
v->setStringValue(std::string(str, beg, end - beg));
*off = end + 1;
return true;
} else if (!quoted && (c == ';' || c == ' ')) {
break;
} // TODO: deal with backslash escape, but only important for real strings
}
v->setStringValue(std::string(str, beg, end - beg));
*off = end;
return true;
}
static bool parseValue(const string& str, size_t* off, size_t len, CssTag tag, CssValue* v) {
if (tag == cssLang) {
return parseStringValue(str, off, len, tag, v);
}
const char* data = str.data(); const char* data = str.data();
char* endptr; char* endptr;
double fv = strtod(data + *off, &endptr); double fv = strtod(data + *off, &endptr);
@@ -78,6 +110,12 @@ bool parseValue(const string str, size_t *off, size_t len, CssTag tag,
} else { } else {
return false; return false;
} }
} else if (tag == minikinVariant) {
if (strEqC(str, *off, taglen, "compact")) {
fv = VARIANT_COMPACT;
} else if (strEqC(str, *off, taglen, "elegant")) {
fv = VARIANT_ELEGANT;
}
} else { } else {
return false; return false;
} }
@@ -91,10 +129,15 @@ string CssValue::toString(CssTag tag) const {
if (mType == FLOAT) { if (mType == FLOAT) {
if (tag == fontStyle) { if (tag == fontStyle) {
return floatValue ? "italic" : "normal"; return floatValue ? "italic" : "normal";
} else if (tag == minikinVariant) {
if (floatValue == VARIANT_COMPACT) return "compact";
if (floatValue == VARIANT_ELEGANT) return "elegant";
} }
char buf[64]; char buf[64];
sprintf(buf, "%g", floatValue); sprintf(buf, "%g", floatValue);
return string(buf); return string(buf);
} else if (mType == STRING) {
return stringValue; // should probably quote
} }
return ""; return "";
} }

View File

@@ -108,7 +108,19 @@ FontCollection::~FontCollection() {
} }
} }
const FontFamily* FontCollection::getFamilyForChar(uint32_t ch) const { // Implement heuristic for choosing best-match font. Here are the rules:
// 1. If first font in the collection has the character, it wins.
// 2. If a font matches both language and script, it gets a score of 4.
// 3. If a font matches just language, it gets a score of 2.
// 4. Matching the "compact" or "elegant" variant adds one to the score.
// 5. Highest score wins, with ties resolved to the first font.
// Note that we may want to make the selection more dependent on
// context, so for example a sequence of Devanagari, ZWJ, Devanagari
// would get itemized as one run, even though by the rules the ZWJ
// would go to the Latin font.
const FontFamily* FontCollection::getFamilyForChar(uint32_t ch, FontLanguage lang,
int variant) const {
if (ch >= mMaxChar) { if (ch >= mMaxChar) {
return NULL; return NULL;
} }
@@ -116,17 +128,33 @@ const FontFamily* FontCollection::getFamilyForChar(uint32_t ch) const {
#ifdef VERBOSE_DEBUG #ifdef VERBOSE_DEBUG
ALOGD("querying range %d:%d\n", range.start, range.end); ALOGD("querying range %d:%d\n", range.start, range.end);
#endif #endif
FontFamily* bestFamily = NULL;
int bestScore = -1;
for (size_t i = range.start; i < range.end; i++) { for (size_t i = range.start; i < range.end; i++) {
const FontInstance* instance = mInstanceVec[i]; const FontInstance* instance = mInstanceVec[i];
if (instance->mCoverage->get(ch)) { if (instance->mCoverage->get(ch)) {
return instance->mFamily; FontFamily* family = instance->mFamily;
// First font family in collection always matches
if (mInstances[0].mFamily == family) {
return family;
}
int score = lang.match(family->lang()) * 2;
if (variant != 0 && variant == family->variant()) {
score++;
}
if (score > bestScore) {
bestScore = score;
bestFamily = family;
}
} }
} }
return NULL; return bestFamily;
} }
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
vector<Run>* result) const { vector<Run>* result) const {
FontLanguage lang = style.getLanguage();
int variant = style.getVariant();
const FontFamily* lastFamily = NULL; const FontFamily* lastFamily = NULL;
Run* run = NULL; Run* run = NULL;
int nShorts; int nShorts;
@@ -140,7 +168,7 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
nShorts = 2; nShorts = 2;
} }
} }
const FontFamily* family = getFamilyForChar(ch); const FontFamily* family = getFamilyForChar(ch, lang, variant);
if (i == 0 || family != lastFamily) { if (i == 0 || family != lastFamily) {
Run dummy; Run dummy;
result->push_back(dummy); result->push_back(dummy);
@@ -149,6 +177,7 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
run->font = NULL; // maybe we should do something different here run->font = NULL; // maybe we should do something different here
} else { } else {
run->font = family->getClosestMatch(style); run->font = family->getClosestMatch(style);
// TODO: simplify refcounting (FontCollection lifetime dominates)
run->font->RefLocked(); run->font->RefLocked();
} }
lastFamily = family; lastFamily = family;

View File

@@ -30,6 +30,47 @@ using std::vector;
namespace android { namespace android {
// Return the index of the first '-' or '_' found at or after |start|, or
// |size| when the remainder of the buffer holds no separator.
static size_t findSubtagEnd(const char* buf, size_t start, size_t size) {
    size_t pos = start;
    while (pos < size && buf[pos] != '-' && buf[pos] != '_') {
        pos++;
    }
    return pos;
}

// Parse bcp-47 language identifier into internal structure.
//
// Subtags are separated by '-' or '_'. The leading subtag is recorded only
// when it is exactly two characters long; any other length leaves the base
// language bits zero. Among the remaining subtags only "Hans" and "Hant"
// (matched case-sensitively) are recognized; everything else is ignored.
FontLanguage::FontLanguage(const char* buf, size_t size) {
    uint32_t packed = 0;

    const size_t langEnd = findSubtagEnd(buf, 0, size);
    if (langEnd == 2) {
        packed = (uint8_t(buf[0]) << 8) | uint8_t(buf[1]);
    }

    // Walk the subtags that follow the base language, one per iteration.
    size_t subtag = langEnd + 1;
    while (subtag < size) {
        const size_t subtagEnd = findSubtagEnd(buf, subtag, size);
        const char* tag = buf + subtag;
        if (subtagEnd - subtag == 4 && tag[0] == 'H' && tag[1] == 'a' && tag[2] == 'n') {
            switch (tag[3]) {
                case 's':
                    packed |= kHansFlag;
                    break;
                case 't':
                    packed |= kHantFlag;
                    break;
                default:
                    break;
            }
        }
        // TODO: this might be a good place to infer script from country (zh_TW -> Hant),
        // but perhaps it's up to the client to do that, before passing a string.
        subtag = subtagEnd + 1;
    }

    mBits = packed;
}
// Score how well |other| matches this language:
//   0 = base languages differ,
//   1 = base languages are equal,
//   2 = base languages are equal and both carry the same non-empty script.
int FontLanguage::match(const FontLanguage other) const {
    const uint32_t ownLang = mBits & kBaseLangMask;
    const uint32_t otherLang = other.mBits & kBaseLangMask;
    if (ownLang != otherLang) {
        return 0;
    }

    // A script only counts when this language actually specifies one.
    const uint32_t ownScript = mBits & kScriptMask;
    const uint32_t otherScript = other.mBits & kScriptMask;
    if (ownScript == 0 || ownScript != otherScript) {
        return 1;
    }
    return 2;
}
FontFamily::~FontFamily() { FontFamily::~FontFamily() {
for (size_t i = 0; i < mFonts.size(); i++) { for (size_t i = 0; i < mFonts.size(); i++) {
mFonts[i].typeface->UnrefLocked(); mFonts[i].typeface->UnrefLocked();

View File

@@ -344,7 +344,16 @@ static FontStyle styleFromCss(const CssProperties &props) {
if (props.hasTag(fontStyle)) { if (props.hasTag(fontStyle)) {
italic = props.value(fontStyle).getIntValue() != 0; italic = props.value(fontStyle).getIntValue() != 0;
} }
return FontStyle(weight, italic); FontLanguage lang;
if (props.hasTag(cssLang)) {
string langStr = props.value(cssLang).getStringValue();
lang = FontLanguage(langStr.c_str(), langStr.size());
}
int variant = 0;
if (props.hasTag(minikinVariant)) {
variant = props.value(minikinVariant).getIntValue();
}
return FontStyle(lang, variant, weight, italic);
} }
static hb_script_t codePointToScript(hb_codepoint_t codepoint) { static hb_script_t codePointToScript(hb_codepoint_t codepoint) {
@@ -486,7 +495,7 @@ static void clearHbFonts(LayoutContext* ctx) {
// TODO: API should probably take context // TODO: API should probably take context
void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
const std::string& css) { const string& css) {
AutoMutex _l(gMinikinLock); AutoMutex _l(gMinikinLock);
LayoutContext ctx; LayoutContext ctx;
@@ -599,7 +608,6 @@ void Layout::doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_
} }
appendLayout(value, bufStart); appendLayout(value, bufStart);
cache.mCache.put(key, value); cache.mCache.put(key, value);
} }
void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize, void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
@@ -641,6 +649,10 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
hb_buffer_reset(buffer); hb_buffer_reset(buffer);
hb_buffer_set_script(buffer, script); hb_buffer_set_script(buffer, script);
hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR); hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
if (ctx->props.hasTag(cssLang)) {
string lang = ctx->props.value(cssLang).getStringValue();
hb_buffer_set_language(buffer, hb_language_from_string(lang.c_str(), -1));
}
hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart); hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart);
hb_shape(hbFont, buffer, NULL, 0); hb_shape(hbFont, buffer, NULL, 0);
unsigned int numGlyphs; unsigned int numGlyphs;