am 7d4090fb: Language and variant selection

* commit '7d4090fbe937014d79bb996c03e763d2557d238d':
  Language and variant selection
This commit is contained in:
Raph Levien
2014-05-29 22:38:29 +00:00
committed by Android Git Automerger
7 changed files with 214 additions and 22 deletions

View File

@@ -25,26 +25,31 @@ namespace android {
enum CssTag {
unknown,
fontSize,
fontWeight,
fontStyle,
minikinHinting,
fontWeight,
cssLang,
minikinBidi,
minikinHinting,
minikinVariant,
};
const std::string cssTagNames[] = {
"unknown",
"font-size",
"font-weight",
"font-style",
"-minikin-hinting",
"font-weight",
"lang",
"-minikin-bidi",
"-minikin-hinting",
"-minikin-variant",
};
class CssValue {
public:
enum Type {
UNKNOWN,
FLOAT
FLOAT,
STRING
};
enum Units {
SCALAR,
@@ -58,14 +63,20 @@ public:
Type getType() const { return mType; }
double getFloatValue() const { return floatValue; }
int getIntValue() const { return floatValue; }
std::string getStringValue() const { return stringValue; }
std::string toString(CssTag tag) const;
void setFloatValue(double v) {
mType = FLOAT;
floatValue = v;
}
void setStringValue(const std::string& v) {
mType = STRING;
stringValue = v;
}
private:
Type mType;
double floatValue;
std::string stringValue;
Units mUnits;
};
@@ -85,4 +96,4 @@ private:
} // namespace android
#endif // MINIKIN_CSS_PARSE_H
#endif // MINIKIN_CSS_PARSE_H

View File

@@ -32,7 +32,8 @@ public:
~FontCollection();
const FontFamily* getFamilyForChar(uint32_t ch) const;
const FontFamily* getFamilyForChar(uint32_t ch, FontLanguage lang, int variant) const;
class Run {
public:
// Do copy constructor, assignment, destructor so it can be used in vectors

View File

@@ -25,6 +25,36 @@
namespace android {
class MinikinFont;
// A FontLanguage packs the parts of a bcp-47 language tag that directly affect
// font rendering into a single 32-bit word; everything else in the tag is
// dropped. Bits 0-15 (kBaseLangMask) carry the base language, bit 16
// (kHansFlag) and bit 17 (kHantFlag) carry the script flags.
class FontLanguage {
    // FontStyle stores and restores the raw bits, so it needs the private
    // bits() accessor and the from-bits constructor.
    friend class FontStyle;

public:
    // The empty language: all bits zero.
    FontLanguage() : mBits(0) { }

    // Parses the first `size` bytes of `buf` as a language tag.
    FontLanguage(const char* buf, size_t size);

    // Languages compare equal exactly when their packed bits are equal.
    bool operator==(const FontLanguage other) const { return other.mBits == mBits; }

    // Match score against `other`:
    //   0 = no match, 1 = language matches, 2 = language and script match
    int match(const FontLanguage other) const;

private:
    // Rebuilds a language from previously packed bits.
    explicit FontLanguage(uint32_t bits) : mBits(bits) { }

    uint32_t bits() const { return mBits; }

    static const uint32_t kBaseLangMask = 0xffff;
    static const uint32_t kHansFlag = 1 << 16;
    static const uint32_t kHantFlag = 1 << 17;
    // Both script flag bits together (bits 16 and 17).
    static const uint32_t kScriptMask = kHansFlag | kHantFlag;

    uint32_t mBits;
};
// FontStyle represents all style information needed to select an actual font
// from a collection. The implementation is packed into a single 32-bit word
// so it can be efficiently copied, embedded in other objects, etc.
@@ -33,24 +63,44 @@ public:
FontStyle(int weight = 4, bool italic = false) {
bits = (weight & kWeightMask) | (italic ? kItalicMask : 0);
}
// Builds a style that carries a language and a variant in addition to weight
// and italic. Layout of `bits`: weight in bits 0-3, italic in bit 4, variant
// in bits 5-6, language from bit 7 upward.
FontStyle(FontLanguage lang, int variant = 0, int weight = 4, bool italic = false) {
    // Clamp the variant to its 2-bit field before shifting. Without the mask an
    // out-of-range value would spill into the language bits (corrupting
    // getLanguage()), and a negative value would be an undefined left shift.
    bits = (weight & kWeightMask) | (italic ? kItalicMask : 0)
            | ((variant & kVariantMask) << kVariantShift) | (lang.bits() << kLangShift);
}
int getWeight() const { return bits & kWeightMask; }
bool getItalic() const { return (bits & kItalicMask) != 0; }
int getVariant() const { return (bits >> kVariantShift) & kVariantMask; }
FontLanguage getLanguage() const { return FontLanguage(bits >> kLangShift); }
bool operator==(const FontStyle other) const { return bits == other.bits; }
// TODO: language, variant
hash_t hash() const { return bits; }
private:
static const int kWeightMask = 0xf;
static const int kItalicMask = 16;
static const uint32_t kWeightMask = (1 << 4) - 1;
static const uint32_t kItalicMask = 1 << 4;
static const int kVariantShift = 5;
static const uint32_t kVariantMask = (1 << 2) - 1;
static const int kLangShift = 7;
uint32_t bits;
};
// Variant selector for a font family. The numeric values are what FontStyle
// stores in its 2-bit variant field and what FontFamily::variant() returns.
enum FontVariant {
VARIANT_DEFAULT = 0,  // FontCollection::getFamilyForChar gives no variant bonus for 0
VARIANT_COMPACT = 1,  // produced by the CSS value "compact"
VARIANT_ELEGANT = 2,  // produced by the CSS value "elegant"
};
// Free-function hash adapter for FontStyle: forwards to FontStyle::hash(),
// which returns the style's packed bits.
inline hash_t hash_type(const FontStyle &style) {
return style.hash();
}
class FontFamily : public MinikinRefCounted {
public:
FontFamily() { }
FontFamily(FontLanguage lang, int variant) : mLang(lang), mVariant(variant) {
}
~FontFamily();
// Add font to family, extracting style information from the font
@@ -59,6 +109,9 @@ public:
void addFont(MinikinFont* typeface, FontStyle style);
MinikinFont* getClosestMatch(FontStyle style) const;
FontLanguage lang() const { return mLang; }
int variant() const { return mVariant; }
// APIs for enumerating the fonts in a family. These don't guarantee any particular order
size_t getNumFonts() const;
MinikinFont* getFont(size_t index) const;
@@ -73,6 +126,8 @@ private:
MinikinFont* typeface;
FontStyle style;
};
FontLanguage mLang;
int mVariant;
std::vector<Font> mFonts;
};

View File

@@ -20,6 +20,7 @@
#include <cstdio> // for sprintf - for debugging
#include <minikin/CssParse.h>
#include <minikin/FontFamily.h>
using std::map;
using std::pair;
@@ -27,27 +28,58 @@ using std::string;
namespace android {
bool strEqC(const string str, size_t off, size_t len, const char* str2) {
// Returns true when the `len` bytes of `str` starting at offset `off` are
// exactly equal to the NUL-terminated string `str2`.
//
// `str` is taken by const reference so that comparing a tag does not copy the
// whole input string on every call.
static bool strEqC(const std::string& str, size_t off, size_t len, const char* str2) {
    // A range that does not lie inside `str` can never match; rejecting it here
    // keeps memcmp from reading past the end of the string's storage.
    if (off > str.size() || len > str.size() - off) return false;
    if (len != strlen(str2)) return false;
    return memcmp(str.data() + off, str2, len) == 0;
}
CssTag parseTag(const string str, size_t off, size_t len) {
// Maps the property name occupying `len` bytes of `str` at offset `off` to its
// CssTag. Returns `unknown` for an empty or unrecognised name.
//
// The string is taken by const reference (matching parseStringValue and
// parseValue) so the lookup does not copy the input.
static CssTag parseTag(const string& str, size_t off, size_t len) {
    if (len == 0) return unknown;
    // Dispatch on the first character so only plausible names are compared.
    switch (str[off]) {
    case 'f':
        if (strEqC(str, off, len, "font-size")) return fontSize;
        if (strEqC(str, off, len, "font-weight")) return fontWeight;
        if (strEqC(str, off, len, "font-style")) return fontStyle;
        break;
    case 'l':
        if (strEqC(str, off, len, "lang")) return cssLang;
        break;
    case '-':
        if (strEqC(str, off, len, "-minikin-bidi")) return minikinBidi;
        if (strEqC(str, off, len, "-minikin-hinting")) return minikinHinting;
        if (strEqC(str, off, len, "-minikin-variant")) return minikinVariant;
        break;
    default:
        break;
    }
    return unknown;
}
bool parseValue(const string str, size_t *off, size_t len, CssTag tag,
CssValue* v) {
static bool parseStringValue(const string& str, size_t* off, size_t len, CssTag tag, CssValue* v) {
const char* data = str.data();
size_t beg = *off;
if (beg == len) return false;
char first = data[beg];
bool quoted = false;
if (first == '\'' || first == '\"') {
quoted = true;
beg++;
}
size_t end;
for (end = beg; end < len; end++) {
char c = data[end];
if (quoted && c == first) {
v->setStringValue(std::string(str, beg, end - beg));
*off = end + 1;
return true;
} else if (!quoted && (c == ';' || c == ' ')) {
break;
} // TODO: deal with backslash escape, but only important for real strings
}
v->setStringValue(std::string(str, beg, end - beg));
*off = end;
return true;
}
static bool parseValue(const string& str, size_t* off, size_t len, CssTag tag, CssValue* v) {
if (tag == cssLang) {
return parseStringValue(str, off, len, tag, v);
}
const char* data = str.data();
char* endptr;
double fv = strtod(data + *off, &endptr);
@@ -78,6 +110,12 @@ bool parseValue(const string str, size_t *off, size_t len, CssTag tag,
} else {
return false;
}
} else if (tag == minikinVariant) {
if (strEqC(str, *off, taglen, "compact")) {
fv = VARIANT_COMPACT;
} else if (strEqC(str, *off, taglen, "elegant")) {
fv = VARIANT_ELEGANT;
}
} else {
return false;
}
@@ -91,10 +129,15 @@ string CssValue::toString(CssTag tag) const {
if (mType == FLOAT) {
if (tag == fontStyle) {
return floatValue ? "italic" : "normal";
} else if (tag == minikinVariant) {
if (floatValue == VARIANT_COMPACT) return "compact";
if (floatValue == VARIANT_ELEGANT) return "elegant";
}
char buf[64];
sprintf(buf, "%g", floatValue);
return string(buf);
} else if (mType == STRING) {
return stringValue; // should probably quote
}
return "";
}

View File

@@ -108,7 +108,19 @@ FontCollection::~FontCollection() {
}
}
const FontFamily* FontCollection::getFamilyForChar(uint32_t ch) const {
// Implement heuristic for choosing best-match font. Here are the rules:
// 1. If first font in the collection has the character, it wins.
// 2. If a font matches both language and script, it gets a score of 4.
// 3. If a font matches just language, it gets a score of 2.
// 4. Matching the "compact" or "elegant" variant adds one to the score.
// 5. Highest score wins, with ties resolved to the first font.
// Note that we may want to make the selection more dependent on
// context, so for example a sequence of Devanagari, ZWJ, Devanagari
// would get itemized as one run, even though by the rules the ZWJ
// would go to the Latin font.
const FontFamily* FontCollection::getFamilyForChar(uint32_t ch, FontLanguage lang,
int variant) const {
if (ch >= mMaxChar) {
return NULL;
}
@@ -116,17 +128,33 @@ const FontFamily* FontCollection::getFamilyForChar(uint32_t ch) const {
#ifdef VERBOSE_DEBUG
ALOGD("querying range %d:%d\n", range.start, range.end);
#endif
FontFamily* bestFamily = NULL;
int bestScore = -1;
for (size_t i = range.start; i < range.end; i++) {
const FontInstance* instance = mInstanceVec[i];
if (instance->mCoverage->get(ch)) {
return instance->mFamily;
FontFamily* family = instance->mFamily;
// First font family in collection always matches
if (mInstances[0].mFamily == family) {
return family;
}
int score = lang.match(family->lang()) * 2;
if (variant != 0 && variant == family->variant()) {
score++;
}
if (score > bestScore) {
bestScore = score;
bestFamily = family;
}
}
}
return NULL;
return bestFamily;
}
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
vector<Run>* result) const {
FontLanguage lang = style.getLanguage();
int variant = style.getVariant();
const FontFamily* lastFamily = NULL;
Run* run = NULL;
int nShorts;
@@ -140,7 +168,7 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
nShorts = 2;
}
}
const FontFamily* family = getFamilyForChar(ch);
const FontFamily* family = getFamilyForChar(ch, lang, variant);
if (i == 0 || family != lastFamily) {
Run dummy;
result->push_back(dummy);
@@ -149,6 +177,7 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
run->font = NULL; // maybe we should do something different here
} else {
run->font = family->getClosestMatch(style);
// TODO: simplify refcounting (FontCollection lifetime dominates)
run->font->RefLocked();
}
lastFamily = family;

View File

@@ -30,6 +30,47 @@ using std::vector;
namespace android {
// Parse a bcp-47 language identifier (the first `size` bytes of `buf`) into
// the packed internal representation. Subtags may be separated by '-' or '_'.
//
// Only two things are captured:
//   - a two-letter primary subtag, stored as (first byte << 8) | second byte;
//     a primary subtag of any other length leaves the language bits zero;
//   - a later four-letter subtag "Hans" or "Hant", which sets kHansFlag or
//     kHantFlag respectively.
FontLanguage::FontLanguage(const char* buf, size_t size) {
    uint32_t bits = 0;

    // Find the end of the primary subtag. (The loop condition already stops at
    // a separator, so no further check is needed inside the loop.)
    size_t i = 0;
    while (i < size && buf[i] != '-' && buf[i] != '_') {
        i++;
    }
    if (i == 2) {
        bits = (uint8_t(buf[0]) << 8) | uint8_t(buf[1]);
    }

    // Walk the remaining subtags; [i, next) delimits the current one.
    size_t next;
    for (i++; i < size; i = next + 1) {
        for (next = i; next < size; next++) {
            if (buf[next] == '-' || buf[next] == '_') break;
        }
        if (next - i == 4 && buf[i] == 'H' && buf[i+1] == 'a' && buf[i+2] == 'n') {
            if (buf[i+3] == 's') {
                bits |= kHansFlag;
            } else if (buf[i+3] == 't') {
                bits |= kHantFlag;
            }
        }
        // TODO: this might be a good place to infer script from country (zh_TW -> Hant),
        // but perhaps it's up to the client to do that, before passing a string.
    }
    mBits = bits;
}
// Scores how well this language matches `other`:
//   0 - base languages differ
//   1 - base languages are equal
//   2 - base languages are equal, this language has a script flag set, and
//       both have identical script flags
int FontLanguage::match(const FontLanguage other) const {
    const uint32_t myLang = mBits & kBaseLangMask;
    const uint32_t theirLang = other.mBits & kBaseLangMask;
    if (myLang != theirLang) {
        return 0;
    }

    const uint32_t myScript = mBits & kScriptMask;
    const uint32_t theirScript = other.mBits & kScriptMask;
    const bool scriptMatches = (myScript != 0) && (myScript == theirScript);
    return scriptMatches ? 2 : 1;
}
FontFamily::~FontFamily() {
for (size_t i = 0; i < mFonts.size(); i++) {
mFonts[i].typeface->UnrefLocked();

View File

@@ -344,7 +344,16 @@ static FontStyle styleFromCss(const CssProperties &props) {
if (props.hasTag(fontStyle)) {
italic = props.value(fontStyle).getIntValue() != 0;
}
return FontStyle(weight, italic);
FontLanguage lang;
if (props.hasTag(cssLang)) {
string langStr = props.value(cssLang).getStringValue();
lang = FontLanguage(langStr.c_str(), langStr.size());
}
int variant = 0;
if (props.hasTag(minikinVariant)) {
variant = props.value(minikinVariant).getIntValue();
}
return FontStyle(lang, variant, weight, italic);
}
static hb_script_t codePointToScript(hb_codepoint_t codepoint) {
@@ -486,7 +495,7 @@ static void clearHbFonts(LayoutContext* ctx) {
// TODO: API should probably take context
void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
const std::string& css) {
const string& css) {
AutoMutex _l(gMinikinLock);
LayoutContext ctx;
@@ -599,7 +608,6 @@ void Layout::doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_
}
appendLayout(value, bufStart);
cache.mCache.put(key, value);
}
void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
@@ -641,6 +649,10 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
hb_buffer_reset(buffer);
hb_buffer_set_script(buffer, script);
hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
if (ctx->props.hasTag(cssLang)) {
string lang = ctx->props.value(cssLang).getStringValue();
hb_buffer_set_language(buffer, hb_language_from_string(lang.c_str(), -1));
}
hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart);
hb_shape(hbFont, buffer, NULL, 0);
unsigned int numGlyphs;