diff --git a/engine/src/flutter/include/minikin/LineBreaker.h b/engine/src/flutter/include/minikin/LineBreaker.h index e031fb3182..e28f11da44 100644 --- a/engine/src/flutter/include/minikin/LineBreaker.h +++ b/engine/src/flutter/include/minikin/LineBreaker.h @@ -27,6 +27,7 @@ #include #include #include "minikin/Hyphenator.h" +#include "minikin/WordBreaker.h" namespace android { @@ -102,11 +103,6 @@ class LineBreaker { public: const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java - ~LineBreaker() { - utext_close(&mUText); - delete mBreakIterator; - } - // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always // be set on the first invocation, but callers are encouraged not to call again unless @@ -214,8 +210,7 @@ class LineBreaker { void finishBreaksOptimal(); - icu::BreakIterator* mBreakIterator = nullptr; - UText mUText = UTEXT_INITIALIZER; + WordBreaker mWordBreaker; std::vectormTextBuf; std::vectormCharWidths; diff --git a/engine/src/flutter/include/minikin/WordBreaker.h b/engine/src/flutter/include/minikin/WordBreaker.h new file mode 100644 index 0000000000..22275bde84 --- /dev/null +++ b/engine/src/flutter/include/minikin/WordBreaker.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A wrapper around ICU's line break iterator, that gives customized line + * break opportunities, as well as identifying words for the purpose of + * hyphenation. + */ + +#ifndef MINIKIN_WORD_BREAKER_H +#define MINIKIN_WORD_BREAKER_H + +#include "unicode/brkiter.h" +#include + +namespace android { + +class WordBreaker { +public: + ~WordBreaker() { + finish(); + } + + void setLocale(const icu::Locale& locale); + + void setText(const uint16_t* data, size_t size); + + // Advance iterator to next word break. Return offset, or -1 if EOT + ssize_t next(); + + // Current offset of iterator, equal to 0 at BOT or last return from next() + ssize_t current() const; + + // After calling next(), wordStart() and wordEnd() are offsets defining the previous + // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. + ssize_t wordStart() const; + + ssize_t wordEnd() const; + + void finish(); + +private: + std::unique_ptr mBreakIterator; + UText mUText = UTEXT_INITIALIZER; + const uint16_t* mText = nullptr; + size_t mTextSize; + ssize_t mLast; + ssize_t mCurrent; + bool mIteratorWasReset; +}; + +} // namespace + +#endif // MINIKIN_WORD_BREAKER_H diff --git a/engine/src/flutter/libs/minikin/Android.mk b/engine/src/flutter/libs/minikin/Android.mk index 1fd0e6e8a0..7cd6f6883c 100644 --- a/engine/src/flutter/libs/minikin/Android.mk +++ b/engine/src/flutter/libs/minikin/Android.mk @@ -33,7 +33,8 @@ minikin_src_files := \ MinikinInternal.cpp \ MinikinRefCounted.cpp \ MinikinFontFreeType.cpp \ - SparseBitSet.cpp + SparseBitSet.cpp \ + WordBreaker.cpp minikin_c_includes := \ external/harfbuzz_ng/src \ diff --git a/engine/src/flutter/libs/minikin/LineBreaker.cpp b/engine/src/flutter/libs/minikin/LineBreaker.cpp index a2507e0ba8..214f19599d 100644 --- a/engine/src/flutter/libs/minikin/LineBreaker.cpp +++ b/engine/src/flutter/libs/minikin/LineBreaker.cpp @@ -29,7 +29,6 @@ using std::vector; namespace android { const int CHAR_TAB = 0x0009; -const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these // constants are larger than any reasonable actual width score. @@ -55,23 +54,16 @@ const size_t LONGEST_HYPHENATED_WORD = 45; const size_t MAX_TEXT_BUF_RETAIN = 32678; void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { - delete mBreakIterator; - UErrorCode status = U_ZERO_ERROR; - mBreakIterator = icu::BreakIterator::createLineInstance(locale, status); - // TODO: check status + mWordBreaker.setLocale(locale); - // TODO: load actual resource dependent on locale; letting Minikin do it is a hack mHyphenator = hyphenator; } void LineBreaker::setText() { - UErrorCode status = U_ZERO_ERROR; - utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status); - mBreakIterator->setText(&mUText, status); - mBreakIterator->first(); + mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); // handle initial break here because addStyleRun may never be called - mBreakIterator->next(); + mWordBreaker.next(); mCandidates.clear(); Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0}; mCandidates.push_back(cand); @@ -151,8 +143,8 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); } - size_t current = (size_t)mBreakIterator->current(); - size_t wordEnd = start; + size_t current = (size_t)mWordBreaker.current(); + size_t afterWord = start; size_t lastBreak = start; ParaWidth lastBreakWidth = mWidth; ParaWidth postBreak = mWidth; @@ -170,58 +162,56 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa mWidth += mCharWidths[i]; if (!isLineEndSpace(c)) { postBreak = mWidth; - wordEnd = i + 1; + afterWord = i + 1; } } if (i + 1 == current) { - // Override ICU's treatment of soft hyphen as a break opportunity, because we want it - // to be a hyphen break, with penalty and drawing behavior. - if (c != CHAR_SOFT_HYPHEN) { - // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so - // we can pass the whole word down to Hyphenator like the soft hyphen case. - bool wordEndsInHyphen = isLineBreakingHyphen(c); - if (paint != nullptr && mHyphenator != nullptr && - mHyphenationFrequency != kHyphenationFrequency_None && - !wordEndsInHyphen && !temporarilySkipHyphenation && - wordEnd > lastBreak && wordEnd - lastBreak <= LONGEST_HYPHENATED_WORD) { - mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak); - #if VERBOSE_DEBUG - std::string hyphenatedString; - for (size_t j = lastBreak; j < wordEnd; j++) { - if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-'); - // Note: only works with ASCII, should do UTF-8 conversion here - hyphenatedString.push_back(buffer()[j]); - } - ALOGD("hyphenated string: %s", hyphenatedString.c_str()); - #endif + // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so + // we can pass the whole word down to Hyphenator like the soft hyphen case. + bool wordEndsInHyphen = isLineBreakingHyphen(c); + size_t wordStart = mWordBreaker.wordStart(); + size_t wordEnd = mWordBreaker.wordEnd(); + if (paint != nullptr && mHyphenator != nullptr && + mHyphenationFrequency != kHyphenationFrequency_None && + !wordEndsInHyphen && !temporarilySkipHyphenation && + wordEnd > wordStart && wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { + mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart], wordEnd - wordStart); +#if VERBOSE_DEBUG + std::string hyphenatedString; + for (size_t j = wordStart; j < wordEnd; j++) { + if (mHyphBuf[j - wordStart]) hyphenatedString.push_back('-'); + // Note: only works with ASCII, should do UTF-8 conversion here + hyphenatedString.push_back(buffer()[j]); + } + ALOGD("hyphenated string: %s", hyphenatedString.c_str()); +#endif - // measure hyphenated substrings - for (size_t j = lastBreak; j < wordEnd; j++) { - uint8_t hyph = mHyphBuf[j - lastBreak]; - if (hyph) { - paint->hyphenEdit = hyph; - layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak, - mTextBuf.size(), bidiFlags, style, *paint); - ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance(); - paint->hyphenEdit = 0; - layout.doLayout(mTextBuf.data(), j, wordEnd - j, - mTextBuf.size(), bidiFlags, style, *paint); - ParaWidth hyphPreBreak = postBreak - layout.getAdvance(); - addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph); - } + // measure hyphenated substrings + for (size_t j = wordStart; j < wordEnd; j++) { + uint8_t hyph = mHyphBuf[j - wordStart]; + if (hyph) { + paint->hyphenEdit = hyph; + layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance(); + paint->hyphenEdit = 0; + layout.doLayout(mTextBuf.data(), j, afterWord - j, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPreBreak = postBreak - layout.getAdvance(); + addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph); } } - // Skip hyphenating the next word if and only if the present word ends in a hyphen - temporarilySkipHyphenation = wordEndsInHyphen; - - // Skip break for zero-width characters inside replacement span - if (paint != nullptr || current == end || mCharWidths[current] > 0) { - addWordBreak(current, mWidth, postBreak, 0.0, 0); - } - lastBreak = current; - lastBreakWidth = mWidth; } - current = (size_t)mBreakIterator->next(); + // Skip hyphenating the next word if and only if the present word ends in a hyphen + temporarilySkipHyphenation = wordEndsInHyphen; + + // Skip break for zero-width characters inside replacement span + if (paint != nullptr || current == end || mCharWidths[current] > 0) { + addWordBreak(current, mWidth, postBreak, 0.0, 0); + } + lastBreak = current; + lastBreakWidth = mWidth; + current = (size_t)mWordBreaker.next(); } } @@ -425,6 +415,7 @@ size_t LineBreaker::computeBreaks() { } void LineBreaker::finish() { + mWordBreaker.finish(); mWidth = 0; mCandidates.clear(); mBreaks.clear(); diff --git a/engine/src/flutter/libs/minikin/WordBreaker.cpp b/engine/src/flutter/libs/minikin/WordBreaker.cpp new file mode 100644 index 0000000000..b422a62af8 --- /dev/null +++ b/engine/src/flutter/libs/minikin/WordBreaker.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "Minikin" +#include + +#include "minikin/WordBreaker.h" + +#include +#include + +namespace android { + +const uint32_t CHAR_SOFT_HYPHEN = 0x00AD; + +void WordBreaker::setLocale(const icu::Locale& locale) { + UErrorCode status = U_ZERO_ERROR; + mBreakIterator.reset(icu::BreakIterator::createLineInstance(locale, status)); + // TODO: handle failure status + if (mText != nullptr) { + mBreakIterator->setText(&mUText, status); + } + mIteratorWasReset = true; +} + +void WordBreaker::setText(const uint16_t* data, size_t size) { + mText = data; + mTextSize = size; + mIteratorWasReset = false; + mLast = 0; + mCurrent = 0; + UErrorCode status = U_ZERO_ERROR; + utext_openUChars(&mUText, data, size, &status); + mBreakIterator->setText(&mUText, status); + mBreakIterator->first(); +} + +ssize_t WordBreaker::current() const { + return mCurrent; +} + +ssize_t WordBreaker::next() { + int32_t result; + mLast = mCurrent; + do { + if (mIteratorWasReset) { + result = mBreakIterator->following(mCurrent); + mIteratorWasReset = false; + } else { + result = mBreakIterator->next(); + } + } while (result != icu::BreakIterator::DONE && (size_t)result != mTextSize + && mText[result - 1] == CHAR_SOFT_HYPHEN); + mCurrent = (ssize_t)result; + return mCurrent; +} + +ssize_t WordBreaker::wordStart() const { + ssize_t result = mLast; + while (result < mCurrent) { + UChar32 c; + ssize_t ix = result; + U16_NEXT(mText, ix, mCurrent, c); + int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); + // strip leading punctuation, defined as OP and QU line breaking classes, + // see UAX #14 + if (!(lb == U_LB_OPEN_PUNCTUATION || lb == U_LB_QUOTATION)) { + break; + } + result = ix; + } + return result; +} + +ssize_t WordBreaker::wordEnd() const { + ssize_t result = mCurrent; + while (result > mLast) { + UChar32 c; + ssize_t ix = result; + U16_PREV(mText, mLast, ix, c); + int32_t gc_mask = U_GET_GC_MASK(c); + // strip trailing space and punctuation + if ((gc_mask & (U_GC_ZS_MASK | U_GC_P_MASK)) == 0) { + break; + } + result = ix; + } + return result; +} + +void WordBreaker::finish() { + mText = nullptr; + // Note: calling utext_close multiply is safe + utext_close(&mUText); +} + +} // namespace android diff --git a/engine/src/flutter/tests/Android.mk b/engine/src/flutter/tests/Android.mk index d2aa5fd107..e6586d7178 100644 --- a/engine/src/flutter/tests/Android.mk +++ b/engine/src/flutter/tests/Android.mk @@ -79,7 +79,8 @@ LOCAL_SRC_FILES += \ MinikinFontForTest.cpp \ GraphemeBreakTests.cpp \ LayoutUtilsTest.cpp \ - UnicodeUtils.cpp + UnicodeUtils.cpp \ + WordBreakerTests.cpp LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../libs/minikin/ \ diff --git a/engine/src/flutter/tests/WordBreakerTests.cpp b/engine/src/flutter/tests/WordBreakerTests.cpp new file mode 100644 index 0000000000..d389d58cd1 --- /dev/null +++ b/engine/src/flutter/tests/WordBreakerTests.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "ICUTestBase.h" +#include "UnicodeUtils.h" +#include +#include +#include +#include + +#define LOG_TAG "Minikin" +#include + +#ifndef NELEM +#define NELEM(x) ((sizeof(x) / sizeof((x)[0]))) +#endif + +using namespace android; + +typedef ICUTestBase WordBreakerTest; + +TEST_F(WordBreakerTest, basic) { + uint16_t buf[] = {'h', 'e', 'l', 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(6, breaker.next()); // after "hello " + EXPECT_EQ(0, breaker.wordStart()); // "hello" + EXPECT_EQ(5, breaker.wordEnd()); + EXPECT_EQ(6, breaker.current()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(6, breaker.wordStart()); // "world" + EXPECT_EQ(11, breaker.wordEnd()); + EXPECT_EQ(11, breaker.current()); +} + +TEST_F(WordBreakerTest, softHyphen) { + uint16_t buf[] = {'h', 'e', 'l', 0x00AD, 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo " + EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo" + EXPECT_EQ(6, breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(7, breaker.wordStart()); // "world" + EXPECT_EQ(12, breaker.wordEnd()); +} + +TEST_F(WordBreakerTest, punct) { + uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l' ,'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', + '!', '!'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(9, breaker.next()); // after "¡¡hello, " + EXPECT_EQ(2, breaker.wordStart()); // "hello" + EXPECT_EQ(7, breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(9, breaker.wordStart()); // "world" + EXPECT_EQ(14, breaker.wordEnd()); +}