Break regional indicators at even numbered code points.
Bug: 23288449 Change-Id: If1419ff9e44e8e640616979bae88311f414b42a1
This commit is contained in:
@@ -15,6 +15,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <algorithm>
|
||||||
#include <unicode/uchar.h>
|
#include <unicode/uchar.h>
|
||||||
#include <unicode/utf16.h>
|
#include <unicode/utf16.h>
|
||||||
|
|
||||||
@@ -124,17 +125,25 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
|
|||||||
if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
|
if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Rule GB8a, Regional_Indicator x Regional_Indicator
|
// Rule GB8a that looks at even-off cases.
|
||||||
//
|
//
|
||||||
// Known limitation: This is overly conservative, and returns no grapheme breaks between two
|
// sot (RI RI)* RI x RI
|
||||||
// flags, such as in the character sequence "U+1F1FA U+1F1F8 [potential break] U+1F1FA U+1F1F8".
|
// [^RI] (RI RI)* RI x RI
|
||||||
// Also, it assumes that all combinations of Regional Indicators produce a flag, where they
|
// RI ÷ RI
|
||||||
// don't.
|
|
||||||
//
|
|
||||||
// There is no easy solution for doing this correctly, except for querying the font and doing
|
|
||||||
// some lookback.
|
|
||||||
if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
|
if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
|
||||||
return false;
|
// Look at up to 1000 code units.
|
||||||
|
start = std::max((ssize_t)start, (ssize_t)offset_back - 1000);
|
||||||
|
while (offset_back > start) {
|
||||||
|
U16_PREV(buf, start, offset_back, c1);
|
||||||
|
if (tailoredGraphemeClusterBreak(c1) != U_GCB_REGIONAL_INDICATOR) {
|
||||||
|
offset_back += U16_LENGTH(c1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that the offset has moved forwared 2 code units by U16_NEXT.
|
||||||
|
// The number 4 comes from the number of code units in a whole flag.
|
||||||
|
return (offset - 2 - offset_back) % 4 == 0;
|
||||||
}
|
}
|
||||||
// Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
|
// Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
|
||||||
if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
|
if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
|
||||||
|
|||||||
@@ -84,6 +84,22 @@ TEST(GraphemeBreak, rules) {
|
|||||||
|
|
||||||
// Rule GB8a, Regional_Indicator x Regional_Indicator
|
// Rule GB8a, Regional_Indicator x Regional_Indicator
|
||||||
EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
|
EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
|
||||||
|
EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
|
||||||
|
EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag)
|
||||||
|
|
||||||
|
EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag)
|
||||||
|
|
||||||
|
EXPECT_TRUE(
|
||||||
|
IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(
|
||||||
|
IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
EXPECT_FALSE(
|
||||||
|
IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag)
|
||||||
|
|
||||||
// Rule GB9, x Extend
|
// Rule GB9, x Extend
|
||||||
EXPECT_FALSE(IsBreak("'a' | U+0301")); // combining accent
|
EXPECT_FALSE(IsBreak("'a' | U+0301")); // combining accent
|
||||||
|
|||||||
Reference in New Issue
Block a user