1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <limits>
16
17 #include "core/text/text_emoji_processor.h"
18
19 #include "base/utils/utf_helper.h"
20 #include <unicode/uchar.h>
21
22 #include "unicode/unistr.h"
23
24 namespace OHOS::Ace {
namespace {

// Code points with a special role while scanning emoji sequences and line breaks.
constexpr int32_t LINE_FEED = 0x0A;
constexpr int32_t CARRIAGE_RETURN = 0x0D;
constexpr int32_t COMBINING_ENCLOSING_KEYCAP = 0x20E3; // U+20E3 COMBINING ENCLOSING KEYCAP
constexpr int32_t ZERO_WIDTH_JOINER = 0x200D;          // U+200D ZERO WIDTH JOINER
constexpr int32_t CANCEL_TAG = 0xE007F;                // U+E007F CANCEL TAG

// States of the scanner driven by GetEmojiLengthAtEnd (backward) and GetEmojiLengthAtFront (forward).
// Every scan starts in STATE_BEGIN and stops as soon as STATE_FINISHED is reached.
constexpr int32_t STATE_BEGIN = 0;
constexpr int32_t STATE_SECOND = 1; // forward only: the first code point had no special role
constexpr int32_t STATE_EM = 2;
constexpr int32_t STATE_VS_AND_KEYCAP = 3;
constexpr int32_t STATE_ZWJ = 4;
constexpr int32_t STATE_KEYCAP = 5;
constexpr int32_t STATE_EMOJI = 6;
constexpr int32_t STATE_VS_AND_EM = 7;
constexpr int32_t STATE_VS = 8;
constexpr int32_t STATE_VS_AND_ZWJ = 9;
constexpr int32_t STATE_LF = 10;
constexpr int32_t STATE_CR = 11;
constexpr int32_t STATE_IN_TAG_QUEUE = 12;
constexpr int32_t STATE_EVEN_RIS = 13;
constexpr int32_t STATE_ODD_RIS = 14;
constexpr int32_t STATE_FINISHED = 20;
constexpr int32_t MAX_INT = std::numeric_limits<int32_t>::max();

// Adds two 32-bit values and saturates the result to the symmetric range [-MAX_INT, MAX_INT]
// instead of overflowing.
int32_t AddAndPreventOverflow(int32_t a, int32_t b)
{
    // The sum is formed in int64_t, which always holds it exactly. `long` is only 32 bits wide on
    // ILP32 and LLP64 targets, where widening to it would not prevent the overflow at all.
    const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    const int64_t upperBound = static_cast<int64_t>(MAX_INT);
    if (sum > upperBound) {
        return MAX_INT;
    }
    if (sum < -upperBound) {
        return -MAX_INT;
    }
    return static_cast<int32_t>(sum);
}

} // namespace
65
Delete(int32_t startIndex,int32_t length,std::u16string & content,bool isBackward)66 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::u16string& content, bool isBackward)
67 {
68 std::u16string u16 = content;
69 // startIndex from selectController_->GetCaretIndex() is an utf-16 index
70 // so we need an u16string to get the correct index
71 std::u16string remainString = u"";
72 std::u32string u32ContentToDelete;
73 if (startIndex < 0 || length < 0 || u16.length() < unsigned(startIndex)) {
74 return 0;
75 }
76 uint32_t substrLength = u16.length() - unsigned(startIndex);
77 if (isBackward) {
78 if (startIndex == static_cast<int32_t>(u16.length())) {
79 u32ContentToDelete = UtfUtils::Str16ToStr32(content);
80 } else {
81 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
82 remainString = u16.substr(startIndex, substrLength);
83 std::u16string temp = u16.substr(0, startIndex);
84 u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
85 }
86 if (u32ContentToDelete.length() == 0) {
87 return 0;
88 }
89 for (int32_t i = 0; i < length; i++) {
90 if (!BackwardDelete(u32ContentToDelete)) {
91 break;
92 }
93 }
94 content = UtfUtils::Str32ToStr16(u32ContentToDelete) + remainString;
95 } else {
96 if (startIndex == 0) {
97 u32ContentToDelete = UtfUtils::Str16ToStr32(content);
98 } else {
99 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
100 remainString = u16.substr(0, startIndex);
101 std::u16string temp = u16.substr(startIndex, substrLength);
102 u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
103 }
104 if (u32ContentToDelete.length() == 0) {
105 return 0;
106 }
107 for (int32_t i = 0; i < length; i++) {
108 if (!ForwardDelete(u32ContentToDelete)) {
109 break;
110 }
111 }
112 content = remainString + UtfUtils::Str32ToStr16(u32ContentToDelete);
113 }
114 // we need length to update the cursor
115 int32_t deletedLength = static_cast<int32_t>(u16.length() - content.length());
116 return deletedLength;
117 }
118
IsIndexInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)119 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
120 const std::u16string& content, int32_t& startIndex, int32_t& endIndex)
121 {
122 int32_t emojiStartIndex;
123 int32_t emojiEndIndex;
124 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
125 if (relation == EmojiRelation::IN_EMOJI) {
126 startIndex = emojiStartIndex;
127 endIndex = emojiEndIndex;
128 return true;
129 }
130 startIndex = index;
131 endIndex = index;
132 return false;
133 }
134
GetCharacterNum(const std::string & content)135 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
136 {
137 CHECK_NULL_RETURN(!content.empty(), 0);
138 std::u16string u16Content = StringUtils::Str8ToStr16(content);
139 return GetCharacterNum(u16Content);
140 }
141
GetCharacterNum(const std::u16string & u16Content)142 int32_t TextEmojiProcessor::GetCharacterNum(const std::u16string& u16Content)
143 {
144 CHECK_NULL_RETURN(!u16Content.empty(), 0);
145 int32_t charNum = 0;
146 int32_t pos = 0;
147 while (pos < static_cast<int32_t>(u16Content.length())) {
148 std::u32string u32Content;
149 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
150 if (forwardLenU16 > 1) {
151 // emoji exsit
152 pos += forwardLenU16;
153 } else {
154 // char after pos is not emoji, move one pos forward
155 pos++;
156 }
157 charNum++;
158 }
159 TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum u16contentLen=%{public}zu pos=%{public}d charNum=%{public}d",
160 u16Content.length(), pos, charNum);
161 return charNum;
162 }
163
// Classifies the UTF-16 position `index` relative to the emoji sequences around it.
//
// startIndex and endIndex are first set to `index`. Only the IN_EMOJI result produced by the
// length comparison below replaces them with the emoji range [startIndex, endIndex); every other
// result (including the IN_EMOJI returned for a preceding zero width joiner) leaves them at `index`.
// An index outside [0, u16Content.length()] yields NO_EMOJI.
EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
    const std::u16string& u16Content, int32_t& startIndex, int32_t& endIndex)
{
    endIndex = index;
    startIndex = index;
    if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
        return EmojiRelation::NO_EMOJI;
    }
    std::u32string u32Content;
    // Scan backwards from index. The helper receives index by reference and may advance it out of
    // a surrogate pair; u32Content then holds the text in front of that position.
    int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);

    int32_t emojiBackwardLengthU16 = 0;
    if (backwardLen > 0) {
        // convert the backward length from code points to UTF-16 code units
        int32_t u32Length = static_cast<int32_t>(u32Content.length());
        auto subIndex = u32Length - backwardLen;
        subIndex = std::clamp(subIndex, 0, static_cast<int32_t>(u32Content.length()));
        std::u16string tempstr = UtfUtils::Str32ToStr16(u32Content.substr(subIndex));
        emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
        // move index to where the backward scan started its sequence
        index -= emojiBackwardLengthU16;
        // endIndex still holds the caller's index, so this is the part of the emoji in front of it
        emojiBackwardLengthU16 = endIndex - index;
    }

    // get the whole emoji from the new start
    int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
    TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
        emojiBackwardLengthU16, emojiForwardLengthU16);
    if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
        // forward length is larger than the backward one, which means the caller's index is inside one emoji
        endIndex = index + emojiForwardLengthU16;
        startIndex = index;
        return EmojiRelation::IN_EMOJI;
    } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
        // nothing found backwards, but a multi-unit emoji starts here
        if (index > 0 && u16Content[index - 1] == u'\u200D') {
            // a zero width joiner right in front of index: reported as IN_EMOJI, range outputs untouched
            return EmojiRelation::IN_EMOJI;
        }
        return EmojiRelation::BEFORE_EMOJI;
    } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
        // emoji exists before index
        int32_t newStartIndex = index + emojiForwardLengthU16;
        int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
        if (forwardLenU16 > 1) {
            // forwardLenU16 > 1 means a real emoji is found
            return EmojiRelation::MIDDLE_EMOJI;
        } else {
            return EmojiRelation::AFTER_EMOJI;
        }
    } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
        // no emoji before index
        int32_t newStartIndex = index + emojiForwardLengthU16;
        int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
        if (forwardLenU16 > 1) {
            // forwardLenU16 > 1 means a real emoji is found
            return EmojiRelation::BEFORE_EMOJI;
        }
    }
    return EmojiRelation::NO_EMOJI;
}
221
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content)222 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content)
223 {
224 int32_t emojiStartIndex;
225 int32_t emojiEndIndex;
226 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
227 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
228 || relation == EmojiRelation::MIDDLE_EMOJI;
229 }
230
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content)231 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content)
232 {
233 int32_t emojiStartIndex;
234 int32_t emojiEndIndex;
235 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
236 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
237 || relation == EmojiRelation::MIDDLE_EMOJI;
238 }
239
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)240 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content,
241 int32_t& startIndex, int32_t& endIndex)
242 {
243 int32_t emojiStartIndex;
244 int32_t emojiEndIndex;
245 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
246 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
247 || relation == EmojiRelation::MIDDLE_EMOJI) {
248 startIndex = emojiStartIndex;
249 endIndex = emojiEndIndex;
250 return true;
251 }
252 startIndex = index;
253 endIndex = index;
254 return false;
255 }
256
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)257 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content,
258 int32_t& startIndex, int32_t& endIndex)
259 {
260 int32_t emojiStartIndex;
261 int32_t emojiEndIndex;
262 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
263 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
264 || relation == EmojiRelation::MIDDLE_EMOJI) {
265 startIndex = emojiStartIndex;
266 endIndex = emojiEndIndex;
267 return true;
268 }
269 startIndex = index;
270 endIndex = index;
271 return false;
272 }
273
SubU16string(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)274 std::u16string TextEmojiProcessor::SubU16string(
275 int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
276 {
277 TextEmojiSubStringRange range = CalSubU16stringRange(index, length, content, includeStartHalf, includeEndHalf);
278 int32_t rangeLength = range.endIndex - range.startIndex;
279 if (rangeLength == 0) {
280 return u"";
281 }
282 range.startIndex = std::clamp(range.startIndex, 0, static_cast<int32_t>(content.length()));
283 return content.substr(static_cast<uint32_t>(range.startIndex), static_cast<uint32_t>(rangeLength));
284 }
285
CalSubU16stringRange(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)286 TextEmojiSubStringRange TextEmojiProcessor::CalSubU16stringRange(
287 int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
288 {
289 int32_t startIndex = index;
290 int32_t endIndex = AddAndPreventOverflow(index, length);
291 int32_t emojiStartIndex = index; // [emojiStartIndex, emojiEndIndex)
292 int32_t emojiEndIndex = index;
293 // need to be converted to string for processing
294 // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
295 // exclude right overflow emoji
296 if (!includeEndHalf && IsIndexInEmoji(endIndex - 1, content, emojiStartIndex, emojiEndIndex) &&
297 emojiEndIndex > AddAndPreventOverflow(index, length)) {
298 emojiEndIndex = emojiStartIndex;
299 length = emojiEndIndex - index;
300 length = std::max(length, 0);
301 endIndex = AddAndPreventOverflow(index, length);
302 }
303 // process left emoji
304 if (IsIndexBeforeOrInEmoji(startIndex, content, emojiStartIndex, emojiEndIndex)) {
305 if (startIndex != emojiStartIndex && !includeStartHalf) {
306 startIndex = emojiEndIndex; // exclude current emoji
307 }
308 if (startIndex != emojiStartIndex && includeStartHalf) {
309 startIndex = emojiStartIndex; // include current emoji
310 }
311 }
312 // process right emoji
313 if (IsIndexAfterOrInEmoji(endIndex, content, emojiStartIndex, emojiEndIndex)) {
314 if (endIndex != emojiEndIndex && !includeEndHalf) {
315 endIndex = emojiStartIndex; // exclude current emoji
316 }
317 if (endIndex != emojiEndIndex && includeEndHalf) {
318 endIndex = emojiEndIndex; // include current emoji
319 }
320 }
321 TextEmojiSubStringRange result = { startIndex, endIndex };
322 return result;
323 }
324
ConvertU8stringUnpairedSurrogates(const std::string & value)325 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
326 {
327 // Unpaired surrogates are replaced with U+FFFD
328 icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
329 std::string result;
330 ustring.toUTF8String(result);
331 return result;
332 }
333
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)334 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
335 int32_t& startIndex, const std::u16string& u16Content)
336 {
337 if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
338 return 0;
339 }
340 do {
341 if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
342 break;
343 }
344 ++startIndex;
345 } while (1);
346 std::u16string temp = u16Content.substr(0, static_cast<uint32_t>(startIndex));
347 u32Content = UtfUtils::Str16ToStr32(temp);
348 return GetEmojiLengthAtEnd(u32Content, false);
349 }
350
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)351 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
352 int32_t& startIndex, const std::u16string& u16Content)
353 {
354 int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
355 if (u32Content.empty()) {
356 return 0;
357 }
358 return UtfUtils::Str32ToStr16(u32Content.substr(0, forwardLen)).length();
359 }
360
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)361 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
362 int32_t& startIndex, const std::u16string& u16Content)
363 {
364 if (startIndex >= static_cast<int32_t>(u16Content.length())) {
365 return 0;
366 }
367 do {
368 if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
369 break;
370 }
371 --startIndex;
372 } while (1);
373 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16Content.length()));
374 std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
375 u32Content = UtfUtils::Str16ToStr32(temp);
376 return GetEmojiLengthAtFront(u32Content, false);
377 }
378
IsEmojiModifierBase(uint32_t codePoint)379 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
380 {
381 // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
382 // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
383 // to handle with the compatibility, we need to add them back
384 if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
385 return true;
386 }
387 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
388 }
389
IsVariationSelector(uint32_t codePoint)390 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
391 {
392 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
393 }
394
IsRegionalIndicatorSymbol(uint32_t codePoint)395 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
396 {
397 return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
398 }
399
IsEmoji(uint32_t codePoint)400 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
401 {
402 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
403 }
404
IsEmojiModifier(uint32_t codePoint)405 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
406 {
407 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
408 }
409
IsTagSpec(uint32_t codePoint)410 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
411 {
412 // according to the https://www.unicode.org/charts/PDF/U0000.pdf
413 // 0xE0020 - 0xE007E are the visible tag specs.
414 // 0xE007F is CANCEL_TAG, not in here.
415 return 0xE0020 <= codePoint && codePoint <= 0xE007E;
416 }
417
IsKeycapBase(uint32_t codePoint)418 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
419 {
420 return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
421 }
422
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)423 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
424 {
425 deleteCount = 1;
426 if (codePoint == LINE_FEED) {
427 state = STATE_LF;
428 } else if (IsVariationSelector(codePoint)) { // only backward
429 state = STATE_VS;
430 } else if (codePoint == CARRIAGE_RETURN) { // only forward
431 state = STATE_CR;
432 } else if (IsRegionalIndicatorSymbol(codePoint)) {
433 state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
434 } else if (IsEmojiModifier(codePoint)) {
435 state = STATE_EM;
436 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
437 state = STATE_KEYCAP;
438 } else if (IsEmoji(codePoint)) {
439 state = STATE_EMOJI;
440 } else if (codePoint == CANCEL_TAG) {
441 state = STATE_IN_TAG_QUEUE;
442 } else {
443 state = isBackward ? STATE_FINISHED : STATE_SECOND;
444 }
445 }
446
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)447 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
448 {
449 if (isBackward) {
450 switch (state) {
451 case STATE_ODD_RIS:
452 if (IsRegionalIndicatorSymbol(codePoint)) {
453 ++deleteCount;
454 state = STATE_EVEN_RIS;
455 } else {
456 state = STATE_FINISHED;
457 }
458 break;
459 case STATE_EVEN_RIS:
460 if (IsRegionalIndicatorSymbol(codePoint)) {
461 state = STATE_FINISHED;
462 }
463 break;
464 }
465 } else {
466 switch (state) {
467 case STATE_ODD_RIS:
468 state = STATE_FINISHED;
469 break;
470 case STATE_EVEN_RIS:
471 if (IsRegionalIndicatorSymbol(codePoint)) {
472 ++deleteCount;
473 state = STATE_ODD_RIS;
474 } else {
475 state = STATE_FINISHED;
476 }
477 break;
478 }
479 }
480 }
481
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)482 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
483 {
484 if (isBackward) {
485 if (codePoint == CARRIAGE_RETURN) {
486 ++deleteCount;
487 }
488 state = STATE_FINISHED;
489 } else {
490 switch (state) {
491 case STATE_CR:
492 if (codePoint == LINE_FEED) {
493 ++deleteCount;
494 }
495 state = STATE_FINISHED;
496 break;
497 case STATE_LF:
498 state = STATE_FINISHED;
499 break;
500 }
501 }
502 }
503
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)504 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
505 bool isBackward)
506 {
507 if (isBackward) {
508 switch (state) {
509 case STATE_ZWJ:
510 if (IsEmoji(codePoint)) {
511 ++deleteCount; // delete zwj
512 ++deleteCount; // delete emoji
513 state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
514 } else if (IsVariationSelector(codePoint)) {
515 lastVSCount = 1;
516 state = STATE_VS_AND_ZWJ;
517 } else {
518 state = STATE_FINISHED;
519 }
520 break;
521 case STATE_VS_AND_ZWJ:
522 if (IsEmoji(codePoint)) {
523 ++deleteCount; // delete zwj
524 ++deleteCount; // delete emoji
525 deleteCount += lastVSCount;
526 lastVSCount = 0;
527 state = STATE_EMOJI;
528 } else {
529 state = STATE_FINISHED;
530 }
531 break;
532 }
533 } else {
534 if (IsEmoji(codePoint)) {
535 ++deleteCount;
536 state = STATE_EMOJI;
537 } else {
538 state = STATE_FINISHED;
539 }
540 }
541 }
542
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)543 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
544 {
545 if (isBackward) {
546 if (IsEmoji(codePoint)) {
547 ++deleteCount;
548 state = STATE_EMOJI;
549 return;
550 }
551 if (!IsVariationSelector(codePoint) &&
552 u_getCombiningClass(codePoint) == 0) {
553 ++deleteCount;
554 }
555 state = STATE_FINISHED;
556 } else {
557 if (codePoint == ZERO_WIDTH_JOINER) {
558 ++deleteCount;
559 state = STATE_ZWJ;
560 return;
561 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
562 ++deleteCount;
563 state = STATE_KEYCAP;
564 return;
565 }
566 state = STATE_FINISHED;
567 }
568 }
569
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)570 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
571 bool isBackward)
572 {
573 if (isBackward) {
574 switch (state) {
575 case STATE_KEYCAP:
576 if (IsVariationSelector(codePoint)) {
577 lastVSCount = 1;
578 state = STATE_VS_AND_KEYCAP;
579 return;
580 }
581 if (IsEmojiModifierBase(codePoint)) {
582 ++deleteCount;
583 state = STATE_FINISHED;
584 }
585 break;
586 case STATE_VS_AND_KEYCAP:
587 if (IsKeycapBase(codePoint)) {
588 deleteCount += lastVSCount + 1;
589 }
590 state = STATE_FINISHED;
591 break;
592 }
593 } else {
594 state = STATE_FINISHED;
595 }
596 }
597
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)598 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
599 bool isBackward)
600 {
601 if (isBackward) {
602 switch (state) {
603 case STATE_EM:
604 if (IsVariationSelector(codePoint)) {
605 lastVSCount = 1;
606 state = STATE_VS_AND_EM;
607 return;
608 } else if (IsEmojiModifierBase(codePoint)) {
609 ++deleteCount;
610 }
611 state = STATE_FINISHED;
612 break;
613 case STATE_VS_AND_EM:
614 if (IsEmojiModifierBase(codePoint)) {
615 deleteCount += lastVSCount + 1;
616 }
617 state = STATE_FINISHED;
618 break;
619 }
620 } else {
621 if (IsEmoji(codePoint)) {
622 ++deleteCount;
623 state = STATE_EMOJI;
624 return;
625 } else if (IsVariationSelector(codePoint)) {
626 ++deleteCount;
627 state = STATE_VS;
628 return;
629 } else if (codePoint == ZERO_WIDTH_JOINER) {
630 ++deleteCount;
631 state = STATE_ZWJ;
632 return;
633 } else if (IsEmojiModifierBase(codePoint)) {
634 ++deleteCount;
635 }
636 state = STATE_FINISHED;
637 }
638 }
639
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)640 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
641 {
642 if (isBackward) {
643 if (codePoint == ZERO_WIDTH_JOINER) {
644 state = STATE_ZWJ;
645 } else {
646 state = STATE_FINISHED;
647 }
648 } else {
649 if (codePoint == ZERO_WIDTH_JOINER) {
650 ++deleteCount;
651 state = STATE_ZWJ;
652 } else if (IsVariationSelector(codePoint)) {
653 ++deleteCount;
654 state = STATE_VS;
655 } else if (IsEmojiModifier(codePoint)) {
656 ++deleteCount;
657 state = STATE_EM;
658 } else if (IsTagSpec(codePoint)) {
659 ++deleteCount;
660 state = STATE_IN_TAG_QUEUE;
661 } else {
662 state = STATE_FINISHED;
663 }
664 }
665 }
666
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)667 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
668 {
669 if (IsVariationSelector(codePoint)) {
670 ++deleteCount;
671 state = STATE_VS;
672 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
673 ++deleteCount;
674 state = STATE_KEYCAP;
675 } else {
676 state = STATE_FINISHED;
677 }
678 }
679
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)680 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
681 {
682 if (isBackward) {
683 if (!IsTagSpec(codePoint)) {
684 state = STATE_FINISHED;
685 }
686 ++deleteCount;
687 } else {
688 if (IsTagSpec(codePoint)) {
689 ++deleteCount;
690 } else if (IsEmoji(codePoint)) {
691 state = STATE_FINISHED;
692 } else if (codePoint == CANCEL_TAG) {
693 ++deleteCount;
694 state = STATE_FINISHED;
695 } else {
696 ++deleteCount;
697 state = STATE_FINISHED;
698 }
699 }
700 }
701
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)702 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
703 {
704 int32_t deleteCount = 0;
705 int32_t lastVSCount = 0;
706 int32_t state = STATE_BEGIN;
707 int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
708 do {
709 uint32_t codePoint = u32Content[tempOffset];
710 tempOffset--;
711 switch (state) {
712 case STATE_BEGIN:
713 OnBeginState(codePoint, state, deleteCount, true);
714 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
715 // avoid non-emoji
716 return 0;
717 }
718 break;
719 case STATE_LF:
720 OnCRLFState(codePoint, state, deleteCount, true);
721 break;
722 case STATE_ODD_RIS:
723 case STATE_EVEN_RIS:
724 OnRISState(codePoint, state, deleteCount, true);
725 break;
726 case STATE_KEYCAP:
727 case STATE_VS_AND_KEYCAP:
728 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
729 break;
730 case STATE_EM:
731 case STATE_VS_AND_EM:
732 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
733 break;
734 case STATE_VS:
735 OnVSState(codePoint, state, deleteCount, true);
736 break;
737 case STATE_EMOJI:
738 OnEmojiState(codePoint, state, deleteCount, true);
739 break;
740 case STATE_ZWJ:
741 case STATE_VS_AND_ZWJ:
742 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
743 break;
744 case STATE_IN_TAG_QUEUE:
745 OnTagQueueState(codePoint, state, deleteCount, true);
746 break;
747 default:
748 break;
749 }
750 } while (tempOffset >= 0 && state != STATE_FINISHED);
751 return deleteCount;
752 }
753
BackwardDelete(std::u32string & u32Content)754 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
755 {
756 int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
757 return HandleDeleteAction(u32Content, deleteCount, true);
758 }
759
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)760 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
761 {
762 int32_t deleteCount = 0;
763 int32_t state = STATE_BEGIN;
764 int32_t tempOffset = 0;
765 int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
766 do {
767 int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
768 tempOffset++;
769 switch (state) {
770 case STATE_BEGIN:
771 OnBeginState(codePoint, state, deleteCount, false);
772 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
773 return 0;
774 }
775 break;
776 case STATE_SECOND:
777 OnForwardSecondState(codePoint, state, deleteCount);
778 break;
779 case STATE_CR:
780 case STATE_LF:
781 OnCRLFState(codePoint, state, deleteCount, false);
782 break;
783 case STATE_ODD_RIS:
784 case STATE_EVEN_RIS:
785 OnRISState(codePoint, state, deleteCount, false);
786 break;
787 case STATE_KEYCAP:
788 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
789 // in ForwardDelete, we dont need to care about lastVSCount.
790 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
791 // same as above
792 break;
793 case STATE_EM:
794 OnEMState(codePoint, state, deleteCount, deleteCount, false);
795 break;
796 case STATE_VS:
797 OnVSState(codePoint, state, deleteCount, false);
798 break;
799 case STATE_EMOJI:
800 OnEmojiState(codePoint, state, deleteCount, false);
801 break;
802 case STATE_ZWJ:
803 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
804 break;
805 case STATE_IN_TAG_QUEUE:
806 OnTagQueueState(codePoint, state, deleteCount, false);
807 break;
808 default:
809 break;
810 }
811 } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
812 return deleteCount;
813 }
814
ForwardDelete(std::u32string & u32Content)815 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
816 {
817 int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
818 return HandleDeleteAction(u32Content, deleteCount, false);
819 }
820
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)821 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
822 {
823 int32_t contentLength = static_cast<int32_t>(u32Content.length());
824 deleteCount = std::min(deleteCount, contentLength);
825 if (isBackward) {
826 if (deleteCount > 0) {
827 int32_t start = contentLength - deleteCount;
828 start = std::clamp(start, 0, static_cast<int32_t>(u32Content.length()));
829 u32Content.erase(start, deleteCount);
830 return true;
831 }
832 } else {
833 if (deleteCount > 0) {
834 u32Content.erase(0, deleteCount);
835 return true;
836 }
837 }
838 return false;
839 }
840
841 } // namespace OHOS::Ace
842