• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <limits>
16 
17 #include "core/text/text_emoji_processor.h"
18 
19 #include <unicode/uchar.h>
20 
21 #include "base/utils/string_utils.h"
22 #include "base/utils/utils.h"
23 #include "base/utils/utf_helper.h"
24 #include "unicode/unistr.h"
25 
26 namespace OHOS::Ace {
27 namespace {
28 
// Code points that receive special treatment while scanning emoji sequences.
constexpr int32_t LINE_FEED = 0x0A;
constexpr int32_t CARRIAGE_RETURN = 0x0D;
constexpr int32_t COMBINING_ENCLOSING_KEYCAP = 0x20E3;
constexpr int32_t ZERO_WIDTH_JOINER = 0x200D;
constexpr int32_t CANCEL_TAG = 0xE007F;

// States of the scanner that measures one deletable unit (emoji sequence, CRLF, ...).
constexpr int32_t STATE_BEGIN = 0;          // nothing consumed yet
constexpr int32_t STATE_SECOND = 1;         // forward scan: first code point was not a special one
constexpr int32_t STATE_EM = 2;             // an emoji modifier was consumed
constexpr int32_t STATE_VS_AND_KEYCAP = 3;  // backward scan: variation selector seen after a keycap
constexpr int32_t STATE_ZWJ = 4;            // a zero width joiner was consumed
constexpr int32_t STATE_KEYCAP = 5;         // a combining enclosing keycap was consumed
constexpr int32_t STATE_EMOJI = 6;          // an emoji code point was consumed
constexpr int32_t STATE_VS_AND_EM = 7;      // backward scan: variation selector seen after an emoji modifier
constexpr int32_t STATE_VS = 8;             // a variation selector was consumed
constexpr int32_t STATE_VS_AND_ZWJ = 9;     // backward scan: variation selector seen after a joiner
constexpr int32_t STATE_LF = 10;            // a line feed was consumed
constexpr int32_t STATE_CR = 11;            // a carriage return was consumed
constexpr int32_t STATE_IN_TAG_QUEUE = 12;  // inside a run of tag code points
constexpr int32_t STATE_EVEN_RIS = 13;      // regional indicator run, even position
constexpr int32_t STATE_ODD_RIS = 14;       // regional indicator run, odd position
constexpr int32_t STATE_FINISHED = 20;      // the unit is complete, stop scanning
constexpr int32_t MAX_INT = std::numeric_limits<int32_t>::max();

// Adds two 32-bit integers and saturates the result to [-MAX_INT, MAX_INT]
// instead of overflowing.
//
// The sum is computed in int64_t. The previous implementation widened to
// `long`, which is only guaranteed to be 32 bits wide and therefore does not
// prevent the overflow on ILP32 / LLP64 targets.
int32_t AddAndPreventOverflow(int32_t a, int32_t b)
{
    constexpr int64_t upperBound = MAX_INT;
    constexpr int64_t lowerBound = -upperBound; // symmetric on purpose: INT32_MIN is never returned
    const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    if (sum > upperBound) {
        return MAX_INT;
    }
    if (sum < lowerBound) {
        return -MAX_INT;
    }
    return static_cast<int32_t>(sum);
}
65 
66 } // namespace
67 
Delete(int32_t startIndex,int32_t length,std::u16string & content,bool isBackward)68 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::u16string& content, bool isBackward)
69 {
70     std::u16string u16 = content;
71     // startIndex from selectController_->GetCaretIndex() is an utf-16 index
72     // so we need an u16string to get the correct index
73     std::u16string remainString = u"";
74     std::u32string u32ContentToDelete;
75     if (startIndex < 0 || length < 0 || u16.length() < unsigned(startIndex)) {
76         return 0;
77     }
78     uint32_t substrLength = u16.length() - unsigned(startIndex);
79     if (isBackward) {
80         if (startIndex == static_cast<int32_t>(u16.length())) {
81             u32ContentToDelete = UtfUtils::Str16ToStr32(content);
82         } else {
83             startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
84             remainString = u16.substr(startIndex, substrLength);
85             std::u16string temp = u16.substr(0, startIndex);
86             u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
87         }
88         if (u32ContentToDelete.length() == 0) {
89             return 0;
90         }
91         for (int32_t i = 0; i < length; i++) {
92             if (!BackwardDelete(u32ContentToDelete)) {
93                 break;
94             }
95         }
96         content = UtfUtils::Str32ToStr16(u32ContentToDelete) + remainString;
97     } else {
98         if (startIndex == 0) {
99             u32ContentToDelete = UtfUtils::Str16ToStr32(content);
100         } else {
101             startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
102             remainString = u16.substr(0, startIndex);
103             std::u16string temp = u16.substr(startIndex, substrLength);
104             u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
105         }
106         if (u32ContentToDelete.length() == 0) {
107             return 0;
108         }
109         for (int32_t i = 0; i < length; i++) {
110             if (!ForwardDelete(u32ContentToDelete)) {
111                 break;
112             }
113         }
114         content = remainString + UtfUtils::Str32ToStr16(u32ContentToDelete);
115     }
116     // we need length to update the cursor
117     int32_t deletedLength = static_cast<int32_t>(u16.length() - content.length());
118     return deletedLength;
119 }
120 
IsIndexInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)121 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
122     const std::u16string& content, int32_t& startIndex, int32_t& endIndex)
123 {
124     int32_t emojiStartIndex;
125     int32_t emojiEndIndex;
126     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
127     if (relation == EmojiRelation::IN_EMOJI) {
128         startIndex = emojiStartIndex;
129         endIndex = emojiEndIndex;
130         return true;
131     }
132     startIndex = index;
133     endIndex = index;
134     return false;
135 }
136 
GetCharacterNum(const std::string & content)137 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
138 {
139     CHECK_NULL_RETURN(!content.empty(), 0);
140     std::u16string u16Content = StringUtils::Str8ToStr16(content);
141     return GetCharacterNum(u16Content);
142 }
143 
GetCharacterNum(const std::u16string & u16Content)144 int32_t TextEmojiProcessor::GetCharacterNum(const std::u16string& u16Content)
145 {
146     CHECK_NULL_RETURN(!u16Content.empty(), 0);
147     int32_t charNum = 0;
148     int32_t pos = 0;
149     while (pos < static_cast<int32_t>(u16Content.length())) {
150         std::u32string u32Content;
151         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
152         if (forwardLenU16 > 1) {
153             // emoji exsit
154             pos += forwardLenU16;
155         } else {
156             // char after pos is not emoji, move one pos forward
157             pos++;
158         }
159         charNum++;
160     }
161     TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum u16contentLen=%{public}zu pos=%{public}d charNum=%{public}d",
162         u16Content.length(), pos, charNum);
163     return charNum;
164 }
165 
GetIndexRelationToEmoji(int32_t index,const std::u16string & u16Content,int32_t & startIndex,int32_t & endIndex)166 EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
167     const std::u16string& u16Content, int32_t& startIndex, int32_t& endIndex)
168 {
169     endIndex = index;
170     startIndex = index;
171     if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
172         return EmojiRelation::NO_EMOJI;
173     }
174     std::u32string u32Content;
175     int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);
176 
177     int32_t emojiBackwardLengthU16 = 0;
178     if (backwardLen > 0) {
179         int32_t u32Length = static_cast<int32_t>(u32Content.length());
180         auto subIndex = u32Length - backwardLen;
181         subIndex = std::clamp(subIndex, 0, static_cast<int32_t>(u32Content.length()));
182         std::u16string tempstr = UtfUtils::Str32ToStr16(u32Content.substr(subIndex));
183         emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
184         index -= emojiBackwardLengthU16;
185         emojiBackwardLengthU16 = endIndex - index; // calculate length of the part of emoji
186     }
187 
188     // get the whole emoji from the new start
189     int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
190     TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
191         emojiBackwardLengthU16, emojiForwardLengthU16);
192     if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
193         // forward length is larget than backward one, which means the startIndex is inside one emoji
194         endIndex = index + emojiForwardLengthU16;
195         startIndex = index;
196         return EmojiRelation::IN_EMOJI;
197     } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
198         if (index > 0 && u16Content[index - 1] == u'\u200D') {
199             return EmojiRelation::IN_EMOJI;
200         }
201         return EmojiRelation::BEFORE_EMOJI;
202     } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
203         // emoji exists before index
204         int32_t newStartIndex = index + emojiForwardLengthU16;
205         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
206         if (forwardLenU16 > 1) {
207             // forwardLenU16 > 1 means a real emoji is found
208             return EmojiRelation::MIDDLE_EMOJI;
209         } else {
210             return EmojiRelation::AFTER_EMOJI;
211         }
212     } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
213         // no emoji before index
214         int32_t newStartIndex = index + emojiForwardLengthU16;
215         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
216         if (forwardLenU16 > 1) {
217             // forwardLenU16 > 1 means a real emoji is found
218             return EmojiRelation::BEFORE_EMOJI;
219         }
220     }
221     return EmojiRelation::NO_EMOJI;
222 }
223 
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content)224 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content)
225 {
226     int32_t emojiStartIndex;
227     int32_t emojiEndIndex;
228     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
229     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
230         || relation == EmojiRelation::MIDDLE_EMOJI;
231 }
232 
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content)233 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content)
234 {
235     int32_t emojiStartIndex;
236     int32_t emojiEndIndex;
237     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
238     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
239         || relation == EmojiRelation::MIDDLE_EMOJI;
240 }
241 
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)242 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content,
243     int32_t& startIndex, int32_t& endIndex)
244 {
245     int32_t emojiStartIndex;
246     int32_t emojiEndIndex;
247     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
248     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
249         || relation == EmojiRelation::MIDDLE_EMOJI) {
250         startIndex = emojiStartIndex;
251         endIndex = emojiEndIndex;
252         return true;
253     }
254     startIndex = index;
255     endIndex = index;
256     return false;
257 }
258 
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)259 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content,
260     int32_t& startIndex, int32_t& endIndex)
261 {
262     int32_t emojiStartIndex;
263     int32_t emojiEndIndex;
264     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
265     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
266         || relation == EmojiRelation::MIDDLE_EMOJI) {
267         startIndex = emojiStartIndex;
268         endIndex = emojiEndIndex;
269         return true;
270     }
271     startIndex = index;
272     endIndex = index;
273     return false;
274 }
275 
SubU16string(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)276 std::u16string TextEmojiProcessor::SubU16string(
277     int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
278 {
279     TextEmojiSubStringRange range = CalSubU16stringRange(index, length, content, includeStartHalf, includeEndHalf);
280     int32_t rangeLength = range.endIndex - range.startIndex;
281     if (rangeLength == 0) {
282         return u"";
283     }
284     range.startIndex = std::clamp(range.startIndex, 0, static_cast<int32_t>(content.length()));
285     return content.substr(static_cast<uint32_t>(range.startIndex), static_cast<uint32_t>(rangeLength));
286 }
287 
CalSubU16stringRange(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)288 TextEmojiSubStringRange TextEmojiProcessor::CalSubU16stringRange(
289     int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
290 {
291     int32_t startIndex = index;
292     int32_t endIndex = AddAndPreventOverflow(index, length);
293     int32_t emojiStartIndex = index;   // [emojiStartIndex, emojiEndIndex)
294     int32_t emojiEndIndex = index;
295     // need to be converted to string for processing
296     // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
297     // exclude right overflow emoji
298     if (!includeEndHalf && IsIndexInEmoji(endIndex - 1, content, emojiStartIndex, emojiEndIndex) &&
299         emojiEndIndex > AddAndPreventOverflow(index, length)) {
300         emojiEndIndex = emojiStartIndex;
301         length = emojiEndIndex - index;
302         length = std::max(length, 0);
303         endIndex = AddAndPreventOverflow(index, length);
304     }
305     // process left emoji
306     if (IsIndexBeforeOrInEmoji(startIndex, content, emojiStartIndex, emojiEndIndex)) {
307         if (startIndex != emojiStartIndex && !includeStartHalf) {
308             startIndex = emojiEndIndex; // exclude current emoji
309         }
310         if (startIndex != emojiStartIndex && includeStartHalf) {
311             startIndex = emojiStartIndex; // include current emoji
312         }
313     }
314     // process right emoji
315     if (IsIndexAfterOrInEmoji(endIndex, content, emojiStartIndex, emojiEndIndex)) {
316         if (endIndex != emojiEndIndex && !includeEndHalf) {
317             endIndex = emojiStartIndex; // exclude current emoji
318         }
319         if (endIndex != emojiEndIndex && includeEndHalf) {
320             endIndex = emojiEndIndex; // include current emoji
321         }
322     }
323     TextEmojiSubStringRange result = { startIndex, endIndex };
324     return result;
325 }
326 
ConvertU8stringUnpairedSurrogates(const std::string & value)327 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
328 {
329     // Unpaired surrogates are replaced with U+FFFD
330     icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
331     std::string result;
332     ustring.toUTF8String(result);
333     return result;
334 }
335 
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)336 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
337     int32_t& startIndex, const std::u16string& u16Content)
338 {
339     if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
340         return 0;
341     }
342     do {
343         if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
344             break;
345         }
346         ++startIndex;
347     } while (1);
348     std::u16string temp = u16Content.substr(0, static_cast<uint32_t>(startIndex));
349     u32Content = UtfUtils::Str16ToStr32(temp);
350     return GetEmojiLengthAtEnd(u32Content, false);
351 }
352 
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)353 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
354     int32_t& startIndex, const std::u16string& u16Content)
355 {
356     int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
357     if (u32Content.empty()) {
358         return 0;
359     }
360     return UtfUtils::Str32ToStr16(u32Content.substr(0, forwardLen)).length();
361 }
362 
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)363 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
364     int32_t& startIndex, const std::u16string& u16Content)
365 {
366     if (startIndex >= static_cast<int32_t>(u16Content.length())) {
367         return 0;
368     }
369     do {
370         if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
371             break;
372         }
373         --startIndex;
374     } while (1);
375     startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16Content.length()));
376     std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
377     u32Content = UtfUtils::Str16ToStr32(temp);
378     return GetEmojiLengthAtFront(u32Content, false);
379 }
380 
IsEmojiModifierBase(uint32_t codePoint)381 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
382 {
383     // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
384     // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
385     // to handle with the compatibility, we need to add them back
386     if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
387         return true;
388     }
389     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
390 }
391 
IsVariationSelector(uint32_t codePoint)392 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
393 {
394     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
395 }
396 
IsRegionalIndicatorSymbol(uint32_t codePoint)397 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
398 {
399     return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
400 }
401 
IsEmoji(uint32_t codePoint)402 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
403 {
404     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
405 }
406 
IsEmojiModifier(uint32_t codePoint)407 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
408 {
409     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
410 }
411 
IsTagSpec(uint32_t codePoint)412 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
413 {
414     // according to the https://www.unicode.org/charts/PDF/U0000.pdf
415     // 0xE0020 - 0xE007E are the visible tag specs.
416     // 0xE007F is CANCEL_TAG, not in here.
417     return 0xE0020 <= codePoint && codePoint <= 0xE007E;
418 }
419 
IsKeycapBase(uint32_t codePoint)420 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
421 {
422     return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
423 }
424 
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)425 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
426 {
427     deleteCount = 1;
428     if (codePoint == LINE_FEED) {
429         state = STATE_LF;
430     } else if (IsVariationSelector(codePoint)) { // only backward
431         state = STATE_VS;
432     } else if (codePoint == CARRIAGE_RETURN) { // only forward
433         state = STATE_CR;
434     } else if (IsRegionalIndicatorSymbol(codePoint)) {
435         state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
436     } else if (IsEmojiModifier(codePoint)) {
437         state = STATE_EM;
438     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
439         state = STATE_KEYCAP;
440     } else if (IsEmoji(codePoint)) {
441         state = STATE_EMOJI;
442     } else if (codePoint == CANCEL_TAG) {
443         state = STATE_IN_TAG_QUEUE;
444     } else {
445         state = isBackward ? STATE_FINISHED : STATE_SECOND;
446     }
447 }
448 
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)449 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
450 {
451     if (isBackward) {
452         switch (state) {
453             case STATE_ODD_RIS:
454                 if (IsRegionalIndicatorSymbol(codePoint)) {
455                     ++deleteCount;
456                     state = STATE_EVEN_RIS;
457                 } else {
458                     state = STATE_FINISHED;
459                 }
460                 break;
461             case STATE_EVEN_RIS:
462                 if (IsRegionalIndicatorSymbol(codePoint)) {
463                     state = STATE_FINISHED;
464                 }
465                 break;
466         }
467     } else {
468         switch (state) {
469             case STATE_ODD_RIS:
470                 state = STATE_FINISHED;
471                 break;
472             case STATE_EVEN_RIS:
473                 if (IsRegionalIndicatorSymbol(codePoint)) {
474                     ++deleteCount;
475                     state = STATE_ODD_RIS;
476                 } else {
477                     state = STATE_FINISHED;
478                 }
479                 break;
480         }
481     }
482 }
483 
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)484 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
485 {
486     if (isBackward) {
487         if (codePoint == CARRIAGE_RETURN) {
488             ++deleteCount;
489         }
490         state = STATE_FINISHED;
491     } else {
492         switch (state) {
493             case STATE_CR:
494                 if (codePoint == LINE_FEED) {
495                     ++deleteCount;
496                 }
497                 state = STATE_FINISHED;
498                 break;
499             case STATE_LF:
500                 state = STATE_FINISHED;
501                 break;
502         }
503     }
504 }
505 
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)506 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
507     bool isBackward)
508 {
509     if (isBackward) {
510         switch (state) {
511             case STATE_ZWJ:
512                 if (IsEmoji(codePoint)) {
513                     ++deleteCount; // delete zwj
514                     ++deleteCount; // delete emoji
515                     state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
516                 } else if (IsVariationSelector(codePoint)) {
517                     lastVSCount = 1;
518                     state = STATE_VS_AND_ZWJ;
519                 } else {
520                     state = STATE_FINISHED;
521                 }
522                 break;
523             case STATE_VS_AND_ZWJ:
524                 if (IsEmoji(codePoint)) {
525                     ++deleteCount; // delete zwj
526                     ++deleteCount; // delete emoji
527                     deleteCount += lastVSCount;
528                     lastVSCount = 0;
529                     state = STATE_EMOJI;
530                 } else {
531                     state = STATE_FINISHED;
532                 }
533                 break;
534         }
535     } else {
536         if (IsEmoji(codePoint)) {
537             ++deleteCount;
538             state = STATE_EMOJI;
539         } else {
540             state = STATE_FINISHED;
541         }
542     }
543 }
544 
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)545 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
546 {
547     if (isBackward) {
548         if (IsEmoji(codePoint)) {
549             ++deleteCount;
550             state = STATE_EMOJI;
551             return;
552         }
553         if (!IsVariationSelector(codePoint) &&
554             u_getCombiningClass(codePoint) == 0) {
555             ++deleteCount;
556         }
557         state = STATE_FINISHED;
558     } else {
559         if (codePoint == ZERO_WIDTH_JOINER) {
560             ++deleteCount;
561             state = STATE_ZWJ;
562             return;
563         } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
564             ++deleteCount;
565             state = STATE_KEYCAP;
566             return;
567         }
568         state = STATE_FINISHED;
569     }
570 }
571 
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)572 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
573     bool isBackward)
574 {
575     if (isBackward) {
576         switch (state) {
577             case STATE_KEYCAP:
578                 if (IsVariationSelector(codePoint)) {
579                     lastVSCount = 1;
580                     state = STATE_VS_AND_KEYCAP;
581                     return;
582                 }
583                 if (IsEmojiModifierBase(codePoint)) {
584                     ++deleteCount;
585                     state = STATE_FINISHED;
586                 }
587                 break;
588             case STATE_VS_AND_KEYCAP:
589                 if (IsKeycapBase(codePoint)) {
590                     deleteCount += lastVSCount + 1;
591                 }
592                 state = STATE_FINISHED;
593                 break;
594         }
595     } else {
596         state = STATE_FINISHED;
597     }
598 }
599 
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)600 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
601     bool isBackward)
602 {
603     if (isBackward) {
604         switch (state) {
605             case STATE_EM:
606                 if (IsVariationSelector(codePoint)) {
607                     lastVSCount = 1;
608                     state = STATE_VS_AND_EM;
609                     return;
610                 } else if (IsEmojiModifierBase(codePoint)) {
611                     ++deleteCount;
612                 }
613                 state = STATE_FINISHED;
614                 break;
615             case STATE_VS_AND_EM:
616                 if (IsEmojiModifierBase(codePoint)) {
617                     deleteCount += lastVSCount + 1;
618                 }
619                 state = STATE_FINISHED;
620                 break;
621         }
622     } else {
623         if (IsEmoji(codePoint)) {
624             ++deleteCount;
625             state = STATE_EMOJI;
626             return;
627         } else if (IsVariationSelector(codePoint)) {
628             ++deleteCount;
629             state = STATE_VS;
630             return;
631         } else if (codePoint == ZERO_WIDTH_JOINER) {
632             ++deleteCount;
633             state = STATE_ZWJ;
634             return;
635         } else if (IsEmojiModifierBase(codePoint)) {
636             ++deleteCount;
637         }
638         state = STATE_FINISHED;
639     }
640 }
641 
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)642 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
643 {
644     if (isBackward) {
645         if (codePoint == ZERO_WIDTH_JOINER) {
646             state = STATE_ZWJ;
647         } else {
648             state = STATE_FINISHED;
649         }
650     } else {
651         if (codePoint == ZERO_WIDTH_JOINER) {
652             ++deleteCount;
653             state = STATE_ZWJ;
654         } else if (IsVariationSelector(codePoint)) {
655             ++deleteCount;
656             state = STATE_VS;
657         } else if (IsEmojiModifier(codePoint)) {
658             ++deleteCount;
659             state = STATE_EM;
660         } else if (IsTagSpec(codePoint)) {
661             ++deleteCount;
662             state = STATE_IN_TAG_QUEUE;
663         } else {
664             state = STATE_FINISHED;
665         }
666     }
667 }
668 
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)669 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
670 {
671     if (IsVariationSelector(codePoint)) {
672         ++deleteCount;
673         state = STATE_VS;
674     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
675         ++deleteCount;
676         state = STATE_KEYCAP;
677     } else {
678         state = STATE_FINISHED;
679     }
680 }
681 
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)682 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
683 {
684     if (isBackward) {
685         if (!IsTagSpec(codePoint)) {
686             state = STATE_FINISHED;
687         }
688         ++deleteCount;
689     } else {
690         if (IsTagSpec(codePoint)) {
691             ++deleteCount;
692         } else if (IsEmoji(codePoint)) {
693             state = STATE_FINISHED;
694         } else if (codePoint == CANCEL_TAG) {
695             ++deleteCount;
696             state = STATE_FINISHED;
697         } else {
698             ++deleteCount;
699             state = STATE_FINISHED;
700         }
701     }
702 }
703 
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)704 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
705 {
706     int32_t deleteCount = 0;
707     int32_t lastVSCount = 0;
708     int32_t state = STATE_BEGIN;
709     int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
710     do {
711         uint32_t codePoint = u32Content[tempOffset];
712         tempOffset--;
713         switch (state) {
714             case STATE_BEGIN:
715                 OnBeginState(codePoint, state, deleteCount, true);
716                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
717                     // avoid non-emoji
718                     return 0;
719                 }
720                 break;
721             case STATE_LF:
722                 OnCRLFState(codePoint, state, deleteCount, true);
723                 break;
724             case STATE_ODD_RIS:
725             case STATE_EVEN_RIS:
726                 OnRISState(codePoint, state, deleteCount, true);
727                 break;
728             case STATE_KEYCAP:
729             case STATE_VS_AND_KEYCAP:
730                 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
731                 break;
732             case STATE_EM:
733             case STATE_VS_AND_EM:
734                 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
735                 break;
736             case STATE_VS:
737                 OnVSState(codePoint, state, deleteCount, true);
738                 break;
739             case STATE_EMOJI:
740                 OnEmojiState(codePoint, state, deleteCount, true);
741                 break;
742             case STATE_ZWJ:
743             case STATE_VS_AND_ZWJ:
744                 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
745                 break;
746             case STATE_IN_TAG_QUEUE:
747                 OnTagQueueState(codePoint, state, deleteCount, true);
748                 break;
749             default:
750                 break;
751         }
752     } while (tempOffset >= 0 && state != STATE_FINISHED);
753     return deleteCount;
754 }
755 
BackwardDelete(std::u32string & u32Content)756 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
757 {
758     int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
759     return HandleDeleteAction(u32Content, deleteCount, true);
760 }
761 
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)762 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
763 {
764     int32_t deleteCount = 0;
765     int32_t state = STATE_BEGIN;
766     int32_t tempOffset = 0;
767     int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
768     do {
769         int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
770         tempOffset++;
771         switch (state) {
772             case STATE_BEGIN:
773                 OnBeginState(codePoint, state, deleteCount, false);
774                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
775                     return 0;
776                 }
777                 break;
778             case STATE_SECOND:
779                 OnForwardSecondState(codePoint, state, deleteCount);
780                 break;
781             case STATE_CR:
782             case STATE_LF:
783                 OnCRLFState(codePoint, state, deleteCount, false);
784                 break;
785             case STATE_ODD_RIS:
786             case STATE_EVEN_RIS:
787                 OnRISState(codePoint, state, deleteCount, false);
788                 break;
789             case STATE_KEYCAP:
790                 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
791                 // in ForwardDelete, we dont need to care about lastVSCount.
792                 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
793                 // same as above
794                 break;
795             case STATE_EM:
796                 OnEMState(codePoint, state, deleteCount, deleteCount, false);
797                 break;
798             case STATE_VS:
799                 OnVSState(codePoint, state, deleteCount, false);
800                 break;
801             case STATE_EMOJI:
802                 OnEmojiState(codePoint, state, deleteCount, false);
803                 break;
804             case STATE_ZWJ:
805                 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
806                 break;
807             case STATE_IN_TAG_QUEUE:
808                 OnTagQueueState(codePoint, state, deleteCount, false);
809                 break;
810             default:
811                 break;
812         }
813     } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
814     return deleteCount;
815 }
816 
ForwardDelete(std::u32string & u32Content)817 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
818 {
819     int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
820     return HandleDeleteAction(u32Content, deleteCount, false);
821 }
822 
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)823 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
824 {
825     int32_t contentLength = static_cast<int32_t>(u32Content.length());
826     deleteCount = std::min(deleteCount, contentLength);
827     if (isBackward) {
828         if (deleteCount > 0) {
829             int32_t start = contentLength - deleteCount;
830             start = std::clamp(start, 0, static_cast<int32_t>(u32Content.length()));
831             u32Content.erase(start, deleteCount);
832             return true;
833         }
834     } else {
835         if (deleteCount > 0) {
836             u32Content.erase(0, deleteCount);
837             return true;
838         }
839     }
840     return false;
841 }
842 
843 } // namespace OHOS::Ace
844