• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <limits>
16 
17 #include "core/text/text_emoji_processor.h"
18 
19 #include "base/utils/utf_helper.h"
20 #include <unicode/uchar.h>
21 
22 #include "unicode/unistr.h"
23 
24 namespace OHOS::Ace {
25 namespace {
26 
// Code points that the deletion state machine treats specially.
constexpr int32_t LINE_FEED = 0x0A;
constexpr int32_t CARRIAGE_RETURN = 0x0D;
constexpr int32_t COMBINING_ENCLOSING_KEYCAP = 0x20E3;
constexpr int32_t ZERO_WIDTH_JOINER = 0x200D;
constexpr int32_t CANCEL_TAG = 0xE007F;
// States of the emoji-sequence state machine driven by the On*State handlers.
// Abbreviations: VS = variation selector, EM = emoji modifier, ZWJ = zero width joiner,
// RIS = regional indicator symbol, LF/CR = line feed / carriage return.
constexpr int32_t STATE_BEGIN = 0;
constexpr int32_t STATE_SECOND = 1;
constexpr int32_t STATE_EM = 2;
constexpr int32_t STATE_VS_AND_KEYCAP = 3;
constexpr int32_t STATE_ZWJ = 4;
constexpr int32_t STATE_KEYCAP = 5;
constexpr int32_t STATE_EMOJI = 6;
constexpr int32_t STATE_VS_AND_EM = 7;
constexpr int32_t STATE_VS = 8;
constexpr int32_t STATE_VS_AND_ZWJ = 9;
constexpr int32_t STATE_LF = 10;
constexpr int32_t STATE_CR = 11;
constexpr int32_t STATE_IN_TAG_QUEUE = 12;
constexpr int32_t STATE_EVEN_RIS = 13;
constexpr int32_t STATE_ODD_RIS = 14;
// Terminal state: the scanning loops stop once a handler selects it.
constexpr int32_t STATE_FINISHED = 20;
// Saturation bound used by AddAndPreventOverflow.
constexpr int32_t MAX_INT = std::numeric_limits<int32_t>::max();
49 
AddAndPreventOverflow(int32_t a,int32_t b)50 int32_t AddAndPreventOverflow(int32_t a, int32_t b)
51 {
52     long tempA = static_cast<long>(a);
53     long tempB = static_cast<long>(b);
54     long ret = tempA + tempB;
55     if (ret > static_cast<long>(MAX_INT)) {
56         return MAX_INT;
57     } else if (ret < -static_cast<long>(MAX_INT)) {
58         return -MAX_INT;
59     } else {
60         return static_cast<int32_t>(ret);
61     }
62 }
63 
64 } // namespace
65 
Delete(int32_t startIndex,int32_t length,std::u16string & content,bool isBackward)66 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::u16string& content, bool isBackward)
67 {
68     std::u16string u16 = content;
69     // startIndex from selectController_->GetCaretIndex() is an utf-16 index
70     // so we need an u16string to get the correct index
71     std::u16string remainString = u"";
72     std::u32string u32ContentToDelete;
73     if (startIndex < 0 || length < 0 || u16.length() < unsigned(startIndex)) {
74         return 0;
75     }
76     uint32_t substrLength = u16.length() - unsigned(startIndex);
77     if (isBackward) {
78         if (startIndex == static_cast<int32_t>(u16.length())) {
79             u32ContentToDelete = UtfUtils::Str16ToStr32(content);
80         } else {
81             startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
82             remainString = u16.substr(startIndex, substrLength);
83             std::u16string temp = u16.substr(0, startIndex);
84             u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
85         }
86         if (u32ContentToDelete.length() == 0) {
87             return 0;
88         }
89         for (int32_t i = 0; i < length; i++) {
90             if (!BackwardDelete(u32ContentToDelete)) {
91                 break;
92             }
93         }
94         content = UtfUtils::Str32ToStr16(u32ContentToDelete) + remainString;
95     } else {
96         if (startIndex == 0) {
97             u32ContentToDelete = UtfUtils::Str16ToStr32(content);
98         } else {
99             startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
100             remainString = u16.substr(0, startIndex);
101             std::u16string temp = u16.substr(startIndex, substrLength);
102             u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
103         }
104         if (u32ContentToDelete.length() == 0) {
105             return 0;
106         }
107         for (int32_t i = 0; i < length; i++) {
108             if (!ForwardDelete(u32ContentToDelete)) {
109                 break;
110             }
111         }
112         content = remainString + UtfUtils::Str32ToStr16(u32ContentToDelete);
113     }
114     // we need length to update the cursor
115     int32_t deletedLength = static_cast<int32_t>(u16.length() - content.length());
116     return deletedLength;
117 }
118 
IsIndexInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)119 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
120     const std::u16string& content, int32_t& startIndex, int32_t& endIndex)
121 {
122     int32_t emojiStartIndex;
123     int32_t emojiEndIndex;
124     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
125     if (relation == EmojiRelation::IN_EMOJI) {
126         startIndex = emojiStartIndex;
127         endIndex = emojiEndIndex;
128         return true;
129     }
130     startIndex = index;
131     endIndex = index;
132     return false;
133 }
134 
GetCharacterNum(const std::string & content)135 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
136 {
137     CHECK_NULL_RETURN(!content.empty(), 0);
138     std::u16string u16Content = StringUtils::Str8ToStr16(content);
139     return GetCharacterNum(u16Content);
140 }
141 
GetCharacterNum(const std::u16string & u16Content)142 int32_t TextEmojiProcessor::GetCharacterNum(const std::u16string& u16Content)
143 {
144     CHECK_NULL_RETURN(!u16Content.empty(), 0);
145     int32_t charNum = 0;
146     int32_t pos = 0;
147     while (pos < static_cast<int32_t>(u16Content.length())) {
148         std::u32string u32Content;
149         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
150         if (forwardLenU16 > 1) {
151             // emoji exsit
152             pos += forwardLenU16;
153         } else {
154             // char after pos is not emoji, move one pos forward
155             pos++;
156         }
157         charNum++;
158     }
159     TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum u16contentLen=%{public}zu pos=%{public}d charNum=%{public}d",
160         u16Content.length(), pos, charNum);
161     return charNum;
162 }
163 
GetIndexRelationToEmoji(int32_t index,const std::u16string & u16Content,int32_t & startIndex,int32_t & endIndex)164 EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
165     const std::u16string& u16Content, int32_t& startIndex, int32_t& endIndex)
166 {
167     endIndex = index;
168     startIndex = index;
169     if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
170         return EmojiRelation::NO_EMOJI;
171     }
172     std::u32string u32Content;
173     int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);
174 
175     int32_t emojiBackwardLengthU16 = 0;
176     if (backwardLen > 0) {
177         int32_t u32Length = static_cast<int32_t>(u32Content.length());
178         auto subIndex = u32Length - backwardLen;
179         subIndex = std::clamp(subIndex, 0, static_cast<int32_t>(u32Content.length()));
180         std::u16string tempstr = UtfUtils::Str32ToStr16(u32Content.substr(subIndex));
181         emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
182         index -= emojiBackwardLengthU16;
183         emojiBackwardLengthU16 = endIndex - index; // calculate length of the part of emoji
184     }
185 
186     // get the whole emoji from the new start
187     int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
188     TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
189         emojiBackwardLengthU16, emojiForwardLengthU16);
190     if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
191         // forward length is larget than backward one, which means the startIndex is inside one emoji
192         endIndex = index + emojiForwardLengthU16;
193         startIndex = index;
194         return EmojiRelation::IN_EMOJI;
195     } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
196         if (index > 0 && u16Content[index - 1] == u'\u200D') {
197             return EmojiRelation::IN_EMOJI;
198         }
199         return EmojiRelation::BEFORE_EMOJI;
200     } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
201         // emoji exists before index
202         int32_t newStartIndex = index + emojiForwardLengthU16;
203         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
204         if (forwardLenU16 > 1) {
205             // forwardLenU16 > 1 means a real emoji is found
206             return EmojiRelation::MIDDLE_EMOJI;
207         } else {
208             return EmojiRelation::AFTER_EMOJI;
209         }
210     } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
211         // no emoji before index
212         int32_t newStartIndex = index + emojiForwardLengthU16;
213         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
214         if (forwardLenU16 > 1) {
215             // forwardLenU16 > 1 means a real emoji is found
216             return EmojiRelation::BEFORE_EMOJI;
217         }
218     }
219     return EmojiRelation::NO_EMOJI;
220 }
221 
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content)222 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content)
223 {
224     int32_t emojiStartIndex;
225     int32_t emojiEndIndex;
226     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
227     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
228         || relation == EmojiRelation::MIDDLE_EMOJI;
229 }
230 
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content)231 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content)
232 {
233     int32_t emojiStartIndex;
234     int32_t emojiEndIndex;
235     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
236     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
237         || relation == EmojiRelation::MIDDLE_EMOJI;
238 }
239 
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)240 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content,
241     int32_t& startIndex, int32_t& endIndex)
242 {
243     int32_t emojiStartIndex;
244     int32_t emojiEndIndex;
245     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
246     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
247         || relation == EmojiRelation::MIDDLE_EMOJI) {
248         startIndex = emojiStartIndex;
249         endIndex = emojiEndIndex;
250         return true;
251     }
252     startIndex = index;
253     endIndex = index;
254     return false;
255 }
256 
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)257 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content,
258     int32_t& startIndex, int32_t& endIndex)
259 {
260     int32_t emojiStartIndex;
261     int32_t emojiEndIndex;
262     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
263     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
264         || relation == EmojiRelation::MIDDLE_EMOJI) {
265         startIndex = emojiStartIndex;
266         endIndex = emojiEndIndex;
267         return true;
268     }
269     startIndex = index;
270     endIndex = index;
271     return false;
272 }
273 
SubU16string(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)274 std::u16string TextEmojiProcessor::SubU16string(
275     int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
276 {
277     TextEmojiSubStringRange range = CalSubU16stringRange(index, length, content, includeStartHalf, includeEndHalf);
278     int32_t rangeLength = range.endIndex - range.startIndex;
279     if (rangeLength == 0) {
280         return u"";
281     }
282     range.startIndex = std::clamp(range.startIndex, 0, static_cast<int32_t>(content.length()));
283     return content.substr(static_cast<uint32_t>(range.startIndex), static_cast<uint32_t>(rangeLength));
284 }
285 
CalSubU16stringRange(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)286 TextEmojiSubStringRange TextEmojiProcessor::CalSubU16stringRange(
287     int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
288 {
289     int32_t startIndex = index;
290     int32_t endIndex = AddAndPreventOverflow(index, length);
291     int32_t emojiStartIndex = index;   // [emojiStartIndex, emojiEndIndex)
292     int32_t emojiEndIndex = index;
293     // need to be converted to string for processing
294     // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
295     // exclude right overflow emoji
296     if (!includeEndHalf && IsIndexInEmoji(endIndex - 1, content, emojiStartIndex, emojiEndIndex) &&
297         emojiEndIndex > AddAndPreventOverflow(index, length)) {
298         emojiEndIndex = emojiStartIndex;
299         length = emojiEndIndex - index;
300         length = std::max(length, 0);
301         endIndex = AddAndPreventOverflow(index, length);
302     }
303     // process left emoji
304     if (IsIndexBeforeOrInEmoji(startIndex, content, emojiStartIndex, emojiEndIndex)) {
305         if (startIndex != emojiStartIndex && !includeStartHalf) {
306             startIndex = emojiEndIndex; // exclude current emoji
307         }
308         if (startIndex != emojiStartIndex && includeStartHalf) {
309             startIndex = emojiStartIndex; // include current emoji
310         }
311     }
312     // process right emoji
313     if (IsIndexAfterOrInEmoji(endIndex, content, emojiStartIndex, emojiEndIndex)) {
314         if (endIndex != emojiEndIndex && !includeEndHalf) {
315             endIndex = emojiStartIndex; // exclude current emoji
316         }
317         if (endIndex != emojiEndIndex && includeEndHalf) {
318             endIndex = emojiEndIndex; // include current emoji
319         }
320     }
321     TextEmojiSubStringRange result = { startIndex, endIndex };
322     return result;
323 }
324 
ConvertU8stringUnpairedSurrogates(const std::string & value)325 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
326 {
327     // Unpaired surrogates are replaced with U+FFFD
328     icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
329     std::string result;
330     ustring.toUTF8String(result);
331     return result;
332 }
333 
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)334 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
335     int32_t& startIndex, const std::u16string& u16Content)
336 {
337     if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
338         return 0;
339     }
340     do {
341         if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
342             break;
343         }
344         ++startIndex;
345     } while (1);
346     std::u16string temp = u16Content.substr(0, static_cast<uint32_t>(startIndex));
347     u32Content = UtfUtils::Str16ToStr32(temp);
348     return GetEmojiLengthAtEnd(u32Content, false);
349 }
350 
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)351 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
352     int32_t& startIndex, const std::u16string& u16Content)
353 {
354     int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
355     if (u32Content.empty()) {
356         return 0;
357     }
358     return UtfUtils::Str32ToStr16(u32Content.substr(0, forwardLen)).length();
359 }
360 
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)361 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
362     int32_t& startIndex, const std::u16string& u16Content)
363 {
364     if (startIndex >= static_cast<int32_t>(u16Content.length())) {
365         return 0;
366     }
367     do {
368         if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
369             break;
370         }
371         --startIndex;
372     } while (1);
373     startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16Content.length()));
374     std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
375     u32Content = UtfUtils::Str16ToStr32(temp);
376     return GetEmojiLengthAtFront(u32Content, false);
377 }
378 
IsEmojiModifierBase(uint32_t codePoint)379 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
380 {
381     // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
382     // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
383     // to handle with the compatibility, we need to add them back
384     if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
385         return true;
386     }
387     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
388 }
389 
IsVariationSelector(uint32_t codePoint)390 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
391 {
392     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
393 }
394 
IsRegionalIndicatorSymbol(uint32_t codePoint)395 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
396 {
397     return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
398 }
399 
IsEmoji(uint32_t codePoint)400 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
401 {
402     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
403 }
404 
IsEmojiModifier(uint32_t codePoint)405 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
406 {
407     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
408 }
409 
IsTagSpec(uint32_t codePoint)410 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
411 {
412     // according to the https://www.unicode.org/charts/PDF/U0000.pdf
413     // 0xE0020 - 0xE007E are the visible tag specs.
414     // 0xE007F is CANCEL_TAG, not in here.
415     return 0xE0020 <= codePoint && codePoint <= 0xE007E;
416 }
417 
IsKeycapBase(uint32_t codePoint)418 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
419 {
420     return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
421 }
422 
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)423 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
424 {
425     deleteCount = 1;
426     if (codePoint == LINE_FEED) {
427         state = STATE_LF;
428     } else if (IsVariationSelector(codePoint)) { // only backward
429         state = STATE_VS;
430     } else if (codePoint == CARRIAGE_RETURN) { // only forward
431         state = STATE_CR;
432     } else if (IsRegionalIndicatorSymbol(codePoint)) {
433         state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
434     } else if (IsEmojiModifier(codePoint)) {
435         state = STATE_EM;
436     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
437         state = STATE_KEYCAP;
438     } else if (IsEmoji(codePoint)) {
439         state = STATE_EMOJI;
440     } else if (codePoint == CANCEL_TAG) {
441         state = STATE_IN_TAG_QUEUE;
442     } else {
443         state = isBackward ? STATE_FINISHED : STATE_SECOND;
444     }
445 }
446 
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)447 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
448 {
449     if (isBackward) {
450         switch (state) {
451             case STATE_ODD_RIS:
452                 if (IsRegionalIndicatorSymbol(codePoint)) {
453                     ++deleteCount;
454                     state = STATE_EVEN_RIS;
455                 } else {
456                     state = STATE_FINISHED;
457                 }
458                 break;
459             case STATE_EVEN_RIS:
460                 if (IsRegionalIndicatorSymbol(codePoint)) {
461                     state = STATE_FINISHED;
462                 }
463                 break;
464         }
465     } else {
466         switch (state) {
467             case STATE_ODD_RIS:
468                 state = STATE_FINISHED;
469                 break;
470             case STATE_EVEN_RIS:
471                 if (IsRegionalIndicatorSymbol(codePoint)) {
472                     ++deleteCount;
473                     state = STATE_ODD_RIS;
474                 } else {
475                     state = STATE_FINISHED;
476                 }
477                 break;
478         }
479     }
480 }
481 
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)482 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
483 {
484     if (isBackward) {
485         if (codePoint == CARRIAGE_RETURN) {
486             ++deleteCount;
487         }
488         state = STATE_FINISHED;
489     } else {
490         switch (state) {
491             case STATE_CR:
492                 if (codePoint == LINE_FEED) {
493                     ++deleteCount;
494                 }
495                 state = STATE_FINISHED;
496                 break;
497             case STATE_LF:
498                 state = STATE_FINISHED;
499                 break;
500         }
501     }
502 }
503 
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)504 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
505     bool isBackward)
506 {
507     if (isBackward) {
508         switch (state) {
509             case STATE_ZWJ:
510                 if (IsEmoji(codePoint)) {
511                     ++deleteCount; // delete zwj
512                     ++deleteCount; // delete emoji
513                     state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
514                 } else if (IsVariationSelector(codePoint)) {
515                     lastVSCount = 1;
516                     state = STATE_VS_AND_ZWJ;
517                 } else {
518                     state = STATE_FINISHED;
519                 }
520                 break;
521             case STATE_VS_AND_ZWJ:
522                 if (IsEmoji(codePoint)) {
523                     ++deleteCount; // delete zwj
524                     ++deleteCount; // delete emoji
525                     deleteCount += lastVSCount;
526                     lastVSCount = 0;
527                     state = STATE_EMOJI;
528                 } else {
529                     state = STATE_FINISHED;
530                 }
531                 break;
532         }
533     } else {
534         if (IsEmoji(codePoint)) {
535             ++deleteCount;
536             state = STATE_EMOJI;
537         } else {
538             state = STATE_FINISHED;
539         }
540     }
541 }
542 
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)543 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
544 {
545     if (isBackward) {
546         if (IsEmoji(codePoint)) {
547             ++deleteCount;
548             state = STATE_EMOJI;
549             return;
550         }
551         if (!IsVariationSelector(codePoint) &&
552             u_getCombiningClass(codePoint) == 0) {
553             ++deleteCount;
554         }
555         state = STATE_FINISHED;
556     } else {
557         if (codePoint == ZERO_WIDTH_JOINER) {
558             ++deleteCount;
559             state = STATE_ZWJ;
560             return;
561         } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
562             ++deleteCount;
563             state = STATE_KEYCAP;
564             return;
565         }
566         state = STATE_FINISHED;
567     }
568 }
569 
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)570 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
571     bool isBackward)
572 {
573     if (isBackward) {
574         switch (state) {
575             case STATE_KEYCAP:
576                 if (IsVariationSelector(codePoint)) {
577                     lastVSCount = 1;
578                     state = STATE_VS_AND_KEYCAP;
579                     return;
580                 }
581                 if (IsEmojiModifierBase(codePoint)) {
582                     ++deleteCount;
583                     state = STATE_FINISHED;
584                 }
585                 break;
586             case STATE_VS_AND_KEYCAP:
587                 if (IsKeycapBase(codePoint)) {
588                     deleteCount += lastVSCount + 1;
589                 }
590                 state = STATE_FINISHED;
591                 break;
592         }
593     } else {
594         state = STATE_FINISHED;
595     }
596 }
597 
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)598 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
599     bool isBackward)
600 {
601     if (isBackward) {
602         switch (state) {
603             case STATE_EM:
604                 if (IsVariationSelector(codePoint)) {
605                     lastVSCount = 1;
606                     state = STATE_VS_AND_EM;
607                     return;
608                 } else if (IsEmojiModifierBase(codePoint)) {
609                     ++deleteCount;
610                 }
611                 state = STATE_FINISHED;
612                 break;
613             case STATE_VS_AND_EM:
614                 if (IsEmojiModifierBase(codePoint)) {
615                     deleteCount += lastVSCount + 1;
616                 }
617                 state = STATE_FINISHED;
618                 break;
619         }
620     } else {
621         if (IsEmoji(codePoint)) {
622             ++deleteCount;
623             state = STATE_EMOJI;
624             return;
625         } else if (IsVariationSelector(codePoint)) {
626             ++deleteCount;
627             state = STATE_VS;
628             return;
629         } else if (codePoint == ZERO_WIDTH_JOINER) {
630             ++deleteCount;
631             state = STATE_ZWJ;
632             return;
633         } else if (IsEmojiModifierBase(codePoint)) {
634             ++deleteCount;
635         }
636         state = STATE_FINISHED;
637     }
638 }
639 
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)640 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
641 {
642     if (isBackward) {
643         if (codePoint == ZERO_WIDTH_JOINER) {
644             state = STATE_ZWJ;
645         } else {
646             state = STATE_FINISHED;
647         }
648     } else {
649         if (codePoint == ZERO_WIDTH_JOINER) {
650             ++deleteCount;
651             state = STATE_ZWJ;
652         } else if (IsVariationSelector(codePoint)) {
653             ++deleteCount;
654             state = STATE_VS;
655         } else if (IsEmojiModifier(codePoint)) {
656             ++deleteCount;
657             state = STATE_EM;
658         } else if (IsTagSpec(codePoint)) {
659             ++deleteCount;
660             state = STATE_IN_TAG_QUEUE;
661         } else {
662             state = STATE_FINISHED;
663         }
664     }
665 }
666 
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)667 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
668 {
669     if (IsVariationSelector(codePoint)) {
670         ++deleteCount;
671         state = STATE_VS;
672     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
673         ++deleteCount;
674         state = STATE_KEYCAP;
675     } else {
676         state = STATE_FINISHED;
677     }
678 }
679 
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)680 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
681 {
682     if (isBackward) {
683         if (!IsTagSpec(codePoint)) {
684             state = STATE_FINISHED;
685         }
686         ++deleteCount;
687     } else {
688         if (IsTagSpec(codePoint)) {
689             ++deleteCount;
690         } else if (IsEmoji(codePoint)) {
691             state = STATE_FINISHED;
692         } else if (codePoint == CANCEL_TAG) {
693             ++deleteCount;
694             state = STATE_FINISHED;
695         } else {
696             ++deleteCount;
697             state = STATE_FINISHED;
698         }
699     }
700 }
701 
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)702 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
703 {
704     int32_t deleteCount = 0;
705     int32_t lastVSCount = 0;
706     int32_t state = STATE_BEGIN;
707     int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
708     do {
709         uint32_t codePoint = u32Content[tempOffset];
710         tempOffset--;
711         switch (state) {
712             case STATE_BEGIN:
713                 OnBeginState(codePoint, state, deleteCount, true);
714                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
715                     // avoid non-emoji
716                     return 0;
717                 }
718                 break;
719             case STATE_LF:
720                 OnCRLFState(codePoint, state, deleteCount, true);
721                 break;
722             case STATE_ODD_RIS:
723             case STATE_EVEN_RIS:
724                 OnRISState(codePoint, state, deleteCount, true);
725                 break;
726             case STATE_KEYCAP:
727             case STATE_VS_AND_KEYCAP:
728                 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
729                 break;
730             case STATE_EM:
731             case STATE_VS_AND_EM:
732                 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
733                 break;
734             case STATE_VS:
735                 OnVSState(codePoint, state, deleteCount, true);
736                 break;
737             case STATE_EMOJI:
738                 OnEmojiState(codePoint, state, deleteCount, true);
739                 break;
740             case STATE_ZWJ:
741             case STATE_VS_AND_ZWJ:
742                 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
743                 break;
744             case STATE_IN_TAG_QUEUE:
745                 OnTagQueueState(codePoint, state, deleteCount, true);
746                 break;
747             default:
748                 break;
749         }
750     } while (tempOffset >= 0 && state != STATE_FINISHED);
751     return deleteCount;
752 }
753 
BackwardDelete(std::u32string & u32Content)754 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
755 {
756     int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
757     return HandleDeleteAction(u32Content, deleteCount, true);
758 }
759 
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)760 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
761 {
762     int32_t deleteCount = 0;
763     int32_t state = STATE_BEGIN;
764     int32_t tempOffset = 0;
765     int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
766     do {
767         int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
768         tempOffset++;
769         switch (state) {
770             case STATE_BEGIN:
771                 OnBeginState(codePoint, state, deleteCount, false);
772                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
773                     return 0;
774                 }
775                 break;
776             case STATE_SECOND:
777                 OnForwardSecondState(codePoint, state, deleteCount);
778                 break;
779             case STATE_CR:
780             case STATE_LF:
781                 OnCRLFState(codePoint, state, deleteCount, false);
782                 break;
783             case STATE_ODD_RIS:
784             case STATE_EVEN_RIS:
785                 OnRISState(codePoint, state, deleteCount, false);
786                 break;
787             case STATE_KEYCAP:
788                 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
789                 // in ForwardDelete, we dont need to care about lastVSCount.
790                 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
791                 // same as above
792                 break;
793             case STATE_EM:
794                 OnEMState(codePoint, state, deleteCount, deleteCount, false);
795                 break;
796             case STATE_VS:
797                 OnVSState(codePoint, state, deleteCount, false);
798                 break;
799             case STATE_EMOJI:
800                 OnEmojiState(codePoint, state, deleteCount, false);
801                 break;
802             case STATE_ZWJ:
803                 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
804                 break;
805             case STATE_IN_TAG_QUEUE:
806                 OnTagQueueState(codePoint, state, deleteCount, false);
807                 break;
808             default:
809                 break;
810         }
811     } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
812     return deleteCount;
813 }
814 
ForwardDelete(std::u32string & u32Content)815 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
816 {
817     int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
818     return HandleDeleteAction(u32Content, deleteCount, false);
819 }
820 
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)821 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
822 {
823     int32_t contentLength = static_cast<int32_t>(u32Content.length());
824     deleteCount = std::min(deleteCount, contentLength);
825     if (isBackward) {
826         if (deleteCount > 0) {
827             int32_t start = contentLength - deleteCount;
828             start = std::clamp(start, 0, static_cast<int32_t>(u32Content.length()));
829             u32Content.erase(start, deleteCount);
830             return true;
831         }
832     } else {
833         if (deleteCount > 0) {
834             u32Content.erase(0, deleteCount);
835             return true;
836         }
837     }
838     return false;
839 }
840 
841 } // namespace OHOS::Ace
842