1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <limits>
16
17 #include "core/text/text_emoji_processor.h"
18
19 #include <unicode/uchar.h>
20
21 #include "base/utils/string_utils.h"
22 #include "base/utils/utils.h"
23 #include "base/utils/utf_helper.h"
24 #include "unicode/unistr.h"
25
26 namespace OHOS::Ace {
namespace {

// Code points that get dedicated handling in the state machine below.
constexpr int32_t LINE_FEED = 0x0A;
constexpr int32_t CARRIAGE_RETURN = 0x0D;
constexpr int32_t COMBINING_ENCLOSING_KEYCAP = 0x20E3;
constexpr int32_t ZERO_WIDTH_JOINER = 0x200D;
constexpr int32_t CANCEL_TAG = 0xE007F;

// States of the state machine driven by GetEmojiLengthAtEnd (backward scan)
// and GetEmojiLengthAtFront (forward scan).
constexpr int32_t STATE_BEGIN = 0;
constexpr int32_t STATE_SECOND = 1;
constexpr int32_t STATE_EM = 2;
constexpr int32_t STATE_VS_AND_KEYCAP = 3;
constexpr int32_t STATE_ZWJ = 4;
constexpr int32_t STATE_KEYCAP = 5;
constexpr int32_t STATE_EMOJI = 6;
constexpr int32_t STATE_VS_AND_EM = 7;
constexpr int32_t STATE_VS = 8;
constexpr int32_t STATE_VS_AND_ZWJ = 9;
constexpr int32_t STATE_LF = 10;
constexpr int32_t STATE_CR = 11;
constexpr int32_t STATE_IN_TAG_QUEUE = 12;
constexpr int32_t STATE_EVEN_RIS = 13;
constexpr int32_t STATE_ODD_RIS = 14;
constexpr int32_t STATE_FINISHED = 20;
constexpr int32_t MAX_INT = std::numeric_limits<int32_t>::max();

// Returns a + b, saturated to the symmetric range [-MAX_INT, MAX_INT].
//
// The sum is computed in int64_t so that it cannot overflow on any platform
// (a plain `long` is only 32 bits wide on ILP32 and LLP64 targets).
// The lower bound is deliberately -MAX_INT rather than INT32_MIN, so the result
// can always be negated or decremented by one without overflowing.
int32_t AddAndPreventOverflow(int32_t a, int32_t b)
{
    constexpr int64_t upperBound = MAX_INT;
    constexpr int64_t lowerBound = -upperBound;
    const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    if (sum > upperBound) {
        return MAX_INT;
    }
    if (sum < lowerBound) {
        return -MAX_INT;
    }
    return static_cast<int32_t>(sum);
}

} // namespace
67
Delete(int32_t startIndex,int32_t length,std::u16string & content,bool isBackward)68 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::u16string& content, bool isBackward)
69 {
70 std::u16string u16 = content;
71 // startIndex from selectController_->GetCaretIndex() is an utf-16 index
72 // so we need an u16string to get the correct index
73 std::u16string remainString = u"";
74 std::u32string u32ContentToDelete;
75 if (startIndex < 0 || length < 0 || u16.length() < unsigned(startIndex)) {
76 return 0;
77 }
78 uint32_t substrLength = u16.length() - unsigned(startIndex);
79 if (isBackward) {
80 if (startIndex == static_cast<int32_t>(u16.length())) {
81 u32ContentToDelete = UtfUtils::Str16ToStr32(content);
82 } else {
83 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
84 remainString = u16.substr(startIndex, substrLength);
85 std::u16string temp = u16.substr(0, startIndex);
86 u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
87 }
88 if (u32ContentToDelete.length() == 0) {
89 return 0;
90 }
91 for (int32_t i = 0; i < length; i++) {
92 if (!BackwardDelete(u32ContentToDelete)) {
93 break;
94 }
95 }
96 content = UtfUtils::Str32ToStr16(u32ContentToDelete) + remainString;
97 } else {
98 if (startIndex == 0) {
99 u32ContentToDelete = UtfUtils::Str16ToStr32(content);
100 } else {
101 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16.length()));
102 remainString = u16.substr(0, startIndex);
103 std::u16string temp = u16.substr(startIndex, substrLength);
104 u32ContentToDelete = UtfUtils::Str16ToStr32(temp);
105 }
106 if (u32ContentToDelete.length() == 0) {
107 return 0;
108 }
109 for (int32_t i = 0; i < length; i++) {
110 if (!ForwardDelete(u32ContentToDelete)) {
111 break;
112 }
113 }
114 content = remainString + UtfUtils::Str32ToStr16(u32ContentToDelete);
115 }
116 // we need length to update the cursor
117 int32_t deletedLength = static_cast<int32_t>(u16.length() - content.length());
118 return deletedLength;
119 }
120
IsIndexInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)121 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
122 const std::u16string& content, int32_t& startIndex, int32_t& endIndex)
123 {
124 int32_t emojiStartIndex;
125 int32_t emojiEndIndex;
126 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
127 if (relation == EmojiRelation::IN_EMOJI) {
128 startIndex = emojiStartIndex;
129 endIndex = emojiEndIndex;
130 return true;
131 }
132 startIndex = index;
133 endIndex = index;
134 return false;
135 }
136
GetCharacterNum(const std::string & content)137 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
138 {
139 CHECK_NULL_RETURN(!content.empty(), 0);
140 std::u16string u16Content = StringUtils::Str8ToStr16(content);
141 return GetCharacterNum(u16Content);
142 }
143
GetCharacterNum(const std::u16string & u16Content)144 int32_t TextEmojiProcessor::GetCharacterNum(const std::u16string& u16Content)
145 {
146 CHECK_NULL_RETURN(!u16Content.empty(), 0);
147 int32_t charNum = 0;
148 int32_t pos = 0;
149 while (pos < static_cast<int32_t>(u16Content.length())) {
150 std::u32string u32Content;
151 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
152 if (forwardLenU16 > 1) {
153 // emoji exsit
154 pos += forwardLenU16;
155 } else {
156 // char after pos is not emoji, move one pos forward
157 pos++;
158 }
159 charNum++;
160 }
161 TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum u16contentLen=%{public}zu pos=%{public}d charNum=%{public}d",
162 u16Content.length(), pos, charNum);
163 return charNum;
164 }
165
GetIndexRelationToEmoji(int32_t index,const std::u16string & u16Content,int32_t & startIndex,int32_t & endIndex)166 EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
167 const std::u16string& u16Content, int32_t& startIndex, int32_t& endIndex)
168 {
169 endIndex = index;
170 startIndex = index;
171 if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
172 return EmojiRelation::NO_EMOJI;
173 }
174 std::u32string u32Content;
175 int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);
176
177 int32_t emojiBackwardLengthU16 = 0;
178 if (backwardLen > 0) {
179 int32_t u32Length = static_cast<int32_t>(u32Content.length());
180 auto subIndex = u32Length - backwardLen;
181 subIndex = std::clamp(subIndex, 0, static_cast<int32_t>(u32Content.length()));
182 std::u16string tempstr = UtfUtils::Str32ToStr16(u32Content.substr(subIndex));
183 emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
184 index -= emojiBackwardLengthU16;
185 emojiBackwardLengthU16 = endIndex - index; // calculate length of the part of emoji
186 }
187
188 // get the whole emoji from the new start
189 int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
190 TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
191 emojiBackwardLengthU16, emojiForwardLengthU16);
192 if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
193 // forward length is larget than backward one, which means the startIndex is inside one emoji
194 endIndex = index + emojiForwardLengthU16;
195 startIndex = index;
196 return EmojiRelation::IN_EMOJI;
197 } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
198 if (index > 0 && u16Content[index - 1] == u'\u200D') {
199 return EmojiRelation::IN_EMOJI;
200 }
201 return EmojiRelation::BEFORE_EMOJI;
202 } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
203 // emoji exists before index
204 int32_t newStartIndex = index + emojiForwardLengthU16;
205 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
206 if (forwardLenU16 > 1) {
207 // forwardLenU16 > 1 means a real emoji is found
208 return EmojiRelation::MIDDLE_EMOJI;
209 } else {
210 return EmojiRelation::AFTER_EMOJI;
211 }
212 } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
213 // no emoji before index
214 int32_t newStartIndex = index + emojiForwardLengthU16;
215 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
216 if (forwardLenU16 > 1) {
217 // forwardLenU16 > 1 means a real emoji is found
218 return EmojiRelation::BEFORE_EMOJI;
219 }
220 }
221 return EmojiRelation::NO_EMOJI;
222 }
223
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content)224 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content)
225 {
226 int32_t emojiStartIndex;
227 int32_t emojiEndIndex;
228 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
229 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
230 || relation == EmojiRelation::MIDDLE_EMOJI;
231 }
232
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content)233 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content)
234 {
235 int32_t emojiStartIndex;
236 int32_t emojiEndIndex;
237 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
238 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
239 || relation == EmojiRelation::MIDDLE_EMOJI;
240 }
241
IsIndexBeforeOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)242 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::u16string& content,
243 int32_t& startIndex, int32_t& endIndex)
244 {
245 int32_t emojiStartIndex;
246 int32_t emojiEndIndex;
247 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
248 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
249 || relation == EmojiRelation::MIDDLE_EMOJI) {
250 startIndex = emojiStartIndex;
251 endIndex = emojiEndIndex;
252 return true;
253 }
254 startIndex = index;
255 endIndex = index;
256 return false;
257 }
258
IsIndexAfterOrInEmoji(int32_t index,const std::u16string & content,int32_t & startIndex,int32_t & endIndex)259 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::u16string& content,
260 int32_t& startIndex, int32_t& endIndex)
261 {
262 int32_t emojiStartIndex;
263 int32_t emojiEndIndex;
264 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
265 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
266 || relation == EmojiRelation::MIDDLE_EMOJI) {
267 startIndex = emojiStartIndex;
268 endIndex = emojiEndIndex;
269 return true;
270 }
271 startIndex = index;
272 endIndex = index;
273 return false;
274 }
275
SubU16string(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)276 std::u16string TextEmojiProcessor::SubU16string(
277 int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
278 {
279 TextEmojiSubStringRange range = CalSubU16stringRange(index, length, content, includeStartHalf, includeEndHalf);
280 int32_t rangeLength = range.endIndex - range.startIndex;
281 if (rangeLength == 0) {
282 return u"";
283 }
284 range.startIndex = std::clamp(range.startIndex, 0, static_cast<int32_t>(content.length()));
285 return content.substr(static_cast<uint32_t>(range.startIndex), static_cast<uint32_t>(rangeLength));
286 }
287
CalSubU16stringRange(int32_t index,int32_t length,const std::u16string & content,bool includeStartHalf,bool includeEndHalf)288 TextEmojiSubStringRange TextEmojiProcessor::CalSubU16stringRange(
289 int32_t index, int32_t length, const std::u16string& content, bool includeStartHalf, bool includeEndHalf)
290 {
291 int32_t startIndex = index;
292 int32_t endIndex = AddAndPreventOverflow(index, length);
293 int32_t emojiStartIndex = index; // [emojiStartIndex, emojiEndIndex)
294 int32_t emojiEndIndex = index;
295 // need to be converted to string for processing
296 // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
297 // exclude right overflow emoji
298 if (!includeEndHalf && IsIndexInEmoji(endIndex - 1, content, emojiStartIndex, emojiEndIndex) &&
299 emojiEndIndex > AddAndPreventOverflow(index, length)) {
300 emojiEndIndex = emojiStartIndex;
301 length = emojiEndIndex - index;
302 length = std::max(length, 0);
303 endIndex = AddAndPreventOverflow(index, length);
304 }
305 // process left emoji
306 if (IsIndexBeforeOrInEmoji(startIndex, content, emojiStartIndex, emojiEndIndex)) {
307 if (startIndex != emojiStartIndex && !includeStartHalf) {
308 startIndex = emojiEndIndex; // exclude current emoji
309 }
310 if (startIndex != emojiStartIndex && includeStartHalf) {
311 startIndex = emojiStartIndex; // include current emoji
312 }
313 }
314 // process right emoji
315 if (IsIndexAfterOrInEmoji(endIndex, content, emojiStartIndex, emojiEndIndex)) {
316 if (endIndex != emojiEndIndex && !includeEndHalf) {
317 endIndex = emojiStartIndex; // exclude current emoji
318 }
319 if (endIndex != emojiEndIndex && includeEndHalf) {
320 endIndex = emojiEndIndex; // include current emoji
321 }
322 }
323 TextEmojiSubStringRange result = { startIndex, endIndex };
324 return result;
325 }
326
ConvertU8stringUnpairedSurrogates(const std::string & value)327 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
328 {
329 // Unpaired surrogates are replaced with U+FFFD
330 icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
331 std::string result;
332 ustring.toUTF8String(result);
333 return result;
334 }
335
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)336 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
337 int32_t& startIndex, const std::u16string& u16Content)
338 {
339 if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
340 return 0;
341 }
342 do {
343 if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
344 break;
345 }
346 ++startIndex;
347 } while (1);
348 std::u16string temp = u16Content.substr(0, static_cast<uint32_t>(startIndex));
349 u32Content = UtfUtils::Str16ToStr32(temp);
350 return GetEmojiLengthAtEnd(u32Content, false);
351 }
352
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)353 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
354 int32_t& startIndex, const std::u16string& u16Content)
355 {
356 int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
357 if (u32Content.empty()) {
358 return 0;
359 }
360 return UtfUtils::Str32ToStr16(u32Content.substr(0, forwardLen)).length();
361 }
362
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)363 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
364 int32_t& startIndex, const std::u16string& u16Content)
365 {
366 if (startIndex >= static_cast<int32_t>(u16Content.length())) {
367 return 0;
368 }
369 do {
370 if (!UtfUtils::IsIndexInPairedSurrogates(startIndex, u16Content)) {
371 break;
372 }
373 --startIndex;
374 } while (1);
375 startIndex = std::clamp(startIndex, 0, static_cast<int32_t>(u16Content.length()));
376 std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
377 u32Content = UtfUtils::Str16ToStr32(temp);
378 return GetEmojiLengthAtFront(u32Content, false);
379 }
380
IsEmojiModifierBase(uint32_t codePoint)381 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
382 {
383 // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
384 // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
385 // to handle with the compatibility, we need to add them back
386 if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
387 return true;
388 }
389 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
390 }
391
IsVariationSelector(uint32_t codePoint)392 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
393 {
394 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
395 }
396
IsRegionalIndicatorSymbol(uint32_t codePoint)397 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
398 {
399 return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
400 }
401
IsEmoji(uint32_t codePoint)402 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
403 {
404 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
405 }
406
IsEmojiModifier(uint32_t codePoint)407 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
408 {
409 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
410 }
411
IsTagSpec(uint32_t codePoint)412 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
413 {
414 // according to the https://www.unicode.org/charts/PDF/U0000.pdf
415 // 0xE0020 - 0xE007E are the visible tag specs.
416 // 0xE007F is CANCEL_TAG, not in here.
417 return 0xE0020 <= codePoint && codePoint <= 0xE007E;
418 }
419
IsKeycapBase(uint32_t codePoint)420 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
421 {
422 return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
423 }
424
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)425 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
426 {
427 deleteCount = 1;
428 if (codePoint == LINE_FEED) {
429 state = STATE_LF;
430 } else if (IsVariationSelector(codePoint)) { // only backward
431 state = STATE_VS;
432 } else if (codePoint == CARRIAGE_RETURN) { // only forward
433 state = STATE_CR;
434 } else if (IsRegionalIndicatorSymbol(codePoint)) {
435 state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
436 } else if (IsEmojiModifier(codePoint)) {
437 state = STATE_EM;
438 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
439 state = STATE_KEYCAP;
440 } else if (IsEmoji(codePoint)) {
441 state = STATE_EMOJI;
442 } else if (codePoint == CANCEL_TAG) {
443 state = STATE_IN_TAG_QUEUE;
444 } else {
445 state = isBackward ? STATE_FINISHED : STATE_SECOND;
446 }
447 }
448
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)449 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
450 {
451 if (isBackward) {
452 switch (state) {
453 case STATE_ODD_RIS:
454 if (IsRegionalIndicatorSymbol(codePoint)) {
455 ++deleteCount;
456 state = STATE_EVEN_RIS;
457 } else {
458 state = STATE_FINISHED;
459 }
460 break;
461 case STATE_EVEN_RIS:
462 if (IsRegionalIndicatorSymbol(codePoint)) {
463 state = STATE_FINISHED;
464 }
465 break;
466 }
467 } else {
468 switch (state) {
469 case STATE_ODD_RIS:
470 state = STATE_FINISHED;
471 break;
472 case STATE_EVEN_RIS:
473 if (IsRegionalIndicatorSymbol(codePoint)) {
474 ++deleteCount;
475 state = STATE_ODD_RIS;
476 } else {
477 state = STATE_FINISHED;
478 }
479 break;
480 }
481 }
482 }
483
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)484 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
485 {
486 if (isBackward) {
487 if (codePoint == CARRIAGE_RETURN) {
488 ++deleteCount;
489 }
490 state = STATE_FINISHED;
491 } else {
492 switch (state) {
493 case STATE_CR:
494 if (codePoint == LINE_FEED) {
495 ++deleteCount;
496 }
497 state = STATE_FINISHED;
498 break;
499 case STATE_LF:
500 state = STATE_FINISHED;
501 break;
502 }
503 }
504 }
505
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)506 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
507 bool isBackward)
508 {
509 if (isBackward) {
510 switch (state) {
511 case STATE_ZWJ:
512 if (IsEmoji(codePoint)) {
513 ++deleteCount; // delete zwj
514 ++deleteCount; // delete emoji
515 state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
516 } else if (IsVariationSelector(codePoint)) {
517 lastVSCount = 1;
518 state = STATE_VS_AND_ZWJ;
519 } else {
520 state = STATE_FINISHED;
521 }
522 break;
523 case STATE_VS_AND_ZWJ:
524 if (IsEmoji(codePoint)) {
525 ++deleteCount; // delete zwj
526 ++deleteCount; // delete emoji
527 deleteCount += lastVSCount;
528 lastVSCount = 0;
529 state = STATE_EMOJI;
530 } else {
531 state = STATE_FINISHED;
532 }
533 break;
534 }
535 } else {
536 if (IsEmoji(codePoint)) {
537 ++deleteCount;
538 state = STATE_EMOJI;
539 } else {
540 state = STATE_FINISHED;
541 }
542 }
543 }
544
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)545 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
546 {
547 if (isBackward) {
548 if (IsEmoji(codePoint)) {
549 ++deleteCount;
550 state = STATE_EMOJI;
551 return;
552 }
553 if (!IsVariationSelector(codePoint) &&
554 u_getCombiningClass(codePoint) == 0) {
555 ++deleteCount;
556 }
557 state = STATE_FINISHED;
558 } else {
559 if (codePoint == ZERO_WIDTH_JOINER) {
560 ++deleteCount;
561 state = STATE_ZWJ;
562 return;
563 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
564 ++deleteCount;
565 state = STATE_KEYCAP;
566 return;
567 }
568 state = STATE_FINISHED;
569 }
570 }
571
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)572 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
573 bool isBackward)
574 {
575 if (isBackward) {
576 switch (state) {
577 case STATE_KEYCAP:
578 if (IsVariationSelector(codePoint)) {
579 lastVSCount = 1;
580 state = STATE_VS_AND_KEYCAP;
581 return;
582 }
583 if (IsEmojiModifierBase(codePoint)) {
584 ++deleteCount;
585 state = STATE_FINISHED;
586 }
587 break;
588 case STATE_VS_AND_KEYCAP:
589 if (IsKeycapBase(codePoint)) {
590 deleteCount += lastVSCount + 1;
591 }
592 state = STATE_FINISHED;
593 break;
594 }
595 } else {
596 state = STATE_FINISHED;
597 }
598 }
599
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)600 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
601 bool isBackward)
602 {
603 if (isBackward) {
604 switch (state) {
605 case STATE_EM:
606 if (IsVariationSelector(codePoint)) {
607 lastVSCount = 1;
608 state = STATE_VS_AND_EM;
609 return;
610 } else if (IsEmojiModifierBase(codePoint)) {
611 ++deleteCount;
612 }
613 state = STATE_FINISHED;
614 break;
615 case STATE_VS_AND_EM:
616 if (IsEmojiModifierBase(codePoint)) {
617 deleteCount += lastVSCount + 1;
618 }
619 state = STATE_FINISHED;
620 break;
621 }
622 } else {
623 if (IsEmoji(codePoint)) {
624 ++deleteCount;
625 state = STATE_EMOJI;
626 return;
627 } else if (IsVariationSelector(codePoint)) {
628 ++deleteCount;
629 state = STATE_VS;
630 return;
631 } else if (codePoint == ZERO_WIDTH_JOINER) {
632 ++deleteCount;
633 state = STATE_ZWJ;
634 return;
635 } else if (IsEmojiModifierBase(codePoint)) {
636 ++deleteCount;
637 }
638 state = STATE_FINISHED;
639 }
640 }
641
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)642 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
643 {
644 if (isBackward) {
645 if (codePoint == ZERO_WIDTH_JOINER) {
646 state = STATE_ZWJ;
647 } else {
648 state = STATE_FINISHED;
649 }
650 } else {
651 if (codePoint == ZERO_WIDTH_JOINER) {
652 ++deleteCount;
653 state = STATE_ZWJ;
654 } else if (IsVariationSelector(codePoint)) {
655 ++deleteCount;
656 state = STATE_VS;
657 } else if (IsEmojiModifier(codePoint)) {
658 ++deleteCount;
659 state = STATE_EM;
660 } else if (IsTagSpec(codePoint)) {
661 ++deleteCount;
662 state = STATE_IN_TAG_QUEUE;
663 } else {
664 state = STATE_FINISHED;
665 }
666 }
667 }
668
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)669 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
670 {
671 if (IsVariationSelector(codePoint)) {
672 ++deleteCount;
673 state = STATE_VS;
674 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
675 ++deleteCount;
676 state = STATE_KEYCAP;
677 } else {
678 state = STATE_FINISHED;
679 }
680 }
681
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)682 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
683 {
684 if (isBackward) {
685 if (!IsTagSpec(codePoint)) {
686 state = STATE_FINISHED;
687 }
688 ++deleteCount;
689 } else {
690 if (IsTagSpec(codePoint)) {
691 ++deleteCount;
692 } else if (IsEmoji(codePoint)) {
693 state = STATE_FINISHED;
694 } else if (codePoint == CANCEL_TAG) {
695 ++deleteCount;
696 state = STATE_FINISHED;
697 } else {
698 ++deleteCount;
699 state = STATE_FINISHED;
700 }
701 }
702 }
703
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)704 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
705 {
706 int32_t deleteCount = 0;
707 int32_t lastVSCount = 0;
708 int32_t state = STATE_BEGIN;
709 int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
710 do {
711 uint32_t codePoint = u32Content[tempOffset];
712 tempOffset--;
713 switch (state) {
714 case STATE_BEGIN:
715 OnBeginState(codePoint, state, deleteCount, true);
716 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
717 // avoid non-emoji
718 return 0;
719 }
720 break;
721 case STATE_LF:
722 OnCRLFState(codePoint, state, deleteCount, true);
723 break;
724 case STATE_ODD_RIS:
725 case STATE_EVEN_RIS:
726 OnRISState(codePoint, state, deleteCount, true);
727 break;
728 case STATE_KEYCAP:
729 case STATE_VS_AND_KEYCAP:
730 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
731 break;
732 case STATE_EM:
733 case STATE_VS_AND_EM:
734 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
735 break;
736 case STATE_VS:
737 OnVSState(codePoint, state, deleteCount, true);
738 break;
739 case STATE_EMOJI:
740 OnEmojiState(codePoint, state, deleteCount, true);
741 break;
742 case STATE_ZWJ:
743 case STATE_VS_AND_ZWJ:
744 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
745 break;
746 case STATE_IN_TAG_QUEUE:
747 OnTagQueueState(codePoint, state, deleteCount, true);
748 break;
749 default:
750 break;
751 }
752 } while (tempOffset >= 0 && state != STATE_FINISHED);
753 return deleteCount;
754 }
755
BackwardDelete(std::u32string & u32Content)756 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
757 {
758 int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
759 return HandleDeleteAction(u32Content, deleteCount, true);
760 }
761
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)762 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
763 {
764 int32_t deleteCount = 0;
765 int32_t state = STATE_BEGIN;
766 int32_t tempOffset = 0;
767 int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
768 do {
769 int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
770 tempOffset++;
771 switch (state) {
772 case STATE_BEGIN:
773 OnBeginState(codePoint, state, deleteCount, false);
774 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
775 return 0;
776 }
777 break;
778 case STATE_SECOND:
779 OnForwardSecondState(codePoint, state, deleteCount);
780 break;
781 case STATE_CR:
782 case STATE_LF:
783 OnCRLFState(codePoint, state, deleteCount, false);
784 break;
785 case STATE_ODD_RIS:
786 case STATE_EVEN_RIS:
787 OnRISState(codePoint, state, deleteCount, false);
788 break;
789 case STATE_KEYCAP:
790 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
791 // in ForwardDelete, we dont need to care about lastVSCount.
792 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
793 // same as above
794 break;
795 case STATE_EM:
796 OnEMState(codePoint, state, deleteCount, deleteCount, false);
797 break;
798 case STATE_VS:
799 OnVSState(codePoint, state, deleteCount, false);
800 break;
801 case STATE_EMOJI:
802 OnEmojiState(codePoint, state, deleteCount, false);
803 break;
804 case STATE_ZWJ:
805 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
806 break;
807 case STATE_IN_TAG_QUEUE:
808 OnTagQueueState(codePoint, state, deleteCount, false);
809 break;
810 default:
811 break;
812 }
813 } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
814 return deleteCount;
815 }
816
ForwardDelete(std::u32string & u32Content)817 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
818 {
819 int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
820 return HandleDeleteAction(u32Content, deleteCount, false);
821 }
822
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)823 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
824 {
825 int32_t contentLength = static_cast<int32_t>(u32Content.length());
826 deleteCount = std::min(deleteCount, contentLength);
827 if (isBackward) {
828 if (deleteCount > 0) {
829 int32_t start = contentLength - deleteCount;
830 start = std::clamp(start, 0, static_cast<int32_t>(u32Content.length()));
831 u32Content.erase(start, deleteCount);
832 return true;
833 }
834 } else {
835 if (deleteCount > 0) {
836 u32Content.erase(0, deleteCount);
837 return true;
838 }
839 }
840 return false;
841 }
842
843 } // namespace OHOS::Ace
844