• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 #include "ecmascript/object_factory.h"
18 #include "ecmascript/tests/ecma_test_common.h"
19 
20 using namespace panda::ecmascript;
21 
22 namespace panda::test {
23 class EcmaStringEqualsTest : public BaseTestWithScope<false> {
24     public:
IsUtf8EqualsUtf16UT(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)25         static bool IsUtf8EqualsUtf16UT(const uint8_t *utf8Data, size_t utf8Len,
26                                         const uint16_t *utf16Data, uint32_t utf16Len)
27         {
28             return  EcmaString::IsUtf8EqualsUtf16(utf8Data, utf8Len, utf16Data, utf16Len);
29         }
30 };
31 
32 /*
33 * @tc.name: IsUtf8EqualsUtf16
34 * @tc.desc: Test a function that compares whether an array of UTF8 characters
35 * is equal to an array of UTF16 characters
36 * @tc.type: FUNC
37 */
HWTEST_F_L0(EcmaStringEqualsTest,IsUtf8EqualsUtf16)38 HWTEST_F_L0(EcmaStringEqualsTest, IsUtf8EqualsUtf16)
39 {
40     // Test case 1: ASCII characters
41     const uint8_t utf8_01[] = "hello";  // "hello" in ASCII is valid UTF-8
42     const uint16_t utf16_01[] = {'h', 'e', 'l', 'l', 'o'};
43     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_01, 5, utf16_01, 5));
44 
45     // Test case 2: 2-byte UTF-8 sequences
46     const uint8_t utf8_02[] = {0xC3, 0xA9, 0xC3, 0xA8}; // "éè" in UTF-8
47     const uint16_t utf16_02[] = {0x00E9, 0x00E8}; // "éè" in UTF-16
48     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_02, 4, utf16_02, 2));
49 
50     // Test case 3: 3-byte UTF-8 sequences
51     const uint8_t utf8_03[] = {0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87}; // "中文" in UTF-8
52     const uint16_t utf16_03[] = {0x4E2D, 0x6587}; // "中文" in UTF-16
53     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_03, 6, utf16_03, 2));
54 
55     // Test case 4: 4-byte UTF-8 sequences
56     const uint8_t utf8_04[] = {0xF0, 0x9F, 0x98, 0x81}; // �� in UTF-8
57     const uint16_t utf16_04[] = {0xD83D, 0xDE01}; // �� in UTF-16 (surrogate pair)
58     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_04, 4, utf16_04, 2));
59 
60     // Test case 5: UTF-16 edge cases (empty strings)
61     const uint8_t *utf8_05 = nullptr;
62     const uint16_t *utf16_05 = nullptr;
63     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_05, 0, utf16_05, 0));
64 
65     // Test case 6: UTF-8 shorter than UTF-16
66     const uint8_t utf8_06[] = "test"; // "test" in ASCII
67     const uint16_t utf16_06[] = {'t', 'e', 's', 't', '!', '!'};
68     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_06, 4, utf16_06, 6));
69 
70     // Test case 7: UTF-8 longer than UTF-16
71     const uint8_t utf8_07[] = {0xF0, 0x9F, 0x98, 0x81, 0xF0, 0x9F, 0x98, 0x81}; // ���� in UTF-8
72     const uint16_t utf16_07[] = {0xD83D, 0xDE01}; // �� in UTF-16 (surrogate pair)
73     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_07, 8, utf16_07, 2));
74 
75     // Test case 8: Incomplete surrogate pair in UTF-16
76     const uint8_t utf8_08[] = {0xF0, 0x9F, 0x92, 0xA9}; // �� in UTF-8
77     const uint16_t utf16_08[] = {0xD83D}; // Missing low surrogate
78     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_08, 4, utf16_08, 1));
79 
80     // Test case 9: Truncated UTF-8 multi-byte character
81     const uint8_t utf8_09[] = {0xE3, 0x81}; // Truncated "あ" (Japanese 'a')
82     const uint16_t utf16_09[] = {0x3042}; // Full "あ"
83     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_09, 2, utf16_09, 1));
84 
85     // Test case 10: Longer UTF-8 sequence matching shorter UTF-16
86     const uint8_t utf8_10[] = {0xC2, 0xA3, 0xC2, 0xA3}; // "££" in UTF-8
87     const uint16_t utf16_10[] = {0x00A3}; // Single "£"
88     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_10, 4, utf16_10, 1));
89 
90     // Test case 11: Handling noncharacters in both UTF-8 and UTF-16
91     const uint8_t utf8_11[] = {0xEF, 0xBF, 0xBE}; // UTF-8 noncharacter U+FFFE
92     const uint16_t utf16_11[] = {0xFFFE}; // UTF-16 noncharacter
93     EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_11, 3, utf16_11, 1));
94 
95     // Test case 12: Empty UTF-8 and non-empty UTF-16
96     const uint8_t *utf8_12 = nullptr; // Empty UTF-8
97     const uint16_t utf16_12[] = {0x0061}; // "a"
98     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_12, 0, utf16_12, 1));
99 
100     // Test case 13: Non-empty UTF-8 and empty UTF-16
101     const uint8_t utf8_13[] = {0x61}; // "a"
102     const uint16_t *utf16_13 = nullptr; // Empty UTF-16
103     EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_13, 1, utf16_13, 0));
104 }
105 }