• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "stringtableutf16stringintern_fuzzer.h"
17 #include "ecmascript/base/string_helper.h"
18 #include "ecmascript/ecma_string-inl.h"
19 #include "ecmascript/ecma_string_table_optimization-inl.h"
20 #include "common_components/base/utf_helper.h"
21 #include "ecmascript/napi/include/jsnapi.h"
22 #include "ecmascript/napi/jsnapi_helper.h"
23 #include <thread>
24 #include <vector>
25 #include <cstdint>
26 #include <optional>
27 
28 using namespace panda;
29 using namespace panda::ecmascript;
30 using namespace common::utf_helper;
31 
32 namespace OHOS {
33     constexpr size_t ONE_BYTE = 1;
34     constexpr size_t DOUBLE_BYTE = 2;
35     constexpr size_t TRIPLE_BYTE = 3;
36     constexpr size_t ONE_BYTE_BITS = 8;
37     constexpr uint16_t REPLACEMENT_CHAR = 0xFFFD;
38 
39     struct CodeUnitResult {
40         std::vector<uint16_t> units;
41         size_t step;
42     };
43 
ProcessCodeUnit(const uint8_t * data,size_t i,size_t size,bool bigEndian,uint16_t codeUnit)44     CodeUnitResult ProcessCodeUnit(const uint8_t* data, size_t i, size_t size, bool bigEndian, uint16_t codeUnit)
45     {
46         CodeUnitResult result;
47         if (codeUnit < 0xD800 || codeUnit > 0xDFFF) {
48             result.units.push_back(codeUnit);
49             result.step = DOUBLE_BYTE;
50             return result;
51         }
52 
53         if (codeUnit >= 0xD800 && codeUnit <= 0xDBFF) {
54             if (i + TRIPLE_BYTE >= size) {
55                 result.units.push_back(REPLACEMENT_CHAR);
56                 result.step = DOUBLE_BYTE;
57                 return result;
58             }
59 
60             uint16_t lowSurrogate;
61             if (bigEndian) {
62                 lowSurrogate = (static_cast<uint16_t>(data[i + DOUBLE_BYTE]) << ONE_BYTE_BITS) | data[i + TRIPLE_BYTE];
63             } else {
64                 lowSurrogate = (static_cast<uint16_t>(data[i + TRIPLE_BYTE]) << ONE_BYTE_BITS) | data[i + DOUBLE_BYTE];
65             }
66 
67             if (lowSurrogate >= 0xDC00 && lowSurrogate <= 0xDFFF) {
68                 result.units.push_back(codeUnit);
69                 result.units.push_back(lowSurrogate);
70                 result.step = DOUBLE_BYTE * DOUBLE_BYTE;
71                 return result;
72             }
73         }
74 
75         result.units.push_back(REPLACEMENT_CHAR);
76         result.step = DOUBLE_BYTE;
77         return result;
78     }
79 
CreateValidUtf16(const uint8_t * data,size_t size)80     std::vector<uint16_t> CreateValidUtf16(const uint8_t *data, size_t size)
81     {
82         std::vector<uint16_t> result;
83         if (size == 0) {
84             return result;
85         }
86 
87         bool bigEndian = false;
88         size_t startIndex = 0;
89         if (size >= DOUBLE_BYTE) {
90             if (data[0] == 0xFE && data[1] == 0xFF) {
91                 bigEndian = true;
92                 startIndex = DOUBLE_BYTE;
93             } else if (data[0] == 0xFF && data[1] == 0xFE) {
94                 startIndex = DOUBLE_BYTE;
95             }
96         }
97 
98         size_t byteCount = size - startIndex;
99         if (byteCount % DOUBLE_BYTE != 0) {
100             byteCount--;
101         }
102         result.reserve(byteCount / DOUBLE_BYTE);
103 
104         size_t i = startIndex;
105         while (i < startIndex + byteCount) {
106             uint16_t codeUnit;
107             if (bigEndian) {
108                 codeUnit = (static_cast<uint16_t>(data[i]) << ONE_BYTE_BITS) | data[i + ONE_BYTE];
109             } else {
110                 codeUnit = (static_cast<uint16_t>(data[i + ONE_BYTE]) << ONE_BYTE_BITS) | data[i];
111             }
112             auto unitResult = ProcessCodeUnit(data, i, size, bigEndian, codeUnit);
113             for (auto unit : unitResult.units) {
114                 result.push_back(unit);
115             }
116             i += unitResult.step;
117         }
118         return result;
119     }
120 
StringTableUtf16StringInternFuzzTest(const uint8_t * data,size_t size)121     void StringTableUtf16StringInternFuzzTest(const uint8_t *data, size_t size)
122     {
123         if (data == nullptr || size < DOUBLE_BYTE) {
124             LOG_ECMA(ERROR) << "illegal input!";
125             return;
126         }
127         RuntimeOption option;
128         option.SetLogLevel(common::LOG_LEVEL::ERROR);
129         EcmaVM *vm = JSNApi::CreateJSVM(option);
130         ObjectFactory *factory = vm->GetFactory();
131         EcmaStringTable *table = vm->GetEcmaStringTable();
132 
133         std::vector<uint16_t> utf16Data = CreateValidUtf16(data, size);
134         JSHandle<EcmaString> str = factory->NewFromUtf16(utf16Data.data(), utf16Data.size());
135         table->GetOrInternString(vm, utf16Data.data(), utf16Data.size(), false);
136         table->GetOrInternString(vm, *str);
137 
138         JSNApi::DestroyJSVM(vm);
139     }
140 }
141 
142 // Fuzzer entry point.
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)143 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
144 {
145     // Run your code on data.
146     OHOS::StringTableUtf16StringInternFuzzTest(data, size);
147     return 0;
148 }