1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "stringtableutf16stringintern_fuzzer.h"
17 #include "ecmascript/base/string_helper.h"
18 #include "ecmascript/ecma_string-inl.h"
19 #include "ecmascript/ecma_string_table_optimization-inl.h"
20 #include "common_components/base/utf_helper.h"
21 #include "ecmascript/napi/include/jsnapi.h"
22 #include "ecmascript/napi/jsnapi_helper.h"
23 #include <thread>
24 #include <vector>
25 #include <cstdint>
26 #include <optional>
27
28 using namespace panda;
29 using namespace panda::ecmascript;
30 using namespace common::utf_helper;
31
32 namespace OHOS {
// Byte offsets and widths used while turning raw fuzz bytes into 16-bit code units.
constexpr size_t ONE_BYTE{1};       // offset of the second byte of the current code unit
constexpr size_t DOUBLE_BYTE{2};    // bytes per code unit; also the length of a byte-order mark
constexpr size_t TRIPLE_BYTE{3};    // offset of the last byte of the code unit that follows
constexpr size_t ONE_BYTE_BITS{8};  // shift that moves a byte into the upper half of a code unit
// Code unit emitted in place of an unpaired surrogate.
constexpr uint16_t REPLACEMENT_CHAR{0xFFFD};
38
// Outcome of examining a single code unit of the input.
struct CodeUnitResult {
    // Code units to append to the output for this position.
    std::vector<uint16_t> units;
    // Number of input bytes consumed; zero-initialised so a default-constructed
    // result never carries an indeterminate value.
    size_t step = 0;
};
43
ProcessCodeUnit(const uint8_t * data,size_t i,size_t size,bool bigEndian,uint16_t codeUnit)44 CodeUnitResult ProcessCodeUnit(const uint8_t* data, size_t i, size_t size, bool bigEndian, uint16_t codeUnit)
45 {
46 CodeUnitResult result;
47 if (codeUnit < 0xD800 || codeUnit > 0xDFFF) {
48 result.units.push_back(codeUnit);
49 result.step = DOUBLE_BYTE;
50 return result;
51 }
52
53 if (codeUnit >= 0xD800 && codeUnit <= 0xDBFF) {
54 if (i + TRIPLE_BYTE >= size) {
55 result.units.push_back(REPLACEMENT_CHAR);
56 result.step = DOUBLE_BYTE;
57 return result;
58 }
59
60 uint16_t lowSurrogate;
61 if (bigEndian) {
62 lowSurrogate = (static_cast<uint16_t>(data[i + DOUBLE_BYTE]) << ONE_BYTE_BITS) | data[i + TRIPLE_BYTE];
63 } else {
64 lowSurrogate = (static_cast<uint16_t>(data[i + TRIPLE_BYTE]) << ONE_BYTE_BITS) | data[i + DOUBLE_BYTE];
65 }
66
67 if (lowSurrogate >= 0xDC00 && lowSurrogate <= 0xDFFF) {
68 result.units.push_back(codeUnit);
69 result.units.push_back(lowSurrogate);
70 result.step = DOUBLE_BYTE * DOUBLE_BYTE;
71 return result;
72 }
73 }
74
75 result.units.push_back(REPLACEMENT_CHAR);
76 result.step = DOUBLE_BYTE;
77 return result;
78 }
79
CreateValidUtf16(const uint8_t * data,size_t size)80 std::vector<uint16_t> CreateValidUtf16(const uint8_t *data, size_t size)
81 {
82 std::vector<uint16_t> result;
83 if (size == 0) {
84 return result;
85 }
86
87 bool bigEndian = false;
88 size_t startIndex = 0;
89 if (size >= DOUBLE_BYTE) {
90 if (data[0] == 0xFE && data[1] == 0xFF) {
91 bigEndian = true;
92 startIndex = DOUBLE_BYTE;
93 } else if (data[0] == 0xFF && data[1] == 0xFE) {
94 startIndex = DOUBLE_BYTE;
95 }
96 }
97
98 size_t byteCount = size - startIndex;
99 if (byteCount % DOUBLE_BYTE != 0) {
100 byteCount--;
101 }
102 result.reserve(byteCount / DOUBLE_BYTE);
103
104 size_t i = startIndex;
105 while (i < startIndex + byteCount) {
106 uint16_t codeUnit;
107 if (bigEndian) {
108 codeUnit = (static_cast<uint16_t>(data[i]) << ONE_BYTE_BITS) | data[i + ONE_BYTE];
109 } else {
110 codeUnit = (static_cast<uint16_t>(data[i + ONE_BYTE]) << ONE_BYTE_BITS) | data[i];
111 }
112 auto unitResult = ProcessCodeUnit(data, i, size, bigEndian, codeUnit);
113 for (auto unit : unitResult.units) {
114 result.push_back(unit);
115 }
116 i += unitResult.step;
117 }
118 return result;
119 }
120
StringTableUtf16StringInternFuzzTest(const uint8_t * data,size_t size)121 void StringTableUtf16StringInternFuzzTest(const uint8_t *data, size_t size)
122 {
123 if (data == nullptr || size < DOUBLE_BYTE) {
124 LOG_ECMA(ERROR) << "illegal input!";
125 return;
126 }
127 RuntimeOption option;
128 option.SetLogLevel(common::LOG_LEVEL::ERROR);
129 EcmaVM *vm = JSNApi::CreateJSVM(option);
130 ObjectFactory *factory = vm->GetFactory();
131 EcmaStringTable *table = vm->GetEcmaStringTable();
132
133 std::vector<uint16_t> utf16Data = CreateValidUtf16(data, size);
134 JSHandle<EcmaString> str = factory->NewFromUtf16(utf16Data.data(), utf16Data.size());
135 table->GetOrInternString(vm, utf16Data.data(), utf16Data.size(), false);
136 table->GetOrInternString(vm, *str);
137
138 JSNApi::DestroyJSVM(vm);
139 }
140 }
141
142 // Fuzzer entry point.
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)143 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
144 {
145 // Run your code on data.
146 OHOS::StringTableUtf16StringInternFuzzTest(data, size);
147 return 0;
148 }