1 // Copyright 2021 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include "fuzzed_data_provider.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "fuzz_target_runner.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "jvm_tooling.h"
#include "tools/cpp/runfiles/runfiles.h"
29
30 DECLARE_string(cp);
31 DECLARE_string(jvm_args);
32 DECLARE_string(instrumentation_excludes);
33
34 DECLARE_string(target_class);
35 DECLARE_string(target_args);
36
37 namespace jazzer {
38
39 std::pair<std::string, std::size_t> FixUpModifiedUtf8(const uint8_t* pos,
40 std::size_t max_bytes,
41 jint max_length,
42 bool ascii_only,
43 bool stop_on_backslash);
44
FixUpRemainingModifiedUtf8(const std::string & str,bool ascii_only,bool stop_on_backslash)45 std::pair<std::string, std::size_t> FixUpRemainingModifiedUtf8(
46 const std::string& str, bool ascii_only, bool stop_on_backslash) {
47 return FixUpModifiedUtf8(reinterpret_cast<const uint8_t*>(str.c_str()),
48 str.length(), std::numeric_limits<jint>::max(),
49 ascii_only, stop_on_backslash);
50 }
51
// Literal suffix that yields a std::size_t, e.g. `6_z`. Works around the fact
// that size_t is unsigned long on Linux and unsigned long long on Windows, so
// that std::make_pair("..."s, 6_z) has the same type on both platforms.
//
// Written without whitespace between `""` and the suffix since the spaced form
// is deprecated; the conversion to std::size_t is spelled out explicitly.
constexpr std::size_t operator""_z(unsigned long long x) noexcept {
  return static_cast<std::size_t>(x);
}

using namespace std::literals::string_literals;
// Full UTF-8 mode, backslashes are ordinary characters: the expectations below
// require NUL bytes to come back as the two-byte sequence 0xC0 0x80 and all
// other bytes of these inputs to come back unchanged.
TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/false,
                                      /*stop_on_backslash=*/false);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
  // U+20AC followed by U+00DF, spelled as explicit UTF-8 bytes (5 in total,
  // matching the expected consumed count) so that the test depends neither on
  // the source file encoding nor on the type of u8 string literals.
  EXPECT_EQ(std::make_pair("\xE2\x82\xAC\xC3\x9F"s, 5_z),
            fix_up("\xE2\x82\xAC\xC3\x9F"s));
}
71
// ASCII-only mode, backslashes are ordinary characters: NUL bytes are expected
// to come back as 0xC0 0x80 and the ASCII inputs to come back unchanged.
TEST(FixUpModifiedUtf8Test, AsciiOnly_ContinueOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/true,
                                      /*stop_on_backslash=*/false);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
  // Input is U+20AC followed by U+00DF as explicit UTF-8 bytes. The expected
  // output is each of those five bytes with its most significant bit cleared
  // (0xE2 -> 0x62, 0x82 -> 0x02, 0xAC -> 0x2C, 0xC3 -> 0x43, 0x9F -> 0x1F).
  EXPECT_EQ(std::make_pair("\x62\x02\x2C\x43\x1F"s, 5_z),
            fix_up("\xE2\x82\xAC\xC3\x9F"s));
}
86
// Full UTF-8 mode with stop_on_backslash set. Per the expectations below, a
// lone backslash ends the returned string ("ja") while four input bytes count
// as consumed, and a doubled backslash comes back as a single one.
TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/false,
                                      /*stop_on_backslash=*/true);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  EXPECT_EQ(std::make_pair("ja"s, 4_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
}
99
// ASCII-only mode with stop_on_backslash set. Per the expectations below, a
// lone backslash ends the returned string ("ja") while four input bytes count
// as consumed, and a doubled backslash comes back as a single one.
TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/true,
                                      /*stop_on_backslash=*/true);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  EXPECT_EQ(std::make_pair("ja"s, 4_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
}
112
113 class FuzzedDataProviderTest : public ::testing::Test {
114 protected:
115 // After DestroyJavaVM() no new JVM instance can be created in the same
116 // process, so we set up a single JVM instance for this test binary which gets
117 // destroyed after all tests in this test suite have finished.
SetUpTestCase()118 static void SetUpTestCase() {
119 FLAGS_instrumentation_excludes = "**";
120 using ::bazel::tools::cpp::runfiles::Runfiles;
121 Runfiles* runfiles = Runfiles::CreateForTest();
122 FLAGS_cp = runfiles->Rlocation(FLAGS_cp);
123
124 jvm_ = std::make_unique<JVM>("test_executable");
125 }
126
TearDownTestCase()127 static void TearDownTestCase() { jvm_.reset(nullptr); }
128
129 static std::unique_ptr<JVM> jvm_;
130 };
131
132 std::unique_ptr<JVM> FuzzedDataProviderTest::jvm_ = nullptr;
133
// See testdata/test/FuzzTargetWithDataProvider.java for the implementation
// of the fuzz target that asserts that the correct values are received from
// the data provider.
//
// The trailing comment on each group names the data provider call that is
// expected to consume those bytes and the value it should produce. The first
// section is read from the start of the input, the second from its end.
constexpr uint8_t kInput[] = {
    // Bytes read from the start
    0x01, 0x02,  // consumeBytes(2): {0x01, 0x02}

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeString(6): "ja\u0000zer"
    0xE2, 0x82, 0xAC, 0xC3, 0x9F,   // consumeString(2): "€ß"

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeAsciiString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeAsciiString(6): "ja\u0000zer"
    0xE2, 0x82, 0xAC, 0xC3,
    0x9F,  // consumeAsciiString(5): "\u0062\u0002\u002C\u0043\u001F"

    false, false, true, false,
    true,  // consumeBooleans(5): { false, false, true, false, true }
    0xEF, 0xDC, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01, 0x10, 0x32, 0x54, 0x76,
    0x98, 0xBA, 0xDC,
    0xFE,  // consumeLongs(2): { 0x0123456789ABCDEF, 0xFEDCBA9876543210 }

    0x78, 0x56, 0x34, 0x12,  // consumeInts(3): { 0x12345678 }
    0x56, 0x34, 0x12,        // consumeLong():

    // Bytes read from the end
    0x02, 0x03, 0x02, 0x04,  // 4x pickValue in array with five elements

    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    10,    // -max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    9,     // max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    8,     // -min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    7,     // min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    6,     // -denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    5,     // denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    4,     // NaN for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    3,     // -infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    2,     // infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    1,     // -0.0 for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    0,     // 0.0 for next consumeDouble

    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    10,                            // -max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    9,                             // max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    8,                             // -min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    7,                             // min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    6,                             // -denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    5,                             // denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    4,                             // NaN for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    3,                             // -infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    2,                             // infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    1,                             // -0.0 for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    0,                             // 0.0 for next consumeFloat

    0x88, 0xAB, 0x61, 0xCB, 0x32, 0xEB, 0x30,
    0xF9,  // consumeDouble(13.37, 31.337): 30.859126145478349 (small range)
    0x51, 0xF6, 0x1F,
    0x3A,  // consumeFloat(123.0, 777.0): 271.49084 (small range)
    0x11, 0x4D, 0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73,
    0x39,  // consumeRegularDouble(): 8.0940194040236032e+307
    0x16, 0xCF, 0x3D, 0x29, 0x4A,  // consumeRegularFloat(): -2.8546307e+38

    0x61, 0xCB, 0x32, 0xEB, 0x30, 0xF9, 0x51,
    0xF6,                    // consumeProbabilityDouble(): 0.96218831486039413
    0x1F, 0x3A, 0x11, 0x4D,  // consumeProbabilityFloat(): 0.30104411
    0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73, 0x39,
    0x16,                    // consumeProbabilityDouble(): 0.086814121166605432
    0xCF, 0x3D, 0x29, 0x4A,  // consumeProbabilityFloat(): 0.28969181

    0x01,                    // consumeInt(0x12345678, 0x12345679): 0x12345679
    0x78,                    // consumeInt(-0x12345678, -0x12345600): -0x12345600
    0x78, 0x56, 0x34, 0x12,  // consumeInt(): 0x12345678

    0x02,  // consumeByte(0x12, 0x22): 0x14
    0x7F,  // consumeByte(): 0x7F

    0x01,  // consumeBool(): true
};
243
// Runs the fuzz target class test/FuzzTargetWithDataProvider (no target
// arguments) once on kInput and requires the run to report RunResult::kOk.
TEST_F(FuzzedDataProviderTest, FuzzTargetWithDataProvider) {
  FLAGS_target_args.clear();
  FLAGS_target_class = "test/FuzzTargetWithDataProvider";
  FuzzTargetRunner runner(*jvm_);

  const auto run_result = runner.Run(kInput, sizeof(kInput));
  ASSERT_EQ(RunResult::kOk, run_result);
}
251
252 constexpr std::size_t kValidModifiedUtf8NumRuns = 10000;
253 constexpr std::size_t kValidModifiedUtf8NumBytes = 100000;
254 constexpr uint32_t kValidModifiedUtf8Seed = 0x12345678;
255
TEST_F(FuzzedDataProviderTest,InvalidModifiedUtf8AfterFixup)256 TEST_F(FuzzedDataProviderTest, InvalidModifiedUtf8AfterFixup) {
257 auto modified_utf8_validator = jvm_->FindClass("test.ModifiedUtf8Encoder");
258 ASSERT_NE(nullptr, modified_utf8_validator);
259 auto string_to_modified_utf_bytes = jvm_->GetStaticMethodID(
260 modified_utf8_validator, "encode", "(Ljava/lang/String;)[B");
261 ASSERT_NE(nullptr, string_to_modified_utf_bytes);
262 auto& env = jvm_->GetEnv();
263 auto random_bytes = std::vector<uint8_t>(kValidModifiedUtf8NumBytes);
264 auto random = std::mt19937(kValidModifiedUtf8Seed);
265 for (bool ascii_only : {false, true}) {
266 for (bool stop_on_backslash : {false, true}) {
267 for (std::size_t i = 0; i < kValidModifiedUtf8NumRuns; ++i) {
268 std::generate(random_bytes.begin(), random_bytes.end(), random);
269 std::string fixed_string;
270 std::tie(fixed_string, std::ignore) = FixUpModifiedUtf8(
271 random_bytes.data(), random_bytes.size(),
272 std::numeric_limits<jint>::max(), ascii_only, stop_on_backslash);
273
274 jstring jni_fixed_string = env.NewStringUTF(fixed_string.c_str());
275 auto jni_roundtripped_bytes = (jbyteArray)env.CallStaticObjectMethod(
276 modified_utf8_validator, string_to_modified_utf_bytes,
277 jni_fixed_string);
278 ASSERT_FALSE(env.ExceptionCheck());
279 env.DeleteLocalRef(jni_fixed_string);
280 jint roundtripped_bytes_length =
281 env.GetArrayLength(jni_roundtripped_bytes);
282 jbyte* roundtripped_bytes =
283 env.GetByteArrayElements(jni_roundtripped_bytes, nullptr);
284 auto roundtripped_string =
285 std::string(reinterpret_cast<char*>(roundtripped_bytes),
286 roundtripped_bytes_length);
287 env.ReleaseByteArrayElements(jni_roundtripped_bytes, roundtripped_bytes,
288 JNI_ABORT);
289 env.DeleteLocalRef(jni_roundtripped_bytes);
290
291 // Verify that the bytes obtained from running our modified UTF-8 fix-up
292 // function remain unchanged when turned into a Java string and
293 // reencoded into modified UTF-8. This will only happen if the our
294 // fix-up function indeed returned valid modified UTF-8.
295 ASSERT_EQ(fixed_string, roundtripped_string);
296 }
297 }
298 }
299 }
300 } // namespace jazzer
301