• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "fuzzed_data_provider.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <random>
20 #include <sstream>
21 #include <string>
22 #include <vector>
23 
24 #include "fuzz_target_runner.h"
25 #include "gflags/gflags.h"
26 #include "gtest/gtest.h"
27 #include "jvm_tooling.h"
28 #include "tools/cpp/runfiles/runfiles.h"
29 
30 DECLARE_string(cp);
31 DECLARE_string(jvm_args);
32 DECLARE_string(instrumentation_excludes);
33 
34 DECLARE_string(target_class);
35 DECLARE_string(target_args);
36 
37 namespace jazzer {
38 
// Declaration of the function under test; its definition is not part of this
// file (presumably it lives with the fuzzed data provider implementation -
// TODO confirm).
// Judging from the expectations in the tests below, the returned pair appears
// to hold the fixed-up string and the number of input bytes that were consumed
// to produce it.
std::pair<std::string, std::size_t> FixUpModifiedUtf8(const uint8_t* pos,
                                                      std::size_t max_bytes,
                                                      jint max_length,
                                                      bool ascii_only,
                                                      bool stop_on_backslash);
44 
FixUpRemainingModifiedUtf8(const std::string & str,bool ascii_only,bool stop_on_backslash)45 std::pair<std::string, std::size_t> FixUpRemainingModifiedUtf8(
46     const std::string& str, bool ascii_only, bool stop_on_backslash) {
47   return FixUpModifiedUtf8(reinterpret_cast<const uint8_t*>(str.c_str()),
48                            str.length(), std::numeric_limits<jint>::max(),
49                            ascii_only, stop_on_backslash);
50 }
51 
// Work around the fact that size_t is unsigned long on Linux and unsigned long
// long on Windows: a literal such as `6_z` is a std::size_t on every platform.
// The suffix is written without whitespace after `""` because the spaced form
// (`operator"" _z`) is deprecated; the conversion is explicit and usable in
// constant expressions.
constexpr std::size_t operator""_z(unsigned long long x) {
  return static_cast<std::size_t>(x);
}
55 
// Enables the ""s suffix for std::string literals, which the tests below rely
// on to build strings that contain embedded zero bytes.
using namespace std::literals::string_literals;
// Fix-up of arbitrary (not ASCII-restricted) input when backslashes receive no
// special treatment.
TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/false,
                                      /*stop_on_backslash=*/false);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  // Every zero byte is expected to come back as the two bytes 0xC0 0x80.
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  // Backslashes are passed through unchanged.
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));

  // The UTF-8 encoding of U+20AC (euro sign) followed by U+00DF (sharp s),
  // spelled out as bytes: a u8"..."s literal is a std::u8string as of C++20 and
  // a plain literal with non-ASCII characters depends on the execution
  // character set of the compiler.
  const std::string euro_sharp_s = "\xE2\x82\xAC\xC3\x9F"s;
  EXPECT_EQ(std::make_pair(euro_sharp_s, 5_z), fix_up(euro_sharp_s));
}
71 
// Fix-up in ASCII-only mode when backslashes receive no special treatment.
TEST(FixUpModifiedUtf8Test, AsciiOnly_ContinueOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/true,
                                      /*stop_on_backslash=*/false);
  };

  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  // Every zero byte is expected to come back as the two bytes 0xC0 0x80.
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));
  // Backslashes are passed through unchanged.
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z), fix_up("ja\\zzer"s));
  EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));

  // Input: the UTF-8 encoding of U+20AC followed by U+00DF, spelled out as
  // bytes because a u8"..."s literal is a std::u8string as of C++20. Each
  // expected byte equals the corresponding input byte with its most
  // significant bit cleared.
  const std::string euro_sharp_s = "\xE2\x82\xAC\xC3\x9F"s;
  EXPECT_EQ(std::make_pair("\x62\x02\x2C\x43\x1F"s, 5_z),
            fix_up(euro_sharp_s));
}
86 
// Fix-up of arbitrary (not ASCII-restricted) input in stop-on-backslash mode.
TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/false,
                                      /*stop_on_backslash=*/true);
  };

  // Input without backslashes behaves as in continue-on-backslash mode.
  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));

  // A lone backslash ends the string; the reported byte count is 4 although
  // only two characters are kept.
  EXPECT_EQ(std::make_pair("ja"s, 4_z), fix_up("ja\\zzer"s));
  // A doubled backslash is kept as a single backslash.
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
}
99 
// Fix-up in ASCII-only mode combined with stop-on-backslash mode.
TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) {
  const auto fix_up = [](const std::string& input) {
    return FixUpRemainingModifiedUtf8(input, /*ascii_only=*/true,
                                      /*stop_on_backslash=*/true);
  };

  // Input without backslashes behaves as in continue-on-backslash mode.
  EXPECT_EQ(std::make_pair("jazzer"s, 6_z), fix_up("jazzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z), fix_up("ja\0zzer"s));
  EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
            fix_up("ja\0\0zzer"s));

  // A lone backslash ends the string; the reported byte count is 4 although
  // only two characters are kept.
  EXPECT_EQ(std::make_pair("ja"s, 4_z), fix_up("ja\\zzer"s));
  // A doubled backslash is kept as a single backslash.
  EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z), fix_up("ja\\\\zzer"s));
}
112 
113 class FuzzedDataProviderTest : public ::testing::Test {
114  protected:
115   // After DestroyJavaVM() no new JVM instance can be created in the same
116   // process, so we set up a single JVM instance for this test binary which gets
117   // destroyed after all tests in this test suite have finished.
SetUpTestCase()118   static void SetUpTestCase() {
119     FLAGS_instrumentation_excludes = "**";
120     using ::bazel::tools::cpp::runfiles::Runfiles;
121     Runfiles* runfiles = Runfiles::CreateForTest();
122     FLAGS_cp = runfiles->Rlocation(FLAGS_cp);
123 
124     jvm_ = std::make_unique<JVM>("test_executable");
125   }
126 
TearDownTestCase()127   static void TearDownTestCase() { jvm_.reset(nullptr); }
128 
129   static std::unique_ptr<JVM> jvm_;
130 };
131 
132 std::unique_ptr<JVM> FuzzedDataProviderTest::jvm_ = nullptr;
133 
// Input passed to the fuzz target by the FuzzTargetWithDataProvider test. See
// testdata/test/FuzzTargetWithDataProvider.java for the implementation of the
// fuzz target that asserts that the correct values are received from the data
// provider. The comment next to each group of bytes names the data provider
// call the bytes are meant for and the value that call is expected to yield.
const uint8_t kInput[] = {
    // Bytes read from the start
    0x01, 0x02,  // consumeBytes(2): {0x01, 0x02}

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeString(6): "ja\u0000zer"
    0xE2, 0x82, 0xAC, 0xC3, 0x9F,   // consumeString(2): "€ß"

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeAsciiString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeAsciiString(6): "ja\u0000zer"
    0xE2, 0x82, 0xAC, 0xC3,
    0x9F,  // consumeAsciiString(5): "\u0062\u0002\u002C\u0043\u001F"

    false, false, true, false,
    true,  // consumeBooleans(5): { false, false, true, false, true }
    0xEF, 0xDC, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01, 0x10, 0x32, 0x54, 0x76,
    0x98, 0xBA, 0xDC,
    0xFE,  // consumeLongs(2): { 0x0123456789ABCDEF, 0xFEDCBA9876543210 }

    0x78, 0x56, 0x34, 0x12,  // consumeInts(3): { 0x12345678 }
    0x56, 0x34, 0x12,        // consumeLong():

    // Bytes read from the end
    0x02, 0x03, 0x02, 0x04,  // 4x pickValue in array with five elements

    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    10,    // -max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    9,     // max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    8,     // -min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    7,     // min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    6,     // -denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    5,     // denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    4,     // NaN for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    3,     // -infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    2,     // infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    1,     // -0.0 for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    0,     // 0.0 for next consumeDouble

    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    10,                            // -max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    9,                             // max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    8,                             // -min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    7,                             // min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    6,                             // -denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    5,                             // denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    4,                             // NaN for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    3,                             // -infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    2,                             // infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    1,                             // -0.0 for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    0,                             // 0.0 for next consumeFloat

    0x88, 0xAB, 0x61, 0xCB, 0x32, 0xEB, 0x30,
    0xF9,  // consumeDouble(13.37, 31.337): 30.859126145478349 (small range)
    0x51, 0xF6, 0x1F,
    0x3A,  // consumeFloat(123.0, 777.0): 271.49084 (small range)
    0x11, 0x4D, 0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73,
    0x39,  // consumeRegularDouble(): 8.0940194040236032e+307
    0x16, 0xCF, 0x3D, 0x29, 0x4A,  // consumeRegularFloat(): -2.8546307e+38

    0x61, 0xCB, 0x32, 0xEB, 0x30, 0xF9, 0x51,
    0xF6,                    // consumeProbabilityDouble(): 0.96218831486039413
    0x1F, 0x3A, 0x11, 0x4D,  // consumeProbabilityFloat(): 0.30104411
    0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73, 0x39,
    0x16,                    // consumeProbabilityDouble(): 0.086814121166605432
    0xCF, 0x3D, 0x29, 0x4A,  // consumeProbabilityFloat(): 0.28969181

    0x01,  // consumeInt(0x12345678, 0x12345679): 0x12345679
    0x78,  // consumeInt(-0x12345678, -0x12345600): -0x12345600
    0x78, 0x56, 0x34, 0x12,  // consumeInt(): 0x12345678

    0x02,  // consumeByte(0x12, 0x22): 0x14
    0x7F,  // consumeByte(): 0x7F

    0x01,  // consumeBool(): true
};
243 
// Runs the Java fuzz target test/FuzzTargetWithDataProvider once on kInput and
// requires the run to finish with RunResult::kOk.
TEST_F(FuzzedDataProviderTest, FuzzTargetWithDataProvider) {
  FLAGS_target_class = "test/FuzzTargetWithDataProvider";
  FLAGS_target_args = "";

  FuzzTargetRunner runner(*jvm_);
  const auto run_result = runner.Run(kInput, sizeof(kInput));
  ASSERT_EQ(RunResult::kOk, run_result);
}
251 
// Number of random inputs that are checked per combination of the ascii_only
// and stop_on_backslash flags.
constexpr std::size_t kValidModifiedUtf8NumRuns = 10000;
// Size in bytes of each random input.
constexpr std::size_t kValidModifiedUtf8NumBytes = 100000;
// Fixed seed of the random number generator that produces the inputs.
constexpr uint32_t kValidModifiedUtf8Seed = 0x12345678;
255 
// Checks that FixUpModifiedUtf8 turns random bytes into valid modified UTF-8:
// for every combination of the ascii_only and stop_on_backslash flags, the
// fixed-up bytes have to survive a round trip through a Java string unchanged.
TEST_F(FuzzedDataProviderTest, InvalidModifiedUtf8AfterFixup) {
  auto modified_utf8_validator = jvm_->FindClass("test.ModifiedUtf8Encoder");
  ASSERT_NE(nullptr, modified_utf8_validator);
  auto string_to_modified_utf_bytes = jvm_->GetStaticMethodID(
      modified_utf8_validator, "encode", "(Ljava/lang/String;)[B");
  ASSERT_NE(nullptr, string_to_modified_utf_bytes);
  auto& env = jvm_->GetEnv();
  auto random_bytes = std::vector<uint8_t>(kValidModifiedUtf8NumBytes);
  auto random = std::mt19937(kValidModifiedUtf8Seed);
  // std::generate takes its generator by value. Handing it the engine itself
  // would advance only a copy, so that every run would see the very same
  // bytes. The lambda refers to the one engine instead, which yields fresh
  // (but, thanks to the fixed seed, reproducible) bytes for every run.
  const auto next_random_byte = [&random] {
    return static_cast<uint8_t>(random());
  };
  for (bool ascii_only : {false, true}) {
    for (bool stop_on_backslash : {false, true}) {
      for (std::size_t i = 0; i < kValidModifiedUtf8NumRuns; ++i) {
        std::generate(random_bytes.begin(), random_bytes.end(),
                      next_random_byte);
        // Only the fixed-up string is of interest here, not the second
        // element of the returned pair.
        const std::string fixed_string =
            FixUpModifiedUtf8(random_bytes.data(), random_bytes.size(),
                              std::numeric_limits<jint>::max(), ascii_only,
                              stop_on_backslash)
                .first;

        jstring jni_fixed_string = env.NewStringUTF(fixed_string.c_str());
        ASSERT_NE(nullptr, jni_fixed_string);
        auto jni_roundtripped_bytes =
            static_cast<jbyteArray>(env.CallStaticObjectMethod(
                modified_utf8_validator, string_to_modified_utf_bytes,
                jni_fixed_string));
        ASSERT_FALSE(env.ExceptionCheck());
        ASSERT_NE(nullptr, jni_roundtripped_bytes);
        env.DeleteLocalRef(jni_fixed_string);
        jint roundtripped_bytes_length =
            env.GetArrayLength(jni_roundtripped_bytes);
        jbyte* roundtripped_bytes =
            env.GetByteArrayElements(jni_roundtripped_bytes, nullptr);
        ASSERT_NE(nullptr, roundtripped_bytes);
        auto roundtripped_string =
            std::string(reinterpret_cast<char*>(roundtripped_bytes),
                        roundtripped_bytes_length);
        // The elements were only read, so there is nothing to copy back.
        env.ReleaseByteArrayElements(jni_roundtripped_bytes, roundtripped_bytes,
                                     JNI_ABORT);
        env.DeleteLocalRef(jni_roundtripped_bytes);

        // Verify that the bytes obtained from running our modified UTF-8 fix-up
        // function remain unchanged when turned into a Java string and
        // reencoded into modified UTF-8. This will only happen if our fix-up
        // function indeed returned valid modified UTF-8.
        ASSERT_EQ(fixed_string, roundtripped_string);
      }
    }
  }
}
300 }  // namespace jazzer
301