1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <vector>

#include "fuzzer_utils.h"
#include "unicode/unistr.h"
14
15 // Taken from third_party/icu/source/data/mappings/convrtrs.txt file.
// Converter names the fuzzer can select from.
// Keep the order stable: entries are picked by index (first input byte modulo
// the table size), so reordering changes which converter an existing fuzz
// input exercises.
static constexpr std::array<const char*, 45> kConverters{{
    // UTF-8 / UTF-16 / UTF-32.
    "UTF-8", "utf-16be", "utf-16le",
    "UTF-32", "UTF-32BE", "UTF-32LE",

    // ibm866 and the ISO-8859 series.
    "ibm866-html",
    "iso-8859-2-html", "iso-8859-3-html", "iso-8859-4-html",
    "iso-8859-5-html", "iso-8859-6-html", "iso-8859-7-html",
    "iso-8859-8-html", "ISO-8859-8-I", "iso-8859-10-html",
    "iso-8859-13-html", "iso-8859-14-html", "iso-8859-15-html",
    "iso-8859-16-html",

    // KOI8 and Macintosh.
    "koi8-r-html", "koi8-u-html", "macintosh-html",

    // windows-* code pages, followed by x-mac-cyrillic.
    "windows-874-html", "windows-1250-html", "windows-1251-html",
    "windows-1252-html", "windows-1253-html", "windows-1254-html",
    "windows-1255-html", "windows-1256-html", "windows-1257-html",
    "windows-1258-html", "x-mac-cyrillic-html",

    // East Asian encodings (GB, Big5, EUC, Shift_JIS, ISO-2022, HZ).
    "windows-936-2000", "gb18030", "big5-html",
    "euc-jp-html", "ISO_2022,locale=ja,version=0", "shift_jis-html",
    "euc-kr-html", "ISO-2022-KR",
    "ISO-2022-CN", "ISO-2022-CN-EXT", "HZ-GB-2312",
}};
65
66 IcuEnvironment* env = new IcuEnvironment();
67
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)68 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
69 if (size < 1) {
70 return 0;
71 }
72
73 // First byte is used for random converter selection.
74 uint8_t rnd = *data;
75 data++;
76 size--;
77
78 std::unique_ptr<char[]> fuzzbuff(new char[size]);
79 std::memcpy(fuzzbuff.get(), data, size);
80
81 icu::UnicodeString str(fuzzbuff.get(), size,
82 kConverters[rnd % kConverters.size()]);
83
84 return 0;
85 }
86