1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/string_escape.h"
6
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <limits>
11 #include <string>
12
13 #include "base/strings/string_util.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversion_utils.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/third_party/icu/icu_utf.h"
18
19 namespace base {
20
21 namespace {
22
// printf-style format that renders a code point as a \uXXXX escape sequence,
// zero-padded to at least four upper-case hexadecimal digits.
constexpr char kU16EscapeFormat[] = "\\u%04X";

// Code point written in place of an input code unit that is not valid.
constexpr uint32_t kReplacementCodePoint = 0xFFFD;

// EscapeSpecialCodePoint() below hard-codes the escape for '<' as \u003C, so
// make sure the execution character set agrees.
static_assert('<' == 0x3C, "less than sign must be 0x3c");
31
// Appends a short, human-friendly escape for |code_point| to |dest| when one
// is defined here and returns true. For any other code point |dest| is left
// untouched and false is returned so the caller can choose its own encoding.
// The JSON spec does not require these particular spellings; they are simply
// easier for people to read.
bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) {
  // WARNING: every case added here must also be taught to the reader.
  // Note: the reader understands \v, but it is deliberately absent here
  // because the JSON spec doesn't allow it.
  const char* escaped = nullptr;
  switch (code_point) {
    case '\b':
      escaped = "\\b";
      break;
    case '\f':
      escaped = "\\f";
      break;
    case '\n':
      escaped = "\\n";
      break;
    case '\r':
      escaped = "\\r";
      break;
    case '\t':
      escaped = "\\t";
      break;
    case '\\':
      escaped = "\\\\";
      break;
    case '"':
      escaped = "\\\"";
      break;
    case '<':
      // Escaped to prevent script execution. '>' is left alone: escaping it
      // is not necessary, and skipping it saves a few bytes.
      escaped = "\\u003C";
      break;
    case 0x2028:
      // "Line Separator": should be treated like a new line (\r or \n).
      escaped = "\\u2028";
      break;
    case 0x2029:
      // "Paragraph Separator": same reasoning as for U+2028.
      escaped = "\\u2029";
      break;
    default:
      // Not a special character; nothing is appended.
      return false;
  }

  dest->append(escaped);
  return true;
}
79
80 template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)81 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
82 bool did_replacement = false;
83
84 if (put_in_quotes)
85 dest->push_back('"');
86
87 // Casting is necessary because ICU uses int32_t. Try and do so safely.
88 CHECK_LE(str.length(),
89 static_cast<size_t>(std::numeric_limits<int32_t>::max()));
90 const int32_t length = static_cast<int32_t>(str.length());
91
92 for (int32_t i = 0; i < length; ++i) {
93 uint32_t code_point;
94 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
95 code_point == static_cast<decltype(code_point)>(CBU_SENTINEL) ||
96 !IsValidCharacter(code_point)) {
97 code_point = kReplacementCodePoint;
98 did_replacement = true;
99 }
100
101 if (EscapeSpecialCodePoint(code_point, dest))
102 continue;
103
104 // Escape non-printing characters.
105 if (code_point < 32)
106 base::StringAppendF(dest, kU16EscapeFormat, code_point);
107 else
108 WriteUnicodeCharacter(code_point, dest);
109 }
110
111 if (put_in_quotes)
112 dest->push_back('"');
113
114 return !did_replacement;
115 }
116
117 } // namespace
118
EscapeJSONString(std::string_view str,bool put_in_quotes,std::string * dest)119 void EscapeJSONString(std::string_view str,
120 bool put_in_quotes,
121 std::string* dest) {
122 EscapeJSONStringImpl(str, put_in_quotes, dest);
123 }
124
EscapeJSONString(std::u16string_view str,bool put_in_quotes,std::string * dest)125 void EscapeJSONString(std::u16string_view str,
126 bool put_in_quotes,
127 std::string* dest) {
128 EscapeJSONStringImpl(str, put_in_quotes, dest);
129 }
130
EscapeBytesAsInvalidJSONString(std::string_view str,bool put_in_quotes)131 std::string EscapeBytesAsInvalidJSONString(std::string_view str,
132 bool put_in_quotes) {
133 std::string dest;
134
135 if (put_in_quotes)
136 dest.push_back('"');
137
138 for (std::string_view::const_iterator it = str.begin(); it != str.end();
139 ++it) {
140 unsigned char c = *it;
141 if (EscapeSpecialCodePoint(c, &dest))
142 continue;
143
144 if (c < 32 || c > 126)
145 base::StringAppendF(&dest, kU16EscapeFormat, c);
146 else
147 dest.push_back(*it);
148 }
149
150 if (put_in_quotes)
151 dest.push_back('"');
152
153 return dest;
154 }
155
156 } // namespace base
157