1 // Copyright 2006-2008 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/string_escape.h"
6
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <limits>
11 #include <string>
12
13 #include "base/check_op.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversion_utils.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/third_party/icu/icu_utf.h"
19
20 namespace base {
21
22 namespace {
23
24 // Format string for printing a \uXXXX escape sequence.
25 const char kU16EscapeFormat[] = "\\u%04X";
26
27 // The code point to output for an invalid input code unit.
28 const base_icu::UChar32 kReplacementCodePoint = 0xFFFD;
29
30 // Used below in EscapeSpecialCodePoint().
31 static_assert('<' == 0x3C, "less than sign must be 0x3c");
32
33 // Try to escape the |code_point| if it is a known special character. If
34 // successful, returns true and appends the escape sequence to |dest|. This
35 // isn't required by the spec, but it's more readable by humans.
EscapeSpecialCodePoint(base_icu::UChar32 code_point,std::string * dest)36 bool EscapeSpecialCodePoint(base_icu::UChar32 code_point, std::string* dest) {
37 // WARNING: if you add a new case here, you need to update the reader as well.
38 // Note: \v is in the reader, but not here since the JSON spec doesn't
39 // allow it.
40 switch (code_point) {
41 case '\b':
42 dest->append("\\b");
43 break;
44 case '\f':
45 dest->append("\\f");
46 break;
47 case '\n':
48 dest->append("\\n");
49 break;
50 case '\r':
51 dest->append("\\r");
52 break;
53 case '\t':
54 dest->append("\\t");
55 break;
56 case '\\':
57 dest->append("\\\\");
58 break;
59 case '"':
60 dest->append("\\\"");
61 break;
62 // Escape < to prevent script execution; escaping > is not necessary and
63 // not doing so save a few bytes.
64 case '<':
65 dest->append("\\u003C");
66 break;
67 // Escape the "Line Separator" and "Paragraph Separator" characters, since
68 // they should be treated like a new line \r or \n.
69 case 0x2028:
70 dest->append("\\u2028");
71 break;
72 case 0x2029:
73 dest->append("\\u2029");
74 break;
75 default:
76 return false;
77 }
78 return true;
79 }
80
81 template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)82 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
83 bool did_replacement = false;
84
85 if (put_in_quotes)
86 dest->push_back('"');
87
88 const size_t length = str.length();
89 for (size_t i = 0; i < length; ++i) {
90 base_icu::UChar32 code_point;
91 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
92 code_point == CBU_SENTINEL) {
93 code_point = kReplacementCodePoint;
94 did_replacement = true;
95 }
96
97 if (EscapeSpecialCodePoint(code_point, dest))
98 continue;
99
100 // Escape non-printing characters.
101 if (code_point < 32)
102 base::StringAppendF(dest, kU16EscapeFormat, code_point);
103 else
104 WriteUnicodeCharacter(code_point, dest);
105 }
106
107 if (put_in_quotes)
108 dest->push_back('"');
109
110 return !did_replacement;
111 }
112
113 } // namespace
114
EscapeJSONString(StringPiece str,bool put_in_quotes,std::string * dest)115 bool EscapeJSONString(StringPiece str, bool put_in_quotes, std::string* dest) {
116 return EscapeJSONStringImpl(str, put_in_quotes, dest);
117 }
118
EscapeJSONString(StringPiece16 str,bool put_in_quotes,std::string * dest)119 bool EscapeJSONString(StringPiece16 str,
120 bool put_in_quotes,
121 std::string* dest) {
122 return EscapeJSONStringImpl(str, put_in_quotes, dest);
123 }
124
GetQuotedJSONString(StringPiece str)125 std::string GetQuotedJSONString(StringPiece str) {
126 std::string dest;
127 EscapeJSONStringImpl(str, true, &dest);
128 return dest;
129 }
130
GetQuotedJSONString(StringPiece16 str)131 std::string GetQuotedJSONString(StringPiece16 str) {
132 std::string dest;
133 EscapeJSONStringImpl(str, true, &dest);
134 return dest;
135 }
136
EscapeBytesAsInvalidJSONString(StringPiece str,bool put_in_quotes)137 std::string EscapeBytesAsInvalidJSONString(StringPiece str,
138 bool put_in_quotes) {
139 std::string dest;
140
141 if (put_in_quotes)
142 dest.push_back('"');
143
144 for (char c : str) {
145 if (EscapeSpecialCodePoint(c, &dest))
146 continue;
147
148 if (c < 32 || c > 126) {
149 base::StringAppendF(&dest, kU16EscapeFormat,
150 static_cast<unsigned char>(c));
151 } else {
152 dest.push_back(c);
153 }
154 }
155
156 if (put_in_quotes)
157 dest.push_back('"');
158
159 return dest;
160 }
161
162 } // namespace base
163