1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/escaping.h"
16
17 #include "absl/base/internal/endian.h"
18 #include "absl/base/internal/raw_logging.h"
19
20 namespace absl {
21 ABSL_NAMESPACE_BEGIN
22 namespace strings_internal {
23
// The two strings below provide maps from normal 6-bit characters to their
// base64-escaped equivalent: the character stored at index i (0 <= i < 64)
// is the encoding of the 6-bit value i.
// For the inverse case, see kUn(WebSafe)Base64 in the external
// escaping.cc.

// Standard alphabet: value 62 maps to '+' and value 63 maps to '/'.
ABSL_CONST_INIT const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Web-safe alphabet: same as the standard one for values 0..61, but value 62
// maps to '-' and value 63 maps to '_'.
ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
33
34
// Returns the number of characters needed to base64-encode `input_len` bytes.
//
// Every complete group of three input bytes becomes four output characters.
// A trailing partial group is encoded as described in
// https://tools.ietf.org/html/rfc3548:
//   * 0 leftover bytes: nothing more is emitted;
//   * 1 leftover byte (8 bits):   2 characters, plus 2 '=' if `do_padding`;
//   * 2 leftover bytes (16 bits): 3 characters, plus 1 '=' if `do_padding`.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t whole_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  size_t len = whole_groups * 4;

  switch (leftover_bytes) {
    case 0:
      // The input is an integral multiple of 24 bits; no tail, no padding.
      break;
    case 1:
      // With padding the tail is rounded up to a full four-character group.
      len += do_padding ? 4 : 2;
      break;
    default:  // leftover_bytes == 2
      len += do_padding ? 4 : 3;
      break;
  }

  // The encoded form is never shorter than the input, so a smaller value
  // means the multiplication above wrapped around.
  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
72
73 // ----------------------------------------------------------------------
74 // Take the input in groups of 4 characters and turn each
75 // character into a code 0 to 63 thus:
76 // A-Z map to 0 to 25
77 // a-z map to 26 to 51
78 // 0-9 map to 52 to 61
79 // +(- for WebSafe) maps to 62
80 // /(_ for WebSafe) maps to 63
81 // There will be four numbers, all less than 64 which can be represented
82 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
83 // Arrange the 6 digit binary numbers into three bytes as such:
84 // aaaaaabb bbbbcccc ccdddddd
85 // Equals signs (one or two) are used at the end of the encoded block to
86 // indicate that the text was not an integer multiple of three bytes long.
87 // ----------------------------------------------------------------------
Base64EscapeInternal(const unsigned char * src,size_t szsrc,char * dest,size_t szdest,const char * base64,bool do_padding)88 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
89 size_t szdest, const char* base64,
90 bool do_padding) {
91 static const char kPad64 = '=';
92
93 if (szsrc * 4 > szdest * 3) return 0;
94
95 char* cur_dest = dest;
96 const unsigned char* cur_src = src;
97
98 char* const limit_dest = dest + szdest;
99 const unsigned char* const limit_src = src + szsrc;
100
101 // (from https://tools.ietf.org/html/rfc3548)
102 // Special processing is performed if fewer than 24 bits are available
103 // at the end of the data being encoded. A full encoding quantum is
104 // always completed at the end of a quantity. When fewer than 24 input
105 // bits are available in an input group, zero bits are added (on the
106 // right) to form an integral number of 6-bit groups.
107 //
108 // If do_padding is true, padding at the end of the data is performed. This
109 // output padding uses the '=' character.
110
111 // Three bytes of data encodes to four characters of cyphertext.
112 // So we can pump through three-byte chunks atomically.
113 if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
114 while (cur_src < limit_src - 3) { // While we have >= 32 bits.
115 uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
116
117 cur_dest[0] = base64[in >> 18];
118 in &= 0x3FFFF;
119 cur_dest[1] = base64[in >> 12];
120 in &= 0xFFF;
121 cur_dest[2] = base64[in >> 6];
122 in &= 0x3F;
123 cur_dest[3] = base64[in];
124
125 cur_dest += 4;
126 cur_src += 3;
127 }
128 }
129 // To save time, we didn't update szdest or szsrc in the loop. So do it now.
130 szdest = static_cast<size_t>(limit_dest - cur_dest);
131 szsrc = static_cast<size_t>(limit_src - cur_src);
132
133 /* now deal with the tail (<=3 bytes) */
134 switch (szsrc) {
135 case 0:
136 // Nothing left; nothing more to do.
137 break;
138 case 1: {
139 // One byte left: this encodes to two characters, and (optionally)
140 // two pad characters to round out the four-character cypherblock.
141 if (szdest < 2) return 0;
142 uint32_t in = cur_src[0];
143 cur_dest[0] = base64[in >> 2];
144 in &= 0x3;
145 cur_dest[1] = base64[in << 4];
146 cur_dest += 2;
147 szdest -= 2;
148 if (do_padding) {
149 if (szdest < 2) return 0;
150 cur_dest[0] = kPad64;
151 cur_dest[1] = kPad64;
152 cur_dest += 2;
153 szdest -= 2;
154 }
155 break;
156 }
157 case 2: {
158 // Two bytes left: this encodes to three characters, and (optionally)
159 // one pad character to round out the four-character cypherblock.
160 if (szdest < 3) return 0;
161 uint32_t in = absl::big_endian::Load16(cur_src);
162 cur_dest[0] = base64[in >> 10];
163 in &= 0x3FF;
164 cur_dest[1] = base64[in >> 4];
165 in &= 0x00F;
166 cur_dest[2] = base64[in << 2];
167 cur_dest += 3;
168 szdest -= 3;
169 if (do_padding) {
170 if (szdest < 1) return 0;
171 cur_dest[0] = kPad64;
172 cur_dest += 1;
173 szdest -= 1;
174 }
175 break;
176 }
177 case 3: {
178 // Three bytes left: same as in the big loop above. We can't do this in
179 // the loop because the loop above always reads 4 bytes, and the fourth
180 // byte is past the end of the input.
181 if (szdest < 4) return 0;
182 uint32_t in =
183 (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
184 cur_dest[0] = base64[in >> 18];
185 in &= 0x3FFFF;
186 cur_dest[1] = base64[in >> 12];
187 in &= 0xFFF;
188 cur_dest[2] = base64[in >> 6];
189 in &= 0x3F;
190 cur_dest[3] = base64[in];
191 cur_dest += 4;
192 szdest -= 4;
193 break;
194 }
195 default:
196 // Should not be reached: blocks of 4 bytes are handled
197 // in the while loop before this switch statement.
198 ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
199 break;
200 }
201 return static_cast<size_t>(cur_dest - dest);
202 }
203
204 } // namespace strings_internal
205 ABSL_NAMESPACE_END
206 } // namespace absl
207