• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/escaping.h"
16 
17 #include "absl/base/internal/endian.h"
18 #include "absl/base/internal/raw_logging.h"
19 
20 namespace absl {
21 ABSL_NAMESPACE_BEGIN
22 namespace strings_internal {
23 
24 ABSL_CONST_INIT const char kBase64Chars[] =
25     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
26 
// Returns the number of characters produced by base64-encoding `input_len`
// bytes.  When `do_padding` is true the result counts the '=' characters that
// round a partial final group up to four characters; otherwise only the
// significant characters are counted.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 maps every complete group of three input bytes onto four output
  // characters.
  const size_t whole_groups = input_len / 3;
  size_t len = whole_groups * 4;

  // (from https://tools.ietf.org/html/rfc3548)
  // Special processing is performed if fewer than 24 bits are available at
  // the end of the data being encoded: zero bits are added (on the right) to
  // form an integral number of 6-bit groups, and the final quantum is
  // completed with '=' padding characters.  Since all base 64 input is an
  // integral number of octets, only three cases can arise.
  switch (input_len % 3) {
    case 1:
      // (2) The final quantum is exactly 8 bits: two significant characters,
      // followed by two "=" characters when padding is requested.
      len += do_padding ? 4 : 2;
      break;
    case 2:
      // (3) The final quantum is exactly 16 bits: three significant
      // characters, followed by one "=" character when padding is requested.
      len += do_padding ? 4 : 3;
      break;
    default:
      // (1) The input is an integral multiple of 24 bits: the output is an
      // integral multiple of four characters and needs no padding.
      break;
  }

  // The encoded form is never shorter than the input, so a smaller value
  // means the arithmetic above wrapped around.
  assert(len >= input_len);
  return len;
}
72 
Base64EscapeInternal(const unsigned char * src,size_t szsrc,char * dest,size_t szdest,const char * base64,bool do_padding)73 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
74                             size_t szdest, const char* base64,
75                             bool do_padding) {
76   static const char kPad64 = '=';
77 
78   if (szsrc * 4 > szdest * 3) return 0;
79 
80   char* cur_dest = dest;
81   const unsigned char* cur_src = src;
82 
83   char* const limit_dest = dest + szdest;
84   const unsigned char* const limit_src = src + szsrc;
85 
86   // Three bytes of data encodes to four characters of cyphertext.
87   // So we can pump through three-byte chunks atomically.
88   if (szsrc >= 3) {                    // "limit_src - 3" is UB if szsrc < 3.
89     while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
90       uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
91 
92       cur_dest[0] = base64[in >> 18];
93       in &= 0x3FFFF;
94       cur_dest[1] = base64[in >> 12];
95       in &= 0xFFF;
96       cur_dest[2] = base64[in >> 6];
97       in &= 0x3F;
98       cur_dest[3] = base64[in];
99 
100       cur_dest += 4;
101       cur_src += 3;
102     }
103   }
104   // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
105   szdest = static_cast<size_t>(limit_dest - cur_dest);
106   szsrc = static_cast<size_t>(limit_src - cur_src);
107 
108   /* now deal with the tail (<=3 bytes) */
109   switch (szsrc) {
110     case 0:
111       // Nothing left; nothing more to do.
112       break;
113     case 1: {
114       // One byte left: this encodes to two characters, and (optionally)
115       // two pad characters to round out the four-character cypherblock.
116       if (szdest < 2) return 0;
117       uint32_t in = cur_src[0];
118       cur_dest[0] = base64[in >> 2];
119       in &= 0x3;
120       cur_dest[1] = base64[in << 4];
121       cur_dest += 2;
122       szdest -= 2;
123       if (do_padding) {
124         if (szdest < 2) return 0;
125         cur_dest[0] = kPad64;
126         cur_dest[1] = kPad64;
127         cur_dest += 2;
128         szdest -= 2;
129       }
130       break;
131     }
132     case 2: {
133       // Two bytes left: this encodes to three characters, and (optionally)
134       // one pad character to round out the four-character cypherblock.
135       if (szdest < 3) return 0;
136       uint32_t in = absl::big_endian::Load16(cur_src);
137       cur_dest[0] = base64[in >> 10];
138       in &= 0x3FF;
139       cur_dest[1] = base64[in >> 4];
140       in &= 0x00F;
141       cur_dest[2] = base64[in << 2];
142       cur_dest += 3;
143       szdest -= 3;
144       if (do_padding) {
145         if (szdest < 1) return 0;
146         cur_dest[0] = kPad64;
147         cur_dest += 1;
148         szdest -= 1;
149       }
150       break;
151     }
152     case 3: {
153       // Three bytes left: same as in the big loop above.  We can't do this in
154       // the loop because the loop above always reads 4 bytes, and the fourth
155       // byte is past the end of the input.
156       if (szdest < 4) return 0;
157       uint32_t in =
158           (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
159       cur_dest[0] = base64[in >> 18];
160       in &= 0x3FFFF;
161       cur_dest[1] = base64[in >> 12];
162       in &= 0xFFF;
163       cur_dest[2] = base64[in >> 6];
164       in &= 0x3F;
165       cur_dest[3] = base64[in];
166       cur_dest += 4;
167       szdest -= 4;
168       break;
169     }
170     default:
171       // Should not be reached: blocks of 4 bytes are handled
172       // in the while loop before this switch statement.
173       ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
174       break;
175   }
176   return static_cast<size_t>(cur_dest - dest);
177 }
178 
179 }  // namespace strings_internal
180 ABSL_NAMESPACE_END
181 }  // namespace absl
182