1 /* gbase64.c - Base64 encoding/decoding
2 *
3 * Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
4 * Copyright (C) 2000-2003 Ximian Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this library; if not, see <http://www.gnu.org/licenses/>.
18 *
19 * This is based on code in camel, written by:
20 * Michael Zucchi <notzed@ximian.com>
21 * Jeffrey Stedfast <fejj@ximian.com>
22 */
23
24 #include "config.h"
25
26 #include <string.h>
27
28 #include "gbase64.h"
29 #include "gtestutils.h"
30 #include "glibintl.h"
31
32
33 /**
34 * SECTION:base64
35 * @title: Base64 Encoding
36 * @short_description: encodes and decodes data in Base64 format
37 *
38 * Base64 is an encoding that allows a sequence of arbitrary bytes to be
39 * encoded as a sequence of printable ASCII characters. For the definition
40 * of Base64, see
41 * [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt)
42 * or
43 * [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt).
44 * Base64 is most commonly used as a MIME transfer encoding
45 * for email.
46 *
47 * GLib supports incremental encoding using g_base64_encode_step() and
48 * g_base64_encode_close(). Incremental decoding can be done with
49 * g_base64_decode_step(). To encode or decode data in one go, use
50 * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
51 * decoding, you can use g_base64_decode_inplace().
52 *
53 * Support for Base64 encoding has been added in GLib 2.12.
54 */
55
/* The 64 output symbols of Base64, indexed by 6-bit value:
 * 0-25 upper case, 26-51 lower case, 52-61 digits, 62 '+', 63 '/'. */
static const char base64_alphabet[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789"
  "+/";
58
59 /**
60 * g_base64_encode_step:
61 * @in: (array length=len) (element-type guint8): the binary data to encode
62 * @len: the length of @in
63 * @break_lines: whether to break long lines
64 * @out: (out) (array) (element-type guint8): pointer to destination buffer
65 * @state: (inout): Saved state between steps, initialize to 0
66 * @save: (inout): Saved state between steps, initialize to 0
67 *
68 * Incrementally encode a sequence of binary data into its Base-64 stringified
69 * representation. By calling this function multiple times you can convert
70 * data in chunks to avoid having to have the full encoded data in memory.
71 *
72 * When all of the data has been converted you must call
73 * g_base64_encode_close() to flush the saved state.
74 *
75 * The output buffer must be large enough to fit all the data that will
76 * be written to it. Due to the way base64 encodes you will need
77 * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
78 * non-zero state). If you enable line-breaking you will need at least:
79 * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
80 *
81 * @break_lines is typically used when putting base64-encoded data in emails.
82 * It breaks the lines at 76 columns instead of putting all of the text on
83 * the same line. This avoids problems with long lines in the email system.
84 * Note however that it breaks the lines with `LF` characters, not
85 * `CR LF` sequences, so the result cannot be passed directly to SMTP
86 * or certain other protocols.
87 *
88 * Returns: The number of bytes of output that was written
89 *
90 * Since: 2.12
91 */
92 gsize
g_base64_encode_step(const guchar * in,gsize len,gboolean break_lines,gchar * out,gint * state,gint * save)93 g_base64_encode_step (const guchar *in,
94 gsize len,
95 gboolean break_lines,
96 gchar *out,
97 gint *state,
98 gint *save)
99 {
100 char *outptr;
101 const guchar *inptr;
102
103 g_return_val_if_fail (in != NULL || len == 0, 0);
104 g_return_val_if_fail (out != NULL, 0);
105 g_return_val_if_fail (state != NULL, 0);
106 g_return_val_if_fail (save != NULL, 0);
107
108 if (len == 0)
109 return 0;
110
111 inptr = in;
112 outptr = out;
113
114 if (len + ((char *) save) [0] > 2)
115 {
116 const guchar *inend = in+len-2;
117 int c1, c2, c3;
118 int already;
119
120 already = *state;
121
122 switch (((char *) save) [0])
123 {
124 case 1:
125 c1 = ((unsigned char *) save) [1];
126 goto skip1;
127 case 2:
128 c1 = ((unsigned char *) save) [1];
129 c2 = ((unsigned char *) save) [2];
130 goto skip2;
131 }
132
133 /*
134 * yes, we jump into the loop, no i'm not going to change it,
135 * it's beautiful!
136 */
137 while (inptr < inend)
138 {
139 c1 = *inptr++;
140 skip1:
141 c2 = *inptr++;
142 skip2:
143 c3 = *inptr++;
144 *outptr++ = base64_alphabet [ c1 >> 2 ];
145 *outptr++ = base64_alphabet [ c2 >> 4 |
146 ((c1&0x3) << 4) ];
147 *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
148 (c3 >> 6) ];
149 *outptr++ = base64_alphabet [ c3 & 0x3f ];
150 /* this is a bit ugly ... */
151 if (break_lines && (++already) >= 19)
152 {
153 *outptr++ = '\n';
154 already = 0;
155 }
156 }
157
158 ((char *)save)[0] = 0;
159 len = 2 - (inptr - inend);
160 *state = already;
161 }
162
163 g_assert (len == 0 || len == 1 || len == 2);
164
165 {
166 char *saveout;
167
168 /* points to the slot for the next char to save */
169 saveout = & (((char *)save)[1]) + ((char *)save)[0];
170
171 /* len can only be 0 1 or 2 */
172 switch(len)
173 {
174 case 2:
175 *saveout++ = *inptr++;
176 G_GNUC_FALLTHROUGH;
177 case 1:
178 *saveout++ = *inptr++;
179 }
180 ((char *)save)[0] += len;
181 }
182
183 return outptr - out;
184 }
185
186 /**
187 * g_base64_encode_close:
188 * @break_lines: whether to break long lines
189 * @out: (out) (array) (element-type guint8): pointer to destination buffer
190 * @state: (inout): Saved state from g_base64_encode_step()
191 * @save: (inout): Saved state from g_base64_encode_step()
192 *
193 * Flush the status from a sequence of calls to g_base64_encode_step().
194 *
195 * The output buffer must be large enough to fit all the data that will
196 * be written to it. It will need up to 4 bytes, or up to 5 bytes if
197 * line-breaking is enabled.
198 *
199 * The @out array will not be automatically nul-terminated.
200 *
201 * Returns: The number of bytes of output that was written
202 *
203 * Since: 2.12
204 */
205 gsize
g_base64_encode_close(gboolean break_lines,gchar * out,gint * state,gint * save)206 g_base64_encode_close (gboolean break_lines,
207 gchar *out,
208 gint *state,
209 gint *save)
210 {
211 int c1, c2;
212 char *outptr = out;
213
214 g_return_val_if_fail (out != NULL, 0);
215 g_return_val_if_fail (state != NULL, 0);
216 g_return_val_if_fail (save != NULL, 0);
217
218 c1 = ((unsigned char *) save) [1];
219 c2 = ((unsigned char *) save) [2];
220
221 switch (((char *) save) [0])
222 {
223 case 2:
224 outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
225 g_assert (outptr [2] != 0);
226 goto skip;
227 case 1:
228 outptr[2] = '=';
229 c2 = 0; /* saved state here is not relevant */
230 skip:
231 outptr [0] = base64_alphabet [ c1 >> 2 ];
232 outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
233 outptr [3] = '=';
234 outptr += 4;
235 break;
236 }
237 if (break_lines)
238 *outptr++ = '\n';
239
240 *save = 0;
241 *state = 0;
242
243 return outptr - out;
244 }
245
246 /**
247 * g_base64_encode:
248 * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
249 * @len: the length of @data
250 *
251 * Encode a sequence of binary data into its Base-64 stringified
252 * representation.
253 *
254 * Returns: (transfer full): a newly allocated, zero-terminated Base-64
255 * encoded string representing @data. The returned string must
256 * be freed with g_free().
257 *
258 * Since: 2.12
259 */
260 gchar *
g_base64_encode(const guchar * data,gsize len)261 g_base64_encode (const guchar *data,
262 gsize len)
263 {
264 gchar *out;
265 gint state = 0, outlen;
266 gint save = 0;
267
268 g_return_val_if_fail (data != NULL || len == 0, NULL);
269
270 /* We can use a smaller limit here, since we know the saved state is 0,
271 +1 is needed for trailing \0, also check for unlikely integer overflow */
272 g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL);
273
274 out = g_malloc ((len / 3 + 1) * 4 + 1);
275
276 outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
277 outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
278 out[outlen] = '\0';
279
280 return (gchar *) out;
281 }
282
/* Maps an input byte to its 6-bit Base64 value. Bytes that are not part of
 * the alphabet map to 0xff; the padding character '=' maps to 0. */
static const unsigned char mime_base64_rank[256] = {
  /* 0x00 - 0x1f */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x20 - 0x2f: '+' and '/' */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63,
  /* 0x30 - 0x3f: '0'..'9' and '=' */
    52,   53,   54,   55,   56,   57,   58,   59,
    60,   61, 0xff, 0xff, 0xff,    0, 0xff, 0xff,
  /* 0x40 - 0x5f: 'A'..'Z' */
  0xff,    0,    1,    2,    3,    4,    5,    6,
     7,    8,    9,   10,   11,   12,   13,   14,
    15,   16,   17,   18,   19,   20,   21,   22,
    23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x60 - 0x7f: 'a'..'z' */
  0xff,   26,   27,   28,   29,   30,   31,   32,
    33,   34,   35,   36,   37,   38,   39,   40,
    41,   42,   43,   44,   45,   46,   47,   48,
    49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x80 - 0xff */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
301
302 /**
303 * g_base64_decode_step: (skip)
304 * @in: (array length=len) (element-type guint8): binary input data
305 * @len: max length of @in data to decode
306 * @out: (out caller-allocates) (array) (element-type guint8): output buffer
307 * @state: (inout): Saved state between steps, initialize to 0
308 * @save: (inout): Saved state between steps, initialize to 0
309 *
310 * Incrementally decode a sequence of binary data from its Base-64 stringified
311 * representation. By calling this function multiple times you can convert
312 * data in chunks to avoid having to have the full encoded data in memory.
313 *
314 * The output buffer must be large enough to fit all the data that will
315 * be written to it. Since base64 encodes 3 bytes in 4 chars you need
316 * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
317 * state).
318 *
319 * Returns: The number of bytes of output that was written
320 *
321 * Since: 2.12
322 **/
323 gsize
g_base64_decode_step(const gchar * in,gsize len,guchar * out,gint * state,guint * save)324 g_base64_decode_step (const gchar *in,
325 gsize len,
326 guchar *out,
327 gint *state,
328 guint *save)
329 {
330 const guchar *inptr;
331 guchar *outptr;
332 const guchar *inend;
333 guchar c, rank;
334 guchar last[2];
335 unsigned int v;
336 int i;
337
338 g_return_val_if_fail (in != NULL || len == 0, 0);
339 g_return_val_if_fail (out != NULL, 0);
340 g_return_val_if_fail (state != NULL, 0);
341 g_return_val_if_fail (save != NULL, 0);
342
343 if (len == 0)
344 return 0;
345
346 inend = (const guchar *)in+len;
347 outptr = out;
348
349 /* convert 4 base64 bytes to 3 normal bytes */
350 v=*save;
351 i=*state;
352
353 last[0] = last[1] = 0;
354
355 /* we use the sign in the state to determine if we got a padding character
356 in the previous sequence */
357 if (i < 0)
358 {
359 i = -i;
360 last[0] = '=';
361 }
362
363 inptr = (const guchar *)in;
364 while (inptr < inend)
365 {
366 c = *inptr++;
367 rank = mime_base64_rank [c];
368 if (rank != 0xff)
369 {
370 last[1] = last[0];
371 last[0] = c;
372 v = (v<<6) | rank;
373 i++;
374 if (i==4)
375 {
376 *outptr++ = v>>16;
377 if (last[1] != '=')
378 *outptr++ = v>>8;
379 if (last[0] != '=')
380 *outptr++ = v;
381 i=0;
382 }
383 }
384 }
385
386 *save = v;
387 *state = last[0] == '=' ? -i : i;
388
389 return outptr - out;
390 }
391
392 /**
393 * g_base64_decode:
394 * @text: (not nullable): zero-terminated string with base64 text to decode
395 * @out_len: (out): The length of the decoded data is written here
396 *
397 * Decode a sequence of Base-64 encoded text into binary data. Note
398 * that the returned binary data is not necessarily zero-terminated,
399 * so it should not be used as a character string.
400 *
401 * Returns: (transfer full) (array length=out_len) (element-type guint8):
402 * newly allocated buffer containing the binary data
403 * that @text represents. The returned buffer must
404 * be freed with g_free().
405 *
406 * Since: 2.12
407 */
408 guchar *
g_base64_decode(const gchar * text,gsize * out_len)409 g_base64_decode (const gchar *text,
410 gsize *out_len)
411 {
412 guchar *ret;
413 gsize input_length;
414 gint state = 0;
415 guint save = 0;
416
417 g_return_val_if_fail (text != NULL, NULL);
418 g_return_val_if_fail (out_len != NULL, NULL);
419
420 input_length = strlen (text);
421
422 /* We can use a smaller limit here, since we know the saved state is 0,
423 +1 used to avoid calling g_malloc0(0), and hence returning NULL */
424 ret = g_malloc0 ((input_length / 4) * 3 + 1);
425
426 *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
427
428 return ret;
429 }
430
431 /**
432 * g_base64_decode_inplace:
433 * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
434 * string with base64 text to decode
435 * @out_len: (inout): The length of the decoded data is written here
436 *
437 * Decode a sequence of Base-64 encoded text into binary data
438 * by overwriting the input data.
439 *
440 * Returns: (transfer none): The binary data that @text responds. This pointer
441 * is the same as the input @text.
442 *
443 * Since: 2.20
444 */
445 guchar *
g_base64_decode_inplace(gchar * text,gsize * out_len)446 g_base64_decode_inplace (gchar *text,
447 gsize *out_len)
448 {
449 gint input_length, state = 0;
450 guint save = 0;
451
452 g_return_val_if_fail (text != NULL, NULL);
453 g_return_val_if_fail (out_len != NULL, NULL);
454
455 input_length = strlen (text);
456
457 g_return_val_if_fail (input_length > 1, NULL);
458
459 *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
460
461 return (guchar *) text;
462 }
463