1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #ifndef CURL_DISABLE_HTTP
28
29 #include "urldata.h" /* it includes http_chunks.h */
30 #include "sendf.h" /* for the client write stuff */
31 #include "dynbuf.h"
32 #include "content_encoding.h"
33 #include "http.h"
34 #include "strtoofft.h"
35 #include "warnless.h"
36
37 /* The last #include files should be: */
38 #include "curl_memory.h"
39 #include "memdebug.h"
40
41 /*
42 * Chunk format (simplified):
43 *
44 * <HEX SIZE>[ chunk extension ] CRLF
45 * <DATA> CRLF
46 *
47 * Highlights from RFC2616 section 3.6 say:
48
49 The chunked encoding modifies the body of a message in order to
50 transfer it as a series of chunks, each with its own size indicator,
51 followed by an OPTIONAL trailer containing entity-header fields. This
52 allows dynamically produced content to be transferred along with the
53 information necessary for the recipient to verify that it has
54 received the full message.
55
56 Chunked-Body = *chunk
57 last-chunk
58 trailer
59 CRLF
60
61 chunk = chunk-size [ chunk-extension ] CRLF
62 chunk-data CRLF
63 chunk-size = 1*HEX
64 last-chunk = 1*("0") [ chunk-extension ] CRLF
65
66 chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
67 chunk-ext-name = token
68 chunk-ext-val = token | quoted-string
69 chunk-data = chunk-size(OCTET)
70 trailer = *(entity-header CRLF)
71
72 The chunk-size field is a string of hex digits indicating the size of
73 the chunk. The chunked encoding is ended by any chunk whose size is
74 zero, followed by the trailer, which is terminated by an empty line.
75
76 */
77
Curl_httpchunk_init(struct Curl_easy * data,struct Curl_chunker * ch,bool ignore_body)78 void Curl_httpchunk_init(struct Curl_easy *data, struct Curl_chunker *ch,
79 bool ignore_body)
80 {
81 (void)data;
82 ch->hexindex = 0; /* start at 0 */
83 ch->state = CHUNK_HEX; /* we get hex first! */
84 ch->last_code = CHUNKE_OK;
85 Curl_dyn_init(&ch->trailer, DYN_H1_TRAILER);
86 ch->ignore_body = ignore_body;
87 }
88
Curl_httpchunk_reset(struct Curl_easy * data,struct Curl_chunker * ch,bool ignore_body)89 void Curl_httpchunk_reset(struct Curl_easy *data, struct Curl_chunker *ch,
90 bool ignore_body)
91 {
92 (void)data;
93 ch->hexindex = 0; /* start at 0 */
94 ch->state = CHUNK_HEX; /* we get hex first! */
95 ch->last_code = CHUNKE_OK;
96 Curl_dyn_reset(&ch->trailer);
97 ch->ignore_body = ignore_body;
98 }
99
Curl_httpchunk_free(struct Curl_easy * data,struct Curl_chunker * ch)100 void Curl_httpchunk_free(struct Curl_easy *data, struct Curl_chunker *ch)
101 {
102 (void)data;
103 Curl_dyn_free(&ch->trailer);
104 }
105
Curl_httpchunk_is_done(struct Curl_easy * data,struct Curl_chunker * ch)106 bool Curl_httpchunk_is_done(struct Curl_easy *data, struct Curl_chunker *ch)
107 {
108 (void)data;
109 return ch->state == CHUNK_DONE;
110 }
111
httpchunk_readwrite(struct Curl_easy * data,struct Curl_chunker * ch,struct Curl_cwriter * cw_next,const char * buf,size_t blen,size_t * pconsumed)112 static CURLcode httpchunk_readwrite(struct Curl_easy *data,
113 struct Curl_chunker *ch,
114 struct Curl_cwriter *cw_next,
115 const char *buf, size_t blen,
116 size_t *pconsumed)
117 {
118 CURLcode result = CURLE_OK;
119 size_t piece;
120
121 *pconsumed = 0; /* nothing's written yet */
122 /* first check terminal states that will not progress anywhere */
123 if(ch->state == CHUNK_DONE)
124 return CURLE_OK;
125 if(ch->state == CHUNK_FAILED)
126 return CURLE_RECV_ERROR;
127
128 /* the original data is written to the client, but we go on with the
129 chunk read process, to properly calculate the content length */
130 if(data->set.http_te_skip && !ch->ignore_body) {
131 if(cw_next)
132 result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY, buf, blen);
133 else
134 result = Curl_client_write(data, CLIENTWRITE_BODY, (char *)buf, blen);
135 if(result) {
136 ch->state = CHUNK_FAILED;
137 ch->last_code = CHUNKE_PASSTHRU_ERROR;
138 return result;
139 }
140 }
141
142 while(blen) {
143 switch(ch->state) {
144 case CHUNK_HEX:
145 if(ISXDIGIT(*buf)) {
146 if(ch->hexindex >= CHUNK_MAXNUM_LEN) {
147 failf(data, "chunk hex-length longer than %d", CHUNK_MAXNUM_LEN);
148 ch->state = CHUNK_FAILED;
149 ch->last_code = CHUNKE_TOO_LONG_HEX; /* longer than we support */
150 return CURLE_RECV_ERROR;
151 }
152 ch->hexbuffer[ch->hexindex++] = *buf;
153 buf++;
154 blen--;
155 }
156 else {
157 char *endptr;
158 if(0 == ch->hexindex) {
159 /* This is illegal data, we received junk where we expected
160 a hexadecimal digit. */
161 failf(data, "chunk hex-length char not a hex digit: 0x%x", *buf);
162 ch->state = CHUNK_FAILED;
163 ch->last_code = CHUNKE_ILLEGAL_HEX;
164 return CURLE_RECV_ERROR;
165 }
166
167 /* blen and buf are unmodified */
168 ch->hexbuffer[ch->hexindex] = 0;
169 if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize)) {
170 failf(data, "chunk hex-length not valid: '%s'", ch->hexbuffer);
171 ch->state = CHUNK_FAILED;
172 ch->last_code = CHUNKE_ILLEGAL_HEX;
173 return CURLE_RECV_ERROR;
174 }
175 ch->state = CHUNK_LF; /* now wait for the CRLF */
176 }
177 break;
178
179 case CHUNK_LF:
180 /* waiting for the LF after a chunk size */
181 if(*buf == 0x0a) {
182 /* we're now expecting data to come, unless size was zero! */
183 if(0 == ch->datasize) {
184 ch->state = CHUNK_TRAILER; /* now check for trailers */
185 }
186 else
187 ch->state = CHUNK_DATA;
188 }
189
190 buf++;
191 blen--;
192 break;
193
194 case CHUNK_DATA:
195 /* We expect 'datasize' of data. We have 'blen' right now, it can be
196 more or less than 'datasize'. Get the smallest piece.
197 */
198 piece = blen;
199 if(ch->datasize < (curl_off_t)blen)
200 piece = curlx_sotouz(ch->datasize);
201
202 /* Write the data portion available */
203 if(!data->set.http_te_skip && !ch->ignore_body) {
204 if(cw_next)
205 result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY,
206 buf, piece);
207 else
208 result = Curl_client_write(data, CLIENTWRITE_BODY,
209 (char *)buf, piece);
210 if(result) {
211 ch->state = CHUNK_FAILED;
212 ch->last_code = CHUNKE_PASSTHRU_ERROR;
213 return result;
214 }
215 }
216
217 *pconsumed += piece;
218 ch->datasize -= piece; /* decrease amount left to expect */
219 buf += piece; /* move read pointer forward */
220 blen -= piece; /* decrease space left in this round */
221
222 if(0 == ch->datasize)
223 /* end of data this round, we now expect a trailing CRLF */
224 ch->state = CHUNK_POSTLF;
225 break;
226
227 case CHUNK_POSTLF:
228 if(*buf == 0x0a) {
229 /* The last one before we go back to hex state and start all over. */
230 Curl_httpchunk_reset(data, ch, ch->ignore_body);
231 }
232 else if(*buf != 0x0d) {
233 ch->state = CHUNK_FAILED;
234 ch->last_code = CHUNKE_BAD_CHUNK;
235 return CURLE_RECV_ERROR;
236 }
237 buf++;
238 blen--;
239 break;
240
241 case CHUNK_TRAILER:
242 if((*buf == 0x0d) || (*buf == 0x0a)) {
243 char *tr = Curl_dyn_ptr(&ch->trailer);
244 /* this is the end of a trailer, but if the trailer was zero bytes
245 there was no trailer and we move on */
246
247 if(tr) {
248 size_t trlen;
249 result = Curl_dyn_addn(&ch->trailer, (char *)STRCONST("\x0d\x0a"));
250 if(result) {
251 ch->state = CHUNK_FAILED;
252 ch->last_code = CHUNKE_OUT_OF_MEMORY;
253 return result;
254 }
255 tr = Curl_dyn_ptr(&ch->trailer);
256 trlen = Curl_dyn_len(&ch->trailer);
257 if(!data->set.http_te_skip) {
258 if(cw_next)
259 result = Curl_cwriter_write(data, cw_next,
260 CLIENTWRITE_HEADER|
261 CLIENTWRITE_TRAILER,
262 tr, trlen);
263 else
264 result = Curl_client_write(data,
265 CLIENTWRITE_HEADER|
266 CLIENTWRITE_TRAILER,
267 tr, trlen);
268 if(result) {
269 ch->state = CHUNK_FAILED;
270 ch->last_code = CHUNKE_PASSTHRU_ERROR;
271 return result;
272 }
273 }
274 Curl_dyn_reset(&ch->trailer);
275 ch->state = CHUNK_TRAILER_CR;
276 if(*buf == 0x0a)
277 /* already on the LF */
278 break;
279 }
280 else {
281 /* no trailer, we're on the final CRLF pair */
282 ch->state = CHUNK_TRAILER_POSTCR;
283 break; /* don't advance the pointer */
284 }
285 }
286 else {
287 result = Curl_dyn_addn(&ch->trailer, buf, 1);
288 if(result) {
289 ch->state = CHUNK_FAILED;
290 ch->last_code = CHUNKE_OUT_OF_MEMORY;
291 return result;
292 }
293 }
294 buf++;
295 blen--;
296 break;
297
298 case CHUNK_TRAILER_CR:
299 if(*buf == 0x0a) {
300 ch->state = CHUNK_TRAILER_POSTCR;
301 buf++;
302 blen--;
303 }
304 else {
305 ch->state = CHUNK_FAILED;
306 ch->last_code = CHUNKE_BAD_CHUNK;
307 return CURLE_RECV_ERROR;
308 }
309 break;
310
311 case CHUNK_TRAILER_POSTCR:
312 /* We enter this state when a CR should arrive so we expect to
313 have to first pass a CR before we wait for LF */
314 if((*buf != 0x0d) && (*buf != 0x0a)) {
315 /* not a CR then it must be another header in the trailer */
316 ch->state = CHUNK_TRAILER;
317 break;
318 }
319 if(*buf == 0x0d) {
320 /* skip if CR */
321 buf++;
322 blen--;
323 }
324 /* now wait for the final LF */
325 ch->state = CHUNK_STOP;
326 break;
327
328 case CHUNK_STOP:
329 if(*buf == 0x0a) {
330 blen--;
331 /* Record the length of any data left in the end of the buffer
332 even if there's no more chunks to read */
333 ch->datasize = blen;
334 ch->state = CHUNK_DONE;
335 return CURLE_OK;
336 }
337 else {
338 ch->state = CHUNK_FAILED;
339 ch->last_code = CHUNKE_BAD_CHUNK;
340 return CURLE_RECV_ERROR;
341 }
342 case CHUNK_DONE:
343 return CURLE_OK;
344
345 case CHUNK_FAILED:
346 return CURLE_RECV_ERROR;
347 }
348
349 }
350 return CURLE_OK;
351 }
352
/*
 * Curl_chunked_strerror() maps a chunk decoder error code to a static,
 * human readable string. Every code without a dedicated message yields "OK".
 */
static const char *Curl_chunked_strerror(CHUNKcode code)
{
  const char *text = "OK";

  switch(code) {
  case CHUNKE_TOO_LONG_HEX:
    text = "Too long hexadecimal number";
    break;
  case CHUNKE_ILLEGAL_HEX:
    text = "Illegal or missing hexadecimal sequence";
    break;
  case CHUNKE_BAD_CHUNK:
    text = "Malformed encoding found";
    break;
  case CHUNKE_PASSTHRU_ERROR:
    text = "Error writing data to client";
    break;
  case CHUNKE_BAD_ENCODING:
    text = "Bad content-encoding found";
    break;
  case CHUNKE_OUT_OF_MEMORY:
    text = "Out of memory";
    break;
  default:
    /* keep the "OK" set above */
    break;
  }
  return text;
}
372
Curl_httpchunk_read(struct Curl_easy * data,struct Curl_chunker * ch,char * buf,size_t blen,size_t * pconsumed)373 CURLcode Curl_httpchunk_read(struct Curl_easy *data,
374 struct Curl_chunker *ch,
375 char *buf, size_t blen,
376 size_t *pconsumed)
377 {
378 return httpchunk_readwrite(data, ch, NULL, buf, blen, pconsumed);
379 }
380
/* Context of the client writer that decodes chunked transfer-encoding.
   The cw_chunked_* callbacks cast the 'struct Curl_cwriter *' they are given
   to this type, so 'super' needs to stay the first member. */
struct chunked_writer {
  struct Curl_cwriter super; /* generic writer part, must come first */
  struct Curl_chunker ch;    /* chunk decoder state of this writer */
};
385
cw_chunked_init(struct Curl_easy * data,struct Curl_cwriter * writer)386 static CURLcode cw_chunked_init(struct Curl_easy *data,
387 struct Curl_cwriter *writer)
388 {
389 struct chunked_writer *ctx = (struct chunked_writer *)writer;
390
391 data->req.chunk = TRUE; /* chunks coming our way. */
392 Curl_httpchunk_init(data, &ctx->ch, FALSE);
393 return CURLE_OK;
394 }
395
cw_chunked_close(struct Curl_easy * data,struct Curl_cwriter * writer)396 static void cw_chunked_close(struct Curl_easy *data,
397 struct Curl_cwriter *writer)
398 {
399 struct chunked_writer *ctx = (struct chunked_writer *)writer;
400 Curl_httpchunk_free(data, &ctx->ch);
401 }
402
cw_chunked_write(struct Curl_easy * data,struct Curl_cwriter * writer,int type,const char * buf,size_t blen)403 static CURLcode cw_chunked_write(struct Curl_easy *data,
404 struct Curl_cwriter *writer, int type,
405 const char *buf, size_t blen)
406 {
407 struct chunked_writer *ctx = (struct chunked_writer *)writer;
408 CURLcode result;
409 size_t consumed;
410
411 if(!(type & CLIENTWRITE_BODY))
412 return Curl_cwriter_write(data, writer->next, type, buf, blen);
413
414 consumed = 0;
415 result = httpchunk_readwrite(data, &ctx->ch, writer->next, buf, blen,
416 &consumed);
417
418 if(result) {
419 if(CHUNKE_PASSTHRU_ERROR == ctx->ch.last_code) {
420 failf(data, "Failed reading the chunked-encoded stream");
421 }
422 else {
423 failf(data, "%s in chunked-encoding",
424 Curl_chunked_strerror(ctx->ch.last_code));
425 }
426 return result;
427 }
428
429 blen -= consumed;
430 if(CHUNK_DONE == ctx->ch.state) {
431 /* chunks read successfully, download is complete */
432 data->req.download_done = TRUE;
433 if(blen) {
434 infof(data, "Leftovers after chunking: %zu bytes", blen);
435 }
436 }
437 else if((type & CLIENTWRITE_EOS) && !data->req.no_body) {
438 failf(data, "transfer closed with outstanding read data remaining");
439 return CURLE_PARTIAL_FILE;
440 }
441
442 return CURLE_OK;
443 }
444
/* HTTP chunked Transfer-Encoding decoder, provided as a client writer type
   built from the cw_chunked_* callbacks in this file.
   NOTE(review): the per-field notes below are inferred from the values used;
   confirm against the definition of struct Curl_cwtype. */
const struct Curl_cwtype Curl_httpchunk_unencoder = {
  "chunked",                    /* presumably the name of the encoding */
  NULL,                         /* presumably an alias; none given */
  cw_chunked_init,              /* sets up the embedded chunker */
  cw_chunked_write,             /* decodes body bytes */
  cw_chunked_close,             /* frees the embedded chunker */
  sizeof(struct chunked_writer) /* size of this writer's context struct */
};
454
455 #endif /* CURL_DISABLE_HTTP */
456