1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #ifndef CURL_DISABLE_HTTP
28
29 #include "urldata.h" /* it includes http_chunks.h */
30 #include "sendf.h" /* for the client write stuff */
31 #include "dynbuf.h"
32 #include "content_encoding.h"
33 #include "http.h"
34 #include "strtoofft.h"
35 #include "warnless.h"
36
37 /* The last #include files should be: */
38 #include "curl_memory.h"
39 #include "memdebug.h"
40
41 /*
42 * Chunk format (simplified):
43 *
44 * <HEX SIZE>[ chunk extension ] CRLF
45 * <DATA> CRLF
46 *
47 * Highlights from RFC2616 section 3.6 say:
48
49 The chunked encoding modifies the body of a message in order to
50 transfer it as a series of chunks, each with its own size indicator,
51 followed by an OPTIONAL trailer containing entity-header fields. This
52 allows dynamically produced content to be transferred along with the
53 information necessary for the recipient to verify that it has
54 received the full message.
55
56 Chunked-Body = *chunk
57 last-chunk
58 trailer
59 CRLF
60
61 chunk = chunk-size [ chunk-extension ] CRLF
62 chunk-data CRLF
63 chunk-size = 1*HEX
64 last-chunk = 1*("0") [ chunk-extension ] CRLF
65
66 chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
67 chunk-ext-name = token
68 chunk-ext-val = token | quoted-string
69 chunk-data = chunk-size(OCTET)
70 trailer = *(entity-header CRLF)
71
72 The chunk-size field is a string of hex digits indicating the size of
73 the chunk. The chunked encoding is ended by any chunk whose size is
74 zero, followed by the trailer, which is terminated by an empty line.
75
76 */
77
/* Thin alias that forwards to Curl_isxdigit().
   NOTE(review): nothing in the visible part of this file references this
   macro (the chunk parser uses ISXDIGIT) - confirm whether it is still
   needed. */
#define isxdigit_ascii(x) Curl_isxdigit(x)
79
Curl_httpchunk_init(struct Curl_easy * data)80 void Curl_httpchunk_init(struct Curl_easy *data)
81 {
82 struct connectdata *conn = data->conn;
83 struct Curl_chunker *chunk = &conn->chunk;
84 chunk->hexindex = 0; /* start at 0 */
85 chunk->state = CHUNK_HEX; /* we get hex first! */
86 Curl_dyn_init(&conn->trailer, DYN_H1_TRAILER);
87 }
88
89 /*
90 * chunk_read() returns a OK for normal operations, or a positive return code
91 * for errors. STOP means this sequence of chunks is complete. The 'wrote'
92 * argument is set to tell the caller how many bytes we actually passed to the
93 * client (for byte-counting and whatever).
94 *
95 * The states and the state-machine is further explained in the header file.
96 *
97 * This function always uses ASCII hex values to accommodate non-ASCII hosts.
98 * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
99 */
Curl_httpchunk_read(struct Curl_easy * data,char * datap,ssize_t datalen,ssize_t * wrote,CURLcode * extrap)100 CHUNKcode Curl_httpchunk_read(struct Curl_easy *data,
101 char *datap,
102 ssize_t datalen,
103 ssize_t *wrote,
104 CURLcode *extrap)
105 {
106 CURLcode result = CURLE_OK;
107 struct connectdata *conn = data->conn;
108 struct Curl_chunker *ch = &conn->chunk;
109 struct SingleRequest *k = &data->req;
110 size_t piece;
111 curl_off_t length = (curl_off_t)datalen;
112
113 *wrote = 0; /* nothing's written yet */
114
115 /* the original data is written to the client, but we go on with the
116 chunk read process, to properly calculate the content length */
117 if(data->set.http_te_skip && !k->ignorebody) {
118 result = Curl_client_write(data, CLIENTWRITE_BODY, datap, datalen);
119 if(result) {
120 *extrap = result;
121 return CHUNKE_PASSTHRU_ERROR;
122 }
123 }
124
125 while(length) {
126 switch(ch->state) {
127 case CHUNK_HEX:
128 if(ISXDIGIT(*datap)) {
129 if(ch->hexindex < CHUNK_MAXNUM_LEN) {
130 ch->hexbuffer[ch->hexindex] = *datap;
131 datap++;
132 length--;
133 ch->hexindex++;
134 }
135 else {
136 return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
137 }
138 }
139 else {
140 char *endptr;
141 if(0 == ch->hexindex)
142 /* This is illegal data, we received junk where we expected
143 a hexadecimal digit. */
144 return CHUNKE_ILLEGAL_HEX;
145
146 /* length and datap are unmodified */
147 ch->hexbuffer[ch->hexindex] = 0;
148
149 if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize))
150 return CHUNKE_ILLEGAL_HEX;
151 ch->state = CHUNK_LF; /* now wait for the CRLF */
152 }
153 break;
154
155 case CHUNK_LF:
156 /* waiting for the LF after a chunk size */
157 if(*datap == 0x0a) {
158 /* we're now expecting data to come, unless size was zero! */
159 if(0 == ch->datasize) {
160 ch->state = CHUNK_TRAILER; /* now check for trailers */
161 }
162 else
163 ch->state = CHUNK_DATA;
164 }
165
166 datap++;
167 length--;
168 break;
169
170 case CHUNK_DATA:
171 /* We expect 'datasize' of data. We have 'length' right now, it can be
172 more or less than 'datasize'. Get the smallest piece.
173 */
174 piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
175
176 /* Write the data portion available */
177 if(!data->set.http_te_skip && !k->ignorebody) {
178 result = Curl_client_write(data, CLIENTWRITE_BODY, datap, piece);
179
180 if(result) {
181 *extrap = result;
182 return CHUNKE_PASSTHRU_ERROR;
183 }
184 }
185
186 *wrote += piece;
187 ch->datasize -= piece; /* decrease amount left to expect */
188 datap += piece; /* move read pointer forward */
189 length -= piece; /* decrease space left in this round */
190
191 if(0 == ch->datasize)
192 /* end of data this round, we now expect a trailing CRLF */
193 ch->state = CHUNK_POSTLF;
194 break;
195
196 case CHUNK_POSTLF:
197 if(*datap == 0x0a) {
198 /* The last one before we go back to hex state and start all over. */
199 Curl_httpchunk_init(data); /* sets state back to CHUNK_HEX */
200 }
201 else if(*datap != 0x0d)
202 return CHUNKE_BAD_CHUNK;
203 datap++;
204 length--;
205 break;
206
207 case CHUNK_TRAILER:
208 if((*datap == 0x0d) || (*datap == 0x0a)) {
209 char *tr = Curl_dyn_ptr(&conn->trailer);
210 /* this is the end of a trailer, but if the trailer was zero bytes
211 there was no trailer and we move on */
212
213 if(tr) {
214 size_t trlen;
215 result = Curl_dyn_addn(&conn->trailer, (char *)STRCONST("\x0d\x0a"));
216 if(result)
217 return CHUNKE_OUT_OF_MEMORY;
218
219 tr = Curl_dyn_ptr(&conn->trailer);
220 trlen = Curl_dyn_len(&conn->trailer);
221 if(!data->set.http_te_skip) {
222 result = Curl_client_write(data,
223 CLIENTWRITE_HEADER|CLIENTWRITE_TRAILER,
224 tr, trlen);
225 if(result) {
226 *extrap = result;
227 return CHUNKE_PASSTHRU_ERROR;
228 }
229 }
230 Curl_dyn_reset(&conn->trailer);
231 ch->state = CHUNK_TRAILER_CR;
232 if(*datap == 0x0a)
233 /* already on the LF */
234 break;
235 }
236 else {
237 /* no trailer, we're on the final CRLF pair */
238 ch->state = CHUNK_TRAILER_POSTCR;
239 break; /* don't advance the pointer */
240 }
241 }
242 else {
243 result = Curl_dyn_addn(&conn->trailer, datap, 1);
244 if(result)
245 return CHUNKE_OUT_OF_MEMORY;
246 }
247 datap++;
248 length--;
249 break;
250
251 case CHUNK_TRAILER_CR:
252 if(*datap == 0x0a) {
253 ch->state = CHUNK_TRAILER_POSTCR;
254 datap++;
255 length--;
256 }
257 else
258 return CHUNKE_BAD_CHUNK;
259 break;
260
261 case CHUNK_TRAILER_POSTCR:
262 /* We enter this state when a CR should arrive so we expect to
263 have to first pass a CR before we wait for LF */
264 if((*datap != 0x0d) && (*datap != 0x0a)) {
265 /* not a CR then it must be another header in the trailer */
266 ch->state = CHUNK_TRAILER;
267 break;
268 }
269 if(*datap == 0x0d) {
270 /* skip if CR */
271 datap++;
272 length--;
273 }
274 /* now wait for the final LF */
275 ch->state = CHUNK_STOP;
276 break;
277
278 case CHUNK_STOP:
279 if(*datap == 0x0a) {
280 length--;
281
282 /* Record the length of any data left in the end of the buffer
283 even if there's no more chunks to read */
284 ch->datasize = curlx_sotouz(length);
285
286 return CHUNKE_STOP; /* return stop */
287 }
288 else
289 return CHUNKE_BAD_CHUNK;
290 }
291 }
292 return CHUNKE_OK;
293 }
294
/*
 * Curl_chunked_strerror() returns a constant, human-readable string for a
 * CHUNKcode.
 *
 * Every code without a case of its own, which includes the non-error
 * results, yields "OK". CHUNKE_PASSTHRU_ERROR is not expected to be passed
 * in here: it trips DEBUGASSERT and otherwise gives an empty string.
 */
const char *Curl_chunked_strerror(CHUNKcode code)
{
  switch(code) {
  default:
    return "OK";
  case CHUNKE_TOO_LONG_HEX:
    return "Too long hexadecimal number";
  case CHUNKE_ILLEGAL_HEX:
    return "Illegal or missing hexadecimal sequence";
  case CHUNKE_BAD_CHUNK:
    return "Malformed encoding found";
  case CHUNKE_PASSTHRU_ERROR:
    DEBUGASSERT(0); /* never used */
    return "";
  case CHUNKE_BAD_ENCODING:
    return "Bad content-encoding found";
  case CHUNKE_OUT_OF_MEMORY:
    return "Out of memory";
  }
}
315
316 #endif /* CURL_DISABLE_HTTP */
317