1 /*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * daniel@veillard.com
12 */
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22
23 #ifdef HAVE_UNISTD_H
24 #include <unistd.h>
25 #elif defined (_WIN32)
26 #include <io.h>
27 #endif
28 #ifdef HAVE_SYS_SOCKET_H
29 #include <sys/socket.h>
30 #endif
31 #ifdef HAVE_NETINET_IN_H
32 #include <netinet/in.h>
33 #endif
34 #ifdef HAVE_ARPA_INET_H
35 #include <arpa/inet.h>
36 #endif
37 #ifdef HAVE_NETDB_H
38 #include <netdb.h>
39 #endif
40 #ifdef HAVE_FCNTL_H
41 #include <fcntl.h>
42 #endif
43 #ifdef HAVE_SYS_TIME_H
44 #include <sys/time.h>
45 #endif
46 #ifndef HAVE_POLL_H
47 #ifdef HAVE_SYS_SELECT_H
48 #include <sys/select.h>
49 #endif
50 #else
51 #include <poll.h>
52 #endif
53 #ifdef LIBXML_ZLIB_ENABLED
54 #include <zlib.h>
55 #endif
56
57
58 #ifdef VMS
59 #include <stropts>
60 #define XML_SOCKLEN_T unsigned int
61 #endif
62
63 #if defined(_WIN32)
64 #include <wsockcompat.h>
65 #endif
66
67 #include <libxml/globals.h>
68 #include <libxml/xmlerror.h>
69 #include <libxml/xmlmemory.h>
70 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
71 #include <libxml/nanohttp.h>
72 #include <libxml/globals.h>
73 #include <libxml/uri.h>
74
75 #include "private/error.h"
76 #include "private/io.h"
77
/**
 * A couple of portability macros
 */

/*
 * When the Winsock API is not in use, map the Winsock-style names used
 * throughout this file onto plain file descriptors and close().
 */
#ifndef _WINSOCKAPI_
#define closesocket(s) close(s)
#define SOCKET int
#define INVALID_SOCKET (-1)
#endif

/* Type of the length argument handed to getsockopt() in this file. */
#ifndef XML_SOCKLEN_T
#define XML_SOCKLEN_T unsigned int
#endif

/* Casts applied to the gethostbyname() host and send() buffer arguments. */
#define GETHOSTBYNAME_ARG_CAST (char *)
#define SEND_ARG2_CAST (char *)

/*
 * Standalone builds turn on the debug traces and map the case-insensitive
 * string comparisons onto the libc functions.
 */
#ifdef STANDALONE
#define DEBUG_HTTP
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
#endif

/* NOTE(review): presumably the maximum number of redirects followed; not used in this part of the file. */
#define XML_NANO_HTTP_MAX_REDIR 10

/*
 * Number of bytes requested per recv() call; also the threshold used by
 * xmlNanoHTTPRecv() to compact and to grow the input buffer.
 */
#define XML_NANO_HTTP_CHUNK 4096

/* Values stored in the state field of the context (tested as bit flags). */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4
108
109 typedef struct xmlNanoHTTPCtxt {
110 char *protocol; /* the protocol name */
111 char *hostname; /* the host name */
112 int port; /* the port */
113 char *path; /* the path within the URL */
114 char *query; /* the query string */
115 SOCKET fd; /* the file descriptor for the socket */
116 int state; /* WRITE / READ / CLOSED */
117 char *out; /* buffer sent (zero terminated) */
118 char *outptr; /* index within the buffer sent */
119 char *in; /* the receiving buffer */
120 char *content; /* the start of the content */
121 char *inptr; /* the next byte to read from network */
122 char *inrptr; /* the next byte to give back to the client */
123 int inlen; /* len of the input buffer */
124 int last; /* return code for last operation */
125 int returnValue; /* the protocol return value */
126 int version; /* the protocol version */
127 int ContentLength; /* specified content length from HTTP header */
128 char *contentType; /* the MIME type for the input */
129 char *location; /* the new URL in case of redirect */
130 char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
131 char *encoding; /* encoding extracted from the contentType */
132 char *mimeType; /* Mime-Type extracted from the contentType */
133 #ifdef LIBXML_ZLIB_ENABLED
134 z_stream *strm; /* Zlib stream object */
135 int usesGzip; /* "Content-Encoding: gzip" was detected */
136 #endif
137 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
138
static int initialized = 0; /* set by xmlNanoHTTPInit(), cleared by xmlNanoHTTPCleanup() */
static char *proxy = NULL; /* the proxy name if any */
static int proxyPort; /* the proxy port if any */
static unsigned int timeout = 60;/* the select()/poll() timeout in seconds */

/* Forward declaration; the definition is not in this part of the file. */
static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
145
146 /**
147 * xmlHTTPErrMemory:
148 * @extra: extra information
149 *
150 * Handle an out of memory condition
151 */
152 static void
xmlHTTPErrMemory(const char * extra)153 xmlHTTPErrMemory(const char *extra)
154 {
155 __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
156 }
157
/**
 * socket_errno:
 *
 * A portability function giving the error code of the last socket
 * operation. With Winsock the WSA codes handled by this file are
 * translated to their errno counterparts and any other code is returned
 * unchanged; elsewhere this is simply errno.
 *
 * Returns the error code
 */
static int
socket_errno(void)
{
#ifdef _WINSOCKAPI_
    const int wsaCode = WSAGetLastError();

    if (wsaCode == WSAECONNRESET)
        return(ECONNRESET);
    if (wsaCode == WSAEINPROGRESS)
        return(EINPROGRESS);
    if (wsaCode == WSAEINTR)
        return(EINTR);
    if (wsaCode == WSAESHUTDOWN)
        return(ESHUTDOWN);
    if (wsaCode == WSAEWOULDBLOCK)
        return(EWOULDBLOCK);
    return(wsaCode);
#else
    return(errno);
#endif
}
182
183 /**
184 * xmlNanoHTTPInit:
185 *
186 * Initialize the HTTP protocol layer.
187 * Currently it just checks for proxy information
188 */
189
190 void
xmlNanoHTTPInit(void)191 xmlNanoHTTPInit(void) {
192 const char *env;
193 #ifdef _WINSOCKAPI_
194 WSADATA wsaData;
195 #endif
196
197 if (initialized)
198 return;
199
200 #ifdef _WINSOCKAPI_
201 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
202 return;
203 #endif
204
205 if (proxy == NULL) {
206 proxyPort = 80;
207 env = getenv("no_proxy");
208 if (env && ((env[0] == '*') && (env[1] == 0)))
209 goto done;
210 env = getenv("http_proxy");
211 if (env != NULL) {
212 xmlNanoHTTPScanProxy(env);
213 goto done;
214 }
215 env = getenv("HTTP_PROXY");
216 if (env != NULL) {
217 xmlNanoHTTPScanProxy(env);
218 goto done;
219 }
220 }
221 done:
222 initialized = 1;
223 }
224
225 /**
226 * xmlNanoHTTPCleanup:
227 *
228 * Cleanup the HTTP protocol layer.
229 */
230
231 void
xmlNanoHTTPCleanup(void)232 xmlNanoHTTPCleanup(void) {
233 if (proxy != NULL) {
234 xmlFree(proxy);
235 proxy = NULL;
236 }
237 #ifdef _WINSOCKAPI_
238 if (initialized)
239 WSACleanup();
240 #endif
241 initialized = 0;
242 return;
243 }
244
245 /**
246 * xmlNanoHTTPScanURL:
247 * @ctxt: an HTTP context
248 * @URL: The URL used to initialize the context
249 *
250 * (Re)Initialize an HTTP context by parsing the URL and finding
251 * the protocol host port and path it indicates.
252 */
253
254 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)255 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
256 xmlURIPtr uri;
257 int len;
258
259 /*
260 * Clear any existing data from the context
261 */
262 if (ctxt->protocol != NULL) {
263 xmlFree(ctxt->protocol);
264 ctxt->protocol = NULL;
265 }
266 if (ctxt->hostname != NULL) {
267 xmlFree(ctxt->hostname);
268 ctxt->hostname = NULL;
269 }
270 if (ctxt->path != NULL) {
271 xmlFree(ctxt->path);
272 ctxt->path = NULL;
273 }
274 if (ctxt->query != NULL) {
275 xmlFree(ctxt->query);
276 ctxt->query = NULL;
277 }
278 if (URL == NULL) return;
279
280 uri = xmlParseURIRaw(URL, 1);
281 if (uri == NULL)
282 return;
283
284 if ((uri->scheme == NULL) || (uri->server == NULL)) {
285 xmlFreeURI(uri);
286 return;
287 }
288
289 ctxt->protocol = xmlMemStrdup(uri->scheme);
290 /* special case of IPv6 addresses, the [] need to be removed */
291 if ((uri->server != NULL) && (*uri->server == '[')) {
292 len = strlen(uri->server);
293 if ((len > 2) && (uri->server[len - 1] == ']')) {
294 ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
295 } else
296 ctxt->hostname = xmlMemStrdup(uri->server);
297 } else
298 ctxt->hostname = xmlMemStrdup(uri->server);
299 if (uri->path != NULL)
300 ctxt->path = xmlMemStrdup(uri->path);
301 else
302 ctxt->path = xmlMemStrdup("/");
303 if (uri->query != NULL)
304 ctxt->query = xmlMemStrdup(uri->query);
305 if (uri->port != 0)
306 ctxt->port = uri->port;
307
308 xmlFreeURI(uri);
309 }
310
311 /**
312 * xmlNanoHTTPScanProxy:
313 * @URL: The proxy URL used to initialize the proxy context
314 *
315 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
316 * the protocol host port it indicates.
317 * Should be like http://myproxy/ or http://myproxy:3128/
318 * A NULL URL cleans up proxy information.
319 */
320
321 void
xmlNanoHTTPScanProxy(const char * URL)322 xmlNanoHTTPScanProxy(const char *URL) {
323 xmlURIPtr uri;
324
325 if (proxy != NULL) {
326 xmlFree(proxy);
327 proxy = NULL;
328 }
329 proxyPort = 0;
330
331 #ifdef DEBUG_HTTP
332 if (URL == NULL)
333 xmlGenericError(xmlGenericErrorContext,
334 "Removing HTTP proxy info\n");
335 else
336 xmlGenericError(xmlGenericErrorContext,
337 "Using HTTP proxy %s\n", URL);
338 #endif
339 if (URL == NULL) return;
340
341 uri = xmlParseURIRaw(URL, 1);
342 if ((uri == NULL) || (uri->scheme == NULL) ||
343 (strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
344 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
345 if (uri != NULL)
346 xmlFreeURI(uri);
347 return;
348 }
349
350 proxy = xmlMemStrdup(uri->server);
351 if (uri->port != 0)
352 proxyPort = uri->port;
353
354 xmlFreeURI(uri);
355 }
356
357 /**
358 * xmlNanoHTTPNewCtxt:
359 * @URL: The URL used to initialize the context
360 *
361 * Allocate and initialize a new HTTP context.
362 *
363 * Returns an HTTP context or NULL in case of error.
364 */
365
366 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)367 xmlNanoHTTPNewCtxt(const char *URL) {
368 xmlNanoHTTPCtxtPtr ret;
369
370 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
371 if (ret == NULL) {
372 xmlHTTPErrMemory("allocating context");
373 return(NULL);
374 }
375
376 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
377 ret->port = 80;
378 ret->returnValue = 0;
379 ret->fd = INVALID_SOCKET;
380 ret->ContentLength = -1;
381
382 xmlNanoHTTPScanURL(ret, URL);
383
384 return(ret);
385 }
386
387 /**
388 * xmlNanoHTTPFreeCtxt:
389 * @ctxt: an HTTP context
390 *
391 * Frees the context after closing the connection.
392 */
393
394 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)395 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
396 if (ctxt == NULL) return;
397 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
398 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
399 if (ctxt->path != NULL) xmlFree(ctxt->path);
400 if (ctxt->query != NULL) xmlFree(ctxt->query);
401 if (ctxt->out != NULL) xmlFree(ctxt->out);
402 if (ctxt->in != NULL) xmlFree(ctxt->in);
403 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
404 if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
405 if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
406 if (ctxt->location != NULL) xmlFree(ctxt->location);
407 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
408 #ifdef LIBXML_ZLIB_ENABLED
409 if (ctxt->strm != NULL) {
410 inflateEnd(ctxt->strm);
411 xmlFree(ctxt->strm);
412 }
413 #endif
414
415 ctxt->state = XML_NANO_HTTP_NONE;
416 if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
417 ctxt->fd = INVALID_SOCKET;
418 xmlFree(ctxt);
419 }
420
421 /**
422 * xmlNanoHTTPSend:
423 * @ctxt: an HTTP context
424 *
425 * Send the input needed to initiate the processing on the server side
426 * Returns number of bytes sent or -1 on error.
427 */
428
429 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)430 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
431 {
432 int total_sent = 0;
433 #ifdef HAVE_POLL_H
434 struct pollfd p;
435 #else
436 struct timeval tv;
437 fd_set wfd;
438 #endif
439
440 if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
441 while (total_sent < outlen) {
442 int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
443 outlen - total_sent, 0);
444
445 if (nsent > 0)
446 total_sent += nsent;
447 else if ((nsent == -1) &&
448 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
449 (socket_errno() != EAGAIN) &&
450 #endif
451 (socket_errno() != EWOULDBLOCK)) {
452 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
453 if (total_sent == 0)
454 total_sent = -1;
455 break;
456 } else {
457 /*
458 * No data sent
459 * Since non-blocking sockets are used, wait for
460 * socket to be writable or default timeout prior
461 * to retrying.
462 */
463 #ifndef HAVE_POLL_H
464 #ifndef _WINSOCKAPI_
465 if (ctxt->fd > FD_SETSIZE)
466 return -1;
467 #endif
468
469 tv.tv_sec = timeout;
470 tv.tv_usec = 0;
471 FD_ZERO(&wfd);
472 #ifdef _MSC_VER
473 #pragma warning(push)
474 #pragma warning(disable: 4018)
475 #endif
476 FD_SET(ctxt->fd, &wfd);
477 #ifdef _MSC_VER
478 #pragma warning(pop)
479 #endif
480 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
481 #else
482 p.fd = ctxt->fd;
483 p.events = POLLOUT;
484 (void) poll(&p, 1, timeout * 1000);
485 #endif /* !HAVE_POLL_H */
486 }
487 }
488 }
489
490 return total_sent;
491 }
492
493 /**
494 * xmlNanoHTTPRecv:
495 * @ctxt: an HTTP context
496 *
497 * Read information coming from the HTTP connection.
498 * This is a blocking call (but it blocks in select(), not read()).
499 *
500 * Returns the number of byte read or -1 in case of error.
501 */
502
503 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)504 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
505 {
506 #ifdef HAVE_POLL_H
507 struct pollfd p;
508 #else
509 fd_set rfd;
510 struct timeval tv;
511 #endif
512
513
514 while (ctxt->state & XML_NANO_HTTP_READ) {
515 if (ctxt->in == NULL) {
516 ctxt->in = (char *) xmlMallocAtomic(65000);
517 if (ctxt->in == NULL) {
518 xmlHTTPErrMemory("allocating input");
519 ctxt->last = -1;
520 return (-1);
521 }
522 ctxt->inlen = 65000;
523 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
524 }
525 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
526 int delta = ctxt->inrptr - ctxt->in;
527 int len = ctxt->inptr - ctxt->inrptr;
528
529 memmove(ctxt->in, ctxt->inrptr, len);
530 ctxt->inrptr -= delta;
531 ctxt->content -= delta;
532 ctxt->inptr -= delta;
533 }
534 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
535 int d_inptr = ctxt->inptr - ctxt->in;
536 int d_content = ctxt->content - ctxt->in;
537 int d_inrptr = ctxt->inrptr - ctxt->in;
538 char *tmp_ptr = ctxt->in;
539
540 ctxt->inlen *= 2;
541 ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
542 if (ctxt->in == NULL) {
543 xmlHTTPErrMemory("allocating input buffer");
544 xmlFree(tmp_ptr);
545 ctxt->last = -1;
546 return (-1);
547 }
548 ctxt->inptr = ctxt->in + d_inptr;
549 ctxt->content = ctxt->in + d_content;
550 ctxt->inrptr = ctxt->in + d_inrptr;
551 }
552 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
553 if (ctxt->last > 0) {
554 ctxt->inptr += ctxt->last;
555 return (ctxt->last);
556 }
557 if (ctxt->last == 0) {
558 return (0);
559 }
560 if (ctxt->last == -1) {
561 switch (socket_errno()) {
562 case EINPROGRESS:
563 case EWOULDBLOCK:
564 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
565 case EAGAIN:
566 #endif
567 break;
568
569 case ECONNRESET:
570 case ESHUTDOWN:
571 return (0);
572
573 default:
574 __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
575 return (-1);
576 }
577 }
578 #ifdef HAVE_POLL_H
579 p.fd = ctxt->fd;
580 p.events = POLLIN;
581 if ((poll(&p, 1, timeout * 1000) < 1)
582 #if defined(EINTR)
583 && (errno != EINTR)
584 #endif
585 )
586 return (0);
587 #else /* !HAVE_POLL_H */
588 #ifndef _WINSOCKAPI_
589 if (ctxt->fd > FD_SETSIZE)
590 return 0;
591 #endif
592
593 tv.tv_sec = timeout;
594 tv.tv_usec = 0;
595 FD_ZERO(&rfd);
596
597 #ifdef _MSC_VER
598 #pragma warning(push)
599 #pragma warning(disable: 4018)
600 #endif
601
602 FD_SET(ctxt->fd, &rfd);
603
604 #ifdef _MSC_VER
605 #pragma warning(pop)
606 #endif
607
608 if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
609 #if defined(EINTR)
610 && (socket_errno() != EINTR)
611 #endif
612 )
613 return (0);
614 #endif /* !HAVE_POLL_H */
615 }
616 return (0);
617 }
618
619 /**
620 * xmlNanoHTTPReadLine:
621 * @ctxt: an HTTP context
622 *
623 * Read one line in the HTTP server output, usually for extracting
624 * the HTTP protocol information from the answer header.
625 *
626 * Returns a newly allocated string with a copy of the line, or NULL
627 * which indicate the end of the input.
628 */
629
630 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)631 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
632 char buf[4096];
633 char *bp = buf;
634 int rc;
635
636 while (bp - buf < 4095) {
637 if (ctxt->inrptr == ctxt->inptr) {
638 if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
639 if (bp == buf)
640 return(NULL);
641 else
642 *bp = 0;
643 return(xmlMemStrdup(buf));
644 }
645 else if ( rc == -1 ) {
646 return ( NULL );
647 }
648 }
649 *bp = *ctxt->inrptr++;
650 if (*bp == '\n') {
651 *bp = 0;
652 return(xmlMemStrdup(buf));
653 }
654 if (*bp != '\r')
655 bp++;
656 }
657 buf[4095] = 0;
658 return(xmlMemStrdup(buf));
659 }
660
661
662 /**
663 * xmlNanoHTTPScanAnswer:
664 * @ctxt: an HTTP context
665 * @line: an HTTP header line
666 *
667 * Try to extract useful information from the server answer.
668 * We currently parse and process:
669 * - The HTTP revision/ return code
670 * - The Content-Type, Mime-Type and charset used
671 * - The Location for redirect processing.
672 *
673 * Returns -1 in case of failure, the file descriptor number otherwise
674 */
675
676 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)677 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
678 const char *cur = line;
679
680 if (line == NULL) return;
681
682 if (!strncmp(line, "HTTP/", 5)) {
683 int version = 0;
684 int ret = 0;
685
686 cur += 5;
687 while ((*cur >= '0') && (*cur <= '9')) {
688 version *= 10;
689 version += *cur - '0';
690 cur++;
691 }
692 if (*cur == '.') {
693 cur++;
694 if ((*cur >= '0') && (*cur <= '9')) {
695 version *= 10;
696 version += *cur - '0';
697 cur++;
698 }
699 while ((*cur >= '0') && (*cur <= '9'))
700 cur++;
701 } else
702 version *= 10;
703 if ((*cur != ' ') && (*cur != '\t')) return;
704 while ((*cur == ' ') || (*cur == '\t')) cur++;
705 if ((*cur < '0') || (*cur > '9')) return;
706 while ((*cur >= '0') && (*cur <= '9')) {
707 ret *= 10;
708 ret += *cur - '0';
709 cur++;
710 }
711 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
712 ctxt->returnValue = ret;
713 ctxt->version = version;
714 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
715 const xmlChar *charset, *last, *mime;
716 cur += 13;
717 while ((*cur == ' ') || (*cur == '\t')) cur++;
718 if (ctxt->contentType != NULL)
719 xmlFree(ctxt->contentType);
720 ctxt->contentType = xmlMemStrdup(cur);
721 mime = (const xmlChar *) cur;
722 last = mime;
723 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
724 (*last != ';') && (*last != ','))
725 last++;
726 if (ctxt->mimeType != NULL)
727 xmlFree(ctxt->mimeType);
728 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
729 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
730 if (charset != NULL) {
731 charset += 8;
732 last = charset;
733 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
734 (*last != ';') && (*last != ','))
735 last++;
736 if (ctxt->encoding != NULL)
737 xmlFree(ctxt->encoding);
738 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
739 }
740 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
741 const xmlChar *charset, *last, *mime;
742 cur += 12;
743 if (ctxt->contentType != NULL) return;
744 while ((*cur == ' ') || (*cur == '\t')) cur++;
745 ctxt->contentType = xmlMemStrdup(cur);
746 mime = (const xmlChar *) cur;
747 last = mime;
748 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
749 (*last != ';') && (*last != ','))
750 last++;
751 if (ctxt->mimeType != NULL)
752 xmlFree(ctxt->mimeType);
753 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
754 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
755 if (charset != NULL) {
756 charset += 8;
757 last = charset;
758 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
759 (*last != ';') && (*last != ','))
760 last++;
761 if (ctxt->encoding != NULL)
762 xmlFree(ctxt->encoding);
763 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
764 }
765 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
766 cur += 9;
767 while ((*cur == ' ') || (*cur == '\t')) cur++;
768 if (ctxt->location != NULL)
769 xmlFree(ctxt->location);
770 if (*cur == '/') {
771 xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
772 xmlChar *tmp_loc =
773 xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
774 ctxt->location =
775 (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
776 } else {
777 ctxt->location = xmlMemStrdup(cur);
778 }
779 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
780 cur += 17;
781 while ((*cur == ' ') || (*cur == '\t')) cur++;
782 if (ctxt->authHeader != NULL)
783 xmlFree(ctxt->authHeader);
784 ctxt->authHeader = xmlMemStrdup(cur);
785 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
786 cur += 19;
787 while ((*cur == ' ') || (*cur == '\t')) cur++;
788 if (ctxt->authHeader != NULL)
789 xmlFree(ctxt->authHeader);
790 ctxt->authHeader = xmlMemStrdup(cur);
791 #ifdef LIBXML_ZLIB_ENABLED
792 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
793 cur += 17;
794 while ((*cur == ' ') || (*cur == '\t')) cur++;
795 if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
796 ctxt->usesGzip = 1;
797
798 ctxt->strm = xmlMalloc(sizeof(z_stream));
799
800 if (ctxt->strm != NULL) {
801 ctxt->strm->zalloc = Z_NULL;
802 ctxt->strm->zfree = Z_NULL;
803 ctxt->strm->opaque = Z_NULL;
804 ctxt->strm->avail_in = 0;
805 ctxt->strm->next_in = Z_NULL;
806
807 inflateInit2( ctxt->strm, 31 );
808 }
809 }
810 #endif
811 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
812 cur += 15;
813 ctxt->ContentLength = strtol( cur, NULL, 10 );
814 }
815 }
816
817 /**
818 * xmlNanoHTTPConnectAttempt:
819 * @addr: a socket address structure
820 *
821 * Attempt a connection to the given IP:port endpoint. It forces
822 * non-blocking semantic on the socket, and allow 60 seconds for
823 * the host to answer.
824 *
825 * Returns -1 in case of failure, the file descriptor number otherwise
826 */
827
828 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)829 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
830 {
831 #ifndef HAVE_POLL_H
832 fd_set wfd;
833 #ifdef _WINSOCKAPI_
834 fd_set xfd;
835 #endif
836 struct timeval tv;
837 #else /* !HAVE_POLL_H */
838 struct pollfd p;
839 #endif /* !HAVE_POLL_H */
840 int status;
841
842 int addrlen;
843
844 SOCKET s;
845
846 #ifdef SUPPORT_IP6
847 if (addr->sa_family == AF_INET6) {
848 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
849 addrlen = sizeof(struct sockaddr_in6);
850 } else
851 #endif
852 {
853 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
854 addrlen = sizeof(struct sockaddr_in);
855 }
856 if (s == INVALID_SOCKET) {
857 #ifdef DEBUG_HTTP
858 perror("socket");
859 #endif
860 __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
861 return INVALID_SOCKET;
862 }
863 #ifdef _WINSOCKAPI_
864 {
865 u_long one = 1;
866
867 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
868 }
869 #else /* _WINSOCKAPI_ */
870 #if defined(VMS)
871 {
872 int enable = 1;
873
874 status = ioctl(s, FIONBIO, &enable);
875 }
876 #else /* VMS */
877 if ((status = fcntl(s, F_GETFL, 0)) != -1) {
878 #ifdef O_NONBLOCK
879 status |= O_NONBLOCK;
880 #else /* O_NONBLOCK */
881 #ifdef F_NDELAY
882 status |= F_NDELAY;
883 #endif /* F_NDELAY */
884 #endif /* !O_NONBLOCK */
885 status = fcntl(s, F_SETFL, status);
886 }
887 if (status < 0) {
888 #ifdef DEBUG_HTTP
889 perror("nonblocking");
890 #endif
891 __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
892 closesocket(s);
893 return INVALID_SOCKET;
894 }
895 #endif /* !VMS */
896 #endif /* !_WINSOCKAPI_ */
897
898 if (connect(s, addr, addrlen) == -1) {
899 switch (socket_errno()) {
900 case EINPROGRESS:
901 case EWOULDBLOCK:
902 break;
903 default:
904 __xmlIOErr(XML_FROM_HTTP, 0,
905 "error connecting to HTTP server");
906 closesocket(s);
907 return INVALID_SOCKET;
908 }
909 }
910 #ifndef HAVE_POLL_H
911 tv.tv_sec = timeout;
912 tv.tv_usec = 0;
913
914 #ifdef _MSC_VER
915 #pragma warning(push)
916 #pragma warning(disable: 4018)
917 #endif
918 #ifndef _WINSOCKAPI_
919 if (s > FD_SETSIZE)
920 return INVALID_SOCKET;
921 #endif
922 FD_ZERO(&wfd);
923 FD_SET(s, &wfd);
924
925 #ifdef _WINSOCKAPI_
926 FD_ZERO(&xfd);
927 FD_SET(s, &xfd);
928
929 switch (select(s + 1, NULL, &wfd, &xfd, &tv))
930 #else
931 switch (select(s + 1, NULL, &wfd, NULL, &tv))
932 #endif
933 #ifdef _MSC_VER
934 #pragma warning(pop)
935 #endif
936
937 #else /* !HAVE_POLL_H */
938 p.fd = s;
939 p.events = POLLOUT;
940 switch (poll(&p, 1, timeout * 1000))
941 #endif /* !HAVE_POLL_H */
942
943 {
944 case 0:
945 /* Time out */
946 __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
947 closesocket(s);
948 return INVALID_SOCKET;
949 case -1:
950 /* Ermm.. ?? */
951 __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
952 closesocket(s);
953 return INVALID_SOCKET;
954 }
955
956 #ifndef HAVE_POLL_H
957 if (FD_ISSET(s, &wfd)
958 #ifdef _WINSOCKAPI_
959 || FD_ISSET(s, &xfd)
960 #endif
961 )
962 #else /* !HAVE_POLL_H */
963 if (p.revents == POLLOUT)
964 #endif /* !HAVE_POLL_H */
965 {
966 XML_SOCKLEN_T len;
967
968 len = sizeof(status);
969 #ifdef SO_ERROR
970 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
971 0) {
972 /* Solaris error code */
973 __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
974 closesocket(s);
975 return INVALID_SOCKET;
976 }
977 #endif
978 if (status) {
979 __xmlIOErr(XML_FROM_HTTP, 0,
980 "Error connecting to remote host");
981 closesocket(s);
982 errno = status;
983 return INVALID_SOCKET;
984 }
985 } else {
986 /* pbm */
987 __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
988 closesocket(s);
989 return INVALID_SOCKET;
990 }
991
992 return (s);
993 }
994
995 /**
996 * xmlNanoHTTPConnectHost:
997 * @host: the host name
998 * @port: the port number
999 *
1000 * Attempt a connection to the given host:port endpoint. It tries
1001 * the multiple IP provided by the DNS if available.
1002 *
1003 * Returns -1 in case of failure, the file descriptor number otherwise
1004 */
1005
1006 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)1007 xmlNanoHTTPConnectHost(const char *host, int port)
1008 {
1009 struct sockaddr *addr = NULL;
1010 struct sockaddr_in sockin;
1011
1012 #ifdef SUPPORT_IP6
1013 struct sockaddr_in6 sockin6;
1014 #endif
1015 SOCKET s;
1016
1017 memset (&sockin, 0, sizeof(sockin));
1018
1019 #if defined(SUPPORT_IP6)
1020 {
1021 int status;
1022 struct addrinfo hints, *res, *result;
1023
1024 memset (&sockin6, 0, sizeof(sockin6));
1025
1026 result = NULL;
1027 memset (&hints, 0,sizeof(hints));
1028 hints.ai_socktype = SOCK_STREAM;
1029
1030 status = getaddrinfo (host, NULL, &hints, &result);
1031 if (status) {
1032 __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1033 return INVALID_SOCKET;
1034 }
1035
1036 for (res = result; res; res = res->ai_next) {
1037 if (res->ai_family == AF_INET) {
1038 if ((size_t)res->ai_addrlen > sizeof(sockin)) {
1039 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1040 freeaddrinfo (result);
1041 return INVALID_SOCKET;
1042 }
1043 memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1044 sockin.sin_port = htons (port);
1045 addr = (struct sockaddr *)&sockin;
1046 } else if (res->ai_family == AF_INET6) {
1047 if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
1048 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1049 freeaddrinfo (result);
1050 return INVALID_SOCKET;
1051 }
1052 memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1053 sockin6.sin6_port = htons (port);
1054 addr = (struct sockaddr *)&sockin6;
1055 } else
1056 continue; /* for */
1057
1058 s = xmlNanoHTTPConnectAttempt (addr);
1059 if (s != INVALID_SOCKET) {
1060 freeaddrinfo (result);
1061 return (s);
1062 }
1063 }
1064
1065 if (result)
1066 freeaddrinfo (result);
1067 }
1068 #else
1069 {
1070 struct hostent *h;
1071 struct in_addr ia;
1072 int i;
1073
1074 h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
1075 if (h == NULL) {
1076
1077 /*
1078 * Okay, I got fed up by the non-portability of this error message
1079 * extraction code. it work on Linux, if it work on your platform
1080 * and one want to enable it, send me the defined(foobar) needed
1081 */
1082 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
1083 const char *h_err_txt = "";
1084
1085 switch (h_errno) {
1086 case HOST_NOT_FOUND:
1087 h_err_txt = "Authoritative host not found";
1088 break;
1089
1090 case TRY_AGAIN:
1091 h_err_txt =
1092 "Non-authoritative host not found or server failure.";
1093 break;
1094
1095 case NO_RECOVERY:
1096 h_err_txt =
1097 "Non-recoverable errors: FORMERR, REFUSED, or NOTIMP.";
1098 break;
1099
1100 #ifdef NO_ADDRESS
1101 case NO_ADDRESS:
1102 h_err_txt =
1103 "Valid name, no data record of requested type.";
1104 break;
1105 #endif
1106
1107 default:
1108 h_err_txt = "No error text defined.";
1109 break;
1110 }
1111 __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1112 #else
1113 __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1114 #endif
1115 return INVALID_SOCKET;
1116 }
1117
1118 for (i = 0; h->h_addr_list[i]; i++) {
1119 if (h->h_addrtype == AF_INET) {
1120 /* A records (IPv4) */
1121 if ((unsigned int) h->h_length > sizeof(ia)) {
1122 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1123 return INVALID_SOCKET;
1124 }
1125 memcpy (&ia, h->h_addr_list[i], h->h_length);
1126 sockin.sin_family = h->h_addrtype;
1127 sockin.sin_addr = ia;
1128 sockin.sin_port = (unsigned short)htons ((unsigned short)port);
1129 addr = (struct sockaddr *) &sockin;
1130 } else
1131 break; /* for */
1132
1133 s = xmlNanoHTTPConnectAttempt (addr);
1134 if (s != INVALID_SOCKET)
1135 return (s);
1136 }
1137 }
1138 #endif
1139
1140 #ifdef DEBUG_HTTP
1141 xmlGenericError(xmlGenericErrorContext,
1142 "xmlNanoHTTPConnectHost: unable to connect to '%s'.\n",
1143 host);
1144 #endif
1145 return INVALID_SOCKET;
1146 }
1147
1148
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP GET. It forwards to xmlNanoHTTPMethod() with no method,
 * input or extra headers; *@contentType is reset to NULL first.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1167
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 * @redir: if available the redirected URL will be returned
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP GET. It forwards to xmlNanoHTTPMethodRedir() with no method,
 * input or extra headers; *@contentType and *@redir are reset to NULL
 * first.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1188
1189 /**
1190 * xmlNanoHTTPRead:
1191 * @ctx: the HTTP context
1192 * @dest: a buffer
1193 * @len: the buffer length
1194 *
1195 * This function tries to read @len bytes from the existing HTTP connection
1196 * and saves them in @dest. This is a blocking call.
1197 *
1198 * Returns the number of byte read. 0 is an indication of an end of connection.
1199 * -1 indicates a parameter error.
1200 */
1201 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1202 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1203 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1204 #ifdef LIBXML_ZLIB_ENABLED
1205 int bytes_read = 0;
1206 int orig_avail_in;
1207 int z_ret;
1208 #endif
1209
1210 if (ctx == NULL) return(-1);
1211 if (dest == NULL) return(-1);
1212 if (len <= 0) return(0);
1213
1214 #ifdef LIBXML_ZLIB_ENABLED
1215 if (ctxt->usesGzip == 1) {
1216 if (ctxt->strm == NULL) return(0);
1217
1218 ctxt->strm->next_out = dest;
1219 ctxt->strm->avail_out = len;
1220 ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1221
1222 while (ctxt->strm->avail_out > 0 &&
1223 (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1224 orig_avail_in = ctxt->strm->avail_in =
1225 ctxt->inptr - ctxt->inrptr - bytes_read;
1226 ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1227
1228 z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1229 bytes_read += orig_avail_in - ctxt->strm->avail_in;
1230
1231 if (z_ret != Z_OK) break;
1232 }
1233
1234 ctxt->inrptr += bytes_read;
1235 return(len - ctxt->strm->avail_out);
1236 }
1237 #endif
1238
1239 while (ctxt->inptr - ctxt->inrptr < len) {
1240 if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1241 }
1242 if (ctxt->inptr - ctxt->inrptr < len)
1243 len = ctxt->inptr - ctxt->inrptr;
1244 memcpy(dest, ctxt->inrptr, len);
1245 ctxt->inrptr += len;
1246 return(len);
1247 }
1248
1249 /**
1250 * xmlNanoHTTPClose:
1251 * @ctx: the HTTP context
1252 *
1253 * This function closes an HTTP context, it ends up the connection and
1254 * free all data related to it.
1255 */
1256 void
xmlNanoHTTPClose(void * ctx)1257 xmlNanoHTTPClose(void *ctx) {
1258 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1259
1260 if (ctx == NULL) return;
1261
1262 xmlNanoHTTPFreeCtxt(ctxt);
1263 }
1264
1265
/**
 * xmlNanoHTTPHostnameMatch:
 * @pattern: The pattern as it appears in no_proxy environment variable
 * @hostname: The hostname to test as it appears in the URL
 *
 * This function tests whether a given hostname matches a pattern. The pattern
 * usually is a token from the no_proxy environment variable. Wildcards in the
 * pattern are not supported.
 *
 * The comparison is a case-insensitive suffix match performed from the end
 * of both strings: the hostname matches when it is equal to the pattern, or
 * when it ends with the pattern and the character right before that suffix
 * is a '.'. A single leading '.' in the pattern is ignored, so ".example.com"
 * and "example.com" behave the same.
 *
 * Returns true, iff hostname matches the pattern. A NULL or empty pattern,
 * or a NULL hostname, never matches.
 */

static int
xmlNanoHTTPHostnameMatch(const char *pattern, const char *hostname) {
    int idx_pattern, idx_hostname;
    const char * pattern_start;

    if (!pattern || *pattern == '\0' || !hostname)
        return 0;

    /* Ignore a leading '.' */
    if (*pattern == '.') {
        idx_pattern = strlen(pattern) - 1;
        pattern_start = pattern + 1;
    }
    else {
        idx_pattern = strlen(pattern);
        pattern_start = pattern;
    }
    idx_hostname = strlen(hostname);

    /*
     * Walk both strings backwards, starting at their terminating NUL.
     * The casts to unsigned char are required: passing a negative char
     * value to tolower() is undefined behaviour.
     */
    for (; idx_pattern >= 0 && idx_hostname >= 0;
           --idx_pattern, --idx_hostname) {
        if (tolower((unsigned char) pattern_start[idx_pattern]) !=
            tolower((unsigned char) hostname[idx_hostname]))
            break;
    }

    /*
     * The whole pattern must have been consumed, and the hostname must
     * either be consumed too or continue with a domain separator.
     */
    return idx_pattern == -1 &&
           (idx_hostname == -1 || hostname[idx_hostname] == '.');
}
1305
1306
/**
 * xmlNanoHTTPBypassProxy:
 * @hostname: The hostname as it appears in the URL
 *
 * This function evaluates the no_proxy environment variable and returns
 * whether the proxy server should be bypassed for a given host. The
 * variable is treated as a comma-separated list of patterns; whitespace
 * at the start of the list and after each comma is skipped, and each
 * pattern is tested with xmlNanoHTTPHostnameMatch().
 *
 * Returns true, iff a proxy server should be bypassed for the given hostname.
 * Returns 0 when no_proxy is unset, blank, or when the working copy of the
 * variable cannot be allocated.
 */

static int
xmlNanoHTTPBypassProxy(const char *hostname) {
    size_t envlen;
    char *env = getenv("no_proxy"), *cpy=NULL, *p=NULL;
    if (!env)
        return 0;

    /*
     * Work on a private copy, since tokenizing writes NULs into it.
     * (Avoid strdup because it's not portable.)
     */
    envlen = strlen(env) + 1;
    cpy = xmlMalloc(envlen);
    if (cpy == NULL)
        return 0; /* out of memory: fall back to "do not bypass" */
    memcpy(cpy, env, envlen);
    env = cpy;

    /*
     * The remainder of the function is basically a tokenizing.
     * isspace() needs an unsigned char value; a negative char is
     * undefined behaviour.
     */
    while (isspace((unsigned char) *env))
        ++env;
    if (*env == '\0') {
        xmlFree(cpy);
        return 0;
    }

    p = env;
    while (*env) {

        if (*env != ',') {
            ++env;
            continue;
        }

        /* Terminate the current token in place and test it. */
        *(env++) = '\0';
        if (xmlNanoHTTPHostnameMatch(p, hostname)) {
            xmlFree(cpy);
            return 1;
        }

        while (isspace((unsigned char) *env))
            ++env;
        p = env;
    }
    /* Last token: it is not followed by a comma. */
    if (xmlNanoHTTPHostnameMatch(p, hostname)) {
        xmlFree(cpy);
        return 1;
    }

    xmlFree(cpy);
    return 0;
}
1364
1365
1366 /**
1367 * xmlNanoHTTPMethodRedir:
1368 * @URL: The URL to load
1369 * @method: the HTTP method to use
1370 * @input: the input string if any
1371 * @contentType: the Content-Type information IN and OUT
1372 * @redir: the redirected URL OUT
1373 * @headers: the extra headers
1374 * @ilen: input length
1375 *
1376 * This function try to open a connection to the indicated resource
1377 * via HTTP using the given @method, adding the given extra headers
1378 * and the input buffer for the request content.
1379 *
1380 * Returns NULL in case of failure, otherwise a request handler.
1381 * The contentType, or redir, if provided must be freed by the caller
1382 */
1383
1384 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1385 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1386 char **contentType, char **redir,
1387 const char *headers, int ilen ) {
1388 xmlNanoHTTPCtxtPtr ctxt;
1389 char *bp, *p;
1390 int blen;
1391 SOCKET ret;
1392 int nbRedirects = 0;
1393 int use_proxy;
1394 char *redirURL = NULL;
1395 #ifdef DEBUG_HTTP
1396 int xmt_bytes;
1397 #endif
1398
1399 if (URL == NULL) return(NULL);
1400 if (method == NULL) method = "GET";
1401 xmlNanoHTTPInit();
1402
1403 retry:
1404 if (redirURL == NULL) {
1405 ctxt = xmlNanoHTTPNewCtxt(URL);
1406 if (ctxt == NULL)
1407 return(NULL);
1408 } else {
1409 ctxt = xmlNanoHTTPNewCtxt(redirURL);
1410 if (ctxt == NULL)
1411 return(NULL);
1412 ctxt->location = xmlMemStrdup(redirURL);
1413 }
1414
1415 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1416 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1417 xmlNanoHTTPFreeCtxt(ctxt);
1418 if (redirURL != NULL) xmlFree(redirURL);
1419 return(NULL);
1420 }
1421 if (ctxt->hostname == NULL) {
1422 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1423 "Failed to identify host in URI");
1424 xmlNanoHTTPFreeCtxt(ctxt);
1425 if (redirURL != NULL) xmlFree(redirURL);
1426 return(NULL);
1427 }
1428 use_proxy = proxy && !xmlNanoHTTPBypassProxy(ctxt->hostname);
1429 if (use_proxy) {
1430 blen = strlen(ctxt->hostname) * 2 + 16;
1431 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1432 }
1433 else {
1434 blen = strlen(ctxt->hostname);
1435 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1436 }
1437 if (ret == INVALID_SOCKET) {
1438 xmlNanoHTTPFreeCtxt(ctxt);
1439 if (redirURL != NULL) xmlFree(redirURL);
1440 return(NULL);
1441 }
1442 ctxt->fd = ret;
1443
1444 if (input == NULL)
1445 ilen = 0;
1446 else
1447 blen += 36;
1448
1449 if (headers != NULL)
1450 blen += strlen(headers) + 2;
1451 if (contentType && *contentType)
1452 /* reserve for string plus 'Content-Type: \r\n" */
1453 blen += strlen(*contentType) + 16;
1454 if (ctxt->query != NULL)
1455 /* 1 for '?' */
1456 blen += strlen(ctxt->query) + 1;
1457 blen += strlen(method) + strlen(ctxt->path) + 24;
1458 #ifdef LIBXML_ZLIB_ENABLED
1459 /* reserve for possible 'Accept-Encoding: gzip' string */
1460 blen += 23;
1461 #endif
1462 if (ctxt->port != 80) {
1463 /* reserve space for ':xxxxx', incl. potential proxy */
1464 if (use_proxy)
1465 blen += 17;
1466 else
1467 blen += 11;
1468 }
1469 bp = (char*)xmlMallocAtomic(blen);
1470 if ( bp == NULL ) {
1471 xmlNanoHTTPFreeCtxt( ctxt );
1472 xmlHTTPErrMemory("allocating header buffer");
1473 return ( NULL );
1474 }
1475
1476 p = bp;
1477
1478 if (use_proxy) {
1479 if (ctxt->port != 80) {
1480 p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1481 method, ctxt->hostname,
1482 ctxt->port, ctxt->path );
1483 }
1484 else
1485 p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1486 ctxt->hostname, ctxt->path);
1487 }
1488 else
1489 p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1490
1491 if (ctxt->query != NULL)
1492 p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1493
1494 if (ctxt->port == 80) {
1495 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1496 ctxt->hostname);
1497 } else {
1498 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1499 ctxt->hostname, ctxt->port);
1500 }
1501
1502 #ifdef LIBXML_ZLIB_ENABLED
1503 p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1504 #endif
1505
1506 if (contentType != NULL && *contentType)
1507 p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1508
1509 if (headers != NULL)
1510 p += snprintf( p, blen - (p - bp), "%s", headers );
1511
1512 if (input != NULL)
1513 snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1514 else
1515 snprintf(p, blen - (p - bp), "\r\n");
1516
1517 #ifdef DEBUG_HTTP
1518 xmlGenericError(xmlGenericErrorContext,
1519 "-> %s%s", use_proxy ? "(Proxy) " : "", bp);
1520 if ((blen -= strlen(bp)+1) < 0)
1521 xmlGenericError(xmlGenericErrorContext,
1522 "ERROR: overflowed buffer by %d bytes\n", -blen);
1523 #endif
1524 ctxt->outptr = ctxt->out = bp;
1525 ctxt->state = XML_NANO_HTTP_WRITE;
1526 blen = strlen( ctxt->out );
1527 #ifdef DEBUG_HTTP
1528 xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1529 if ( xmt_bytes != blen )
1530 xmlGenericError( xmlGenericErrorContext,
1531 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n",
1532 xmt_bytes, blen,
1533 "bytes of HTTP headers sent to host",
1534 ctxt->hostname );
1535 #else
1536 xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1537 #endif
1538
1539 if ( input != NULL ) {
1540 #ifdef DEBUG_HTTP
1541 xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1542
1543 if ( xmt_bytes != ilen )
1544 xmlGenericError( xmlGenericErrorContext,
1545 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n",
1546 xmt_bytes, ilen,
1547 "bytes of HTTP content sent to host",
1548 ctxt->hostname );
1549 #else
1550 xmlNanoHTTPSend( ctxt, input, ilen );
1551 #endif
1552 }
1553
1554 ctxt->state = XML_NANO_HTTP_READ;
1555
1556 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1557 if (*p == 0) {
1558 ctxt->content = ctxt->inrptr;
1559 xmlFree(p);
1560 break;
1561 }
1562 xmlNanoHTTPScanAnswer(ctxt, p);
1563
1564 #ifdef DEBUG_HTTP
1565 xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1566 #endif
1567 xmlFree(p);
1568 }
1569
1570 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1571 (ctxt->returnValue < 400)) {
1572 #ifdef DEBUG_HTTP
1573 xmlGenericError(xmlGenericErrorContext,
1574 "\nRedirect to: %s\n", ctxt->location);
1575 #endif
1576 while ( xmlNanoHTTPRecv(ctxt) > 0 )
1577 ;
1578 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1579 nbRedirects++;
1580 if (redirURL != NULL)
1581 xmlFree(redirURL);
1582 redirURL = xmlMemStrdup(ctxt->location);
1583 xmlNanoHTTPFreeCtxt(ctxt);
1584 goto retry;
1585 }
1586 xmlNanoHTTPFreeCtxt(ctxt);
1587 if (redirURL != NULL) xmlFree(redirURL);
1588 #ifdef DEBUG_HTTP
1589 xmlGenericError(xmlGenericErrorContext,
1590 "xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1591 #endif
1592 return(NULL);
1593 }
1594
1595 if (contentType != NULL) {
1596 if (ctxt->contentType != NULL)
1597 *contentType = xmlMemStrdup(ctxt->contentType);
1598 else
1599 *contentType = NULL;
1600 }
1601
1602 if ((redir != NULL) && (redirURL != NULL)) {
1603 *redir = redirURL;
1604 } else {
1605 if (redirURL != NULL)
1606 xmlFree(redirURL);
1607 if (redir != NULL)
1608 *redir = NULL;
1609 }
1610
1611 #ifdef DEBUG_HTTP
1612 if (ctxt->contentType != NULL)
1613 xmlGenericError(xmlGenericErrorContext,
1614 "\nCode %d, content-type '%s'\n\n",
1615 ctxt->returnValue, ctxt->contentType);
1616 else
1617 xmlGenericError(xmlGenericErrorContext,
1618 "\nCode %d, no content-type\n\n",
1619 ctxt->returnValue);
1620 #endif
1621
1622 return((void *) ctxt);
1623 }
1624
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP using the given @method, adding the given extra headers
 * and the input buffer for the request content. It forwards all its
 * arguments to xmlNanoHTTPMethodRedir() and passes NULL for the
 * redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1648
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved; the
 *             special name "-" selects file descriptor 0
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and saves its content in the file. An existing file is truncated
 * before the new content is written.
 *
 * Returns -1 in case of failure (NULL @filename, the request could not be
 *     opened, the file could not be opened, or the content could not be
 *     written completely), 0 in case of success. The contentType,
 *     if provided must be freed by the caller; it is released and reset
 *     to NULL here when the output file cannot be opened.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (!strcmp(filename, "-"))
        /* NOTE(review): fd 0 is stdin; "-" usually means stdout (1) - confirm */
        fd = 0;
    else {
        /* O_TRUNC: do not leave stale bytes behind a shorter download. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    if ( len > 0 ) {
        const char *cur = buf;
        int left = len;

        /* write() may be short; keep going until everything is out. */
        while (left > 0) {
            int written = write(fd, cur, left);

            if (written <= 0) {
                ret = -1;
                break;
            }
            cur += written;
            left -= written;
        }
    }

    xmlNanoHTTPClose(ctxt);
    close(fd);
    return(ret);
}
1699
1700 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved; the
 *             special name "-" selects file descriptor 0
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the new content is written.
 * It closes and frees the context at the end, including when the output
 * file cannot be opened; the context is left untouched only when the
 * arguments are rejected.
 *
 * Returns -1 in case of failure (NULL argument, the file could not be
 *     opened, or the content could not be written completely),
 *     0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    if (!strcmp(filename, "-"))
        /* NOTE(review): fd 0 is stdin; "-" usually means stdout (1) - confirm */
        fd = 0;
    else {
        /* O_TRUNC: do not leave stale bytes behind a shorter download. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    if ( len > 0 ) {
        const char *cur = buf;
        int left = len;

        /* write() may be short; keep going until everything is out. */
        while (left > 0) {
            int written = write(fd, cur, left);

            if (written <= 0) {
                ret = -1;
                break;
            }
            cur += written;
            left -= written;
        }
    }

    xmlNanoHTTPClose(ctxt);
    close(fd);
    return(ret);
}
1741 #endif /* LIBXML_OUTPUT_ENABLED */
1742
1743 /**
1744 * xmlNanoHTTPReturnCode:
1745 * @ctx: the HTTP context
1746 *
1747 * Get the latest HTTP return code received
1748 *
1749 * Returns the HTTP return code for the request.
1750 */
1751 int
xmlNanoHTTPReturnCode(void * ctx)1752 xmlNanoHTTPReturnCode(void *ctx) {
1753 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1754
1755 if (ctxt == NULL) return(-1);
1756
1757 return(ctxt->returnValue);
1758 }
1759
1760 /**
1761 * xmlNanoHTTPAuthHeader:
1762 * @ctx: the HTTP context
1763 *
1764 * Get the authentication header of an HTTP context
1765 *
1766 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1767 * header.
1768 */
1769 const char *
xmlNanoHTTPAuthHeader(void * ctx)1770 xmlNanoHTTPAuthHeader(void *ctx) {
1771 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1772
1773 if (ctxt == NULL) return(NULL);
1774
1775 return(ctxt->authHeader);
1776 }
1777
1778 /**
1779 * xmlNanoHTTPContentLength:
1780 * @ctx: the HTTP context
1781 *
1782 * Provides the specified content length from the HTTP header.
1783 *
1784 * Return the specified content length from the HTTP header. Note that
1785 * a value of -1 indicates that the content length element was not included in
1786 * the response header.
1787 */
1788 int
xmlNanoHTTPContentLength(void * ctx)1789 xmlNanoHTTPContentLength( void * ctx ) {
1790 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1791
1792 return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1793 }
1794
1795 /**
1796 * xmlNanoHTTPRedir:
1797 * @ctx: the HTTP context
1798 *
1799 * Provides the specified redirection URL if available from the HTTP header.
1800 *
1801 * Return the specified redirection URL or NULL if not redirected.
1802 */
1803 const char *
xmlNanoHTTPRedir(void * ctx)1804 xmlNanoHTTPRedir( void * ctx ) {
1805 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1806
1807 return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1808 }
1809
1810 /**
1811 * xmlNanoHTTPEncoding:
1812 * @ctx: the HTTP context
1813 *
1814 * Provides the specified encoding if specified in the HTTP headers.
1815 *
1816 * Return the specified encoding or NULL if not available
1817 */
1818 const char *
xmlNanoHTTPEncoding(void * ctx)1819 xmlNanoHTTPEncoding( void * ctx ) {
1820 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1821
1822 return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1823 }
1824
1825 /**
1826 * xmlNanoHTTPMimeType:
1827 * @ctx: the HTTP context
1828 *
1829 * Provides the specified Mime-Type if specified in the HTTP headers.
1830 *
1831 * Return the specified Mime-Type or NULL if not available
1832 */
1833 const char *
xmlNanoHTTPMimeType(void * ctx)1834 xmlNanoHTTPMimeType( void * ctx ) {
1835 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1836
1837 return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1838 }
1839
1840 /**
1841 * xmlNanoHTTPFetchContent:
1842 * @ctx: the HTTP context
1843 * @ptr: pointer to set to the content buffer.
1844 * @len: integer pointer to hold the length of the content
1845 *
1846 * Check if all the content was read
1847 *
1848 * Returns 0 if all the content was read and available, returns
1849 * -1 if received content length was less than specified or an error
1850 * occurred.
1851 */
1852 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1853 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1854 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1855
1856 int rc = 0;
1857 int cur_lgth;
1858 int rcvd_lgth;
1859 int dummy_int;
1860 char * dummy_ptr = NULL;
1861
1862 /* Dummy up return input parameters if not provided */
1863
1864 if ( len == NULL )
1865 len = &dummy_int;
1866
1867 if ( ptr == NULL )
1868 ptr = &dummy_ptr;
1869
1870 /* But can't work without the context pointer */
1871
1872 if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1873 *len = 0;
1874 *ptr = NULL;
1875 return ( -1 );
1876 }
1877
1878 rcvd_lgth = ctxt->inptr - ctxt->content;
1879
1880 while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1881
1882 rcvd_lgth += cur_lgth;
1883 if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1884 break;
1885 }
1886
1887 *ptr = ctxt->content;
1888 *len = rcvd_lgth;
1889
1890 if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1891 rc = -1;
1892 else if ( rcvd_lgth == 0 )
1893 rc = -1;
1894
1895 return ( rc );
1896 }
1897
1898 #ifdef STANDALONE
main(int argc,char ** argv)1899 int main(int argc, char **argv) {
1900 char *contentType = NULL;
1901
1902 if (argv[1] != NULL) {
1903 if (argv[2] != NULL)
1904 xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1905 else
1906 xmlNanoHTTPFetch(argv[1], "-", &contentType);
1907 if (contentType != NULL) xmlFree(contentType);
1908 } else {
1909 xmlGenericError(xmlGenericErrorContext,
1910 "%s: minimal HTTP GET implementation\n", argv[0]);
1911 xmlGenericError(xmlGenericErrorContext,
1912 "\tusage %s [ URL [ filename ] ]\n", argv[0]);
1913 }
1914 xmlNanoHTTPCleanup();
1915 xmlMemoryDump();
1916 return(0);
1917 }
1918 #endif /* STANDALONE */
1919 #else /* !LIBXML_HTTP_ENABLED */
1920 #ifdef STANDALONE
1921 #include <stdio.h>
main(int argc,char ** argv)1922 int main(int argc, char **argv) {
1923 xmlGenericError(xmlGenericErrorContext,
1924 "%s : HTTP support not compiled in\n", argv[0]);
1925 return(0);
1926 }
1927 #endif /* STANDALONE */
1928 #endif /* LIBXML_HTTP_ENABLED */
1929