• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3  *             focuses on size, streamability, reentrancy and portability
4  *
5  * This is clearly not a general purpose HTTP implementation
6  * If you look for one, check:
7  *         http://www.w3.org/Library/
8  *
9  * See Copyright for the status of this software.
10  *
11  * daniel@veillard.com
12  */
13 
14 #define NEED_SOCKETS
15 #define IN_LIBXML
16 #include "libxml.h"
17 
18 #ifdef LIBXML_HTTP_ENABLED
19 #include <string.h>
20 
21 #ifdef HAVE_STDLIB_H
22 #include <stdlib.h>
23 #endif
24 #ifdef HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #ifdef HAVE_SYS_TYPES_H
28 #include <sys/types.h>
29 #endif
30 #ifdef HAVE_SYS_SOCKET_H
31 #include <sys/socket.h>
32 #endif
33 #ifdef HAVE_NETINET_IN_H
34 #include <netinet/in.h>
35 #endif
36 #ifdef HAVE_ARPA_INET_H
37 #include <arpa/inet.h>
38 #endif
39 #ifdef HAVE_NETDB_H
40 #include <netdb.h>
41 #endif
42 #ifdef HAVE_RESOLV_H
43 #ifdef HAVE_ARPA_NAMESER_H
44 #include <arpa/nameser.h>
45 #endif
46 #include <resolv.h>
47 #endif
48 #ifdef HAVE_FCNTL_H
49 #include <fcntl.h>
50 #endif
51 #ifdef HAVE_ERRNO_H
52 #include <errno.h>
53 #endif
54 #ifdef HAVE_SYS_TIME_H
55 #include <sys/time.h>
56 #endif
57 #ifndef HAVE_POLL_H
58 #ifdef HAVE_SYS_SELECT_H
59 #include <sys/select.h>
60 #endif
61 #else
62 #include <poll.h>
63 #endif
64 #ifdef HAVE_STRINGS_H
65 #include <strings.h>
66 #endif
67 #ifdef SUPPORT_IP6
68 #include <resolv.h>
69 #endif
70 #ifdef HAVE_ZLIB_H
71 #include <zlib.h>
72 #endif
73 
74 
75 #ifdef VMS
76 #include <stropts>
77 #define XML_SOCKLEN_T unsigned int
78 #endif
79 
80 #if defined(__MINGW32__) || defined(_WIN32_WCE)
81 #ifndef _WINSOCKAPI_
82 #define _WINSOCKAPI_
83 #endif
84 #include <wsockcompat.h>
85 #include <winsock2.h>
86 #undef XML_SOCKLEN_T
87 #define XML_SOCKLEN_T unsigned int
88 #endif
89 
90 #include <libxml/globals.h>
91 #include <libxml/xmlerror.h>
92 #include <libxml/xmlmemory.h>
93 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
94 #include <libxml/nanohttp.h>
95 #include <libxml/globals.h>
96 #include <libxml/uri.h>
97 
98 /**
99  * A couple portability macros
100  */
101 #ifndef _WINSOCKAPI_
102 #if !defined(__BEOS__) || defined(__HAIKU__)
103 #define closesocket(s) close(s)
104 #endif
105 #define SOCKET int
106 #define INVALID_SOCKET (-1)
107 #endif
108 
109 #ifdef __BEOS__
110 #ifndef PF_INET
111 #define PF_INET AF_INET
112 #endif
113 #endif
114 
115 #ifndef XML_SOCKLEN_T
116 #define XML_SOCKLEN_T unsigned int
117 #endif
118 
119 #ifdef STANDALONE
120 #define DEBUG_HTTP
121 #define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
122 #define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
123 #endif
124 
125 #define XML_NANO_HTTP_MAX_REDIR	10
126 
127 #define XML_NANO_HTTP_CHUNK	4096
128 
129 #define XML_NANO_HTTP_CLOSED	0
130 #define XML_NANO_HTTP_WRITE	1
131 #define XML_NANO_HTTP_READ	2
132 #define XML_NANO_HTTP_NONE	4
133 
134 typedef struct xmlNanoHTTPCtxt {
135     char *protocol;	/* the protocol name */
136     char *hostname;	/* the host name */
137     int port;		/* the port */
138     char *path;		/* the path within the URL */
139     char *query;	/* the query string */
140     SOCKET fd;		/* the file descriptor for the socket */
141     int state;		/* WRITE / READ / CLOSED */
142     char *out;		/* buffer sent (zero terminated) */
143     char *outptr;	/* index within the buffer sent */
144     char *in;		/* the receiving buffer */
145     char *content;	/* the start of the content */
146     char *inptr;	/* the next byte to read from network */
147     char *inrptr;	/* the next byte to give back to the client */
148     int inlen;		/* len of the input buffer */
149     int last;		/* return code for last operation */
150     int returnValue;	/* the protocol return value */
151     int version;        /* the protocol version */
152     int ContentLength;  /* specified content length from HTTP header */
153     char *contentType;	/* the MIME type for the input */
154     char *location;	/* the new URL in case of redirect */
155     char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
156     char *encoding;	/* encoding extracted from the contentType */
157     char *mimeType;	/* Mime-Type extracted from the contentType */
158 #ifdef HAVE_ZLIB_H
159     z_stream *strm;	/* Zlib stream object */
160     int usesGzip;	/* "Content-Encoding: gzip" was detected */
161 #endif
162 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
163 
164 static int initialized = 0;
165 static char *proxy = NULL;	 /* the proxy name if any */
166 static int proxyPort;	/* the proxy port if any */
167 static unsigned int timeout = 60;/* the select() timeout in seconds */
168 
169 static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
170 
171 /**
172  * xmlHTTPErrMemory:
173  * @extra:  extra informations
174  *
175  * Handle an out of memory condition
176  */
177 static void
xmlHTTPErrMemory(const char * extra)178 xmlHTTPErrMemory(const char *extra)
179 {
180     __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
181 }
182 
/**
 * socket_errno:
 *
 * Portability helper giving the error code of the last socket operation:
 * WSAGetLastError() when built against Winsock, errno otherwise.
 *
 * Returns the error code.
 */
static int
socket_errno(void)
{
    int code;

#ifdef _WINSOCKAPI_
    code = WSAGetLastError();
#else
    code = errno;
#endif
    return(code);
}
193 
194 #ifdef SUPPORT_IP6
195 static
have_ipv6(void)196 int have_ipv6(void) {
197     SOCKET s;
198 
199     s = socket (AF_INET6, SOCK_STREAM, 0);
200     if (s != INVALID_SOCKET) {
201 	close (s);
202 	return (1);
203     }
204     return (0);
205 }
206 #endif
207 
208 /**
209  * xmlNanoHTTPInit:
210  *
211  * Initialize the HTTP protocol layer.
212  * Currently it just checks for proxy informations
213  */
214 
215 void
xmlNanoHTTPInit(void)216 xmlNanoHTTPInit(void) {
217     const char *env;
218 #ifdef _WINSOCKAPI_
219     WSADATA wsaData;
220 #endif
221 
222     if (initialized)
223 	return;
224 
225 #ifdef _WINSOCKAPI_
226     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
227 	return;
228 #endif
229 
230     if (proxy == NULL) {
231 	proxyPort = 80;
232 	env = getenv("no_proxy");
233 	if (env && ((env[0] == '*') && (env[1] == 0)))
234 	    goto done;
235 	env = getenv("http_proxy");
236 	if (env != NULL) {
237 	    xmlNanoHTTPScanProxy(env);
238 	    goto done;
239 	}
240 	env = getenv("HTTP_PROXY");
241 	if (env != NULL) {
242 	    xmlNanoHTTPScanProxy(env);
243 	    goto done;
244 	}
245     }
246 done:
247     initialized = 1;
248 }
249 
250 /**
251  * xmlNanoHTTPCleanup:
252  *
253  * Cleanup the HTTP protocol layer.
254  */
255 
256 void
xmlNanoHTTPCleanup(void)257 xmlNanoHTTPCleanup(void) {
258     if (proxy != NULL) {
259 	xmlFree(proxy);
260 	proxy = NULL;
261     }
262 #ifdef _WINSOCKAPI_
263     if (initialized)
264 	WSACleanup();
265 #endif
266     initialized = 0;
267     return;
268 }
269 
270 /**
271  * xmlNanoHTTPScanURL:
272  * @ctxt:  an HTTP context
273  * @URL:  The URL used to initialize the context
274  *
275  * (Re)Initialize an HTTP context by parsing the URL and finding
276  * the protocol host port and path it indicates.
277  */
278 
279 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)280 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
281     xmlURIPtr uri;
282     /*
283      * Clear any existing data from the context
284      */
285     if (ctxt->protocol != NULL) {
286         xmlFree(ctxt->protocol);
287 	ctxt->protocol = NULL;
288     }
289     if (ctxt->hostname != NULL) {
290         xmlFree(ctxt->hostname);
291 	ctxt->hostname = NULL;
292     }
293     if (ctxt->path != NULL) {
294         xmlFree(ctxt->path);
295 	ctxt->path = NULL;
296     }
297     if (ctxt->query != NULL) {
298         xmlFree(ctxt->query);
299 	ctxt->query = NULL;
300     }
301     if (URL == NULL) return;
302 
303     uri = xmlParseURIRaw(URL, 1);
304     if (uri == NULL)
305 	return;
306 
307     if ((uri->scheme == NULL) || (uri->server == NULL)) {
308 	xmlFreeURI(uri);
309 	return;
310     }
311 
312     ctxt->protocol = xmlMemStrdup(uri->scheme);
313     ctxt->hostname = xmlMemStrdup(uri->server);
314     if (uri->path != NULL)
315 	ctxt->path = xmlMemStrdup(uri->path);
316     else
317 	ctxt->path = xmlMemStrdup("/");
318     if (uri->query != NULL)
319 	ctxt->query = xmlMemStrdup(uri->query);
320     if (uri->port != 0)
321 	ctxt->port = uri->port;
322 
323     xmlFreeURI(uri);
324 }
325 
326 /**
327  * xmlNanoHTTPScanProxy:
328  * @URL:  The proxy URL used to initialize the proxy context
329  *
330  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
331  * the protocol host port it indicates.
332  * Should be like http://myproxy/ or http://myproxy:3128/
333  * A NULL URL cleans up proxy informations.
334  */
335 
336 void
xmlNanoHTTPScanProxy(const char * URL)337 xmlNanoHTTPScanProxy(const char *URL) {
338     xmlURIPtr uri;
339 
340     if (proxy != NULL) {
341         xmlFree(proxy);
342 	proxy = NULL;
343     }
344     proxyPort = 0;
345 
346 #ifdef DEBUG_HTTP
347     if (URL == NULL)
348 	xmlGenericError(xmlGenericErrorContext,
349 		"Removing HTTP proxy info\n");
350     else
351 	xmlGenericError(xmlGenericErrorContext,
352 		"Using HTTP proxy %s\n", URL);
353 #endif
354     if (URL == NULL) return;
355 
356     uri = xmlParseURIRaw(URL, 1);
357     if ((uri == NULL) || (uri->scheme == NULL) ||
358 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
359 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
360 	if (uri != NULL)
361 	    xmlFreeURI(uri);
362 	return;
363     }
364 
365     proxy = xmlMemStrdup(uri->server);
366     if (uri->port != 0)
367 	proxyPort = uri->port;
368 
369     xmlFreeURI(uri);
370 }
371 
372 /**
373  * xmlNanoHTTPNewCtxt:
374  * @URL:  The URL used to initialize the context
375  *
376  * Allocate and initialize a new HTTP context.
377  *
378  * Returns an HTTP context or NULL in case of error.
379  */
380 
381 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)382 xmlNanoHTTPNewCtxt(const char *URL) {
383     xmlNanoHTTPCtxtPtr ret;
384 
385     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
386     if (ret == NULL) {
387         xmlHTTPErrMemory("allocating context");
388         return(NULL);
389     }
390 
391     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
392     ret->port = 80;
393     ret->returnValue = 0;
394     ret->fd = INVALID_SOCKET;
395     ret->ContentLength = -1;
396 
397     xmlNanoHTTPScanURL(ret, URL);
398 
399     return(ret);
400 }
401 
402 /**
403  * xmlNanoHTTPFreeCtxt:
404  * @ctxt:  an HTTP context
405  *
406  * Frees the context after closing the connection.
407  */
408 
409 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)410 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
411     if (ctxt == NULL) return;
412     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
413     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
414     if (ctxt->path != NULL) xmlFree(ctxt->path);
415     if (ctxt->query != NULL) xmlFree(ctxt->query);
416     if (ctxt->out != NULL) xmlFree(ctxt->out);
417     if (ctxt->in != NULL) xmlFree(ctxt->in);
418     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
419     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
420     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
421     if (ctxt->location != NULL) xmlFree(ctxt->location);
422     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
423 #ifdef HAVE_ZLIB_H
424     if (ctxt->strm != NULL) {
425 	inflateEnd(ctxt->strm);
426 	xmlFree(ctxt->strm);
427     }
428 #endif
429 
430     ctxt->state = XML_NANO_HTTP_NONE;
431     if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
432     ctxt->fd = INVALID_SOCKET;
433     xmlFree(ctxt);
434 }
435 
436 /**
437  * xmlNanoHTTPSend:
438  * @ctxt:  an HTTP context
439  *
440  * Send the input needed to initiate the processing on the server side
441  * Returns number of bytes sent or -1 on error.
442  */
443 
444 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)445 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
446 {
447     int total_sent = 0;
448 #ifdef HAVE_POLL_H
449     struct pollfd p;
450 #else
451     struct timeval tv;
452     fd_set wfd;
453 #endif
454 
455     if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
456         while (total_sent < outlen) {
457             int nsent = send(ctxt->fd, xmt_ptr + total_sent,
458                              outlen - total_sent, 0);
459 
460             if (nsent > 0)
461                 total_sent += nsent;
462             else if ((nsent == -1) &&
463 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
464                      (socket_errno() != EAGAIN) &&
465 #endif
466                      (socket_errno() != EWOULDBLOCK)) {
467                 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
468                 if (total_sent == 0)
469                     total_sent = -1;
470                 break;
471             } else {
472                 /*
473                  * No data sent
474                  * Since non-blocking sockets are used, wait for
475                  * socket to be writable or default timeout prior
476                  * to retrying.
477                  */
478 #ifndef HAVE_POLL_H
479 #ifndef _WINSOCKAPI_
480                 if (ctxt->fd > FD_SETSIZE)
481                     return -1;
482 #endif
483 
484                 tv.tv_sec = timeout;
485                 tv.tv_usec = 0;
486                 FD_ZERO(&wfd);
487 #ifdef _MSC_VER
488 #pragma warning(push)
489 #pragma warning(disable: 4018)
490 #endif
491                 FD_SET(ctxt->fd, &wfd);
492 #ifdef _MSC_VER
493 #pragma warning(pop)
494 #endif
495                 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
496 #else
497                 p.fd = ctxt->fd;
498                 p.events = POLLOUT;
499                 (void) poll(&p, 1, timeout * 1000);
500 #endif /* !HAVE_POLL_H */
501             }
502         }
503     }
504 
505     return total_sent;
506 }
507 
508 /**
509  * xmlNanoHTTPRecv:
510  * @ctxt:  an HTTP context
511  *
512  * Read information coming from the HTTP connection.
513  * This is a blocking call (but it blocks in select(), not read()).
514  *
515  * Returns the number of byte read or -1 in case of error.
516  */
517 
518 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)519 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
520 {
521 #ifdef HAVE_POLL_H
522     struct pollfd p;
523 #else
524     fd_set rfd;
525     struct timeval tv;
526 #endif
527 
528 
529     while (ctxt->state & XML_NANO_HTTP_READ) {
530         if (ctxt->in == NULL) {
531             ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char));
532             if (ctxt->in == NULL) {
533                 xmlHTTPErrMemory("allocating input");
534                 ctxt->last = -1;
535                 return (-1);
536             }
537             ctxt->inlen = 65000;
538             ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
539         }
540         if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
541             int delta = ctxt->inrptr - ctxt->in;
542             int len = ctxt->inptr - ctxt->inrptr;
543 
544             memmove(ctxt->in, ctxt->inrptr, len);
545             ctxt->inrptr -= delta;
546             ctxt->content -= delta;
547             ctxt->inptr -= delta;
548         }
549         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
550             int d_inptr = ctxt->inptr - ctxt->in;
551             int d_content = ctxt->content - ctxt->in;
552             int d_inrptr = ctxt->inrptr - ctxt->in;
553             char *tmp_ptr = ctxt->in;
554 
555             ctxt->inlen *= 2;
556             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
557             if (ctxt->in == NULL) {
558                 xmlHTTPErrMemory("allocating input buffer");
559                 xmlFree(tmp_ptr);
560                 ctxt->last = -1;
561                 return (-1);
562             }
563             ctxt->inptr = ctxt->in + d_inptr;
564             ctxt->content = ctxt->in + d_content;
565             ctxt->inrptr = ctxt->in + d_inrptr;
566         }
567         ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
568         if (ctxt->last > 0) {
569             ctxt->inptr += ctxt->last;
570             return (ctxt->last);
571         }
572         if (ctxt->last == 0) {
573             return (0);
574         }
575         if (ctxt->last == -1) {
576             switch (socket_errno()) {
577                 case EINPROGRESS:
578                 case EWOULDBLOCK:
579 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
580                 case EAGAIN:
581 #endif
582                     break;
583 
584                 case ECONNRESET:
585                 case ESHUTDOWN:
586                     return (0);
587 
588                 default:
589                     __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
590                     return (-1);
591             }
592         }
593 #ifdef HAVE_POLL_H
594         p.fd = ctxt->fd;
595         p.events = POLLIN;
596         if ((poll(&p, 1, timeout * 1000) < 1)
597 #if defined(EINTR)
598             && (errno != EINTR)
599 #endif
600             )
601             return (0);
602 #else /* !HAVE_POLL_H */
603 #ifndef _WINSOCKAPI_
604         if (ctxt->fd > FD_SETSIZE)
605             return 0;
606 #endif
607 
608         tv.tv_sec = timeout;
609         tv.tv_usec = 0;
610         FD_ZERO(&rfd);
611 
612 #ifdef _MSC_VER
613 #pragma warning(push)
614 #pragma warning(disable: 4018)
615 #endif
616 
617         FD_SET(ctxt->fd, &rfd);
618 
619 #ifdef _MSC_VER
620 #pragma warning(pop)
621 #endif
622 
623         if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
624 #if defined(EINTR)
625             && (errno != EINTR)
626 #endif
627             )
628             return (0);
629 #endif /* !HAVE_POLL_H */
630     }
631     return (0);
632 }
633 
634 /**
635  * xmlNanoHTTPReadLine:
636  * @ctxt:  an HTTP context
637  *
638  * Read one line in the HTTP server output, usually for extracting
639  * the HTTP protocol informations from the answer header.
640  *
641  * Returns a newly allocated string with a copy of the line, or NULL
642  *         which indicate the end of the input.
643  */
644 
645 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)646 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
647     char buf[4096];
648     char *bp = buf;
649     int	rc;
650 
651     while (bp - buf < 4095) {
652 	if (ctxt->inrptr == ctxt->inptr) {
653 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
654 		if (bp == buf)
655 		    return(NULL);
656 		else
657 		    *bp = 0;
658 		return(xmlMemStrdup(buf));
659 	    }
660 	    else if ( rc == -1 ) {
661 	        return ( NULL );
662 	    }
663 	}
664 	*bp = *ctxt->inrptr++;
665 	if (*bp == '\n') {
666 	    *bp = 0;
667 	    return(xmlMemStrdup(buf));
668 	}
669 	if (*bp != '\r')
670 	    bp++;
671     }
672     buf[4095] = 0;
673     return(xmlMemStrdup(buf));
674 }
675 
676 
677 /**
678  * xmlNanoHTTPScanAnswer:
679  * @ctxt:  an HTTP context
680  * @line:  an HTTP header line
681  *
682  * Try to extract useful informations from the server answer.
683  * We currently parse and process:
684  *  - The HTTP revision/ return code
685  *  - The Content-Type, Mime-Type and charset used
686  *  - The Location for redirect processing.
687  *
688  * Returns -1 in case of failure, the file descriptor number otherwise
689  */
690 
691 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)692 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
693     const char *cur = line;
694 
695     if (line == NULL) return;
696 
697     if (!strncmp(line, "HTTP/", 5)) {
698         int version = 0;
699 	int ret = 0;
700 
701 	cur += 5;
702 	while ((*cur >= '0') && (*cur <= '9')) {
703 	    version *= 10;
704 	    version += *cur - '0';
705 	    cur++;
706 	}
707 	if (*cur == '.') {
708 	    cur++;
709 	    if ((*cur >= '0') && (*cur <= '9')) {
710 		version *= 10;
711 		version += *cur - '0';
712 		cur++;
713 	    }
714 	    while ((*cur >= '0') && (*cur <= '9'))
715 		cur++;
716 	} else
717 	    version *= 10;
718 	if ((*cur != ' ') && (*cur != '\t')) return;
719 	while ((*cur == ' ') || (*cur == '\t')) cur++;
720 	if ((*cur < '0') || (*cur > '9')) return;
721 	while ((*cur >= '0') && (*cur <= '9')) {
722 	    ret *= 10;
723 	    ret += *cur - '0';
724 	    cur++;
725 	}
726 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
727 	ctxt->returnValue = ret;
728         ctxt->version = version;
729     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
730         const xmlChar *charset, *last, *mime;
731         cur += 13;
732 	while ((*cur == ' ') || (*cur == '\t')) cur++;
733 	if (ctxt->contentType != NULL)
734 	    xmlFree(ctxt->contentType);
735 	ctxt->contentType = xmlMemStrdup(cur);
736 	mime = (const xmlChar *) cur;
737 	last = mime;
738 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
739 	       (*last != ';') && (*last != ','))
740 	    last++;
741 	if (ctxt->mimeType != NULL)
742 	    xmlFree(ctxt->mimeType);
743 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
744 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
745 	if (charset != NULL) {
746 	    charset += 8;
747 	    last = charset;
748 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
749 	           (*last != ';') && (*last != ','))
750 		last++;
751 	    if (ctxt->encoding != NULL)
752 	        xmlFree(ctxt->encoding);
753 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
754 	}
755     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
756         const xmlChar *charset, *last, *mime;
757         cur += 12;
758 	if (ctxt->contentType != NULL) return;
759 	while ((*cur == ' ') || (*cur == '\t')) cur++;
760 	ctxt->contentType = xmlMemStrdup(cur);
761 	mime = (const xmlChar *) cur;
762 	last = mime;
763 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
764 	       (*last != ';') && (*last != ','))
765 	    last++;
766 	if (ctxt->mimeType != NULL)
767 	    xmlFree(ctxt->mimeType);
768 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
769 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
770 	if (charset != NULL) {
771 	    charset += 8;
772 	    last = charset;
773 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
774 	           (*last != ';') && (*last != ','))
775 		last++;
776 	    if (ctxt->encoding != NULL)
777 	        xmlFree(ctxt->encoding);
778 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
779 	}
780     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
781         cur += 9;
782 	while ((*cur == ' ') || (*cur == '\t')) cur++;
783 	if (ctxt->location != NULL)
784 	    xmlFree(ctxt->location);
785 	if (*cur == '/') {
786 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
787 	    xmlChar *tmp_loc =
788 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
789 	    ctxt->location =
790 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
791 	} else {
792 	    ctxt->location = xmlMemStrdup(cur);
793 	}
794     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
795         cur += 17;
796 	while ((*cur == ' ') || (*cur == '\t')) cur++;
797 	if (ctxt->authHeader != NULL)
798 	    xmlFree(ctxt->authHeader);
799 	ctxt->authHeader = xmlMemStrdup(cur);
800     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
801         cur += 19;
802 	while ((*cur == ' ') || (*cur == '\t')) cur++;
803 	if (ctxt->authHeader != NULL)
804 	    xmlFree(ctxt->authHeader);
805 	ctxt->authHeader = xmlMemStrdup(cur);
806 #ifdef HAVE_ZLIB_H
807     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
808 	cur += 17;
809 	while ((*cur == ' ') || (*cur == '\t')) cur++;
810 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
811 	    ctxt->usesGzip = 1;
812 
813 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
814 
815 	    if (ctxt->strm != NULL) {
816 		ctxt->strm->zalloc = Z_NULL;
817 		ctxt->strm->zfree = Z_NULL;
818 		ctxt->strm->opaque = Z_NULL;
819 		ctxt->strm->avail_in = 0;
820 		ctxt->strm->next_in = Z_NULL;
821 
822 		inflateInit2( ctxt->strm, 31 );
823 	    }
824 	}
825 #endif
826     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
827 	cur += 15;
828 	ctxt->ContentLength = strtol( cur, NULL, 10 );
829     }
830 }
831 
832 /**
833  * xmlNanoHTTPConnectAttempt:
834  * @addr:  a socket address structure
835  *
836  * Attempt a connection to the given IP:port endpoint. It forces
837  * non-blocking semantic on the socket, and allow 60 seconds for
838  * the host to answer.
839  *
840  * Returns -1 in case of failure, the file descriptor number otherwise
841  */
842 
843 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)844 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
845 {
846 #ifndef HAVE_POLL_H
847     fd_set wfd;
848 #ifdef _WINSOCKAPI_
849     fd_set xfd;
850 #endif
851     struct timeval tv;
852 #else /* !HAVE_POLL_H */
853     struct pollfd p;
854 #endif /* !HAVE_POLL_H */
855     int status;
856 
857     int addrlen;
858 
859     SOCKET s;
860 
861 #ifdef SUPPORT_IP6
862     if (addr->sa_family == AF_INET6) {
863         s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
864         addrlen = sizeof(struct sockaddr_in6);
865     } else
866 #endif
867     {
868         s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
869         addrlen = sizeof(struct sockaddr_in);
870     }
871     if (s == INVALID_SOCKET) {
872 #ifdef DEBUG_HTTP
873         perror("socket");
874 #endif
875         __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
876         return INVALID_SOCKET;
877     }
878 #ifdef _WINSOCKAPI_
879     {
880         u_long one = 1;
881 
882         status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
883     }
884 #else /* _WINSOCKAPI_ */
885 #if defined(VMS)
886     {
887         int enable = 1;
888 
889         status = ioctl(s, FIONBIO, &enable);
890     }
891 #else /* VMS */
892 #if defined(__BEOS__) && !defined(__HAIKU__)
893     {
894         bool noblock = true;
895 
896         status =
897             setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock,
898                        sizeof(noblock));
899     }
900 #else /* __BEOS__ */
901     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
902 #ifdef O_NONBLOCK
903         status |= O_NONBLOCK;
904 #else /* O_NONBLOCK */
905 #ifdef F_NDELAY
906         status |= F_NDELAY;
907 #endif /* F_NDELAY */
908 #endif /* !O_NONBLOCK */
909         status = fcntl(s, F_SETFL, status);
910     }
911     if (status < 0) {
912 #ifdef DEBUG_HTTP
913         perror("nonblocking");
914 #endif
915         __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
916         closesocket(s);
917         return INVALID_SOCKET;
918     }
919 #endif /* !__BEOS__ */
920 #endif /* !VMS */
921 #endif /* !_WINSOCKAPI_ */
922 
923     if (connect(s, addr, addrlen) == -1) {
924         switch (socket_errno()) {
925             case EINPROGRESS:
926             case EWOULDBLOCK:
927                 break;
928             default:
929                 __xmlIOErr(XML_FROM_HTTP, 0,
930                            "error connecting to HTTP server");
931                 closesocket(s);
932                 return INVALID_SOCKET;
933         }
934     }
935 #ifndef HAVE_POLL_H
936     tv.tv_sec = timeout;
937     tv.tv_usec = 0;
938 
939 #ifdef _MSC_VER
940 #pragma warning(push)
941 #pragma warning(disable: 4018)
942 #endif
943 #ifndef _WINSOCKAPI_
944     if (s > FD_SETSIZE)
945         return INVALID_SOCKET;
946 #endif
947     FD_ZERO(&wfd);
948     FD_SET(s, &wfd);
949 
950 #ifdef _WINSOCKAPI_
951     FD_ZERO(&xfd);
952     FD_SET(s, &xfd);
953 
954     switch (select(s + 1, NULL, &wfd, &xfd, &tv))
955 #else
956     switch (select(s + 1, NULL, &wfd, NULL, &tv))
957 #endif
958 #ifdef _MSC_VER
959 #pragma warning(pop)
960 #endif
961 
962 #else /* !HAVE_POLL_H */
963     p.fd = s;
964     p.events = POLLOUT;
965     switch (poll(&p, 1, timeout * 1000))
966 #endif /* !HAVE_POLL_H */
967 
968     {
969         case 0:
970             /* Time out */
971             __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
972             closesocket(s);
973             return INVALID_SOCKET;
974         case -1:
975             /* Ermm.. ?? */
976             __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
977             closesocket(s);
978             return INVALID_SOCKET;
979     }
980 
981 #ifndef HAVE_POLL_H
982     if (FD_ISSET(s, &wfd)
983 #ifdef _WINSOCKAPI_
984         || FD_ISSET(s, &xfd)
985 #endif
986         )
987 #else /* !HAVE_POLL_H */
988     if (p.revents == POLLOUT)
989 #endif /* !HAVE_POLL_H */
990     {
991         XML_SOCKLEN_T len;
992 
993         len = sizeof(status);
994 #ifdef SO_ERROR
995         if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
996             0) {
997             /* Solaris error code */
998             __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
999             return INVALID_SOCKET;
1000         }
1001 #endif
1002         if (status) {
1003             __xmlIOErr(XML_FROM_HTTP, 0,
1004                        "Error connecting to remote host");
1005             closesocket(s);
1006             errno = status;
1007             return INVALID_SOCKET;
1008         }
1009     } else {
1010         /* pbm */
1011         __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
1012         closesocket(s);
1013         return INVALID_SOCKET;
1014     }
1015 
1016     return (s);
1017 }
1018 
1019 /**
1020  * xmlNanoHTTPConnectHost:
1021  * @host:  the host name
1022  * @port:  the port number
1023  *
1024  * Attempt a connection to the given host:port endpoint. It tries
1025  * the multiple IP provided by the DNS if available.
1026  *
1027  * Returns -1 in case of failure, the file descriptor number otherwise
1028  */
1029 
1030 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)1031 xmlNanoHTTPConnectHost(const char *host, int port)
1032 {
1033     struct hostent *h;
1034     struct sockaddr *addr = NULL;
1035     struct in_addr ia;
1036     struct sockaddr_in sockin;
1037 
1038 #ifdef SUPPORT_IP6
1039     struct in6_addr ia6;
1040     struct sockaddr_in6 sockin6;
1041 #endif
1042     int i;
1043     SOCKET s;
1044 
1045     memset (&sockin, 0, sizeof(sockin));
1046 #ifdef SUPPORT_IP6
1047     memset (&sockin6, 0, sizeof(sockin6));
1048 #endif
1049 
1050 #if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
1051     if (have_ipv6 ())
1052     {
1053 	if (!(_res.options & RES_INIT))
1054 	    res_init();
1055 	_res.options |= RES_USE_INET6;
1056     }
1057 #endif
1058 
1059 #if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
1060     if (have_ipv6 ())
1061 #endif
1062 #if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
1063     {
1064 	int status;
1065 	struct addrinfo hints, *res, *result;
1066 
1067 	result = NULL;
1068 	memset (&hints, 0,sizeof(hints));
1069 	hints.ai_socktype = SOCK_STREAM;
1070 
1071 	status = getaddrinfo (host, NULL, &hints, &result);
1072 	if (status) {
1073 	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1074 	    return INVALID_SOCKET;
1075 	}
1076 
1077 	for (res = result; res; res = res->ai_next) {
1078 	    if (res->ai_family == AF_INET) {
1079 		if (res->ai_addrlen > sizeof(sockin)) {
1080 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1081 		    freeaddrinfo (result);
1082 		    return INVALID_SOCKET;
1083 		}
1084 		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1085 		sockin.sin_port = htons (port);
1086 		addr = (struct sockaddr *)&sockin;
1087 #ifdef SUPPORT_IP6
1088 	    } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
1089 		if (res->ai_addrlen > sizeof(sockin6)) {
1090 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1091 		    freeaddrinfo (result);
1092 		    return INVALID_SOCKET;
1093 		}
1094 		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1095 		sockin6.sin6_port = htons (port);
1096 		addr = (struct sockaddr *)&sockin6;
1097 #endif
1098 	    } else
1099 		continue;              /* for */
1100 
1101 	    s = xmlNanoHTTPConnectAttempt (addr);
1102 	    if (s != INVALID_SOCKET) {
1103 		freeaddrinfo (result);
1104 		return (s);
1105 	    }
1106 	}
1107 
1108 	if (result)
1109 	    freeaddrinfo (result);
1110     }
1111 #endif
1112 #if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
1113     else
1114 #endif
1115 #if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
1116     {
1117 	h = gethostbyname (host);
1118 	if (h == NULL) {
1119 
1120 /*
1121  * Okay, I got fed up by the non-portability of this error message
1122  * extraction code. it work on Linux, if it work on your platform
1123  * and one want to enable it, send me the defined(foobar) needed
1124  */
1125 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(linux)
1126 	    const char *h_err_txt = "";
1127 
1128 	    switch (h_errno) {
1129 		case HOST_NOT_FOUND:
1130 		    h_err_txt = "Authoritive host not found";
1131 		    break;
1132 
1133 		case TRY_AGAIN:
1134 		    h_err_txt =
1135 			"Non-authoritive host not found or server failure.";
1136 		    break;
1137 
1138 		case NO_RECOVERY:
1139 		    h_err_txt =
1140 			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1141 		    break;
1142 
1143 		case NO_ADDRESS:
1144 		    h_err_txt =
1145 			"Valid name, no data record of requested type.";
1146 		    break;
1147 
1148 		default:
1149 		    h_err_txt = "No error text defined.";
1150 		    break;
1151 	    }
1152 	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1153 #else
1154 	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1155 #endif
1156 	    return INVALID_SOCKET;
1157 	}
1158 
1159 	for (i = 0; h->h_addr_list[i]; i++) {
1160 	    if (h->h_addrtype == AF_INET) {
1161 		/* A records (IPv4) */
1162 		if ((unsigned int) h->h_length > sizeof(ia)) {
1163 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1164 		    return INVALID_SOCKET;
1165 		}
1166 		memcpy (&ia, h->h_addr_list[i], h->h_length);
1167 		sockin.sin_family = h->h_addrtype;
1168 		sockin.sin_addr = ia;
1169 		sockin.sin_port = (u_short)htons ((unsigned short)port);
1170 		addr = (struct sockaddr *) &sockin;
1171 #ifdef SUPPORT_IP6
1172 	    } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
1173 		/* AAAA records (IPv6) */
1174 		if ((unsigned int) h->h_length > sizeof(ia6)) {
1175 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1176 		    return INVALID_SOCKET;
1177 		}
1178 		memcpy (&ia6, h->h_addr_list[i], h->h_length);
1179 		sockin6.sin6_family = h->h_addrtype;
1180 		sockin6.sin6_addr = ia6;
1181 		sockin6.sin6_port = htons (port);
1182 		addr = (struct sockaddr *) &sockin6;
1183 #endif
1184 	    } else
1185 		break;              /* for */
1186 
1187 	    s = xmlNanoHTTPConnectAttempt (addr);
1188 	    if (s != INVALID_SOCKET)
1189 		return (s);
1190 	}
1191     }
1192 #endif
1193 
1194 #ifdef DEBUG_HTTP
1195     xmlGenericError(xmlGenericErrorContext,
1196                     "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
1197                     host);
1198 #endif
1199     return INVALID_SOCKET;
1200 }
1201 
1202 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location reset to NULL here and then handed
 *                to xmlNanoHTTPMethod() to receive the Content-Type
 *
 * Open a connection to the indicated resource through
 * xmlNanoHTTPMethod(), leaving the method, input, and extra headers
 * unset.
 *
 * Returns the value returned by xmlNanoHTTPMethod(): NULL in case of
 *     failure, otherwise a request handler.
 *     The contentType, if provided, must be freed by the caller.
 */

void *
xmlNanoHTTPOpen(const char *URL, char **contentType)
{
    /* Never hand a stale pointer back to the caller. */
    if (contentType != NULL) {
        *contentType = NULL;
    }

    return(xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0));
}
1221 
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location reset to NULL here and then handed
 *                to xmlNanoHTTPMethodRedir() to receive the Content-Type
 * @redir:  if not NULL, location reset to NULL here and then handed to
 *          xmlNanoHTTPMethodRedir() to receive the redirected URL
 *
 * Open a connection to the indicated resource through
 * xmlNanoHTTPMethodRedir(), leaving the method, input, and extra headers
 * unset.
 *
 * Returns the value returned by xmlNanoHTTPMethodRedir(): NULL in case of
 *     failure, otherwise a request handler.
 *     The contentType, if provided, must be freed by the caller.
 */

void *
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir)
{
    /* Clear both output slots before delegating. */
    if (contentType != NULL) {
        *contentType = NULL;
    }
    if (redir != NULL) {
        *redir = NULL;
    }

    return(xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                  NULL, 0));
}
1242 
1243 /**
1244  * xmlNanoHTTPRead:
1245  * @ctx:  the HTTP context
1246  * @dest:  a buffer
1247  * @len:  the buffer length
1248  *
1249  * This function tries to read @len bytes from the existing HTTP connection
1250  * and saves them in @dest. This is a blocking call.
1251  *
1252  * Returns the number of byte read. 0 is an indication of an end of connection.
1253  *         -1 indicates a parameter error.
1254  */
1255 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1256 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1257     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1258 #ifdef HAVE_ZLIB_H
1259     int bytes_read = 0;
1260     int orig_avail_in;
1261     int z_ret;
1262 #endif
1263 
1264     if (ctx == NULL) return(-1);
1265     if (dest == NULL) return(-1);
1266     if (len <= 0) return(0);
1267 
1268 #ifdef HAVE_ZLIB_H
1269     if (ctxt->usesGzip == 1) {
1270         if (ctxt->strm == NULL) return(0);
1271 
1272         ctxt->strm->next_out = dest;
1273         ctxt->strm->avail_out = len;
1274 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1275 
1276         while (ctxt->strm->avail_out > 0 &&
1277 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1278             orig_avail_in = ctxt->strm->avail_in =
1279 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1280             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1281 
1282             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1283             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1284 
1285             if (z_ret != Z_OK) break;
1286 	}
1287 
1288         ctxt->inrptr += bytes_read;
1289         return(len - ctxt->strm->avail_out);
1290     }
1291 #endif
1292 
1293     while (ctxt->inptr - ctxt->inrptr < len) {
1294         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1295     }
1296     if (ctxt->inptr - ctxt->inrptr < len)
1297         len = ctxt->inptr - ctxt->inrptr;
1298     memcpy(dest, ctxt->inrptr, len);
1299     ctxt->inrptr += len;
1300     return(len);
1301 }
1302 
1303 /**
1304  * xmlNanoHTTPClose:
1305  * @ctx:  the HTTP context
1306  *
1307  * This function closes an HTTP context, it ends up the connection and
1308  * free all data related to it.
1309  */
1310 void
xmlNanoHTTPClose(void * ctx)1311 xmlNanoHTTPClose(void *ctx) {
1312     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1313 
1314     if (ctx == NULL) return;
1315 
1316     xmlNanoHTTPFreeCtxt(ctxt);
1317 }
1318 
1319 /**
1320  * xmlNanoHTTPMethodRedir:
1321  * @URL:  The URL to load
1322  * @method:  the HTTP method to use
1323  * @input:  the input string if any
1324  * @contentType:  the Content-Type information IN and OUT
1325  * @redir:  the redirected URL OUT
1326  * @headers:  the extra headers
1327  * @ilen:  input length
1328  *
1329  * This function try to open a connection to the indicated resource
1330  * via HTTP using the given @method, adding the given extra headers
1331  * and the input buffer for the request content.
1332  *
1333  * Returns NULL in case of failure, otherwise a request handler.
1334  *     The contentType, or redir, if provided must be freed by the caller
1335  */
1336 
1337 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1338 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1339                   char **contentType, char **redir,
1340 		  const char *headers, int ilen ) {
1341     xmlNanoHTTPCtxtPtr ctxt;
1342     char *bp, *p;
1343     int blen;
1344     SOCKET ret;
1345     int nbRedirects = 0;
1346     char *redirURL = NULL;
1347 #ifdef DEBUG_HTTP
1348     int xmt_bytes;
1349 #endif
1350 
1351     if (URL == NULL) return(NULL);
1352     if (method == NULL) method = "GET";
1353     xmlNanoHTTPInit();
1354 
1355 retry:
1356     if (redirURL == NULL)
1357 	ctxt = xmlNanoHTTPNewCtxt(URL);
1358     else {
1359 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1360 	ctxt->location = xmlMemStrdup(redirURL);
1361     }
1362 
1363     if ( ctxt == NULL ) {
1364 	return ( NULL );
1365     }
1366 
1367     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1368 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1369         xmlNanoHTTPFreeCtxt(ctxt);
1370 	if (redirURL != NULL) xmlFree(redirURL);
1371         return(NULL);
1372     }
1373     if (ctxt->hostname == NULL) {
1374 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1375 	           "Failed to identify host in URI");
1376         xmlNanoHTTPFreeCtxt(ctxt);
1377 	if (redirURL != NULL) xmlFree(redirURL);
1378         return(NULL);
1379     }
1380     if (proxy) {
1381 	blen = strlen(ctxt->hostname) * 2 + 16;
1382 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1383     }
1384     else {
1385 	blen = strlen(ctxt->hostname);
1386 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1387     }
1388     if (ret == INVALID_SOCKET) {
1389         xmlNanoHTTPFreeCtxt(ctxt);
1390 	if (redirURL != NULL) xmlFree(redirURL);
1391         return(NULL);
1392     }
1393     ctxt->fd = ret;
1394 
1395     if (input == NULL)
1396 	ilen = 0;
1397     else
1398 	blen += 36;
1399 
1400     if (headers != NULL)
1401 	blen += strlen(headers) + 2;
1402     if (contentType && *contentType)
1403 	/* reserve for string plus 'Content-Type: \r\n" */
1404 	blen += strlen(*contentType) + 16;
1405     if (ctxt->query != NULL)
1406 	/* 1 for '?' */
1407 	blen += strlen(ctxt->query) + 1;
1408     blen += strlen(method) + strlen(ctxt->path) + 24;
1409 #ifdef HAVE_ZLIB_H
1410     /* reserve for possible 'Accept-Encoding: gzip' string */
1411     blen += 23;
1412 #endif
1413     if (ctxt->port != 80) {
1414 	/* reserve space for ':xxxxx', incl. potential proxy */
1415 	if (proxy)
1416 	    blen += 12;
1417 	else
1418 	    blen += 6;
1419     }
1420     bp = (char*)xmlMallocAtomic(blen);
1421     if ( bp == NULL ) {
1422         xmlNanoHTTPFreeCtxt( ctxt );
1423 	xmlHTTPErrMemory("allocating header buffer");
1424 	return ( NULL );
1425     }
1426 
1427     p = bp;
1428 
1429     if (proxy) {
1430 	if (ctxt->port != 80) {
1431 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1432 			method, ctxt->hostname,
1433 		 	ctxt->port, ctxt->path );
1434 	}
1435 	else
1436 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1437 	    		ctxt->hostname, ctxt->path);
1438     }
1439     else
1440 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1441 
1442     if (ctxt->query != NULL)
1443 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1444 
1445     if (ctxt->port == 80) {
1446         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1447 		    ctxt->hostname);
1448     } else {
1449         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1450 		    ctxt->hostname, ctxt->port);
1451     }
1452 
1453 #ifdef HAVE_ZLIB_H
1454     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1455 #endif
1456 
1457     if (contentType != NULL && *contentType)
1458 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1459 
1460     if (headers != NULL)
1461 	p += snprintf( p, blen - (p - bp), "%s", headers );
1462 
1463     if (input != NULL)
1464 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1465     else
1466 	snprintf(p, blen - (p - bp), "\r\n");
1467 
1468 #ifdef DEBUG_HTTP
1469     xmlGenericError(xmlGenericErrorContext,
1470 	    "-> %s%s", proxy? "(Proxy) " : "", bp);
1471     if ((blen -= strlen(bp)+1) < 0)
1472 	xmlGenericError(xmlGenericErrorContext,
1473 		"ERROR: overflowed buffer by %d bytes\n", -blen);
1474 #endif
1475     ctxt->outptr = ctxt->out = bp;
1476     ctxt->state = XML_NANO_HTTP_WRITE;
1477     blen = strlen( ctxt->out );
1478 #ifdef DEBUG_HTTP
1479     xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1480     if ( xmt_bytes != blen )
1481         xmlGenericError( xmlGenericErrorContext,
1482 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1483 			xmt_bytes, blen,
1484 			"bytes of HTTP headers sent to host",
1485 			ctxt->hostname );
1486 #else
1487     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1488 #endif
1489 
1490     if ( input != NULL ) {
1491 #ifdef DEBUG_HTTP
1492         xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1493 
1494 	if ( xmt_bytes != ilen )
1495 	    xmlGenericError( xmlGenericErrorContext,
1496 	    		"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1497 			xmt_bytes, ilen,
1498 			"bytes of HTTP content sent to host",
1499 			ctxt->hostname );
1500 #else
1501 	xmlNanoHTTPSend( ctxt, input, ilen );
1502 #endif
1503     }
1504 
1505     ctxt->state = XML_NANO_HTTP_READ;
1506 
1507     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1508         if (*p == 0) {
1509 	    ctxt->content = ctxt->inrptr;
1510 	    xmlFree(p);
1511 	    break;
1512 	}
1513 	xmlNanoHTTPScanAnswer(ctxt, p);
1514 
1515 #ifdef DEBUG_HTTP
1516 	xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1517 #endif
1518         xmlFree(p);
1519     }
1520 
1521     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1522         (ctxt->returnValue < 400)) {
1523 #ifdef DEBUG_HTTP
1524 	xmlGenericError(xmlGenericErrorContext,
1525 		"\nRedirect to: %s\n", ctxt->location);
1526 #endif
1527 	while ( xmlNanoHTTPRecv(ctxt) > 0 ) ;
1528         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1529 	    nbRedirects++;
1530 	    if (redirURL != NULL)
1531 		xmlFree(redirURL);
1532 	    redirURL = xmlMemStrdup(ctxt->location);
1533 	    xmlNanoHTTPFreeCtxt(ctxt);
1534 	    goto retry;
1535 	}
1536 	xmlNanoHTTPFreeCtxt(ctxt);
1537 	if (redirURL != NULL) xmlFree(redirURL);
1538 #ifdef DEBUG_HTTP
1539 	xmlGenericError(xmlGenericErrorContext,
1540 		"xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1541 #endif
1542 	return(NULL);
1543     }
1544 
1545     if (contentType != NULL) {
1546 	if (ctxt->contentType != NULL)
1547 	    *contentType = xmlMemStrdup(ctxt->contentType);
1548 	else
1549 	    *contentType = NULL;
1550     }
1551 
1552     if ((redir != NULL) && (redirURL != NULL)) {
1553 	*redir = redirURL;
1554     } else {
1555 	if (redirURL != NULL)
1556 	    xmlFree(redirURL);
1557 	if (redir != NULL)
1558 	    *redir = NULL;
1559     }
1560 
1561 #ifdef DEBUG_HTTP
1562     if (ctxt->contentType != NULL)
1563 	xmlGenericError(xmlGenericErrorContext,
1564 		"\nCode %d, content-type '%s'\n\n",
1565 	       ctxt->returnValue, ctxt->contentType);
1566     else
1567 	xmlGenericError(xmlGenericErrorContext,
1568 		"\nCode %d, no content-type\n\n",
1569 	       ctxt->returnValue);
1570 #endif
1571 
1572     return((void *) ctxt);
1573 }
1574 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Convenience front end for xmlNanoHTTPMethodRedir() for callers that
 * are not interested in the redirected URL: all arguments are forwarded
 * unchanged and NULL is passed for the redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void *
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen)
{
    void *handler;

    handler = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                     NULL, headers, ilen);
    return(handler);
}
1598 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and saves its content in the file. An existing file is truncated
 * before the content is written.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /*
         * Descriptor 1 is the standard output. Descriptor 0 (standard
         * input) is only writable by accident, e.g. on a terminal.
         */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep the tail of a longer, pre-existing file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* The standard output belongs to the process, leave it open. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1649 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the content is written.
 * Once both arguments have been accepted, the context is closed and
 * freed before returning, whether or not saving succeeded.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /*
         * Descriptor 1 is the standard output. Descriptor 0 (standard
         * input) is only writable by accident, e.g. on a terminal.
         */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep the tail of a longer, pre-existing file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* The standard output belongs to the process, leave it open. */
    if (!toStdout)
        close(fd);
    return(ret);
}
#endif /* LIBXML_OUTPUT_ENABLED */
1692 
1693 /**
1694  * xmlNanoHTTPReturnCode:
1695  * @ctx:  the HTTP context
1696  *
1697  * Get the latest HTTP return code received
1698  *
1699  * Returns the HTTP return code for the request.
1700  */
1701 int
xmlNanoHTTPReturnCode(void * ctx)1702 xmlNanoHTTPReturnCode(void *ctx) {
1703     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1704 
1705     if (ctxt == NULL) return(-1);
1706 
1707     return(ctxt->returnValue);
1708 }
1709 
1710 /**
1711  * xmlNanoHTTPAuthHeader:
1712  * @ctx:  the HTTP context
1713  *
1714  * Get the authentication header of an HTTP context
1715  *
1716  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1717  * header.
1718  */
1719 const char *
xmlNanoHTTPAuthHeader(void * ctx)1720 xmlNanoHTTPAuthHeader(void *ctx) {
1721     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1722 
1723     if (ctxt == NULL) return(NULL);
1724 
1725     return(ctxt->authHeader);
1726 }
1727 
1728 /**
1729  * xmlNanoHTTPContentLength:
1730  * @ctx:  the HTTP context
1731  *
1732  * Provides the specified content length from the HTTP header.
1733  *
1734  * Return the specified content length from the HTTP header.  Note that
1735  * a value of -1 indicates that the content length element was not included in
1736  * the response header.
1737  */
1738 int
xmlNanoHTTPContentLength(void * ctx)1739 xmlNanoHTTPContentLength( void * ctx ) {
1740     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1741 
1742     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1743 }
1744 
1745 /**
1746  * xmlNanoHTTPRedir:
1747  * @ctx:  the HTTP context
1748  *
1749  * Provides the specified redirection URL if available from the HTTP header.
1750  *
1751  * Return the specified redirection URL or NULL if not redirected.
1752  */
1753 const char *
xmlNanoHTTPRedir(void * ctx)1754 xmlNanoHTTPRedir( void * ctx ) {
1755     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1756 
1757     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1758 }
1759 
1760 /**
1761  * xmlNanoHTTPEncoding:
1762  * @ctx:  the HTTP context
1763  *
1764  * Provides the specified encoding if specified in the HTTP headers.
1765  *
1766  * Return the specified encoding or NULL if not available
1767  */
1768 const char *
xmlNanoHTTPEncoding(void * ctx)1769 xmlNanoHTTPEncoding( void * ctx ) {
1770     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1771 
1772     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1773 }
1774 
1775 /**
1776  * xmlNanoHTTPMimeType:
1777  * @ctx:  the HTTP context
1778  *
1779  * Provides the specified Mime-Type if specified in the HTTP headers.
1780  *
1781  * Return the specified Mime-Type or NULL if not available
1782  */
1783 const char *
xmlNanoHTTPMimeType(void * ctx)1784 xmlNanoHTTPMimeType( void * ctx ) {
1785     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1786 
1787     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1788 }
1789 
1790 /**
1791  * xmlNanoHTTPFetchContent:
1792  * @ctx:  the HTTP context
1793  * @ptr:  pointer to set to the content buffer.
1794  * @len:  integer pointer to hold the length of the content
1795  *
1796  * Check if all the content was read
1797  *
1798  * Returns 0 if all the content was read and available, returns
1799  * -1 if received content length was less than specified or an error
1800  * occurred.
1801  */
1802 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1803 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1804     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1805 
1806     int			rc = 0;
1807     int			cur_lgth;
1808     int			rcvd_lgth;
1809     int			dummy_int;
1810     char *		dummy_ptr = NULL;
1811 
1812     /*  Dummy up return input parameters if not provided  */
1813 
1814     if ( len == NULL )
1815         len = &dummy_int;
1816 
1817     if ( ptr == NULL )
1818         ptr = &dummy_ptr;
1819 
1820     /*  But can't work without the context pointer  */
1821 
1822     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1823         *len = 0;
1824 	*ptr = NULL;
1825 	return ( -1 );
1826     }
1827 
1828     rcvd_lgth = ctxt->inptr - ctxt->content;
1829 
1830     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1831 
1832 	rcvd_lgth += cur_lgth;
1833 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1834 	    break;
1835     }
1836 
1837     *ptr = ctxt->content;
1838     *len = rcvd_lgth;
1839 
1840     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1841         rc = -1;
1842     else if ( rcvd_lgth == 0 )
1843 	rc = -1;
1844 
1845     return ( rc );
1846 }
1847 
#ifdef STANDALONE
/*
 * Stand-alone test driver: fetch argv[1] into argv[2], or into "-" when no
 * file name is given; print a usage message when no URL is given.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    const char *target;

    if (argv[1] == NULL) {
        xmlGenericError(xmlGenericErrorContext,
                "%s: minimal HTTP GET implementation\n", argv[0]);
        xmlGenericError(xmlGenericErrorContext,
                "\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
        target = (argv[2] != NULL) ? argv[2] : "-";
        xmlNanoHTTPFetch(argv[1], target, &contentType);
        if (contentType != NULL)
            xmlFree(contentType);
    }

    xmlNanoHTTPCleanup();
    xmlMemoryDump();
    return(0);
}
#endif /* STANDALONE */
1869 #else /* !LIBXML_HTTP_ENABLED */
#ifdef STANDALONE
#include <stdio.h>
/*
 * Stand-alone driver used when HTTP support is compiled out: it only
 * reports that fact.
 */
int main(int argc, char **argv) {
    const char *progname = argv[0];

    xmlGenericError(xmlGenericErrorContext,
            "%s : HTTP support not compiled in\n", progname);
    return(0);
}
#endif /* STANDALONE */
1878 #endif /* LIBXML_HTTP_ENABLED */
1879 #define bottom_nanohttp
1880 #include "elfgcchack.h"
1881