/*
 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
 *             Focuses on size, streamability, reentrancy and portability.
 *
 * This is clearly not a general-purpose HTTP implementation.
 * If you are looking for one, check:
 *         http://www.w3.org/Library/
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 
23 #ifdef HAVE_UNISTD_H
24 #include <unistd.h>
25 #elif defined (_WIN32)
26 #include <io.h>
27 #endif
28 #ifdef HAVE_SYS_SOCKET_H
29 #include <sys/socket.h>
30 #endif
31 #ifdef HAVE_NETINET_IN_H
32 #include <netinet/in.h>
33 #endif
34 #ifdef HAVE_ARPA_INET_H
35 #include <arpa/inet.h>
36 #endif
37 #ifdef HAVE_NETDB_H
38 #include <netdb.h>
39 #endif
40 #ifdef HAVE_FCNTL_H
41 #include <fcntl.h>
42 #endif
43 #ifdef HAVE_SYS_TIME_H
44 #include <sys/time.h>
45 #endif
46 #ifndef HAVE_POLL_H
47 #ifdef HAVE_SYS_SELECT_H
48 #include <sys/select.h>
49 #endif
50 #else
51 #include <poll.h>
52 #endif
53 #ifdef LIBXML_ZLIB_ENABLED
54 #include <zlib.h>
55 #endif
56 
57 
58 #ifdef VMS
59 #include <stropts>
60 #define XML_SOCKLEN_T unsigned int
61 #endif
62 
63 #if defined(_WIN32)
64 #include <wsockcompat.h>
65 #endif
66 
67 #include <libxml/xmlerror.h>
68 #include <libxml/xmlmemory.h>
69 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
70 #include <libxml/nanohttp.h>
71 #include <libxml/uri.h>
72 
73 #include "private/error.h"
74 #include "private/io.h"
75 
/**
 * A couple of portability macros.
 *
 * When the Winsock API is not in use, its socket vocabulary is mapped
 * onto the plain file-descriptor equivalents used throughout this file.
 */
#ifndef _WINSOCKAPI_
#define closesocket(s) close(s)
#define SOCKET int
#define INVALID_SOCKET (-1)
#endif

/* Type of the length argument handed to getsockopt(). */
#ifndef XML_SOCKLEN_T
#define XML_SOCKLEN_T unsigned int
#endif

/* Casts applied to the arguments of gethostbyname() and send(). */
#define GETHOSTBYNAME_ARG_CAST (char *)
#define SEND_ARG2_CAST (char *)

#ifdef STANDALONE
/* Arguments are parenthesized so that expressions can be passed safely. */
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)(a), (char *)(b), (n))
#define xmlStrcasecmpi(a, b) strcasecmp((char *)(a), (char *)(b))
#endif

/* NOTE(review): presumably the redirect limit; its use is outside this view. */
#define XML_NANO_HTTP_MAX_REDIR	10

/* Number of bytes requested from recv() per call; also the buffer slack. */
#define XML_NANO_HTTP_CHUNK	4096

/*
 * Values for xmlNanoHTTPCtxt.state. WRITE and READ are tested as bit
 * flags by the send and receive routines.
 */
#define XML_NANO_HTTP_CLOSED	0
#define XML_NANO_HTTP_WRITE	1
#define XML_NANO_HTTP_READ	2
#define XML_NANO_HTTP_NONE	4

/* Error reporting is compiled out: the arguments are never evaluated. */
#define __xmlIOErr(domain, code, extra) ((void) 0)
107 
108 typedef struct xmlNanoHTTPCtxt {
109     char *protocol;	/* the protocol name */
110     char *hostname;	/* the host name */
111     int port;		/* the port */
112     char *path;		/* the path within the URL */
113     char *query;	/* the query string */
114     SOCKET fd;		/* the file descriptor for the socket */
115     int state;		/* WRITE / READ / CLOSED */
116     char *out;		/* buffer sent (zero terminated) */
117     char *outptr;	/* index within the buffer sent */
118     char *in;		/* the receiving buffer */
119     char *content;	/* the start of the content */
120     char *inptr;	/* the next byte to read from network */
121     char *inrptr;	/* the next byte to give back to the client */
122     int inlen;		/* len of the input buffer */
123     int last;		/* return code for last operation */
124     int returnValue;	/* the protocol return value */
125     int version;        /* the protocol version */
126     int ContentLength;  /* specified content length from HTTP header */
127     char *contentType;	/* the MIME type for the input */
128     char *location;	/* the new URL in case of redirect */
129     char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
130     char *encoding;	/* encoding extracted from the contentType */
131     char *mimeType;	/* Mime-Type extracted from the contentType */
132 #ifdef LIBXML_ZLIB_ENABLED
133     z_stream *strm;	/* Zlib stream object */
134     int usesGzip;	/* "Content-Encoding: gzip" was detected */
135 #endif
136 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
137 
/* File-scope state shared by every request issued through this module. */
static int initialized = 0;	/* set once xmlNanoHTTPInit() has completed */
static char *proxy = NULL;	/* the proxy name if any */
static int proxyPort;		/* the proxy port if any */
static unsigned int timeout = 60;	/* the select()/poll() timeout in seconds */

static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
144 
145 /**
146  * xmlHTTPErrMemory:
147  * @extra:  extra information
148  *
149  * Handle an out of memory condition
150  */
151 static void
xmlHTTPErrMemory(void)152 xmlHTTPErrMemory(void)
153 {
154     xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_HTTP, NULL);
155 }
156 
/**
 * socket_errno:
 *
 * A portability function: fetch the error code of the last socket
 * operation. With Winsock the WSA codes handled by this file are
 * translated to their errno counterparts and every other code is
 * passed through unchanged; elsewhere this is simply errno.
 *
 * Returns the error code of the last socket operation.
 */
static int socket_errno(void) {
#ifdef _WINSOCKAPI_
    int err = WSAGetLastError();

    switch (err) {
        case WSAECONNRESET:
            return(ECONNRESET);
        case WSAEINPROGRESS:
            return(EINPROGRESS);
        case WSAEINTR:
            return(EINTR);
        case WSAESHUTDOWN:
            return(ESHUTDOWN);
        case WSAEWOULDBLOCK:
            return(EWOULDBLOCK);
        default:
            return(err);
    }
#else
    return(errno);
#endif
}
181 
182 /**
183  * xmlNanoHTTPInit:
184  *
185  * Initialize the HTTP protocol layer.
186  * Currently it just checks for proxy information
187  */
188 
189 void
xmlNanoHTTPInit(void)190 xmlNanoHTTPInit(void) {
191     const char *env;
192 #ifdef _WINSOCKAPI_
193     WSADATA wsaData;
194 #endif
195 
196     if (initialized)
197 	return;
198 
199 #ifdef _WINSOCKAPI_
200     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
201 	return;
202 #endif
203 
204     if (proxy == NULL) {
205 	proxyPort = 80;
206 	env = getenv("no_proxy");
207 	if (env && ((env[0] == '*') && (env[1] == 0)))
208 	    goto done;
209 	env = getenv("http_proxy");
210 	if (env != NULL) {
211 	    xmlNanoHTTPScanProxy(env);
212 	    goto done;
213 	}
214 	env = getenv("HTTP_PROXY");
215 	if (env != NULL) {
216 	    xmlNanoHTTPScanProxy(env);
217 	    goto done;
218 	}
219     }
220 done:
221     initialized = 1;
222 }
223 
224 /**
225  * xmlNanoHTTPCleanup:
226  *
227  * Cleanup the HTTP protocol layer.
228  */
229 
230 void
xmlNanoHTTPCleanup(void)231 xmlNanoHTTPCleanup(void) {
232     if (proxy != NULL) {
233 	xmlFree(proxy);
234 	proxy = NULL;
235     }
236 #ifdef _WINSOCKAPI_
237     if (initialized)
238 	WSACleanup();
239 #endif
240     initialized = 0;
241     return;
242 }
243 
244 /**
245  * xmlNanoHTTPScanURL:
246  * @ctxt:  an HTTP context
247  * @URL:  The URL used to initialize the context
248  *
249  * (Re)Initialize an HTTP context by parsing the URL and finding
250  * the protocol host port and path it indicates.
251  */
252 
253 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)254 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
255     xmlURIPtr uri;
256     int len;
257 
258     /*
259      * Clear any existing data from the context
260      */
261     if (ctxt->protocol != NULL) {
262         xmlFree(ctxt->protocol);
263 	ctxt->protocol = NULL;
264     }
265     if (ctxt->hostname != NULL) {
266         xmlFree(ctxt->hostname);
267 	ctxt->hostname = NULL;
268     }
269     if (ctxt->path != NULL) {
270         xmlFree(ctxt->path);
271 	ctxt->path = NULL;
272     }
273     if (ctxt->query != NULL) {
274         xmlFree(ctxt->query);
275 	ctxt->query = NULL;
276     }
277     if (URL == NULL) return;
278 
279     uri = xmlParseURIRaw(URL, 1);
280     if (uri == NULL)
281 	return;
282 
283     if ((uri->scheme == NULL) || (uri->server == NULL)) {
284 	xmlFreeURI(uri);
285 	return;
286     }
287 
288     ctxt->protocol = xmlMemStrdup(uri->scheme);
289     /* special case of IPv6 addresses, the [] need to be removed */
290     if ((uri->server != NULL) && (*uri->server == '[')) {
291         len = strlen(uri->server);
292 	if ((len > 2) && (uri->server[len - 1] == ']')) {
293 	    ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
294 	} else
295 	    ctxt->hostname = xmlMemStrdup(uri->server);
296     } else
297 	ctxt->hostname = xmlMemStrdup(uri->server);
298     if (uri->path != NULL)
299 	ctxt->path = xmlMemStrdup(uri->path);
300     else
301 	ctxt->path = xmlMemStrdup("/");
302     if (uri->query != NULL)
303 	ctxt->query = xmlMemStrdup(uri->query);
304     if (uri->port != 0)
305 	ctxt->port = uri->port;
306 
307     xmlFreeURI(uri);
308 }
309 
310 /**
311  * xmlNanoHTTPScanProxy:
312  * @URL:  The proxy URL used to initialize the proxy context
313  *
314  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
315  * the protocol host port it indicates.
316  * Should be like http://myproxy/ or http://myproxy:3128/
317  * A NULL URL cleans up proxy information.
318  */
319 
320 void
xmlNanoHTTPScanProxy(const char * URL)321 xmlNanoHTTPScanProxy(const char *URL) {
322     xmlURIPtr uri;
323 
324     if (proxy != NULL) {
325         xmlFree(proxy);
326 	proxy = NULL;
327     }
328     proxyPort = 0;
329 
330     if (URL == NULL) return;
331 
332     uri = xmlParseURIRaw(URL, 1);
333     if ((uri == NULL) || (uri->scheme == NULL) ||
334 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
335 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
336 	if (uri != NULL)
337 	    xmlFreeURI(uri);
338 	return;
339     }
340 
341     proxy = xmlMemStrdup(uri->server);
342     if (uri->port != 0)
343 	proxyPort = uri->port;
344 
345     xmlFreeURI(uri);
346 }
347 
348 /**
349  * xmlNanoHTTPNewCtxt:
350  * @URL:  The URL used to initialize the context
351  *
352  * Allocate and initialize a new HTTP context.
353  *
354  * Returns an HTTP context or NULL in case of error.
355  */
356 
357 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)358 xmlNanoHTTPNewCtxt(const char *URL) {
359     xmlNanoHTTPCtxtPtr ret;
360 
361     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
362     if (ret == NULL) {
363         xmlHTTPErrMemory();
364         return(NULL);
365     }
366 
367     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
368     ret->port = 80;
369     ret->returnValue = 0;
370     ret->fd = INVALID_SOCKET;
371     ret->ContentLength = -1;
372 
373     xmlNanoHTTPScanURL(ret, URL);
374 
375     return(ret);
376 }
377 
378 /**
379  * xmlNanoHTTPFreeCtxt:
380  * @ctxt:  an HTTP context
381  *
382  * Frees the context after closing the connection.
383  */
384 
385 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)386 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
387     if (ctxt == NULL) return;
388     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
389     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
390     if (ctxt->path != NULL) xmlFree(ctxt->path);
391     if (ctxt->query != NULL) xmlFree(ctxt->query);
392     if (ctxt->out != NULL) xmlFree(ctxt->out);
393     if (ctxt->in != NULL) xmlFree(ctxt->in);
394     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
395     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
396     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
397     if (ctxt->location != NULL) xmlFree(ctxt->location);
398     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
399 #ifdef LIBXML_ZLIB_ENABLED
400     if (ctxt->strm != NULL) {
401 	inflateEnd(ctxt->strm);
402 	xmlFree(ctxt->strm);
403     }
404 #endif
405 
406     ctxt->state = XML_NANO_HTTP_NONE;
407     if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
408     ctxt->fd = INVALID_SOCKET;
409     xmlFree(ctxt);
410 }
411 
412 /**
413  * xmlNanoHTTPSend:
414  * @ctxt:  an HTTP context
415  *
416  * Send the input needed to initiate the processing on the server side
417  * Returns number of bytes sent or -1 on error.
418  */
419 
420 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)421 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
422 {
423     int total_sent = 0;
424 #ifdef HAVE_POLL_H
425     struct pollfd p;
426 #else
427     struct timeval tv;
428     fd_set wfd;
429 #endif
430 
431     if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
432         while (total_sent < outlen) {
433             int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
434                              outlen - total_sent, 0);
435 
436             if (nsent > 0)
437                 total_sent += nsent;
438             else if ((nsent == -1) &&
439 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
440                      (socket_errno() != EAGAIN) &&
441 #endif
442                      (socket_errno() != EWOULDBLOCK)) {
443                 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
444                 if (total_sent == 0)
445                     total_sent = -1;
446                 break;
447             } else {
448                 /*
449                  * No data sent
450                  * Since non-blocking sockets are used, wait for
451                  * socket to be writable or default timeout prior
452                  * to retrying.
453                  */
454 #ifndef HAVE_POLL_H
455 #ifndef _WINSOCKAPI_
456                 if (ctxt->fd > FD_SETSIZE)
457                     return -1;
458 #endif
459 
460                 tv.tv_sec = timeout;
461                 tv.tv_usec = 0;
462                 FD_ZERO(&wfd);
463 #ifdef _MSC_VER
464 #pragma warning(push)
465 #pragma warning(disable: 4018)
466 #endif
467                 FD_SET(ctxt->fd, &wfd);
468 #ifdef _MSC_VER
469 #pragma warning(pop)
470 #endif
471                 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
472 #else
473                 p.fd = ctxt->fd;
474                 p.events = POLLOUT;
475                 (void) poll(&p, 1, timeout * 1000);
476 #endif /* !HAVE_POLL_H */
477             }
478         }
479     }
480 
481     return total_sent;
482 }
483 
484 /**
485  * xmlNanoHTTPRecv:
486  * @ctxt:  an HTTP context
487  *
488  * Read information coming from the HTTP connection.
489  * This is a blocking call (but it blocks in select(), not read()).
490  *
491  * Returns the number of byte read or -1 in case of error.
492  */
493 
494 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)495 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
496 {
497 #ifdef HAVE_POLL_H
498     struct pollfd p;
499 #else
500     fd_set rfd;
501     struct timeval tv;
502 #endif
503 
504 
505     while (ctxt->state & XML_NANO_HTTP_READ) {
506         if (ctxt->in == NULL) {
507             ctxt->in = (char *) xmlMallocAtomic(65000);
508             if (ctxt->in == NULL) {
509                 xmlHTTPErrMemory();
510                 ctxt->last = -1;
511                 return (-1);
512             }
513             ctxt->inlen = 65000;
514             ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
515         }
516         if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
517             int delta = ctxt->inrptr - ctxt->in;
518             int len = ctxt->inptr - ctxt->inrptr;
519 
520             memmove(ctxt->in, ctxt->inrptr, len);
521             ctxt->inrptr -= delta;
522             ctxt->content -= delta;
523             ctxt->inptr -= delta;
524         }
525         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
526             int d_inptr = ctxt->inptr - ctxt->in;
527             int d_content = ctxt->content - ctxt->in;
528             int d_inrptr = ctxt->inrptr - ctxt->in;
529             char *tmp_ptr = ctxt->in;
530 
531             ctxt->inlen *= 2;
532             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
533             if (ctxt->in == NULL) {
534                 xmlHTTPErrMemory();
535                 xmlFree(tmp_ptr);
536                 ctxt->last = -1;
537                 return (-1);
538             }
539             ctxt->inptr = ctxt->in + d_inptr;
540             ctxt->content = ctxt->in + d_content;
541             ctxt->inrptr = ctxt->in + d_inrptr;
542         }
543         ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
544         if (ctxt->last > 0) {
545             ctxt->inptr += ctxt->last;
546             return (ctxt->last);
547         }
548         if (ctxt->last == 0) {
549             return (0);
550         }
551         if (ctxt->last == -1) {
552             switch (socket_errno()) {
553                 case EINPROGRESS:
554                 case EWOULDBLOCK:
555 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
556                 case EAGAIN:
557 #endif
558                     break;
559 
560                 case ECONNRESET:
561                 case ESHUTDOWN:
562                     return (0);
563 
564                 default:
565                     __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
566                     return (-1);
567             }
568         }
569 #ifdef HAVE_POLL_H
570         p.fd = ctxt->fd;
571         p.events = POLLIN;
572         if ((poll(&p, 1, timeout * 1000) < 1)
573 #if defined(EINTR)
574             && (errno != EINTR)
575 #endif
576             )
577             return (0);
578 #else /* !HAVE_POLL_H */
579 #ifndef _WINSOCKAPI_
580         if (ctxt->fd > FD_SETSIZE)
581             return 0;
582 #endif
583 
584         tv.tv_sec = timeout;
585         tv.tv_usec = 0;
586         FD_ZERO(&rfd);
587 
588 #ifdef _MSC_VER
589 #pragma warning(push)
590 #pragma warning(disable: 4018)
591 #endif
592 
593         FD_SET(ctxt->fd, &rfd);
594 
595 #ifdef _MSC_VER
596 #pragma warning(pop)
597 #endif
598 
599         if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
600 #if defined(EINTR)
601             && (socket_errno() != EINTR)
602 #endif
603             )
604             return (0);
605 #endif /* !HAVE_POLL_H */
606     }
607     return (0);
608 }
609 
610 /**
611  * xmlNanoHTTPReadLine:
612  * @ctxt:  an HTTP context
613  *
614  * Read one line in the HTTP server output, usually for extracting
615  * the HTTP protocol information from the answer header.
616  *
617  * Returns a newly allocated string with a copy of the line, or NULL
618  *         which indicate the end of the input.
619  */
620 
621 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)622 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
623     char buf[4096];
624     char *bp = buf;
625     int	rc;
626 
627     while (bp - buf < 4095) {
628 	if (ctxt->inrptr == ctxt->inptr) {
629 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
630 		if (bp == buf)
631 		    return(NULL);
632 		else
633 		    *bp = 0;
634 		return(xmlMemStrdup(buf));
635 	    }
636 	    else if ( rc == -1 ) {
637 	        return ( NULL );
638 	    }
639 	}
640 	*bp = *ctxt->inrptr++;
641 	if (*bp == '\n') {
642 	    *bp = 0;
643 	    return(xmlMemStrdup(buf));
644 	}
645 	if (*bp != '\r')
646 	    bp++;
647     }
648     buf[4095] = 0;
649     return(xmlMemStrdup(buf));
650 }
651 
652 
653 /**
654  * xmlNanoHTTPScanAnswer:
655  * @ctxt:  an HTTP context
656  * @line:  an HTTP header line
657  *
658  * Try to extract useful information from the server answer.
659  * We currently parse and process:
660  *  - The HTTP revision/ return code
661  *  - The Content-Type, Mime-Type and charset used
662  *  - The Location for redirect processing.
663  *
664  * Returns -1 in case of failure, the file descriptor number otherwise
665  */
666 
667 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)668 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
669     const char *cur = line;
670 
671     if (line == NULL) return;
672 
673     if (!strncmp(line, "HTTP/", 5)) {
674         int version = 0;
675 	int ret = 0;
676 
677 	cur += 5;
678 	while ((*cur >= '0') && (*cur <= '9')) {
679 	    version *= 10;
680 	    version += *cur - '0';
681 	    cur++;
682 	}
683 	if (*cur == '.') {
684 	    cur++;
685 	    if ((*cur >= '0') && (*cur <= '9')) {
686 		version *= 10;
687 		version += *cur - '0';
688 		cur++;
689 	    }
690 	    while ((*cur >= '0') && (*cur <= '9'))
691 		cur++;
692 	} else
693 	    version *= 10;
694 	if ((*cur != ' ') && (*cur != '\t')) return;
695 	while ((*cur == ' ') || (*cur == '\t')) cur++;
696 	if ((*cur < '0') || (*cur > '9')) return;
697 	while ((*cur >= '0') && (*cur <= '9')) {
698 	    ret *= 10;
699 	    ret += *cur - '0';
700 	    cur++;
701 	}
702 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
703 	ctxt->returnValue = ret;
704         ctxt->version = version;
705     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
706         const xmlChar *charset, *last, *mime;
707         cur += 13;
708 	while ((*cur == ' ') || (*cur == '\t')) cur++;
709 	if (ctxt->contentType != NULL)
710 	    xmlFree(ctxt->contentType);
711 	ctxt->contentType = xmlMemStrdup(cur);
712 	mime = (const xmlChar *) cur;
713 	last = mime;
714 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
715 	       (*last != ';') && (*last != ','))
716 	    last++;
717 	if (ctxt->mimeType != NULL)
718 	    xmlFree(ctxt->mimeType);
719 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
720 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
721 	if (charset != NULL) {
722 	    charset += 8;
723 	    last = charset;
724 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
725 	           (*last != ';') && (*last != ','))
726 		last++;
727 	    if (ctxt->encoding != NULL)
728 	        xmlFree(ctxt->encoding);
729 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
730 	}
731     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
732         const xmlChar *charset, *last, *mime;
733         cur += 12;
734 	if (ctxt->contentType != NULL) return;
735 	while ((*cur == ' ') || (*cur == '\t')) cur++;
736 	ctxt->contentType = xmlMemStrdup(cur);
737 	mime = (const xmlChar *) cur;
738 	last = mime;
739 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
740 	       (*last != ';') && (*last != ','))
741 	    last++;
742 	if (ctxt->mimeType != NULL)
743 	    xmlFree(ctxt->mimeType);
744 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
745 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
746 	if (charset != NULL) {
747 	    charset += 8;
748 	    last = charset;
749 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
750 	           (*last != ';') && (*last != ','))
751 		last++;
752 	    if (ctxt->encoding != NULL)
753 	        xmlFree(ctxt->encoding);
754 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
755 	}
756     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
757         cur += 9;
758 	while ((*cur == ' ') || (*cur == '\t')) cur++;
759 	if (ctxt->location != NULL)
760 	    xmlFree(ctxt->location);
761 	if (*cur == '/') {
762 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
763 	    xmlChar *tmp_loc =
764 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
765 	    ctxt->location =
766 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
767 	} else {
768 	    ctxt->location = xmlMemStrdup(cur);
769 	}
770     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
771         cur += 17;
772 	while ((*cur == ' ') || (*cur == '\t')) cur++;
773 	if (ctxt->authHeader != NULL)
774 	    xmlFree(ctxt->authHeader);
775 	ctxt->authHeader = xmlMemStrdup(cur);
776     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
777         cur += 19;
778 	while ((*cur == ' ') || (*cur == '\t')) cur++;
779 	if (ctxt->authHeader != NULL)
780 	    xmlFree(ctxt->authHeader);
781 	ctxt->authHeader = xmlMemStrdup(cur);
782 #ifdef LIBXML_ZLIB_ENABLED
783     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
784 	cur += 17;
785 	while ((*cur == ' ') || (*cur == '\t')) cur++;
786 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
787 	    ctxt->usesGzip = 1;
788 
789 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
790 
791 	    if (ctxt->strm != NULL) {
792 		ctxt->strm->zalloc = Z_NULL;
793 		ctxt->strm->zfree = Z_NULL;
794 		ctxt->strm->opaque = Z_NULL;
795 		ctxt->strm->avail_in = 0;
796 		ctxt->strm->next_in = Z_NULL;
797 
798 		inflateInit2( ctxt->strm, 31 );
799 	    }
800 	}
801 #endif
802     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
803 	cur += 15;
804 	ctxt->ContentLength = strtol( cur, NULL, 10 );
805     }
806 }
807 
808 /**
809  * xmlNanoHTTPConnectAttempt:
810  * @addr:  a socket address structure
811  *
812  * Attempt a connection to the given IP:port endpoint. It forces
813  * non-blocking semantic on the socket, and allow 60 seconds for
814  * the host to answer.
815  *
816  * Returns -1 in case of failure, the file descriptor number otherwise
817  */
818 
819 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)820 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
821 {
822 #ifndef HAVE_POLL_H
823     fd_set wfd;
824 #ifdef _WINSOCKAPI_
825     fd_set xfd;
826 #endif
827     struct timeval tv;
828 #else /* !HAVE_POLL_H */
829     struct pollfd p;
830 #endif /* !HAVE_POLL_H */
831     int status;
832 
833     int addrlen;
834 
835     SOCKET s;
836 
837 #ifdef SUPPORT_IP6
838     if (addr->sa_family == AF_INET6) {
839         s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
840         addrlen = sizeof(struct sockaddr_in6);
841     } else
842 #endif
843     {
844         s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
845         addrlen = sizeof(struct sockaddr_in);
846     }
847     if (s == INVALID_SOCKET) {
848         __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
849         return INVALID_SOCKET;
850     }
851 #ifdef _WINSOCKAPI_
852     {
853         u_long one = 1;
854 
855         status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
856     }
857 #else /* _WINSOCKAPI_ */
858 #if defined(VMS)
859     {
860         int enable = 1;
861 
862         status = ioctl(s, FIONBIO, &enable);
863     }
864 #else /* VMS */
865     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
866 #ifdef O_NONBLOCK
867         status |= O_NONBLOCK;
868 #else /* O_NONBLOCK */
869 #ifdef F_NDELAY
870         status |= F_NDELAY;
871 #endif /* F_NDELAY */
872 #endif /* !O_NONBLOCK */
873         status = fcntl(s, F_SETFL, status);
874     }
875     if (status < 0) {
876         __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
877         closesocket(s);
878         return INVALID_SOCKET;
879     }
880 #endif /* !VMS */
881 #endif /* !_WINSOCKAPI_ */
882 
883     if (connect(s, addr, addrlen) == -1) {
884         switch (socket_errno()) {
885             case EINPROGRESS:
886             case EWOULDBLOCK:
887                 break;
888             default:
889                 __xmlIOErr(XML_FROM_HTTP, 0,
890                            "error connecting to HTTP server");
891                 closesocket(s);
892                 return INVALID_SOCKET;
893         }
894     }
895 #ifndef HAVE_POLL_H
896     tv.tv_sec = timeout;
897     tv.tv_usec = 0;
898 
899 #ifdef _MSC_VER
900 #pragma warning(push)
901 #pragma warning(disable: 4018)
902 #endif
903 #ifndef _WINSOCKAPI_
904     if (s > FD_SETSIZE)
905         return INVALID_SOCKET;
906 #endif
907     FD_ZERO(&wfd);
908     FD_SET(s, &wfd);
909 
910 #ifdef _WINSOCKAPI_
911     FD_ZERO(&xfd);
912     FD_SET(s, &xfd);
913 
914     switch (select(s + 1, NULL, &wfd, &xfd, &tv))
915 #else
916     switch (select(s + 1, NULL, &wfd, NULL, &tv))
917 #endif
918 #ifdef _MSC_VER
919 #pragma warning(pop)
920 #endif
921 
922 #else /* !HAVE_POLL_H */
923     p.fd = s;
924     p.events = POLLOUT;
925     switch (poll(&p, 1, timeout * 1000))
926 #endif /* !HAVE_POLL_H */
927 
928     {
929         case 0:
930             /* Time out */
931             __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
932             closesocket(s);
933             return INVALID_SOCKET;
934         case -1:
935             /* Ermm.. ?? */
936             __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
937             closesocket(s);
938             return INVALID_SOCKET;
939     }
940 
941 #ifndef HAVE_POLL_H
942     if (FD_ISSET(s, &wfd)
943 #ifdef _WINSOCKAPI_
944         || FD_ISSET(s, &xfd)
945 #endif
946         )
947 #else /* !HAVE_POLL_H */
948     if (p.revents == POLLOUT)
949 #endif /* !HAVE_POLL_H */
950     {
951         XML_SOCKLEN_T len;
952 
953         len = sizeof(status);
954 #ifdef SO_ERROR
955         if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
956             0) {
957             /* Solaris error code */
958             __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
959             closesocket(s);
960             return INVALID_SOCKET;
961         }
962 #endif
963         if (status) {
964             __xmlIOErr(XML_FROM_HTTP, 0,
965                        "Error connecting to remote host");
966             closesocket(s);
967             errno = status;
968             return INVALID_SOCKET;
969         }
970     } else {
971         /* pbm */
972         __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
973         closesocket(s);
974         return INVALID_SOCKET;
975     }
976 
977     return (s);
978 }
979 
980 /**
981  * xmlNanoHTTPConnectHost:
982  * @host:  the host name
983  * @port:  the port number
984  *
985  * Attempt a connection to the given host:port endpoint. It tries
986  * the multiple IP provided by the DNS if available.
987  *
988  * Returns -1 in case of failure, the file descriptor number otherwise
989  */
990 
991 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)992 xmlNanoHTTPConnectHost(const char *host, int port)
993 {
994     struct sockaddr *addr = NULL;
995     struct sockaddr_in sockin;
996 
997 #ifdef SUPPORT_IP6
998     struct sockaddr_in6 sockin6;
999 #endif
1000     SOCKET s;
1001 
1002     memset (&sockin, 0, sizeof(sockin));
1003 
1004 #if defined(SUPPORT_IP6)
1005     {
1006 	int status;
1007 	struct addrinfo hints, *res, *result;
1008 
1009         memset (&sockin6, 0, sizeof(sockin6));
1010 
1011 	result = NULL;
1012 	memset (&hints, 0,sizeof(hints));
1013 	hints.ai_socktype = SOCK_STREAM;
1014 
1015 	status = getaddrinfo (host, NULL, &hints, &result);
1016 	if (status) {
1017 	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1018 	    return INVALID_SOCKET;
1019 	}
1020 
1021 	for (res = result; res; res = res->ai_next) {
1022 	    if (res->ai_family == AF_INET) {
1023 		if ((size_t)res->ai_addrlen > sizeof(sockin)) {
1024 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1025 		    freeaddrinfo (result);
1026 		    return INVALID_SOCKET;
1027 		}
1028 		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1029 		sockin.sin_port = htons (port);
1030 		addr = (struct sockaddr *)&sockin;
1031 	    } else if (res->ai_family == AF_INET6) {
1032 		if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
1033 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1034 		    freeaddrinfo (result);
1035 		    return INVALID_SOCKET;
1036 		}
1037 		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1038 		sockin6.sin6_port = htons (port);
1039 		addr = (struct sockaddr *)&sockin6;
1040 	    } else
1041 		continue;              /* for */
1042 
1043 	    s = xmlNanoHTTPConnectAttempt (addr);
1044 	    if (s != INVALID_SOCKET) {
1045 		freeaddrinfo (result);
1046 		return (s);
1047 	    }
1048 	}
1049 
1050 	if (result)
1051 	    freeaddrinfo (result);
1052     }
1053 #else
1054     {
1055         struct hostent *h;
1056         struct in_addr ia;
1057         int i;
1058 
1059 	h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
1060 	if (h == NULL) {
1061 
1062 /*
1063  * Okay, I got fed up by the non-portability of this error message
1064  * extraction code. it work on Linux, if it work on your platform
1065  * and one want to enable it, send me the defined(foobar) needed
1066  */
1067 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
1068 	    const char *h_err_txt = "";
1069 
1070 	    switch (h_errno) {
1071 		case HOST_NOT_FOUND:
1072 		    h_err_txt = "Authoritative host not found";
1073 		    break;
1074 
1075 		case TRY_AGAIN:
1076 		    h_err_txt =
1077 			"Non-authoritative host not found or server failure.";
1078 		    break;
1079 
1080 		case NO_RECOVERY:
1081 		    h_err_txt =
1082 			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1083 		    break;
1084 
1085 #ifdef NO_ADDRESS
1086 		case NO_ADDRESS:
1087 		    h_err_txt =
1088 			"Valid name, no data record of requested type.";
1089 		    break;
1090 #endif
1091 
1092 		default:
1093 		    h_err_txt = "No error text defined.";
1094 		    break;
1095 	    }
1096 	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1097 #else
1098 	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1099 #endif
1100 	    return INVALID_SOCKET;
1101 	}
1102 
1103 	for (i = 0; h->h_addr_list[i]; i++) {
1104 	    if (h->h_addrtype == AF_INET) {
1105 		/* A records (IPv4) */
1106 		if ((unsigned int) h->h_length > sizeof(ia)) {
1107 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1108 		    return INVALID_SOCKET;
1109 		}
1110 		memcpy (&ia, h->h_addr_list[i], h->h_length);
1111 		sockin.sin_family = h->h_addrtype;
1112 		sockin.sin_addr = ia;
1113 		sockin.sin_port = (unsigned short)htons ((unsigned short)port);
1114 		addr = (struct sockaddr *) &sockin;
1115 	    } else
1116 		break;              /* for */
1117 
1118 	    s = xmlNanoHTTPConnectAttempt (addr);
1119 	    if (s != INVALID_SOCKET)
1120 		return (s);
1121 	}
1122     }
1123 #endif
1124 
1125     return INVALID_SOCKET;
1126 }
1127 
1128 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if not NULL, receives the Content-Type of the response
 *                when one is available
 *
 * Opens a connection to the indicated resource via HTTP GET. This is a
 * thin wrapper around xmlNanoHTTPMethod() called without an explicit
 * method, request body or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided, must be freed by the caller.
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    /* The slot is output-only here: clear whatever the caller left in it. */
    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1147 
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if not NULL, receives the Content-Type of the response
 *                when one is available
 * @redir:  if not NULL, receives the redirected URL when one is available
 *
 * Opens a connection to the indicated resource via HTTP GET. This is a
 * thin wrapper around xmlNanoHTTPMethodRedir() called without an explicit
 * method, request body or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided, must be freed by the caller.
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    /* Both slots are output-only here: start from a known NULL state. */
    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1168 
1169 /**
1170  * xmlNanoHTTPRead:
1171  * @ctx:  the HTTP context
1172  * @dest:  a buffer
1173  * @len:  the buffer length
1174  *
1175  * This function tries to read @len bytes from the existing HTTP connection
1176  * and saves them in @dest. This is a blocking call.
1177  *
1178  * Returns the number of byte read. 0 is an indication of an end of connection.
1179  *         -1 indicates a parameter error.
1180  */
1181 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1182 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1183     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1184 #ifdef LIBXML_ZLIB_ENABLED
1185     int bytes_read = 0;
1186     int orig_avail_in;
1187     int z_ret;
1188 #endif
1189 
1190     if (ctx == NULL) return(-1);
1191     if (dest == NULL) return(-1);
1192     if (len <= 0) return(0);
1193 
1194 #ifdef LIBXML_ZLIB_ENABLED
1195     if (ctxt->usesGzip == 1) {
1196         if (ctxt->strm == NULL) return(0);
1197 
1198         ctxt->strm->next_out = dest;
1199         ctxt->strm->avail_out = len;
1200 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1201 
1202         while (ctxt->strm->avail_out > 0 &&
1203 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1204             orig_avail_in = ctxt->strm->avail_in =
1205 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1206             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1207 
1208             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1209             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1210 
1211             if (z_ret != Z_OK) break;
1212 	}
1213 
1214         ctxt->inrptr += bytes_read;
1215         return(len - ctxt->strm->avail_out);
1216     }
1217 #endif
1218 
1219     while (ctxt->inptr - ctxt->inrptr < len) {
1220         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1221     }
1222     if (ctxt->inptr - ctxt->inrptr < len)
1223         len = ctxt->inptr - ctxt->inrptr;
1224     if (len > 0) {
1225         memcpy(dest, ctxt->inrptr, len);
1226         ctxt->inrptr += len;
1227     }
1228     return(len);
1229 }
1230 
1231 /**
1232  * xmlNanoHTTPClose:
1233  * @ctx:  the HTTP context
1234  *
1235  * This function closes an HTTP context, it ends up the connection and
1236  * free all data related to it.
1237  */
1238 void
xmlNanoHTTPClose(void * ctx)1239 xmlNanoHTTPClose(void *ctx) {
1240     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1241 
1242     if (ctx == NULL) return;
1243 
1244     xmlNanoHTTPFreeCtxt(ctxt);
1245 }
1246 
1247 
/**
 * xmlNanoHTTPHostnameMatch:
 * @pattern: The pattern as it appears in no_proxy environment variable
 * @hostname: The hostname to test as it appears in the URL
 *
 * Tests whether a given hostname matches a pattern. The pattern usually is
 * a token from the no_proxy environment variable. Wildcards in the pattern
 * are not supported; a single leading '.' in the pattern is ignored.
 *
 * The comparison is a case-insensitive suffix match: the hostname matches
 * when it is equal to the pattern, or when it ends with the pattern and the
 * character just before that suffix is a '.', e.g. "example.com" matches
 * "www.example.com" but not "badexample.com".
 *
 * Returns 1 if hostname matches the pattern, 0 otherwise (including when
 * @pattern is NULL or empty, or @hostname is NULL).
 */

static int
xmlNanoHTTPHostnameMatch(const char *pattern, const char *hostname) {
    int idx_pattern, idx_hostname;
    const char * pattern_start;

    if (!pattern || *pattern == '\0' || !hostname)
        return 0;

    /* Ignore a leading '.' so ".example.com" behaves like "example.com" */
    pattern_start = (*pattern == '.') ? pattern + 1 : pattern;

    /*
     * Both indices start on the terminating NUL of their string, so the
     * first loop iteration always compares equal.
     */
    idx_pattern = (int) strlen(pattern_start);
    idx_hostname = (int) strlen(hostname);

    /* Walk both strings backwards until a mismatch or one is exhausted. */
    for (; idx_pattern >= 0 && idx_hostname >= 0;
           --idx_pattern, --idx_hostname) {
        /*
         * <ctype.h> functions require a value representable as unsigned
         * char (or EOF); a plain char holding a byte >= 0x80 may be
         * negative, so convert before the call.
         */
        if (tolower((unsigned char) pattern_start[idx_pattern]) !=
            tolower((unsigned char) hostname[idx_hostname]))
            break;
    }

    /*
     * The whole pattern must have been consumed; the hostname must either
     * be consumed as well (exact match) or stop on a label separator.
     */
    return idx_pattern == -1 &&
           (idx_hostname == -1 || hostname[idx_hostname] == '.');
}
1287 
1288 
/**
 * xmlNanoHTTPBypassProxy:
 * @hostname: The hostname as it appears in the URL
 *
 * Evaluates the no_proxy environment variable and returns whether the
 * proxy server should be bypassed for a given host. The variable is read
 * as a comma-separated list; whitespace in front of each entry is skipped
 * and every entry is tested with xmlNanoHTTPHostnameMatch().
 *
 * Returns 1 if a proxy server should be bypassed for the given hostname,
 * 0 otherwise (including when no_proxy is unset or empty, or when the
 * working copy of the variable cannot be allocated).
 */

static int
xmlNanoHTTPBypassProxy(const char *hostname) {
    size_t envlen;
    char *env = getenv("no_proxy"), *cpy=NULL, *p=NULL;
    if (!env)
        return 0;

    /*
     * Work on a private copy because the tokenizer below writes NUL
     * terminators into the string.
     * (Avoid strdup because it's not portable.)
     */
    envlen = strlen(env) + 1;
    cpy = xmlMalloc(envlen);
    if (cpy == NULL)
        return 0;       /* out of memory: fall back to using the proxy */
    memcpy(cpy, env, envlen);
    env = cpy;

    /* The remainder of the function is basically a tokenizer: */
    /* (unsigned char casts: <ctype.h> is undefined for negative values) */
    while (isspace((unsigned char) *env))
        ++env;
    if (*env == '\0') {
        xmlFree(cpy);
        return 0;
    }

    p = env;
    while (*env) {

        if (*env != ',') {
            ++env;
            continue;
        }

        /* Terminate the current entry in place and test it. */
        *(env++) = '\0';
        if (xmlNanoHTTPHostnameMatch(p, hostname)) {
            xmlFree(cpy);
            return 1;
        }

        while (isspace((unsigned char) *env))
            ++env;
        p = env;
    }

    /* Last entry (not followed by a comma). */
    if (xmlNanoHTTPHostnameMatch(p, hostname)) {
        xmlFree(cpy);
        return 1;
    }

    xmlFree(cpy);
    return 0;
}
1346 
1347 
1348 /**
1349  * xmlNanoHTTPMethodRedir:
1350  * @URL:  The URL to load
1351  * @method:  the HTTP method to use
1352  * @input:  the input string if any
1353  * @contentType:  the Content-Type information IN and OUT
1354  * @redir:  the redirected URL OUT
1355  * @headers:  the extra headers
1356  * @ilen:  input length
1357  *
1358  * This function try to open a connection to the indicated resource
1359  * via HTTP using the given @method, adding the given extra headers
1360  * and the input buffer for the request content.
1361  *
1362  * Returns NULL in case of failure, otherwise a request handler.
1363  *     The contentType, or redir, if provided must be freed by the caller
1364  */
1365 
1366 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1367 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1368                   char **contentType, char **redir,
1369 		  const char *headers, int ilen ) {
1370     xmlNanoHTTPCtxtPtr ctxt;
1371     char *bp, *p;
1372     int blen;
1373     SOCKET ret;
1374     int nbRedirects = 0;
1375     int use_proxy;
1376     char *redirURL = NULL;
1377 
1378     if (URL == NULL) return(NULL);
1379     if (method == NULL) method = "GET";
1380     xmlNanoHTTPInit();
1381 
1382 retry:
1383     if (redirURL == NULL) {
1384 	ctxt = xmlNanoHTTPNewCtxt(URL);
1385 	if (ctxt == NULL)
1386 	    return(NULL);
1387     } else {
1388 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1389 	if (ctxt == NULL)
1390 	    return(NULL);
1391 	ctxt->location = xmlMemStrdup(redirURL);
1392     }
1393 
1394     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1395 	__xmlIOErr(XML_FROM_IO, XML_IO_UNSUPPORTED_PROTOCOL, ctxt->protocol);
1396         xmlNanoHTTPFreeCtxt(ctxt);
1397 	if (redirURL != NULL) xmlFree(redirURL);
1398         return(NULL);
1399     }
1400     if (ctxt->hostname == NULL) {
1401 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1402 	           "Failed to identify host in URI");
1403         xmlNanoHTTPFreeCtxt(ctxt);
1404 	if (redirURL != NULL) xmlFree(redirURL);
1405         return(NULL);
1406     }
1407     use_proxy = proxy && !xmlNanoHTTPBypassProxy(ctxt->hostname);
1408     if (use_proxy) {
1409 	blen = strlen(ctxt->hostname) * 2 + 16;
1410 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1411     }
1412     else {
1413 	blen = strlen(ctxt->hostname);
1414 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1415     }
1416     if (ret == INVALID_SOCKET) {
1417         xmlNanoHTTPFreeCtxt(ctxt);
1418 	if (redirURL != NULL) xmlFree(redirURL);
1419         return(NULL);
1420     }
1421     ctxt->fd = ret;
1422 
1423     if (input == NULL)
1424 	ilen = 0;
1425     else
1426 	blen += 36;
1427 
1428     if (headers != NULL)
1429 	blen += strlen(headers) + 2;
1430     if (contentType && *contentType)
1431 	/* reserve for string plus 'Content-Type: \r\n" */
1432 	blen += strlen(*contentType) + 16;
1433     if (ctxt->query != NULL)
1434 	/* 1 for '?' */
1435 	blen += strlen(ctxt->query) + 1;
1436     blen += strlen(method) + strlen(ctxt->path) + 24;
1437 #ifdef LIBXML_ZLIB_ENABLED
1438     /* reserve for possible 'Accept-Encoding: gzip' string */
1439     blen += 23;
1440 #endif
1441     if (ctxt->port != 80) {
1442 	/* reserve space for ':xxxxx', incl. potential proxy */
1443 	if (use_proxy)
1444 	    blen += 17;
1445 	else
1446 	    blen += 11;
1447     }
1448     bp = (char*)xmlMallocAtomic(blen);
1449     if ( bp == NULL ) {
1450         xmlNanoHTTPFreeCtxt( ctxt );
1451 	xmlHTTPErrMemory();
1452 	return ( NULL );
1453     }
1454 
1455     p = bp;
1456 
1457     if (use_proxy) {
1458 	if (ctxt->port != 80) {
1459 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1460 			method, ctxt->hostname,
1461 			ctxt->port, ctxt->path );
1462 	}
1463 	else
1464 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1465 			ctxt->hostname, ctxt->path);
1466     }
1467     else
1468 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1469 
1470     if (ctxt->query != NULL)
1471 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1472 
1473     if (ctxt->port == 80) {
1474         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1475 		    ctxt->hostname);
1476     } else {
1477         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1478 		    ctxt->hostname, ctxt->port);
1479     }
1480 
1481 #ifdef LIBXML_ZLIB_ENABLED
1482     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1483 #endif
1484 
1485     if (contentType != NULL && *contentType)
1486 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1487 
1488     if (headers != NULL)
1489 	p += snprintf( p, blen - (p - bp), "%s", headers );
1490 
1491     if (input != NULL)
1492 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1493     else
1494 	snprintf(p, blen - (p - bp), "\r\n");
1495 
1496     ctxt->outptr = ctxt->out = bp;
1497     ctxt->state = XML_NANO_HTTP_WRITE;
1498     blen = strlen( ctxt->out );
1499     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1500 
1501     if ( input != NULL ) {
1502 	xmlNanoHTTPSend( ctxt, input, ilen );
1503     }
1504 
1505     ctxt->state = XML_NANO_HTTP_READ;
1506 
1507     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1508         if (*p == 0) {
1509 	    ctxt->content = ctxt->inrptr;
1510 	    xmlFree(p);
1511 	    break;
1512 	}
1513 	xmlNanoHTTPScanAnswer(ctxt, p);
1514 
1515         xmlFree(p);
1516     }
1517 
1518     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1519         (ctxt->returnValue < 400)) {
1520 	while ( xmlNanoHTTPRecv(ctxt) > 0 )
1521             ;
1522         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1523 	    nbRedirects++;
1524 	    if (redirURL != NULL)
1525 		xmlFree(redirURL);
1526 	    redirURL = xmlMemStrdup(ctxt->location);
1527 	    xmlNanoHTTPFreeCtxt(ctxt);
1528 	    goto retry;
1529 	}
1530 	xmlNanoHTTPFreeCtxt(ctxt);
1531 	if (redirURL != NULL) xmlFree(redirURL);
1532 	return(NULL);
1533     }
1534 
1535     if (contentType != NULL) {
1536 	if (ctxt->contentType != NULL)
1537 	    *contentType = xmlMemStrdup(ctxt->contentType);
1538 	else
1539 	    *contentType = NULL;
1540     }
1541 
1542     if ((redir != NULL) && (redirURL != NULL)) {
1543 	*redir = redirURL;
1544     } else {
1545 	if (redirURL != NULL)
1546 	    xmlFree(redirURL);
1547 	if (redir != NULL)
1548 	    *redir = NULL;
1549     }
1550 
1551     return((void *) ctxt);
1552 }
1553 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Tries to open a connection to the indicated resource via HTTP using the
 * given @method, adding the given extra headers and the input buffer for
 * the request content. Equivalent to xmlNanoHTTPMethodRedir() called with
 * a NULL redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1577 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * Fetches the indicated resource via HTTP GET and saves its content in
 * the file. An existing file is truncated before the content is written.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* descriptor 1 is the standard output (0 is the standard input) */
        fd = 1;
    } else {
        /*
         * O_TRUNC: without it, writing a short document over a longer
         * existing file would leave stale bytes after the new content.
         */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written < 0) {
            if (errno == EINTR)
                continue;       /* interrupted before writing: retry */
            ret = -1;
            break;
        }
        if (written == 0) {
            /* no progress possible; avoid spinning forever */
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    /* buf points into the context, so close the context only now. */
    xmlNanoHTTPClose(ctxt);
    /* The standard output belongs to the process; do not close it. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1628 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to the standard output
 *
 * Saves the output of the HTTP transaction to a file. An existing file is
 * truncated before the content is written. Once both arguments have been
 * checked, the context is closed and freed on every path, including when
 * the file cannot be opened.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* descriptor 1 is the standard output (0 is the standard input) */
        fd = 1;
    } else {
        /*
         * O_TRUNC: without it, writing a short document over a longer
         * existing file would leave stale bytes after the new content.
         */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written < 0) {
            if (errno == EINTR)
                continue;       /* interrupted before writing: retry */
            ret = -1;
            break;
        }
        if (written == 0) {
            /* no progress possible; avoid spinning forever */
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    /* buf points into the context, so close the context only now. */
    xmlNanoHTTPClose(ctxt);
    /* The standard output belongs to the process; do not close it. */
    if (!toStdout)
        close(fd);
    return(ret);
}
#endif /* LIBXML_OUTPUT_ENABLED */
1671 
1672 /**
1673  * xmlNanoHTTPReturnCode:
1674  * @ctx:  the HTTP context
1675  *
1676  * Get the latest HTTP return code received
1677  *
1678  * Returns the HTTP return code for the request.
1679  */
1680 int
xmlNanoHTTPReturnCode(void * ctx)1681 xmlNanoHTTPReturnCode(void *ctx) {
1682     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1683 
1684     if (ctxt == NULL) return(-1);
1685 
1686     return(ctxt->returnValue);
1687 }
1688 
1689 /**
1690  * xmlNanoHTTPAuthHeader:
1691  * @ctx:  the HTTP context
1692  *
1693  * Get the authentication header of an HTTP context
1694  *
1695  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1696  * header.
1697  */
1698 const char *
xmlNanoHTTPAuthHeader(void * ctx)1699 xmlNanoHTTPAuthHeader(void *ctx) {
1700     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1701 
1702     if (ctxt == NULL) return(NULL);
1703 
1704     return(ctxt->authHeader);
1705 }
1706 
1707 /**
1708  * xmlNanoHTTPContentLength:
1709  * @ctx:  the HTTP context
1710  *
1711  * Provides the specified content length from the HTTP header.
1712  *
1713  * Return the specified content length from the HTTP header.  Note that
1714  * a value of -1 indicates that the content length element was not included in
1715  * the response header.
1716  */
1717 int
xmlNanoHTTPContentLength(void * ctx)1718 xmlNanoHTTPContentLength( void * ctx ) {
1719     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1720 
1721     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1722 }
1723 
1724 /**
1725  * xmlNanoHTTPRedir:
1726  * @ctx:  the HTTP context
1727  *
1728  * Provides the specified redirection URL if available from the HTTP header.
1729  *
1730  * Return the specified redirection URL or NULL if not redirected.
1731  */
1732 const char *
xmlNanoHTTPRedir(void * ctx)1733 xmlNanoHTTPRedir( void * ctx ) {
1734     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1735 
1736     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1737 }
1738 
1739 /**
1740  * xmlNanoHTTPEncoding:
1741  * @ctx:  the HTTP context
1742  *
1743  * Provides the specified encoding if specified in the HTTP headers.
1744  *
1745  * Return the specified encoding or NULL if not available
1746  */
1747 const char *
xmlNanoHTTPEncoding(void * ctx)1748 xmlNanoHTTPEncoding( void * ctx ) {
1749     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1750 
1751     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1752 }
1753 
1754 /**
1755  * xmlNanoHTTPMimeType:
1756  * @ctx:  the HTTP context
1757  *
1758  * Provides the specified Mime-Type if specified in the HTTP headers.
1759  *
1760  * Return the specified Mime-Type or NULL if not available
1761  */
1762 const char *
xmlNanoHTTPMimeType(void * ctx)1763 xmlNanoHTTPMimeType( void * ctx ) {
1764     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1765 
1766     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1767 }
1768 
1769 /**
1770  * xmlNanoHTTPFetchContent:
1771  * @ctx:  the HTTP context
1772  * @ptr:  pointer to set to the content buffer.
1773  * @len:  integer pointer to hold the length of the content
1774  *
1775  * Check if all the content was read
1776  *
1777  * Returns 0 if all the content was read and available, returns
1778  * -1 if received content length was less than specified or an error
1779  * occurred.
1780  */
1781 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1782 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1783     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1784 
1785     int			rc = 0;
1786     int			cur_lgth;
1787     int			rcvd_lgth;
1788     int			dummy_int;
1789     char *		dummy_ptr = NULL;
1790 
1791     /*  Dummy up return input parameters if not provided  */
1792 
1793     if ( len == NULL )
1794         len = &dummy_int;
1795 
1796     if ( ptr == NULL )
1797         ptr = &dummy_ptr;
1798 
1799     /*  But can't work without the context pointer  */
1800 
1801     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1802         *len = 0;
1803 	*ptr = NULL;
1804 	return ( -1 );
1805     }
1806 
1807     rcvd_lgth = ctxt->inptr - ctxt->content;
1808 
1809     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1810 
1811 	rcvd_lgth += cur_lgth;
1812 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1813 	    break;
1814     }
1815 
1816     *ptr = ctxt->content;
1817     *len = rcvd_lgth;
1818 
1819     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1820         rc = -1;
1821     else if ( rcvd_lgth == 0 )
1822 	rc = -1;
1823 
1824     return ( rc );
1825 }
1826 
#ifdef STANDALONE
/*
 * Command line driver built only when STANDALONE is defined:
 *     prog URL [filename]
 * fetches URL with xmlNanoHTTPFetch(); when no filename is given, "-" is
 * passed instead. Without any argument a usage message is printed.
 * Always returns 0.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    const char *target;

    if (argv[1] == NULL) {
        fprintf(stderr,
		"%s: minimal HTTP GET implementation\n", argv[0]);
        fprintf(stderr,
		"\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
        target = (argv[2] != NULL) ? argv[2] : "-";
        xmlNanoHTTPFetch(argv[1], target, &contentType);
        if (contentType != NULL)
            xmlFree(contentType);
    }

    xmlNanoHTTPCleanup();
    return(0);
}
#endif /* STANDALONE */
1847 #else /* !LIBXML_HTTP_ENABLED */
#ifdef STANDALONE
#include <stdio.h>
/*
 * Placeholder entry point for a STANDALONE build made without HTTP
 * support: reports that fact on stderr and returns 0.
 */
int main(int argc, char **argv) {
    const char *progname = argv[0];

    fprintf(stderr, "%s : HTTP support not compiled in\n", progname);
    return(0);
}
#endif /* STANDALONE */
1856 #endif /* LIBXML_HTTP_ENABLED */
1857