• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
 *             focuses on size, streamability, reentrancy and portability
 *
 * This is clearly not a general purpose HTTP implementation.
 * If you are looking for one, check:
 *         http://www.w3.org/Library/
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 
23 #ifdef HAVE_UNISTD_H
24 #include <unistd.h>
25 #elif defined (_WIN32)
26 #include <io.h>
27 #endif
28 #ifdef HAVE_SYS_SOCKET_H
29 #include <sys/socket.h>
30 #endif
31 #ifdef HAVE_NETINET_IN_H
32 #include <netinet/in.h>
33 #endif
34 #ifdef HAVE_ARPA_INET_H
35 #include <arpa/inet.h>
36 #endif
37 #ifdef HAVE_NETDB_H
38 #include <netdb.h>
39 #endif
40 #ifdef HAVE_FCNTL_H
41 #include <fcntl.h>
42 #endif
43 #ifdef HAVE_SYS_TIME_H
44 #include <sys/time.h>
45 #endif
46 #ifndef HAVE_POLL_H
47 #ifdef HAVE_SYS_SELECT_H
48 #include <sys/select.h>
49 #endif
50 #else
51 #include <poll.h>
52 #endif
53 #ifdef LIBXML_ZLIB_ENABLED
54 #include <zlib.h>
55 #endif
56 
57 
58 #ifdef VMS
59 #include <stropts>
60 #define XML_SOCKLEN_T unsigned int
61 #endif
62 
63 #if defined(_WIN32)
64 #include <wsockcompat.h>
65 #endif
66 
67 #include <libxml/globals.h>
68 #include <libxml/xmlerror.h>
69 #include <libxml/xmlmemory.h>
70 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
71 #include <libxml/nanohttp.h>
72 #include <libxml/globals.h>
73 #include <libxml/uri.h>
74 
75 #include "private/error.h"
76 #include "private/io.h"
77 
78 /**
79  * A couple portability macros
80  */
81 #ifndef _WINSOCKAPI_
82 #define closesocket(s) close(s)
83 #define SOCKET int
84 #define INVALID_SOCKET (-1)
85 #endif
86 
87 #ifndef XML_SOCKLEN_T
88 #define XML_SOCKLEN_T unsigned int
89 #endif
90 
91 #define GETHOSTBYNAME_ARG_CAST (char *)
92 #define SEND_ARG2_CAST (char *)
93 
94 #ifdef STANDALONE
95 #define DEBUG_HTTP
96 #define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
97 #define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
98 #endif
99 
100 #define XML_NANO_HTTP_MAX_REDIR	10
101 
102 #define XML_NANO_HTTP_CHUNK	4096
103 
104 #define XML_NANO_HTTP_CLOSED	0
105 #define XML_NANO_HTTP_WRITE	1
106 #define XML_NANO_HTTP_READ	2
107 #define XML_NANO_HTTP_NONE	4
108 
/*
 * Per-connection state of one HTTP transfer: the decomposed URL, the
 * socket, the send/receive buffers and the values extracted from the
 * response headers.
 */
typedef struct xmlNanoHTTPCtxt {
    char *protocol;	/* the protocol name (URL scheme) */
    char *hostname;	/* the host name, IPv6 brackets removed */
    int port;		/* the port, 80 unless the URL gives one */
    char *path;		/* the path within the URL, "/" if the URL has none */
    char *query;	/* the query string, NULL if the URL has none */
    SOCKET fd;		/* the socket, INVALID_SOCKET when there is none */
    int state;		/* XML_NANO_HTTP_* flags, tested as a bit mask */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */
    char *in;		/* the receiving buffer, allocated on first receive */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* allocated size of the receiving buffer */
    int last;		/* result of the last recv(), -1 on failure */
    int returnValue;	/* status code parsed from the HTTP status line */
    int version;        /* protocol version as major * 10 + minor */
    int ContentLength;  /* Content-Length header value, -1 until one is seen */
    char *contentType;	/* the full Content-Type header value */
    char *location;	/* the new URL in case of redirect */
    char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
    char *encoding;	/* charset extracted from the contentType */
    char *mimeType;	/* Mime-Type extracted from the contentType */
#ifdef LIBXML_ZLIB_ENABLED
    z_stream *strm;	/* Zlib stream object, NULL if none was set up */
    int usesGzip;	/* "Content-Encoding: gzip" was detected */
#endif
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
138 
/* set to 1 by xmlNanoHTTPInit(), back to 0 by xmlNanoHTTPCleanup() */
static int initialized = 0;
static char *proxy = NULL;	 /* the proxy host name if any, owned by this module */
static int proxyPort;	/* the proxy port if any */
static unsigned int timeout = 60;/* the select()/poll() timeout in seconds */

/* forward declaration, the definition is not part of this section */
static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
145 
146 /**
147  * xmlHTTPErrMemory:
148  * @extra:  extra information
149  *
150  * Handle an out of memory condition
151  */
152 static void
xmlHTTPErrMemory(const char * extra)153 xmlHTTPErrMemory(const char *extra)
154 {
155     __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
156 }
157 
/**
 * socket_errno:
 *
 * Portability helper giving the error code of the last socket operation.
 * With Winsock the WSA codes handled by this module are translated to
 * their errno counterparts, any other WSA code is passed through as is.
 * Elsewhere this is simply errno.
 *
 * Returns the error code.
 */
static int
socket_errno(void)
{
#ifdef _WINSOCKAPI_
    const int wsaErr = WSAGetLastError();

    if (wsaErr == WSAECONNRESET)
        return(ECONNRESET);
    if (wsaErr == WSAEINPROGRESS)
        return(EINPROGRESS);
    if (wsaErr == WSAEINTR)
        return(EINTR);
    if (wsaErr == WSAESHUTDOWN)
        return(ESHUTDOWN);
    if (wsaErr == WSAEWOULDBLOCK)
        return(EWOULDBLOCK);
    return(wsaErr);
#else
    return(errno);
#endif
}
182 
183 /**
184  * xmlNanoHTTPInit:
185  *
186  * Initialize the HTTP protocol layer.
187  * Currently it just checks for proxy information
188  */
189 
190 void
xmlNanoHTTPInit(void)191 xmlNanoHTTPInit(void) {
192     const char *env;
193 #ifdef _WINSOCKAPI_
194     WSADATA wsaData;
195 #endif
196 
197     if (initialized)
198 	return;
199 
200 #ifdef _WINSOCKAPI_
201     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
202 	return;
203 #endif
204 
205     if (proxy == NULL) {
206 	proxyPort = 80;
207 	env = getenv("no_proxy");
208 	if (env && ((env[0] == '*') && (env[1] == 0)))
209 	    goto done;
210 	env = getenv("http_proxy");
211 	if (env != NULL) {
212 	    xmlNanoHTTPScanProxy(env);
213 	    goto done;
214 	}
215 	env = getenv("HTTP_PROXY");
216 	if (env != NULL) {
217 	    xmlNanoHTTPScanProxy(env);
218 	    goto done;
219 	}
220     }
221 done:
222     initialized = 1;
223 }
224 
225 /**
226  * xmlNanoHTTPCleanup:
227  *
228  * Cleanup the HTTP protocol layer.
229  */
230 
231 void
xmlNanoHTTPCleanup(void)232 xmlNanoHTTPCleanup(void) {
233     if (proxy != NULL) {
234 	xmlFree(proxy);
235 	proxy = NULL;
236     }
237 #ifdef _WINSOCKAPI_
238     if (initialized)
239 	WSACleanup();
240 #endif
241     initialized = 0;
242     return;
243 }
244 
245 /**
246  * xmlNanoHTTPScanURL:
247  * @ctxt:  an HTTP context
248  * @URL:  The URL used to initialize the context
249  *
250  * (Re)Initialize an HTTP context by parsing the URL and finding
251  * the protocol host port and path it indicates.
252  */
253 
254 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)255 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
256     xmlURIPtr uri;
257     int len;
258 
259     /*
260      * Clear any existing data from the context
261      */
262     if (ctxt->protocol != NULL) {
263         xmlFree(ctxt->protocol);
264 	ctxt->protocol = NULL;
265     }
266     if (ctxt->hostname != NULL) {
267         xmlFree(ctxt->hostname);
268 	ctxt->hostname = NULL;
269     }
270     if (ctxt->path != NULL) {
271         xmlFree(ctxt->path);
272 	ctxt->path = NULL;
273     }
274     if (ctxt->query != NULL) {
275         xmlFree(ctxt->query);
276 	ctxt->query = NULL;
277     }
278     if (URL == NULL) return;
279 
280     uri = xmlParseURIRaw(URL, 1);
281     if (uri == NULL)
282 	return;
283 
284     if ((uri->scheme == NULL) || (uri->server == NULL)) {
285 	xmlFreeURI(uri);
286 	return;
287     }
288 
289     ctxt->protocol = xmlMemStrdup(uri->scheme);
290     /* special case of IPv6 addresses, the [] need to be removed */
291     if ((uri->server != NULL) && (*uri->server == '[')) {
292         len = strlen(uri->server);
293 	if ((len > 2) && (uri->server[len - 1] == ']')) {
294 	    ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
295 	} else
296 	    ctxt->hostname = xmlMemStrdup(uri->server);
297     } else
298 	ctxt->hostname = xmlMemStrdup(uri->server);
299     if (uri->path != NULL)
300 	ctxt->path = xmlMemStrdup(uri->path);
301     else
302 	ctxt->path = xmlMemStrdup("/");
303     if (uri->query != NULL)
304 	ctxt->query = xmlMemStrdup(uri->query);
305     if (uri->port != 0)
306 	ctxt->port = uri->port;
307 
308     xmlFreeURI(uri);
309 }
310 
311 /**
312  * xmlNanoHTTPScanProxy:
313  * @URL:  The proxy URL used to initialize the proxy context
314  *
315  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
316  * the protocol host port it indicates.
317  * Should be like http://myproxy/ or http://myproxy:3128/
318  * A NULL URL cleans up proxy information.
319  */
320 
321 void
xmlNanoHTTPScanProxy(const char * URL)322 xmlNanoHTTPScanProxy(const char *URL) {
323     xmlURIPtr uri;
324 
325     if (proxy != NULL) {
326         xmlFree(proxy);
327 	proxy = NULL;
328     }
329     proxyPort = 0;
330 
331 #ifdef DEBUG_HTTP
332     if (URL == NULL)
333 	xmlGenericError(xmlGenericErrorContext,
334 		"Removing HTTP proxy info\n");
335     else
336 	xmlGenericError(xmlGenericErrorContext,
337 		"Using HTTP proxy %s\n", URL);
338 #endif
339     if (URL == NULL) return;
340 
341     uri = xmlParseURIRaw(URL, 1);
342     if ((uri == NULL) || (uri->scheme == NULL) ||
343 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
344 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
345 	if (uri != NULL)
346 	    xmlFreeURI(uri);
347 	return;
348     }
349 
350     proxy = xmlMemStrdup(uri->server);
351     if (uri->port != 0)
352 	proxyPort = uri->port;
353 
354     xmlFreeURI(uri);
355 }
356 
357 /**
358  * xmlNanoHTTPNewCtxt:
359  * @URL:  The URL used to initialize the context
360  *
361  * Allocate and initialize a new HTTP context.
362  *
363  * Returns an HTTP context or NULL in case of error.
364  */
365 
366 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)367 xmlNanoHTTPNewCtxt(const char *URL) {
368     xmlNanoHTTPCtxtPtr ret;
369 
370     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
371     if (ret == NULL) {
372         xmlHTTPErrMemory("allocating context");
373         return(NULL);
374     }
375 
376     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
377     ret->port = 80;
378     ret->returnValue = 0;
379     ret->fd = INVALID_SOCKET;
380     ret->ContentLength = -1;
381 
382     xmlNanoHTTPScanURL(ret, URL);
383 
384     return(ret);
385 }
386 
387 /**
388  * xmlNanoHTTPFreeCtxt:
389  * @ctxt:  an HTTP context
390  *
391  * Frees the context after closing the connection.
392  */
393 
394 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)395 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
396     if (ctxt == NULL) return;
397     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
398     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
399     if (ctxt->path != NULL) xmlFree(ctxt->path);
400     if (ctxt->query != NULL) xmlFree(ctxt->query);
401     if (ctxt->out != NULL) xmlFree(ctxt->out);
402     if (ctxt->in != NULL) xmlFree(ctxt->in);
403     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
404     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
405     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
406     if (ctxt->location != NULL) xmlFree(ctxt->location);
407     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
408 #ifdef LIBXML_ZLIB_ENABLED
409     if (ctxt->strm != NULL) {
410 	inflateEnd(ctxt->strm);
411 	xmlFree(ctxt->strm);
412     }
413 #endif
414 
415     ctxt->state = XML_NANO_HTTP_NONE;
416     if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
417     ctxt->fd = INVALID_SOCKET;
418     xmlFree(ctxt);
419 }
420 
421 /**
422  * xmlNanoHTTPSend:
423  * @ctxt:  an HTTP context
424  *
425  * Send the input needed to initiate the processing on the server side
426  * Returns number of bytes sent or -1 on error.
427  */
428 
429 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)430 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
431 {
432     int total_sent = 0;
433 #ifdef HAVE_POLL_H
434     struct pollfd p;
435 #else
436     struct timeval tv;
437     fd_set wfd;
438 #endif
439 
440     if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
441         while (total_sent < outlen) {
442             int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
443                              outlen - total_sent, 0);
444 
445             if (nsent > 0)
446                 total_sent += nsent;
447             else if ((nsent == -1) &&
448 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
449                      (socket_errno() != EAGAIN) &&
450 #endif
451                      (socket_errno() != EWOULDBLOCK)) {
452                 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
453                 if (total_sent == 0)
454                     total_sent = -1;
455                 break;
456             } else {
457                 /*
458                  * No data sent
459                  * Since non-blocking sockets are used, wait for
460                  * socket to be writable or default timeout prior
461                  * to retrying.
462                  */
463 #ifndef HAVE_POLL_H
464 #ifndef _WINSOCKAPI_
465                 if (ctxt->fd > FD_SETSIZE)
466                     return -1;
467 #endif
468 
469                 tv.tv_sec = timeout;
470                 tv.tv_usec = 0;
471                 FD_ZERO(&wfd);
472 #ifdef _MSC_VER
473 #pragma warning(push)
474 #pragma warning(disable: 4018)
475 #endif
476                 FD_SET(ctxt->fd, &wfd);
477 #ifdef _MSC_VER
478 #pragma warning(pop)
479 #endif
480                 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
481 #else
482                 p.fd = ctxt->fd;
483                 p.events = POLLOUT;
484                 (void) poll(&p, 1, timeout * 1000);
485 #endif /* !HAVE_POLL_H */
486             }
487         }
488     }
489 
490     return total_sent;
491 }
492 
493 /**
494  * xmlNanoHTTPRecv:
495  * @ctxt:  an HTTP context
496  *
497  * Read information coming from the HTTP connection.
498  * This is a blocking call (but it blocks in select(), not read()).
499  *
500  * Returns the number of byte read or -1 in case of error.
501  */
502 
503 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)504 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
505 {
506 #ifdef HAVE_POLL_H
507     struct pollfd p;
508 #else
509     fd_set rfd;
510     struct timeval tv;
511 #endif
512 
513 
514     while (ctxt->state & XML_NANO_HTTP_READ) {
515         if (ctxt->in == NULL) {
516             ctxt->in = (char *) xmlMallocAtomic(65000);
517             if (ctxt->in == NULL) {
518                 xmlHTTPErrMemory("allocating input");
519                 ctxt->last = -1;
520                 return (-1);
521             }
522             ctxt->inlen = 65000;
523             ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
524         }
525         if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
526             int delta = ctxt->inrptr - ctxt->in;
527             int len = ctxt->inptr - ctxt->inrptr;
528 
529             memmove(ctxt->in, ctxt->inrptr, len);
530             ctxt->inrptr -= delta;
531             ctxt->content -= delta;
532             ctxt->inptr -= delta;
533         }
534         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
535             int d_inptr = ctxt->inptr - ctxt->in;
536             int d_content = ctxt->content - ctxt->in;
537             int d_inrptr = ctxt->inrptr - ctxt->in;
538             char *tmp_ptr = ctxt->in;
539 
540             ctxt->inlen *= 2;
541             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
542             if (ctxt->in == NULL) {
543                 xmlHTTPErrMemory("allocating input buffer");
544                 xmlFree(tmp_ptr);
545                 ctxt->last = -1;
546                 return (-1);
547             }
548             ctxt->inptr = ctxt->in + d_inptr;
549             ctxt->content = ctxt->in + d_content;
550             ctxt->inrptr = ctxt->in + d_inrptr;
551         }
552         ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
553         if (ctxt->last > 0) {
554             ctxt->inptr += ctxt->last;
555             return (ctxt->last);
556         }
557         if (ctxt->last == 0) {
558             return (0);
559         }
560         if (ctxt->last == -1) {
561             switch (socket_errno()) {
562                 case EINPROGRESS:
563                 case EWOULDBLOCK:
564 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
565                 case EAGAIN:
566 #endif
567                     break;
568 
569                 case ECONNRESET:
570                 case ESHUTDOWN:
571                     return (0);
572 
573                 default:
574                     __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
575                     return (-1);
576             }
577         }
578 #ifdef HAVE_POLL_H
579         p.fd = ctxt->fd;
580         p.events = POLLIN;
581         if ((poll(&p, 1, timeout * 1000) < 1)
582 #if defined(EINTR)
583             && (errno != EINTR)
584 #endif
585             )
586             return (0);
587 #else /* !HAVE_POLL_H */
588 #ifndef _WINSOCKAPI_
589         if (ctxt->fd > FD_SETSIZE)
590             return 0;
591 #endif
592 
593         tv.tv_sec = timeout;
594         tv.tv_usec = 0;
595         FD_ZERO(&rfd);
596 
597 #ifdef _MSC_VER
598 #pragma warning(push)
599 #pragma warning(disable: 4018)
600 #endif
601 
602         FD_SET(ctxt->fd, &rfd);
603 
604 #ifdef _MSC_VER
605 #pragma warning(pop)
606 #endif
607 
608         if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
609 #if defined(EINTR)
610             && (socket_errno() != EINTR)
611 #endif
612             )
613             return (0);
614 #endif /* !HAVE_POLL_H */
615     }
616     return (0);
617 }
618 
619 /**
620  * xmlNanoHTTPReadLine:
621  * @ctxt:  an HTTP context
622  *
623  * Read one line in the HTTP server output, usually for extracting
624  * the HTTP protocol information from the answer header.
625  *
626  * Returns a newly allocated string with a copy of the line, or NULL
627  *         which indicate the end of the input.
628  */
629 
630 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)631 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
632     char buf[4096];
633     char *bp = buf;
634     int	rc;
635 
636     while (bp - buf < 4095) {
637 	if (ctxt->inrptr == ctxt->inptr) {
638 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
639 		if (bp == buf)
640 		    return(NULL);
641 		else
642 		    *bp = 0;
643 		return(xmlMemStrdup(buf));
644 	    }
645 	    else if ( rc == -1 ) {
646 	        return ( NULL );
647 	    }
648 	}
649 	*bp = *ctxt->inrptr++;
650 	if (*bp == '\n') {
651 	    *bp = 0;
652 	    return(xmlMemStrdup(buf));
653 	}
654 	if (*bp != '\r')
655 	    bp++;
656     }
657     buf[4095] = 0;
658     return(xmlMemStrdup(buf));
659 }
660 
661 
662 /**
663  * xmlNanoHTTPScanAnswer:
664  * @ctxt:  an HTTP context
665  * @line:  an HTTP header line
666  *
667  * Try to extract useful information from the server answer.
668  * We currently parse and process:
669  *  - The HTTP revision/ return code
670  *  - The Content-Type, Mime-Type and charset used
671  *  - The Location for redirect processing.
672  *
673  * Returns -1 in case of failure, the file descriptor number otherwise
674  */
675 
676 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)677 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
678     const char *cur = line;
679 
680     if (line == NULL) return;
681 
682     if (!strncmp(line, "HTTP/", 5)) {
683         int version = 0;
684 	int ret = 0;
685 
686 	cur += 5;
687 	while ((*cur >= '0') && (*cur <= '9')) {
688 	    version *= 10;
689 	    version += *cur - '0';
690 	    cur++;
691 	}
692 	if (*cur == '.') {
693 	    cur++;
694 	    if ((*cur >= '0') && (*cur <= '9')) {
695 		version *= 10;
696 		version += *cur - '0';
697 		cur++;
698 	    }
699 	    while ((*cur >= '0') && (*cur <= '9'))
700 		cur++;
701 	} else
702 	    version *= 10;
703 	if ((*cur != ' ') && (*cur != '\t')) return;
704 	while ((*cur == ' ') || (*cur == '\t')) cur++;
705 	if ((*cur < '0') || (*cur > '9')) return;
706 	while ((*cur >= '0') && (*cur <= '9')) {
707 	    ret *= 10;
708 	    ret += *cur - '0';
709 	    cur++;
710 	}
711 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
712 	ctxt->returnValue = ret;
713         ctxt->version = version;
714     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
715         const xmlChar *charset, *last, *mime;
716         cur += 13;
717 	while ((*cur == ' ') || (*cur == '\t')) cur++;
718 	if (ctxt->contentType != NULL)
719 	    xmlFree(ctxt->contentType);
720 	ctxt->contentType = xmlMemStrdup(cur);
721 	mime = (const xmlChar *) cur;
722 	last = mime;
723 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
724 	       (*last != ';') && (*last != ','))
725 	    last++;
726 	if (ctxt->mimeType != NULL)
727 	    xmlFree(ctxt->mimeType);
728 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
729 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
730 	if (charset != NULL) {
731 	    charset += 8;
732 	    last = charset;
733 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
734 	           (*last != ';') && (*last != ','))
735 		last++;
736 	    if (ctxt->encoding != NULL)
737 	        xmlFree(ctxt->encoding);
738 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
739 	}
740     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
741         const xmlChar *charset, *last, *mime;
742         cur += 12;
743 	if (ctxt->contentType != NULL) return;
744 	while ((*cur == ' ') || (*cur == '\t')) cur++;
745 	ctxt->contentType = xmlMemStrdup(cur);
746 	mime = (const xmlChar *) cur;
747 	last = mime;
748 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
749 	       (*last != ';') && (*last != ','))
750 	    last++;
751 	if (ctxt->mimeType != NULL)
752 	    xmlFree(ctxt->mimeType);
753 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
754 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
755 	if (charset != NULL) {
756 	    charset += 8;
757 	    last = charset;
758 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
759 	           (*last != ';') && (*last != ','))
760 		last++;
761 	    if (ctxt->encoding != NULL)
762 	        xmlFree(ctxt->encoding);
763 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
764 	}
765     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
766         cur += 9;
767 	while ((*cur == ' ') || (*cur == '\t')) cur++;
768 	if (ctxt->location != NULL)
769 	    xmlFree(ctxt->location);
770 	if (*cur == '/') {
771 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
772 	    xmlChar *tmp_loc =
773 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
774 	    ctxt->location =
775 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
776 	} else {
777 	    ctxt->location = xmlMemStrdup(cur);
778 	}
779     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
780         cur += 17;
781 	while ((*cur == ' ') || (*cur == '\t')) cur++;
782 	if (ctxt->authHeader != NULL)
783 	    xmlFree(ctxt->authHeader);
784 	ctxt->authHeader = xmlMemStrdup(cur);
785     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
786         cur += 19;
787 	while ((*cur == ' ') || (*cur == '\t')) cur++;
788 	if (ctxt->authHeader != NULL)
789 	    xmlFree(ctxt->authHeader);
790 	ctxt->authHeader = xmlMemStrdup(cur);
791 #ifdef LIBXML_ZLIB_ENABLED
792     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
793 	cur += 17;
794 	while ((*cur == ' ') || (*cur == '\t')) cur++;
795 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
796 	    ctxt->usesGzip = 1;
797 
798 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
799 
800 	    if (ctxt->strm != NULL) {
801 		ctxt->strm->zalloc = Z_NULL;
802 		ctxt->strm->zfree = Z_NULL;
803 		ctxt->strm->opaque = Z_NULL;
804 		ctxt->strm->avail_in = 0;
805 		ctxt->strm->next_in = Z_NULL;
806 
807 		inflateInit2( ctxt->strm, 31 );
808 	    }
809 	}
810 #endif
811     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
812 	cur += 15;
813 	ctxt->ContentLength = strtol( cur, NULL, 10 );
814     }
815 }
816 
817 /**
818  * xmlNanoHTTPConnectAttempt:
819  * @addr:  a socket address structure
820  *
821  * Attempt a connection to the given IP:port endpoint. It forces
822  * non-blocking semantic on the socket, and allow 60 seconds for
823  * the host to answer.
824  *
825  * Returns -1 in case of failure, the file descriptor number otherwise
826  */
827 
828 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)829 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
830 {
831 #ifndef HAVE_POLL_H
832     fd_set wfd;
833 #ifdef _WINSOCKAPI_
834     fd_set xfd;
835 #endif
836     struct timeval tv;
837 #else /* !HAVE_POLL_H */
838     struct pollfd p;
839 #endif /* !HAVE_POLL_H */
840     int status;
841 
842     int addrlen;
843 
844     SOCKET s;
845 
846 #ifdef SUPPORT_IP6
847     if (addr->sa_family == AF_INET6) {
848         s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
849         addrlen = sizeof(struct sockaddr_in6);
850     } else
851 #endif
852     {
853         s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
854         addrlen = sizeof(struct sockaddr_in);
855     }
856     if (s == INVALID_SOCKET) {
857 #ifdef DEBUG_HTTP
858         perror("socket");
859 #endif
860         __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
861         return INVALID_SOCKET;
862     }
863 #ifdef _WINSOCKAPI_
864     {
865         u_long one = 1;
866 
867         status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
868     }
869 #else /* _WINSOCKAPI_ */
870 #if defined(VMS)
871     {
872         int enable = 1;
873 
874         status = ioctl(s, FIONBIO, &enable);
875     }
876 #else /* VMS */
877     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
878 #ifdef O_NONBLOCK
879         status |= O_NONBLOCK;
880 #else /* O_NONBLOCK */
881 #ifdef F_NDELAY
882         status |= F_NDELAY;
883 #endif /* F_NDELAY */
884 #endif /* !O_NONBLOCK */
885         status = fcntl(s, F_SETFL, status);
886     }
887     if (status < 0) {
888 #ifdef DEBUG_HTTP
889         perror("nonblocking");
890 #endif
891         __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
892         closesocket(s);
893         return INVALID_SOCKET;
894     }
895 #endif /* !VMS */
896 #endif /* !_WINSOCKAPI_ */
897 
898     if (connect(s, addr, addrlen) == -1) {
899         switch (socket_errno()) {
900             case EINPROGRESS:
901             case EWOULDBLOCK:
902                 break;
903             default:
904                 __xmlIOErr(XML_FROM_HTTP, 0,
905                            "error connecting to HTTP server");
906                 closesocket(s);
907                 return INVALID_SOCKET;
908         }
909     }
910 #ifndef HAVE_POLL_H
911     tv.tv_sec = timeout;
912     tv.tv_usec = 0;
913 
914 #ifdef _MSC_VER
915 #pragma warning(push)
916 #pragma warning(disable: 4018)
917 #endif
918 #ifndef _WINSOCKAPI_
919     if (s > FD_SETSIZE)
920         return INVALID_SOCKET;
921 #endif
922     FD_ZERO(&wfd);
923     FD_SET(s, &wfd);
924 
925 #ifdef _WINSOCKAPI_
926     FD_ZERO(&xfd);
927     FD_SET(s, &xfd);
928 
929     switch (select(s + 1, NULL, &wfd, &xfd, &tv))
930 #else
931     switch (select(s + 1, NULL, &wfd, NULL, &tv))
932 #endif
933 #ifdef _MSC_VER
934 #pragma warning(pop)
935 #endif
936 
937 #else /* !HAVE_POLL_H */
938     p.fd = s;
939     p.events = POLLOUT;
940     switch (poll(&p, 1, timeout * 1000))
941 #endif /* !HAVE_POLL_H */
942 
943     {
944         case 0:
945             /* Time out */
946             __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
947             closesocket(s);
948             return INVALID_SOCKET;
949         case -1:
950             /* Ermm.. ?? */
951             __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
952             closesocket(s);
953             return INVALID_SOCKET;
954     }
955 
956 #ifndef HAVE_POLL_H
957     if (FD_ISSET(s, &wfd)
958 #ifdef _WINSOCKAPI_
959         || FD_ISSET(s, &xfd)
960 #endif
961         )
962 #else /* !HAVE_POLL_H */
963     if (p.revents == POLLOUT)
964 #endif /* !HAVE_POLL_H */
965     {
966         XML_SOCKLEN_T len;
967 
968         len = sizeof(status);
969 #ifdef SO_ERROR
970         if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
971             0) {
972             /* Solaris error code */
973             __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
974             closesocket(s);
975             return INVALID_SOCKET;
976         }
977 #endif
978         if (status) {
979             __xmlIOErr(XML_FROM_HTTP, 0,
980                        "Error connecting to remote host");
981             closesocket(s);
982             errno = status;
983             return INVALID_SOCKET;
984         }
985     } else {
986         /* pbm */
987         __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
988         closesocket(s);
989         return INVALID_SOCKET;
990     }
991 
992     return (s);
993 }
994 
995 /**
996  * xmlNanoHTTPConnectHost:
997  * @host:  the host name
998  * @port:  the port number
999  *
1000  * Attempt a connection to the given host:port endpoint. It tries
1001  * the multiple IP provided by the DNS if available.
1002  *
1003  * Returns -1 in case of failure, the file descriptor number otherwise
1004  */
1005 
1006 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)1007 xmlNanoHTTPConnectHost(const char *host, int port)
1008 {
1009     struct sockaddr *addr = NULL;
1010     struct sockaddr_in sockin;
1011 
1012 #ifdef SUPPORT_IP6
1013     struct sockaddr_in6 sockin6;
1014 #endif
1015     SOCKET s;
1016 
1017     memset (&sockin, 0, sizeof(sockin));
1018 
1019 #if defined(SUPPORT_IP6)
1020     {
1021 	int status;
1022 	struct addrinfo hints, *res, *result;
1023 
1024         memset (&sockin6, 0, sizeof(sockin6));
1025 
1026 	result = NULL;
1027 	memset (&hints, 0,sizeof(hints));
1028 	hints.ai_socktype = SOCK_STREAM;
1029 
1030 	status = getaddrinfo (host, NULL, &hints, &result);
1031 	if (status) {
1032 	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1033 	    return INVALID_SOCKET;
1034 	}
1035 
1036 	for (res = result; res; res = res->ai_next) {
1037 	    if (res->ai_family == AF_INET) {
1038 		if ((size_t)res->ai_addrlen > sizeof(sockin)) {
1039 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1040 		    freeaddrinfo (result);
1041 		    return INVALID_SOCKET;
1042 		}
1043 		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1044 		sockin.sin_port = htons (port);
1045 		addr = (struct sockaddr *)&sockin;
1046 	    } else if (res->ai_family == AF_INET6) {
1047 		if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
1048 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1049 		    freeaddrinfo (result);
1050 		    return INVALID_SOCKET;
1051 		}
1052 		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1053 		sockin6.sin6_port = htons (port);
1054 		addr = (struct sockaddr *)&sockin6;
1055 	    } else
1056 		continue;              /* for */
1057 
1058 	    s = xmlNanoHTTPConnectAttempt (addr);
1059 	    if (s != INVALID_SOCKET) {
1060 		freeaddrinfo (result);
1061 		return (s);
1062 	    }
1063 	}
1064 
1065 	if (result)
1066 	    freeaddrinfo (result);
1067     }
1068 #else
1069     {
1070         struct hostent *h;
1071         struct in_addr ia;
1072         int i;
1073 
1074 	h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
1075 	if (h == NULL) {
1076 
1077 /*
1078  * Okay, I got fed up by the non-portability of this error message
1079  * extraction code. it work on Linux, if it work on your platform
1080  * and one want to enable it, send me the defined(foobar) needed
1081  */
1082 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
1083 	    const char *h_err_txt = "";
1084 
1085 	    switch (h_errno) {
1086 		case HOST_NOT_FOUND:
1087 		    h_err_txt = "Authoritative host not found";
1088 		    break;
1089 
1090 		case TRY_AGAIN:
1091 		    h_err_txt =
1092 			"Non-authoritative host not found or server failure.";
1093 		    break;
1094 
1095 		case NO_RECOVERY:
1096 		    h_err_txt =
1097 			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1098 		    break;
1099 
1100 #ifdef NO_ADDRESS
1101 		case NO_ADDRESS:
1102 		    h_err_txt =
1103 			"Valid name, no data record of requested type.";
1104 		    break;
1105 #endif
1106 
1107 		default:
1108 		    h_err_txt = "No error text defined.";
1109 		    break;
1110 	    }
1111 	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1112 #else
1113 	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1114 #endif
1115 	    return INVALID_SOCKET;
1116 	}
1117 
1118 	for (i = 0; h->h_addr_list[i]; i++) {
1119 	    if (h->h_addrtype == AF_INET) {
1120 		/* A records (IPv4) */
1121 		if ((unsigned int) h->h_length > sizeof(ia)) {
1122 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1123 		    return INVALID_SOCKET;
1124 		}
1125 		memcpy (&ia, h->h_addr_list[i], h->h_length);
1126 		sockin.sin_family = h->h_addrtype;
1127 		sockin.sin_addr = ia;
1128 		sockin.sin_port = (unsigned short)htons ((unsigned short)port);
1129 		addr = (struct sockaddr *) &sockin;
1130 	    } else
1131 		break;              /* for */
1132 
1133 	    s = xmlNanoHTTPConnectAttempt (addr);
1134 	    if (s != INVALID_SOCKET)
1135 		return (s);
1136 	}
1137     }
1138 #endif
1139 
1140 #ifdef DEBUG_HTTP
1141     xmlGenericError(xmlGenericErrorContext,
1142                     "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
1143                     host);
1144 #endif
1145     return INVALID_SOCKET;
1146 }
1147 
1148 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if not NULL, it is reset to NULL and then handed to
 *                xmlNanoHTTPMethod() to receive the Content-Type
 *
 * Opens a connection to the indicated resource. This is a convenience
 * wrapper calling xmlNanoHTTPMethod() without an explicit method, request
 * body or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void *
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1167 
1168 /**
1169  * xmlNanoHTTPOpenRedir:
1170  * @URL:  The URL to load
1171  * @contentType:  if available the Content-Type information will be
1172  *                returned at that location
1173  * @redir: if available the redirected URL will be returned
1174  *
1175  * This function try to open a connection to the indicated resource
1176  * via HTTP GET.
1177  *
1178  * Returns NULL in case of failure, otherwise a request handler.
1179  *     The contentType, if provided must be freed by the caller
1180  */
1181 
1182 void*
xmlNanoHTTPOpenRedir(const char * URL,char ** contentType,char ** redir)1183 xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
1184     if (contentType != NULL) *contentType = NULL;
1185     if (redir != NULL) *redir = NULL;
1186     return(xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir, NULL,0));
1187 }
1188 
1189 /**
1190  * xmlNanoHTTPRead:
1191  * @ctx:  the HTTP context
1192  * @dest:  a buffer
1193  * @len:  the buffer length
1194  *
1195  * This function tries to read @len bytes from the existing HTTP connection
1196  * and saves them in @dest. This is a blocking call.
1197  *
1198  * Returns the number of byte read. 0 is an indication of an end of connection.
1199  *         -1 indicates a parameter error.
1200  */
1201 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1202 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1203     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1204 #ifdef LIBXML_ZLIB_ENABLED
1205     int bytes_read = 0;
1206     int orig_avail_in;
1207     int z_ret;
1208 #endif
1209 
1210     if (ctx == NULL) return(-1);
1211     if (dest == NULL) return(-1);
1212     if (len <= 0) return(0);
1213 
1214 #ifdef LIBXML_ZLIB_ENABLED
1215     if (ctxt->usesGzip == 1) {
1216         if (ctxt->strm == NULL) return(0);
1217 
1218         ctxt->strm->next_out = dest;
1219         ctxt->strm->avail_out = len;
1220 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1221 
1222         while (ctxt->strm->avail_out > 0 &&
1223 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1224             orig_avail_in = ctxt->strm->avail_in =
1225 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1226             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1227 
1228             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1229             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1230 
1231             if (z_ret != Z_OK) break;
1232 	}
1233 
1234         ctxt->inrptr += bytes_read;
1235         return(len - ctxt->strm->avail_out);
1236     }
1237 #endif
1238 
1239     while (ctxt->inptr - ctxt->inrptr < len) {
1240         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1241     }
1242     if (ctxt->inptr - ctxt->inrptr < len)
1243         len = ctxt->inptr - ctxt->inrptr;
1244     memcpy(dest, ctxt->inrptr, len);
1245     ctxt->inrptr += len;
1246     return(len);
1247 }
1248 
1249 /**
1250  * xmlNanoHTTPClose:
1251  * @ctx:  the HTTP context
1252  *
1253  * This function closes an HTTP context, it ends up the connection and
1254  * free all data related to it.
1255  */
1256 void
xmlNanoHTTPClose(void * ctx)1257 xmlNanoHTTPClose(void *ctx) {
1258     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1259 
1260     if (ctx == NULL) return;
1261 
1262     xmlNanoHTTPFreeCtxt(ctxt);
1263 }
1264 
1265 
/**
 * xmlNanoHTTPHostnameMatch:
 * @pattern: The pattern as it appears in no_proxy environment variable
 * @hostname: The hostname to test as it appears in the URL
 *
 * This function tests whether a given hostname matches a pattern. The pattern
 * usually is a token from the no_proxy environment variable. Wildcards in the
 * pattern are not supported.
 *
 * A single leading '.' in the pattern is ignored. The comparison is case
 * insensitive and runs from the end of both strings: the hostname matches
 * when it is equal to the pattern, or when it ends with the pattern and the
 * character just before that suffix is a '.'.
 *
 * Returns true, iff hostname matches the pattern. A NULL or empty pattern
 * and a NULL hostname never match.
 */

static int
xmlNanoHTTPHostnameMatch(const char *pattern, const char *hostname) {
    int idx_pattern, idx_hostname;

    if (!pattern || *pattern == '\0' || !hostname)
        return 0;

    /* Ignore a leading '.' in the pattern */
    if (*pattern == '.')
        pattern++;

    /* Both indices start on the terminating NUL and walk backwards. */
    idx_pattern = (int) strlen(pattern);
    idx_hostname = (int) strlen(hostname);

    for (; idx_pattern >= 0 && idx_hostname >= 0;
           --idx_pattern, --idx_hostname) {
        /*
         * tolower() requires a value representable as unsigned char;
         * passing a negative plain char is undefined behavior.
         */
        if (tolower((unsigned char) pattern[idx_pattern]) !=
            tolower((unsigned char) hostname[idx_hostname]))
            break;
    }

    /*
     * The whole pattern must have been consumed, and the hostname must
     * either be exhausted too or stop on a label separator.
     */
    return idx_pattern == -1 &&
           (idx_hostname == -1 || hostname[idx_hostname] == '.');
}
1305 
1306 
/**
 * xmlNanoHTTPBypassProxy:
 * @hostname: The hostname as it appears in the URL
 *
 * This function evaluates the no_proxy environment variable and returns
 * whether the proxy server should be bypassed for a given host.
 *
 * The variable is treated as a comma separated list. Whitespace at the
 * very start and directly after each comma is skipped, and every token is
 * checked with xmlNanoHTTPHostnameMatch().
 *
 * Returns true, iff a proxy server should be bypassed for the given hostname.
 * Returns false when no_proxy is unset or blank, or when the working copy
 * of its value cannot be allocated.
 */

static int
xmlNanoHTTPBypassProxy(const char *hostname) {
    const char *value = getenv("no_proxy");
    size_t size;
    char *copy, *cur, *token;
    int bypass = 0;

    if (value == NULL)
        return 0;

    /*
     * Work on a private copy since tokenizing writes NUL bytes into it.
     * (Avoid strdup because it's not portable.)
     */
    size = strlen(value) + 1;
    copy = xmlMalloc(size);
    if (copy == NULL)
        return 0;       /* out of memory: do not bypass the proxy */
    memcpy(copy, value, size);

    /* <ctype.h> functions need an unsigned char value, hence the casts. */
    cur = copy;
    while (isspace((unsigned char) *cur))
        ++cur;

    if (*cur != '\0') {
        token = cur;
        while (*cur != '\0') {
            if (*cur != ',') {
                ++cur;
                continue;
            }

            /* Terminate the current token in place and test it. */
            *(cur++) = '\0';
            if (xmlNanoHTTPHostnameMatch(token, hostname)) {
                bypass = 1;
                break;
            }

            while (isspace((unsigned char) *cur))
                ++cur;
            token = cur;
        }

        /* The last token is not followed by a comma. */
        if (!bypass && xmlNanoHTTPHostnameMatch(token, hostname))
            bypass = 1;
    }

    xmlFree(copy);
    return bypass;
}
1364 
1365 
1366 /**
1367  * xmlNanoHTTPMethodRedir:
1368  * @URL:  The URL to load
1369  * @method:  the HTTP method to use
1370  * @input:  the input string if any
1371  * @contentType:  the Content-Type information IN and OUT
1372  * @redir:  the redirected URL OUT
1373  * @headers:  the extra headers
1374  * @ilen:  input length
1375  *
1376  * This function try to open a connection to the indicated resource
1377  * via HTTP using the given @method, adding the given extra headers
1378  * and the input buffer for the request content.
1379  *
1380  * Returns NULL in case of failure, otherwise a request handler.
1381  *     The contentType, or redir, if provided must be freed by the caller
1382  */
1383 
1384 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1385 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1386                   char **contentType, char **redir,
1387 		  const char *headers, int ilen ) {
1388     xmlNanoHTTPCtxtPtr ctxt;
1389     char *bp, *p;
1390     int blen;
1391     SOCKET ret;
1392     int nbRedirects = 0;
1393     int use_proxy;
1394     char *redirURL = NULL;
1395 #ifdef DEBUG_HTTP
1396     int xmt_bytes;
1397 #endif
1398 
1399     if (URL == NULL) return(NULL);
1400     if (method == NULL) method = "GET";
1401     xmlNanoHTTPInit();
1402 
1403 retry:
1404     if (redirURL == NULL) {
1405 	ctxt = xmlNanoHTTPNewCtxt(URL);
1406 	if (ctxt == NULL)
1407 	    return(NULL);
1408     } else {
1409 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1410 	if (ctxt == NULL)
1411 	    return(NULL);
1412 	ctxt->location = xmlMemStrdup(redirURL);
1413     }
1414 
1415     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1416 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1417         xmlNanoHTTPFreeCtxt(ctxt);
1418 	if (redirURL != NULL) xmlFree(redirURL);
1419         return(NULL);
1420     }
1421     if (ctxt->hostname == NULL) {
1422 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1423 	           "Failed to identify host in URI");
1424         xmlNanoHTTPFreeCtxt(ctxt);
1425 	if (redirURL != NULL) xmlFree(redirURL);
1426         return(NULL);
1427     }
1428     use_proxy = proxy && !xmlNanoHTTPBypassProxy(ctxt->hostname);
1429     if (use_proxy) {
1430 	blen = strlen(ctxt->hostname) * 2 + 16;
1431 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1432     }
1433     else {
1434 	blen = strlen(ctxt->hostname);
1435 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1436     }
1437     if (ret == INVALID_SOCKET) {
1438         xmlNanoHTTPFreeCtxt(ctxt);
1439 	if (redirURL != NULL) xmlFree(redirURL);
1440         return(NULL);
1441     }
1442     ctxt->fd = ret;
1443 
1444     if (input == NULL)
1445 	ilen = 0;
1446     else
1447 	blen += 36;
1448 
1449     if (headers != NULL)
1450 	blen += strlen(headers) + 2;
1451     if (contentType && *contentType)
1452 	/* reserve for string plus 'Content-Type: \r\n" */
1453 	blen += strlen(*contentType) + 16;
1454     if (ctxt->query != NULL)
1455 	/* 1 for '?' */
1456 	blen += strlen(ctxt->query) + 1;
1457     blen += strlen(method) + strlen(ctxt->path) + 24;
1458 #ifdef LIBXML_ZLIB_ENABLED
1459     /* reserve for possible 'Accept-Encoding: gzip' string */
1460     blen += 23;
1461 #endif
1462     if (ctxt->port != 80) {
1463 	/* reserve space for ':xxxxx', incl. potential proxy */
1464 	if (use_proxy)
1465 	    blen += 17;
1466 	else
1467 	    blen += 11;
1468     }
1469     bp = (char*)xmlMallocAtomic(blen);
1470     if ( bp == NULL ) {
1471         xmlNanoHTTPFreeCtxt( ctxt );
1472 	xmlHTTPErrMemory("allocating header buffer");
1473 	return ( NULL );
1474     }
1475 
1476     p = bp;
1477 
1478     if (use_proxy) {
1479 	if (ctxt->port != 80) {
1480 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1481 			method, ctxt->hostname,
1482 			ctxt->port, ctxt->path );
1483 	}
1484 	else
1485 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1486 			ctxt->hostname, ctxt->path);
1487     }
1488     else
1489 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1490 
1491     if (ctxt->query != NULL)
1492 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1493 
1494     if (ctxt->port == 80) {
1495         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1496 		    ctxt->hostname);
1497     } else {
1498         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1499 		    ctxt->hostname, ctxt->port);
1500     }
1501 
1502 #ifdef LIBXML_ZLIB_ENABLED
1503     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1504 #endif
1505 
1506     if (contentType != NULL && *contentType)
1507 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1508 
1509     if (headers != NULL)
1510 	p += snprintf( p, blen - (p - bp), "%s", headers );
1511 
1512     if (input != NULL)
1513 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1514     else
1515 	snprintf(p, blen - (p - bp), "\r\n");
1516 
1517 #ifdef DEBUG_HTTP
1518     xmlGenericError(xmlGenericErrorContext,
1519 	    "-> %s%s", use_proxy ? "(Proxy) " : "", bp);
1520     if ((blen -= strlen(bp)+1) < 0)
1521 	xmlGenericError(xmlGenericErrorContext,
1522 		"ERROR: overflowed buffer by %d bytes\n", -blen);
1523 #endif
1524     ctxt->outptr = ctxt->out = bp;
1525     ctxt->state = XML_NANO_HTTP_WRITE;
1526     blen = strlen( ctxt->out );
1527 #ifdef DEBUG_HTTP
1528     xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1529     if ( xmt_bytes != blen )
1530         xmlGenericError( xmlGenericErrorContext,
1531 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1532 			xmt_bytes, blen,
1533 			"bytes of HTTP headers sent to host",
1534 			ctxt->hostname );
1535 #else
1536     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1537 #endif
1538 
1539     if ( input != NULL ) {
1540 #ifdef DEBUG_HTTP
1541         xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1542 
1543 	if ( xmt_bytes != ilen )
1544 	    xmlGenericError( xmlGenericErrorContext,
1545 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1546 			xmt_bytes, ilen,
1547 			"bytes of HTTP content sent to host",
1548 			ctxt->hostname );
1549 #else
1550 	xmlNanoHTTPSend( ctxt, input, ilen );
1551 #endif
1552     }
1553 
1554     ctxt->state = XML_NANO_HTTP_READ;
1555 
1556     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1557         if (*p == 0) {
1558 	    ctxt->content = ctxt->inrptr;
1559 	    xmlFree(p);
1560 	    break;
1561 	}
1562 	xmlNanoHTTPScanAnswer(ctxt, p);
1563 
1564 #ifdef DEBUG_HTTP
1565 	xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1566 #endif
1567         xmlFree(p);
1568     }
1569 
1570     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1571         (ctxt->returnValue < 400)) {
1572 #ifdef DEBUG_HTTP
1573 	xmlGenericError(xmlGenericErrorContext,
1574 		"\nRedirect to: %s\n", ctxt->location);
1575 #endif
1576 	while ( xmlNanoHTTPRecv(ctxt) > 0 )
1577             ;
1578         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1579 	    nbRedirects++;
1580 	    if (redirURL != NULL)
1581 		xmlFree(redirURL);
1582 	    redirURL = xmlMemStrdup(ctxt->location);
1583 	    xmlNanoHTTPFreeCtxt(ctxt);
1584 	    goto retry;
1585 	}
1586 	xmlNanoHTTPFreeCtxt(ctxt);
1587 	if (redirURL != NULL) xmlFree(redirURL);
1588 #ifdef DEBUG_HTTP
1589 	xmlGenericError(xmlGenericErrorContext,
1590 		"xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1591 #endif
1592 	return(NULL);
1593     }
1594 
1595     if (contentType != NULL) {
1596 	if (ctxt->contentType != NULL)
1597 	    *contentType = xmlMemStrdup(ctxt->contentType);
1598 	else
1599 	    *contentType = NULL;
1600     }
1601 
1602     if ((redir != NULL) && (redirURL != NULL)) {
1603 	*redir = redirURL;
1604     } else {
1605 	if (redirURL != NULL)
1606 	    xmlFree(redirURL);
1607 	if (redir != NULL)
1608 	    *redir = NULL;
1609     }
1610 
1611 #ifdef DEBUG_HTTP
1612     if (ctxt->contentType != NULL)
1613 	xmlGenericError(xmlGenericErrorContext,
1614 		"\nCode %d, content-type '%s'\n\n",
1615 	       ctxt->returnValue, ctxt->contentType);
1616     else
1617 	xmlGenericError(xmlGenericErrorContext,
1618 		"\nCode %d, no content-type\n\n",
1619 	       ctxt->returnValue);
1620 #endif
1621 
1622     return((void *) ctxt);
1623 }
1624 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Opens a connection to the indicated resource via HTTP using the given
 * @method, extra headers and request content. All the work is delegated
 * to xmlNanoHTTPMethodRedir(), called without a location to store the
 * redirected URL.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void *
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1648 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to fetch the indicated resource via HTTP GET
 * and save its content in the file. An existing file is truncated
 * before the content is written.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* "-" selects standard output, i.e. descriptor 1 (0 is stdin). */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep stale bytes of a longer existing file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* Standard output belongs to the process, only close our own file. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1699 
1700 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the content is written.
 * Unless one of the arguments is NULL, the context is closed and freed
 * before returning, including when the file cannot be opened.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int toStdout;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* "-" selects standard output, i.e. descriptor 1 (0 is stdin). */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep stale bytes of a longer existing file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* Standard output belongs to the process, only close our own file. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1741 #endif /* LIBXML_OUTPUT_ENABLED */
1742 
1743 /**
1744  * xmlNanoHTTPReturnCode:
1745  * @ctx:  the HTTP context
1746  *
1747  * Get the latest HTTP return code received
1748  *
1749  * Returns the HTTP return code for the request.
1750  */
1751 int
xmlNanoHTTPReturnCode(void * ctx)1752 xmlNanoHTTPReturnCode(void *ctx) {
1753     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1754 
1755     if (ctxt == NULL) return(-1);
1756 
1757     return(ctxt->returnValue);
1758 }
1759 
1760 /**
1761  * xmlNanoHTTPAuthHeader:
1762  * @ctx:  the HTTP context
1763  *
1764  * Get the authentication header of an HTTP context
1765  *
1766  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1767  * header.
1768  */
1769 const char *
xmlNanoHTTPAuthHeader(void * ctx)1770 xmlNanoHTTPAuthHeader(void *ctx) {
1771     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1772 
1773     if (ctxt == NULL) return(NULL);
1774 
1775     return(ctxt->authHeader);
1776 }
1777 
1778 /**
1779  * xmlNanoHTTPContentLength:
1780  * @ctx:  the HTTP context
1781  *
1782  * Provides the specified content length from the HTTP header.
1783  *
1784  * Return the specified content length from the HTTP header.  Note that
1785  * a value of -1 indicates that the content length element was not included in
1786  * the response header.
1787  */
1788 int
xmlNanoHTTPContentLength(void * ctx)1789 xmlNanoHTTPContentLength( void * ctx ) {
1790     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1791 
1792     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1793 }
1794 
1795 /**
1796  * xmlNanoHTTPRedir:
1797  * @ctx:  the HTTP context
1798  *
1799  * Provides the specified redirection URL if available from the HTTP header.
1800  *
1801  * Return the specified redirection URL or NULL if not redirected.
1802  */
1803 const char *
xmlNanoHTTPRedir(void * ctx)1804 xmlNanoHTTPRedir( void * ctx ) {
1805     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1806 
1807     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1808 }
1809 
1810 /**
1811  * xmlNanoHTTPEncoding:
1812  * @ctx:  the HTTP context
1813  *
1814  * Provides the specified encoding if specified in the HTTP headers.
1815  *
1816  * Return the specified encoding or NULL if not available
1817  */
1818 const char *
xmlNanoHTTPEncoding(void * ctx)1819 xmlNanoHTTPEncoding( void * ctx ) {
1820     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1821 
1822     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1823 }
1824 
1825 /**
1826  * xmlNanoHTTPMimeType:
1827  * @ctx:  the HTTP context
1828  *
1829  * Provides the specified Mime-Type if specified in the HTTP headers.
1830  *
1831  * Return the specified Mime-Type or NULL if not available
1832  */
1833 const char *
xmlNanoHTTPMimeType(void * ctx)1834 xmlNanoHTTPMimeType( void * ctx ) {
1835     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1836 
1837     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1838 }
1839 
1840 /**
1841  * xmlNanoHTTPFetchContent:
1842  * @ctx:  the HTTP context
1843  * @ptr:  pointer to set to the content buffer.
1844  * @len:  integer pointer to hold the length of the content
1845  *
1846  * Check if all the content was read
1847  *
1848  * Returns 0 if all the content was read and available, returns
1849  * -1 if received content length was less than specified or an error
1850  * occurred.
1851  */
1852 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1853 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1854     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1855 
1856     int			rc = 0;
1857     int			cur_lgth;
1858     int			rcvd_lgth;
1859     int			dummy_int;
1860     char *		dummy_ptr = NULL;
1861 
1862     /*  Dummy up return input parameters if not provided  */
1863 
1864     if ( len == NULL )
1865         len = &dummy_int;
1866 
1867     if ( ptr == NULL )
1868         ptr = &dummy_ptr;
1869 
1870     /*  But can't work without the context pointer  */
1871 
1872     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1873         *len = 0;
1874 	*ptr = NULL;
1875 	return ( -1 );
1876     }
1877 
1878     rcvd_lgth = ctxt->inptr - ctxt->content;
1879 
1880     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1881 
1882 	rcvd_lgth += cur_lgth;
1883 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1884 	    break;
1885     }
1886 
1887     *ptr = ctxt->content;
1888     *len = rcvd_lgth;
1889 
1890     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1891         rc = -1;
1892     else if ( rcvd_lgth == 0 )
1893 	rc = -1;
1894 
1895     return ( rc );
1896 }
1897 
1898 #ifdef STANDALONE
main(int argc,char ** argv)1899 int main(int argc, char **argv) {
1900     char *contentType = NULL;
1901 
1902     if (argv[1] != NULL) {
1903 	if (argv[2] != NULL)
1904 	    xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1905         else
1906 	    xmlNanoHTTPFetch(argv[1], "-", &contentType);
1907 	if (contentType != NULL) xmlFree(contentType);
1908     } else {
1909         xmlGenericError(xmlGenericErrorContext,
1910 		"%s: minimal HTTP GET implementation\n", argv[0]);
1911         xmlGenericError(xmlGenericErrorContext,
1912 		"\tusage %s [ URL [ filename ] ]\n", argv[0]);
1913     }
1914     xmlNanoHTTPCleanup();
1915     xmlMemoryDump();
1916     return(0);
1917 }
1918 #endif /* STANDALONE */
1919 #else /* !LIBXML_HTTP_ENABLED */
1920 #ifdef STANDALONE
1921 #include <stdio.h>
main(int argc,char ** argv)1922 int main(int argc, char **argv) {
1923     xmlGenericError(xmlGenericErrorContext,
1924 	    "%s : HTTP support not compiled in\n", argv[0]);
1925     return(0);
1926 }
1927 #endif /* STANDALONE */
1928 #endif /* LIBXML_HTTP_ENABLED */
1929