1 /*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * daniel@veillard.com
12 */
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22
23 #ifdef HAVE_UNISTD_H
24 #include <unistd.h>
25 #elif defined (_WIN32)
26 #include <io.h>
27 #endif
28 #ifdef HAVE_SYS_SOCKET_H
29 #include <sys/socket.h>
30 #endif
31 #ifdef HAVE_NETINET_IN_H
32 #include <netinet/in.h>
33 #endif
34 #ifdef HAVE_ARPA_INET_H
35 #include <arpa/inet.h>
36 #endif
37 #ifdef HAVE_NETDB_H
38 #include <netdb.h>
39 #endif
40 #ifdef HAVE_FCNTL_H
41 #include <fcntl.h>
42 #endif
43 #ifdef HAVE_SYS_TIME_H
44 #include <sys/time.h>
45 #endif
46 #ifndef HAVE_POLL_H
47 #ifdef HAVE_SYS_SELECT_H
48 #include <sys/select.h>
49 #endif
50 #else
51 #include <poll.h>
52 #endif
53 #ifdef LIBXML_ZLIB_ENABLED
54 #include <zlib.h>
55 #endif
56
57
58 #ifdef VMS
59 #include <stropts>
60 #define XML_SOCKLEN_T unsigned int
61 #endif
62
63 #if defined(_WIN32)
64 #include <wsockcompat.h>
65 #endif
66
67 #include <libxml/xmlerror.h>
68 #include <libxml/xmlmemory.h>
69 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
70 #include <libxml/nanohttp.h>
71 #include <libxml/uri.h>
72
73 #include "private/error.h"
74 #include "private/io.h"
75
/**
 * A couple portability macros
 */
/*
 * When the Winsock API is not in use, map the Winsock-style socket
 * names used throughout this file onto plain file descriptors.
 */
#ifndef _WINSOCKAPI_
#define closesocket(s) close(s)
#define SOCKET int
#define INVALID_SOCKET (-1)
#endif

/* Type of the length argument handed to getsockopt() in this file */
#ifndef XML_SOCKLEN_T
#define XML_SOCKLEN_T unsigned int
#endif

/* Casts applied to the host argument of gethostbyname() and the buffer of send() */
#define GETHOSTBYNAME_ARG_CAST (char *)
#define SEND_ARG2_CAST (char *)

/* In a STANDALONE build the case-insensitive helpers map to the libc ones */
#ifdef STANDALONE
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
#endif

/* NOTE(review): presumably the maximum number of redirects followed - not used in this part of the file */
#define XML_NANO_HTTP_MAX_REDIR 10

/* Size of one recv() request and free space kept at the end of the input buffer */
#define XML_NANO_HTTP_CHUNK 4096

/* Values for xmlNanoHTTPCtxt.state; WRITE and READ are tested as bit flags */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4

/* Error reporting is compiled out: every __xmlIOErr() call below expands to nothing */
#define __xmlIOErr(domain, code, extra) ((void) 0)
107
/*
 * State of one HTTP request: the components of the target URL, the
 * socket, the input buffer and the values extracted from the answer
 * headers.
 */
typedef struct xmlNanoHTTPCtxt {
    char *protocol;	/* the protocol name (URI scheme) */
    char *hostname;	/* the host name, without [] for IPv6 literals */
    int port;		/* the port, 80 unless the URL specifies one */
    char *path;		/* the path within the URL, "/" if the URL has none */
    char *query;	/* the query string */
    SOCKET fd;		/* the file descriptor for the socket */
    int state;		/* XML_NANO_HTTP_* value: WRITE / READ / CLOSED / NONE */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* allocated size of the input buffer */
    int last;		/* result of the last recv(), -1 on allocation failure */
    int returnValue;	/* the protocol return value (HTTP status code) */
    int version;	/* the protocol version, major * 10 + minor */
    int ContentLength;	/* specified content length from HTTP header, -1 if unset */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
    char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
    char *encoding;	/* encoding extracted from the contentType */
    char *mimeType;	/* Mime-Type extracted from the contentType */
#ifdef LIBXML_ZLIB_ENABLED
    z_stream *strm;	/* Zlib stream object */
    int usesGzip;	/* "Content-Encoding: gzip" was detected */
#endif
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
137
/* set to 1 by xmlNanoHTTPInit(), back to 0 by xmlNanoHTTPCleanup() */
static int initialized = 0;
static char *proxy = NULL;	 /* the proxy name if any */
static int proxyPort;	/* the proxy port if any */
/* timeout in seconds used for every select()/poll() wait in this file */
static unsigned int timeout = 60;/* the select() timeout in seconds */

/* forward declaration; the definition is not in this part of the file */
static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
144
145 /**
146 * xmlHTTPErrMemory:
147 * @extra: extra information
148 *
149 * Handle an out of memory condition
150 */
151 static void
xmlHTTPErrMemory(void)152 xmlHTTPErrMemory(void)
153 {
154 xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_HTTP, NULL);
155 }
156
/**
 * socket_errno:
 *
 * A portability function giving the error code of the last socket
 * operation. With Winsock the WSA codes handled below are translated
 * to their errno counterparts and any other code is returned as is;
 * everywhere else this is simply errno.
 *
 * Returns the error code.
 */
static int socket_errno(void) {
#ifdef _WINSOCKAPI_
    int wsaCode = WSAGetLastError();

    if (wsaCode == WSAECONNRESET)
        return(ECONNRESET);
    if (wsaCode == WSAEINPROGRESS)
        return(EINPROGRESS);
    if (wsaCode == WSAEINTR)
        return(EINTR);
    if (wsaCode == WSAESHUTDOWN)
        return(ESHUTDOWN);
    if (wsaCode == WSAEWOULDBLOCK)
        return(EWOULDBLOCK);
    return(wsaCode);
#else
    return(errno);
#endif
}
181
182 /**
183 * xmlNanoHTTPInit:
184 *
185 * Initialize the HTTP protocol layer.
186 * Currently it just checks for proxy information
187 */
188
189 void
xmlNanoHTTPInit(void)190 xmlNanoHTTPInit(void) {
191 const char *env;
192 #ifdef _WINSOCKAPI_
193 WSADATA wsaData;
194 #endif
195
196 if (initialized)
197 return;
198
199 #ifdef _WINSOCKAPI_
200 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
201 return;
202 #endif
203
204 if (proxy == NULL) {
205 proxyPort = 80;
206 env = getenv("no_proxy");
207 if (env && ((env[0] == '*') && (env[1] == 0)))
208 goto done;
209 env = getenv("http_proxy");
210 if (env != NULL) {
211 xmlNanoHTTPScanProxy(env);
212 goto done;
213 }
214 env = getenv("HTTP_PROXY");
215 if (env != NULL) {
216 xmlNanoHTTPScanProxy(env);
217 goto done;
218 }
219 }
220 done:
221 initialized = 1;
222 }
223
224 /**
225 * xmlNanoHTTPCleanup:
226 *
227 * Cleanup the HTTP protocol layer.
228 */
229
230 void
xmlNanoHTTPCleanup(void)231 xmlNanoHTTPCleanup(void) {
232 if (proxy != NULL) {
233 xmlFree(proxy);
234 proxy = NULL;
235 }
236 #ifdef _WINSOCKAPI_
237 if (initialized)
238 WSACleanup();
239 #endif
240 initialized = 0;
241 return;
242 }
243
244 /**
245 * xmlNanoHTTPScanURL:
246 * @ctxt: an HTTP context
247 * @URL: The URL used to initialize the context
248 *
249 * (Re)Initialize an HTTP context by parsing the URL and finding
250 * the protocol host port and path it indicates.
251 */
252
253 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)254 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
255 xmlURIPtr uri;
256 int len;
257
258 /*
259 * Clear any existing data from the context
260 */
261 if (ctxt->protocol != NULL) {
262 xmlFree(ctxt->protocol);
263 ctxt->protocol = NULL;
264 }
265 if (ctxt->hostname != NULL) {
266 xmlFree(ctxt->hostname);
267 ctxt->hostname = NULL;
268 }
269 if (ctxt->path != NULL) {
270 xmlFree(ctxt->path);
271 ctxt->path = NULL;
272 }
273 if (ctxt->query != NULL) {
274 xmlFree(ctxt->query);
275 ctxt->query = NULL;
276 }
277 if (URL == NULL) return;
278
279 uri = xmlParseURIRaw(URL, 1);
280 if (uri == NULL)
281 return;
282
283 if ((uri->scheme == NULL) || (uri->server == NULL)) {
284 xmlFreeURI(uri);
285 return;
286 }
287
288 ctxt->protocol = xmlMemStrdup(uri->scheme);
289 /* special case of IPv6 addresses, the [] need to be removed */
290 if ((uri->server != NULL) && (*uri->server == '[')) {
291 len = strlen(uri->server);
292 if ((len > 2) && (uri->server[len - 1] == ']')) {
293 ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
294 } else
295 ctxt->hostname = xmlMemStrdup(uri->server);
296 } else
297 ctxt->hostname = xmlMemStrdup(uri->server);
298 if (uri->path != NULL)
299 ctxt->path = xmlMemStrdup(uri->path);
300 else
301 ctxt->path = xmlMemStrdup("/");
302 if (uri->query != NULL)
303 ctxt->query = xmlMemStrdup(uri->query);
304 if (uri->port != 0)
305 ctxt->port = uri->port;
306
307 xmlFreeURI(uri);
308 }
309
310 /**
311 * xmlNanoHTTPScanProxy:
312 * @URL: The proxy URL used to initialize the proxy context
313 *
314 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
315 * the protocol host port it indicates.
316 * Should be like http://myproxy/ or http://myproxy:3128/
317 * A NULL URL cleans up proxy information.
318 */
319
320 void
xmlNanoHTTPScanProxy(const char * URL)321 xmlNanoHTTPScanProxy(const char *URL) {
322 xmlURIPtr uri;
323
324 if (proxy != NULL) {
325 xmlFree(proxy);
326 proxy = NULL;
327 }
328 proxyPort = 0;
329
330 if (URL == NULL) return;
331
332 uri = xmlParseURIRaw(URL, 1);
333 if ((uri == NULL) || (uri->scheme == NULL) ||
334 (strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
335 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
336 if (uri != NULL)
337 xmlFreeURI(uri);
338 return;
339 }
340
341 proxy = xmlMemStrdup(uri->server);
342 if (uri->port != 0)
343 proxyPort = uri->port;
344
345 xmlFreeURI(uri);
346 }
347
348 /**
349 * xmlNanoHTTPNewCtxt:
350 * @URL: The URL used to initialize the context
351 *
352 * Allocate and initialize a new HTTP context.
353 *
354 * Returns an HTTP context or NULL in case of error.
355 */
356
357 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)358 xmlNanoHTTPNewCtxt(const char *URL) {
359 xmlNanoHTTPCtxtPtr ret;
360
361 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
362 if (ret == NULL) {
363 xmlHTTPErrMemory();
364 return(NULL);
365 }
366
367 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
368 ret->port = 80;
369 ret->returnValue = 0;
370 ret->fd = INVALID_SOCKET;
371 ret->ContentLength = -1;
372
373 xmlNanoHTTPScanURL(ret, URL);
374
375 return(ret);
376 }
377
378 /**
379 * xmlNanoHTTPFreeCtxt:
380 * @ctxt: an HTTP context
381 *
382 * Frees the context after closing the connection.
383 */
384
385 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)386 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
387 if (ctxt == NULL) return;
388 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
389 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
390 if (ctxt->path != NULL) xmlFree(ctxt->path);
391 if (ctxt->query != NULL) xmlFree(ctxt->query);
392 if (ctxt->out != NULL) xmlFree(ctxt->out);
393 if (ctxt->in != NULL) xmlFree(ctxt->in);
394 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
395 if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
396 if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
397 if (ctxt->location != NULL) xmlFree(ctxt->location);
398 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
399 #ifdef LIBXML_ZLIB_ENABLED
400 if (ctxt->strm != NULL) {
401 inflateEnd(ctxt->strm);
402 xmlFree(ctxt->strm);
403 }
404 #endif
405
406 ctxt->state = XML_NANO_HTTP_NONE;
407 if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
408 ctxt->fd = INVALID_SOCKET;
409 xmlFree(ctxt);
410 }
411
412 /**
413 * xmlNanoHTTPSend:
414 * @ctxt: an HTTP context
415 *
416 * Send the input needed to initiate the processing on the server side
417 * Returns number of bytes sent or -1 on error.
418 */
419
420 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)421 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
422 {
423 int total_sent = 0;
424 #ifdef HAVE_POLL_H
425 struct pollfd p;
426 #else
427 struct timeval tv;
428 fd_set wfd;
429 #endif
430
431 if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
432 while (total_sent < outlen) {
433 int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
434 outlen - total_sent, 0);
435
436 if (nsent > 0)
437 total_sent += nsent;
438 else if ((nsent == -1) &&
439 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
440 (socket_errno() != EAGAIN) &&
441 #endif
442 (socket_errno() != EWOULDBLOCK)) {
443 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
444 if (total_sent == 0)
445 total_sent = -1;
446 break;
447 } else {
448 /*
449 * No data sent
450 * Since non-blocking sockets are used, wait for
451 * socket to be writable or default timeout prior
452 * to retrying.
453 */
454 #ifndef HAVE_POLL_H
455 #ifndef _WINSOCKAPI_
456 if (ctxt->fd > FD_SETSIZE)
457 return -1;
458 #endif
459
460 tv.tv_sec = timeout;
461 tv.tv_usec = 0;
462 FD_ZERO(&wfd);
463 #ifdef _MSC_VER
464 #pragma warning(push)
465 #pragma warning(disable: 4018)
466 #endif
467 FD_SET(ctxt->fd, &wfd);
468 #ifdef _MSC_VER
469 #pragma warning(pop)
470 #endif
471 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
472 #else
473 p.fd = ctxt->fd;
474 p.events = POLLOUT;
475 (void) poll(&p, 1, timeout * 1000);
476 #endif /* !HAVE_POLL_H */
477 }
478 }
479 }
480
481 return total_sent;
482 }
483
484 /**
485 * xmlNanoHTTPRecv:
486 * @ctxt: an HTTP context
487 *
488 * Read information coming from the HTTP connection.
489 * This is a blocking call (but it blocks in select(), not read()).
490 *
491 * Returns the number of byte read or -1 in case of error.
492 */
493
494 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)495 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
496 {
497 #ifdef HAVE_POLL_H
498 struct pollfd p;
499 #else
500 fd_set rfd;
501 struct timeval tv;
502 #endif
503
504
505 while (ctxt->state & XML_NANO_HTTP_READ) {
506 if (ctxt->in == NULL) {
507 ctxt->in = (char *) xmlMallocAtomic(65000);
508 if (ctxt->in == NULL) {
509 xmlHTTPErrMemory();
510 ctxt->last = -1;
511 return (-1);
512 }
513 ctxt->inlen = 65000;
514 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
515 }
516 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
517 int delta = ctxt->inrptr - ctxt->in;
518 int len = ctxt->inptr - ctxt->inrptr;
519
520 memmove(ctxt->in, ctxt->inrptr, len);
521 ctxt->inrptr -= delta;
522 ctxt->content -= delta;
523 ctxt->inptr -= delta;
524 }
525 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
526 int d_inptr = ctxt->inptr - ctxt->in;
527 int d_content = ctxt->content - ctxt->in;
528 int d_inrptr = ctxt->inrptr - ctxt->in;
529 char *tmp_ptr = ctxt->in;
530
531 ctxt->inlen *= 2;
532 ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
533 if (ctxt->in == NULL) {
534 xmlHTTPErrMemory();
535 xmlFree(tmp_ptr);
536 ctxt->last = -1;
537 return (-1);
538 }
539 ctxt->inptr = ctxt->in + d_inptr;
540 ctxt->content = ctxt->in + d_content;
541 ctxt->inrptr = ctxt->in + d_inrptr;
542 }
543 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
544 if (ctxt->last > 0) {
545 ctxt->inptr += ctxt->last;
546 return (ctxt->last);
547 }
548 if (ctxt->last == 0) {
549 return (0);
550 }
551 if (ctxt->last == -1) {
552 switch (socket_errno()) {
553 case EINPROGRESS:
554 case EWOULDBLOCK:
555 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
556 case EAGAIN:
557 #endif
558 break;
559
560 case ECONNRESET:
561 case ESHUTDOWN:
562 return (0);
563
564 default:
565 __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
566 return (-1);
567 }
568 }
569 #ifdef HAVE_POLL_H
570 p.fd = ctxt->fd;
571 p.events = POLLIN;
572 if ((poll(&p, 1, timeout * 1000) < 1)
573 #if defined(EINTR)
574 && (errno != EINTR)
575 #endif
576 )
577 return (0);
578 #else /* !HAVE_POLL_H */
579 #ifndef _WINSOCKAPI_
580 if (ctxt->fd > FD_SETSIZE)
581 return 0;
582 #endif
583
584 tv.tv_sec = timeout;
585 tv.tv_usec = 0;
586 FD_ZERO(&rfd);
587
588 #ifdef _MSC_VER
589 #pragma warning(push)
590 #pragma warning(disable: 4018)
591 #endif
592
593 FD_SET(ctxt->fd, &rfd);
594
595 #ifdef _MSC_VER
596 #pragma warning(pop)
597 #endif
598
599 if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
600 #if defined(EINTR)
601 && (socket_errno() != EINTR)
602 #endif
603 )
604 return (0);
605 #endif /* !HAVE_POLL_H */
606 }
607 return (0);
608 }
609
610 /**
611 * xmlNanoHTTPReadLine:
612 * @ctxt: an HTTP context
613 *
614 * Read one line in the HTTP server output, usually for extracting
615 * the HTTP protocol information from the answer header.
616 *
617 * Returns a newly allocated string with a copy of the line, or NULL
618 * which indicate the end of the input.
619 */
620
621 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)622 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
623 char buf[4096];
624 char *bp = buf;
625 int rc;
626
627 while (bp - buf < 4095) {
628 if (ctxt->inrptr == ctxt->inptr) {
629 if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
630 if (bp == buf)
631 return(NULL);
632 else
633 *bp = 0;
634 return(xmlMemStrdup(buf));
635 }
636 else if ( rc == -1 ) {
637 return ( NULL );
638 }
639 }
640 *bp = *ctxt->inrptr++;
641 if (*bp == '\n') {
642 *bp = 0;
643 return(xmlMemStrdup(buf));
644 }
645 if (*bp != '\r')
646 bp++;
647 }
648 buf[4095] = 0;
649 return(xmlMemStrdup(buf));
650 }
651
652
653 /**
654 * xmlNanoHTTPScanAnswer:
655 * @ctxt: an HTTP context
656 * @line: an HTTP header line
657 *
658 * Try to extract useful information from the server answer.
659 * We currently parse and process:
660 * - The HTTP revision/ return code
661 * - The Content-Type, Mime-Type and charset used
662 * - The Location for redirect processing.
663 *
664 * Returns -1 in case of failure, the file descriptor number otherwise
665 */
666
667 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)668 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
669 const char *cur = line;
670
671 if (line == NULL) return;
672
673 if (!strncmp(line, "HTTP/", 5)) {
674 int version = 0;
675 int ret = 0;
676
677 cur += 5;
678 while ((*cur >= '0') && (*cur <= '9')) {
679 version *= 10;
680 version += *cur - '0';
681 cur++;
682 }
683 if (*cur == '.') {
684 cur++;
685 if ((*cur >= '0') && (*cur <= '9')) {
686 version *= 10;
687 version += *cur - '0';
688 cur++;
689 }
690 while ((*cur >= '0') && (*cur <= '9'))
691 cur++;
692 } else
693 version *= 10;
694 if ((*cur != ' ') && (*cur != '\t')) return;
695 while ((*cur == ' ') || (*cur == '\t')) cur++;
696 if ((*cur < '0') || (*cur > '9')) return;
697 while ((*cur >= '0') && (*cur <= '9')) {
698 ret *= 10;
699 ret += *cur - '0';
700 cur++;
701 }
702 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
703 ctxt->returnValue = ret;
704 ctxt->version = version;
705 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
706 const xmlChar *charset, *last, *mime;
707 cur += 13;
708 while ((*cur == ' ') || (*cur == '\t')) cur++;
709 if (ctxt->contentType != NULL)
710 xmlFree(ctxt->contentType);
711 ctxt->contentType = xmlMemStrdup(cur);
712 mime = (const xmlChar *) cur;
713 last = mime;
714 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
715 (*last != ';') && (*last != ','))
716 last++;
717 if (ctxt->mimeType != NULL)
718 xmlFree(ctxt->mimeType);
719 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
720 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
721 if (charset != NULL) {
722 charset += 8;
723 last = charset;
724 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
725 (*last != ';') && (*last != ','))
726 last++;
727 if (ctxt->encoding != NULL)
728 xmlFree(ctxt->encoding);
729 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
730 }
731 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
732 const xmlChar *charset, *last, *mime;
733 cur += 12;
734 if (ctxt->contentType != NULL) return;
735 while ((*cur == ' ') || (*cur == '\t')) cur++;
736 ctxt->contentType = xmlMemStrdup(cur);
737 mime = (const xmlChar *) cur;
738 last = mime;
739 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
740 (*last != ';') && (*last != ','))
741 last++;
742 if (ctxt->mimeType != NULL)
743 xmlFree(ctxt->mimeType);
744 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
745 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
746 if (charset != NULL) {
747 charset += 8;
748 last = charset;
749 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
750 (*last != ';') && (*last != ','))
751 last++;
752 if (ctxt->encoding != NULL)
753 xmlFree(ctxt->encoding);
754 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
755 }
756 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
757 cur += 9;
758 while ((*cur == ' ') || (*cur == '\t')) cur++;
759 if (ctxt->location != NULL)
760 xmlFree(ctxt->location);
761 if (*cur == '/') {
762 xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
763 xmlChar *tmp_loc =
764 xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
765 ctxt->location =
766 (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
767 } else {
768 ctxt->location = xmlMemStrdup(cur);
769 }
770 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
771 cur += 17;
772 while ((*cur == ' ') || (*cur == '\t')) cur++;
773 if (ctxt->authHeader != NULL)
774 xmlFree(ctxt->authHeader);
775 ctxt->authHeader = xmlMemStrdup(cur);
776 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
777 cur += 19;
778 while ((*cur == ' ') || (*cur == '\t')) cur++;
779 if (ctxt->authHeader != NULL)
780 xmlFree(ctxt->authHeader);
781 ctxt->authHeader = xmlMemStrdup(cur);
782 #ifdef LIBXML_ZLIB_ENABLED
783 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
784 cur += 17;
785 while ((*cur == ' ') || (*cur == '\t')) cur++;
786 if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
787 ctxt->usesGzip = 1;
788
789 ctxt->strm = xmlMalloc(sizeof(z_stream));
790
791 if (ctxt->strm != NULL) {
792 ctxt->strm->zalloc = Z_NULL;
793 ctxt->strm->zfree = Z_NULL;
794 ctxt->strm->opaque = Z_NULL;
795 ctxt->strm->avail_in = 0;
796 ctxt->strm->next_in = Z_NULL;
797
798 inflateInit2( ctxt->strm, 31 );
799 }
800 }
801 #endif
802 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
803 cur += 15;
804 ctxt->ContentLength = strtol( cur, NULL, 10 );
805 }
806 }
807
808 /**
809 * xmlNanoHTTPConnectAttempt:
810 * @addr: a socket address structure
811 *
812 * Attempt a connection to the given IP:port endpoint. It forces
813 * non-blocking semantic on the socket, and allow 60 seconds for
814 * the host to answer.
815 *
816 * Returns -1 in case of failure, the file descriptor number otherwise
817 */
818
819 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)820 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
821 {
822 #ifndef HAVE_POLL_H
823 fd_set wfd;
824 #ifdef _WINSOCKAPI_
825 fd_set xfd;
826 #endif
827 struct timeval tv;
828 #else /* !HAVE_POLL_H */
829 struct pollfd p;
830 #endif /* !HAVE_POLL_H */
831 int status;
832
833 int addrlen;
834
835 SOCKET s;
836
837 #ifdef SUPPORT_IP6
838 if (addr->sa_family == AF_INET6) {
839 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
840 addrlen = sizeof(struct sockaddr_in6);
841 } else
842 #endif
843 {
844 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
845 addrlen = sizeof(struct sockaddr_in);
846 }
847 if (s == INVALID_SOCKET) {
848 __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
849 return INVALID_SOCKET;
850 }
851 #ifdef _WINSOCKAPI_
852 {
853 u_long one = 1;
854
855 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
856 }
857 #else /* _WINSOCKAPI_ */
858 #if defined(VMS)
859 {
860 int enable = 1;
861
862 status = ioctl(s, FIONBIO, &enable);
863 }
864 #else /* VMS */
865 if ((status = fcntl(s, F_GETFL, 0)) != -1) {
866 #ifdef O_NONBLOCK
867 status |= O_NONBLOCK;
868 #else /* O_NONBLOCK */
869 #ifdef F_NDELAY
870 status |= F_NDELAY;
871 #endif /* F_NDELAY */
872 #endif /* !O_NONBLOCK */
873 status = fcntl(s, F_SETFL, status);
874 }
875 if (status < 0) {
876 __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
877 closesocket(s);
878 return INVALID_SOCKET;
879 }
880 #endif /* !VMS */
881 #endif /* !_WINSOCKAPI_ */
882
883 if (connect(s, addr, addrlen) == -1) {
884 switch (socket_errno()) {
885 case EINPROGRESS:
886 case EWOULDBLOCK:
887 break;
888 default:
889 __xmlIOErr(XML_FROM_HTTP, 0,
890 "error connecting to HTTP server");
891 closesocket(s);
892 return INVALID_SOCKET;
893 }
894 }
895 #ifndef HAVE_POLL_H
896 tv.tv_sec = timeout;
897 tv.tv_usec = 0;
898
899 #ifdef _MSC_VER
900 #pragma warning(push)
901 #pragma warning(disable: 4018)
902 #endif
903 #ifndef _WINSOCKAPI_
904 if (s > FD_SETSIZE)
905 return INVALID_SOCKET;
906 #endif
907 FD_ZERO(&wfd);
908 FD_SET(s, &wfd);
909
910 #ifdef _WINSOCKAPI_
911 FD_ZERO(&xfd);
912 FD_SET(s, &xfd);
913
914 switch (select(s + 1, NULL, &wfd, &xfd, &tv))
915 #else
916 switch (select(s + 1, NULL, &wfd, NULL, &tv))
917 #endif
918 #ifdef _MSC_VER
919 #pragma warning(pop)
920 #endif
921
922 #else /* !HAVE_POLL_H */
923 p.fd = s;
924 p.events = POLLOUT;
925 switch (poll(&p, 1, timeout * 1000))
926 #endif /* !HAVE_POLL_H */
927
928 {
929 case 0:
930 /* Time out */
931 __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
932 closesocket(s);
933 return INVALID_SOCKET;
934 case -1:
935 /* Ermm.. ?? */
936 __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
937 closesocket(s);
938 return INVALID_SOCKET;
939 }
940
941 #ifndef HAVE_POLL_H
942 if (FD_ISSET(s, &wfd)
943 #ifdef _WINSOCKAPI_
944 || FD_ISSET(s, &xfd)
945 #endif
946 )
947 #else /* !HAVE_POLL_H */
948 if (p.revents == POLLOUT)
949 #endif /* !HAVE_POLL_H */
950 {
951 XML_SOCKLEN_T len;
952
953 len = sizeof(status);
954 #ifdef SO_ERROR
955 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
956 0) {
957 /* Solaris error code */
958 __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
959 closesocket(s);
960 return INVALID_SOCKET;
961 }
962 #endif
963 if (status) {
964 __xmlIOErr(XML_FROM_HTTP, 0,
965 "Error connecting to remote host");
966 closesocket(s);
967 errno = status;
968 return INVALID_SOCKET;
969 }
970 } else {
971 /* pbm */
972 __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
973 closesocket(s);
974 return INVALID_SOCKET;
975 }
976
977 return (s);
978 }
979
980 /**
981 * xmlNanoHTTPConnectHost:
982 * @host: the host name
983 * @port: the port number
984 *
985 * Attempt a connection to the given host:port endpoint. It tries
986 * the multiple IP provided by the DNS if available.
987 *
988 * Returns -1 in case of failure, the file descriptor number otherwise
989 */
990
991 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)992 xmlNanoHTTPConnectHost(const char *host, int port)
993 {
994 struct sockaddr *addr = NULL;
995 struct sockaddr_in sockin;
996
997 #ifdef SUPPORT_IP6
998 struct sockaddr_in6 sockin6;
999 #endif
1000 SOCKET s;
1001
1002 memset (&sockin, 0, sizeof(sockin));
1003
1004 #if defined(SUPPORT_IP6)
1005 {
1006 int status;
1007 struct addrinfo hints, *res, *result;
1008
1009 memset (&sockin6, 0, sizeof(sockin6));
1010
1011 result = NULL;
1012 memset (&hints, 0,sizeof(hints));
1013 hints.ai_socktype = SOCK_STREAM;
1014
1015 status = getaddrinfo (host, NULL, &hints, &result);
1016 if (status) {
1017 __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1018 return INVALID_SOCKET;
1019 }
1020
1021 for (res = result; res; res = res->ai_next) {
1022 if (res->ai_family == AF_INET) {
1023 if ((size_t)res->ai_addrlen > sizeof(sockin)) {
1024 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1025 freeaddrinfo (result);
1026 return INVALID_SOCKET;
1027 }
1028 memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1029 sockin.sin_port = htons (port);
1030 addr = (struct sockaddr *)&sockin;
1031 } else if (res->ai_family == AF_INET6) {
1032 if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
1033 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1034 freeaddrinfo (result);
1035 return INVALID_SOCKET;
1036 }
1037 memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1038 sockin6.sin6_port = htons (port);
1039 addr = (struct sockaddr *)&sockin6;
1040 } else
1041 continue; /* for */
1042
1043 s = xmlNanoHTTPConnectAttempt (addr);
1044 if (s != INVALID_SOCKET) {
1045 freeaddrinfo (result);
1046 return (s);
1047 }
1048 }
1049
1050 if (result)
1051 freeaddrinfo (result);
1052 }
1053 #else
1054 {
1055 struct hostent *h;
1056 struct in_addr ia;
1057 int i;
1058
1059 h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
1060 if (h == NULL) {
1061
1062 /*
1063 * Okay, I got fed up by the non-portability of this error message
1064 * extraction code. it work on Linux, if it work on your platform
1065 * and one want to enable it, send me the defined(foobar) needed
1066 */
1067 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
1068 const char *h_err_txt = "";
1069
1070 switch (h_errno) {
1071 case HOST_NOT_FOUND:
1072 h_err_txt = "Authoritative host not found";
1073 break;
1074
1075 case TRY_AGAIN:
1076 h_err_txt =
1077 "Non-authoritative host not found or server failure.";
1078 break;
1079
1080 case NO_RECOVERY:
1081 h_err_txt =
1082 "Non-recoverable errors: FORMERR, REFUSED, or NOTIMP.";
1083 break;
1084
1085 #ifdef NO_ADDRESS
1086 case NO_ADDRESS:
1087 h_err_txt =
1088 "Valid name, no data record of requested type.";
1089 break;
1090 #endif
1091
1092 default:
1093 h_err_txt = "No error text defined.";
1094 break;
1095 }
1096 __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1097 #else
1098 __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1099 #endif
1100 return INVALID_SOCKET;
1101 }
1102
1103 for (i = 0; h->h_addr_list[i]; i++) {
1104 if (h->h_addrtype == AF_INET) {
1105 /* A records (IPv4) */
1106 if ((unsigned int) h->h_length > sizeof(ia)) {
1107 __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1108 return INVALID_SOCKET;
1109 }
1110 memcpy (&ia, h->h_addr_list[i], h->h_length);
1111 sockin.sin_family = h->h_addrtype;
1112 sockin.sin_addr = ia;
1113 sockin.sin_port = (unsigned short)htons ((unsigned short)port);
1114 addr = (struct sockaddr *) &sockin;
1115 } else
1116 break; /* for */
1117
1118 s = xmlNanoHTTPConnectAttempt (addr);
1119 if (s != INVALID_SOCKET)
1120 return (s);
1121 }
1122 }
1123 #endif
1124
1125 return INVALID_SOCKET;
1126 }
1127
1128
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to open a connection to the indicated resource
 * via HTTP GET. It hands the request over to xmlNanoHTTPMethod()
 * without method, input data or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;
    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1147
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 * @redir:  if available the redirected URL will be returned
 *
 * This function try to open a connection to the indicated resource
 * via HTTP GET. It hands the request over to xmlNanoHTTPMethodRedir()
 * without method, input data or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;
    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1168
1169 /**
1170 * xmlNanoHTTPRead:
1171 * @ctx: the HTTP context
1172 * @dest: a buffer
1173 * @len: the buffer length
1174 *
1175 * This function tries to read @len bytes from the existing HTTP connection
1176 * and saves them in @dest. This is a blocking call.
1177 *
1178 * Returns the number of byte read. 0 is an indication of an end of connection.
1179 * -1 indicates a parameter error.
1180 */
1181 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1182 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1183 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1184 #ifdef LIBXML_ZLIB_ENABLED
1185 int bytes_read = 0;
1186 int orig_avail_in;
1187 int z_ret;
1188 #endif
1189
1190 if (ctx == NULL) return(-1);
1191 if (dest == NULL) return(-1);
1192 if (len <= 0) return(0);
1193
1194 #ifdef LIBXML_ZLIB_ENABLED
1195 if (ctxt->usesGzip == 1) {
1196 if (ctxt->strm == NULL) return(0);
1197
1198 ctxt->strm->next_out = dest;
1199 ctxt->strm->avail_out = len;
1200 ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1201
1202 while (ctxt->strm->avail_out > 0 &&
1203 (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1204 orig_avail_in = ctxt->strm->avail_in =
1205 ctxt->inptr - ctxt->inrptr - bytes_read;
1206 ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1207
1208 z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1209 bytes_read += orig_avail_in - ctxt->strm->avail_in;
1210
1211 if (z_ret != Z_OK) break;
1212 }
1213
1214 ctxt->inrptr += bytes_read;
1215 return(len - ctxt->strm->avail_out);
1216 }
1217 #endif
1218
1219 while (ctxt->inptr - ctxt->inrptr < len) {
1220 if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1221 }
1222 if (ctxt->inptr - ctxt->inrptr < len)
1223 len = ctxt->inptr - ctxt->inrptr;
1224 if (len > 0) {
1225 memcpy(dest, ctxt->inrptr, len);
1226 ctxt->inrptr += len;
1227 }
1228 return(len);
1229 }
1230
1231 /**
1232 * xmlNanoHTTPClose:
1233 * @ctx: the HTTP context
1234 *
1235 * This function closes an HTTP context, it ends up the connection and
1236 * free all data related to it.
1237 */
1238 void
xmlNanoHTTPClose(void * ctx)1239 xmlNanoHTTPClose(void *ctx) {
1240 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1241
1242 if (ctx == NULL) return;
1243
1244 xmlNanoHTTPFreeCtxt(ctxt);
1245 }
1246
1247
/**
 * xmlNanoHTTPHostnameMatch:
 * @pattern: The pattern as it appears in no_proxy environment variable
 * @hostname: The hostname to test as it appears in the URL
 *
 * This function tests whether a given hostname matches a pattern. The pattern
 * usually is a token from the no_proxy environment variable. Wildcards in the
 * pattern are not supported.
 *
 * The comparison is case-insensitive and anchored at the end of both
 * strings: the hostname matches when it is equal to the pattern, or when
 * it ends with the pattern and the character just before that suffix is
 * a '.'. A single leading '.' in the pattern is ignored, so ".example.com"
 * and "example.com" behave the same.
 *
 * Returns 1 if hostname matches the pattern, 0 otherwise (including when
 * @pattern is NULL or empty, or @hostname is NULL).
 */

static int
xmlNanoHTTPHostnameMatch(const char *pattern, const char *hostname) {
    int idx_pattern, idx_hostname;
    const char * pattern_start;

    if (!pattern || *pattern == '\0' || !hostname)
        return 0;

    /* Ignore a leading '.' */
    pattern_start = (*pattern == '.') ? pattern + 1 : pattern;

    /*
     * Both indexes start on the terminating NUL, which always compares
     * equal, and then walk backwards through the two strings in step.
     */
    idx_pattern = strlen(pattern_start);
    idx_hostname = strlen(hostname);

    for (; idx_pattern >= 0 && idx_hostname >= 0;
           --idx_pattern, --idx_hostname) {
        /*
         * <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a plain char holding a byte >= 0x80
         * may be negative, so convert explicitly.
         */
        if (tolower((unsigned char) pattern_start[idx_pattern]) !=
            tolower((unsigned char) hostname[idx_hostname]))
            break;
    }

    /*
     * The whole pattern must have been consumed. The hostname is then
     * either consumed as well (exact match) or the remaining part must
     * end on a label boundary.
     */
    return idx_pattern == -1 &&
           (idx_hostname == -1 || hostname[idx_hostname] == '.');
}
1287
1288
/**
 * xmlNanoHTTPBypassProxy:
 * @hostname: The hostname as it appears in the URL
 *
 * This function evaluates the no_proxy environment variable and returns
 * whether the proxy server should be bypassed for a given host. The
 * variable is read as a comma-separated list; whitespace at the start of
 * each entry is skipped and every entry is tested with
 * xmlNanoHTTPHostnameMatch().
 *
 * Returns 1 if a proxy server should be bypassed for the given hostname,
 * 0 otherwise (also when no_proxy is unset or the working copy of its
 * value cannot be allocated).
 */

static int
xmlNanoHTTPBypassProxy(const char *hostname) {
    size_t envlen;
    char *env = getenv("no_proxy"), *cpy=NULL, *p=NULL;
    int bypass = 0;

    if (!env)
        return 0;

    /*
     * Work on a private copy: the tokenizer below writes NUL bytes into
     * the string and the environment itself must not be modified.
     * (Avoid strdup because it's not portable.)
     */
    envlen = strlen(env) + 1;
    cpy = xmlMalloc(envlen);
    if (cpy == NULL)
        return 0;       /* out of memory: fall back to "do not bypass" */
    memcpy(cpy, env, envlen);
    env = cpy;

    /*
     * The remainder of the function is basically a tokenizing.
     * The <ctype.h> argument is converted to unsigned char because a
     * plain char holding a byte >= 0x80 may be negative.
     */
    while (isspace((unsigned char) *env))
        ++env;
    if (*env == '\0')
        goto done;

    p = env;
    while (*env) {

        if (*env != ',') {
            ++env;
            continue;
        }

        /* End the current entry at the comma and test it. */
        *(env++) = '\0';
        if (xmlNanoHTTPHostnameMatch(p, hostname)) {
            bypass = 1;
            goto done;
        }

        while (isspace((unsigned char) *env))
            ++env;
        p = env;
    }

    /* Last entry (the one not followed by a comma). */
    if (xmlNanoHTTPHostnameMatch(p, hostname))
        bypass = 1;

done:
    xmlFree(cpy);
    return bypass;
}
1346
1347
1348 /**
1349 * xmlNanoHTTPMethodRedir:
1350 * @URL: The URL to load
1351 * @method: the HTTP method to use
1352 * @input: the input string if any
1353 * @contentType: the Content-Type information IN and OUT
1354 * @redir: the redirected URL OUT
1355 * @headers: the extra headers
1356 * @ilen: input length
1357 *
1358 * This function try to open a connection to the indicated resource
1359 * via HTTP using the given @method, adding the given extra headers
1360 * and the input buffer for the request content.
1361 *
1362 * Returns NULL in case of failure, otherwise a request handler.
1363 * The contentType, or redir, if provided must be freed by the caller
1364 */
1365
1366 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1367 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1368 char **contentType, char **redir,
1369 const char *headers, int ilen ) {
1370 xmlNanoHTTPCtxtPtr ctxt;
1371 char *bp, *p;
1372 int blen;
1373 SOCKET ret;
1374 int nbRedirects = 0;
1375 int use_proxy;
1376 char *redirURL = NULL;
1377
1378 if (URL == NULL) return(NULL);
1379 if (method == NULL) method = "GET";
1380 xmlNanoHTTPInit();
1381
1382 retry:
1383 if (redirURL == NULL) {
1384 ctxt = xmlNanoHTTPNewCtxt(URL);
1385 if (ctxt == NULL)
1386 return(NULL);
1387 } else {
1388 ctxt = xmlNanoHTTPNewCtxt(redirURL);
1389 if (ctxt == NULL)
1390 return(NULL);
1391 ctxt->location = xmlMemStrdup(redirURL);
1392 }
1393
1394 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1395 __xmlIOErr(XML_FROM_IO, XML_IO_UNSUPPORTED_PROTOCOL, ctxt->protocol);
1396 xmlNanoHTTPFreeCtxt(ctxt);
1397 if (redirURL != NULL) xmlFree(redirURL);
1398 return(NULL);
1399 }
1400 if (ctxt->hostname == NULL) {
1401 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1402 "Failed to identify host in URI");
1403 xmlNanoHTTPFreeCtxt(ctxt);
1404 if (redirURL != NULL) xmlFree(redirURL);
1405 return(NULL);
1406 }
1407 use_proxy = proxy && !xmlNanoHTTPBypassProxy(ctxt->hostname);
1408 if (use_proxy) {
1409 blen = strlen(ctxt->hostname) * 2 + 16;
1410 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1411 }
1412 else {
1413 blen = strlen(ctxt->hostname);
1414 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1415 }
1416 if (ret == INVALID_SOCKET) {
1417 xmlNanoHTTPFreeCtxt(ctxt);
1418 if (redirURL != NULL) xmlFree(redirURL);
1419 return(NULL);
1420 }
1421 ctxt->fd = ret;
1422
1423 if (input == NULL)
1424 ilen = 0;
1425 else
1426 blen += 36;
1427
1428 if (headers != NULL)
1429 blen += strlen(headers) + 2;
1430 if (contentType && *contentType)
1431 /* reserve for string plus 'Content-Type: \r\n" */
1432 blen += strlen(*contentType) + 16;
1433 if (ctxt->query != NULL)
1434 /* 1 for '?' */
1435 blen += strlen(ctxt->query) + 1;
1436 blen += strlen(method) + strlen(ctxt->path) + 24;
1437 #ifdef LIBXML_ZLIB_ENABLED
1438 /* reserve for possible 'Accept-Encoding: gzip' string */
1439 blen += 23;
1440 #endif
1441 if (ctxt->port != 80) {
1442 /* reserve space for ':xxxxx', incl. potential proxy */
1443 if (use_proxy)
1444 blen += 17;
1445 else
1446 blen += 11;
1447 }
1448 bp = (char*)xmlMallocAtomic(blen);
1449 if ( bp == NULL ) {
1450 xmlNanoHTTPFreeCtxt( ctxt );
1451 xmlHTTPErrMemory();
1452 return ( NULL );
1453 }
1454
1455 p = bp;
1456
1457 if (use_proxy) {
1458 if (ctxt->port != 80) {
1459 p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1460 method, ctxt->hostname,
1461 ctxt->port, ctxt->path );
1462 }
1463 else
1464 p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1465 ctxt->hostname, ctxt->path);
1466 }
1467 else
1468 p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1469
1470 if (ctxt->query != NULL)
1471 p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1472
1473 if (ctxt->port == 80) {
1474 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1475 ctxt->hostname);
1476 } else {
1477 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1478 ctxt->hostname, ctxt->port);
1479 }
1480
1481 #ifdef LIBXML_ZLIB_ENABLED
1482 p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1483 #endif
1484
1485 if (contentType != NULL && *contentType)
1486 p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1487
1488 if (headers != NULL)
1489 p += snprintf( p, blen - (p - bp), "%s", headers );
1490
1491 if (input != NULL)
1492 snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1493 else
1494 snprintf(p, blen - (p - bp), "\r\n");
1495
1496 ctxt->outptr = ctxt->out = bp;
1497 ctxt->state = XML_NANO_HTTP_WRITE;
1498 blen = strlen( ctxt->out );
1499 xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1500
1501 if ( input != NULL ) {
1502 xmlNanoHTTPSend( ctxt, input, ilen );
1503 }
1504
1505 ctxt->state = XML_NANO_HTTP_READ;
1506
1507 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1508 if (*p == 0) {
1509 ctxt->content = ctxt->inrptr;
1510 xmlFree(p);
1511 break;
1512 }
1513 xmlNanoHTTPScanAnswer(ctxt, p);
1514
1515 xmlFree(p);
1516 }
1517
1518 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1519 (ctxt->returnValue < 400)) {
1520 while ( xmlNanoHTTPRecv(ctxt) > 0 )
1521 ;
1522 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1523 nbRedirects++;
1524 if (redirURL != NULL)
1525 xmlFree(redirURL);
1526 redirURL = xmlMemStrdup(ctxt->location);
1527 xmlNanoHTTPFreeCtxt(ctxt);
1528 goto retry;
1529 }
1530 xmlNanoHTTPFreeCtxt(ctxt);
1531 if (redirURL != NULL) xmlFree(redirURL);
1532 return(NULL);
1533 }
1534
1535 if (contentType != NULL) {
1536 if (ctxt->contentType != NULL)
1537 *contentType = xmlMemStrdup(ctxt->contentType);
1538 else
1539 *contentType = NULL;
1540 }
1541
1542 if ((redir != NULL) && (redirURL != NULL)) {
1543 *redir = redirURL;
1544 } else {
1545 if (redirURL != NULL)
1546 xmlFree(redirURL);
1547 if (redir != NULL)
1548 *redir = NULL;
1549 }
1550
1551 return((void *) ctxt);
1552 }
1553
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Same as xmlNanoHTTPMethodRedir() called with a NULL @redir argument:
 * opens a connection to the indicated resource via HTTP using the given
 * @method, extra headers and request content, without reporting the
 * redirected URL.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 * The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    char **noRedir = NULL;      /* caller is not interested in the URL */

    return(xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                  noRedir, headers, ilen));
}
1577
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and saves its content in the file. An existing file is truncated
 * before the content is written.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 * if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;
    int toStdout;
    int written = 0;
    int n;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout)
        fd = 1;     /* standard output; descriptor 0 is standard input */
    else {
        /* O_TRUNC: do not leave stale bytes of a longer, older file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until the
     * whole content is out or a real error shows up. */
    while (written < len) {
        n = write(fd, buf + written, len - written);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (n == 0) {
            ret = -1;
            break;
        }
        written += n;
    }

    xmlNanoHTTPClose(ctxt);
    /* Only close what was opened here, never the process's stdout. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1628
1629 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the content is written.
 * The context is closed and freed before returning, except when the
 * call is rejected because @ctxt or @filename is NULL.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;
    int toStdout;
    int written = 0;
    int n;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout)
        fd = 1;     /* standard output; descriptor 0 is standard input */
    else {
        /* O_TRUNC: do not leave stale bytes of a longer, older file. */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until the
     * whole content is out or a real error shows up. */
    while (written < len) {
        n = write(fd, buf + written, len - written);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (n == 0) {
            ret = -1;
            break;
        }
        written += n;
    }

    xmlNanoHTTPClose(ctxt);
    /* Only close what was opened here, never the process's stdout. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1670 #endif /* LIBXML_OUTPUT_ENABLED */
1671
1672 /**
1673 * xmlNanoHTTPReturnCode:
1674 * @ctx: the HTTP context
1675 *
1676 * Get the latest HTTP return code received
1677 *
1678 * Returns the HTTP return code for the request.
1679 */
1680 int
xmlNanoHTTPReturnCode(void * ctx)1681 xmlNanoHTTPReturnCode(void *ctx) {
1682 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1683
1684 if (ctxt == NULL) return(-1);
1685
1686 return(ctxt->returnValue);
1687 }
1688
1689 /**
1690 * xmlNanoHTTPAuthHeader:
1691 * @ctx: the HTTP context
1692 *
1693 * Get the authentication header of an HTTP context
1694 *
1695 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1696 * header.
1697 */
1698 const char *
xmlNanoHTTPAuthHeader(void * ctx)1699 xmlNanoHTTPAuthHeader(void *ctx) {
1700 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1701
1702 if (ctxt == NULL) return(NULL);
1703
1704 return(ctxt->authHeader);
1705 }
1706
1707 /**
1708 * xmlNanoHTTPContentLength:
1709 * @ctx: the HTTP context
1710 *
1711 * Provides the specified content length from the HTTP header.
1712 *
1713 * Return the specified content length from the HTTP header. Note that
1714 * a value of -1 indicates that the content length element was not included in
1715 * the response header.
1716 */
1717 int
xmlNanoHTTPContentLength(void * ctx)1718 xmlNanoHTTPContentLength( void * ctx ) {
1719 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1720
1721 return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1722 }
1723
1724 /**
1725 * xmlNanoHTTPRedir:
1726 * @ctx: the HTTP context
1727 *
1728 * Provides the specified redirection URL if available from the HTTP header.
1729 *
1730 * Return the specified redirection URL or NULL if not redirected.
1731 */
1732 const char *
xmlNanoHTTPRedir(void * ctx)1733 xmlNanoHTTPRedir( void * ctx ) {
1734 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1735
1736 return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1737 }
1738
1739 /**
1740 * xmlNanoHTTPEncoding:
1741 * @ctx: the HTTP context
1742 *
1743 * Provides the specified encoding if specified in the HTTP headers.
1744 *
1745 * Return the specified encoding or NULL if not available
1746 */
1747 const char *
xmlNanoHTTPEncoding(void * ctx)1748 xmlNanoHTTPEncoding( void * ctx ) {
1749 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1750
1751 return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1752 }
1753
1754 /**
1755 * xmlNanoHTTPMimeType:
1756 * @ctx: the HTTP context
1757 *
1758 * Provides the specified Mime-Type if specified in the HTTP headers.
1759 *
1760 * Return the specified Mime-Type or NULL if not available
1761 */
1762 const char *
xmlNanoHTTPMimeType(void * ctx)1763 xmlNanoHTTPMimeType( void * ctx ) {
1764 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1765
1766 return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1767 }
1768
1769 /**
1770 * xmlNanoHTTPFetchContent:
1771 * @ctx: the HTTP context
1772 * @ptr: pointer to set to the content buffer.
1773 * @len: integer pointer to hold the length of the content
1774 *
1775 * Check if all the content was read
1776 *
1777 * Returns 0 if all the content was read and available, returns
1778 * -1 if received content length was less than specified or an error
1779 * occurred.
1780 */
1781 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1782 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1783 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1784
1785 int rc = 0;
1786 int cur_lgth;
1787 int rcvd_lgth;
1788 int dummy_int;
1789 char * dummy_ptr = NULL;
1790
1791 /* Dummy up return input parameters if not provided */
1792
1793 if ( len == NULL )
1794 len = &dummy_int;
1795
1796 if ( ptr == NULL )
1797 ptr = &dummy_ptr;
1798
1799 /* But can't work without the context pointer */
1800
1801 if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1802 *len = 0;
1803 *ptr = NULL;
1804 return ( -1 );
1805 }
1806
1807 rcvd_lgth = ctxt->inptr - ctxt->content;
1808
1809 while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1810
1811 rcvd_lgth += cur_lgth;
1812 if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1813 break;
1814 }
1815
1816 *ptr = ctxt->content;
1817 *len = rcvd_lgth;
1818
1819 if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1820 rc = -1;
1821 else if ( rcvd_lgth == 0 )
1822 rc = -1;
1823
1824 return ( rc );
1825 }
1826
1827 #ifdef STANDALONE
/*
 * Standalone driver: fetches argv[1] into the file named by argv[2],
 * or into "-" when no filename is given; prints a usage message when
 * no URL is given.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    const char *target;

    if (argv[1] == NULL) {
        fprintf(stderr,
                "%s: minimal HTTP GET implementation\n", argv[0]);
        fprintf(stderr,
                "\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
        target = (argv[2] != NULL) ? argv[2] : "-";
        xmlNanoHTTPFetch(argv[1], target, &contentType);
        if (contentType != NULL)
            xmlFree(contentType);
    }

    xmlNanoHTTPCleanup();
    return(0);
}
1846 #endif /* STANDALONE */
1847 #else /* !LIBXML_HTTP_ENABLED */
1848 #ifdef STANDALONE
1849 #include <stdio.h>
/* Standalone stub used when HTTP support is compiled out. */
int main(int argc, char **argv) {
    const char *progname = argv[0];

    fprintf(stderr, "%s : HTTP support not compiled in\n", progname);
    return(0);
}
1855 #endif /* STANDALONE */
1856 #endif /* LIBXML_HTTP_ENABLED */
1857