1 /*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * daniel@veillard.com
12 */
13
14 #define NEED_SOCKETS
15 #define IN_LIBXML
16 #include "libxml.h"
17
18 #ifdef LIBXML_HTTP_ENABLED
19 #include <string.h>
20
21 #ifdef HAVE_STDLIB_H
22 #include <stdlib.h>
23 #endif
24 #ifdef HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #ifdef HAVE_SYS_TYPES_H
28 #include <sys/types.h>
29 #endif
30 #ifdef HAVE_SYS_SOCKET_H
31 #include <sys/socket.h>
32 #endif
33 #ifdef HAVE_NETINET_IN_H
34 #include <netinet/in.h>
35 #endif
36 #ifdef HAVE_ARPA_INET_H
37 #include <arpa/inet.h>
38 #endif
39 #ifdef HAVE_NETDB_H
40 #include <netdb.h>
41 #endif
42 #ifdef HAVE_RESOLV_H
43 #ifdef HAVE_ARPA_NAMESER_H
44 #include <arpa/nameser.h>
45 #endif
46 #include <resolv.h>
47 #endif
48 #ifdef HAVE_FCNTL_H
49 #include <fcntl.h>
50 #endif
51 #ifdef HAVE_ERRNO_H
52 #include <errno.h>
53 #endif
54 #ifdef HAVE_SYS_TIME_H
55 #include <sys/time.h>
56 #endif
57 #ifndef HAVE_POLL_H
58 #ifdef HAVE_SYS_SELECT_H
59 #include <sys/select.h>
60 #endif
61 #else
62 #include <poll.h>
63 #endif
64 #ifdef HAVE_STRINGS_H
65 #include <strings.h>
66 #endif
67 #ifdef SUPPORT_IP6
68 #include <resolv.h>
69 #endif
70 #ifdef HAVE_ZLIB_H
71 #include <zlib.h>
72 #endif
73
74
75 #ifdef VMS
76 #include <stropts>
77 #define XML_SOCKLEN_T unsigned int
78 #endif
79
80 #if defined(__MINGW32__) || defined(_WIN32_WCE)
81 #ifndef _WINSOCKAPI_
82 #define _WINSOCKAPI_
83 #endif
84 #include <wsockcompat.h>
85 #include <winsock2.h>
86 #undef XML_SOCKLEN_T
87 #define XML_SOCKLEN_T unsigned int
88 #endif
89
90 #include <libxml/globals.h>
91 #include <libxml/xmlerror.h>
92 #include <libxml/xmlmemory.h>
93 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
94 #include <libxml/nanohttp.h>
95 #include <libxml/globals.h>
96 #include <libxml/uri.h>
97
/**
 * A couple of portability macros.
 *
 * When Winsock is not in use, the Winsock-style names used throughout
 * this file (closesocket, SOCKET, INVALID_SOCKET) are mapped onto plain
 * descriptor-based equivalents.
 */
#ifndef _WINSOCKAPI_
/* closesocket() is only aliased to close() outside of (non-Haiku) BeOS */
#if !defined(__BEOS__) || defined(__HAIKU__)
#define closesocket(s) close(s)
#endif
#define SOCKET int
#define INVALID_SOCKET (-1)
#endif

/* BeOS: fall back to AF_INET when PF_INET is not provided */
#ifdef __BEOS__
#ifndef PF_INET
#define PF_INET AF_INET
#endif
#endif

/* Type of the length argument passed to getsockopt() when not set earlier */
#ifndef XML_SOCKLEN_T
#define XML_SOCKLEN_T unsigned int
#endif

/*
 * STANDALONE builds turn on HTTP debugging output and replace the
 * libxml2 case-insensitive string comparisons with their libc versions.
 */
#ifdef STANDALONE
#define DEBUG_HTTP
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
#endif
124
/*
 * Redirection limit; presumably the maximum number of redirects followed
 * for one request (its user is not in this part of the file - TODO confirm).
 */
#define XML_NANO_HTTP_MAX_REDIR 10

/*
 * Number of bytes requested by each recv() call in xmlNanoHTTPRecv(), and
 * the amount of free space that must remain in the input buffer before
 * such a call is issued.
 */
#define XML_NANO_HTTP_CHUNK 4096

/*
 * Values stored in xmlNanoHTTPCtxt.state. The send and receive helpers
 * test WRITE and READ with a bitwise AND; xmlNanoHTTPFreeCtxt() stores NONE.
 */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4
133
/*
 * State kept for one HTTP request: the components of the parsed URL,
 * the socket, the outgoing request buffer, the input buffer with its
 * read/write cursors, and the values extracted from the response headers.
 */
typedef struct xmlNanoHTTPCtxt {
    char *protocol;	/* the protocol name */
    char *hostname;	/* the host name */
    int port;		/* the port */
    char *path;		/* the path within the URL */
    char *query;	/* the query string */
    SOCKET fd;		/* the file descriptor for the socket */
    int state;		/* WRITE / READ / CLOSED */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* len of the input buffer */
    int last;		/* return code for last operation */
    int returnValue;	/* the protocol return value */
    int version;	/* the protocol version */
    int ContentLength;	/* specified content length from HTTP header */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
    char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
    char *encoding;	/* encoding extracted from the contentType */
    char *mimeType;	/* Mime-Type extracted from the contentType */
#ifdef HAVE_ZLIB_H
    z_stream *strm;	/* Zlib stream object */
    int usesGzip;	/* "Content-Encoding: gzip" was detected */
#endif
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
163
/* set to 1 by xmlNanoHTTPInit(), reset to 0 by xmlNanoHTTPCleanup() */
static int initialized = 0;
static char *proxy = NULL;	 /* the proxy name if any */
static int proxyPort;		 /* the proxy port if any */
/* seconds to wait in select() or poll() before giving up on the socket */
static unsigned int timeout = 60;

/* forward declaration; the definition is not in this part of the file */
static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len );
170
171 /**
172 * xmlHTTPErrMemory:
173 * @extra: extra informations
174 *
175 * Handle an out of memory condition
176 */
177 static void
xmlHTTPErrMemory(const char * extra)178 xmlHTTPErrMemory(const char *extra)
179 {
180 __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
181 }
182
/**
 * socket_errno:
 *
 * Portability helper giving the error code of the last socket operation.
 *
 * Returns WSAGetLastError() when _WINSOCKAPI_ is defined, errno otherwise.
 */
static int
socket_errno(void)
{
    int code;

#ifdef _WINSOCKAPI_
    code = WSAGetLastError();
#else
    code = errno;
#endif
    return(code);
}
193
#ifdef SUPPORT_IP6
/**
 * have_ipv6:
 *
 * Probe for IPv6 support by trying to create an AF_INET6 stream socket.
 * The probe socket is released with closesocket(), the portable name this
 * file uses for closing sockets, so that it is also valid for Winsock
 * SOCKET handles.
 *
 * Returns 1 if the socket could be created, 0 otherwise.
 */
static
int have_ipv6(void) {
    SOCKET s;

    s = socket (AF_INET6, SOCK_STREAM, 0);
    if (s != INVALID_SOCKET) {
	closesocket (s);
	return (1);
    }
    return (0);
}
#endif
207
208 /**
209 * xmlNanoHTTPInit:
210 *
211 * Initialize the HTTP protocol layer.
212 * Currently it just checks for proxy informations
213 */
214
215 void
xmlNanoHTTPInit(void)216 xmlNanoHTTPInit(void) {
217 const char *env;
218 #ifdef _WINSOCKAPI_
219 WSADATA wsaData;
220 #endif
221
222 if (initialized)
223 return;
224
225 #ifdef _WINSOCKAPI_
226 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
227 return;
228 #endif
229
230 if (proxy == NULL) {
231 proxyPort = 80;
232 env = getenv("no_proxy");
233 if (env && ((env[0] == '*') && (env[1] == 0)))
234 goto done;
235 env = getenv("http_proxy");
236 if (env != NULL) {
237 xmlNanoHTTPScanProxy(env);
238 goto done;
239 }
240 env = getenv("HTTP_PROXY");
241 if (env != NULL) {
242 xmlNanoHTTPScanProxy(env);
243 goto done;
244 }
245 }
246 done:
247 initialized = 1;
248 }
249
250 /**
251 * xmlNanoHTTPCleanup:
252 *
253 * Cleanup the HTTP protocol layer.
254 */
255
256 void
xmlNanoHTTPCleanup(void)257 xmlNanoHTTPCleanup(void) {
258 if (proxy != NULL) {
259 xmlFree(proxy);
260 proxy = NULL;
261 }
262 #ifdef _WINSOCKAPI_
263 if (initialized)
264 WSACleanup();
265 #endif
266 initialized = 0;
267 return;
268 }
269
270 /**
271 * xmlNanoHTTPScanURL:
272 * @ctxt: an HTTP context
273 * @URL: The URL used to initialize the context
274 *
275 * (Re)Initialize an HTTP context by parsing the URL and finding
276 * the protocol host port and path it indicates.
277 */
278
279 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)280 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
281 xmlURIPtr uri;
282 /*
283 * Clear any existing data from the context
284 */
285 if (ctxt->protocol != NULL) {
286 xmlFree(ctxt->protocol);
287 ctxt->protocol = NULL;
288 }
289 if (ctxt->hostname != NULL) {
290 xmlFree(ctxt->hostname);
291 ctxt->hostname = NULL;
292 }
293 if (ctxt->path != NULL) {
294 xmlFree(ctxt->path);
295 ctxt->path = NULL;
296 }
297 if (ctxt->query != NULL) {
298 xmlFree(ctxt->query);
299 ctxt->query = NULL;
300 }
301 if (URL == NULL) return;
302
303 uri = xmlParseURIRaw(URL, 1);
304 if (uri == NULL)
305 return;
306
307 if ((uri->scheme == NULL) || (uri->server == NULL)) {
308 xmlFreeURI(uri);
309 return;
310 }
311
312 ctxt->protocol = xmlMemStrdup(uri->scheme);
313 ctxt->hostname = xmlMemStrdup(uri->server);
314 if (uri->path != NULL)
315 ctxt->path = xmlMemStrdup(uri->path);
316 else
317 ctxt->path = xmlMemStrdup("/");
318 if (uri->query != NULL)
319 ctxt->query = xmlMemStrdup(uri->query);
320 if (uri->port != 0)
321 ctxt->port = uri->port;
322
323 xmlFreeURI(uri);
324 }
325
326 /**
327 * xmlNanoHTTPScanProxy:
328 * @URL: The proxy URL used to initialize the proxy context
329 *
330 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
331 * the protocol host port it indicates.
332 * Should be like http://myproxy/ or http://myproxy:3128/
333 * A NULL URL cleans up proxy informations.
334 */
335
336 void
xmlNanoHTTPScanProxy(const char * URL)337 xmlNanoHTTPScanProxy(const char *URL) {
338 xmlURIPtr uri;
339
340 if (proxy != NULL) {
341 xmlFree(proxy);
342 proxy = NULL;
343 }
344 proxyPort = 0;
345
346 #ifdef DEBUG_HTTP
347 if (URL == NULL)
348 xmlGenericError(xmlGenericErrorContext,
349 "Removing HTTP proxy info\n");
350 else
351 xmlGenericError(xmlGenericErrorContext,
352 "Using HTTP proxy %s\n", URL);
353 #endif
354 if (URL == NULL) return;
355
356 uri = xmlParseURIRaw(URL, 1);
357 if ((uri == NULL) || (uri->scheme == NULL) ||
358 (strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
359 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
360 if (uri != NULL)
361 xmlFreeURI(uri);
362 return;
363 }
364
365 proxy = xmlMemStrdup(uri->server);
366 if (uri->port != 0)
367 proxyPort = uri->port;
368
369 xmlFreeURI(uri);
370 }
371
372 /**
373 * xmlNanoHTTPNewCtxt:
374 * @URL: The URL used to initialize the context
375 *
376 * Allocate and initialize a new HTTP context.
377 *
378 * Returns an HTTP context or NULL in case of error.
379 */
380
381 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)382 xmlNanoHTTPNewCtxt(const char *URL) {
383 xmlNanoHTTPCtxtPtr ret;
384
385 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
386 if (ret == NULL) {
387 xmlHTTPErrMemory("allocating context");
388 return(NULL);
389 }
390
391 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
392 ret->port = 80;
393 ret->returnValue = 0;
394 ret->fd = INVALID_SOCKET;
395 ret->ContentLength = -1;
396
397 xmlNanoHTTPScanURL(ret, URL);
398
399 return(ret);
400 }
401
402 /**
403 * xmlNanoHTTPFreeCtxt:
404 * @ctxt: an HTTP context
405 *
406 * Frees the context after closing the connection.
407 */
408
409 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)410 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
411 if (ctxt == NULL) return;
412 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
413 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
414 if (ctxt->path != NULL) xmlFree(ctxt->path);
415 if (ctxt->query != NULL) xmlFree(ctxt->query);
416 if (ctxt->out != NULL) xmlFree(ctxt->out);
417 if (ctxt->in != NULL) xmlFree(ctxt->in);
418 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
419 if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
420 if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
421 if (ctxt->location != NULL) xmlFree(ctxt->location);
422 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
423 #ifdef HAVE_ZLIB_H
424 if (ctxt->strm != NULL) {
425 inflateEnd(ctxt->strm);
426 xmlFree(ctxt->strm);
427 }
428 #endif
429
430 ctxt->state = XML_NANO_HTTP_NONE;
431 if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
432 ctxt->fd = INVALID_SOCKET;
433 xmlFree(ctxt);
434 }
435
436 /**
437 * xmlNanoHTTPSend:
438 * @ctxt: an HTTP context
439 *
440 * Send the input needed to initiate the processing on the server side
441 * Returns number of bytes sent or -1 on error.
442 */
443
444 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)445 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
446 {
447 int total_sent = 0;
448 #ifdef HAVE_POLL_H
449 struct pollfd p;
450 #else
451 struct timeval tv;
452 fd_set wfd;
453 #endif
454
455 if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
456 while (total_sent < outlen) {
457 int nsent = send(ctxt->fd, xmt_ptr + total_sent,
458 outlen - total_sent, 0);
459
460 if (nsent > 0)
461 total_sent += nsent;
462 else if ((nsent == -1) &&
463 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
464 (socket_errno() != EAGAIN) &&
465 #endif
466 (socket_errno() != EWOULDBLOCK)) {
467 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
468 if (total_sent == 0)
469 total_sent = -1;
470 break;
471 } else {
472 /*
473 * No data sent
474 * Since non-blocking sockets are used, wait for
475 * socket to be writable or default timeout prior
476 * to retrying.
477 */
478 #ifndef HAVE_POLL_H
479 #ifndef _WINSOCKAPI_
480 if (ctxt->fd > FD_SETSIZE)
481 return -1;
482 #endif
483
484 tv.tv_sec = timeout;
485 tv.tv_usec = 0;
486 FD_ZERO(&wfd);
487 #ifdef _MSC_VER
488 #pragma warning(push)
489 #pragma warning(disable: 4018)
490 #endif
491 FD_SET(ctxt->fd, &wfd);
492 #ifdef _MSC_VER
493 #pragma warning(pop)
494 #endif
495 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
496 #else
497 p.fd = ctxt->fd;
498 p.events = POLLOUT;
499 (void) poll(&p, 1, timeout * 1000);
500 #endif /* !HAVE_POLL_H */
501 }
502 }
503 }
504
505 return total_sent;
506 }
507
508 /**
509 * xmlNanoHTTPRecv:
510 * @ctxt: an HTTP context
511 *
512 * Read information coming from the HTTP connection.
513 * This is a blocking call (but it blocks in select(), not read()).
514 *
515 * Returns the number of byte read or -1 in case of error.
516 */
517
518 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)519 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
520 {
521 #ifdef HAVE_POLL_H
522 struct pollfd p;
523 #else
524 fd_set rfd;
525 struct timeval tv;
526 #endif
527
528
529 while (ctxt->state & XML_NANO_HTTP_READ) {
530 if (ctxt->in == NULL) {
531 ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char));
532 if (ctxt->in == NULL) {
533 xmlHTTPErrMemory("allocating input");
534 ctxt->last = -1;
535 return (-1);
536 }
537 ctxt->inlen = 65000;
538 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
539 }
540 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
541 int delta = ctxt->inrptr - ctxt->in;
542 int len = ctxt->inptr - ctxt->inrptr;
543
544 memmove(ctxt->in, ctxt->inrptr, len);
545 ctxt->inrptr -= delta;
546 ctxt->content -= delta;
547 ctxt->inptr -= delta;
548 }
549 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
550 int d_inptr = ctxt->inptr - ctxt->in;
551 int d_content = ctxt->content - ctxt->in;
552 int d_inrptr = ctxt->inrptr - ctxt->in;
553 char *tmp_ptr = ctxt->in;
554
555 ctxt->inlen *= 2;
556 ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
557 if (ctxt->in == NULL) {
558 xmlHTTPErrMemory("allocating input buffer");
559 xmlFree(tmp_ptr);
560 ctxt->last = -1;
561 return (-1);
562 }
563 ctxt->inptr = ctxt->in + d_inptr;
564 ctxt->content = ctxt->in + d_content;
565 ctxt->inrptr = ctxt->in + d_inrptr;
566 }
567 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
568 if (ctxt->last > 0) {
569 ctxt->inptr += ctxt->last;
570 return (ctxt->last);
571 }
572 if (ctxt->last == 0) {
573 return (0);
574 }
575 if (ctxt->last == -1) {
576 switch (socket_errno()) {
577 case EINPROGRESS:
578 case EWOULDBLOCK:
579 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
580 case EAGAIN:
581 #endif
582 break;
583
584 case ECONNRESET:
585 case ESHUTDOWN:
586 return (0);
587
588 default:
589 __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
590 return (-1);
591 }
592 }
593 #ifdef HAVE_POLL_H
594 p.fd = ctxt->fd;
595 p.events = POLLIN;
596 if ((poll(&p, 1, timeout * 1000) < 1)
597 #if defined(EINTR)
598 && (errno != EINTR)
599 #endif
600 )
601 return (0);
602 #else /* !HAVE_POLL_H */
603 #ifndef _WINSOCKAPI_
604 if (ctxt->fd > FD_SETSIZE)
605 return 0;
606 #endif
607
608 tv.tv_sec = timeout;
609 tv.tv_usec = 0;
610 FD_ZERO(&rfd);
611
612 #ifdef _MSC_VER
613 #pragma warning(push)
614 #pragma warning(disable: 4018)
615 #endif
616
617 FD_SET(ctxt->fd, &rfd);
618
619 #ifdef _MSC_VER
620 #pragma warning(pop)
621 #endif
622
623 if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
624 #if defined(EINTR)
625 && (errno != EINTR)
626 #endif
627 )
628 return (0);
629 #endif /* !HAVE_POLL_H */
630 }
631 return (0);
632 }
633
634 /**
635 * xmlNanoHTTPReadLine:
636 * @ctxt: an HTTP context
637 *
638 * Read one line in the HTTP server output, usually for extracting
639 * the HTTP protocol informations from the answer header.
640 *
641 * Returns a newly allocated string with a copy of the line, or NULL
642 * which indicate the end of the input.
643 */
644
645 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)646 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
647 char buf[4096];
648 char *bp = buf;
649 int rc;
650
651 while (bp - buf < 4095) {
652 if (ctxt->inrptr == ctxt->inptr) {
653 if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
654 if (bp == buf)
655 return(NULL);
656 else
657 *bp = 0;
658 return(xmlMemStrdup(buf));
659 }
660 else if ( rc == -1 ) {
661 return ( NULL );
662 }
663 }
664 *bp = *ctxt->inrptr++;
665 if (*bp == '\n') {
666 *bp = 0;
667 return(xmlMemStrdup(buf));
668 }
669 if (*bp != '\r')
670 bp++;
671 }
672 buf[4095] = 0;
673 return(xmlMemStrdup(buf));
674 }
675
676
677 /**
678 * xmlNanoHTTPScanAnswer:
679 * @ctxt: an HTTP context
680 * @line: an HTTP header line
681 *
682 * Try to extract useful informations from the server answer.
683 * We currently parse and process:
684 * - The HTTP revision/ return code
685 * - The Content-Type, Mime-Type and charset used
686 * - The Location for redirect processing.
687 *
688 * Returns -1 in case of failure, the file descriptor number otherwise
689 */
690
691 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)692 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
693 const char *cur = line;
694
695 if (line == NULL) return;
696
697 if (!strncmp(line, "HTTP/", 5)) {
698 int version = 0;
699 int ret = 0;
700
701 cur += 5;
702 while ((*cur >= '0') && (*cur <= '9')) {
703 version *= 10;
704 version += *cur - '0';
705 cur++;
706 }
707 if (*cur == '.') {
708 cur++;
709 if ((*cur >= '0') && (*cur <= '9')) {
710 version *= 10;
711 version += *cur - '0';
712 cur++;
713 }
714 while ((*cur >= '0') && (*cur <= '9'))
715 cur++;
716 } else
717 version *= 10;
718 if ((*cur != ' ') && (*cur != '\t')) return;
719 while ((*cur == ' ') || (*cur == '\t')) cur++;
720 if ((*cur < '0') || (*cur > '9')) return;
721 while ((*cur >= '0') && (*cur <= '9')) {
722 ret *= 10;
723 ret += *cur - '0';
724 cur++;
725 }
726 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
727 ctxt->returnValue = ret;
728 ctxt->version = version;
729 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
730 const xmlChar *charset, *last, *mime;
731 cur += 13;
732 while ((*cur == ' ') || (*cur == '\t')) cur++;
733 if (ctxt->contentType != NULL)
734 xmlFree(ctxt->contentType);
735 ctxt->contentType = xmlMemStrdup(cur);
736 mime = (const xmlChar *) cur;
737 last = mime;
738 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
739 (*last != ';') && (*last != ','))
740 last++;
741 if (ctxt->mimeType != NULL)
742 xmlFree(ctxt->mimeType);
743 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
744 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
745 if (charset != NULL) {
746 charset += 8;
747 last = charset;
748 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
749 (*last != ';') && (*last != ','))
750 last++;
751 if (ctxt->encoding != NULL)
752 xmlFree(ctxt->encoding);
753 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
754 }
755 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
756 const xmlChar *charset, *last, *mime;
757 cur += 12;
758 if (ctxt->contentType != NULL) return;
759 while ((*cur == ' ') || (*cur == '\t')) cur++;
760 ctxt->contentType = xmlMemStrdup(cur);
761 mime = (const xmlChar *) cur;
762 last = mime;
763 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
764 (*last != ';') && (*last != ','))
765 last++;
766 if (ctxt->mimeType != NULL)
767 xmlFree(ctxt->mimeType);
768 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
769 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
770 if (charset != NULL) {
771 charset += 8;
772 last = charset;
773 while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
774 (*last != ';') && (*last != ','))
775 last++;
776 if (ctxt->encoding != NULL)
777 xmlFree(ctxt->encoding);
778 ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
779 }
780 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
781 cur += 9;
782 while ((*cur == ' ') || (*cur == '\t')) cur++;
783 if (ctxt->location != NULL)
784 xmlFree(ctxt->location);
785 if (*cur == '/') {
786 xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
787 xmlChar *tmp_loc =
788 xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
789 ctxt->location =
790 (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
791 } else {
792 ctxt->location = xmlMemStrdup(cur);
793 }
794 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
795 cur += 17;
796 while ((*cur == ' ') || (*cur == '\t')) cur++;
797 if (ctxt->authHeader != NULL)
798 xmlFree(ctxt->authHeader);
799 ctxt->authHeader = xmlMemStrdup(cur);
800 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
801 cur += 19;
802 while ((*cur == ' ') || (*cur == '\t')) cur++;
803 if (ctxt->authHeader != NULL)
804 xmlFree(ctxt->authHeader);
805 ctxt->authHeader = xmlMemStrdup(cur);
806 #ifdef HAVE_ZLIB_H
807 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
808 cur += 17;
809 while ((*cur == ' ') || (*cur == '\t')) cur++;
810 if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
811 ctxt->usesGzip = 1;
812
813 ctxt->strm = xmlMalloc(sizeof(z_stream));
814
815 if (ctxt->strm != NULL) {
816 ctxt->strm->zalloc = Z_NULL;
817 ctxt->strm->zfree = Z_NULL;
818 ctxt->strm->opaque = Z_NULL;
819 ctxt->strm->avail_in = 0;
820 ctxt->strm->next_in = Z_NULL;
821
822 inflateInit2( ctxt->strm, 31 );
823 }
824 }
825 #endif
826 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
827 cur += 15;
828 ctxt->ContentLength = strtol( cur, NULL, 10 );
829 }
830 }
831
832 /**
833 * xmlNanoHTTPConnectAttempt:
834 * @addr: a socket address structure
835 *
836 * Attempt a connection to the given IP:port endpoint. It forces
837 * non-blocking semantic on the socket, and allow 60 seconds for
838 * the host to answer.
839 *
840 * Returns -1 in case of failure, the file descriptor number otherwise
841 */
842
843 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)844 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
845 {
846 #ifndef HAVE_POLL_H
847 fd_set wfd;
848 #ifdef _WINSOCKAPI_
849 fd_set xfd;
850 #endif
851 struct timeval tv;
852 #else /* !HAVE_POLL_H */
853 struct pollfd p;
854 #endif /* !HAVE_POLL_H */
855 int status;
856
857 int addrlen;
858
859 SOCKET s;
860
861 #ifdef SUPPORT_IP6
862 if (addr->sa_family == AF_INET6) {
863 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
864 addrlen = sizeof(struct sockaddr_in6);
865 } else
866 #endif
867 {
868 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
869 addrlen = sizeof(struct sockaddr_in);
870 }
871 if (s == INVALID_SOCKET) {
872 #ifdef DEBUG_HTTP
873 perror("socket");
874 #endif
875 __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
876 return INVALID_SOCKET;
877 }
878 #ifdef _WINSOCKAPI_
879 {
880 u_long one = 1;
881
882 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
883 }
884 #else /* _WINSOCKAPI_ */
885 #if defined(VMS)
886 {
887 int enable = 1;
888
889 status = ioctl(s, FIONBIO, &enable);
890 }
891 #else /* VMS */
892 #if defined(__BEOS__) && !defined(__HAIKU__)
893 {
894 bool noblock = true;
895
896 status =
897 setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock,
898 sizeof(noblock));
899 }
900 #else /* __BEOS__ */
901 if ((status = fcntl(s, F_GETFL, 0)) != -1) {
902 #ifdef O_NONBLOCK
903 status |= O_NONBLOCK;
904 #else /* O_NONBLOCK */
905 #ifdef F_NDELAY
906 status |= F_NDELAY;
907 #endif /* F_NDELAY */
908 #endif /* !O_NONBLOCK */
909 status = fcntl(s, F_SETFL, status);
910 }
911 if (status < 0) {
912 #ifdef DEBUG_HTTP
913 perror("nonblocking");
914 #endif
915 __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
916 closesocket(s);
917 return INVALID_SOCKET;
918 }
919 #endif /* !__BEOS__ */
920 #endif /* !VMS */
921 #endif /* !_WINSOCKAPI_ */
922
923 if (connect(s, addr, addrlen) == -1) {
924 switch (socket_errno()) {
925 case EINPROGRESS:
926 case EWOULDBLOCK:
927 break;
928 default:
929 __xmlIOErr(XML_FROM_HTTP, 0,
930 "error connecting to HTTP server");
931 closesocket(s);
932 return INVALID_SOCKET;
933 }
934 }
935 #ifndef HAVE_POLL_H
936 tv.tv_sec = timeout;
937 tv.tv_usec = 0;
938
939 #ifdef _MSC_VER
940 #pragma warning(push)
941 #pragma warning(disable: 4018)
942 #endif
943 #ifndef _WINSOCKAPI_
944 if (s > FD_SETSIZE)
945 return INVALID_SOCKET;
946 #endif
947 FD_ZERO(&wfd);
948 FD_SET(s, &wfd);
949
950 #ifdef _WINSOCKAPI_
951 FD_ZERO(&xfd);
952 FD_SET(s, &xfd);
953
954 switch (select(s + 1, NULL, &wfd, &xfd, &tv))
955 #else
956 switch (select(s + 1, NULL, &wfd, NULL, &tv))
957 #endif
958 #ifdef _MSC_VER
959 #pragma warning(pop)
960 #endif
961
962 #else /* !HAVE_POLL_H */
963 p.fd = s;
964 p.events = POLLOUT;
965 switch (poll(&p, 1, timeout * 1000))
966 #endif /* !HAVE_POLL_H */
967
968 {
969 case 0:
970 /* Time out */
971 __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
972 closesocket(s);
973 return INVALID_SOCKET;
974 case -1:
975 /* Ermm.. ?? */
976 __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
977 closesocket(s);
978 return INVALID_SOCKET;
979 }
980
981 #ifndef HAVE_POLL_H
982 if (FD_ISSET(s, &wfd)
983 #ifdef _WINSOCKAPI_
984 || FD_ISSET(s, &xfd)
985 #endif
986 )
987 #else /* !HAVE_POLL_H */
988 if (p.revents == POLLOUT)
989 #endif /* !HAVE_POLL_H */
990 {
991 XML_SOCKLEN_T len;
992
993 len = sizeof(status);
994 #ifdef SO_ERROR
995 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
996 0) {
997 /* Solaris error code */
998 __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
999 return INVALID_SOCKET;
1000 }
1001 #endif
1002 if (status) {
1003 __xmlIOErr(XML_FROM_HTTP, 0,
1004 "Error connecting to remote host");
1005 closesocket(s);
1006 errno = status;
1007 return INVALID_SOCKET;
1008 }
1009 } else {
1010 /* pbm */
1011 __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
1012 closesocket(s);
1013 return INVALID_SOCKET;
1014 }
1015
1016 return (s);
1017 }
1018
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. It tries
 * the multiple IP provided by the DNS if available, in the order they
 * are returned, and stops at the first one that connects.
 * Depending on the build configuration the name is resolved with
 * getaddrinfo() or with gethostbyname().
 *
 * Returns INVALID_SOCKET in case of failure, the connected socket otherwise
 */

static SOCKET
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    struct sockaddr *addr = NULL;
    struct in_addr ia;
    struct sockaddr_in sockin;

#ifdef SUPPORT_IP6
    struct in6_addr ia6;
    struct sockaddr_in6 sockin6;
#endif
    int i;
    SOCKET s;

    memset (&sockin, 0, sizeof(sockin));
#ifdef SUPPORT_IP6
    memset (&sockin6, 0, sizeof(sockin6));
#endif

    /* without getaddrinfo(), set the resolver's RES_USE_INET6 option */
#if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
    if (have_ipv6 ())
    {
	if (!(_res.options & RES_INIT))
	    res_init();
	_res.options |= RES_USE_INET6;
    }
#endif

    /*
     * getaddrinfo() path. Outside of _WIN32 it is only taken when an
     * IPv6 socket can be created; the gethostbyname() block further down
     * is then the matching "else" branch.
     */
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    if (have_ipv6 ())
#endif
#if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
    {
	int status;
	struct addrinfo hints, *res, *result;

	result = NULL;
	memset (&hints, 0,sizeof(hints));
	hints.ai_socktype = SOCK_STREAM;

	status = getaddrinfo (host, NULL, &hints, &result);
	if (status) {
	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
	    return INVALID_SOCKET;
	}

	/* try every returned address until one connects */
	for (res = result; res; res = res->ai_next) {
	    if (res->ai_family == AF_INET) {
		if (res->ai_addrlen > sizeof(sockin)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    freeaddrinfo (result);
		    return INVALID_SOCKET;
		}
		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
		/* the lookup was done without a service: set the port here */
		sockin.sin_port = htons (port);
		addr = (struct sockaddr *)&sockin;
#ifdef SUPPORT_IP6
	    } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
		if (res->ai_addrlen > sizeof(sockin6)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    freeaddrinfo (result);
		    return INVALID_SOCKET;
		}
		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
		sockin6.sin6_port = htons (port);
		addr = (struct sockaddr *)&sockin6;
#endif
	    } else
		continue;              /* for: skip other address families */

	    s = xmlNanoHTTPConnectAttempt (addr);
	    if (s != INVALID_SOCKET) {
		freeaddrinfo (result);
		return (s);
	    }
	}

	if (result)
	    freeaddrinfo (result);
    }
#endif
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    else
#endif
    /* gethostbyname() path */
#if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
    {
	h = gethostbyname (host);
	if (h == NULL) {

/*
 * Okay, I got fed up by the non-portability of this error message
 * extraction code. it work on Linux, if it work on your platform
 * and one want to enable it, send me the defined(foobar) needed
 */
#if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(linux)
	    const char *h_err_txt = "";

	    switch (h_errno) {
		case HOST_NOT_FOUND:
		    h_err_txt = "Authoritive host not found";
		    break;

		case TRY_AGAIN:
		    h_err_txt =
			"Non-authoritive host not found or server failure.";
		    break;

		case NO_RECOVERY:
		    h_err_txt =
			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
		    break;

		case NO_ADDRESS:
		    h_err_txt =
			"Valid name, no data record of requested type.";
		    break;

		default:
		    h_err_txt = "No error text defined.";
		    break;
	    }
	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
#else
	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
#endif
	    return INVALID_SOCKET;
	}

	/* try every address of the host entry until one connects */
	for (i = 0; h->h_addr_list[i]; i++) {
	    if (h->h_addrtype == AF_INET) {
		/* A records (IPv4) */
		if ((unsigned int) h->h_length > sizeof(ia)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    return INVALID_SOCKET;
		}
		memcpy (&ia, h->h_addr_list[i], h->h_length);
		sockin.sin_family = h->h_addrtype;
		sockin.sin_addr = ia;
		sockin.sin_port = (u_short)htons ((unsigned short)port);
		addr = (struct sockaddr *) &sockin;
#ifdef SUPPORT_IP6
	    } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
		/* AAAA records (IPv6) */
		if ((unsigned int) h->h_length > sizeof(ia6)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    return INVALID_SOCKET;
		}
		memcpy (&ia6, h->h_addr_list[i], h->h_length);
		sockin6.sin6_family = h->h_addrtype;
		sockin6.sin6_addr = ia6;
		sockin6.sin6_port = htons (port);
		addr = (struct sockaddr *) &sockin6;
#endif
	    } else
		break;              /* for: unsupported address type, give up */

	    s = xmlNanoHTTPConnectAttempt (addr);
	    if (s != INVALID_SOCKET)
		return (s);
	}
    }
#endif

    /* reached when no address could be connected to */
#ifdef DEBUG_HTTP
    xmlGenericError(xmlGenericErrorContext,
                    "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
                    host);
#endif
    return INVALID_SOCKET;
}
1201
1202
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP GET. @contentType, when provided, is reset to NULL before the
 * request is issued.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL)
	*contentType = NULL;

    /* no method, no input, no extra headers */
    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1221
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  the URL to load
 * @contentType:  optional location where the Content-Type information is
 *                returned; it is reset to NULL before the request is made
 * @redir:  optional location where the redirected URL is returned; it is
 *          reset to NULL before the request is made
 *
 * Tries to open a connection to the indicated resource via HTTP GET.
 * This simply forwards to xmlNanoHTTPMethodRedir() with no method, no
 * input and no extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided, must be freed by the caller.
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    /* Reset both OUT parameters before delegating. */
    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1242
1243 /**
1244 * xmlNanoHTTPRead:
1245 * @ctx: the HTTP context
1246 * @dest: a buffer
1247 * @len: the buffer length
1248 *
1249 * This function tries to read @len bytes from the existing HTTP connection
1250 * and saves them in @dest. This is a blocking call.
1251 *
1252 * Returns the number of byte read. 0 is an indication of an end of connection.
1253 * -1 indicates a parameter error.
1254 */
1255 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1256 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1257 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1258 #ifdef HAVE_ZLIB_H
1259 int bytes_read = 0;
1260 int orig_avail_in;
1261 int z_ret;
1262 #endif
1263
1264 if (ctx == NULL) return(-1);
1265 if (dest == NULL) return(-1);
1266 if (len <= 0) return(0);
1267
1268 #ifdef HAVE_ZLIB_H
1269 if (ctxt->usesGzip == 1) {
1270 if (ctxt->strm == NULL) return(0);
1271
1272 ctxt->strm->next_out = dest;
1273 ctxt->strm->avail_out = len;
1274 ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1275
1276 while (ctxt->strm->avail_out > 0 &&
1277 (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1278 orig_avail_in = ctxt->strm->avail_in =
1279 ctxt->inptr - ctxt->inrptr - bytes_read;
1280 ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1281
1282 z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1283 bytes_read += orig_avail_in - ctxt->strm->avail_in;
1284
1285 if (z_ret != Z_OK) break;
1286 }
1287
1288 ctxt->inrptr += bytes_read;
1289 return(len - ctxt->strm->avail_out);
1290 }
1291 #endif
1292
1293 while (ctxt->inptr - ctxt->inrptr < len) {
1294 if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1295 }
1296 if (ctxt->inptr - ctxt->inrptr < len)
1297 len = ctxt->inptr - ctxt->inrptr;
1298 memcpy(dest, ctxt->inrptr, len);
1299 ctxt->inrptr += len;
1300 return(len);
1301 }
1302
1303 /**
1304 * xmlNanoHTTPClose:
1305 * @ctx: the HTTP context
1306 *
1307 * This function closes an HTTP context, it ends up the connection and
1308 * free all data related to it.
1309 */
1310 void
xmlNanoHTTPClose(void * ctx)1311 xmlNanoHTTPClose(void *ctx) {
1312 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1313
1314 if (ctx == NULL) return;
1315
1316 xmlNanoHTTPFreeCtxt(ctxt);
1317 }
1318
1319 /**
1320 * xmlNanoHTTPMethodRedir:
1321 * @URL: The URL to load
1322 * @method: the HTTP method to use
1323 * @input: the input string if any
1324 * @contentType: the Content-Type information IN and OUT
1325 * @redir: the redirected URL OUT
1326 * @headers: the extra headers
1327 * @ilen: input length
1328 *
1329 * This function try to open a connection to the indicated resource
1330 * via HTTP using the given @method, adding the given extra headers
1331 * and the input buffer for the request content.
1332 *
1333 * Returns NULL in case of failure, otherwise a request handler.
1334 * The contentType, or redir, if provided must be freed by the caller
1335 */
1336
1337 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1338 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1339 char **contentType, char **redir,
1340 const char *headers, int ilen ) {
1341 xmlNanoHTTPCtxtPtr ctxt;
1342 char *bp, *p;
1343 int blen;
1344 SOCKET ret;
1345 int nbRedirects = 0;
1346 char *redirURL = NULL;
1347 #ifdef DEBUG_HTTP
1348 int xmt_bytes;
1349 #endif
1350
1351 if (URL == NULL) return(NULL);
1352 if (method == NULL) method = "GET";
1353 xmlNanoHTTPInit();
1354
1355 retry:
1356 if (redirURL == NULL)
1357 ctxt = xmlNanoHTTPNewCtxt(URL);
1358 else {
1359 ctxt = xmlNanoHTTPNewCtxt(redirURL);
1360 ctxt->location = xmlMemStrdup(redirURL);
1361 }
1362
1363 if ( ctxt == NULL ) {
1364 return ( NULL );
1365 }
1366
1367 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1368 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1369 xmlNanoHTTPFreeCtxt(ctxt);
1370 if (redirURL != NULL) xmlFree(redirURL);
1371 return(NULL);
1372 }
1373 if (ctxt->hostname == NULL) {
1374 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1375 "Failed to identify host in URI");
1376 xmlNanoHTTPFreeCtxt(ctxt);
1377 if (redirURL != NULL) xmlFree(redirURL);
1378 return(NULL);
1379 }
1380 if (proxy) {
1381 blen = strlen(ctxt->hostname) * 2 + 16;
1382 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1383 }
1384 else {
1385 blen = strlen(ctxt->hostname);
1386 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1387 }
1388 if (ret == INVALID_SOCKET) {
1389 xmlNanoHTTPFreeCtxt(ctxt);
1390 if (redirURL != NULL) xmlFree(redirURL);
1391 return(NULL);
1392 }
1393 ctxt->fd = ret;
1394
1395 if (input == NULL)
1396 ilen = 0;
1397 else
1398 blen += 36;
1399
1400 if (headers != NULL)
1401 blen += strlen(headers) + 2;
1402 if (contentType && *contentType)
1403 /* reserve for string plus 'Content-Type: \r\n" */
1404 blen += strlen(*contentType) + 16;
1405 if (ctxt->query != NULL)
1406 /* 1 for '?' */
1407 blen += strlen(ctxt->query) + 1;
1408 blen += strlen(method) + strlen(ctxt->path) + 24;
1409 #ifdef HAVE_ZLIB_H
1410 /* reserve for possible 'Accept-Encoding: gzip' string */
1411 blen += 23;
1412 #endif
1413 if (ctxt->port != 80) {
1414 /* reserve space for ':xxxxx', incl. potential proxy */
1415 if (proxy)
1416 blen += 12;
1417 else
1418 blen += 6;
1419 }
1420 bp = (char*)xmlMallocAtomic(blen);
1421 if ( bp == NULL ) {
1422 xmlNanoHTTPFreeCtxt( ctxt );
1423 xmlHTTPErrMemory("allocating header buffer");
1424 return ( NULL );
1425 }
1426
1427 p = bp;
1428
1429 if (proxy) {
1430 if (ctxt->port != 80) {
1431 p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1432 method, ctxt->hostname,
1433 ctxt->port, ctxt->path );
1434 }
1435 else
1436 p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1437 ctxt->hostname, ctxt->path);
1438 }
1439 else
1440 p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1441
1442 if (ctxt->query != NULL)
1443 p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1444
1445 if (ctxt->port == 80) {
1446 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1447 ctxt->hostname);
1448 } else {
1449 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1450 ctxt->hostname, ctxt->port);
1451 }
1452
1453 #ifdef HAVE_ZLIB_H
1454 p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1455 #endif
1456
1457 if (contentType != NULL && *contentType)
1458 p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1459
1460 if (headers != NULL)
1461 p += snprintf( p, blen - (p - bp), "%s", headers );
1462
1463 if (input != NULL)
1464 snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1465 else
1466 snprintf(p, blen - (p - bp), "\r\n");
1467
1468 #ifdef DEBUG_HTTP
1469 xmlGenericError(xmlGenericErrorContext,
1470 "-> %s%s", proxy? "(Proxy) " : "", bp);
1471 if ((blen -= strlen(bp)+1) < 0)
1472 xmlGenericError(xmlGenericErrorContext,
1473 "ERROR: overflowed buffer by %d bytes\n", -blen);
1474 #endif
1475 ctxt->outptr = ctxt->out = bp;
1476 ctxt->state = XML_NANO_HTTP_WRITE;
1477 blen = strlen( ctxt->out );
1478 #ifdef DEBUG_HTTP
1479 xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1480 if ( xmt_bytes != blen )
1481 xmlGenericError( xmlGenericErrorContext,
1482 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n",
1483 xmt_bytes, blen,
1484 "bytes of HTTP headers sent to host",
1485 ctxt->hostname );
1486 #else
1487 xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1488 #endif
1489
1490 if ( input != NULL ) {
1491 #ifdef DEBUG_HTTP
1492 xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1493
1494 if ( xmt_bytes != ilen )
1495 xmlGenericError( xmlGenericErrorContext,
1496 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n",
1497 xmt_bytes, ilen,
1498 "bytes of HTTP content sent to host",
1499 ctxt->hostname );
1500 #else
1501 xmlNanoHTTPSend( ctxt, input, ilen );
1502 #endif
1503 }
1504
1505 ctxt->state = XML_NANO_HTTP_READ;
1506
1507 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1508 if (*p == 0) {
1509 ctxt->content = ctxt->inrptr;
1510 xmlFree(p);
1511 break;
1512 }
1513 xmlNanoHTTPScanAnswer(ctxt, p);
1514
1515 #ifdef DEBUG_HTTP
1516 xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1517 #endif
1518 xmlFree(p);
1519 }
1520
1521 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1522 (ctxt->returnValue < 400)) {
1523 #ifdef DEBUG_HTTP
1524 xmlGenericError(xmlGenericErrorContext,
1525 "\nRedirect to: %s\n", ctxt->location);
1526 #endif
1527 while ( xmlNanoHTTPRecv(ctxt) > 0 ) ;
1528 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1529 nbRedirects++;
1530 if (redirURL != NULL)
1531 xmlFree(redirURL);
1532 redirURL = xmlMemStrdup(ctxt->location);
1533 xmlNanoHTTPFreeCtxt(ctxt);
1534 goto retry;
1535 }
1536 xmlNanoHTTPFreeCtxt(ctxt);
1537 if (redirURL != NULL) xmlFree(redirURL);
1538 #ifdef DEBUG_HTTP
1539 xmlGenericError(xmlGenericErrorContext,
1540 "xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1541 #endif
1542 return(NULL);
1543 }
1544
1545 if (contentType != NULL) {
1546 if (ctxt->contentType != NULL)
1547 *contentType = xmlMemStrdup(ctxt->contentType);
1548 else
1549 *contentType = NULL;
1550 }
1551
1552 if ((redir != NULL) && (redirURL != NULL)) {
1553 *redir = redirURL;
1554 } else {
1555 if (redirURL != NULL)
1556 xmlFree(redirURL);
1557 if (redir != NULL)
1558 *redir = NULL;
1559 }
1560
1561 #ifdef DEBUG_HTTP
1562 if (ctxt->contentType != NULL)
1563 xmlGenericError(xmlGenericErrorContext,
1564 "\nCode %d, content-type '%s'\n\n",
1565 ctxt->returnValue, ctxt->contentType);
1566 else
1567 xmlGenericError(xmlGenericErrorContext,
1568 "\nCode %d, no content-type\n\n",
1569 ctxt->returnValue);
1570 #endif
1571
1572 return((void *) ctxt);
1573 }
1574
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Tries to open a connection to the indicated resource via HTTP using
 * the given @method, adding the given extra headers and the input buffer
 * for the request content. Equivalent to xmlNanoHTTPMethodRedir() called
 * without a location for the redirected URL.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1598
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and saves its content in the file. An existing file is truncated
 * before the content is written.
 *
 * Returns -1 in case of failure (NULL @filename, request failure, the
 *     file cannot be opened or the content cannot be written), 0 in case
 *     of success. The contentType, if provided must be freed by the
 *     caller; it is already released and reset to NULL when the file
 *     cannot be opened.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int use_stdout;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    use_stdout = (strcmp(filename, "-") == 0);
    if (use_stdout)
        /* descriptor 1 is the standard output; 0 would be the input */
        fd = 1;
    else {
        /* O_TRUNC: do not keep stale bytes of a longer pre-existing file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    /* buf points into the context, so it is consumed before the close */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may store fewer bytes than requested: loop until done */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output belongs to the process, leave it open */
    if (!use_stdout)
        close(fd);
    return(ret);
}
1649
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the content is written.
 * Once both parameters have been accepted, the context is closed and
 * freed before returning, including when the file cannot be opened.
 *
 * Returns -1 in case of failure (NULL parameter, the file cannot be
 *     opened or the content cannot be written), 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int written;
    int use_stdout;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    use_stdout = (strcmp(filename, "-") == 0);
    if (use_stdout)
        /* descriptor 1 is the standard output; 0 would be the input */
        fd = 1;
    else {
        /* O_TRUNC: do not keep stale bytes of a longer pre-existing file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    /* buf points into the context, so it is consumed before the close */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may store fewer bytes than requested: loop until done */
    while (len > 0) {
        written = (int) write(fd, buf, len);
        if (written <= 0) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output belongs to the process, leave it open */
    if (!use_stdout)
        close(fd);
    return(ret);
}
#endif /* LIBXML_OUTPUT_ENABLED */
1692
1693 /**
1694 * xmlNanoHTTPReturnCode:
1695 * @ctx: the HTTP context
1696 *
1697 * Get the latest HTTP return code received
1698 *
1699 * Returns the HTTP return code for the request.
1700 */
1701 int
xmlNanoHTTPReturnCode(void * ctx)1702 xmlNanoHTTPReturnCode(void *ctx) {
1703 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1704
1705 if (ctxt == NULL) return(-1);
1706
1707 return(ctxt->returnValue);
1708 }
1709
1710 /**
1711 * xmlNanoHTTPAuthHeader:
1712 * @ctx: the HTTP context
1713 *
1714 * Get the authentication header of an HTTP context
1715 *
1716 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1717 * header.
1718 */
1719 const char *
xmlNanoHTTPAuthHeader(void * ctx)1720 xmlNanoHTTPAuthHeader(void *ctx) {
1721 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1722
1723 if (ctxt == NULL) return(NULL);
1724
1725 return(ctxt->authHeader);
1726 }
1727
1728 /**
1729 * xmlNanoHTTPContentLength:
1730 * @ctx: the HTTP context
1731 *
1732 * Provides the specified content length from the HTTP header.
1733 *
1734 * Return the specified content length from the HTTP header. Note that
1735 * a value of -1 indicates that the content length element was not included in
1736 * the response header.
1737 */
1738 int
xmlNanoHTTPContentLength(void * ctx)1739 xmlNanoHTTPContentLength( void * ctx ) {
1740 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1741
1742 return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1743 }
1744
1745 /**
1746 * xmlNanoHTTPRedir:
1747 * @ctx: the HTTP context
1748 *
1749 * Provides the specified redirection URL if available from the HTTP header.
1750 *
1751 * Return the specified redirection URL or NULL if not redirected.
1752 */
1753 const char *
xmlNanoHTTPRedir(void * ctx)1754 xmlNanoHTTPRedir( void * ctx ) {
1755 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1756
1757 return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1758 }
1759
1760 /**
1761 * xmlNanoHTTPEncoding:
1762 * @ctx: the HTTP context
1763 *
1764 * Provides the specified encoding if specified in the HTTP headers.
1765 *
1766 * Return the specified encoding or NULL if not available
1767 */
1768 const char *
xmlNanoHTTPEncoding(void * ctx)1769 xmlNanoHTTPEncoding( void * ctx ) {
1770 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1771
1772 return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1773 }
1774
1775 /**
1776 * xmlNanoHTTPMimeType:
1777 * @ctx: the HTTP context
1778 *
1779 * Provides the specified Mime-Type if specified in the HTTP headers.
1780 *
1781 * Return the specified Mime-Type or NULL if not available
1782 */
1783 const char *
xmlNanoHTTPMimeType(void * ctx)1784 xmlNanoHTTPMimeType( void * ctx ) {
1785 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1786
1787 return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1788 }
1789
1790 /**
1791 * xmlNanoHTTPFetchContent:
1792 * @ctx: the HTTP context
1793 * @ptr: pointer to set to the content buffer.
1794 * @len: integer pointer to hold the length of the content
1795 *
1796 * Check if all the content was read
1797 *
1798 * Returns 0 if all the content was read and available, returns
1799 * -1 if received content length was less than specified or an error
1800 * occurred.
1801 */
1802 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1803 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1804 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1805
1806 int rc = 0;
1807 int cur_lgth;
1808 int rcvd_lgth;
1809 int dummy_int;
1810 char * dummy_ptr = NULL;
1811
1812 /* Dummy up return input parameters if not provided */
1813
1814 if ( len == NULL )
1815 len = &dummy_int;
1816
1817 if ( ptr == NULL )
1818 ptr = &dummy_ptr;
1819
1820 /* But can't work without the context pointer */
1821
1822 if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1823 *len = 0;
1824 *ptr = NULL;
1825 return ( -1 );
1826 }
1827
1828 rcvd_lgth = ctxt->inptr - ctxt->content;
1829
1830 while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1831
1832 rcvd_lgth += cur_lgth;
1833 if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1834 break;
1835 }
1836
1837 *ptr = ctxt->content;
1838 *len = rcvd_lgth;
1839
1840 if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1841 rc = -1;
1842 else if ( rcvd_lgth == 0 )
1843 rc = -1;
1844
1845 return ( rc );
1846 }
1847
#ifdef STANDALONE
/*
 * Minimal command line front-end: fetches argv[1] and saves it in
 * argv[2], or passes "-" as the file name when no file is given.
 * Without any argument a short usage message is reported instead.
 * Always returns 0.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    const char *target;

    if (argv[1] == NULL) {
        xmlGenericError(xmlGenericErrorContext,
                "%s: minimal HTTP GET implementation\n", argv[0]);
        xmlGenericError(xmlGenericErrorContext,
                "\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
        target = (argv[2] != NULL) ? argv[2] : "-";
        xmlNanoHTTPFetch(argv[1], target, &contentType);
        if (contentType != NULL)
            xmlFree(contentType);
    }

    xmlNanoHTTPCleanup();
    xmlMemoryDump();
    return(0);
}
#endif /* STANDALONE */
1869 #else /* !LIBXML_HTTP_ENABLED */
#ifdef STANDALONE
#include <stdio.h>
/*
 * Stand-in entry point used when HTTP support is compiled out: it only
 * reports that fact and returns 0.
 */
int main(int argc, char **argv) {
    const char *progname = argv[0];

    xmlGenericError(xmlGenericErrorContext,
            "%s : HTTP support not compiled in\n", progname);
    return(0);
}
#endif /* STANDALONE */
1878 #endif /* LIBXML_HTTP_ENABLED */
1879 #define bottom_nanohttp
1880 #include "elfgcchack.h"
1881