1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall
15 * be included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 *
26 * ----------------------------------------------------------------------- */
27
28 /*
29 * urlparse.c
30 */
31
32 #include <string.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include "url.h"
36
37 /*
38 * Return the type of a URL without modifying the string
39 */
url_type(const char * url)40 enum url_type url_type(const char *url)
41 {
42 const char *q;
43
44 q = strchr(url, ':');
45 if (!q)
46 return URL_SUFFIX;
47
48 if (q[1] == '/' && q[2] == '/')
49 return URL_NORMAL;
50
51 if (q[1] == ':')
52 return URL_OLD_TFTP;
53
54 return URL_SUFFIX;
55 }
56
57 /*
58 * Decompose a URL into its components. This is done in-place;
59 * this routine does not allocate any additional storage. Freeing the
60 * original buffer frees all storage used.
61 */
parse_url(struct url_info * ui,char * url)62 void parse_url(struct url_info *ui, char *url)
63 {
64 char *p = url;
65 char *q, *r, *s;
66 int c;
67
68 memset(ui, 0, sizeof *ui);
69
70 q = strchr(p, ':');
71 if (q && (q[1] == '/' && q[2] == '/')) {
72 ui->type = URL_NORMAL;
73
74 ui->scheme = p;
75 *q = '\0';
76 p = q+3;
77
78 q = strchr(p, '/');
79 if (q) {
80 *q = '\0';
81 ui->path = q+1;
82 q = strchr(q+1, '#');
83 if (q)
84 *q = '\0';
85 } else {
86 ui->path = "";
87 }
88
89 r = strchr(p, '@');
90 if (r) {
91 ui->user = p;
92 *r = '\0';
93 s = strchr(p, ':');
94 if (s) {
95 *s = '\0';
96 ui->passwd = s+1;
97 }
98 p = r+1;
99 }
100
101 ui->host = p;
102 r = strchr(p, ':');
103 if (r) {
104 *r++ = '\0';
105 ui->port = 0;
106 while ((c = *r++)) {
107 c -= '0';
108 if (c > 9)
109 break;
110 ui->port = ui->port * 10 + c;
111 }
112 }
113 } else if (q && q[1] == ':') {
114 *q = '\0';
115 ui->scheme = "tftp";
116 ui->host = p;
117 ui->path = q+2;
118 ui->type = URL_OLD_TFTP;
119 } else {
120 ui->path = p;
121 ui->type = URL_SUFFIX;
122 }
123 }
124
/*
 * Escapes unsafe characters in a URL.
 * This does *not* escape things like query characters!
 *
 * Every byte of `input` that is <= ' ' or > '~' (control characters,
 * space, DEL and anything with the high bit set) is written to `output`
 * as "%XX" with uppercase hex digits; all other bytes are copied
 * unchanged.
 *
 * At most bufsize-1 characters are stored, followed by a terminating
 * NUL.  If bufsize is 0 nothing at all is written, so `output` is not
 * touched.  Truncation is per character, so a truncated result may end
 * in the middle of a "%XX" triplet.
 *
 * Returns the number of characters in the total (untruncated) output,
 * not counting the terminating NUL; a return value >= bufsize means the
 * output was truncated.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    for (p = input; (c = *p); p++) {
        if (c <= ' ' || c > '~') {
            if (++n < bufsize)
                *q++ = '%';
            if (++n < bufsize)
                *q++ = uchexchar[c >> 4];
            if (++n < bufsize)
                *q++ = uchexchar[c & 15];
        } else {
            if (++n < bufsize)
                *q++ = c;
        }
    }

    /* A zero-sized buffer has no room even for the terminator */
    if (bufsize > 0)
        *q = '\0';

    return n;
}
152
/*
 * Convert a single hexadecimal digit character to its numeric value.
 * Accepts '0'-'9' and 'a'-'f' in either case; returns -1 for any
 * other character.
 */
static int hexdigit(char c)
{
    char lower;

    if ('0' <= c && c <= '9')
        return c - '0';

    /* Setting bit 0x20 folds 'A'-'F' onto 'a'-'f' */
    lower = (char)(c | 0x20);
    if (lower < 'a' || lower > 'f')
        return -1;

    return lower - 'a' + 10;
}
162
/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place: each "%XX"
 * sequence with two valid hex digits is replaced by the byte it
 * encodes, and a '%' not followed by two hex digits is copied through
 * literally.  The result is always NUL-terminated.
 *
 * A terminator of 0 never matches, so the whole string is unescaped.
 *
 * Returns NULL if the end of the string was reached without meeting the
 * terminator.  Otherwise decoding stops and the return value points at
 * the position the terminator occupied in the buffer; the unprocessed
 * remainder of the input starts one byte after it.  If no escape
 * sequence was decoded before the terminator, the terminator byte
 * itself is where the NUL of the unescaped string is stored.
 */
char *url_unescape(char *buffer, char terminator)
{
    char *p = buffer;           /* Read position */
    char *q = buffer;           /* Write position, never ahead of p */
    /*
     * Bytes are examined as unsigned char, so the terminator has to be
     * converted the same way; otherwise a terminator >= 0x80 could
     * never match on platforms where plain char is signed.
     */
    const unsigned char term = (unsigned char)terminator;
    unsigned char c;
    int x, y;

    while ((c = *p)) {
        if (c == term) {
            *q = '\0';
            return p;
        }
        p++;
        if (c == '%') {
            /*
             * p[1] is only examined when p[0] is a hex digit (and so
             * not the NUL), which keeps the look-ahead inside the
             * string.
             */
            x = hexdigit(p[0]);
            if (x >= 0) {
                y = hexdigit(p[1]);
                if (y >= 0) {
                    *q++ = (char)((x << 4) + y);
                    p += 2;
                    continue;
                }
            }
        }
        *q++ = c;
    }
    *q = '\0';
    return NULL;
}
199
200 #ifdef URL_TEST
201
main(int argc,char * argv[])202 int main(int argc, char *argv[])
203 {
204 int i;
205 struct url_info url;
206
207 for (i = 1; i < argc; i++) {
208 parse_url(&url, argv[i]);
209 printf("scheme: %s\n"
210 "user: %s\n"
211 "passwd: %s\n"
212 "host: %s\n"
213 "port: %d\n"
214 "path: %s\n"
215 "type: %d\n",
216 url.scheme, url.user, url.passwd, url.host, url.port,
217 url.path, url.type);
218 }
219
220 return 0;
221 }
222
223 #endif
224