• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GIO - GLib Input, Output and Streaming Library
2  *
3  * Copyright (C) 2006-2007 Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General
16  * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
17  *
18  * Author: Alexander Larsson <alexl@redhat.com>
19  */
20 
21 #include "config.h"
22 
23 #include "gurifuncs.h"
24 
25 #include <glib/gstrfuncs.h>
26 #include <glib/gmessages.h>
27 #include <glib/gstring.h>
28 #include <glib/gmem.h>
29 
30 #include <string.h>
31 
32 #include "config.h"
33 
34 /**
35  * SECTION:gurifuncs
36  * @title: URI Functions
37  * @short_description: manipulating URIs
38  *
39  * Functions for manipulating Uniform Resource Identifiers (URIs) as
40  * defined by
41  * [RFC 3986](http://www.ietf.org/rfc/rfc3986.txt).
42  * It is highly recommended that you read and understand RFC 3986
43  * before using this API.
44  */
45 
/*
 * unescape_character:
 * @scanner: points at the two characters that follow a '%' sign
 *
 * Decodes a pair of hexadecimal digits into the value they encode.
 * Reads scanner[0] and, only if that is a hex digit, scanner[1].
 *
 * Returns: the decoded value, or -1 if g_ascii_xdigit_value() rejects
 * either character.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  /* Both nibbles are non-negative here, so this equals (high << 4) | low. */
  return high_nibble * 16 + low_nibble;
}
62 
63 /**
64  * g_uri_unescape_segment:
65  * @escaped_string: (nullable): A string, may be %NULL
66  * @escaped_string_end: (nullable): Pointer to end of @escaped_string, may be %NULL
67  * @illegal_characters: (nullable): An optional string of illegal characters not to be allowed, may be %NULL
68  *
69  * Unescapes a segment of an escaped string.
70  *
71  * If any of the characters in @illegal_characters or the character zero appears
72  * as an escaped character in @escaped_string then that is an error and %NULL
73  * will be returned. This is useful it you want to avoid for instance having a
74  * slash being expanded in an escaped path element, which might confuse pathname
75  * handling.
76  *
77  * Returns: an unescaped version of @escaped_string or %NULL on error.
78  * The returned string should be freed when no longer needed.  As a
79  * special case if %NULL is given for @escaped_string, this function
80  * will return %NULL.
81  *
82  * Since: 2.16
83  **/
84 char *
g_uri_unescape_segment(const char * escaped_string,const char * escaped_string_end,const char * illegal_characters)85 g_uri_unescape_segment (const char *escaped_string,
86 			const char *escaped_string_end,
87 			const char *illegal_characters)
88 {
89   const char *in;
90   char *out, *result;
91   gint character;
92 
93   if (escaped_string == NULL)
94     return NULL;
95 
96   if (escaped_string_end == NULL)
97     escaped_string_end = escaped_string + strlen (escaped_string);
98 
99   result = g_malloc (escaped_string_end - escaped_string + 1);
100 
101   out = result;
102   for (in = escaped_string; in < escaped_string_end; in++)
103     {
104       character = *in;
105 
106       if (*in == '%')
107 	{
108 	  in++;
109 
110 	  if (escaped_string_end - in < 2)
111 	    {
112 	      /* Invalid escaped char (to short) */
113 	      g_free (result);
114 	      return NULL;
115 	    }
116 
117 	  character = unescape_character (in);
118 
119 	  /* Check for an illegal character. We consider '\0' illegal here. */
120 	  if (character <= 0 ||
121 	      (illegal_characters != NULL &&
122 	       strchr (illegal_characters, (char)character) != NULL))
123 	    {
124 	      g_free (result);
125 	      return NULL;
126 	    }
127 
128 	  in++; /* The other char will be eaten in the loop header */
129 	}
130       *out++ = (char)character;
131     }
132 
133   *out = '\0';
134 
135   return result;
136 }
137 
138 /**
139  * g_uri_unescape_string:
140  * @escaped_string: an escaped string to be unescaped.
141  * @illegal_characters: (nullable): a string of illegal characters not to be
142  *      allowed, or %NULL.
143  *
144  * Unescapes a whole escaped string.
145  *
146  * If any of the characters in @illegal_characters or the character zero appears
147  * as an escaped character in @escaped_string then that is an error and %NULL
148  * will be returned. This is useful it you want to avoid for instance having a
149  * slash being expanded in an escaped path element, which might confuse pathname
150  * handling.
151  *
152  * Returns: an unescaped version of @escaped_string. The returned string
153  * should be freed when no longer needed.
154  *
155  * Since: 2.16
156  **/
157 char *
g_uri_unescape_string(const char * escaped_string,const char * illegal_characters)158 g_uri_unescape_string (const char *escaped_string,
159 		       const char *illegal_characters)
160 {
161   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
162 }
163 
164 /**
165  * g_uri_parse_scheme:
166  * @uri: a valid URI.
167  *
168  * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
169  * |[
170  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
171  * ]|
172  * Common schemes include "file", "http", "svn+ssh", etc.
173  *
174  * Returns: The "Scheme" component of the URI, or %NULL on error.
175  * The returned string should be freed when no longer needed.
176  *
177  * Since: 2.16
178  **/
179 char *
g_uri_parse_scheme(const char * uri)180 g_uri_parse_scheme (const char  *uri)
181 {
182   const char *p;
183   char c;
184 
185   g_return_val_if_fail (uri != NULL, NULL);
186 
187   /* From RFC 3986 Decodes:
188    * URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
189    */
190 
191   p = uri;
192 
193   /* Decode scheme:
194      scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
195   */
196 
197   if (!g_ascii_isalpha (*p))
198     return NULL;
199 
200   while (1)
201     {
202       c = *p++;
203 
204       if (c == ':')
205 	break;
206 
207       if (!(g_ascii_isalnum(c) ||
208 	    c == '+' ||
209 	    c == '-' ||
210 	    c == '.'))
211 	return NULL;
212     }
213 
214   return g_strndup (uri, p - uri - 1);
215 }
216 
217 /**
218  * g_uri_escape_string:
219  * @unescaped: the unescaped input string.
220  * @reserved_chars_allowed: (nullable): a string of reserved characters that
221  *      are allowed to be used, or %NULL.
222  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
223  *
224  * Escapes a string for use in a URI.
225  *
226  * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
227  * characters plus dash, dot, underscore and tilde) are escaped.
228  * But if you specify characters in @reserved_chars_allowed they are not
229  * escaped. This is useful for the "reserved" characters in the URI
230  * specification, since those are allowed unescaped in some portions of
231  * a URI.
232  *
233  * Returns: an escaped version of @unescaped. The returned string should be
234  * freed when no longer needed.
235  *
236  * Since: 2.16
237  **/
238 char *
g_uri_escape_string(const char * unescaped,const char * reserved_chars_allowed,gboolean allow_utf8)239 g_uri_escape_string (const char *unescaped,
240 		     const char  *reserved_chars_allowed,
241 		     gboolean     allow_utf8)
242 {
243   GString *s;
244 
245   g_return_val_if_fail (unescaped != NULL, NULL);
246 
247   s = g_string_sized_new (strlen (unescaped) + 10);
248 
249   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
250 
251   return g_string_free (s, FALSE);
252 }
253