• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * soup-tld.c
4  *
5  * Copyright (C) 2012 Igalia S.L.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 
12 #include <string.h>
13 
14 #include <glib/gi18n-lib.h>
15 #include <libpsl.h>
16 
17 #include "soup-tld.h"
18 #include "soup.h"
19 
20 /**
21  * SECTION:soup-tld
22  * @short_description: Top-Level Domain Utilities
23  *
24  * These functions can be used to parse hostnames to attempt to determine
25  * what part of the name belongs to the domain owner, and what part is
26  * simply a "public suffix" such as ".com".
27  */
28 
29 static const char *soup_tld_get_base_domain_internal (const char *hostname,
30 						      GError    **error);
31 
32 /**
33  * soup_tld_get_base_domain:
34  * @hostname: a hostname
35  * @error: return location for a #GError, or %NULL to ignore
36  *   errors. See #SoupTLDError for the available error codes
37  *
38  * Finds the base domain for a given @hostname. The base domain is
39  * composed by the top level domain (such as .org, .com, .co.uk, etc)
40  * plus the second level domain, for example for myhost.mydomain.com
41  * it will return mydomain.com.
42  *
43  * Note that %NULL will be returned for private URLs (those not ending
44  * with any well known TLD) because choosing a base domain for them
45  * would be totally arbitrary.
46  *
47  * Prior to libsoup 2.46, this function required that @hostname be in
48  * UTF-8 if it was an IDN. From 2.46 on, the name can be in either
49  * UTF-8 or ASCII format (and the return value will be in the same
50  * format).
51  *
52  * Returns: a pointer to the start of the base domain in @hostname. If
53  * an error occurs, %NULL will be returned and @error set.
54  *
55  * Since: 2.40
56  **/
57 const char *
soup_tld_get_base_domain(const char * hostname,GError ** error)58 soup_tld_get_base_domain (const char *hostname, GError **error)
59 {
60 	g_return_val_if_fail (hostname, NULL);
61 
62 	return soup_tld_get_base_domain_internal (hostname, error);
63 }
64 
65 static psl_ctx_t *
soup_psl_context(void)66 soup_psl_context (void)
67 {
68 	static psl_ctx_t *psl = NULL;
69 
70 	if (!psl)
71 		psl = psl_latest (NULL);
72 
73 	return psl;
74 }
75 
76 /**
77  * soup_tld_domain_is_public_suffix:
78  * @domain: a domain name
79  *
80  * Looks whether the @domain passed as argument is a public domain
81  * suffix (.org, .com, .co.uk, etc) or not.
82  *
83  * Prior to libsoup 2.46, this function required that @domain be in
84  * UTF-8 if it was an IDN. From 2.46 on, the name can be in either
85  * UTF-8 or ASCII format.
86  *
87  * Returns: %TRUE if it is a public domain, %FALSE otherwise.
88  *
89  * Since: 2.40
90  **/
91 gboolean
soup_tld_domain_is_public_suffix(const char * domain)92 soup_tld_domain_is_public_suffix (const char *domain)
93 {
94 	const psl_ctx_t* psl = soup_psl_context ();
95 
96 	g_return_val_if_fail (domain, FALSE);
97 
98 	if (!psl) {
99 		g_warning ("soup-tld: There is no public-suffix data available.");
100 		return FALSE;
101 	}
102 
103 	return psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE);
104 }
105 
106 /**
107  * SOUP_TLD_ERROR:
108  *
109  * The #GError domain for soup-tld-related errors.
110  *
111  * Since: 2.40
112  */
/**
 * SoupTLDError:
 * @SOUP_TLD_ERROR_INVALID_HOSTNAME: A hostname was syntactically
 *   invalid.
 * @SOUP_TLD_ERROR_IS_IP_ADDRESS: The passed-in "hostname" was
 *   actually an IP address (and thus has no base domain or
 *   public suffix).
 * @SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS: The passed-in hostname
 *   did not have enough components. E.g., calling
 *   soup_tld_get_base_domain() on <literal>"co.uk"</literal>.
 * @SOUP_TLD_ERROR_NO_BASE_DOMAIN: The passed-in hostname has
 *   no recognized public suffix.
 * @SOUP_TLD_ERROR_NO_PSL_DATA: No public-suffix list data was
 *   available, so the lookup could not be performed.
 *
 * Error codes for %SOUP_TLD_ERROR.
 *
 * Since: 2.40
 */
130 
131 GQuark
soup_tld_error_quark(void)132 soup_tld_error_quark (void)
133 {
134 	static GQuark error;
135 	if (!error)
136 		error = g_quark_from_static_string ("soup_tld_error_quark");
137 	return error;
138 }
139 
140 static const char *
soup_tld_get_base_domain_internal(const char * hostname,GError ** error)141 soup_tld_get_base_domain_internal (const char *hostname, GError **error)
142 {
143 	char *utf8_hostname = NULL;
144 	const psl_ctx_t* psl = soup_psl_context ();
145 	const char *registrable_domain, *unregistrable_domain;
146 
147 	if (!psl) {
148 		g_set_error_literal (error, SOUP_TLD_ERROR,
149 				     SOUP_TLD_ERROR_NO_PSL_DATA,
150 				     _("No public-suffix list available."));
151 		return NULL;
152 	}
153 
154 	/* Valid hostnames neither start with a dot nor have more than one
155 	 * dot together.
156 	 */
157 	if (*hostname == '.') {
158 		g_set_error_literal (error, SOUP_TLD_ERROR,
159 				     SOUP_TLD_ERROR_INVALID_HOSTNAME,
160 				     _("Invalid hostname"));
161 		return NULL;
162 	}
163 
164 	if (g_hostname_is_ip_address (hostname)) {
165 		g_set_error_literal (error, SOUP_TLD_ERROR,
166 				     SOUP_TLD_ERROR_IS_IP_ADDRESS,
167 				     _("Hostname is an IP address"));
168 		return NULL;
169 	}
170 
171 	if (g_hostname_is_ascii_encoded (hostname)) {
172 		utf8_hostname = g_hostname_to_unicode (hostname);
173 		if (!utf8_hostname) {
174 			g_set_error_literal (error, SOUP_TLD_ERROR,
175 					     SOUP_TLD_ERROR_INVALID_HOSTNAME,
176 					     _("Invalid hostname"));
177 			return NULL;
178 		}
179 		g_free (utf8_hostname);
180 	}
181 
182 	/* Fetch the domain portion of the hostname and check whether
183 	 * it's a public domain. */
184 	unregistrable_domain = psl_unregistrable_domain (psl, hostname);
185 	if (!psl_is_public_suffix2 (psl, unregistrable_domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE)) {
186 		g_set_error_literal (error, SOUP_TLD_ERROR,
187 				     SOUP_TLD_ERROR_NO_BASE_DOMAIN,
188 				     _("Hostname has no base domain"));
189 		return NULL;
190 	}
191 
192 	registrable_domain = psl_registrable_domain (psl, hostname);
193 	if (!registrable_domain) {
194 		g_set_error_literal (error, SOUP_TLD_ERROR,
195 				     SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS,
196 				     _("Not enough domains"));
197 		return NULL;
198 	}
199 
200 	return registrable_domain;
201 }
202