1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * soup-tld.c
4 *
5 * Copyright (C) 2012 Igalia S.L.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <string.h>
13
14 #include <glib/gi18n-lib.h>
15 #include <libpsl.h>
16
17 #include "soup-tld.h"
18 #include "soup.h"
19
20 /**
21 * SECTION:soup-tld
22 * @short_description: Top-Level Domain Utilities
23 *
24 * These functions can be used to parse hostnames to attempt to determine
25 * what part of the name belongs to the domain owner, and what part is
26 * simply a "public suffix" such as ".com".
27 */
28
29 static const char *soup_tld_get_base_domain_internal (const char *hostname,
30 GError **error);
31
32 /**
33 * soup_tld_get_base_domain:
34 * @hostname: a hostname
35 * @error: return location for a #GError, or %NULL to ignore
36 * errors. See #SoupTLDError for the available error codes
37 *
 * Finds the base domain for a given @hostname. The base domain is
 * composed of the top level domain (such as .org, .com, .co.uk, etc)
 * plus the second level domain, for example for myhost.mydomain.com
 * it will return mydomain.com.
42 *
43 * Note that %NULL will be returned for private URLs (those not ending
44 * with any well known TLD) because choosing a base domain for them
45 * would be totally arbitrary.
46 *
47 * Prior to libsoup 2.46, this function required that @hostname be in
48 * UTF-8 if it was an IDN. From 2.46 on, the name can be in either
49 * UTF-8 or ASCII format (and the return value will be in the same
50 * format).
51 *
52 * Returns: a pointer to the start of the base domain in @hostname. If
53 * an error occurs, %NULL will be returned and @error set.
54 *
55 * Since: 2.40
56 **/
57 const char *
soup_tld_get_base_domain(const char * hostname,GError ** error)58 soup_tld_get_base_domain (const char *hostname, GError **error)
59 {
60 g_return_val_if_fail (hostname, NULL);
61
62 return soup_tld_get_base_domain_internal (hostname, error);
63 }
64
65 static psl_ctx_t *
soup_psl_context(void)66 soup_psl_context (void)
67 {
68 static psl_ctx_t *psl = NULL;
69
70 if (!psl)
71 psl = psl_latest (NULL);
72
73 return psl;
74 }
75
76 /**
77 * soup_tld_domain_is_public_suffix:
78 * @domain: a domain name
79 *
 * Checks whether the @domain passed as argument is a public domain
 * suffix (.org, .com, .co.uk, etc) or not.
82 *
83 * Prior to libsoup 2.46, this function required that @domain be in
84 * UTF-8 if it was an IDN. From 2.46 on, the name can be in either
85 * UTF-8 or ASCII format.
86 *
87 * Returns: %TRUE if it is a public domain, %FALSE otherwise.
88 *
89 * Since: 2.40
90 **/
91 gboolean
soup_tld_domain_is_public_suffix(const char * domain)92 soup_tld_domain_is_public_suffix (const char *domain)
93 {
94 const psl_ctx_t* psl = soup_psl_context ();
95
96 g_return_val_if_fail (domain, FALSE);
97
98 if (!psl) {
99 g_warning ("soup-tld: There is no public-suffix data available.");
100 return FALSE;
101 }
102
103 return psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE);
104 }
105
106 /**
107 * SOUP_TLD_ERROR:
108 *
109 * The #GError domain for soup-tld-related errors.
110 *
111 * Since: 2.40
112 */
113 /**
114 * SoupTLDError:
115 * @SOUP_TLD_ERROR_INVALID_HOSTNAME: A hostname was syntactically
116 * invalid.
117 * @SOUP_TLD_ERROR_IS_IP_ADDRESS: The passed-in "hostname" was
118 * actually an IP address (and thus has no base domain or
119 * public suffix).
120 * @SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS: The passed-in hostname
121 * did not have enough components. Eg, calling
122 * soup_tld_get_base_domain() on <literal>"co.uk"</literal>.
 * @SOUP_TLD_ERROR_NO_BASE_DOMAIN: The passed-in hostname has
 *   no recognized public suffix.
 * @SOUP_TLD_ERROR_NO_PSL_DATA: No public-suffix list data was
 *   available.
125 *
126 * Error codes for %SOUP_TLD_ERROR.
127 *
128 * Since: 2.40
129 */
130
131 GQuark
soup_tld_error_quark(void)132 soup_tld_error_quark (void)
133 {
134 static GQuark error;
135 if (!error)
136 error = g_quark_from_static_string ("soup_tld_error_quark");
137 return error;
138 }
139
140 static const char *
soup_tld_get_base_domain_internal(const char * hostname,GError ** error)141 soup_tld_get_base_domain_internal (const char *hostname, GError **error)
142 {
143 char *utf8_hostname = NULL;
144 const psl_ctx_t* psl = soup_psl_context ();
145 const char *registrable_domain, *unregistrable_domain;
146
147 if (!psl) {
148 g_set_error_literal (error, SOUP_TLD_ERROR,
149 SOUP_TLD_ERROR_NO_PSL_DATA,
150 _("No public-suffix list available."));
151 return NULL;
152 }
153
154 /* Valid hostnames neither start with a dot nor have more than one
155 * dot together.
156 */
157 if (*hostname == '.') {
158 g_set_error_literal (error, SOUP_TLD_ERROR,
159 SOUP_TLD_ERROR_INVALID_HOSTNAME,
160 _("Invalid hostname"));
161 return NULL;
162 }
163
164 if (g_hostname_is_ip_address (hostname)) {
165 g_set_error_literal (error, SOUP_TLD_ERROR,
166 SOUP_TLD_ERROR_IS_IP_ADDRESS,
167 _("Hostname is an IP address"));
168 return NULL;
169 }
170
171 if (g_hostname_is_ascii_encoded (hostname)) {
172 utf8_hostname = g_hostname_to_unicode (hostname);
173 if (!utf8_hostname) {
174 g_set_error_literal (error, SOUP_TLD_ERROR,
175 SOUP_TLD_ERROR_INVALID_HOSTNAME,
176 _("Invalid hostname"));
177 return NULL;
178 }
179 g_free (utf8_hostname);
180 }
181
182 /* Fetch the domain portion of the hostname and check whether
183 * it's a public domain. */
184 unregistrable_domain = psl_unregistrable_domain (psl, hostname);
185 if (!psl_is_public_suffix2 (psl, unregistrable_domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE)) {
186 g_set_error_literal (error, SOUP_TLD_ERROR,
187 SOUP_TLD_ERROR_NO_BASE_DOMAIN,
188 _("Hostname has no base domain"));
189 return NULL;
190 }
191
192 registrable_domain = psl_registrable_domain (psl, hostname);
193 if (!registrable_domain) {
194 g_set_error_literal (error, SOUP_TLD_ERROR,
195 SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS,
196 _("Not enough domains"));
197 return NULL;
198 }
199
200 return registrable_domain;
201 }
202