1 /*
2 * Copyright(c) 2014-2018 Tim Ruehsen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * This file is part of libpsl.
23 *
24 * Public Suffix List routines
25 *
26 * Changelog
27 * 19.03.2014 Tim Ruehsen created from libmget/cookie.c
28 *
29 */
30
31 #if HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34
/*
 * GCC_VERSION_AT_LEAST(major, minor) is non-zero when the compiler reports
 * a GCC-compatible version of at least major.minor. It is always 0 when the
 * GCC version macros are not available.
 */
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
#  define GCC_VERSION_AT_LEAST(major, minor) 0
#else
#  define GCC_VERSION_AT_LEAST(major, minor) \
    ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#endif

/*
 * PSL_UNUSED silences "unused parameter/variable" warnings where the
 * attribute is supported and expands to nothing elsewhere.
 */
#if !GCC_VERSION_AT_LEAST(2,95)
#  define PSL_UNUSED
#else
#  define PSL_UNUSED __attribute__ ((unused))
#endif
46
47 #include <sys/types.h>
48 #include <sys/stat.h>
49
50 #ifdef _WIN32
51 # include <winsock2.h>
52 # include <ws2tcpip.h>
53 #else
54 # include <sys/socket.h>
55 # include <netinet/in.h>
56 # include <unistd.h>
57 #endif
58
59 #if defined(_MSC_VER) && ! defined(ssize_t)
60 # include <basetsd.h>
61 typedef SSIZE_T ssize_t;
62 #endif
63
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #ifdef HAVE_STRINGS_H
68 # include <strings.h>
69 #endif
70 #include <ctype.h>
71 #include <time.h>
72 #include <errno.h>
73 #include <limits.h> /* for UINT_MAX */
74
75 #ifdef HAVE_NL_LANGINFO
76 # include <langinfo.h>
77 #endif
78
79 #ifndef _WIN32
80 # include <arpa/inet.h>
81 #endif
82
83 #ifdef HAVE_ALLOCA_H
84 # include <alloca.h>
85 #endif
86
87 #ifdef WITH_LIBICU
88 # include <unicode/uversion.h>
89 # include <unicode/ustring.h>
90 # include <unicode/uidna.h>
91 # include <unicode/ucnv.h>
92 #elif defined(WITH_LIBIDN2)
93 # include <iconv.h>
94 # include <idn2.h>
95 # include <unicase.h>
96 # include <unistr.h>
97 #elif defined(WITH_LIBIDN)
98 # include <iconv.h>
99 # include <stringprep.h>
100 # include <idna.h>
101 # include <unicase.h>
102 # include <unistr.h>
103 #endif
104
105 #ifndef WINICONV_CONST
106 # define WINICONV_CONST
107 #endif
108
109 #include <libpsl.h>
110
111 /**
112 * SECTION:libpsl
113 * @short_description: Public Suffix List library functions
114 * @title: libpsl
115 * @stability: Stable
116 * @include: libpsl.h
117 *
118 * [Public Suffix List](https://publicsuffix.org/) library functions.
119 *
120 */
121
/* number of elements of a real array (not usable on pointers) */
#define countof(a) (sizeof(a) / sizeof((a)[0]))

/* per-rule flag bits */
#define PRIV_PSL_FLAG_EXCEPTION (1 << 0) /* exception rule */
#define PRIV_PSL_FLAG_WILDCARD  (1 << 1) /* wildcard rule */
#define PRIV_PSL_FLAG_ICANN     (1 << 2) /* entry of ICANN section */
#define PRIV_PSL_FLAG_PRIVATE   (1 << 3) /* entry of PRIVATE section */
#define PRIV_PSL_FLAG_PLAIN     (1 << 4) /* just used for PSL syntax checking */
129
/* a single suffix rule */
typedef struct {
    char label_buf[48];     /* storage for the rule text, filled by suffix_init() */
    const char *label;      /* rule text used for comparisons; suffix_compare() falls back to label_buf if NULL */
    unsigned short length;  /* length of the rule text in bytes */
    unsigned char nlabels;  /* number of labels */
    unsigned char flags;    /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
141
142 /* stripped down version libmget vector routines */
143 typedef struct {
144 int
145 (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
146 psl_entry_t
147 **entry; /* pointer to array of pointers to elements */
148 int
149 max, /* allocated elements */
150 cur; /* number of elements in use */
151 } psl_vector_t;
152
153 struct psl_ctx_st {
154 psl_vector_t
155 *suffixes;
156 unsigned char
157 *dafsa;
158 size_t
159 dafsa_size;
160 int
161 nsuffixes,
162 nexceptions,
163 nwildcards;
164 unsigned
165 utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
166 };
167
/*
 * PSL data generated by psl-make-dafsa.
 * Without a builtin generator, empty placeholders are defined instead.
 */
#if !defined(BUILTIN_GENERATOR_LIBICU) && !defined(BUILTIN_GENERATOR_LIBIDN2) && !defined(BUILTIN_GENERATOR_LIBIDN)
static const unsigned char kDafsa[] = "";
static time_t _psl_file_time = 0;
static int _psl_nsuffixes = 0;
static int _psl_nexceptions = 0;
static int _psl_nwildcards = 0;
static const char _psl_sha1_checksum[] = "";
static const char _psl_filename[] = "";
#else
#include "suffixes_dafsa.h"
#endif
180
181 /* references to these PSLs will result in lookups to built-in data */
182 static const psl_ctx_t
183 builtin_psl;
184
185 #ifdef PSL_DISTFILE
186 static const char _psl_dist_filename[] = PSL_DISTFILE;
187 #else
188 static const char _psl_dist_filename[] = "";
189 #endif
190
vector_alloc(int max,int (* cmp)(const psl_entry_t **,const psl_entry_t **))191 static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
192 {
193 psl_vector_t *v;
194
195 if (!(v = calloc(1, sizeof(psl_vector_t))))
196 return NULL;
197
198 if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
199 free(v);
200 return NULL;
201 }
202
203 v->max = max;
204 v->cmp = cmp;
205 return v;
206 }
207
/*
 * Release a vector together with all entries it holds.
 *
 * 'v' is the address of the vector pointer. After the call *v is NULL, so
 * the caller is not left with a dangling pointer and a repeated call is a
 * harmless no-op. A NULL 'v' or a NULL '*v' is ignored.
 */
static void vector_free(psl_vector_t **v)
{
    psl_vector_t *vec;
    int it;

    if (!v || !*v)
        return;

    vec = *v;

    if (vec->entry) {
        for (it = 0; it < vec->cur; it++)
            free(vec->entry[it]);

        free(vec->entry);
    }

    free(vec);
    *v = NULL;
}
222
vector_get(const psl_vector_t * v,int pos)223 static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
224 {
225 if (pos < 0 || !v || pos >= v->cur) return NULL;
226
227 return v->entry[pos];
228 }
229
230 /* the entries must be sorted by */
vector_find(const psl_vector_t * v,const psl_entry_t * elem)231 static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
232 {
233 if (v) {
234 int l, r, m;
235 int res;
236
237 /* binary search for element (exact match) */
238 for (l = 0, r = v->cur - 1; l <= r;) {
239 m = (l + r) / 2;
240 if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
241 else if (res < 0) r = m - 1;
242 else return m;
243 }
244 }
245
246 return -1; /* not found */
247 }
248
/*
 * Append a heap-allocated copy of 'elem' to the vector, growing the
 * pointer array if it is full.
 *
 * The capacity is only updated after the array was successfully enlarged.
 * Otherwise a failed realloc() would leave 'max' larger than the real
 * allocation and a later add would write behind the array.
 *
 * Returns the index of the new entry, or -1 on error (NULL argument,
 * capacity overflow or out of memory). On error the vector is unchanged.
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
    psl_entry_t *copy;

    if (!v || !elem)
        return -1;

    if (v->cur >= v->max) {
        int new_max;
        void *m;

        if (v->max > INT_MAX / 2)
            return -1; /* doubling would overflow */

        /* a capacity of 0 cannot grow by doubling, start with 1 slot */
        new_max = v->max > 0 ? v->max * 2 : 1;

        if ((size_t) new_max > (size_t) -1 / sizeof(psl_entry_t *))
            return -1; /* byte count would overflow */

        if (!(m = realloc(v->entry, new_max * sizeof(psl_entry_t *))))
            return -1; /* v->entry and v->max are still valid */

        v->entry = m;
        v->max = new_max;
    }

    if (!(copy = malloc(sizeof(psl_entry_t))))
        return -1;

    memcpy(copy, elem, sizeof(psl_entry_t));

    v->entry[v->cur] = copy;
    return v->cur++;
}
276
/*
 * Sort the entries with the vector's comparison function.
 * Does nothing if 'v' is NULL or no comparison function is set.
 */
static void vector_sort(psl_vector_t *v)
{
    if (!v || !v->cmp)
        return;

    /* the element size is that of one array slot (a psl_entry_t pointer) */
    qsort(v->entry, v->cur, sizeof(*v->entry), (int(*)(const void *, const void *))v->cmp);
}
282
283 /* by this kind of sorting, we can easily see if a domain matches or not */
suffix_compare(const psl_entry_t * s1,const psl_entry_t * s2)284 static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
285 {
286 int n;
287
288 if ((n = s2->nlabels - s1->nlabels))
289 return n; /* most labels first */
290
291 if ((n = s1->length - s2->length))
292 return n; /* shorter rules first */
293
294 return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
295 }
296
297 /* needed to sort array of pointers, given to qsort() */
suffix_compare_array(const psl_entry_t ** s1,const psl_entry_t ** s2)298 static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
299 {
300 return suffix_compare(*s1, *s2);
301 }
302
/*
 * Initialize 'suffix' from the first 'length' bytes of 'rule'.
 *
 * The rule text is copied into suffix->label_buf (NUL-terminated),
 * suffix->label is pointed at it, and the labels are counted (number of
 * dots + 1). The copy is bounded by 'length' - the value that was checked
 * against the buffer size - and additionally stops at a NUL byte, so a
 * rule that is not terminated at 'length' can not overflow label_buf.
 *
 * suffix->flags is not touched.
 *
 * Returns 0 on success, -1 if the rule is too long for label_buf (nlabels
 * is set to 0 in this case).
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
    size_t i;

    suffix->label = suffix->label_buf;

    if (length >= sizeof(suffix->label_buf) - 1) {
        suffix->nlabels = 0;
        /* fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule); */
        return -1;
    }

    suffix->length = (unsigned short) length;
    suffix->nlabels = 1;

    for (i = 0; i < length && rule[i]; i++) {
        if (rule[i] == '.')
            suffix->nlabels++;
        suffix->label_buf[i] = rule[i];
    }
    suffix->label_buf[i] = 0;

    return 0;
}
329
330 #if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
331 /*
332 * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
333 * representation of non-ASCII characters in labels as found in UTF-8 domain names.
334 * This is because the current DAFSA format used may only hold character values [21..127].
335 *
336 Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
337 2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
338 labeled punycode-spec 1.0.3 (2006-Mar-24-Thu). It is modified for
339 libpsl by Tim Rühsen. License on the original code:
340
341 punycode-spec 1.0.3 (2006-Mar-23-Thu)
342 http://www.nicemice.net/idn/
343 Adam M. Costello
344 http://www.nicemice.net/amc/
345
346 B. Disclaimer and license
347
348 Regarding this entire document or any portion of it (including
349 the pseudocode and C code), the author makes no guarantees and
350 is not responsible for any damage resulting from its use. The
351 author grants irrevocable permission to anyone to use, modify,
352 and distribute it in any way that does not diminish the rights
353 of anyone else to use, modify, and distribute it, provided that
354 redistributed derivative works do not contain misleading author or
355 version information. Derivative works need not be licensed under
356 similar terms.
357
358 C. Punycode sample implementation
359
360 punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
361 http://www.nicemice.net/idn/
362 Adam M. Costello
363 http://www.nicemice.net/amc/
364
365 This is ANSI C code (C89) implementing Punycode 1.0.x.
366 */
/* result codes of punycode_encode() */
enum punycode_status {
    punycode_success = 0,
    punycode_bad_input = 1, /* Input is invalid. */
    punycode_big_output = 2, /* Output would exceed the space provided. */
    punycode_overflow = 3 /* Wider integers needed to process input. */
};

/* unsigned integer type that can hold a code point (at least 26 bits) */
#ifdef PUNYCODE_UINT
typedef PUNYCODE_UINT punycode_uint;
#elif UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif

/*** Bootstring parameters for Punycode ***/
enum {
    base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
    initial_bias = 72, initial_n = 0x80, delimiter = 0x2D
};

/* Return the basic code point (lowercase letter or digit) for digit value d (0..35). */
static char encode_digit(punycode_uint d)
{
    return d + 22 + 75 * (d < 26);
    /* 0..25 map to ASCII a..z or A..Z */
    /* 26..35 map to ASCII 0..9 */
}
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
static const punycode_uint maxint = -1;

/* Bias adaptation function of the Bootstring algorithm. */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
    punycode_uint k;

    delta = firsttime ? delta / damp : delta >> 1;
    /* delta >> 1 is a faster way of doing delta / 2 */
    delta += delta / numpoints;

    for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
        delta /= base - tmin;
    }

    return k + (base - tmin + 1) * delta / (delta + skew);
}

/*
 * Punycode-encode 'input_length_orig' code points from 'input' into
 * 'output'.
 *
 * On entry *output_length is the capacity of 'output'; on success it
 * receives the number of bytes written. The output is NOT NUL-terminated
 * and may use the full capacity.
 *
 * Returns punycode_success, punycode_big_output if the output does not
 * fit, or punycode_overflow on arithmetic overflow.
 */
static enum punycode_status punycode_encode(
    size_t input_length_orig,
    const punycode_uint input[],
    size_t *output_length,
    char output[])
{
    punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
    size_t out, max_out;

    /* The Punycode spec assumes that the input length is the same type */
    /* of integer as a code point, so we need to convert the size_t to */
    /* a punycode_uint, which could overflow. */

    if (input_length_orig > maxint)
        return punycode_overflow;

    input_length = (punycode_uint) input_length_orig;

    /* Initialize the state: */

    n = initial_n;
    delta = 0;
    out = 0;
    max_out = *output_length;
    bias = initial_bias;

    /* Handle the basic code points: */
    for (j = 0; j < input_length; ++j) {
        if (input[j] < 0x80) {
            if (max_out - out < 2)
                return punycode_big_output;
            output[out++] = (char) input[j];
        }
        /* else if (input[j] < n) return punycode_bad_input; */
        /* (not needed for Punycode with unsigned code points) */
    }

    h = b = (punycode_uint) out;
    /* cannot overflow because out <= input_length <= maxint */

    /* h is the number of code points that have been handled, b is the */
    /* number of basic code points, and out is the number of ASCII code */
    /* points that have been output. */

    if (b > 0)
        output[out++] = delimiter;

    /* Main encoding loop: */

    while (h < input_length) {
        /* All non-basic code points < n have been */
        /* handled already. Find the next larger one: */

        for (m = maxint, j = 0; j < input_length; ++j) {
            /* if (basic(input[j])) continue; */
            /* (not needed for Punycode) */
            if (input[j] >= n && input[j] < m)
                m = input[j];
        }

        /* Increase delta enough to advance the decoder's */
        /* <n,i> state to <m,0>, but guard against overflow: */

        if (m - n > (maxint - delta) / (h + 1))
            return punycode_overflow;
        delta += (m - n) * (h + 1);
        n = m;

        for (j = 0; j < input_length; ++j) {
            /* Punycode does not need to check whether input[j] is basic: */
            if (input[j] < n /* || basic(input[j]) */) {
                if (++delta == 0)
                    return punycode_overflow;
            }

            if (input[j] == n) {
                /* Represent delta as a generalized variable-length integer: */

                for (q = delta, k = base;; k += base) {
                    if (out >= max_out)
                        return punycode_big_output;
                    t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
                        k >= bias + tmax ? tmax : k - bias;
                    if (q < t)
                        break;
                    output[out++] = encode_digit(t + (q - t) % (base - t));
                    q = (q - t) / (base - t);
                }

                output[out++] = encode_digit(q);
                bias = adapt(delta, h + 1, h == b);
                delta = 0;
                ++h;
            }
        }

        ++delta, ++n;
    }

    *output_length = out;
    return punycode_success;
}

/*
 * Decode the UTF-8 bytes in[0..inlen) into code points.
 *
 * At most outlen - 1 code points are stored in 'out' (one slot is held
 * back); decoding stops silently when that limit is reached.
 *
 * Returns the number of code points stored, or -1 if outlen is 0 or the
 * input contains a malformed or truncated sequence.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
    size_t n = 0;
    const unsigned char *s = (const unsigned char *) in;
    const unsigned char *e = (const unsigned char *) (in + inlen);

    if (!outlen)
        return -1;

    outlen--;

    while (n < outlen) {
        size_t inleft = e - s;

        if (inleft >= 1 && (*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
            out[n++] = *s;
            s++;
        } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ {
            if ((s[1] & 0xC0) != 0x80)
                return -1;
            out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
            s += 2;
        } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ {
            if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
                return -1;
            out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
            s += 3;
        } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ {
            if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
                return -1;
            /* each of the three continuation bytes contributes its own 6 bits */
            out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
            s += 4;
        } else if (!inleft) {
            break;
        } else
            return -1;
    }

    return n;
}

/* Return 1 if none of the n bytes at s has the high bit set, else 0. */
static int mem_is_ascii(const char *s, size_t n)
{
    for (; n; n--) /* 'while(n--)' generates unsigned integer overflow on n = 0 */
        if (*((unsigned char *)s++) >= 128)
            return 0;

    return 1;
}

/*
 * Convert a UTF-8 domain name into its ASCII form, label by label.
 *
 * Pure ASCII labels are copied unchanged. Every other label is decoded
 * from UTF-8, Punycode-encoded and prefixed with "xn--". No case mapping
 * or normalization is applied.
 *
 * 'out' receives the NUL-terminated result and must have room for
 * 'outsize' bytes.
 *
 * Returns 0 on success, 1 on failure (result does not fit into 'out',
 * malformed UTF-8, or encoder error). On failure the content of 'out' is
 * unspecified.
 */
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
{
    size_t outlen = 0, labellen;
    punycode_uint input[256];
    const char *label, *e;

    for (e = label = domain; e; label = e + 1) {
        e = strchr(label, '.');
        labellen = e ? (size_t) (e - label) : strlen(label);

        if (mem_is_ascii(label, labellen)) {
            if (outlen + labellen + (e != NULL) >= outsize)
                return 1;

            memcpy(out + outlen, label, labellen);
            outlen += labellen;
        } else {
            ssize_t inputlen = 0;

            /* rough check using the UTF-8 length; also guarantees room for "xn--" */
            if (outlen + labellen + (e != NULL) + 4 >= outsize)
                return 1;

            if ((inputlen = utf8_to_utf32(label, labellen, input, sizeof(input) / sizeof(input[0]))) < 0)
                return 1;

            memcpy(out + outlen, "xn--", 4);
            outlen += 4;

            /*
             * The encoded label can be longer than its UTF-8 form and the
             * encoder may use all the space it is given. Keep back room for
             * the label separator (if any) and the trailing NUL written
             * below. This can not underflow because of the check above.
             */
            labellen = outsize - outlen - (e != NULL) - 1;
            if (punycode_encode(inputlen, input, &labellen, out + outlen))
                return 1;
            outlen += labellen;
        }

        if (e)
            out[outlen++] = '.';
        out[outlen] = 0;
    }

    return 0;
}
609 #endif
610
/*
 * Locale independent whitespace test.
 * Returns 1 for space, tab, carriage return and newline, else 0.
 */
static int isspace_ascii(const char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
615
/*
 * Return 1 if the NUL-terminated string contains only bytes below 128
 * (this includes the empty string), else 0.
 */
static int str_is_ascii(const char *s)
{
    const unsigned char *p;

    for (p = (const unsigned char *) s; *p; p++) {
        if (*p >= 128)
            return 0;
    }

    return 1;
}
622
623 #if defined(WITH_LIBIDN)
/*
 * Work around a libidn <= 1.30 vulnerability.
 *
 * Checks that the string is a structurally valid UTF-8 byte sequence
 * (correct lead bytes, each followed by the right number of continuation
 * bytes) before it is passed to idna_to_ascii_8z().
 *
 * Returns 1 if the sequence is valid, 0 if not.
 *
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
 */
static int utf8_is_valid(const char *utf8)
{
    const unsigned char *p = (const unsigned char *) utf8;

    while (*p) {
        int trail, i;

        if ((*p & 0x80) == 0)           /* 0xxxxxxx ASCII char */
            trail = 0;
        else if ((*p & 0xE0) == 0xC0)   /* 110xxxxx 10xxxxxx */
            trail = 1;
        else if ((*p & 0xF0) == 0xE0)   /* 1110xxxx 10xxxxxx 10xxxxxx */
            trail = 2;
        else if ((*p & 0xF8) == 0xF0)   /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            trail = 3;
        else
            return 0;

        /* stops at the first non-continuation byte, so never reads past the NUL */
        for (i = 1; i <= trail; i++) {
            if ((p[i] & 0xC0) != 0x80)
                return 0;
        }

        p += trail + 1;
    }

    return 1;
}
659 #endif
660
/* opaque handle type for the IDNA conversion state */
typedef void *psl_idna_t;

/*
 * Open an IDNA conversion handle.
 *
 * With libicu this is a UTS#46 instance (STD3 rules, non-transitional
 * toASCII). All other builds need no state and return NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
    UErrorCode status = 0;

    return (void *)uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &status);
#else
    return NULL;
#endif
}
671
/*
 * Close a handle returned by psl_idna_open().
 * A NULL handle is ignored; without libicu this is a no-op.
 */
static void psl_idna_close(psl_idna_t *idna PSL_UNUSED)
{
#if defined(WITH_LIBICU)
    if (idna != NULL)
        uidna_close((UIDNA *)idna);
#endif
}
679
/*
 * Convert the UTF-8 domain name 'utf8' into its ASCII (punycode) form,
 * using whichever IDNA backend the library was built with.
 *
 * On success *ascii receives a newly allocated string that the caller has
 * to free(). 'idna' is only used by the libicu backend, where a NULL
 * handle makes the conversion fail.
 *
 * Returns 0 on success, -1 on failure.
 */
static int psl_idna_toASCII(psl_idna_t *idna PSL_UNUSED, const char *utf8, char **ascii)
{
    int ret = -1;

#if defined(WITH_LIBICU)
    /* IDNA2008 UTS#46 punycode conversion */
    if (idna) {
        char name_buf[128] = "", *name = name_buf;
        UChar src_buf[128], dst[128];
        UChar *src = src_buf;
        UErrorCode status = 0;
        UIDNAInfo info = UIDNA_INFO_INITIALIZER;
        int32_t src_len, dst_len, nbytes;

        /* UTF-8 -> UTF-16 */
        u_strFromUTF8(src, countof(src_buf), &src_len, utf8, -1, &status);
        if (!U_SUCCESS(status))
            goto cleanup; /* UTF-8 to UTF-16 conversion failed */

        if (src_len >= (int) countof(src_buf)) {
            /* no room for the terminator in the stack buffer, convert again into heap memory */
            src = malloc((src_len + 1) * sizeof(UChar));
            if (!src)
                goto cleanup;

            u_strFromUTF8(src, src_len, NULL, utf8, -1, &status);
            if (!U_SUCCESS(status))
                goto cleanup; /* UTF-8 to UTF-16 conversion failed */

            src[src_len] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
        }

        /* toASCII */
        dst_len = uidna_nameToASCII((UIDNA *)idna, src, src_len, dst, countof(dst), &info, &status);
        if (!U_SUCCESS(status))
            goto cleanup; /* to ASCII conversion failed */

        /* UTF-16 -> UTF-8 */
        u_strToUTF8(name, sizeof(name_buf), &nbytes, dst, dst_len, &status);
        if (!U_SUCCESS(status))
            goto cleanup; /* UTF-16 to UTF-8 conversion failed */

        if (nbytes >= (int) sizeof(name_buf)) {
            name = malloc(nbytes + 1);
            if (!name)
                goto cleanup;

            u_strToUTF8(name, nbytes, NULL, dst, dst_len, &status);
            if (!U_SUCCESS(status))
                goto cleanup; /* UTF-16 to UTF-8 conversion failed */

            name[nbytes] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
        } else {
            /* the result is handed out to the caller, so it has to live on the heap */
            name = strdup(name_buf);
            if (!name)
                goto cleanup;
        }

        if (ascii) {
            *ascii = name;
            name = NULL; /* ownership moved to the caller */
        }

        ret = 0;

cleanup:
        if (name != name_buf)
            free(name);
        if (src != src_buf)
            free(src);
    }
#elif defined(WITH_LIBIDN2)
#if IDN2_VERSION_NUMBER >= 0x00140000
    /* Non-transitional TR46 processing is requested.
     * IDN2_NFC_INPUT (convert to NFC before the toASCII conversion) is
     * passed in addition, just for the unlikely case that the linked
     * library does not match the headers used when building and doesn't
     * support TR46. */

    if (idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL) == IDN2_OK)
        ret = 0;
#else
    uint8_t *lower;
    size_t len = u8_strlen((uint8_t *)utf8) + 1;

    /* we need a conversion to lowercase */
    lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len);
    if (!lower)
        return -1;

    if (idn2_lookup_u8(lower, (uint8_t **)ascii, 0) == IDN2_OK)
        ret = 0;

    free(lower);
#endif
#elif defined(WITH_LIBIDN)
    if (!utf8_is_valid(utf8))
        return -1; /* invalid UTF-8 sequence, not converted */

    /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
    if (idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES) == IDNA_SUCCESS)
        ret = 0;
#else
    char name_buf[128];

    /* no IDNA library: use the built-in punycode conversion */
    if (domain_to_punycode(utf8, name_buf, sizeof(name_buf)) == 0 && ascii) {
        *ascii = strdup(name_buf);
        if (*ascii)
            ret = 0;
    }
#endif

    return ret;
}
799
/*
 * If the rule text of 'e' is not pure ASCII, convert it with
 * psl_idna_toASCII() and add the converted rule (with the same flags) as
 * an additional entry to 'v'.
 *
 * Nothing is added if the rule is ASCII, the conversion fails, the
 * conversion does not change the text, or the converted rule is too long.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
    char *ace;
    psl_entry_t converted, *stored;

    if (str_is_ascii(e->label_buf))
        return;

    if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
        return;

    if (strcmp(e->label_buf, ace) != 0 && suffix_init(&converted, ace, strlen(ace)) == 0) {
        converted.flags = e->flags;

        stored = vector_get(v, vector_add(v, &converted));
        if (stored)
            stored->label = stored->label_buf; /* the copy lives at a different address */
    }

    free(ace);
}
822
/*
 * Prototypes of the DAFSA lookup routines (defined in another file).
 * is_public_suffix() treats a LookupStringInFixedSet() result of -1 as
 * "not found" and any other result as the PRIV_PSL_FLAG_* bits of the rule.
 */
int LookupStringInFixedSet(const unsigned char *graph, size_t length, const char *key, size_t key_length);
int GetUtfMode(const unsigned char *graph, size_t length);
826
is_public_suffix(const psl_ctx_t * psl,const char * domain,int type)827 static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
828 {
829 psl_entry_t suffix;
830 const char *p;
831 char *punycode = NULL;
832 int need_conversion = 0;
833
834 /* this function should be called without leading dots, just make sure */
835 if (*domain == '.')
836 domain++;
837
838 suffix.nlabels = 1;
839
840 for (p = domain; *p; p++) {
841 if (*p == '.')
842 suffix.nlabels++;
843 else if (*((unsigned char *)p) >= 128)
844 need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
845 }
846
847 if (suffix.nlabels == 1) {
848 /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
849 */
850 if (!(type & PSL_TYPE_NO_STAR_RULE))
851 return 1;
852 }
853
854 type &= ~PSL_TYPE_NO_STAR_RULE;
855
856 if (psl->utf8 || psl == &builtin_psl)
857 need_conversion = 0;
858
859 if (need_conversion) {
860 psl_idna_t *idna = psl_idna_open();
861
862 if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
863 suffix.label = punycode;
864 suffix.length = strlen(punycode);
865 } else {
866 /* fallback */
867
868 suffix.label = domain;
869 suffix.length = p - suffix.label;
870 }
871
872 psl_idna_close(idna);
873 } else {
874 suffix.label = domain;
875 suffix.length = p - suffix.label;
876 }
877
878 if (psl == &builtin_psl || psl->dafsa) {
879 size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
880 const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
881 int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
882 if (rc != -1) {
883 /* check for correct rule type */
884 if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
885 goto suffix_no;
886 else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
887 goto suffix_no;
888
889 if (rc & PRIV_PSL_FLAG_EXCEPTION)
890 goto suffix_no;
891
892 /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
893 /* definitely a match, no matter if the found rule is a wildcard or not */
894 goto suffix_yes;
895 }
896 if ((suffix.label = strchr(suffix.label, '.'))) {
897 suffix.label++;
898 suffix.length = strlen(suffix.label);
899 suffix.nlabels--;
900
901 rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
902 if (rc != -1) {
903 /* check for correct rule type */
904 if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
905 goto suffix_no;
906 else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
907 goto suffix_no;
908
909 if (rc & PRIV_PSL_FLAG_WILDCARD)
910 goto suffix_yes;
911 }
912 }
913 } else {
914 psl_entry_t *rule = vector_get(psl->suffixes, 0);
915
916 if (!rule || rule->nlabels < suffix.nlabels - 1)
917 goto suffix_no;
918
919 rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
920
921 if (rule) {
922 /* check for correct rule type */
923 if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
924 goto suffix_no;
925 else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
926 goto suffix_no;
927
928 if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
929 goto suffix_no;
930
931 /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
932 /* definitely a match, no matter if the found rule is a wildcard or not */
933 goto suffix_yes;
934 }
935
936 if ((suffix.label = strchr(suffix.label, '.'))) {
937 int pos;
938
939 suffix.label++;
940 suffix.length = strlen(suffix.label);
941 suffix.nlabels--;
942
943 rule = vector_get(psl->suffixes, (pos = vector_find(psl->suffixes, &suffix)));
944
945 if (rule) {
946 /* check for correct rule type */
947 if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
948 goto suffix_no;
949 else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
950 goto suffix_no;
951
952 if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
953 goto suffix_yes;
954 }
955 }
956 }
957
958 suffix_no:
959 if (punycode)
960 free(punycode);
961 return 0;
962
963 suffix_yes:
964 if (punycode)
965 free(punycode);
966 return 1;
967 }
968
969 /**
970 * psl_is_public_suffix:
971 * @psl: PSL context
972 * @domain: Domain string
973 *
974 * This function checks if @domain is a public suffix by the means of the
975 * [Mozilla Public Suffix List](https://publicsuffix.org).
976 *
977 * For cookie domain checking see psl_is_cookie_domain_acceptable().
978 *
979 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
980 * Other encodings likely result in incorrect return values.
981 * Use helper function psl_str_to_utf8lower() for normalization @domain.
982 *
983 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
984 * psl_builtin().
985 *
986 * Returns: 1 if domain is a public suffix, 0 if not.
987 *
988 * Since: 0.1
989 */
psl_is_public_suffix(const psl_ctx_t * psl,const char * domain)990 int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
991 {
992 if (!psl || !domain)
993 return 1;
994
995 return is_public_suffix(psl, domain, PSL_TYPE_ANY);
996 }
997
998 /**
999 * psl_is_public_suffix2:
1000 * @psl: PSL context
1001 * @domain: Domain string
1002 * @type: Domain type
1003 *
1004 * This function checks if @domain is a public suffix by the means of the
1005 * [Mozilla Public Suffix List](https://publicsuffix.org).
1006 *
1007 * @type specifies the PSL section where to perform the lookup. Valid values are
1008 * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1009 *
1010 * %PSL_TYPE_NO_STAR_RULE switches of the 'prevailing star rule' (see
1011 * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1012 * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1013 *
1014 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1015 * Other encodings likely result in incorrect return values.
1016 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1017 *
1018 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1019 * psl_builtin().
1020 *
1021 * Returns: 1 if domain is a public suffix, 0 if not.
1022 *
1023 * Since: 0.1
1024 */
psl_is_public_suffix2(const psl_ctx_t * psl,const char * domain,int type)1025 int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
1026 {
1027 if (!psl || !domain)
1028 return 1;
1029
1030 return is_public_suffix(psl, domain, type);
1031 }
1032
1033 /**
1034 * psl_unregistrable_domain:
1035 * @psl: PSL context
1036 * @domain: Domain string
1037 *
1038 * This function finds the longest public suffix part of @domain by the means
1039 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1040 *
1041 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1042 * Other encodings likely result in incorrect return values.
1043 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1044 *
1045 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1046 * psl_builtin().
1047 *
1048 * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1049 * does not contain a public suffix (or if @psl is %NULL).
1050 *
1051 * Since: 0.1
1052 */
psl_unregistrable_domain(const psl_ctx_t * psl,const char * domain)1053 const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
1054 {
1055 int nlabels = 0;
1056 const char *p;
1057
1058 if (!psl || !domain)
1059 return NULL;
1060
1061 /*
1062 * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1063 * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1064 */
1065 for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1066 if (*p == '.' && ++nlabels > 8) {
1067 domain = p + 1;
1068 break;
1069 }
1070 }
1071
1072 /*
1073 * We check from left to right to catch special PSL entries like 'forgot.his.name':
1074 * 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1075 */
1076
1077 while (!is_public_suffix(psl, domain, 0)) {
1078 if ((domain = strchr(domain, '.')))
1079 domain++;
1080 else
1081 break; /* prevent endless loop if is_public_suffix() is broken. */
1082 }
1083
1084 return domain;
1085 }
1086
1087 /**
1088 * psl_registrable_domain:
1089 * @psl: PSL context
1090 * @domain: Domain string
1091 *
1092 * This function finds the shortest private suffix part of @domain by the means
1093 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1094 *
1095 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1096 * Other encodings likely result in incorrect return values.
1097 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1098 *
1099 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1100 * psl_builtin().
1101 *
1102 * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1103 * does not contain a private suffix (or if @psl is %NULL).
1104 *
1105 * Since: 0.1
1106 */
psl_registrable_domain(const psl_ctx_t * psl,const char * domain)1107 const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
1108 {
1109 const char *p, *regdom = NULL;
1110 int nlabels = 0;
1111
1112 if (!psl || !domain || *domain == '.')
1113 return NULL;
1114
1115 /*
1116 * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1117 * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1118 */
1119 for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1120 if (*p == '.' && ++nlabels > 8) {
1121 domain = p + 1;
1122 break;
1123 }
1124 }
1125
1126 /*
1127 * We check from left to right to catch special PSL entries like 'forgot.his.name':
1128 * 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1129 */
1130
1131 while (!is_public_suffix(psl, domain, 0)) {
1132 if ((p = strchr(domain, '.'))) {
1133 regdom = domain;
1134 domain = p + 1;
1135 } else
1136 break; /* prevent endless loop if is_public_suffix() is broken. */
1137 }
1138
1139 return regdom;
1140 }
1141
1142 /**
1143 * psl_load_file:
1144 * @fname: Name of PSL file
1145 *
1146 * This function loads the public suffixes file named @fname.
1147 * To free the allocated resources, call psl_free().
1148 *
1149 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1150 *
1151 * Returns: Pointer to a PSL context or %NULL on failure.
1152 *
1153 * Since: 0.1
1154 */
psl_load_file(const char * fname)1155 psl_ctx_t *psl_load_file(const char *fname)
1156 {
1157 FILE *fp;
1158 psl_ctx_t *psl = NULL;
1159
1160 if (!fname)
1161 return NULL;
1162
1163 if ((fp = fopen(fname, "rb"))) {
1164 psl = psl_load_fp(fp);
1165 fclose(fp);
1166 }
1167
1168 return psl;
1169 }
1170
1171 /**
1172 * psl_load_fp:
1173 * @fp: %FILE pointer
1174 *
1175 * This function loads the public suffixes from a %FILE pointer.
1176 * To free the allocated resources, call psl_free().
1177 *
1178 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1179 *
1180 * Returns: Pointer to a PSL context or %NULL on failure.
1181 *
1182 * Since: 0.1
1183 */
psl_load_fp(FILE * fp)1184 psl_ctx_t *psl_load_fp(FILE *fp)
1185 {
1186 psl_ctx_t *psl;
1187 psl_entry_t suffix, *suffixp;
1188 char buf[256], *linep, *p;
1189 int type = 0, is_dafsa;
1190 psl_idna_t *idna;
1191
1192 if (!fp)
1193 return NULL;
1194
1195 if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1196 return NULL;
1197
1198 /* read first line to allow ASCII / DAFSA detection */
1199 if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1200 goto fail;
1201
1202 is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1203
1204 if (is_dafsa) {
1205 void *m;
1206 size_t size = 65536, n, len = 0;
1207 int version = atoi(buf + 11);
1208
1209 if (version != 0)
1210 goto fail;
1211
1212 if (!(psl->dafsa = malloc(size)))
1213 goto fail;
1214
1215 memcpy(psl->dafsa, buf, len);
1216
1217 while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1218 len += n;
1219 if (len >= size) {
1220 if (!(m = realloc(psl->dafsa, size *= 2)))
1221 goto fail;
1222 psl->dafsa = m;
1223 }
1224 }
1225
1226 /* release unused memory */
1227 if ((m = realloc(psl->dafsa, len)))
1228 psl->dafsa = m;
1229 else if (!len)
1230 psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1231
1232 psl->dafsa_size = len;
1233 psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1234
1235 return psl;
1236 }
1237
1238 idna = psl_idna_open();
1239
1240 /*
1241 * as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1242 * as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1243 * as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1244 */
1245 psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1246 psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1247
1248 do {
1249 while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1250 if (!*linep) continue; /* skip empty lines */
1251
1252 if (*linep == '/' && linep[1] == '/') {
1253 if (!type) {
1254 if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1255 type = PRIV_PSL_FLAG_ICANN;
1256 else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1257 type = PRIV_PSL_FLAG_PRIVATE;
1258 }
1259 else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1260 type = 0;
1261 else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1262 type = 0;
1263
1264 continue; /* skip comments */
1265 }
1266
1267 /* parse suffix rule */
1268 for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1269 *linep = 0;
1270
1271 if (*p == '!') {
1272 p++;
1273 suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1274 psl->nexceptions++;
1275 } else if (*p == '*') {
1276 if (*++p != '.') {
1277 /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1278 continue;
1279 }
1280 p++;
1281 /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1282 suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1283 psl->nwildcards++;
1284 psl->nsuffixes++;
1285 } else {
1286 suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1287 psl->nsuffixes++;
1288 }
1289
1290 if (suffix_init(&suffix, p, linep - p) == 0) {
1291 int index;
1292
1293 if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1294 /* Found existing entry:
1295 * Combination of exception and plain rule is ambiguous
1296 * !foo.bar
1297 * foo.bar
1298 *
1299 * Allowed:
1300 * !foo.bar + *.foo.bar
1301 * foo.bar + *.foo.bar
1302 *
1303 * We do not check here, let's do it later.
1304 */
1305
1306 suffixp = vector_get(psl->suffixes, index);
1307 suffixp->flags |= suffix.flags;
1308 } else {
1309 /* New entry */
1310 suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1311 }
1312
1313 if (suffixp) {
1314 suffixp->label = suffixp->label_buf; /* set label to changed address */
1315 add_punycode_if_needed(idna, psl->suffixes, suffixp);
1316 }
1317 }
1318 } while ((linep = fgets(buf, sizeof(buf), fp)));
1319
1320 vector_sort(psl->suffixes);
1321
1322 psl_idna_close(idna);
1323
1324 return psl;
1325
1326 fail:
1327 psl_free(psl);
1328 return NULL;
1329 }
1330
1331 /**
1332 * psl_free:
1333 * @psl: PSL context pointer
1334 *
1335 * This function frees the the PSL context that has been retrieved via
1336 * psl_load_fp() or psl_load_file().
1337 *
1338 * Since: 0.1
1339 */
psl_free(psl_ctx_t * psl)1340 void psl_free(psl_ctx_t *psl)
1341 {
1342 if (psl && psl != &builtin_psl) {
1343 vector_free(&psl->suffixes);
1344 free(psl->dafsa);
1345 free(psl);
1346 }
1347 }
1348
1349 /**
1350 * psl_builtin:
1351 *
1352 * This function returns the PSL context that has been generated and built in at compile-time.
1353 * You don't have to free the returned context explicitly.
1354 *
1355 * The builtin data also contains punycode entries, one for each international domain name.
1356 *
1357 * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1358 * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1359 * representations of domains to functions like psl_is_public_suffix().
1360 *
1361 * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1362 *
1363 * Since: 0.1
1364 */
psl_builtin(void)1365 const psl_ctx_t *psl_builtin(void)
1366 {
1367 #if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
1368 return &builtin_psl;
1369 #else
1370 return NULL;
1371 #endif
1372 }
1373
1374 /**
1375 * psl_suffix_count:
1376 * @psl: PSL context pointer
1377 *
1378 * This function returns number of public suffixes maintained by @psl.
1379 * The number of exceptions within the Public Suffix List are not included.
1380 *
1381 * If the information is not available, the return value is -1 (since 0.19).
1382 * This is the case with DAFSA blobs or if @psl is %NULL.
1383 *
1384 * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1385 *
1386 * Since: 0.1
1387 */
psl_suffix_count(const psl_ctx_t * psl)1388 int psl_suffix_count(const psl_ctx_t *psl)
1389 {
1390 if (psl == &builtin_psl)
1391 return _psl_nsuffixes;
1392 else if (psl)
1393 return psl->dafsa ? -1 : psl->nsuffixes;
1394 else
1395 return -1;
1396 }
1397
1398 /**
1399 * psl_suffix_exception_count:
1400 * @psl: PSL context pointer
1401 *
1402 * This function returns number of public suffix exceptions maintained by @psl.
1403 *
1404 * If the information is not available, the return value is -1 (since 0.19).
1405 * This is the case with DAFSA blobs or if @psl is %NULL.
1406 *
1407 * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1408 *
1409 * Since: 0.1
1410 */
psl_suffix_exception_count(const psl_ctx_t * psl)1411 int psl_suffix_exception_count(const psl_ctx_t *psl)
1412 {
1413 if (psl == &builtin_psl)
1414 return _psl_nexceptions;
1415 else if (psl)
1416 return psl->dafsa ? -1 : psl->nexceptions;
1417 else
1418 return -1;
1419 }
1420
1421 /**
1422 * psl_suffix_wildcard_count:
1423 * @psl: PSL context pointer
1424 *
1425 * This function returns number of public suffix wildcards maintained by @psl.
1426 *
1427 * If the information is not available, the return value is -1 (since 0.19).
1428 * This is the case with DAFSA blobs or if @psl is %NULL.
1429 *
1430 * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1431 *
1432 * Since: 0.10.0
1433 */
psl_suffix_wildcard_count(const psl_ctx_t * psl)1434 int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1435 {
1436 if (psl == &builtin_psl)
1437 return _psl_nwildcards;
1438 else if (psl)
1439 return psl->dafsa ? -1 : psl->nwildcards;
1440 else
1441 return -1;
1442 }
1443
1444 /**
1445 * psl_builtin_file_time:
1446 *
1447 * This function returns the mtime of the Public Suffix List file that has been built in.
1448 *
1449 * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1450 *
1451 * Returns: time_t value or 0.
1452 *
1453 * Since: 0.1
1454 */
psl_builtin_file_time(void)1455 time_t psl_builtin_file_time(void)
1456 {
1457 return _psl_file_time;
1458 }
1459
1460 /**
1461 * psl_builtin_sha1sum:
1462 *
1463 * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1464 * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1465 *
1466 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1467 *
1468 * Returns: String containing SHA1 checksum or an empty string.
1469 *
1470 * Since: 0.1
1471 */
psl_builtin_sha1sum(void)1472 const char *psl_builtin_sha1sum(void)
1473 {
1474 return _psl_sha1_checksum;
1475 }
1476
1477 /**
1478 * psl_builtin_filename:
1479 *
1480 * This function returns the file name of the Public Suffix List file that has been built in.
1481 *
1482 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1483 *
1484 * Returns: String containing the PSL file name or an empty string.
1485 *
1486 * Since: 0.1
1487 */
psl_builtin_filename(void)1488 const char *psl_builtin_filename(void)
1489 {
1490 return _psl_filename;
1491 }
1492
1493 /**
1494 * psl_builtin_outdated:
1495 *
1496 * This function checks if the built-in data is older than the file it has been created from.
1497 * If it is, it might be a good idea for the application to reload the PSL.
1498 * The mtime is taken as reference.
1499 *
1500 * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1501 *
1502 * Returns: 1 if the built-in is outdated, 0 otherwise.
1503 *
1504 * Since: 0.10.0
1505 */
psl_builtin_outdated(void)1506 int psl_builtin_outdated(void)
1507 {
1508 struct stat st;
1509
1510 if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1511 return 1;
1512
1513 return 0;
1514 }
1515
1516 /**
1517 * psl_dist_filename:
1518 *
1519 * This function returns the file name of the distribution/system PSL data file.
1520 * This file will be considered by psl_latest().
1521 *
1522 * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1523 *
1524 * Returns: String containing a PSL file name or an empty string.
1525 *
1526 * Since: 0.16
1527 */
psl_dist_filename(void)1528 const char *psl_dist_filename(void)
1529 {
1530 return _psl_dist_filename;
1531 }
1532
1533 /**
1534 * psl_get_version:
1535 *
1536 * Get libpsl version.
1537 *
1538 * Returns: String containing version of libpsl.
1539 *
1540 * Since: 0.2.5
1541 **/
psl_get_version(void)1542 const char *psl_get_version(void)
1543 {
1544 #ifdef WITH_LIBICU
1545 return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1546 #elif defined(WITH_LIBIDN2)
1547 return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1548 #elif defined(WITH_LIBIDN)
1549 return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1550 #else
1551 return PACKAGE_VERSION " (no IDNA support)";
1552 #endif
1553 }
1554
1555 /**
1556 * psl_check_version_number:
1557 * @version: Version number (hex) to check against.
1558 *
1559 * Check the given version number is at minimum the current library version number.
1560 * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1561 *
1562 * Returns: Returns the library version number if the given version number is at least
1563 * the version of the library, else return 0; If the argument is 0, the function returns
1564 * the library version number without performing a check.
1565 *
1566 * Since: 0.11.0
1567 **/
psl_check_version_number(int version)1568 int psl_check_version_number(int version)
1569 {
1570 if (version) {
1571 int major = version >> 16;
1572 int minor = (version >> 8) & 0xFF;
1573 int patch = version & 0xFF;
1574
1575 if (major < PSL_VERSION_MAJOR
1576 || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1577 || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1578 {
1579 return 0;
1580 }
1581 }
1582
1583 return PSL_VERSION_NUMBER;
1584 }
1585
/*
 * Return whether hostname is an IP address or not.
 *
 * hostname is tried as textual IPv4 address first, then as textual IPv6 address.
 * Returns 1 if one of the conversions succeeds, else 0.
 */
static int isip(const char *hostname)
{
#ifdef _WIN32
	WCHAR wName[INET6_ADDRSTRLEN+1];

	struct sockaddr_in  addr  = {0};
	struct sockaddr_in6 addr6 = {0};

	INT size  = sizeof(addr);
	INT size6 = sizeof(addr6);

	/* WSAStringToAddressW() wants a wide string, a failed conversion means "not an IP" */
	if (!MultiByteToWideChar(CP_UTF8, 0, hostname, -1, wName, countof(wName)))
		return 0;

	return (WSAStringToAddressW(wName, AF_INET,  NULL, (struct sockaddr *)&addr,  &size)  != SOCKET_ERROR) |
	       (WSAStringToAddressW(wName, AF_INET6, NULL, (struct sockaddr *)&addr6, &size6) != SOCKET_ERROR);
#else
	struct in_addr addr;
	struct in6_addr addr6;

	/*
	 * inet_pton() returns 1 on success, 0 if the text is not a valid address
	 * and -1 if the address family is not supported. Only 1 is a match, a
	 * missing address family must not turn every hostname into an IP address.
	 */
	return inet_pton(AF_INET, hostname, &addr) == 1
		|| inet_pton(AF_INET6, hostname, &addr6) == 1;
#endif
}
1610
1611 /**
1612 * psl_is_cookie_domain_acceptable:
1613 * @psl: PSL context pointer
1614 * @hostname: The request hostname.
1615 * @cookie_domain: The domain value from a cookie
1616 *
1617 * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1618 * @hostname.
1619 *
1620 * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1621 * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1622 *
1623 * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1624 *
1625 * Examples:
1626 * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1627 * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1628 *
1629 * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1630 * but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1631 *
1632 * Returns: 1 if acceptable, 0 if not acceptable.
1633 *
1634 * Since: 0.1
1635 */
psl_is_cookie_domain_acceptable(const psl_ctx_t * psl,const char * hostname,const char * cookie_domain)1636 int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
1637 {
1638 const char *p;
1639 size_t hostname_length, cookie_domain_length;
1640
1641 if (!psl || !hostname || !cookie_domain)
1642 return 0;
1643
1644 while (*cookie_domain == '.')
1645 cookie_domain++;
1646
1647 if (!strcmp(hostname, cookie_domain))
1648 return 1; /* an exact match is acceptable (and pretty common) */
1649
1650 if (isip(hostname))
1651 return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
1652
1653 cookie_domain_length = strlen(cookie_domain);
1654 hostname_length = strlen(hostname);
1655
1656 if (cookie_domain_length >= hostname_length)
1657 return 0; /* cookie_domain is too long */
1658
1659 p = hostname + hostname_length - cookie_domain_length;
1660 if (!strcmp(p, cookie_domain) && p[-1] == '.') {
1661 /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
1662
1663 if (!(p = psl_unregistrable_domain(psl, hostname)))
1664 return 1;
1665
1666 if (cookie_domain_length > strlen(p))
1667 return 1;
1668 }
1669
1670 return 0;
1671 }
1672
/**
 * psl_free_string:
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
 *
 * This function free()'s the memory allocated by psl_str_to_utf8lower() when
 * returning a lowercase string.
 *
 * Passing %NULL is allowed and does nothing.
 *
 * Since: 0.19
 */
void psl_free_string(char *str)
{
	/* free(NULL) is a no-op, so no extra check is needed */
	free(str);
}
1687
1688 /**
1689 * psl_str_to_utf8lower:
1690 * @str: string to convert
1691 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1692 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1693 * @lower: return value containing the converted string
1694 *
1695 * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1696 * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1697 *
1698 * @lower stays unchanged on error.
1699 *
1700 * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1701 *
1702 * Returns: psl_error_t value.
1703 * PSL_SUCCESS: Success
1704 * PSL_ERR_INVALID_ARG: @str is a %NULL value.
1705 * PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1706 * PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1707 * PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1708 * PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1709 * PSL_ERR_NO_MEM: Failed to allocate memory
1710 *
1711 * Since: 0.4
1712 */
psl_str_to_utf8lower(const char * str,const char * encoding PSL_UNUSED,const char * locale PSL_UNUSED,char ** lower)1713 psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding PSL_UNUSED, const char *locale PSL_UNUSED, char **lower)
1714 {
1715 int ret = PSL_ERR_INVALID_ARG;
1716
1717 if (!str)
1718 return PSL_ERR_INVALID_ARG;
1719
1720 /* shortcut to avoid costly conversion */
1721 if (str_is_ascii(str)) {
1722 if (lower) {
1723 char *p, *tmp;
1724
1725 if (!(tmp = strdup(str)))
1726 return PSL_ERR_NO_MEM;
1727
1728 *lower = tmp;
1729
1730 /* convert ASCII string to lowercase */
1731 for (p = *lower; *p; p++)
1732 if (isupper(*p))
1733 *p = tolower(*p);
1734 }
1735 return PSL_SUCCESS;
1736 }
1737
1738 #ifdef WITH_LIBICU
1739 do {
1740 size_t str_length = strlen(str);
1741 UErrorCode status = 0;
1742 UChar *utf16_dst, *utf16_lower;
1743 int32_t utf16_dst_length;
1744 char *utf8_lower;
1745 UConverter *uconv;
1746
1747 if (str_length < 256) {
1748 /* C89 allocation */
1749 utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1));
1750 utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
1751 utf8_lower = alloca(str_length * 6 + 1);
1752 } else {
1753 utf16_dst = malloc(sizeof(UChar) * (str_length * 2 + 1));
1754 utf16_lower = malloc(sizeof(UChar) * (str_length * 2 + 1));
1755 utf8_lower = malloc(str_length * 6 + 1);
1756
1757 if (!utf16_dst || !utf16_lower || !utf8_lower) {
1758 ret = PSL_ERR_NO_MEM;
1759 goto out;
1760 }
1761 }
1762
1763 uconv = ucnv_open(encoding, &status);
1764 if (U_SUCCESS(status)) {
1765 utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
1766 ucnv_close(uconv);
1767
1768 if (U_SUCCESS(status)) {
1769 int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
1770 if (U_SUCCESS(status)) {
1771 u_strToUTF8(utf8_lower, str_length * 6 + 1, NULL, utf16_lower, utf16_lower_length, &status);
1772 if (U_SUCCESS(status)) {
1773 ret = PSL_SUCCESS;
1774 if (lower) {
1775 char *tmp = strdup(utf8_lower);
1776
1777 if (tmp)
1778 *lower = tmp;
1779 else
1780 ret = PSL_ERR_NO_MEM;
1781 }
1782 } else {
1783 ret = PSL_ERR_TO_UTF8;
1784 /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1785 }
1786 } else {
1787 ret = PSL_ERR_TO_LOWER;
1788 /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1789 }
1790 } else {
1791 ret = PSL_ERR_TO_UTF16;
1792 /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1793 }
1794 } else {
1795 ret = PSL_ERR_CONVERTER;
1796 /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1797 }
1798 out:
1799 if (str_length >= 256) {
1800 free(utf16_dst);
1801 free(utf16_lower);
1802 free(utf8_lower);
1803 }
1804 } while (0);
1805 #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1806 do {
1807 /* find out local charset encoding */
1808 if (!encoding) {
1809 #ifdef HAVE_NL_LANGINFO
1810 encoding = nl_langinfo(CODESET);
1811 #elif defined _WIN32
1812 static char buf[16];
1813 snprintf(buf, sizeof(buf), "CP%u", GetACP());
1814 encoding = buf;
1815 #endif
1816 if (!encoding || !*encoding)
1817 encoding = "ASCII";
1818 }
1819
1820 /* convert to UTF-8 */
1821 if (strcasecmp(encoding, "utf-8")) {
1822 iconv_t cd = iconv_open("utf-8", encoding);
1823
1824 if (cd != (iconv_t)-1) {
1825 char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1826 size_t tmp_len = strlen(str) + 1;
1827 size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1828 char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1829
1830 if (!dst) {
1831 ret = PSL_ERR_NO_MEM;
1832 }
1833 else if (iconv(cd, (WINICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1834 && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1835 {
1836 /* start size for u8_tolower internal memory allocation.
1837 * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1838 * and thus in len. */
1839 size_t len = dst_len - dst_len_tmp;
1840
1841 if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1842 ret = PSL_SUCCESS;
1843 if (lower) {
1844 *lower = tmp;
1845 tmp = NULL;
1846 } else
1847 free(tmp);
1848 } else {
1849 ret = PSL_ERR_TO_LOWER;
1850 /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1851 }
1852 } else {
1853 ret = PSL_ERR_TO_UTF8;
1854 /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1855 }
1856
1857 free(dst);
1858 iconv_close(cd);
1859 } else {
1860 ret = PSL_ERR_TO_UTF8;
1861 /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1862 }
1863 } else {
1864 /* we need a conversion to lowercase */
1865 uint8_t *tmp;
1866
1867 /* start size for u8_tolower internal memory allocation.
1868 * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1869 size_t len = u8_strlen((uint8_t *)str) + 1;
1870
1871 if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1872 ret = PSL_SUCCESS;
1873 if (lower) {
1874 *lower = (char*)tmp;
1875 tmp = NULL;
1876 } else
1877 free(tmp);
1878 } else {
1879 ret = PSL_ERR_TO_LOWER;
1880 /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1881 }
1882 }
1883
1884 } while (0);
1885 #endif
1886
1887 return ret;
1888 }
1889
1890 /* if file is newer than the builtin data, insert it reverse sorted by mtime */
insert_file(const char * fname,const char ** psl_fname,time_t * psl_mtime,int n)1891 static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
1892 {
1893 struct stat st;
1894 int it;
1895
1896 if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
1897 /* add file name and mtime to end of array */
1898 psl_fname[n] = fname;
1899 psl_mtime[n++] = st.st_mtime;
1900
1901 /* move the new entry to it's correct position */
1902 for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
1903 psl_fname[it + 1] = psl_fname[it];
1904 psl_mtime[it + 1] = psl_mtime[it];
1905 psl_fname[it] = fname;
1906 psl_mtime[it] = st.st_mtime;
1907 }
1908 }
1909
1910 return n;
1911 }
1912
1913 /**
1914 * psl_latest:
1915 * @fname: Name of PSL file or %NULL
1916 *
1917 * This function loads the the latest available PSL data from either
1918 * - @fname (application specific filename, may be %NULL)
1919 * - location specified during built-time (filename from ./configure --with-psl-distfile)
1920 * - built-in PSL data (generated from ./configure --with-psl-file)
1921 * - location of built-in data (filename from ./configure --with-psl-file)
1922 *
1923 * If none of the above is available, the function returns %NULL.
1924 *
1925 * To free the allocated resources, call psl_free().
1926 *
1927 * Returns: Pointer to a PSL context or %NULL on failure.
1928 *
1929 * Since: 0.16
1930 */
psl_latest(const char * fname)1931 psl_ctx_t *psl_latest(const char *fname)
1932 {
1933 psl_ctx_t *psl;
1934 const char *psl_fname[3];
1935 time_t psl_mtime[3];
1936 int it, ntimes;
1937
1938 psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
1939
1940 /* create array of PSL files reverse sorted by mtime (latest first) */
1941 ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
1942 ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
1943 ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
1944
1945 /* load PSL data from the latest file, falling back to the second recent, ... */
1946 for (psl = NULL, it = 0; it < ntimes; it++) {
1947 if (psl_mtime[it] > _psl_file_time)
1948 if ((psl = psl_load_file(psl_fname[it])))
1949 break;
1950 }
1951
1952 /* if file loading failed or there is no file newer than the builtin data,
1953 * then return the builtin data. */
1954 return psl ? psl : (psl_ctx_t *) psl_builtin();
1955 }
1956