1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimeglob.c: Private file. Datastructure for storing the globs.
3 *
4 * More info can be found at http://www.freedesktop.org/standards/
5 *
6 * Copyright (C) 2003 Red Hat, Inc.
7 * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu>
8 *
9 * Licensed under the Academic Free License version 2.0
10 * Or under the following terms:
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 */
25
26 #include "config.h"
27
28 #include "xdgmimeglob.h"
29 #include "xdgmimeint.h"
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <assert.h>
33 #include <string.h>
34 #include <fnmatch.h>
35
36 #ifndef MAX
37 #define MAX(a,b) ((a) > (b) ? (a) : (b))
38 #endif
39
40 #ifndef FALSE
41 #define FALSE (0)
42 #endif
43
44 #ifndef TRUE
45 #define TRUE (!FALSE)
46 #endif
47
48 typedef struct XdgGlobHashNode XdgGlobHashNode;
49 typedef struct XdgGlobList XdgGlobList;
50
51 struct XdgGlobHashNode
52 {
53 xdg_unichar_t character;
54 const char *mime_type;
55 int weight;
56 int case_sensitive;
57 XdgGlobHashNode *next;
58 XdgGlobHashNode *child;
59 };
60 struct XdgGlobList
61 {
62 const char *data;
63 const char *mime_type;
64 int weight;
65 int case_sensitive;
66 XdgGlobList *next;
67 };
68
69 struct XdgGlobHash
70 {
71 XdgGlobList *literal_list;
72 XdgGlobHashNode *simple_node;
73 XdgGlobList *full_list;
74 };
75
76
77 /* XdgGlobList
78 */
79 static XdgGlobList *
_xdg_glob_list_new(void)80 _xdg_glob_list_new (void)
81 {
82 XdgGlobList *new_element;
83
84 new_element = calloc (1, sizeof (XdgGlobList));
85
86 return new_element;
87 }
88
89 /* Frees glob_list and all of it's children */
90 static void
_xdg_glob_list_free(XdgGlobList * glob_list)91 _xdg_glob_list_free (XdgGlobList *glob_list)
92 {
93 XdgGlobList *ptr, *next;
94
95 ptr = glob_list;
96
97 while (ptr != NULL)
98 {
99 next = ptr->next;
100
101 if (ptr->data)
102 free ((void *) ptr->data);
103 if (ptr->mime_type)
104 free ((void *) ptr->mime_type);
105 free (ptr);
106
107 ptr = next;
108 }
109 }
110
111 static XdgGlobList *
_xdg_glob_list_append(XdgGlobList * glob_list,void * data,const char * mime_type,int weight,int case_sensitive)112 _xdg_glob_list_append (XdgGlobList *glob_list,
113 void *data,
114 const char *mime_type,
115 int weight,
116 int case_sensitive)
117 {
118 XdgGlobList *new_element;
119 XdgGlobList *tmp_element;
120
121 tmp_element = glob_list;
122 while (tmp_element != NULL)
123 {
124 if (strcmp (tmp_element->data, data) == 0 &&
125 strcmp (tmp_element->mime_type, mime_type) == 0)
126 return glob_list;
127
128 tmp_element = tmp_element->next;
129 }
130
131 new_element = _xdg_glob_list_new ();
132 new_element->data = data;
133 new_element->mime_type = mime_type;
134 new_element->weight = weight;
135 new_element->case_sensitive = case_sensitive;
136 if (glob_list == NULL)
137 return new_element;
138
139 tmp_element = glob_list;
140 while (tmp_element->next != NULL)
141 tmp_element = tmp_element->next;
142
143 tmp_element->next = new_element;
144
145 return glob_list;
146 }
147
148 /* XdgGlobHashNode
149 */
150
151 static XdgGlobHashNode *
_xdg_glob_hash_node_new(void)152 _xdg_glob_hash_node_new (void)
153 {
154 XdgGlobHashNode *glob_hash_node;
155
156 glob_hash_node = calloc (1, sizeof (XdgGlobHashNode));
157
158 return glob_hash_node;
159 }
160
161 #ifdef NOT_USED_IN_GIO
162
163 static void
_xdg_glob_hash_node_dump(XdgGlobHashNode * glob_hash_node,int depth)164 _xdg_glob_hash_node_dump (XdgGlobHashNode *glob_hash_node,
165 int depth)
166 {
167 int i;
168 for (i = 0; i < depth; i++)
169 printf (" ");
170
171 printf ("%c", (char)glob_hash_node->character);
172 if (glob_hash_node->mime_type)
173 printf (" - %s %d\n", glob_hash_node->mime_type, glob_hash_node->weight);
174 else
175 printf ("\n");
176 if (glob_hash_node->child)
177 _xdg_glob_hash_node_dump (glob_hash_node->child, depth + 1);
178 if (glob_hash_node->next)
179 _xdg_glob_hash_node_dump (glob_hash_node->next, depth);
180 }
181
182 #endif
183
184 static XdgGlobHashNode *
_xdg_glob_hash_insert_ucs4(XdgGlobHashNode * glob_hash_node,xdg_unichar_t * text,const char * mime_type,int weight,int case_sensitive)185 _xdg_glob_hash_insert_ucs4 (XdgGlobHashNode *glob_hash_node,
186 xdg_unichar_t *text,
187 const char *mime_type,
188 int weight,
189 int case_sensitive)
190 {
191 XdgGlobHashNode *node;
192 xdg_unichar_t character;
193
194 character = text[0];
195
196 if ((glob_hash_node == NULL) ||
197 (character < glob_hash_node->character))
198 {
199 node = _xdg_glob_hash_node_new ();
200 node->character = character;
201 node->next = glob_hash_node;
202 glob_hash_node = node;
203 }
204 else if (character == glob_hash_node->character)
205 {
206 node = glob_hash_node;
207 }
208 else
209 {
210 XdgGlobHashNode *prev_node;
211 int found_node = FALSE;
212
213 /* Look for the first character of text in glob_hash_node, and insert it if we
214 * have to.*/
215 prev_node = glob_hash_node;
216 node = prev_node->next;
217
218 while (node != NULL)
219 {
220 if (character < node->character)
221 {
222 node = _xdg_glob_hash_node_new ();
223 node->character = character;
224 node->next = prev_node->next;
225 prev_node->next = node;
226
227 found_node = TRUE;
228 break;
229 }
230 else if (character == node->character)
231 {
232 found_node = TRUE;
233 break;
234 }
235 prev_node = node;
236 node = node->next;
237 }
238
239 if (! found_node)
240 {
241 node = _xdg_glob_hash_node_new ();
242 node->character = character;
243 node->next = prev_node->next;
244 prev_node->next = node;
245 }
246 }
247
248 text++;
249 if (*text == 0)
250 {
251 if (node->mime_type)
252 {
253 if (strcmp (node->mime_type, mime_type) != 0)
254 {
255 XdgGlobHashNode *child;
256 int found_node = FALSE;
257
258 child = node->child;
259 while (child && child->character == 0)
260 {
261 if (strcmp (child->mime_type, mime_type) == 0)
262 {
263 found_node = TRUE;
264 break;
265 }
266 child = child->next;
267 }
268
269 if (!found_node)
270 {
271 child = _xdg_glob_hash_node_new ();
272 child->character = 0;
273 child->mime_type = strdup (mime_type);
274 child->weight = weight;
275 child->case_sensitive = case_sensitive;
276 child->child = NULL;
277 child->next = node->child;
278 node->child = child;
279 }
280 }
281 }
282 else
283 {
284 node->mime_type = strdup (mime_type);
285 node->weight = weight;
286 node->case_sensitive = case_sensitive;
287 }
288 }
289 else
290 {
291 node->child = _xdg_glob_hash_insert_ucs4 (node->child, text, mime_type, weight, case_sensitive);
292 }
293 return glob_hash_node;
294 }
295
296 /* glob must be valid UTF-8 */
297 static XdgGlobHashNode *
_xdg_glob_hash_insert_text(XdgGlobHashNode * glob_hash_node,const char * text,const char * mime_type,int weight,int case_sensitive)298 _xdg_glob_hash_insert_text (XdgGlobHashNode *glob_hash_node,
299 const char *text,
300 const char *mime_type,
301 int weight,
302 int case_sensitive)
303 {
304 XdgGlobHashNode *node;
305 xdg_unichar_t *unitext;
306 int len;
307
308 unitext = _xdg_convert_to_ucs4 (text, &len);
309 _xdg_reverse_ucs4 (unitext, len);
310 node = _xdg_glob_hash_insert_ucs4 (glob_hash_node, unitext, mime_type, weight, case_sensitive);
311 free (unitext);
312 return node;
313 }
314
315 typedef struct {
316 const char *mime;
317 int weight;
318 } MimeWeight;
319
320 static int
_xdg_glob_hash_node_lookup_file_name(XdgGlobHashNode * glob_hash_node,const char * file_name,int len,int case_sensitive_check,MimeWeight mime_types[],int n_mime_types)321 _xdg_glob_hash_node_lookup_file_name (XdgGlobHashNode *glob_hash_node,
322 const char *file_name,
323 int len,
324 int case_sensitive_check,
325 MimeWeight mime_types[],
326 int n_mime_types)
327 {
328 int n;
329 XdgGlobHashNode *node;
330 xdg_unichar_t character;
331
332 if (glob_hash_node == NULL)
333 return 0;
334
335 character = file_name[len - 1];
336
337 for (node = glob_hash_node; node && character >= node->character; node = node->next)
338 {
339 if (character == node->character)
340 {
341 len--;
342 n = 0;
343 if (len > 0)
344 {
345 n = _xdg_glob_hash_node_lookup_file_name (node->child,
346 file_name,
347 len,
348 case_sensitive_check,
349 mime_types,
350 n_mime_types);
351 }
352 if (n == 0)
353 {
354 if (node->mime_type &&
355 (case_sensitive_check ||
356 !node->case_sensitive))
357 {
358 mime_types[n].mime = node->mime_type;
359 mime_types[n].weight = node->weight;
360 n++;
361 }
362 node = node->child;
363 while (n < n_mime_types && node && node->character == 0)
364 {
365 if (node->mime_type &&
366 (case_sensitive_check ||
367 !node->case_sensitive))
368 {
369 mime_types[n].mime = node->mime_type;
370 mime_types[n].weight = node->weight;
371 n++;
372 }
373 node = node->next;
374 }
375 }
376 return n;
377 }
378 }
379
380 return 0;
381 }
382
compare_mime_weight(const void * a,const void * b)383 static int compare_mime_weight (const void *a, const void *b)
384 {
385 const MimeWeight *aa = (const MimeWeight *)a;
386 const MimeWeight *bb = (const MimeWeight *)b;
387
388 return bb->weight - aa->weight;
389 }
390
391 #define ISUPPER(c) ((c) >= 'A' && (c) <= 'Z')
392 static char *
ascii_tolower(const char * str)393 ascii_tolower (const char *str)
394 {
395 char *p, *lower;
396
397 lower = strdup (str);
398 p = lower;
399 while (*p != 0)
400 {
401 char c = *p;
402 *p++ = ISUPPER (c) ? c - 'A' + 'a' : c;
403 }
404 return lower;
405 }
406
407 static int
filter_out_dupes(MimeWeight mimes[],int n_mimes)408 filter_out_dupes (MimeWeight mimes[], int n_mimes)
409 {
410 int last;
411 int i, j;
412
413 last = n_mimes;
414
415 for (i = 0; i < last; i++)
416 {
417 j = i + 1;
418 while (j < last)
419 {
420 if (strcmp (mimes[i].mime, mimes[j].mime) == 0)
421 {
422 mimes[i].weight = MAX (mimes[i].weight, mimes[j].weight);
423 last--;
424 mimes[j].mime = mimes[last].mime;
425 mimes[j].weight = mimes[last].weight;
426 }
427 else
428 j++;
429 }
430 }
431
432 return last;
433 }
434
435 int
_xdg_glob_hash_lookup_file_name(XdgGlobHash * glob_hash,const char * file_name,const char * mime_types[],int n_mime_types)436 _xdg_glob_hash_lookup_file_name (XdgGlobHash *glob_hash,
437 const char *file_name,
438 const char *mime_types[],
439 int n_mime_types)
440 {
441 XdgGlobList *list;
442 int i, n;
443 MimeWeight mimes[10];
444 int n_mimes = 10;
445 int len;
446 char *lower_case;
447
448 /* First, check the literals */
449
450 assert (file_name != NULL && n_mime_types > 0);
451
452 n = 0;
453
454 lower_case = ascii_tolower (file_name);
455
456 for (list = glob_hash->literal_list; list; list = list->next)
457 {
458 if (strcmp ((const char *)list->data, file_name) == 0)
459 {
460 mime_types[0] = list->mime_type;
461 free (lower_case);
462 return 1;
463 }
464 }
465
466 for (list = glob_hash->literal_list; list; list = list->next)
467 {
468 if (!list->case_sensitive &&
469 strcmp ((const char *)list->data, lower_case) == 0)
470 {
471 mime_types[0] = list->mime_type;
472 free (lower_case);
473 return 1;
474 }
475 }
476
477
478 len = strlen (file_name);
479 n = _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, lower_case, len, FALSE,
480 mimes, n_mimes);
481 if (n < 2)
482 n += _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, file_name, len, TRUE,
483 mimes + n, n_mimes - n);
484
485 if (n < 2)
486 {
487 for (list = glob_hash->full_list; list && n < n_mime_types; list = list->next)
488 {
489 if (fnmatch ((const char *)list->data, file_name, 0) == 0)
490 {
491 mimes[n].mime = list->mime_type;
492 mimes[n].weight = list->weight;
493 n++;
494 }
495 }
496 }
497 free (lower_case);
498
499 n = filter_out_dupes (mimes, n);
500
501 qsort (mimes, n, sizeof (MimeWeight), compare_mime_weight);
502
503 if (n_mime_types < n)
504 n = n_mime_types;
505
506 for (i = 0; i < n; i++)
507 mime_types[i] = mimes[i].mime;
508
509 return n;
510 }
511
512
513
514 /* XdgGlobHash
515 */
516
517 XdgGlobHash *
_xdg_glob_hash_new(void)518 _xdg_glob_hash_new (void)
519 {
520 XdgGlobHash *glob_hash;
521
522 glob_hash = calloc (1, sizeof (XdgGlobHash));
523
524 return glob_hash;
525 }
526
527
528 static void
_xdg_glob_hash_free_nodes(XdgGlobHashNode * node)529 _xdg_glob_hash_free_nodes (XdgGlobHashNode *node)
530 {
531 if (node)
532 {
533 if (node->child)
534 _xdg_glob_hash_free_nodes (node->child);
535 if (node->next)
536 _xdg_glob_hash_free_nodes (node->next);
537 if (node->mime_type)
538 free ((void *) node->mime_type);
539 free (node);
540 }
541 }
542
543 void
_xdg_glob_hash_free(XdgGlobHash * glob_hash)544 _xdg_glob_hash_free (XdgGlobHash *glob_hash)
545 {
546 _xdg_glob_list_free (glob_hash->literal_list);
547 _xdg_glob_list_free (glob_hash->full_list);
548 _xdg_glob_hash_free_nodes (glob_hash->simple_node);
549 free (glob_hash);
550 }
551
552 XdgGlobType
_xdg_glob_determine_type(const char * glob)553 _xdg_glob_determine_type (const char *glob)
554 {
555 const char *ptr;
556 int maybe_in_simple_glob = FALSE;
557 int first_char = TRUE;
558
559 ptr = glob;
560
561 while (*ptr != '\0')
562 {
563 if (*ptr == '*' && first_char)
564 maybe_in_simple_glob = TRUE;
565 else if (*ptr == '\\' || *ptr == '[' || *ptr == '?' || *ptr == '*')
566 return XDG_GLOB_FULL;
567
568 first_char = FALSE;
569 ptr = _xdg_utf8_next_char (ptr);
570 }
571 if (maybe_in_simple_glob)
572 return XDG_GLOB_SIMPLE;
573 else
574 return XDG_GLOB_LITERAL;
575 }
576
577 /* glob must be valid UTF-8 */
578 void
_xdg_glob_hash_append_glob(XdgGlobHash * glob_hash,const char * glob,const char * mime_type,int weight,int case_sensitive)579 _xdg_glob_hash_append_glob (XdgGlobHash *glob_hash,
580 const char *glob,
581 const char *mime_type,
582 int weight,
583 int case_sensitive)
584 {
585 XdgGlobType type;
586
587 assert (glob_hash != NULL);
588 assert (glob != NULL);
589
590 type = _xdg_glob_determine_type (glob);
591
592 switch (type)
593 {
594 case XDG_GLOB_LITERAL:
595 glob_hash->literal_list = _xdg_glob_list_append (glob_hash->literal_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
596 break;
597 case XDG_GLOB_SIMPLE:
598 glob_hash->simple_node = _xdg_glob_hash_insert_text (glob_hash->simple_node, glob + 1, mime_type, weight, case_sensitive);
599 break;
600 case XDG_GLOB_FULL:
601 glob_hash->full_list = _xdg_glob_list_append (glob_hash->full_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
602 break;
603 }
604 }
605
606 #ifdef NOT_USED_IN_GIO
607
608 void
_xdg_glob_hash_dump(XdgGlobHash * glob_hash)609 _xdg_glob_hash_dump (XdgGlobHash *glob_hash)
610 {
611 XdgGlobList *list;
612 printf ("LITERAL STRINGS\n");
613 if (!glob_hash || glob_hash->literal_list == NULL)
614 {
615 printf (" None\n");
616 }
617 else
618 {
619 for (list = glob_hash->literal_list; list; list = list->next)
620 printf (" %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
621 }
622 printf ("\nSIMPLE GLOBS\n");
623 if (!glob_hash || glob_hash->simple_node == NULL)
624 {
625 printf (" None\n");
626 }
627 else
628 {
629 _xdg_glob_hash_node_dump (glob_hash->simple_node, 4);
630 }
631
632 printf ("\nFULL GLOBS\n");
633 if (!glob_hash || glob_hash->full_list == NULL)
634 {
635 printf (" None\n");
636 }
637 else
638 {
639 for (list = glob_hash->full_list; list; list = list->next)
640 printf (" %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
641 }
642 }
643
644 #endif
645
646 void
_xdg_mime_glob_read_from_file(XdgGlobHash * glob_hash,const char * file_name,int version_two)647 _xdg_mime_glob_read_from_file (XdgGlobHash *glob_hash,
648 const char *file_name,
649 int version_two)
650 {
651 FILE *glob_file;
652 char line[255];
653 char *p;
654
655 glob_file = fopen (file_name, "r");
656
657 if (glob_file == NULL)
658 return;
659
660 /* FIXME: Not UTF-8 safe. Doesn't work if lines are greater than 255 chars.
661 * Blah */
662 while (fgets (line, 255, glob_file) != NULL)
663 {
664 char *colon;
665 char *mimetype, *glob, *end;
666 int weight;
667 int case_sensitive;
668
669 if (line[0] == '#' || line[0] == 0)
670 continue;
671
672 end = line + strlen(line) - 1;
673 if (*end == '\n')
674 *end = 0;
675
676 p = line;
677 if (version_two)
678 {
679 colon = strchr (p, ':');
680 if (colon == NULL)
681 continue;
682 *colon = 0;
683 weight = atoi (p);
684 p = colon + 1;
685 }
686 else
687 weight = 50;
688
689 colon = strchr (p, ':');
690 if (colon == NULL)
691 continue;
692 *colon = 0;
693
694 mimetype = p;
695 p = colon + 1;
696 glob = p;
697 case_sensitive = FALSE;
698
699 colon = strchr (p, ':');
700 if (version_two && colon != NULL)
701 {
702 char *flag;
703
704 /* We got flags */
705 *colon = 0;
706 p = colon + 1;
707
708 /* Flags end at next colon */
709 colon = strchr (p, ':');
710 if (colon != NULL)
711 *colon = 0;
712
713 flag = strstr (p, "cs");
714 if (flag != NULL &&
715 /* Start or after comma */
716 (flag == p ||
717 flag[-1] == ',') &&
718 /* ends with comma or end of string */
719 (flag[2] == 0 ||
720 flag[2] == ','))
721 case_sensitive = TRUE;
722 }
723
724 _xdg_glob_hash_append_glob (glob_hash, glob, mimetype, weight, case_sensitive);
725 }
726
727 fclose (glob_file);
728 }
729