1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright © 2007-2019 by Apple Inc.
5 * Copyright © 1997-2007 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more
8 * information.
9 */
10
11 /*
12 * Include necessary headers...
13 */
14
15 #include "cgi-private.h"
16 #include <cups/dir.h>
17
18
/*
 * List of common English words that should not be indexed...
 *
 * help_load_file() looks words up in this table with bsearch() and a
 * case-insensitive comparison, so the entries MUST remain in ascending
 * alphabetical order.  Each entry is at most 5 characters plus the nul
 * terminator.  The table is read-only.
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",    "at",    "be",
  "been",  "but",   "by",    "call",  "can",   "come",  "could", "day",
  "did",   "do",    "down",  "each",  "find",  "first", "for",   "from",
  "go",    "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",   "if",    "in",    "is",    "it",    "know",  "like",
  "long",  "look",  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",   "of",    "on",    "one",   "or",    "other", "out",
  "over",  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there", "these",
  "they",  "thing", "this",  "time",  "to",    "two",   "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",  "which",
  "who",   "will",  "with",  "word",  "would", "write", "you",   "your"
};
122
123
124 /*
125 * Local functions...
126 */
127
128 static help_word_t *help_add_word(help_node_t *n, const char *text);
129 static void help_delete_node(help_node_t *n);
130 static void help_delete_word(help_word_t *w);
131 static int help_load_directory(help_index_t *hi,
132 const char *directory,
133 const char *relative);
134 static int help_load_file(help_index_t *hi,
135 const char *filename,
136 const char *relative,
137 time_t mtime);
138 static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
139 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
140 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
141 static int help_sort_words(help_word_t *w1, help_word_t *w2);
142
143
144 /*
145 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
146 */
147
148 void
helpDeleteIndex(help_index_t * hi)149 helpDeleteIndex(help_index_t *hi) /* I - Help index */
150 {
151 help_node_t *node; /* Current node */
152
153
154 if (!hi)
155 return;
156
157 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
158 node;
159 node = (help_node_t *)cupsArrayNext(hi->nodes))
160 {
161 if (!hi->search)
162 help_delete_node(node);
163 }
164
165 cupsArrayDelete(hi->nodes);
166 cupsArrayDelete(hi->sorted);
167
168 free(hi);
169 }
170
171
172 /*
173 * 'helpFindNode()' - Find a node in an index.
174 */
175
176 help_node_t * /* O - Node pointer or NULL */
helpFindNode(help_index_t * hi,const char * filename,const char * anchor)177 helpFindNode(help_index_t *hi, /* I - Index */
178 const char *filename, /* I - Filename */
179 const char *anchor) /* I - Anchor */
180 {
181 help_node_t key; /* Search key */
182
183
184 /*
185 * Range check input...
186 */
187
188 if (!hi || !filename)
189 return (NULL);
190
191 /*
192 * Initialize the search key...
193 */
194
195 key.filename = (char *)filename;
196 key.anchor = (char *)anchor;
197
198 /*
199 * Return any match...
200 */
201
202 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
203 }
204
205
206 /*
207 * 'helpLoadIndex()' - Load a help index from disk.
208 */
209
210 help_index_t * /* O - Index pointer or NULL */
helpLoadIndex(const char * hifile,const char * directory)211 helpLoadIndex(const char *hifile, /* I - Index filename */
212 const char *directory) /* I - Directory that is indexed */
213 {
214 help_index_t *hi; /* Help index */
215 cups_file_t *fp; /* Current file */
216 char line[2048], /* Line from file */
217 *ptr, /* Pointer into line */
218 *filename, /* Filename in line */
219 *anchor, /* Anchor in line */
220 *sectptr, /* Section pointer in line */
221 section[1024], /* Section name */
222 *text; /* Text in line */
223 time_t mtime; /* Modification time */
224 off_t offset; /* Offset into file */
225 size_t length; /* Length in bytes */
226 int update; /* Update? */
227 help_node_t *node; /* Current node */
228 help_word_t *word; /* Current word */
229
230
231 /*
232 * Create a new, empty index.
233 */
234
235 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
236 return (NULL);
237
238 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
239 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
240
241 if (!hi->nodes || !hi->sorted)
242 {
243 cupsArrayDelete(hi->nodes);
244 cupsArrayDelete(hi->sorted);
245 free(hi);
246 return (NULL);
247 }
248
249 /*
250 * Try loading the existing index file...
251 */
252
253 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
254 {
255 /*
256 * Lock the file and then read the first line...
257 */
258
259 cupsFileLock(fp, 1);
260
261 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
262 {
263 /*
264 * Got a valid header line, now read the data lines...
265 */
266
267 node = NULL;
268
269 while (cupsFileGets(fp, line, sizeof(line)))
270 {
271 /*
272 * Each line looks like one of the following:
273 *
274 * filename mtime offset length "section" "text"
275 * filename#anchor offset length "text"
276 * SP count word
277 */
278
279 if (line[0] == ' ')
280 {
281 /*
282 * Read a word in the current node...
283 */
284
285 if (!node || (ptr = strrchr(line, ' ')) == NULL)
286 continue;
287
288 if ((word = help_add_word(node, ptr + 1)) != NULL)
289 word->count = atoi(line + 1);
290 }
291 else
292 {
293 /*
294 * Add a node...
295 */
296
297 filename = line;
298
299 if ((ptr = strchr(line, ' ')) == NULL)
300 break;
301
302 while (isspace(*ptr & 255))
303 *ptr++ = '\0';
304
305 if ((anchor = strrchr(filename, '#')) != NULL)
306 {
307 *anchor++ = '\0';
308 mtime = 0;
309 }
310 else
311 mtime = strtol(ptr, &ptr, 10);
312
313 offset = strtoll(ptr, &ptr, 10);
314 length = (size_t)strtoll(ptr, &ptr, 10);
315
316 while (isspace(*ptr & 255))
317 ptr ++;
318
319 if (!anchor)
320 {
321 /*
322 * Get section...
323 */
324
325 if (*ptr != '\"')
326 break;
327
328 ptr ++;
329 sectptr = ptr;
330
331 while (*ptr && *ptr != '\"')
332 ptr ++;
333
334 if (*ptr != '\"')
335 break;
336
337 *ptr++ = '\0';
338
339 strlcpy(section, sectptr, sizeof(section));
340
341 while (isspace(*ptr & 255))
342 ptr ++;
343 }
344 else
345 section[0] = '\0';
346
347 if (*ptr != '\"')
348 break;
349
350 ptr ++;
351 text = ptr;
352
353 while (*ptr && *ptr != '\"')
354 ptr ++;
355
356 if (*ptr != '\"')
357 break;
358
359 *ptr++ = '\0';
360
361 if ((node = help_new_node(filename, anchor, section, text,
362 mtime, offset, length)) == NULL)
363 break;
364
365 node->score = -1;
366
367 cupsArrayAdd(hi->nodes, node);
368 }
369 }
370 }
371
372 cupsFileClose(fp);
373 }
374
375 /*
376 * Scan for new/updated files...
377 */
378
379 update = help_load_directory(hi, directory, NULL);
380
381 /*
382 * Remove any files that are no longer installed...
383 */
384
385 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
386 node;
387 node = (help_node_t *)cupsArrayNext(hi->nodes))
388 if (node->score < 0)
389 {
390 /*
391 * Delete this node...
392 */
393
394 cupsArrayRemove(hi->nodes, node);
395 help_delete_node(node);
396 }
397
398 /*
399 * Add nodes to the sorted array...
400 */
401
402 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
403 node;
404 node = (help_node_t *)cupsArrayNext(hi->nodes))
405 cupsArrayAdd(hi->sorted, node);
406
407 /*
408 * Save the index if we updated it...
409 */
410
411 if (update)
412 helpSaveIndex(hi, hifile);
413
414 /*
415 * Return the index...
416 */
417
418 return (hi);
419 }
420
421
422 /*
423 * 'helpSaveIndex()' - Save a help index to disk.
424 */
425
426 int /* O - 0 on success, -1 on error */
helpSaveIndex(help_index_t * hi,const char * hifile)427 helpSaveIndex(help_index_t *hi, /* I - Index */
428 const char *hifile) /* I - Index filename */
429 {
430 cups_file_t *fp; /* Index file */
431 help_node_t *node; /* Current node */
432 help_word_t *word; /* Current word */
433
434
435 /*
436 * Try creating a new index file...
437 */
438
439 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
440 return (-1);
441
442 /*
443 * Lock the file while we write it...
444 */
445
446 cupsFileLock(fp, 1);
447
448 cupsFilePuts(fp, "HELPV2\n");
449
450 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
451 node;
452 node = (help_node_t *)cupsArrayNext(hi->nodes))
453 {
454 /*
455 * Write the current node with/without the anchor...
456 */
457
458 if (node->anchor)
459 {
460 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
461 node->filename, node->anchor,
462 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
463 node->text) < 0)
464 break;
465 }
466 else
467 {
468 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
469 node->filename, (int)node->mtime,
470 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
471 node->section ? node->section : "", node->text) < 0)
472 break;
473 }
474
475 /*
476 * Then write the words associated with the node...
477 */
478
479 for (word = (help_word_t *)cupsArrayFirst(node->words);
480 word;
481 word = (help_word_t *)cupsArrayNext(node->words))
482 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
483 break;
484 }
485
486 cupsFileFlush(fp);
487
488 if (cupsFileClose(fp) < 0)
489 return (-1);
490 else if (node)
491 return (-1);
492 else
493 return (0);
494 }
495
496
497 /*
498 * 'helpSearchIndex()' - Search an index.
499 */
500
501 help_index_t * /* O - Search index */
helpSearchIndex(help_index_t * hi,const char * query,const char * section,const char * filename)502 helpSearchIndex(help_index_t *hi, /* I - Index */
503 const char *query, /* I - Query string */
504 const char *section, /* I - Limit search to this section */
505 const char *filename) /* I - Limit search to this file */
506 {
507 help_index_t *search; /* Search index */
508 help_node_t *node; /* Current node */
509 help_word_t *word; /* Current word */
510 void *sc; /* Search context */
511 int matches; /* Number of matches */
512
513
514 /*
515 * Range check...
516 */
517
518 if (!hi || !query)
519 return (NULL);
520
521 /*
522 * Reset the scores of all nodes to 0...
523 */
524
525 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
526 node;
527 node = (help_node_t *)cupsArrayNext(hi->nodes))
528 node->score = 0;
529
530 /*
531 * Find the first node to search in...
532 */
533
534 if (filename)
535 {
536 node = helpFindNode(hi, filename, NULL);
537 if (!node)
538 return (NULL);
539 }
540 else
541 node = (help_node_t *)cupsArrayFirst(hi->nodes);
542
543 /*
544 * Convert the query into a regular expression...
545 */
546
547 sc = cgiCompileSearch(query);
548 if (!sc)
549 return (NULL);
550
551 /*
552 * Allocate a search index...
553 */
554
555 search = calloc(1, sizeof(help_index_t));
556 if (!search)
557 {
558 cgiFreeSearch(sc);
559 return (NULL);
560 }
561
562 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
563 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
564
565 if (!search->nodes || !search->sorted)
566 {
567 cupsArrayDelete(search->nodes);
568 cupsArrayDelete(search->sorted);
569 free(search);
570 cgiFreeSearch(sc);
571 return (NULL);
572 }
573
574 search->search = 1;
575
576 /*
577 * Check each node in the index, adding matching nodes to the
578 * search index...
579 */
580
581 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
582 if (section && strcmp(node->section, section))
583 continue;
584 else if (filename && strcmp(node->filename, filename))
585 continue;
586 else
587 {
588 matches = cgiDoSearch(sc, node->text);
589
590 for (word = (help_word_t *)cupsArrayFirst(node->words);
591 word;
592 word = (help_word_t *)cupsArrayNext(node->words))
593 if (cgiDoSearch(sc, word->text) > 0)
594 matches += word->count;
595
596 if (matches > 0)
597 {
598 /*
599 * Found a match, add the node to the search index...
600 */
601
602 node->score = matches;
603
604 cupsArrayAdd(search->nodes, node);
605 cupsArrayAdd(search->sorted, node);
606 }
607 }
608
609 /*
610 * Free the search context...
611 */
612
613 cgiFreeSearch(sc);
614
615 /*
616 * Return the results...
617 */
618
619 return (search);
620 }
621
622
623 /*
624 * 'help_add_word()' - Add a word to a node.
625 */
626
627 static help_word_t * /* O - New word */
help_add_word(help_node_t * n,const char * text)628 help_add_word(help_node_t *n, /* I - Node */
629 const char *text) /* I - Word text */
630 {
631 help_word_t *w, /* New word */
632 key; /* Search key */
633
634
635 /*
636 * Create the words array as needed...
637 */
638
639 if (!n->words)
640 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
641
642 /*
643 * See if the word is already added...
644 */
645
646 key.text = (char *)text;
647
648 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
649 {
650 /*
651 * Create a new word...
652 */
653
654 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
655 return (NULL);
656
657 if ((w->text = strdup(text)) == NULL)
658 {
659 free(w);
660 return (NULL);
661 }
662
663 cupsArrayAdd(n->words, w);
664 }
665
666 /*
667 * Bump the counter for this word and return it...
668 */
669
670 w->count ++;
671
672 return (w);
673 }
674
675
676 /*
677 * 'help_delete_node()' - Free all memory used by a node.
678 */
679
680 static void
help_delete_node(help_node_t * n)681 help_delete_node(help_node_t *n) /* I - Node */
682 {
683 help_word_t *w; /* Current word */
684
685
686 if (!n)
687 return;
688
689 if (n->filename)
690 free(n->filename);
691
692 if (n->anchor)
693 free(n->anchor);
694
695 if (n->section)
696 free(n->section);
697
698 if (n->text)
699 free(n->text);
700
701 for (w = (help_word_t *)cupsArrayFirst(n->words);
702 w;
703 w = (help_word_t *)cupsArrayNext(n->words))
704 help_delete_word(w);
705
706 cupsArrayDelete(n->words);
707
708 free(n);
709 }
710
711
712 /*
713 * 'help_delete_word()' - Free all memory used by a word.
714 */
715
716 static void
help_delete_word(help_word_t * w)717 help_delete_word(help_word_t *w) /* I - Word */
718 {
719 if (!w)
720 return;
721
722 if (w->text)
723 free(w->text);
724
725 free(w);
726 }
727
728
729 /*
730 * 'help_load_directory()' - Load a directory of files into an index.
731 */
732
733 static int /* O - 0 = success, -1 = error, 1 = updated */
help_load_directory(help_index_t * hi,const char * directory,const char * relative)734 help_load_directory(
735 help_index_t *hi, /* I - Index */
736 const char *directory, /* I - Directory */
737 const char *relative) /* I - Relative path */
738 {
739 cups_dir_t *dir; /* Directory file */
740 cups_dentry_t *dent; /* Directory entry */
741 char *ext, /* Pointer to extension */
742 filename[1024], /* Full filename */
743 relname[1024]; /* Relative filename */
744 int update; /* Updated? */
745 help_node_t *node; /* Current node */
746
747
748 /*
749 * Open the directory and scan it...
750 */
751
752 if ((dir = cupsDirOpen(directory)) == NULL)
753 return (0);
754
755 update = 0;
756
757 while ((dent = cupsDirRead(dir)) != NULL)
758 {
759 /*
760 * Skip "." files...
761 */
762
763 if (dent->filename[0] == '.')
764 continue;
765
766 /*
767 * Get absolute and relative filenames...
768 */
769
770 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
771 if (relative)
772 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
773 else
774 strlcpy(relname, dent->filename, sizeof(relname));
775
776 /*
777 * Check if we have a HTML file...
778 */
779
780 if ((ext = strstr(dent->filename, ".html")) != NULL &&
781 (!ext[5] || !strcmp(ext + 5, ".gz")))
782 {
783 /*
784 * HTML file, see if we have already indexed the file...
785 */
786
787 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
788 {
789 /*
790 * File already indexed - check dates to confirm that the
791 * index is up-to-date...
792 */
793
794 if (node->mtime == dent->fileinfo.st_mtime)
795 {
796 /*
797 * Same modification time, so mark all of the nodes
798 * for this file as up-to-date...
799 */
800
801 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
802 if (!strcmp(node->filename, relname))
803 node->score = 0;
804 else
805 break;
806
807 continue;
808 }
809 }
810
811 update = 1;
812
813 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
814 }
815 else if (S_ISDIR(dent->fileinfo.st_mode))
816 {
817 /*
818 * Process sub-directory...
819 */
820
821 if (help_load_directory(hi, filename, relname) == 1)
822 update = 1;
823 }
824 }
825
826 cupsDirClose(dir);
827
828 return (update);
829 }
830
831
832 /*
833 * 'help_load_file()' - Load a HTML files into an index.
834 */
835
836 static int /* O - 0 = success, -1 = error */
help_load_file(help_index_t * hi,const char * filename,const char * relative,time_t mtime)837 help_load_file(
838 help_index_t *hi, /* I - Index */
839 const char *filename, /* I - Filename */
840 const char *relative, /* I - Relative path */
841 time_t mtime) /* I - Modification time */
842 {
843 cups_file_t *fp; /* HTML file */
844 help_node_t *node; /* Current node */
845 char line[1024], /* Line from file */
846 temp[1024], /* Temporary word */
847 section[1024], /* Section */
848 *ptr, /* Pointer into line */
849 *anchor, /* Anchor name */
850 *text; /* Text for anchor */
851 off_t offset; /* File offset */
852 char quote; /* Quote character */
853 help_word_t *word; /* Current word */
854 int wordlen; /* Length of word */
855
856
857 if ((fp = cupsFileOpen(filename, "r")) == NULL)
858 return (-1);
859
860 node = NULL;
861 offset = 0;
862
863 strlcpy(section, "Other", sizeof(section));
864
865 while (cupsFileGets(fp, line, sizeof(line)))
866 {
867 /*
868 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
869 */
870
871 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
872 {
873 /*
874 * Got section line, copy it!
875 */
876
877 for (ptr += 13; isspace(*ptr & 255); ptr ++);
878
879 strlcpy(section, ptr, sizeof(section));
880 if ((ptr = strstr(section, "-->")) != NULL)
881 {
882 /*
883 * Strip comment stuff from end of line...
884 */
885
886 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
887
888 if (isspace(*ptr & 255))
889 *ptr = '\0';
890 }
891 continue;
892 }
893
894 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
895 {
896 ptr ++;
897
898 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
899 {
900 /*
901 * Found the title...
902 */
903
904 anchor = NULL;
905 ptr += 6;
906 }
907 else
908 {
909 char *idptr; /* Pointer to ID */
910
911 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
912 ptr += 7;
913 else if ((idptr = strstr(ptr, " ID=")) != NULL)
914 ptr = idptr + 4;
915 else if ((idptr = strstr(ptr, " id=")) != NULL)
916 ptr = idptr + 4;
917 else
918 continue;
919
920 /*
921 * Found an anchor...
922 */
923
924 if (*ptr == '\"' || *ptr == '\'')
925 {
926 /*
927 * Get quoted anchor...
928 */
929
930 quote = *ptr;
931 anchor = ptr + 1;
932 if ((ptr = strchr(anchor, quote)) != NULL)
933 *ptr++ = '\0';
934 else
935 break;
936 }
937 else
938 {
939 /*
940 * Get unquoted anchor...
941 */
942
943 anchor = ptr + 1;
944
945 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
946
947 if (*ptr != '>')
948 *ptr++ = '\0';
949 else
950 break;
951 }
952
953 /*
954 * Got the anchor, now lets find the end...
955 */
956
957 while (*ptr && *ptr != '>')
958 ptr ++;
959
960 if (*ptr != '>')
961 break;
962
963 *ptr++ = '\0';
964 }
965
966 /*
967 * Now collect text for the link...
968 */
969
970 text = ptr;
971 while ((ptr = strchr(text, '<')) == NULL)
972 {
973 ptr = text + strlen(text);
974 if (ptr >= (line + sizeof(line) - 2))
975 break;
976
977 *ptr++ = ' ';
978
979 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
980 break;
981 }
982
983 *ptr = '\0';
984
985 if (node)
986 node->length = (size_t)(offset - node->offset);
987
988 if (!*text)
989 {
990 node = NULL;
991 break;
992 }
993
994 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
995 {
996 /*
997 * Node already in the index, so replace the text and other
998 * data...
999 */
1000
1001 cupsArrayRemove(hi->nodes, node);
1002
1003 if (node->section)
1004 free(node->section);
1005
1006 if (node->text)
1007 free(node->text);
1008
1009 if (node->words)
1010 {
1011 for (word = (help_word_t *)cupsArrayFirst(node->words);
1012 word;
1013 word = (help_word_t *)cupsArrayNext(node->words))
1014 help_delete_word(word);
1015
1016 cupsArrayDelete(node->words);
1017 node->words = NULL;
1018 }
1019
1020 node->section = section[0] ? strdup(section) : NULL;
1021 node->text = strdup(text);
1022 node->mtime = mtime;
1023 node->offset = offset;
1024 node->score = 0;
1025 }
1026 else
1027 {
1028 /*
1029 * New node...
1030 */
1031
1032 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1033 }
1034
1035 /*
1036 * Go through the text value and replace tabs and newlines with
1037 * whitespace and eliminate extra whitespace...
1038 */
1039
1040 for (ptr = node->text, text = node->text; *ptr;)
1041 if (isspace(*ptr & 255))
1042 {
1043 while (isspace(*ptr & 255))
1044 ptr ++;
1045
1046 *text++ = ' ';
1047 }
1048 else if (text != ptr)
1049 *text++ = *ptr++;
1050 else
1051 {
1052 text ++;
1053 ptr ++;
1054 }
1055
1056 *text = '\0';
1057
1058 /*
1059 * (Re)add the node to the array...
1060 */
1061
1062 cupsArrayAdd(hi->nodes, node);
1063
1064 if (!anchor)
1065 node = NULL;
1066 break;
1067 }
1068
1069 if (node)
1070 {
1071 /*
1072 * Scan this line for words...
1073 */
1074
1075 for (ptr = line; *ptr; ptr ++)
1076 {
1077 /*
1078 * Skip HTML stuff...
1079 */
1080
1081 if (*ptr == '<')
1082 {
1083 if (!strncmp(ptr, "<!--", 4))
1084 {
1085 /*
1086 * Skip HTML comment...
1087 */
1088
1089 if ((text = strstr(ptr + 4, "-->")) == NULL)
1090 ptr += strlen(ptr) - 1;
1091 else
1092 ptr = text + 2;
1093 }
1094 else
1095 {
1096 /*
1097 * Skip HTML element...
1098 */
1099
1100 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1101 {
1102 if (*ptr == '\"' || *ptr == '\'')
1103 {
1104 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1105
1106 if (!*ptr)
1107 ptr --;
1108 }
1109 }
1110
1111 if (!*ptr)
1112 ptr --;
1113 }
1114
1115 continue;
1116 }
1117 else if (*ptr == '&')
1118 {
1119 /*
1120 * Skip HTML entity...
1121 */
1122
1123 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1124
1125 if (!*ptr)
1126 ptr --;
1127
1128 continue;
1129 }
1130 else if (!isalnum(*ptr & 255))
1131 continue;
1132
1133 /*
1134 * Found the start of a word, search until we find the end...
1135 */
1136
1137 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1138
1139 wordlen = (int)(ptr - text);
1140
1141 memcpy(temp, text, (size_t)wordlen);
1142 temp[wordlen] = '\0';
1143
1144 ptr --;
1145
1146 if (wordlen > 1 && !bsearch(temp, help_common_words,
1147 (sizeof(help_common_words) /
1148 sizeof(help_common_words[0])),
1149 sizeof(help_common_words[0]),
1150 (int (*)(const void *, const void *))
1151 _cups_strcasecmp))
1152 help_add_word(node, temp);
1153 }
1154 }
1155
1156 /*
1157 * Get the offset of the next line...
1158 */
1159
1160 offset = cupsFileTell(fp);
1161 }
1162
1163 cupsFileClose(fp);
1164
1165 if (node)
1166 node->length = (size_t)(offset - node->offset);
1167
1168 return (0);
1169 }
1170
1171
1172 /*
1173 * 'help_new_node()' - Create a new node and add it to an index.
1174 */
1175
1176 static help_node_t * /* O - Node pointer or NULL on error */
help_new_node(const char * filename,const char * anchor,const char * section,const char * text,time_t mtime,off_t offset,size_t length)1177 help_new_node(const char *filename, /* I - Filename */
1178 const char *anchor, /* I - Anchor */
1179 const char *section, /* I - Section */
1180 const char *text, /* I - Text */
1181 time_t mtime, /* I - Modification time */
1182 off_t offset, /* I - Offset in file */
1183 size_t length) /* I - Length in bytes */
1184 {
1185 help_node_t *n; /* Node */
1186
1187
1188 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1189 if (!n)
1190 return (NULL);
1191
1192 n->filename = strdup(filename);
1193 n->anchor = anchor ? strdup(anchor) : NULL;
1194 n->section = (section && *section) ? strdup(section) : NULL;
1195 n->text = strdup(text);
1196 n->mtime = mtime;
1197 n->offset = offset;
1198 n->length = length;
1199
1200 return (n);
1201 }
1202
1203
1204 /*
1205 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1206 */
1207
1208 static int /* O - Difference */
help_sort_by_name(help_node_t * n1,help_node_t * n2)1209 help_sort_by_name(help_node_t *n1, /* I - First node */
1210 help_node_t *n2) /* I - Second node */
1211 {
1212 int diff; /* Difference */
1213
1214
1215 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1216 return (diff);
1217
1218 if (!n1->anchor && !n2->anchor)
1219 return (0);
1220 else if (!n1->anchor)
1221 return (-1);
1222 else if (!n2->anchor)
1223 return (1);
1224 else
1225 return (strcmp(n1->anchor, n2->anchor));
1226 }
1227
1228
1229 /*
1230 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1231 */
1232
1233 static int /* O - Difference */
help_sort_by_score(help_node_t * n1,help_node_t * n2)1234 help_sort_by_score(help_node_t *n1, /* I - First node */
1235 help_node_t *n2) /* I - Second node */
1236 {
1237 int diff; /* Difference */
1238
1239
1240 if (n1->score != n2->score)
1241 return (n2->score - n1->score);
1242
1243 if (n1->section && !n2->section)
1244 return (1);
1245 else if (!n1->section && n2->section)
1246 return (-1);
1247 else if (n1->section && n2->section &&
1248 (diff = strcmp(n1->section, n2->section)) != 0)
1249 return (diff);
1250
1251 return (_cups_strcasecmp(n1->text, n2->text));
1252 }
1253
1254
1255 /*
1256 * 'help_sort_words()' - Sort words alphabetically.
1257 */
1258
1259 static int /* O - Difference */
help_sort_words(help_word_t * w1,help_word_t * w2)1260 help_sort_words(help_word_t *w1, /* I - Second word */
1261 help_word_t *w2) /* I - Second word */
1262 {
1263 return (_cups_strcasecmp(w1->text, w2->text));
1264 }
1265