1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright © 2020-2024 by OpenPrinting.
5 * Copyright © 2007-2019 by Apple Inc.
6 * Copyright © 1997-2007 by Easy Software Products.
7 *
8 * Licensed under Apache License v2.0. See the file "LICENSE" for more
9 * information.
10 */
11
12 /*
13 * Include necessary headers...
14 */
15
16 #include "cgi-private.h"
17 #include <cups/dir.h>
18
19
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive compare in
 * help_load_file(), so the entries MUST remain in ascending alphabetical
 * order.  Every entry is at most 5 characters long plus the nul terminator.
 * The table is never modified at run-time, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
123
124
125 /*
126 * Local functions...
127 */
128
129 static help_word_t *help_add_word(help_node_t *n, const char *text);
130 static void help_delete_node(help_node_t *n);
131 static void help_delete_word(help_word_t *w);
132 static int help_load_directory(help_index_t *hi,
133 const char *directory,
134 const char *relative);
135 static int help_load_file(help_index_t *hi,
136 const char *filename,
137 const char *relative,
138 time_t mtime);
139 static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
140 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
141 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
142 static int help_sort_words(help_word_t *w1, help_word_t *w2);
143
144
145 /*
146 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
147 */
148
149 void
helpDeleteIndex(help_index_t * hi)150 helpDeleteIndex(help_index_t *hi) /* I - Help index */
151 {
152 help_node_t *node; /* Current node */
153
154
155 if (!hi)
156 return;
157
158 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159 node;
160 node = (help_node_t *)cupsArrayNext(hi->nodes))
161 {
162 if (!hi->search)
163 help_delete_node(node);
164 }
165
166 cupsArrayDelete(hi->nodes);
167 cupsArrayDelete(hi->sorted);
168
169 free(hi);
170 }
171
172
173 /*
174 * 'helpFindNode()' - Find a node in an index.
175 */
176
177 help_node_t * /* O - Node pointer or NULL */
helpFindNode(help_index_t * hi,const char * filename,const char * anchor)178 helpFindNode(help_index_t *hi, /* I - Index */
179 const char *filename, /* I - Filename */
180 const char *anchor) /* I - Anchor */
181 {
182 help_node_t key; /* Search key */
183
184
185 /*
186 * Range check input...
187 */
188
189 if (!hi || !filename)
190 return (NULL);
191
192 /*
193 * Initialize the search key...
194 */
195
196 key.filename = (char *)filename;
197 key.anchor = (char *)anchor;
198
199 /*
200 * Return any match...
201 */
202
203 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
204 }
205
206
207 /*
208 * 'helpLoadIndex()' - Load a help index from disk.
209 */
210
211 help_index_t * /* O - Index pointer or NULL */
helpLoadIndex(const char * hifile,const char * directory)212 helpLoadIndex(const char *hifile, /* I - Index filename */
213 const char *directory) /* I - Directory that is indexed */
214 {
215 help_index_t *hi; /* Help index */
216 cups_file_t *fp; /* Current file */
217 char line[2048], /* Line from file */
218 *ptr, /* Pointer into line */
219 *filename, /* Filename in line */
220 *anchor, /* Anchor in line */
221 *sectptr, /* Section pointer in line */
222 section[1024], /* Section name */
223 *text; /* Text in line */
224 time_t mtime; /* Modification time */
225 off_t offset; /* Offset into file */
226 size_t length; /* Length in bytes */
227 int update; /* Update? */
228 help_node_t *node; /* Current node */
229 help_word_t *word; /* Current word */
230
231
232 /*
233 * Create a new, empty index.
234 */
235
236 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
237 return (NULL);
238
239 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
240 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
241
242 if (!hi->nodes || !hi->sorted)
243 {
244 cupsArrayDelete(hi->nodes);
245 cupsArrayDelete(hi->sorted);
246 free(hi);
247 return (NULL);
248 }
249
250 /*
251 * Try loading the existing index file...
252 */
253
254 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
255 {
256 /*
257 * Lock the file and then read the first line...
258 */
259
260 cupsFileLock(fp, 1);
261
262 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
263 {
264 /*
265 * Got a valid header line, now read the data lines...
266 */
267
268 node = NULL;
269
270 while (cupsFileGets(fp, line, sizeof(line)))
271 {
272 /*
273 * Each line looks like one of the following:
274 *
275 * filename mtime offset length "section" "text"
276 * filename#anchor offset length "text"
277 * SP count word
278 */
279
280 if (line[0] == ' ')
281 {
282 /*
283 * Read a word in the current node...
284 */
285
286 if (!node || (ptr = strrchr(line, ' ')) == NULL)
287 continue;
288
289 if ((word = help_add_word(node, ptr + 1)) != NULL)
290 word->count = atoi(line + 1);
291 }
292 else
293 {
294 /*
295 * Add a node...
296 */
297
298 filename = line;
299
300 if ((ptr = strchr(line, ' ')) == NULL)
301 break;
302
303 while (isspace(*ptr & 255))
304 *ptr++ = '\0';
305
306 if ((anchor = strrchr(filename, '#')) != NULL)
307 {
308 *anchor++ = '\0';
309 mtime = 0;
310 }
311 else
312 mtime = strtol(ptr, &ptr, 10);
313
314 offset = strtoll(ptr, &ptr, 10);
315 length = (size_t)strtoll(ptr, &ptr, 10);
316
317 while (isspace(*ptr & 255))
318 ptr ++;
319
320 if (!anchor)
321 {
322 /*
323 * Get section...
324 */
325
326 if (*ptr != '\"')
327 break;
328
329 ptr ++;
330 sectptr = ptr;
331
332 while (*ptr && *ptr != '\"')
333 ptr ++;
334
335 if (*ptr != '\"')
336 break;
337
338 *ptr++ = '\0';
339
340 strlcpy(section, sectptr, sizeof(section));
341
342 while (isspace(*ptr & 255))
343 ptr ++;
344 }
345 else
346 section[0] = '\0';
347
348 if (*ptr != '\"')
349 break;
350
351 ptr ++;
352 text = ptr;
353
354 while (*ptr && *ptr != '\"')
355 ptr ++;
356
357 if (*ptr != '\"')
358 break;
359
360 *ptr++ = '\0';
361
362 if ((node = help_new_node(filename, anchor, section, text,
363 mtime, offset, length)) == NULL)
364 break;
365
366 node->score = -1;
367
368 cupsArrayAdd(hi->nodes, node);
369 }
370 }
371 }
372
373 cupsFileClose(fp);
374 }
375
376 /*
377 * Scan for new/updated files...
378 */
379
380 update = help_load_directory(hi, directory, NULL);
381
382 /*
383 * Remove any files that are no longer installed...
384 */
385
386 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
387 node;
388 node = (help_node_t *)cupsArrayNext(hi->nodes))
389 if (node->score < 0)
390 {
391 /*
392 * Delete this node...
393 */
394
395 cupsArrayRemove(hi->nodes, node);
396 help_delete_node(node);
397 }
398
399 /*
400 * Add nodes to the sorted array...
401 */
402
403 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
404 node;
405 node = (help_node_t *)cupsArrayNext(hi->nodes))
406 cupsArrayAdd(hi->sorted, node);
407
408 /*
409 * Save the index if we updated it...
410 */
411
412 if (update)
413 helpSaveIndex(hi, hifile);
414
415 /*
416 * Return the index...
417 */
418
419 return (hi);
420 }
421
422
423 /*
424 * 'helpSaveIndex()' - Save a help index to disk.
425 */
426
427 int /* O - 0 on success, -1 on error */
helpSaveIndex(help_index_t * hi,const char * hifile)428 helpSaveIndex(help_index_t *hi, /* I - Index */
429 const char *hifile) /* I - Index filename */
430 {
431 cups_file_t *fp; /* Index file */
432 help_node_t *node; /* Current node */
433 help_word_t *word; /* Current word */
434
435
436 /*
437 * Try creating a new index file...
438 */
439
440 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
441 return (-1);
442
443 /*
444 * Lock the file while we write it...
445 */
446
447 cupsFileLock(fp, 1);
448
449 cupsFilePuts(fp, "HELPV2\n");
450
451 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
452 node;
453 node = (help_node_t *)cupsArrayNext(hi->nodes))
454 {
455 /*
456 * Write the current node with/without the anchor...
457 */
458
459 if (node->anchor)
460 {
461 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
462 node->filename, node->anchor,
463 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
464 node->text) < 0)
465 break;
466 }
467 else
468 {
469 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
470 node->filename, (int)node->mtime,
471 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
472 node->section ? node->section : "", node->text) < 0)
473 break;
474 }
475
476 /*
477 * Then write the words associated with the node...
478 */
479
480 for (word = (help_word_t *)cupsArrayFirst(node->words);
481 word;
482 word = (help_word_t *)cupsArrayNext(node->words))
483 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
484 break;
485 }
486
487 cupsFileFlush(fp);
488
489 if (cupsFileClose(fp) < 0)
490 return (-1);
491 else if (node)
492 return (-1);
493 else
494 return (0);
495 }
496
497
498 /*
499 * 'helpSearchIndex()' - Search an index.
500 */
501
502 help_index_t * /* O - Search index */
helpSearchIndex(help_index_t * hi,const char * query,const char * section,const char * filename)503 helpSearchIndex(help_index_t *hi, /* I - Index */
504 const char *query, /* I - Query string */
505 const char *section, /* I - Limit search to this section */
506 const char *filename) /* I - Limit search to this file */
507 {
508 help_index_t *search; /* Search index */
509 help_node_t *node; /* Current node */
510 help_word_t *word; /* Current word */
511 void *sc; /* Search context */
512 int matches; /* Number of matches */
513
514
515 /*
516 * Range check...
517 */
518
519 if (!hi || !query)
520 return (NULL);
521
522 /*
523 * Reset the scores of all nodes to 0...
524 */
525
526 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
527 node;
528 node = (help_node_t *)cupsArrayNext(hi->nodes))
529 node->score = 0;
530
531 /*
532 * Find the first node to search in...
533 */
534
535 if (filename)
536 {
537 node = helpFindNode(hi, filename, NULL);
538 if (!node)
539 return (NULL);
540 }
541 else
542 node = (help_node_t *)cupsArrayFirst(hi->nodes);
543
544 /*
545 * Convert the query into a regular expression...
546 */
547
548 sc = cgiCompileSearch(query);
549 if (!sc)
550 return (NULL);
551
552 /*
553 * Allocate a search index...
554 */
555
556 search = calloc(1, sizeof(help_index_t));
557 if (!search)
558 {
559 cgiFreeSearch(sc);
560 return (NULL);
561 }
562
563 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
564 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
565
566 if (!search->nodes || !search->sorted)
567 {
568 cupsArrayDelete(search->nodes);
569 cupsArrayDelete(search->sorted);
570 free(search);
571 cgiFreeSearch(sc);
572 return (NULL);
573 }
574
575 search->search = 1;
576
577 /*
578 * Check each node in the index, adding matching nodes to the
579 * search index...
580 */
581
582 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
583 if (node->section && section && strcmp(node->section, section))
584 continue;
585 else if (filename && strcmp(node->filename, filename))
586 continue;
587 else
588 {
589 matches = cgiDoSearch(sc, node->text);
590
591 for (word = (help_word_t *)cupsArrayFirst(node->words);
592 word;
593 word = (help_word_t *)cupsArrayNext(node->words))
594 if (cgiDoSearch(sc, word->text) > 0)
595 matches += word->count;
596
597 if (matches > 0)
598 {
599 /*
600 * Found a match, add the node to the search index...
601 */
602
603 node->score = matches;
604
605 cupsArrayAdd(search->nodes, node);
606 cupsArrayAdd(search->sorted, node);
607 }
608 }
609
610 /*
611 * Free the search context...
612 */
613
614 cgiFreeSearch(sc);
615
616 /*
617 * Return the results...
618 */
619
620 return (search);
621 }
622
623
624 /*
625 * 'help_add_word()' - Add a word to a node.
626 */
627
628 static help_word_t * /* O - New word */
help_add_word(help_node_t * n,const char * text)629 help_add_word(help_node_t *n, /* I - Node */
630 const char *text) /* I - Word text */
631 {
632 help_word_t *w, /* New word */
633 key; /* Search key */
634
635
636 /*
637 * Create the words array as needed...
638 */
639
640 if (!n->words)
641 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
642
643 /*
644 * See if the word is already added...
645 */
646
647 key.text = (char *)text;
648
649 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
650 {
651 /*
652 * Create a new word...
653 */
654
655 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
656 return (NULL);
657
658 if ((w->text = strdup(text)) == NULL)
659 {
660 free(w);
661 return (NULL);
662 }
663
664 cupsArrayAdd(n->words, w);
665 }
666
667 /*
668 * Bump the counter for this word and return it...
669 */
670
671 w->count ++;
672
673 return (w);
674 }
675
676
677 /*
678 * 'help_delete_node()' - Free all memory used by a node.
679 */
680
681 static void
help_delete_node(help_node_t * n)682 help_delete_node(help_node_t *n) /* I - Node */
683 {
684 help_word_t *w; /* Current word */
685
686
687 if (!n)
688 return;
689
690 if (n->filename)
691 free(n->filename);
692
693 if (n->anchor)
694 free(n->anchor);
695
696 if (n->section)
697 free(n->section);
698
699 if (n->text)
700 free(n->text);
701
702 for (w = (help_word_t *)cupsArrayFirst(n->words);
703 w;
704 w = (help_word_t *)cupsArrayNext(n->words))
705 help_delete_word(w);
706
707 cupsArrayDelete(n->words);
708
709 free(n);
710 }
711
712
713 /*
714 * 'help_delete_word()' - Free all memory used by a word.
715 */
716
717 static void
help_delete_word(help_word_t * w)718 help_delete_word(help_word_t *w) /* I - Word */
719 {
720 if (!w)
721 return;
722
723 if (w->text)
724 free(w->text);
725
726 free(w);
727 }
728
729
730 /*
731 * 'help_load_directory()' - Load a directory of files into an index.
732 */
733
734 static int /* O - 0 = success, -1 = error, 1 = updated */
help_load_directory(help_index_t * hi,const char * directory,const char * relative)735 help_load_directory(
736 help_index_t *hi, /* I - Index */
737 const char *directory, /* I - Directory */
738 const char *relative) /* I - Relative path */
739 {
740 cups_dir_t *dir; /* Directory file */
741 cups_dentry_t *dent; /* Directory entry */
742 char *ext, /* Pointer to extension */
743 filename[1024], /* Full filename */
744 relname[1024]; /* Relative filename */
745 int update; /* Updated? */
746 help_node_t *node; /* Current node */
747
748
749 /*
750 * Open the directory and scan it...
751 */
752
753 if ((dir = cupsDirOpen(directory)) == NULL)
754 return (0);
755
756 update = 0;
757
758 while ((dent = cupsDirRead(dir)) != NULL)
759 {
760 /*
761 * Skip "." files...
762 */
763
764 if (dent->filename[0] == '.')
765 continue;
766
767 /*
768 * Get absolute and relative filenames...
769 */
770
771 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
772 if (relative)
773 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
774 else
775 strlcpy(relname, dent->filename, sizeof(relname));
776
777 /*
778 * Check if we have a HTML file...
779 */
780
781 if ((ext = strstr(dent->filename, ".html")) != NULL &&
782 (!ext[5] || !strcmp(ext + 5, ".gz")))
783 {
784 /*
785 * HTML file, see if we have already indexed the file...
786 */
787
788 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
789 {
790 /*
791 * File already indexed - check dates to confirm that the
792 * index is up-to-date...
793 */
794
795 if (node->mtime == dent->fileinfo.st_mtime)
796 {
797 /*
798 * Same modification time, so mark all of the nodes
799 * for this file as up-to-date...
800 */
801
802 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
803 if (!strcmp(node->filename, relname))
804 node->score = 0;
805 else
806 break;
807
808 continue;
809 }
810 }
811
812 update = 1;
813
814 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
815 }
816 else if (S_ISDIR(dent->fileinfo.st_mode))
817 {
818 /*
819 * Process sub-directory...
820 */
821
822 if (help_load_directory(hi, filename, relname) == 1)
823 update = 1;
824 }
825 }
826
827 cupsDirClose(dir);
828
829 return (update);
830 }
831
832
833 /*
834 * 'help_load_file()' - Load a HTML files into an index.
835 */
836
837 static int /* O - 0 = success, -1 = error */
help_load_file(help_index_t * hi,const char * filename,const char * relative,time_t mtime)838 help_load_file(
839 help_index_t *hi, /* I - Index */
840 const char *filename, /* I - Filename */
841 const char *relative, /* I - Relative path */
842 time_t mtime) /* I - Modification time */
843 {
844 cups_file_t *fp; /* HTML file */
845 help_node_t *node; /* Current node */
846 char line[1024], /* Line from file */
847 temp[1024], /* Temporary word */
848 section[1024], /* Section */
849 *ptr, /* Pointer into line */
850 *anchor, /* Anchor name */
851 *text; /* Text for anchor */
852 off_t offset; /* File offset */
853 char quote; /* Quote character */
854 help_word_t *word; /* Current word */
855 size_t wordlen; /* Length of word */
856
857
858 if ((fp = cupsFileOpen(filename, "r")) == NULL)
859 return (-1);
860
861 node = NULL;
862 offset = 0;
863
864 strlcpy(section, "Other", sizeof(section));
865
866 while (cupsFileGets(fp, line, sizeof(line)))
867 {
868 /*
869 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
870 */
871
872 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
873 {
874 /*
875 * Got section line, copy it!
876 */
877
878 for (ptr += 13; isspace(*ptr & 255); ptr ++);
879
880 strlcpy(section, ptr, sizeof(section));
881 if ((ptr = strstr(section, "-->")) != NULL)
882 {
883 /*
884 * Strip comment stuff from end of line...
885 */
886
887 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
888
889 if (isspace(*ptr & 255))
890 *ptr = '\0';
891 }
892 continue;
893 }
894
895 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
896 {
897 ptr ++;
898
899 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
900 {
901 /*
902 * Found the title...
903 */
904
905 anchor = NULL;
906 ptr += 6;
907 }
908 else
909 {
910 char *idptr; /* Pointer to ID */
911
912 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
913 ptr += 7;
914 else if ((idptr = strstr(ptr, " ID=")) != NULL)
915 ptr = idptr + 4;
916 else if ((idptr = strstr(ptr, " id=")) != NULL)
917 ptr = idptr + 4;
918 else
919 continue;
920
921 /*
922 * Found an anchor...
923 */
924
925 if (*ptr == '\"' || *ptr == '\'')
926 {
927 /*
928 * Get quoted anchor...
929 */
930
931 quote = *ptr;
932 anchor = ptr + 1;
933 if ((ptr = strchr(anchor, quote)) != NULL)
934 *ptr++ = '\0';
935 else
936 break;
937 }
938 else
939 {
940 /*
941 * Get unquoted anchor...
942 */
943
944 anchor = ptr + 1;
945
946 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
947
948 if (*ptr != '>')
949 *ptr++ = '\0';
950 else
951 break;
952 }
953
954 /*
955 * Got the anchor, now lets find the end...
956 */
957
958 while (*ptr && *ptr != '>')
959 ptr ++;
960
961 if (*ptr != '>')
962 break;
963
964 *ptr++ = '\0';
965 }
966
967 /*
968 * Now collect text for the link...
969 */
970
971 text = ptr;
972 while ((ptr = strchr(text, '<')) == NULL)
973 {
974 ptr = text + strlen(text);
975 if (ptr >= (line + sizeof(line) - 2))
976 break;
977
978 *ptr++ = ' ';
979
980 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
981 break;
982 }
983
984 *ptr = '\0';
985
986 if (node)
987 node->length = (size_t)(offset - node->offset);
988
989 if (!*text)
990 {
991 node = NULL;
992 break;
993 }
994
995 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
996 {
997 /*
998 * Node already in the index, so replace the text and other
999 * data...
1000 */
1001
1002 cupsArrayRemove(hi->nodes, node);
1003
1004 if (node->section)
1005 free(node->section);
1006
1007 if (node->text)
1008 free(node->text);
1009
1010 if (node->words)
1011 {
1012 for (word = (help_word_t *)cupsArrayFirst(node->words);
1013 word;
1014 word = (help_word_t *)cupsArrayNext(node->words))
1015 help_delete_word(word);
1016
1017 cupsArrayDelete(node->words);
1018 node->words = NULL;
1019 }
1020
1021 node->section = section[0] ? strdup(section) : NULL;
1022 node->text = strdup(text);
1023 node->mtime = mtime;
1024 node->offset = offset;
1025 node->score = 0;
1026 }
1027 else
1028 {
1029 /*
1030 * New node...
1031 */
1032
1033 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1034 }
1035
1036 /*
1037 * Go through the text value and replace tabs and newlines with
1038 * whitespace and eliminate extra whitespace...
1039 */
1040
1041 for (ptr = node->text, text = node->text; *ptr;)
1042 if (isspace(*ptr & 255))
1043 {
1044 while (isspace(*ptr & 255))
1045 ptr ++;
1046
1047 *text++ = ' ';
1048 }
1049 else if (text != ptr)
1050 *text++ = *ptr++;
1051 else
1052 {
1053 text ++;
1054 ptr ++;
1055 }
1056
1057 *text = '\0';
1058
1059 /*
1060 * (Re)add the node to the array...
1061 */
1062
1063 cupsArrayAdd(hi->nodes, node);
1064
1065 if (!anchor)
1066 node = NULL;
1067 break;
1068 }
1069
1070 if (node)
1071 {
1072 /*
1073 * Scan this line for words...
1074 */
1075
1076 for (ptr = line; *ptr; ptr ++)
1077 {
1078 /*
1079 * Skip HTML stuff...
1080 */
1081
1082 if (*ptr == '<')
1083 {
1084 if (!strncmp(ptr, "<!--", 4))
1085 {
1086 /*
1087 * Skip HTML comment...
1088 */
1089
1090 if ((text = strstr(ptr + 4, "-->")) == NULL)
1091 ptr += strlen(ptr) - 1;
1092 else
1093 ptr = text + 2;
1094 }
1095 else
1096 {
1097 /*
1098 * Skip HTML element...
1099 */
1100
1101 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1102 {
1103 if (*ptr == '\"' || *ptr == '\'')
1104 {
1105 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1106
1107 if (!*ptr)
1108 ptr --;
1109 }
1110 }
1111
1112 if (!*ptr)
1113 ptr --;
1114 }
1115
1116 continue;
1117 }
1118 else if (*ptr == '&')
1119 {
1120 /*
1121 * Skip HTML entity...
1122 */
1123
1124 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1125
1126 if (!*ptr)
1127 ptr --;
1128
1129 continue;
1130 }
1131 else if (!isalnum(*ptr & 255))
1132 continue;
1133
1134 /*
1135 * Found the start of a word, search until we find the end...
1136 */
1137
1138 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1139
1140 wordlen = (size_t)(ptr - text);
1141
1142 memcpy(temp, text, wordlen);
1143 temp[wordlen] = '\0';
1144
1145 ptr --;
1146
1147 if (wordlen > 1 && !bsearch(temp, help_common_words,
1148 (sizeof(help_common_words) /
1149 sizeof(help_common_words[0])),
1150 sizeof(help_common_words[0]),
1151 (int (*)(const void *, const void *))
1152 _cups_strcasecmp))
1153 help_add_word(node, temp);
1154 }
1155 }
1156
1157 /*
1158 * Get the offset of the next line...
1159 */
1160
1161 offset = cupsFileTell(fp);
1162 }
1163
1164 cupsFileClose(fp);
1165
1166 if (node)
1167 node->length = (size_t)(offset - node->offset);
1168
1169 return (0);
1170 }
1171
1172
1173 /*
1174 * 'help_new_node()' - Create a new node and add it to an index.
1175 */
1176
1177 static help_node_t * /* O - Node pointer or NULL on error */
help_new_node(const char * filename,const char * anchor,const char * section,const char * text,time_t mtime,off_t offset,size_t length)1178 help_new_node(const char *filename, /* I - Filename */
1179 const char *anchor, /* I - Anchor */
1180 const char *section, /* I - Section */
1181 const char *text, /* I - Text */
1182 time_t mtime, /* I - Modification time */
1183 off_t offset, /* I - Offset in file */
1184 size_t length) /* I - Length in bytes */
1185 {
1186 help_node_t *n; /* Node */
1187
1188
1189 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1190 if (!n)
1191 return (NULL);
1192
1193 n->filename = strdup(filename);
1194 n->anchor = anchor ? strdup(anchor) : NULL;
1195 n->section = (section && *section) ? strdup(section) : NULL;
1196 n->text = strdup(text);
1197 n->mtime = mtime;
1198 n->offset = offset;
1199 n->length = length;
1200
1201 return (n);
1202 }
1203
1204
1205 /*
1206 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1207 */
1208
1209 static int /* O - Difference */
help_sort_by_name(help_node_t * n1,help_node_t * n2)1210 help_sort_by_name(help_node_t *n1, /* I - First node */
1211 help_node_t *n2) /* I - Second node */
1212 {
1213 int diff; /* Difference */
1214
1215
1216 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1217 return (diff);
1218
1219 if (!n1->anchor && !n2->anchor)
1220 return (0);
1221 else if (!n1->anchor)
1222 return (-1);
1223 else if (!n2->anchor)
1224 return (1);
1225 else
1226 return (strcmp(n1->anchor, n2->anchor));
1227 }
1228
1229
1230 /*
1231 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1232 */
1233
1234 static int /* O - Difference */
help_sort_by_score(help_node_t * n1,help_node_t * n2)1235 help_sort_by_score(help_node_t *n1, /* I - First node */
1236 help_node_t *n2) /* I - Second node */
1237 {
1238 int diff; /* Difference */
1239
1240
1241 if (n1->score != n2->score)
1242 return (n2->score - n1->score);
1243
1244 if (n1->section && !n2->section)
1245 return (1);
1246 else if (!n1->section && n2->section)
1247 return (-1);
1248 else if (n1->section && n2->section &&
1249 (diff = strcmp(n1->section, n2->section)) != 0)
1250 return (diff);
1251
1252 return (_cups_strcasecmp(n1->text, n2->text));
1253 }
1254
1255
1256 /*
1257 * 'help_sort_words()' - Sort words alphabetically.
1258 */
1259
1260 static int /* O - Difference */
help_sort_words(help_word_t * w1,help_word_t * w2)1261 help_sort_words(help_word_t *w1, /* I - Second word */
1262 help_word_t *w2) /* I - Second word */
1263 {
1264 return (_cups_strcasecmp(w1->text, w2->text));
1265 }
1266