• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Online help index routines for CUPS.
3  *
4  * Copyright © 2020-2024 by OpenPrinting.
5  * Copyright © 2007-2019 by Apple Inc.
6  * Copyright © 1997-2007 by Easy Software Products.
7  *
8  * Licensed under Apache License v2.0.  See the file "LICENSE" for more
9  * information.
10  */
11 
12 /*
13  * Include necessary headers...
14  */
15 
16 #include "cgi-private.h"
17 #include <cups/dir.h>
18 
19 
/*
 * Common English words that are never added to the index.
 *
 * help_load_file() looks words up in this table with bsearch() and a
 * case-insensitive string comparison, so the entries MUST stay in ascending
 * alphabetical order and every word MUST fit in 5 characters plus the
 * terminating nul.  The table is read-only, hence "const".
 */

static const char	help_common_words[][6] =
			{
			  "about",
			  "all",
			  "an",
			  "and",
			  "are",
			  "as",
			  "at",
			  "be",
			  "been",
			  "but",
			  "by",
			  "call",
			  "can",
			  "come",
			  "could",
			  "day",
			  "did",
			  "do",
			  "down",
			  "each",
			  "find",
			  "first",
			  "for",
			  "from",
			  "go",
			  "had",
			  "has",
			  "have",
			  "he",
			  "her",
			  "him",
			  "his",
			  "hot",
			  "how",
			  "if",
			  "in",
			  "is",
			  "it",
			  "know",
			  "like",
			  "long",
			  "look",
			  "make",
			  "many",
			  "may",
			  "more",
			  "most",
			  "my",
			  "no",
			  "now",
			  "of",
			  "on",
			  "one",
			  "or",
			  "other",
			  "out",
			  "over",
			  "said",
			  "see",
			  "she",
			  "side",
			  "so",
			  "some",
			  "sound",
			  "than",
			  "that",
			  "the",
			  "their",
			  "them",
			  "then",
			  "there",
			  "these",
			  "they",
			  "thing",
			  "this",
			  "time",
			  "to",
			  "two",
			  "up",
			  "use",
			  "was",
			  "water",
			  "way",
			  "we",
			  "were",
			  "what",
			  "when",
			  "which",
			  "who",
			  "will",
			  "with",
			  "word",
			  "would",
			  "write",
			  "you",
			  "your"
			};
123 
124 
125 /*
126  * Local functions...
127  */
128 
129 static help_word_t	*help_add_word(help_node_t *n, const char *text);
130 static void		help_delete_node(help_node_t *n);
131 static void		help_delete_word(help_word_t *w);
132 static int		help_load_directory(help_index_t *hi,
133 			                    const char *directory,
134 					    const char *relative);
135 static int		help_load_file(help_index_t *hi,
136 			               const char *filename,
137 				       const char *relative,
138 				       time_t     mtime);
139 static help_node_t	*help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
140 static int		help_sort_by_name(help_node_t *p1, help_node_t *p2);
141 static int		help_sort_by_score(help_node_t *p1, help_node_t *p2);
142 static int		help_sort_words(help_word_t *w1, help_word_t *w2);
143 
144 
145 /*
146  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
147  */
148 
149 void
helpDeleteIndex(help_index_t * hi)150 helpDeleteIndex(help_index_t *hi)	/* I - Help index */
151 {
152   help_node_t	*node;			/* Current node */
153 
154 
155   if (!hi)
156     return;
157 
158   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159        node;
160        node = (help_node_t *)cupsArrayNext(hi->nodes))
161   {
162     if (!hi->search)
163       help_delete_node(node);
164   }
165 
166   cupsArrayDelete(hi->nodes);
167   cupsArrayDelete(hi->sorted);
168 
169   free(hi);
170 }
171 
172 
173 /*
174  * 'helpFindNode()' - Find a node in an index.
175  */
176 
177 help_node_t *				/* O - Node pointer or NULL */
helpFindNode(help_index_t * hi,const char * filename,const char * anchor)178 helpFindNode(help_index_t *hi,		/* I - Index */
179              const char   *filename,	/* I - Filename */
180              const char   *anchor)	/* I - Anchor */
181 {
182   help_node_t	key;			/* Search key */
183 
184 
185  /*
186   * Range check input...
187   */
188 
189   if (!hi || !filename)
190     return (NULL);
191 
192  /*
193   * Initialize the search key...
194   */
195 
196   key.filename = (char *)filename;
197   key.anchor   = (char *)anchor;
198 
199  /*
200   * Return any match...
201   */
202 
203   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
204 }
205 
206 
207 /*
208  * 'helpLoadIndex()' - Load a help index from disk.
209  */
210 
211 help_index_t *				/* O - Index pointer or NULL */
helpLoadIndex(const char * hifile,const char * directory)212 helpLoadIndex(const char *hifile,	/* I - Index filename */
213               const char *directory)	/* I - Directory that is indexed */
214 {
215   help_index_t	*hi;			/* Help index */
216   cups_file_t	*fp;			/* Current file */
217   char		line[2048],		/* Line from file */
218 		*ptr,			/* Pointer into line */
219 		*filename,		/* Filename in line */
220 		*anchor,		/* Anchor in line */
221 		*sectptr,		/* Section pointer in line */
222 		section[1024],		/* Section name */
223 		*text;			/* Text in line */
224   time_t	mtime;			/* Modification time */
225   off_t		offset;			/* Offset into file */
226   size_t	length;			/* Length in bytes */
227   int		update;			/* Update? */
228   help_node_t	*node;			/* Current node */
229   help_word_t	*word;			/* Current word */
230 
231 
232  /*
233   * Create a new, empty index.
234   */
235 
236   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
237     return (NULL);
238 
239   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
240   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
241 
242   if (!hi->nodes || !hi->sorted)
243   {
244     cupsArrayDelete(hi->nodes);
245     cupsArrayDelete(hi->sorted);
246     free(hi);
247     return (NULL);
248   }
249 
250  /*
251   * Try loading the existing index file...
252   */
253 
254   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
255   {
256    /*
257     * Lock the file and then read the first line...
258     */
259 
260     cupsFileLock(fp, 1);
261 
262     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
263     {
264      /*
265       * Got a valid header line, now read the data lines...
266       */
267 
268       node = NULL;
269 
270       while (cupsFileGets(fp, line, sizeof(line)))
271       {
272        /*
273 	* Each line looks like one of the following:
274 	*
275 	*     filename mtime offset length "section" "text"
276 	*     filename#anchor offset length "text"
277 	*     SP count word
278 	*/
279 
280         if (line[0] == ' ')
281 	{
282 	 /*
283 	  * Read a word in the current node...
284 	  */
285 
286           if (!node || (ptr = strrchr(line, ' ')) == NULL)
287 	    continue;
288 
289           if ((word = help_add_word(node, ptr + 1)) != NULL)
290 	    word->count = atoi(line + 1);
291         }
292 	else
293 	{
294 	 /*
295 	  * Add a node...
296 	  */
297 
298 	  filename = line;
299 
300 	  if ((ptr = strchr(line, ' ')) == NULL)
301             break;
302 
303 	  while (isspace(*ptr & 255))
304             *ptr++ = '\0';
305 
306 	  if ((anchor = strrchr(filename, '#')) != NULL)
307 	  {
308             *anchor++ = '\0';
309 	    mtime = 0;
310 	  }
311 	  else
312 	    mtime = strtol(ptr, &ptr, 10);
313 
314 	  offset = strtoll(ptr, &ptr, 10);
315 	  length = (size_t)strtoll(ptr, &ptr, 10);
316 
317 	  while (isspace(*ptr & 255))
318             ptr ++;
319 
320           if (!anchor)
321 	  {
322 	   /*
323 	    * Get section...
324 	    */
325 
326             if (*ptr != '\"')
327 	      break;
328 
329             ptr ++;
330 	    sectptr = ptr;
331 
332             while (*ptr && *ptr != '\"')
333 	      ptr ++;
334 
335             if (*ptr != '\"')
336 	      break;
337 
338             *ptr++ = '\0';
339 
340             strlcpy(section, sectptr, sizeof(section));
341 
342 	    while (isspace(*ptr & 255))
343               ptr ++;
344           }
345           else
346             section[0] = '\0';
347 
348           if (*ptr != '\"')
349 	    break;
350 
351           ptr ++;
352 	  text = ptr;
353 
354           while (*ptr && *ptr != '\"')
355 	    ptr ++;
356 
357           if (*ptr != '\"')
358 	    break;
359 
360           *ptr++ = '\0';
361 
362 	  if ((node = help_new_node(filename, anchor, section, text,
363 				    mtime, offset, length)) == NULL)
364             break;
365 
366 	  node->score = -1;
367 
368 	  cupsArrayAdd(hi->nodes, node);
369         }
370       }
371     }
372 
373     cupsFileClose(fp);
374   }
375 
376  /*
377   * Scan for new/updated files...
378   */
379 
380   update = help_load_directory(hi, directory, NULL);
381 
382  /*
383   * Remove any files that are no longer installed...
384   */
385 
386   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
387        node;
388        node = (help_node_t *)cupsArrayNext(hi->nodes))
389     if (node->score < 0)
390     {
391      /*
392       * Delete this node...
393       */
394 
395       cupsArrayRemove(hi->nodes, node);
396       help_delete_node(node);
397     }
398 
399  /*
400   * Add nodes to the sorted array...
401   */
402 
403   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
404        node;
405        node = (help_node_t *)cupsArrayNext(hi->nodes))
406     cupsArrayAdd(hi->sorted, node);
407 
408  /*
409   * Save the index if we updated it...
410   */
411 
412   if (update)
413     helpSaveIndex(hi, hifile);
414 
415  /*
416   * Return the index...
417   */
418 
419   return (hi);
420 }
421 
422 
423 /*
424  * 'helpSaveIndex()' - Save a help index to disk.
425  */
426 
427 int					/* O - 0 on success, -1 on error */
helpSaveIndex(help_index_t * hi,const char * hifile)428 helpSaveIndex(help_index_t *hi,		/* I - Index */
429               const char   *hifile)	/* I - Index filename */
430 {
431   cups_file_t	*fp;			/* Index file */
432   help_node_t	*node;			/* Current node */
433   help_word_t	*word;			/* Current word */
434 
435 
436  /*
437   * Try creating a new index file...
438   */
439 
440   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
441     return (-1);
442 
443  /*
444   * Lock the file while we write it...
445   */
446 
447   cupsFileLock(fp, 1);
448 
449   cupsFilePuts(fp, "HELPV2\n");
450 
451   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
452        node;
453        node = (help_node_t *)cupsArrayNext(hi->nodes))
454   {
455    /*
456     * Write the current node with/without the anchor...
457     */
458 
459     if (node->anchor)
460     {
461       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
462                          node->filename, node->anchor,
463                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
464 			 node->text) < 0)
465         break;
466     }
467     else
468     {
469       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
470                          node->filename, (int)node->mtime,
471                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
472 			 node->section ? node->section : "", node->text) < 0)
473         break;
474     }
475 
476    /*
477     * Then write the words associated with the node...
478     */
479 
480     for (word = (help_word_t *)cupsArrayFirst(node->words);
481          word;
482 	 word = (help_word_t *)cupsArrayNext(node->words))
483       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
484         break;
485   }
486 
487   cupsFileFlush(fp);
488 
489   if (cupsFileClose(fp) < 0)
490     return (-1);
491   else if (node)
492     return (-1);
493   else
494     return (0);
495 }
496 
497 
498 /*
499  * 'helpSearchIndex()' - Search an index.
500  */
501 
502 help_index_t *				/* O - Search index */
helpSearchIndex(help_index_t * hi,const char * query,const char * section,const char * filename)503 helpSearchIndex(help_index_t *hi,	/* I - Index */
504                 const char   *query,	/* I - Query string */
505 		const char   *section,	/* I - Limit search to this section */
506 		const char   *filename)	/* I - Limit search to this file */
507 {
508   help_index_t	*search;		/* Search index */
509   help_node_t	*node;			/* Current node */
510   help_word_t	*word;			/* Current word */
511   void		*sc;			/* Search context */
512   int		matches;		/* Number of matches */
513 
514 
515  /*
516   * Range check...
517   */
518 
519   if (!hi || !query)
520     return (NULL);
521 
522  /*
523   * Reset the scores of all nodes to 0...
524   */
525 
526   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
527        node;
528        node = (help_node_t *)cupsArrayNext(hi->nodes))
529     node->score = 0;
530 
531  /*
532   * Find the first node to search in...
533   */
534 
535   if (filename)
536   {
537     node = helpFindNode(hi, filename, NULL);
538     if (!node)
539       return (NULL);
540   }
541   else
542     node = (help_node_t *)cupsArrayFirst(hi->nodes);
543 
544  /*
545   * Convert the query into a regular expression...
546   */
547 
548   sc = cgiCompileSearch(query);
549   if (!sc)
550     return (NULL);
551 
552  /*
553   * Allocate a search index...
554   */
555 
556   search = calloc(1, sizeof(help_index_t));
557   if (!search)
558   {
559     cgiFreeSearch(sc);
560     return (NULL);
561   }
562 
563   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
564   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
565 
566   if (!search->nodes || !search->sorted)
567   {
568     cupsArrayDelete(search->nodes);
569     cupsArrayDelete(search->sorted);
570     free(search);
571     cgiFreeSearch(sc);
572     return (NULL);
573   }
574 
575   search->search = 1;
576 
577  /*
578   * Check each node in the index, adding matching nodes to the
579   * search index...
580   */
581 
582   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
583     if (node->section && section && strcmp(node->section, section))
584       continue;
585     else if (filename && strcmp(node->filename, filename))
586       continue;
587     else
588     {
589       matches = cgiDoSearch(sc, node->text);
590 
591       for (word = (help_word_t *)cupsArrayFirst(node->words);
592            word;
593 	   word = (help_word_t *)cupsArrayNext(node->words))
594         if (cgiDoSearch(sc, word->text) > 0)
595           matches += word->count;
596 
597       if (matches > 0)
598       {
599        /*
600 	* Found a match, add the node to the search index...
601 	*/
602 
603 	node->score = matches;
604 
605 	cupsArrayAdd(search->nodes, node);
606 	cupsArrayAdd(search->sorted, node);
607       }
608     }
609 
610  /*
611   * Free the search context...
612   */
613 
614   cgiFreeSearch(sc);
615 
616  /*
617   * Return the results...
618   */
619 
620   return (search);
621 }
622 
623 
624 /*
625  * 'help_add_word()' - Add a word to a node.
626  */
627 
628 static help_word_t *			/* O - New word */
help_add_word(help_node_t * n,const char * text)629 help_add_word(help_node_t *n,		/* I - Node */
630               const char  *text)	/* I - Word text */
631 {
632   help_word_t	*w,			/* New word */
633 		key;			/* Search key */
634 
635 
636  /*
637   * Create the words array as needed...
638   */
639 
640   if (!n->words)
641     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
642 
643  /*
644   * See if the word is already added...
645   */
646 
647   key.text = (char *)text;
648 
649   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
650   {
651    /*
652     * Create a new word...
653     */
654 
655     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
656       return (NULL);
657 
658     if ((w->text = strdup(text)) == NULL)
659     {
660       free(w);
661       return (NULL);
662     }
663 
664     cupsArrayAdd(n->words, w);
665   }
666 
667  /*
668   * Bump the counter for this word and return it...
669   */
670 
671   w->count ++;
672 
673   return (w);
674 }
675 
676 
677 /*
678  * 'help_delete_node()' - Free all memory used by a node.
679  */
680 
681 static void
help_delete_node(help_node_t * n)682 help_delete_node(help_node_t *n)	/* I - Node */
683 {
684   help_word_t	*w;			/* Current word */
685 
686 
687   if (!n)
688     return;
689 
690   if (n->filename)
691     free(n->filename);
692 
693   if (n->anchor)
694     free(n->anchor);
695 
696   if (n->section)
697     free(n->section);
698 
699   if (n->text)
700     free(n->text);
701 
702   for (w = (help_word_t *)cupsArrayFirst(n->words);
703        w;
704        w = (help_word_t *)cupsArrayNext(n->words))
705     help_delete_word(w);
706 
707   cupsArrayDelete(n->words);
708 
709   free(n);
710 }
711 
712 
713 /*
714  * 'help_delete_word()' - Free all memory used by a word.
715  */
716 
717 static void
help_delete_word(help_word_t * w)718 help_delete_word(help_word_t *w)	/* I - Word */
719 {
720   if (!w)
721     return;
722 
723   if (w->text)
724     free(w->text);
725 
726   free(w);
727 }
728 
729 
730 /*
731  * 'help_load_directory()' - Load a directory of files into an index.
732  */
733 
734 static int				/* O - 0 = success, -1 = error, 1 = updated */
help_load_directory(help_index_t * hi,const char * directory,const char * relative)735 help_load_directory(
736     help_index_t *hi,			/* I - Index */
737     const char   *directory,		/* I - Directory */
738     const char   *relative)		/* I - Relative path */
739 {
740   cups_dir_t	*dir;			/* Directory file */
741   cups_dentry_t	*dent;			/* Directory entry */
742   char		*ext,			/* Pointer to extension */
743 		filename[1024],		/* Full filename */
744 		relname[1024];		/* Relative filename */
745   int		update;			/* Updated? */
746   help_node_t	*node;			/* Current node */
747 
748 
749  /*
750   * Open the directory and scan it...
751   */
752 
753   if ((dir = cupsDirOpen(directory)) == NULL)
754     return (0);
755 
756   update = 0;
757 
758   while ((dent = cupsDirRead(dir)) != NULL)
759   {
760    /*
761     * Skip "." files...
762     */
763 
764     if (dent->filename[0] == '.')
765       continue;
766 
767    /*
768     * Get absolute and relative filenames...
769     */
770 
771     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
772     if (relative)
773       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
774     else
775       strlcpy(relname, dent->filename, sizeof(relname));
776 
777    /*
778     * Check if we have a HTML file...
779     */
780 
781     if ((ext = strstr(dent->filename, ".html")) != NULL &&
782         (!ext[5] || !strcmp(ext + 5, ".gz")))
783     {
784      /*
785       * HTML file, see if we have already indexed the file...
786       */
787 
788       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
789       {
790        /*
791         * File already indexed - check dates to confirm that the
792 	* index is up-to-date...
793 	*/
794 
795         if (node->mtime == dent->fileinfo.st_mtime)
796 	{
797 	 /*
798 	  * Same modification time, so mark all of the nodes
799 	  * for this file as up-to-date...
800 	  */
801 
802           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
803 	    if (!strcmp(node->filename, relname))
804 	      node->score = 0;
805 	    else
806 	      break;
807 
808           continue;
809 	}
810       }
811 
812       update = 1;
813 
814       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
815     }
816     else if (S_ISDIR(dent->fileinfo.st_mode))
817     {
818      /*
819       * Process sub-directory...
820       */
821 
822       if (help_load_directory(hi, filename, relname) == 1)
823         update = 1;
824     }
825   }
826 
827   cupsDirClose(dir);
828 
829   return (update);
830 }
831 
832 
833 /*
834  * 'help_load_file()' - Load a HTML files into an index.
835  */
836 
837 static int				/* O - 0 = success, -1 = error */
help_load_file(help_index_t * hi,const char * filename,const char * relative,time_t mtime)838 help_load_file(
839     help_index_t *hi,			/* I - Index */
840     const char   *filename,		/* I - Filename */
841     const char   *relative,		/* I - Relative path */
842     time_t       mtime)			/* I - Modification time */
843 {
844   cups_file_t	*fp;			/* HTML file */
845   help_node_t	*node;			/* Current node */
846   char		line[1024],		/* Line from file */
847 		temp[1024],		/* Temporary word */
848                 section[1024],		/* Section */
849 		*ptr,			/* Pointer into line */
850 		*anchor,		/* Anchor name */
851 		*text;			/* Text for anchor */
852   off_t		offset;			/* File offset */
853   char		quote;			/* Quote character */
854   help_word_t	*word;			/* Current word */
855   size_t		wordlen;		/* Length of word */
856 
857 
858   if ((fp = cupsFileOpen(filename, "r")) == NULL)
859     return (-1);
860 
861   node   = NULL;
862   offset = 0;
863 
864   strlcpy(section, "Other", sizeof(section));
865 
866   while (cupsFileGets(fp, line, sizeof(line)))
867   {
868    /*
869     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
870     */
871 
872     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
873     {
874      /*
875       * Got section line, copy it!
876       */
877 
878       for (ptr += 13; isspace(*ptr & 255); ptr ++);
879 
880       strlcpy(section, ptr, sizeof(section));
881       if ((ptr = strstr(section, "-->")) != NULL)
882       {
883        /*
884         * Strip comment stuff from end of line...
885 	*/
886 
887         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
888 
889 	if (isspace(*ptr & 255))
890 	  *ptr = '\0';
891       }
892       continue;
893     }
894 
895     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
896     {
897       ptr ++;
898 
899       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
900       {
901        /*
902         * Found the title...
903 	*/
904 
905 	anchor = NULL;
906 	ptr += 6;
907       }
908       else
909       {
910         char *idptr;			/* Pointer to ID */
911 
912 	if (!_cups_strncasecmp(ptr, "A NAME=", 7))
913 	  ptr += 7;
914 	else if ((idptr = strstr(ptr, " ID=")) != NULL)
915 	  ptr = idptr + 4;
916 	else if ((idptr = strstr(ptr, " id=")) != NULL)
917 	  ptr = idptr + 4;
918 	else
919 	  continue;
920 
921        /*
922         * Found an anchor...
923 	*/
924 
925 	if (*ptr == '\"' || *ptr == '\'')
926 	{
927 	 /*
928 	  * Get quoted anchor...
929 	  */
930 
931 	  quote  = *ptr;
932           anchor = ptr + 1;
933 	  if ((ptr = strchr(anchor, quote)) != NULL)
934 	    *ptr++ = '\0';
935 	  else
936 	    break;
937 	}
938 	else
939 	{
940 	 /*
941 	  * Get unquoted anchor...
942 	  */
943 
944           anchor = ptr + 1;
945 
946 	  for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
947 
948 	  if (*ptr != '>')
949 	    *ptr++ = '\0';
950 	  else
951 	    break;
952 	}
953 
954        /*
955         * Got the anchor, now lets find the end...
956 	*/
957 
958         while (*ptr && *ptr != '>')
959 	  ptr ++;
960 
961         if (*ptr != '>')
962 	  break;
963 
964         *ptr++ = '\0';
965       }
966 
967      /*
968       * Now collect text for the link...
969       */
970 
971       text = ptr;
972       while ((ptr = strchr(text, '<')) == NULL)
973       {
974 	ptr = text + strlen(text);
975 	if (ptr >= (line + sizeof(line) - 2))
976 	  break;
977 
978         *ptr++ = ' ';
979 
980         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
981 	  break;
982       }
983 
984       *ptr = '\0';
985 
986       if (node)
987 	node->length = (size_t)(offset - node->offset);
988 
989       if (!*text)
990       {
991         node = NULL;
992         break;
993       }
994 
995       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
996       {
997        /*
998 	* Node already in the index, so replace the text and other
999 	* data...
1000 	*/
1001 
1002         cupsArrayRemove(hi->nodes, node);
1003 
1004         if (node->section)
1005 	  free(node->section);
1006 
1007 	if (node->text)
1008 	  free(node->text);
1009 
1010         if (node->words)
1011 	{
1012 	  for (word = (help_word_t *)cupsArrayFirst(node->words);
1013 	       word;
1014 	       word = (help_word_t *)cupsArrayNext(node->words))
1015 	    help_delete_word(word);
1016 
1017 	  cupsArrayDelete(node->words);
1018 	  node->words = NULL;
1019 	}
1020 
1021 	node->section = section[0] ? strdup(section) : NULL;
1022 	node->text    = strdup(text);
1023 	node->mtime   = mtime;
1024 	node->offset  = offset;
1025 	node->score   = 0;
1026       }
1027       else
1028       {
1029        /*
1030 	* New node...
1031 	*/
1032 
1033         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1034       }
1035 
1036      /*
1037       * Go through the text value and replace tabs and newlines with
1038       * whitespace and eliminate extra whitespace...
1039       */
1040 
1041       for (ptr = node->text, text = node->text; *ptr;)
1042 	if (isspace(*ptr & 255))
1043 	{
1044 	  while (isspace(*ptr & 255))
1045 	    ptr ++;
1046 
1047 	  *text++ = ' ';
1048         }
1049 	else if (text != ptr)
1050 	  *text++ = *ptr++;
1051 	else
1052 	{
1053 	  text ++;
1054 	  ptr ++;
1055 	}
1056 
1057       *text = '\0';
1058 
1059      /*
1060       * (Re)add the node to the array...
1061       */
1062 
1063       cupsArrayAdd(hi->nodes, node);
1064 
1065       if (!anchor)
1066         node = NULL;
1067       break;
1068     }
1069 
1070     if (node)
1071     {
1072      /*
1073       * Scan this line for words...
1074       */
1075 
1076       for (ptr = line; *ptr; ptr ++)
1077       {
1078        /*
1079 	* Skip HTML stuff...
1080 	*/
1081 
1082 	if (*ptr == '<')
1083 	{
1084           if (!strncmp(ptr, "<!--", 4))
1085 	  {
1086 	   /*
1087 	    * Skip HTML comment...
1088 	    */
1089 
1090             if ((text = strstr(ptr + 4, "-->")) == NULL)
1091 	      ptr += strlen(ptr) - 1;
1092 	    else
1093 	      ptr = text + 2;
1094 	  }
1095 	  else
1096 	  {
1097 	   /*
1098             * Skip HTML element...
1099 	    */
1100 
1101             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1102 	    {
1103 	      if (*ptr == '\"' || *ptr == '\'')
1104 	      {
1105 		for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1106 
1107 		if (!*ptr)
1108 		  ptr --;
1109 	      }
1110 	    }
1111 
1112 	    if (!*ptr)
1113 	      ptr --;
1114           }
1115 
1116           continue;
1117 	}
1118 	else if (*ptr == '&')
1119 	{
1120 	 /*
1121 	  * Skip HTML entity...
1122 	  */
1123 
1124 	  for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1125 
1126 	  if (!*ptr)
1127 	    ptr --;
1128 
1129 	  continue;
1130 	}
1131 	else if (!isalnum(*ptr & 255))
1132           continue;
1133 
1134        /*
1135 	* Found the start of a word, search until we find the end...
1136 	*/
1137 
1138 	for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1139 
1140 	wordlen = (size_t)(ptr - text);
1141 
1142         memcpy(temp, text, wordlen);
1143 	temp[wordlen] = '\0';
1144 
1145         ptr --;
1146 
1147 	if (wordlen > 1 && !bsearch(temp, help_common_words,
1148 	                            (sizeof(help_common_words) /
1149 				     sizeof(help_common_words[0])),
1150 				    sizeof(help_common_words[0]),
1151 				    (int (*)(const void *, const void *))
1152 				        _cups_strcasecmp))
1153           help_add_word(node, temp);
1154       }
1155     }
1156 
1157    /*
1158     * Get the offset of the next line...
1159     */
1160 
1161     offset = cupsFileTell(fp);
1162   }
1163 
1164   cupsFileClose(fp);
1165 
1166   if (node)
1167     node->length = (size_t)(offset - node->offset);
1168 
1169   return (0);
1170 }
1171 
1172 
1173 /*
1174  * 'help_new_node()' - Create a new node and add it to an index.
1175  */
1176 
1177 static help_node_t *			/* O - Node pointer or NULL on error */
help_new_node(const char * filename,const char * anchor,const char * section,const char * text,time_t mtime,off_t offset,size_t length)1178 help_new_node(const char   *filename,	/* I - Filename */
1179               const char   *anchor,	/* I - Anchor */
1180 	      const char   *section,	/* I - Section */
1181 	      const char   *text,	/* I - Text */
1182 	      time_t       mtime,	/* I - Modification time */
1183               off_t        offset,	/* I - Offset in file */
1184 	      size_t       length)	/* I - Length in bytes */
1185 {
1186   help_node_t	*n;			/* Node */
1187 
1188 
1189   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1190   if (!n)
1191     return (NULL);
1192 
1193   n->filename = strdup(filename);
1194   n->anchor   = anchor ? strdup(anchor) : NULL;
1195   n->section  = (section && *section) ? strdup(section) : NULL;
1196   n->text     = strdup(text);
1197   n->mtime    = mtime;
1198   n->offset   = offset;
1199   n->length   = length;
1200 
1201   return (n);
1202 }
1203 
1204 
1205 /*
1206  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1207  */
1208 
1209 static int				/* O - Difference */
help_sort_by_name(help_node_t * n1,help_node_t * n2)1210 help_sort_by_name(help_node_t *n1,	/* I - First node */
1211                   help_node_t *n2)	/* I - Second node */
1212 {
1213   int		diff;			/* Difference */
1214 
1215 
1216   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1217     return (diff);
1218 
1219   if (!n1->anchor && !n2->anchor)
1220     return (0);
1221   else if (!n1->anchor)
1222     return (-1);
1223   else if (!n2->anchor)
1224     return (1);
1225   else
1226     return (strcmp(n1->anchor, n2->anchor));
1227 }
1228 
1229 
1230 /*
1231  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1232  */
1233 
1234 static int				/* O - Difference */
help_sort_by_score(help_node_t * n1,help_node_t * n2)1235 help_sort_by_score(help_node_t *n1,	/* I - First node */
1236                    help_node_t *n2)	/* I - Second node */
1237 {
1238   int		diff;			/* Difference */
1239 
1240 
1241   if (n1->score != n2->score)
1242     return (n2->score - n1->score);
1243 
1244   if (n1->section && !n2->section)
1245     return (1);
1246   else if (!n1->section && n2->section)
1247     return (-1);
1248   else if (n1->section && n2->section &&
1249            (diff = strcmp(n1->section, n2->section)) != 0)
1250     return (diff);
1251 
1252   return (_cups_strcasecmp(n1->text, n2->text));
1253 }
1254 
1255 
1256 /*
1257  * 'help_sort_words()' - Sort words alphabetically.
1258  */
1259 
1260 static int				/* O - Difference */
help_sort_words(help_word_t * w1,help_word_t * w2)1261 help_sort_words(help_word_t *w1,	/* I - Second word */
1262                 help_word_t *w2)	/* I - Second word */
1263 {
1264   return (_cups_strcasecmp(w1->text, w2->text));
1265 }
1266