1 /*
2 * libwebsockets - small server side websockets and web server implementation
3 *
4 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "private-lib-core.h"
26 #include "private-lib-misc-fts.h"
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <assert.h>
31 #include <fcntl.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34
35 #define AC_COUNT_STASHED_CHILDREN 8
36
37 struct ch {
38 jg2_file_offset ofs;
39 char name[64];
40 int inst;
41 int child_agg;
42 int name_length;
43 int effpos;
44 int descendents;
45 };
46
47 struct wac {
48 struct ch ch[AC_COUNT_STASHED_CHILDREN];
49
50 jg2_file_offset self;
51 jg2_file_offset tifs;
52 int child_count;
53 int child;
54
55 int agg;
56 int desc;
57 char done_children;
58 char once;
59 };
60
61 struct linetable {
62 struct linetable *next;
63
64 int chunk_line_number_start;
65 int chunk_line_number_count;
66
67 off_t chunk_filepos_start;
68
69 off_t vli_ofs_in_index;
70 };
71
72 static uint32_t
b32(unsigned char * b)73 b32(unsigned char *b)
74 {
75 return (uint32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
76 }
77
78 static uint16_t
b16(unsigned char * b)79 b16(unsigned char *b)
80 {
81 return (uint16_t)((b[0] << 8) | b[1]);
82 }
83
84 static int
lws_fts_filepath(struct lws_fts_file * jtf,int filepath_index,char * result,size_t len,uint32_t * ofs_linetable,uint32_t * lines)85 lws_fts_filepath(struct lws_fts_file *jtf, int filepath_index, char *result,
86 size_t len, uint32_t *ofs_linetable, uint32_t *lines)
87 {
88 unsigned char buf[256 + 15];
89 uint32_t flen;
90 int ra, bp = 0;
91 size_t m;
92 off_t o;
93
94 if (filepath_index > jtf->filepaths)
95 return 1;
96
97 if (lseek(jtf->fd, (off_t)(jtf->filepath_table + (4 * (unsigned int)filepath_index)),
98 SEEK_SET) < 0) {
99 lwsl_err("%s: unable to seek\n", __func__);
100
101 return 1;
102 }
103
104 ra = (int)read(jtf->fd, buf, 4);
105 if (ra < 0)
106 return 1;
107
108 o = (off_t)b32(buf);
109 if (lseek(jtf->fd, o, SEEK_SET) < 0) {
110 lwsl_err("%s: unable to seek\n", __func__);
111
112 return 1;
113 }
114
115 ra = (int)read(jtf->fd, buf, sizeof(buf));
116 if (ra < 0)
117 return 1;
118
119 if (ofs_linetable)
120 bp += rq32(&buf[bp], ofs_linetable);
121 else
122 bp += rq32(&buf[bp], &flen);
123 if (lines)
124 bp += rq32(&buf[bp], lines);
125 else
126 bp += rq32(&buf[bp], &flen);
127 bp += rq32(&buf[bp], &flen);
128
129 m = flen;
130 if (len - 1 < m)
131 m = flen - 1;
132
133 strncpy(result, (char *)&buf[bp], m);
134 result[m] = '\0';
135 result[len - 1] = '\0';
136
137 return 0;
138 }
139
140 /*
141 * returns -1 for fail or fd open on the trie file.
142 *
143 * *root is set to the position of the root trie entry.
144 * *flen is set to the length of the whole file
145 */
146
147 int
lws_fts_adopt(struct lws_fts_file * jtf)148 lws_fts_adopt(struct lws_fts_file *jtf)
149 {
150 unsigned char buf[256];
151 off_t ot;
152
153 if (read(jtf->fd, buf, TRIE_FILE_HDR_SIZE) != TRIE_FILE_HDR_SIZE) {
154 lwsl_err("%s: unable to read file header\n", __func__);
155 goto bail;
156 }
157
158 if (buf[0] != 0xca || buf[1] != 0x7a ||
159 buf[2] != 0x5f || buf[3] != 0x75) {
160 lwsl_err("%s: bad magic %02X %02X %02X %02X\n", __func__,
161 buf[0], buf[1], buf[2], buf[3]);
162 goto bail;
163 }
164
165 jtf->root = b32(&buf[4]);
166
167 ot = lseek(jtf->fd, 0, SEEK_END);
168 if (ot < 0) {
169 lwsl_err("%s: unable to seek\n", __func__);
170
171 goto bail;
172 }
173 jtf->flen = (jg2_file_offset)ot;
174
175 if (jtf->flen != b32(&buf[8])) {
176 lwsl_err("%s: file size doesn't match expected\n", __func__);
177
178 goto bail;
179 }
180
181 jtf->filepath_table = b32(&buf[12]);
182 jtf->filepaths = (int)b32(&buf[16]);
183
184 return jtf->fd;
185
186 bail:
187 return -1;
188 }
189
190 struct lws_fts_file *
lws_fts_open(const char * filepath)191 lws_fts_open(const char *filepath)
192 {
193 struct lws_fts_file *jtf;
194
195 jtf = lws_malloc(sizeof(*jtf), "fts open");
196 if (!jtf)
197 goto bail1;
198
199 jtf->fd = open(filepath, O_RDONLY);
200 if (jtf->fd < 0) {
201 lwsl_err("%s: unable to open %s\n", __func__, filepath);
202 goto bail2;
203 }
204
205 if (lws_fts_adopt(jtf) < 0)
206 goto bail3;
207
208 return jtf;
209
210 bail3:
211 close(jtf->fd);
212 bail2:
213 lws_free(jtf);
214 bail1:
215 return NULL;
216 }
217
218 void
lws_fts_close(struct lws_fts_file * jtf)219 lws_fts_close(struct lws_fts_file *jtf)
220 {
221 close(jtf->fd);
222 lws_free(jtf);
223 }
224
225 #define grab(_pos, _size) { \
226 bp = 0; \
227 if (lseek(jtf->fd, (off_t)(_pos), SEEK_SET) < 0) { \
228 lwsl_err("%s: unable to seek\n", __func__); \
229 \
230 goto bail; \
231 } \
232 \
233 ra = (int)read(jtf->fd, buf, (size_t)(_size)); \
234 if (ra < 0) \
235 goto bail; \
236 }
237
238 static struct linetable *
lws_fts_cache_chunktable(struct lws_fts_file * jtf,uint32_t ofs_linetable,struct lwsac ** linetable_head)239 lws_fts_cache_chunktable(struct lws_fts_file *jtf, uint32_t ofs_linetable,
240 struct lwsac **linetable_head)
241 {
242 struct linetable *lt, *first = NULL, **prev = NULL;
243 unsigned char buf[8];
244 int line = 1, bp, ra;
245 off_t cfs = 0;
246
247 *linetable_head = NULL;
248
249 do {
250 grab(ofs_linetable, sizeof(buf));
251
252 lt = lwsac_use(linetable_head, sizeof(*lt), 0);
253 if (!lt)
254 goto bail;
255 if (!first)
256 first = lt;
257
258 lt->next = NULL;
259 if (prev)
260 *prev = lt;
261 prev = <->next;
262
263 lt->chunk_line_number_start = line;
264 lt->chunk_line_number_count = b16(&buf[bp + 2]);
265 lt->vli_ofs_in_index = (off_t)(ofs_linetable + 8);
266 lt->chunk_filepos_start = cfs;
267
268 line += lt->chunk_line_number_count;
269
270 cfs += (int32_t)b32(&buf[bp + 4]);
271 ofs_linetable += b16(&buf[bp]);
272
273 } while (b16(&buf[bp]));
274
275 return first;
276
277 bail:
278 lwsac_free(linetable_head);
279
280 return NULL;
281 }
282
283 static int
lws_fts_getfileoffset(struct lws_fts_file * jtf,struct linetable * ltstart,int line,off_t * _ofs)284 lws_fts_getfileoffset(struct lws_fts_file *jtf, struct linetable *ltstart,
285 int line, off_t *_ofs)
286 {
287 struct linetable *lt = ltstart;
288 unsigned char buf[LWS_FTS_LINES_PER_CHUNK * 5];
289 uint32_t ll;
290 off_t ofs;
291 int bp, ra;
292
293 /* first figure out which chunk */
294
295 do {
296 if (line >= lt->chunk_line_number_start &&
297 line < lt->chunk_line_number_start +
298 lt->chunk_line_number_count)
299 break;
300
301 lt = lt->next;
302 } while (lt);
303
304 if (!lt)
305 goto bail;
306
307 /* we know it's in this chunk */
308
309 ofs = lt->chunk_filepos_start;
310 line -= lt->chunk_line_number_start;
311
312 grab(lt->vli_ofs_in_index, sizeof(buf));
313
314 bp = 0;
315 while (line) {
316 bp += rq32(&buf[bp], &ll);
317 ofs += (int32_t)ll;
318 line--;
319 }
320
321 /* we know the offset it is at in the original file */
322
323 *_ofs = ofs;
324
325 return 0;
326
327 bail:
328 lwsl_info("%s: bail %d\n", __func__, line);
329
330 return 1;
331 }
332
333 static int
ac_record(struct lws_fts_file * jtf,struct lwsac ** results_head,const char * needle,int pos,struct wac * s,int sp,uint32_t instances,uint32_t agg_instances,uint32_t children,struct lws_fts_result_autocomplete *** ppac)334 ac_record(struct lws_fts_file *jtf, struct lwsac **results_head,
335 const char *needle, int pos, struct wac *s, int sp,
336 uint32_t instances, uint32_t agg_instances, uint32_t children,
337 struct lws_fts_result_autocomplete ***ppac)
338 {
339 struct lws_fts_result_autocomplete *ac;
340 int n, m;
341 char *p;
342
343 if (!instances && !agg_instances)
344 return 1;
345
346 m = pos;
347 for (n = 1; n <= sp; n++)
348 m += s[n].ch[s[n].child - 1].name_length;
349
350 ac = lwsac_use(results_head, sizeof(*ac) + (unsigned int)m + 1, 0);
351 if (!ac)
352 return -1;
353
354 p = (char *)(ac + 1);
355
356 **ppac = ac;
357 ac->next = NULL;
358 *ppac = &ac->next;
359 ac->instances = (int)instances;
360 ac->agg_instances = (int)agg_instances;
361 ac->ac_length = m;
362 ac->has_children = !!children;
363 ac->elided = 0;
364
365 memcpy(p, needle, (size_t)pos);
366 p += pos;
367
368 for (n = 1; n <= sp; n++) {
369 int w = s[n].child - 1;
370
371 memcpy(p, s[n].ch[w].name, (size_t)s[n].ch[w].name_length);
372 p += s[n].ch[w].name_length;
373 }
374 p = (char *)(ac + 1);
375 p[m] = '\0';
376
377 /*
378 * deduct this child's instance weight from his antecdents to track
379 * relative path attractiveness dynamically, after we already used its
380 * best results (children are sorted best-first)
381 */
382 for (n = sp; n >= 0; n--) {
383 s[n].ch[s[n].child - 1].child_agg -= (int)instances;
384 s[n].agg -= (int)instances;
385 }
386
387 return 0;
388 }
389
390 struct lws_fts_result *
lws_fts_search(struct lws_fts_file * jtf,struct lws_fts_search_params * ftsp)391 lws_fts_search(struct lws_fts_file *jtf, struct lws_fts_search_params *ftsp)
392 {
393 uint32_t children, instances, co, sl, agg, slt, chunk,
394 fileofs_tif_start, desc, agg_instances;
395 int pos = 0, n, m, nl, bp, base = 0, ra, palm, budget, sp, ofd = -1;
396 unsigned long long tf = (unsigned long long)lws_now_usecs();
397 struct lws_fts_result_autocomplete **pac = NULL;
398 char stasis, nac = 0, credible, needle[32];
399 struct lws_fts_result_filepath *fp;
400 struct lws_fts_result *result;
401 unsigned char buf[4096];
402 off_t o, child_ofs;
403 struct wac s[128];
404
405 ftsp->results_head = NULL;
406
407 if (!ftsp->needle)
408 return NULL;
409
410 nl = (int)strlen(ftsp->needle);
411 if ((size_t)nl > sizeof(needle) - 2)
412 return NULL;
413
414 result = lwsac_use(&ftsp->results_head, sizeof(*result), 0);
415 if (!result)
416 return NULL;
417
418 /* start with no results... */
419
420 result->autocomplete_head = NULL;
421 pac = &result->autocomplete_head;
422 result->filepath_head = NULL;
423 result->duration_ms = 0;
424 result->effective_flags = ftsp->flags;
425
426 palm = 0;
427
428 for (n = 0; n < nl; n++)
429 needle[n] = (char)tolower(ftsp->needle[n]);
430 needle[nl] = '\0';
431
432 o = (off_t)jtf->root;
433 do {
434 bp = 0;
435 base = 0;
436
437 grab(o, sizeof(buf));
438
439 child_ofs = o + bp;
440 bp += rq32(&buf[bp], &fileofs_tif_start);
441 bp += rq32(&buf[bp], &children);
442 bp += rq32(&buf[bp], &instances);
443 bp += rq32(&buf[bp], &agg_instances);
444 palm = pos;
445
446 /* the children follow here */
447
448 if (pos == nl) {
449
450 nac = 0;
451 if (!fileofs_tif_start)
452 /*
453 * we matched, but there are no instances of
454 * this, it's actually an intermediate
455 */
456
457 goto autocomp;
458
459 /* we leave with bp positioned at the instance list */
460
461 o = (off_t)fileofs_tif_start;
462 grab(o, sizeof(buf));
463 break;
464 }
465
466 if (ra - bp < 1024) {
467
468 /*
469 * We don't have enough. So reload the buffer starting
470 * at where we got to.
471 */
472
473 base += bp;
474 grab(o + base, sizeof(buf));
475 }
476
477 /* gets set if any child COULD match needle if it went on */
478
479 credible = 0;
480 for (n = 0; (uint32_t)n < children; n++) {
481 uint32_t inst;
482
483 bp += rq32(&buf[bp], &co);
484 bp += rq32(&buf[bp], &inst);
485 bp += rq32(&buf[bp], &agg);
486 bp += rq32(&buf[bp], &desc);
487 bp += rq32(&buf[bp], &sl);
488
489 if (sl > (uint32_t)(nl - pos)) {
490
491 /*
492 * it can't be a match because it's longer than
493 * our needle string (but that leaves it as a
494 * perfectly fine autocomplete candidate)
495 */
496 size_t g = (size_t)(nl - pos);
497
498 /*
499 * "credible" means at least one child matches
500 * all the chars in needle up to as many as it
501 * has. If not "credible" this path cannot
502 * match.
503 */
504 if (!strncmp((char *)&buf[bp], &needle[pos], g))
505 credible = 1;
506 else
507 /*
508 * deflate the parent agg using the
509 * knowledge this child is not on the
510 * path shown by the remainder of needle
511 */
512 agg_instances -= agg;
513
514 nac = 0;
515 bp += (int)sl;
516 slt = 0;
517 pos = palm;
518 goto ensure;
519 }
520
521 /* the comparison string potentially has huge length */
522
523 slt = sl;
524 while (slt) {
525
526 /*
527 * the strategy is to compare whatever we have
528 * lying around, then bring in more if it didn't
529 * fail to match yet. That way we don't bring
530 * in anything we could already have known was
531 * not needed due to a match fail.
532 */
533
534 chunk = (uint32_t)(ra - bp);
535 if (chunk > slt)
536 chunk = slt;
537
538 if ((chunk == 1 && needle[pos] != buf[bp]) ||
539 (chunk != 1 &&
540 memcmp(&needle[pos], &buf[bp], chunk))) {
541
542 /*
543 * it doesn't match... so nothing can
544 * autocomplete this...
545 */
546 bp += (int)slt;
547 slt = 0;
548 nac = 1;
549 goto ensure;
550 }
551
552 slt -= chunk;
553 pos += (int)chunk;
554 bp += (int)chunk;
555
556 /* so far, it matches */
557
558 if (!slt) {
559 /* we matched the whole thing */
560 o = (int32_t)co;
561 if (!co)
562 goto bail;
563 n = (int)children;
564 credible = 1;
565 }
566
567 ensure:
568 /*
569 * do we have at least buf more to match, or the
570 * remainder of the string, whichever is less?
571 *
572 * bp may exceed sizeof(buf) on no match path
573 */
574 chunk = sizeof(buf);
575 if (slt < chunk)
576 chunk = slt;
577
578 if (ra - bp >= (int)chunk)
579 continue;
580
581 /*
582 * We don't have enough. So reload buf starting
583 * at where we got to.
584 */
585 base += bp;
586 grab(o + base, sizeof(buf));
587
588 } /* while we are still comparing */
589
590 } /* for each child */
591
592 if ((uint32_t)n == children) {
593 if (!credible)
594 goto bail;
595
596 nac = 0;
597 goto autocomp;
598 }
599 } while(1);
600
601 result->duration_ms = (int)(((uint64_t)lws_now_usecs() - tf) / 1000);
602
603 if (!instances && !children)
604 return result;
605
606 /* the match list may easily exceed one read buffer load ... */
607
608 o += bp;
609
610 /*
611 * Only do the file match list if it was requested in the search flags
612 */
613
614 if (!(ftsp->flags & LWSFTS_F_QUERY_FILES))
615 goto autocomp;
616
617 do {
618 uint32_t fi, tot, line, ro, ofs_linetable, lines, fplen,
619 *u, _o;
620 struct lwsac *lt_head = NULL;
621 struct linetable *ltst;
622 char path[256], *pp;
623 int footprint;
624 off_t fo;
625
626 ofd = -1;
627 grab(o, sizeof(buf));
628
629 ro = (uint32_t)o;
630 bp += rq32(&buf[bp], &_o);
631 o = (off_t)_o;
632
633 assert(!o || o > TRIE_FILE_HDR_SIZE);
634
635 bp += rq32(&buf[bp], &fi);
636 bp += rq32(&buf[bp], &tot);
637
638 if (lws_fts_filepath(jtf, (int)fi, path, sizeof(path) - 1,
639 &ofs_linetable, &lines)) {
640 lwsl_err("can't get filepath index %d\n", fi);
641 goto bail;
642 }
643
644 if (ftsp->only_filepath && strcmp(path, ftsp->only_filepath))
645 continue;
646
647 ltst = lws_fts_cache_chunktable(jtf, ofs_linetable, <_head);
648 if (!ltst)
649 goto bail;
650
651 if (ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE) {
652 ofd = open(path, O_RDONLY);
653 if (ofd < 0) {
654 lwsac_free(<_head);
655 goto bail;
656 }
657 }
658
659 fplen = (uint32_t)strlen(path);
660 footprint = (int)(sizeof(*fp) + fplen + 1);
661 if (ftsp->flags & LWSFTS_F_QUERY_FILE_LINES) {
662 /* line number and offset in file */
663 footprint += (int)(2 * sizeof(uint32_t) * tot);
664
665 if (ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE)
666 /* pointer to quote string */
667 footprint += (int)(sizeof(void *) * tot);
668 }
669
670 fp = lwsac_use(&ftsp->results_head, (unsigned int)footprint, 0);
671 if (!fp) {
672 lwsac_free(<_head);
673 goto bail;
674 }
675
676 fp->filepath_length = (int)fplen;
677 fp->lines_in_file = (int)lines;
678 fp->matches = (int)tot;
679 fp->matches_length = footprint - (int)sizeof(*fp) - (int)(fplen + 1);
680 fp->next = result->filepath_head;
681 result->filepath_head = fp;
682
683 /* line table first so it can be aligned */
684
685 u = (uint32_t*)(fp + 1);
686
687 if (ftsp->flags & LWSFTS_F_QUERY_FILE_LINES) {
688
689 /* for each line number */
690
691 for (n = 0; (uint32_t)n < tot; n++) {
692
693 unsigned char lbuf[256], *p;
694 char ebuf[384];
695 const char **v;
696 int m;
697
698 if ((ra - bp) < 8) {
699 base += bp;
700 grab((int32_t)ro + base, sizeof(buf));
701 }
702
703 bp += rq32(&buf[bp], &line);
704 *u++ = line;
705
706 if (lws_fts_getfileoffset(jtf, ltst, (int)line, &fo))
707 continue;
708
709 *u++ = (uint32_t)fo;
710
711 if (!(ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE))
712 continue;
713
714 if (lseek(ofd, fo, SEEK_SET) < 0)
715 continue;
716
717 m = (int)read(ofd, lbuf, sizeof(lbuf) - 1);
718 if (m < 0)
719 continue;
720 lbuf[sizeof(lbuf) - 1] = '\0';
721
722 p = (unsigned char *)strchr((char *)lbuf, '\n');
723 if (p)
724 m = lws_ptr_diff(p, lbuf);
725 lbuf[m] = '\0';
726 p = (unsigned char *)strchr((char *)lbuf, '\r');
727 if (p)
728 m = lws_ptr_diff(p, lbuf);
729 lbuf[m] = '\0';
730
731 lws_json_purify(ebuf, (const char *)lbuf,
732 sizeof(ebuf) - 1, NULL);
733 m = (int)strlen(ebuf);
734
735 p = lwsac_use(&ftsp->results_head, (unsigned int)m + 1, 0);
736 if (!p) {
737 lwsac_free(<_head);
738 goto bail;
739 }
740
741 memcpy(p, ebuf, (unsigned int)m);
742 p[m] = '\0';
743 v = (const char **)u;
744 *v = (const char *)p;
745 u += sizeof(const char *) / sizeof(uint32_t);
746 }
747 }
748
749 pp = ((char *)&fp[1]) + fp->matches_length;
750 memcpy(pp, path, fplen);
751 pp[fplen] = '\0';
752
753 if (ofd >= 0) {
754 close(ofd);
755 ofd = -1;
756 }
757
758 lwsac_free(<_head);
759
760 if (ftsp->only_filepath)
761 break;
762
763 } while (o);
764
765 /* sort the instance file list by results density */
766
767 do {
768 struct lws_fts_result_filepath **prf, *rf1, *rf2;
769
770 stasis = 1;
771
772 /* bubble sort keeps going until nothing changed */
773
774 prf = &result->filepath_head;
775 while (*prf) {
776
777 rf1 = *prf;
778 rf2 = rf1->next;
779
780 if (rf2 && rf1->lines_in_file && rf2->lines_in_file &&
781 ((rf1->matches * 1000) / rf1->lines_in_file) <
782 ((rf2->matches * 1000) / rf2->lines_in_file)) {
783 stasis = 0;
784
785 *prf = rf2;
786 rf1->next = rf2->next;
787 rf2->next = rf1;
788 }
789
790 prf = &(*prf)->next;
791 }
792
793 } while (!stasis);
794
795 autocomp:
796
797 if (!(ftsp->flags & LWSFTS_F_QUERY_AUTOCOMPLETE) || nac)
798 return result;
799
800 /*
801 * autocomplete (ie, the descendent paths that yield the most hits)
802 *
803 * We actually need to spider the earliest terminal descendents from
804 * the child we definitely got past, and present the first n terminal
805 * strings. The descendents are already sorted in order of highest
806 * aggregated hits in their descendents first, so simply collecting n
807 * earliest leaf children is enough.
808 *
809 * The leaf children may be quite deep down in a stack however. So we
810 * have to go through all the walking motions collecting and retaining
811 * child into for when we come back up the walk.
812 *
813 * We can completely ignore file instances for this, we just need the
814 * earliest children. And we can restrict how many children we stash
815 * in each stack level to eg, 5.
816 *
817 * child_ofs comes in pointing at the start of the trie entry that is
818 * to be the starting point for making suggestions.
819 */
820
821 budget = ftsp->max_autocomplete;
822 base = 0;
823 bp = 0;
824 pac = &result->autocomplete_head;
825 sp = 0;
826 if (pos > (int)sizeof(s[sp].ch[0].name) - 1)
827 pos = (int)sizeof(s[sp].ch[0].name) - 1;
828
829 memset(&s[sp], 0, sizeof(s[sp]));
830
831 s[sp].child = 1;
832 s[sp].tifs = fileofs_tif_start;
833 s[sp].self = (jg2_file_offset)child_ofs;
834 s[sp].ch[0].effpos = pos;
835
836 if (pos == nl)
837 n = ac_record(jtf, &ftsp->results_head, needle, pos, s, 0,
838 instances, agg_instances, children, &pac);
839
840 while (sp >= 0 && budget) {
841 int nobump = 0;
842 struct ch *tch = &s[sp].ch[s[sp].child - 1];
843
844 grab(child_ofs, sizeof(buf));
845
846 bp += rq32(&buf[bp], &fileofs_tif_start);
847 bp += rq32(&buf[bp], &children);
848 bp += rq32(&buf[bp], &instances);
849 bp += rq32(&buf[bp], &agg_instances);
850
851 if (sp > 0 && s[sp - 1].done_children &&
852 tch->effpos + tch->name_length >= nl &&
853 tch->inst && fileofs_tif_start) {
854 n = ac_record(jtf, &ftsp->results_head, needle, pos, s,
855 sp, (uint32_t)tch->inst, (uint32_t)tch->child_agg,
856 (uint32_t)tch->descendents, &pac);
857 if (n < 0)
858 goto bail;
859 if (!n)
860 if (--budget == 0)
861 break;
862 }
863
864 if (!s[sp].done_children && children) {
865 s[sp].done_children = 1;
866 sp++;
867 memset(&s[sp], 0, sizeof(s[sp]));
868 s[sp].tifs = fileofs_tif_start;
869 s[sp].self = (jg2_file_offset)child_ofs;
870
871 for (n = 0; n < (int)children && s[sp].child_count <
872 (int)LWS_ARRAY_SIZE(s[0].ch); n++) {
873 uint32_t slen, cho, agg, inst;
874 int i = s[sp].child_count;
875 struct ch *ch = &s[sp].ch[i];
876 size_t max;
877
878 bp += rq32(&buf[bp], &cho);
879 bp += rq32(&buf[bp], &inst);
880 bp += rq32(&buf[bp], &agg);
881 bp += rq32(&buf[bp], &desc);
882 bp += rq32(&buf[bp], &slen);
883
884 max = slen;
885 if (max > sizeof(ch->name) - 1)
886 max = sizeof(ch->name) - 1;
887
888 strncpy(ch->name, (char *)&buf[bp], max);
889 bp += (int)slen;
890
891 ch->name_length = (int)max;
892 ch->name[sizeof(ch->name) - 1] = '\0';
893 ch->inst = (int)inst;
894 ch->effpos =
895 s[sp - 1].ch[s[sp - 1].child - 1].effpos;
896
897 ch->child_agg = (int)agg;
898 ch->descendents = (int)desc;
899
900 /*
901 * if we have more needle chars than we matched
902 * to get this far, we can only allow potential
903 * matches that are consistent with the
904 * additional unmatched character(s)...
905 */
906
907 m = nl - ch->effpos;
908 if (m > ch->name_length)
909 m = ch->name_length;
910
911 if (m > 0 &&
912 strncmp(&needle[ch->effpos], ch->name, (unsigned int)m))
913 continue;
914
915 ch->effpos += m;
916 s[sp].ch[s[sp].child_count++].ofs = cho;
917 }
918
919 }
920
921 while (sp >= 0 && s[sp].child >= s[sp].child_count) {
922 s[sp].done_children = 0;
923 sp--;
924 }
925
926 /*
927 * Compare parent remaining agg vs parent's next siblings' still
928 * intact original agg... if the next sibling has more, abandon
929 * the parent path and go with the sibling... this keeps the
930 * autocomplete results related to popularity.
931 */
932
933 nobump = 0;
934 n = sp - 1;
935 while (n >= 0) {
936 struct lws_fts_result_autocomplete *ac =
937 (struct lws_fts_result_autocomplete *)pac;
938
939 if (s[n].child < s[n].child_count &&
940 s[n].ch[s[n].child - 1].child_agg <
941 s[n].ch[s[n].child].child_agg) {
942
943 if (pac)
944 /*
945 * mark the autocomplete result that
946 * there were more children down his
947 * path that we skipped in these results
948 */
949 ac->elided = 1;
950
951 for (m = n; m < sp + 1; m++)
952 s[m].done_children = 0;
953 sp = n;
954 child_ofs = (off_t)s[sp].ch[s[sp].child++].ofs;
955 nobump = 1;
956 }
957
958 n--;
959 }
960
961 if (nobump || sp < 0)
962 continue;
963
964 child_ofs = (off_t)s[sp].ch[s[sp].child++].ofs;
965 }
966
967 /* let's do a final sort into agg order */
968
969 do {
970 struct lws_fts_result_autocomplete *ac1, *ac2;
971
972 stasis = 1;
973
974 /* bubble sort keeps going until nothing changed */
975
976 pac = &result->autocomplete_head;
977 while (*pac) {
978
979 ac1 = *pac;
980 ac2 = ac1->next;
981
982 if (ac2 && ac1->instances < ac2->instances) {
983 stasis = 0;
984
985 *pac = ac2;
986 ac1->next = ac2->next;
987 ac2->next = ac1;
988 }
989
990 pac = &(*pac)->next;
991 }
992
993 } while (!stasis);
994
995 return result;
996
997 bail:
998 if (ofd >= 0)
999 close(ofd);
1000
1001 lwsl_info("%s: search ended up at bail\n", __func__);
1002
1003 return result;
1004 }
1005