1 /*
2 * libwebsockets - small server side websockets and web server implementation
3 *
4 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #if !defined(_GNU_SOURCE)
26 #define _GNU_SOURCE
27 #endif
28 #include <pthread.h>
29
30 #include <libwebsockets.h>
31 #include "private-lib-core.h"
32
33 #include <string.h>
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <dirent.h>
38 #include <time.h>
39 #include <errno.h>
40 #include <stdarg.h>
41
42 #include <sys/stat.h>
43 #include <sys/time.h>
44 #include <sys/types.h>
45
46 #if defined(__APPLE__)
47 #include <sys/dirent.h>
48 /* Travis OSX does not have DT_REG... */
49 #if !defined(DT_REG)
50 #define DT_REG 8
51 #endif
52 #endif
53
/* one cache-file candidate being considered for LRU trimming */
struct file_entry {
	lws_list_ptr sorted;	/* singly-linked candidate list, newest-first */
	lws_list_ptr prev;	/* back-link, only filled in after the full scan */
	char name[64];		/* cache filename (the hash), NUL-terminated */
	time_t modified;	/* st_mtime at scan time: the LRU sort key */
	size_t size;		/* st_size at scan time, in bytes */
};
61
/* stateful, incremental LRU scan / trim context for one cache dir tree */
struct lws_diskcache_scan {
	struct file_entry *batch;	/* BATCH_COUNT entries, live during a scan */
	const char *cache_dir_base;	/* top-level cache dir (not owned by us) */
	lws_list_ptr head;		/* head of newest-first candidate list */
	time_t last_scan_completed;	/* time() when the last full scan ended */
	uint64_t agg_size;		/* total bytes seen so far this scan */
	uint64_t cache_size_limit;	/* configured limit, bytes (0 = default) */
	uint64_t avg_size;		/* mean file size from last full scan */
	uint64_t cache_tries;		/* non-bot lookups */
	uint64_t cache_hits;		/* non-bot lookups that hit */
	int cache_subdir;		/* next of the 256 subdirs to scan (0 = idle) */
	int batch_in_use;		/* entries used in ->batch so far */
	int agg_file_count;		/* files seen so far this scan */
	int secs_waiting;		/* estimated secs until next scan needed */
};
77
#define KIB (1024)
#define MIB (KIB * KIB)

/* recover the struct file_entry from a pointer to one of its list members */
#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

static const char *hex = "0123456789abcdef";

/* max number of oldest files held in memory / deletable per full scan */
#define BATCH_COUNT 128
86
87 static int
fe_modified_sort(lws_list_ptr a,lws_list_ptr b)88 fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
89 {
90 struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);
91
92 return (int)((long)p2->modified - (long)p1->modified);
93 }
94
95 struct lws_diskcache_scan *
lws_diskcache_create(const char * cache_dir_base,uint64_t cache_size_limit)96 lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
97 {
98 struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");
99
100 if (!lds)
101 return NULL;
102
103 memset(lds, 0, sizeof(*lds));
104
105 lds->cache_dir_base = cache_dir_base;
106 lds->cache_size_limit = cache_size_limit;
107
108 return lds;
109 }
110
111 void
lws_diskcache_destroy(struct lws_diskcache_scan ** lds)112 lws_diskcache_destroy(struct lws_diskcache_scan **lds)
113 {
114 if ((*lds)->batch)
115 lws_free((*lds)->batch);
116 lws_free(*lds);
117 *lds = NULL;
118 }
119
120 int
lws_diskcache_prepare(const char * cache_base_dir,int mode,uid_t uid)121 lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
122 {
123 char dir[256];
124 int n, m;
125
126 (void)mkdir(cache_base_dir, (unsigned short)mode);
127 if (chown(cache_base_dir, uid, (gid_t)-1))
128 lwsl_err("%s: %s: unable to chown %d\n", __func__,
129 cache_base_dir, uid);
130
131 for (n = 0; n < 16; n++) {
132 lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
133 (void)mkdir(dir, (mode_t)mode);
134 if (chown(dir, uid, (uid_t)-1))
135 lwsl_err("%s: %s: unable to chown %d\n", __func__,
136 dir, uid);
137 for (m = 0; m < 16; m++) {
138 lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
139 cache_base_dir, hex[n], hex[m]);
140 (void)mkdir(dir, (mode_t)mode);
141 if (chown(dir, uid, (uid_t)-1))
142 lwsl_err("%s: %s: unable to chown %d\n",
143 __func__, dir, uid);
144 }
145 }
146
147 return 0;
148 }
149
150 /* copies and then truncates the incoming name, and renames the file at the
151 * untruncated path to have the new truncated name */
152
/*
 * Finalize a temp cache file: the incoming path carries a '~...' suffix
 * appended while the file was being filled.  Truncate the string at the
 * '~' and rename the on-disk file from the long (temp) name to the
 * truncated (final) name.
 *
 * cache: in/out - the temp filepath; on success it is truncated in
 *	  place to the final path
 *
 * Returns 0 on success; 1 if the path is too long to copy, contains no
 * '~', or the rename failed.
 */
int
lws_diskcache_finalize_name(char *cache)
{
	char ren[256], *p;
	size_t len = strlen(cache);

	/*
	 * refuse paths we cannot copy whole: a silently-truncated copy
	 * would make rename() act on the wrong file
	 */
	if (len >= sizeof(ren))
		return 1;

	memcpy(ren, cache, len + 1);
	p = strchr(cache, '~');
	if (!p)
		return 1;

	*p = '\0';
	if (rename(ren, cache)) {
		lwsl_err("%s: problem renaming %s to %s\n", __func__,
			 ren, cache);
		return 1;
	}

	return 0;
}
174
175 int
lws_diskcache_query(struct lws_diskcache_scan * lds,int is_bot,const char * hash_hex,int * _fd,char * cache,int cache_len,size_t * extant_cache_len)176 lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
177 const char *hash_hex, int *_fd, char *cache, int cache_len,
178 size_t *extant_cache_len)
179 {
180 struct stat s;
181 int n;
182
183 /* caching is disabled? */
184 if (!lds->cache_dir_base)
185 return LWS_DISKCACHE_QUERY_NO_CACHE;
186
187 if (!is_bot)
188 lds->cache_tries++;
189
190 n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s", lds->cache_dir_base,
191 hash_hex[0], hash_hex[1], hash_hex);
192
193 lwsl_info("%s: job cache %s\n", __func__, cache);
194
195 *_fd = open(cache, O_RDONLY);
196 if (*_fd >= 0) {
197 int fd;
198
199 if (!is_bot)
200 lds->cache_hits++;
201
202 if (fstat(*_fd, &s)) {
203 close(*_fd);
204
205 return LWS_DISKCACHE_QUERY_NO_CACHE;
206 }
207
208 *extant_cache_len = (size_t)s.st_size;
209
210 /* "touch" the hit cache file so it's last for LRU now */
211 fd = open(cache, O_RDWR);
212 if (fd >= 0)
213 close(fd);
214
215 return LWS_DISKCACHE_QUERY_EXISTS;
216 }
217
218 /* bots are too random to pollute the cache with their antics */
219 if (is_bot)
220 return LWS_DISKCACHE_QUERY_NO_CACHE;
221
222 /* let's create it first with a unique temp name */
223
224 lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p", (int)getpid(),
225 extant_cache_len);
226
227 *_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
228 if (*_fd < 0) {
229 /* well... ok... we will proceed without cache then... */
230 lwsl_notice("%s: Problem creating cache %s: errno %d\n",
231 __func__, cache, errno);
232 return LWS_DISKCACHE_QUERY_NO_CACHE;
233 }
234
235 return LWS_DISKCACHE_QUERY_CREATING;
236 }
237
238 int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan * lds)239 lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
240 {
241 return lds->secs_waiting;
242 }
243
244 /*
245 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
246 * the dirs under the cache dir. Since we don't need or want a full list of
247 * files in there in memory at once, we restrict the linked-list size to
248 * BATCH_COUNT entries, and once it is full, simply ignore any further files
249 * that are newer than the newest one on that list. Files older than the
250 * newest guy already on the list evict the newest guy already on the list
251 * and are sorted into the correct order. In this way no matter the number
252 * of files to be processed the memory requirement is fixed at BATCH_COUNT
253 * struct file_entry-s.
254 *
255 * The oldest subset of BATCH_COUNT files are sorted into the cd->batch
256 * allocation in more recent -> least recent order.
257 *
258 * We want to track the total size of all files we saw as well, so we know if
259 * we need to actually do anything yet to restrict how much space it's taking
260 * up.
261 *
262 * And we want to do those things statefully and incrementally instead of one
263 * big atomic operation, since the user may want a huge cache, so we look in
264 * one cache dir at a time and track state in the repodir struct.
265 *
266 * When we have seen everything, we add the doubly-linked prev pointers and then
267 * if we are over the limit, start deleting up to BATCH_COUNT files working back
268 * from the end.
269 */
270
271 int
lws_diskcache_trim(struct lws_diskcache_scan * lds)272 lws_diskcache_trim(struct lws_diskcache_scan *lds)
273 {
274 size_t cache_size_limit = (size_t)lds->cache_size_limit;
275 char dirpath[132], filepath[132 + 32];
276 lws_list_ptr lp, op = NULL;
277 int files_trimmed = 0;
278 struct file_entry *p;
279 int fd, n, ret = -1;
280 size_t trimmed = 0;
281 struct dirent *de;
282 struct stat s;
283 DIR *dir;
284
285 if (!lds->cache_subdir) {
286
287 if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
288 return 0;
289
290 lds->batch = lws_malloc(sizeof(struct file_entry) *
291 BATCH_COUNT, "cache_trim");
292 if (!lds->batch) {
293 lwsl_err("%s: OOM\n", __func__);
294
295 return 1;
296 }
297 lds->agg_size = 0;
298 lds->head = NULL;
299 lds->batch_in_use = 0;
300 lds->agg_file_count = 0;
301 }
302
303 lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
304 lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
305 hex[lds->cache_subdir & 15]);
306
307 dir = opendir(dirpath);
308 if (!dir) {
309 lwsl_err("Unable to walk repo dir '%s'\n",
310 lds->cache_dir_base);
311 return -1;
312 }
313
314 do {
315 de = readdir(dir);
316 if (!de)
317 break;
318
319 if (de->d_type != DT_REG)
320 continue;
321
322 lds->agg_file_count++;
323
324 lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
325 de->d_name);
326
327 fd = open(filepath, O_RDONLY);
328 if (fd < 0) {
329 lwsl_err("%s: cannot open %s\n", __func__, filepath);
330
331 continue;
332 }
333
334 n = fstat(fd, &s);
335 close(fd);
336 if (n) {
337 lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
338 continue;
339 }
340
341 lds->agg_size += (uint64_t)s.st_size;
342
343 if (lds->batch_in_use == BATCH_COUNT) {
344 /*
345 * once we filled up the batch with candidates, we don't
346 * need to consider any files newer than the newest guy
347 * on the list...
348 */
349 if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
350 continue;
351
352 /*
353 * ... and if we find an older file later, we know it
354 * will be replacing the newest guy on the list, so use
355 * that directly...
356 */
357 p = lds->head;
358 lds->head = p->sorted;
359 } else
360 /* we are still accepting anything to fill the batch */
361
362 p = &lds->batch[lds->batch_in_use++];
363
364 p->sorted = NULL;
365 strncpy(p->name, de->d_name, sizeof(p->name) - 1);
366 p->name[sizeof(p->name) - 1] = '\0';
367 p->modified = s.st_mtime;
368 p->size = (size_t)s.st_size;
369
370 lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
371 } while (de);
372
373 ret = 0;
374
375 lds->cache_subdir++;
376 if (lds->cache_subdir != 0x100)
377 goto done;
378
379 /* we completed the whole scan... */
380
381 /* if really no guidence, then 256MiB */
382 if (!cache_size_limit)
383 cache_size_limit = 256 * 1024 * 1024;
384
385 if (lds->agg_size > cache_size_limit) {
386
387 /* apply prev pointers to make the list doubly-linked */
388
389 lp = lds->head;
390 while (lp) {
391 p = lp_to_fe(lp, sorted);
392
393 p->prev = op;
394 op = &p->prev;
395 lp = p->sorted;
396 }
397
398 /*
399 * reverse the list (start from tail, now traverse using
400 * .prev)... it's oldest-first now...
401 */
402
403 lp = op;
404
405 while (lp && lds->agg_size > cache_size_limit) {
406 p = lp_to_fe(lp, prev);
407
408 lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
409 lds->cache_dir_base, p->name[0],
410 p->name[1], p->name);
411
412 if (!unlink(filepath)) {
413 lds->agg_size -= p->size;
414 trimmed += p->size;
415 files_trimmed++;
416 } else
417 lwsl_notice("%s: Failed to unlink %s\n",
418 __func__, filepath);
419
420 lp = p->prev;
421 }
422
423 if (files_trimmed)
424 lwsl_notice("%s: %s: trimmed %d files totalling "
425 "%lldKib, leaving %lldMiB\n", __func__,
426 lds->cache_dir_base, files_trimmed,
427 ((unsigned long long)trimmed) / KIB,
428 ((unsigned long long)lds->agg_size) / MIB);
429 }
430
431 if (lds->agg_size && lds->agg_file_count)
432 lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;
433
434 /*
435 * estimate how long we can go before scanning again... default we need
436 * to start again immediately
437 */
438
439 lds->last_scan_completed = time(NULL);
440 lds->secs_waiting = 1;
441
442 if (lds->agg_size < cache_size_limit) {
443 uint64_t avg = 4096, capacity, projected;
444
445 /* let's use 80% of the real average for margin */
446 if (lds->agg_size && lds->agg_file_count)
447 avg = ((lds->agg_size * 8) / (uint64_t)lds->agg_file_count) / 10;
448
449 /*
450 * if we collected BATCH_COUNT files of the average size,
451 * how much can we clean up in 256s?
452 */
453
454 capacity = avg * BATCH_COUNT;
455
456 /*
457 * if the cache grew by 10%, would we hit the limit even then?
458 */
459 projected = (lds->agg_size * 11) / 10;
460 if (projected < cache_size_limit)
461 /* no... */
462 lds->secs_waiting = (int)((256 / 2) * ((cache_size_limit -
463 projected) / capacity));
464
465 /*
466 * large waits imply we may not have enough info yet, so
467 * check once an hour at least.
468 */
469
470 if (lds->secs_waiting > 3600)
471 lds->secs_waiting = 3600;
472 } else
473 lds->secs_waiting = 0;
474
475 lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
476 lds->cache_dir_base,
477 (unsigned long long)lds->agg_size / KIB,
478 (unsigned long long)cache_size_limit / KIB,
479 lds->secs_waiting);
480
481 lws_free(lds->batch);
482 lds->batch = NULL;
483
484 lds->cache_subdir = 0;
485
486 done:
487 closedir(dir);
488
489 return ret;
490 }
491