1 /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
2  * Permission is hereby granted, free of charge, to any person obtaining a copy
3  * of this software and associated documentation files (the "Software"), to
4  * deal in the Software without restriction, including without limitation the
5  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
6  * sell copies of the Software, and to permit persons to whom the Software is
7  * furnished to do so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in
10  * all copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18  * IN THE SOFTWARE.
19  */
20 
21 /* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their
22  * EPOLL* counterparts.  We use the POLL* variants in this file because that
23  * is what libuv uses elsewhere.
24  */
25 
26 #include "uv.h"
27 #include "internal.h"
28 #include "uv_log.h"
29 #include <inttypes.h>
30 #include <stdatomic.h>
31 #include <stddef.h>  /* offsetof */
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <assert.h>
37 #include <errno.h>
38 
39 #include <fcntl.h>
40 #include <ifaddrs.h>
41 #include <net/ethernet.h>
42 #include <net/if.h>
43 #include <netpacket/packet.h>
44 #include <sys/epoll.h>
45 #include <sys/inotify.h>
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/prctl.h>
49 #include <sys/socket.h>
50 #include <sys/stat.h>
51 #include <sys/syscall.h>
52 #include <sys/sysinfo.h>
53 #include <sys/sysmacros.h>
54 #include <sys/types.h>
55 #include <sys/utsname.h>
56 #include <time.h>
57 #include <unistd.h>
58 
59 #ifdef USE_FFRT
60 #include "ffrt.h"
61 #include "c/executor_task.h"
62 
63 int uv__epoll_wait(struct epoll_event* events, int eventsize, uint64_t timeout) {
64   int nfds = 0;
65   if (ffrt_get_cur_task() != NULL) {
66     ffrt_qos_t qos = ffrt_this_task_get_qos();
67     nfds = ffrt_epoll_wait(qos, events, eventsize, timeout);
68   }
69   return nfds;
70 }
71 #endif
72 
73 int uv__epoll_ctl(int epoll_fd, int op, int fd, struct epoll_event* event) {
74 #ifdef USE_FFRT
75   if (ffrt_get_cur_task() != NULL) {
76     ffrt_qos_t qos = ffrt_this_task_get_qos();
77     return ffrt_epoll_ctl(qos, op, fd, event == NULL ? 0 : event->events, NULL, NULL);
78   }
79 #endif
80   return epoll_ctl(epoll_fd, op, fd, event);
81 }
82 #ifndef __NR_io_uring_setup
83 # define __NR_io_uring_setup 425
84 #endif
85 
86 #ifndef __NR_io_uring_enter
87 # define __NR_io_uring_enter 426
88 #endif
89 
90 #ifndef __NR_io_uring_register
91 # define __NR_io_uring_register 427
92 #endif
93 
94 #ifndef __NR_copy_file_range
95 # if defined(__x86_64__)
96 #  define __NR_copy_file_range 326
97 # elif defined(__i386__)
98 #  define __NR_copy_file_range 377
99 # elif defined(__s390__)
100 #  define __NR_copy_file_range 375
101 # elif defined(__arm__)
102 #  define __NR_copy_file_range 391
103 # elif defined(__aarch64__)
104 #  define __NR_copy_file_range 285
105 # elif defined(__powerpc__)
106 #  define __NR_copy_file_range 379
107 # elif defined(__arc__)
108 #  define __NR_copy_file_range 285
109 # elif defined(__riscv)
110 #  define __NR_copy_file_range 285
111 # endif
112 #endif /* __NR_copy_file_range */
113 
114 #ifndef __NR_statx
115 # if defined(__x86_64__)
116 #  define __NR_statx 332
117 # elif defined(__i386__)
118 #  define __NR_statx 383
119 # elif defined(__aarch64__)
120 #  define __NR_statx 397
121 # elif defined(__arm__)
122 #  define __NR_statx 397
123 # elif defined(__ppc__)
124 #  define __NR_statx 383
125 # elif defined(__s390__)
126 #  define __NR_statx 379
127 # elif defined(__riscv)
128 #  define __NR_statx 291
129 # endif
130 #endif /* __NR_statx */
131 
132 #ifndef __NR_getrandom
133 # if defined(__x86_64__)
134 #  define __NR_getrandom 318
135 # elif defined(__i386__)
136 #  define __NR_getrandom 355
137 # elif defined(__aarch64__)
138 #  define __NR_getrandom 384
139 # elif defined(__arm__)
140 #  define __NR_getrandom 384
141 # elif defined(__ppc__)
142 #  define __NR_getrandom 359
143 # elif defined(__s390__)
144 #  define __NR_getrandom 349
145 # elif defined(__riscv)
146 #  define __NR_getrandom 278
147 # endif
148 #endif /* __NR_getrandom */
149 
150 enum {
151   UV__IORING_SETUP_SQPOLL = 2u,
152 };
153 
154 enum {
155   UV__IORING_FEAT_SINGLE_MMAP = 1u,
156   UV__IORING_FEAT_NODROP = 2u,
157   UV__IORING_FEAT_RSRC_TAGS = 1024u,  /* linux v5.13 */
158 };
159 
160 enum {
161   UV__IORING_OP_READV = 1,
162   UV__IORING_OP_WRITEV = 2,
163   UV__IORING_OP_FSYNC = 3,
164   UV__IORING_OP_OPENAT = 18,
165   UV__IORING_OP_CLOSE = 19,
166   UV__IORING_OP_STATX = 21,
167   UV__IORING_OP_EPOLL_CTL = 29,
168   UV__IORING_OP_RENAMEAT = 35,
169   UV__IORING_OP_UNLINKAT = 36,
170   UV__IORING_OP_MKDIRAT = 37,
171   UV__IORING_OP_SYMLINKAT = 38,
172   UV__IORING_OP_LINKAT = 39,
173 };
174 
175 enum {
176   UV__IORING_ENTER_GETEVENTS = 1u,
177   UV__IORING_ENTER_SQ_WAKEUP = 2u,
178 };
179 
180 enum {
181   UV__IORING_SQ_NEED_WAKEUP = 1u,
182   UV__IORING_SQ_CQ_OVERFLOW = 2u,
183 };
184 
185 enum {
186   UV__MKDIRAT_SYMLINKAT_LINKAT = 1u,
187 };
188 
189 struct uv__io_cqring_offsets {
190   uint32_t head;
191   uint32_t tail;
192   uint32_t ring_mask;
193   uint32_t ring_entries;
194   uint32_t overflow;
195   uint32_t cqes;
196   uint64_t reserved0;
197   uint64_t reserved1;
198 };
199 
200 STATIC_ASSERT(40 == sizeof(struct uv__io_cqring_offsets));
201 
202 struct uv__io_sqring_offsets {
203   uint32_t head;
204   uint32_t tail;
205   uint32_t ring_mask;
206   uint32_t ring_entries;
207   uint32_t flags;
208   uint32_t dropped;
209   uint32_t array;
210   uint32_t reserved0;
211   uint64_t reserved1;
212 };
213 
214 STATIC_ASSERT(40 == sizeof(struct uv__io_sqring_offsets));
215 
216 struct uv__io_uring_cqe {
217   uint64_t user_data;
218   int32_t res;
219   uint32_t flags;
220 };
221 
222 STATIC_ASSERT(16 == sizeof(struct uv__io_uring_cqe));
223 
224 struct uv__io_uring_sqe {
225   uint8_t opcode;
226   uint8_t flags;
227   uint16_t ioprio;
228   int32_t fd;
229   union {
230     uint64_t off;
231     uint64_t addr2;
232   };
233   union {
234     uint64_t addr;
235   };
236   uint32_t len;
237   union {
238     uint32_t rw_flags;
239     uint32_t fsync_flags;
240     uint32_t open_flags;
241     uint32_t statx_flags;
242   };
243   uint64_t user_data;
244   union {
245     uint16_t buf_index;
246     uint64_t pad[3];
247   };
248 };
249 
250 STATIC_ASSERT(64 == sizeof(struct uv__io_uring_sqe));
251 STATIC_ASSERT(0 == offsetof(struct uv__io_uring_sqe, opcode));
252 STATIC_ASSERT(1 == offsetof(struct uv__io_uring_sqe, flags));
253 STATIC_ASSERT(2 == offsetof(struct uv__io_uring_sqe, ioprio));
254 STATIC_ASSERT(4 == offsetof(struct uv__io_uring_sqe, fd));
255 STATIC_ASSERT(8 == offsetof(struct uv__io_uring_sqe, off));
256 STATIC_ASSERT(16 == offsetof(struct uv__io_uring_sqe, addr));
257 STATIC_ASSERT(24 == offsetof(struct uv__io_uring_sqe, len));
258 STATIC_ASSERT(28 == offsetof(struct uv__io_uring_sqe, rw_flags));
259 STATIC_ASSERT(32 == offsetof(struct uv__io_uring_sqe, user_data));
260 STATIC_ASSERT(40 == offsetof(struct uv__io_uring_sqe, buf_index));
261 
262 struct uv__io_uring_params {
263   uint32_t sq_entries;
264   uint32_t cq_entries;
265   uint32_t flags;
266   uint32_t sq_thread_cpu;
267   uint32_t sq_thread_idle;
268   uint32_t features;
269   uint32_t reserved[4];
270   struct uv__io_sqring_offsets sq_off;  /* 40 bytes */
271   struct uv__io_cqring_offsets cq_off;  /* 40 bytes */
272 };
273 
274 STATIC_ASSERT(40 + 40 + 40 == sizeof(struct uv__io_uring_params));
275 STATIC_ASSERT(40 == offsetof(struct uv__io_uring_params, sq_off));
276 STATIC_ASSERT(80 == offsetof(struct uv__io_uring_params, cq_off));
277 
278 STATIC_ASSERT(EPOLL_CTL_ADD < 4);
279 STATIC_ASSERT(EPOLL_CTL_DEL < 4);
280 STATIC_ASSERT(EPOLL_CTL_MOD < 4);
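/* Note: the `< 4` bound matters because uv__epoll_ctl_prep() below packs the
 * epoll op into the two lowest bits of sqe->user_data
 * (op | slot << 2 | (int64_t) fd << 32), so each op must fit in two bits.
 */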
281 
282 struct watcher_list {
283   RB_ENTRY(watcher_list) entry;
284   struct uv__queue watchers;
285   int iterating;
286   char* path;
287   int wd;
288 };
289 
290 struct watcher_root {
291   struct watcher_list* rbh_root;
292 };
293 
294 static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root);
295 static void uv__inotify_read(uv_loop_t* loop,
296                              uv__io_t* w,
297                              unsigned int revents);
298 static int compare_watchers(const struct watcher_list* a,
299                             const struct watcher_list* b);
300 static void maybe_free_watcher_list(struct watcher_list* w,
301                                     uv_loop_t* loop);
302 
303 static void uv__epoll_ctl_flush(int epollfd,
304                                 struct uv__iou* ctl,
305                                 struct epoll_event (*events)[256]);
306 
307 static void uv__epoll_ctl_prep(int epollfd,
308                                struct uv__iou* ctl,
309                                struct epoll_event (*events)[256],
310                                int op,
311                                int fd,
312                                struct epoll_event* e);
313 
314 RB_GENERATE_STATIC(watcher_root, watcher_list, entry, compare_watchers)
315 
316 
317 static struct watcher_root* uv__inotify_watchers(uv_loop_t* loop) {
318   /* This cast works because watcher_root is a struct with a pointer as its
319    * sole member. Such type punning is unsafe in the presence of strict
320    * pointer aliasing (and is just plain nasty) but that is why libuv
321    * is compiled with -fno-strict-aliasing.
322    */
323   return (struct watcher_root*) &loop->inotify_watchers;
324 }
325 
326 
327 unsigned uv__kernel_version(void) {
328   static _Atomic unsigned cached_version;
329   struct utsname u;
330   unsigned version;
331   unsigned major;
332   unsigned minor;
333   unsigned patch;
334   char v_sig[256];
335   char* needle;
336 
337   version = atomic_load_explicit(&cached_version, memory_order_relaxed);
338   if (version != 0)
339     return version;
340 
341   /* Check /proc/version_signature first as it's the way to get the mainline
342    * kernel version in Ubuntu. The format is:
343    *   Ubuntu ubuntu_kernel_version mainline_kernel_version
344    * For example:
345    *   Ubuntu 5.15.0-79.86-generic 5.15.111
346    */
347   if (0 == uv__slurp("/proc/version_signature", v_sig, sizeof(v_sig)))
348     if (3 == sscanf(v_sig, "Ubuntu %*s %u.%u.%u", &major, &minor, &patch))
349       goto calculate_version;
350 
351   if (-1 == uname(&u))
352     return 0;
353 
354   /* In Debian we need to check `version` instead of `release` to extract the
355  * mainline kernel version. This is an example of what it looks like:
356    *  #1 SMP Debian 5.10.46-4 (2021-08-03)
357    */
358   needle = strstr(u.version, "Debian ");
359   if (needle != NULL)
360     if (3 == sscanf(needle, "Debian %u.%u.%u", &major, &minor, &patch))
361       goto calculate_version;
362 
363   if (3 != sscanf(u.release, "%u.%u.%u", &major, &minor, &patch))
364     return 0;
365 
366   /* Handle it when the process runs under the UNAME26 personality:
367    *
368    * - kernels >= 3.x identify as 2.6.40+x
369    * - kernels >= 4.x identify as 2.6.60+x
370    *
371    * UNAME26 is a poorly conceived hack that doesn't let us distinguish
372    * between 4.x kernels and 5.x/6.x kernels so we conservatively assume
373    * that 2.6.60+x means 4.x.
374    *
375    * Fun fact of the day: it's technically possible to observe the actual
376    * kernel version for a brief moment because uname() first copies out the
377    * real release string before overwriting it with the backcompat string.
378    */
379   if (major == 2 && minor == 6) {
380     if (patch >= 60) {
381       major = 4;
382       minor = patch - 60;
383       patch = 0;
384     } else if (patch >= 40) {
385       major = 3;
386       minor = patch - 40;
387       patch = 0;
388     }
389   }
390 
391 calculate_version:
392   version = major * 65536 + minor * 256 + patch;
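  /* For example, kernel 5.15.111 encodes as (5 << 16) | (15 << 8) | 111,
   * i.e. 0x050F6F, which is the form the version constants elsewhere in this
   * file (0x050F5A == 5.15.90, 0x060100 == 6.1.0, ...) are compared against.
   */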
393   atomic_store_explicit(&cached_version, version, memory_order_relaxed);
394 
395   return version;
396 }
397 
398 
399 ssize_t
400 uv__fs_copy_file_range(int fd_in,
401                        off_t* off_in,
402                        int fd_out,
403                        off_t* off_out,
404                        size_t len,
405                        unsigned int flags)
406 {
407 #ifdef __NR_copy_file_range
408   return syscall(__NR_copy_file_range,
409                  fd_in,
410                  off_in,
411                  fd_out,
412                  off_out,
413                  len,
414                  flags);
415 #else
416   return errno = ENOSYS, -1;
417 #endif
418 }
419 
420 
421 int uv__statx(int dirfd,
422               const char* path,
423               int flags,
424               unsigned int mask,
425               struct uv__statx* statxbuf) {
426 #if !defined(__NR_statx) || defined(__ANDROID_API__) && __ANDROID_API__ < 30
427   return errno = ENOSYS, -1;
428 #else
429   int rc;
430 
431   rc = syscall(__NR_statx, dirfd, path, flags, mask, statxbuf);
432   if (rc >= 0)
433     uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
434 
435   return rc;
436 #endif
437 }
438 
439 
440 ssize_t uv__getrandom(void* buf, size_t buflen, unsigned flags) {
441 #if !defined(__NR_getrandom) || defined(__ANDROID_API__) && __ANDROID_API__ < 28
442   return errno = ENOSYS, -1;
443 #else
444   ssize_t rc;
445 
446   rc = syscall(__NR_getrandom, buf, buflen, flags);
447   if (rc >= 0)
448     uv__msan_unpoison(buf, buflen);
449 
450   return rc;
451 #endif
452 }
453 
454 
455 int uv__io_uring_setup(int entries, struct uv__io_uring_params* params) {
456   return syscall(__NR_io_uring_setup, entries, params);
457 }
458 
459 
460 int uv__io_uring_enter(int fd,
461                        unsigned to_submit,
462                        unsigned min_complete,
463                        unsigned flags) {
464   /* io_uring_enter used to take a sigset_t but it's unused
465    * in newer kernels unless IORING_ENTER_EXT_ARG is set,
466    * in which case it takes a struct io_uring_getevents_arg.
467    */
468   return syscall(__NR_io_uring_enter,
469                  fd,
470                  to_submit,
471                  min_complete,
472                  flags,
473                  NULL,
474                  0L);
475 }
476 
477 
478 int uv__io_uring_register(int fd, unsigned opcode, void* arg, unsigned nargs) {
479   return syscall(__NR_io_uring_register, fd, opcode, arg, nargs);
480 }
481 
482 
483 static int uv__use_io_uring(void) {
484 #if defined(USE_OHOS_DFX)
485   return 0;
486 #endif
487 #if defined(__ANDROID_API__)
488   return 0;  /* Possibly available but blocked by seccomp. */
489 #elif defined(__arm__) && __SIZEOF_POINTER__ == 4
490   /* See https://github.com/libuv/libuv/issues/4158. */
491   return 0;  /* All 32 bits kernels appear buggy. */
492 #elif defined(__powerpc64__) || defined(__ppc64__)
493   /* See https://github.com/libuv/libuv/issues/4283. */
494   return 0; /* Random SIGSEGV in signal handler. */
495 #else
496   /* Ternary: unknown=0, yes=1, no=-1 */
497   static _Atomic int use_io_uring;
498   char* val;
499   int use;
500 
501   use = atomic_load_explicit(&use_io_uring, memory_order_relaxed);
502 
503   if (use == 0) {
504     use = uv__kernel_version() >=
505 #if defined(__hppa__)
506     /* io_uring first supported on parisc in 6.1, functional in .51 */
507     /* https://lore.kernel.org/all/cb912694-b1fe-dbb0-4d8c-d608f3526905@gmx.de/ */
508     /* 6.1.51 */ 0x060133
509 #else
510     /* Older kernels have a bug where the sqpoll thread uses 100% CPU. */
511     /* 5.10.186 */ 0x050ABA
512 #endif
513     ? 1 : -1;
514 
515     /* But users can still enable it if they so desire. */
516     val = getenv("UV_USE_IO_URING");
517     if (val != NULL)
518       use = atoi(val) ? 1 : -1;
519 
520     atomic_store_explicit(&use_io_uring, use, memory_order_relaxed);
521   }
522 
523   return use > 0;
524 #endif
525 }
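/* The UV_USE_IO_URING environment variable overrides the kernel-version
 * heuristic above in both directions: any value that atoi() parses as
 * non-zero enables io_uring, anything else disables it. An illustrative
 * (hypothetical) invocation of a libuv application:
 *
 *   $ UV_USE_IO_URING=0 ./myapp   # force the plain epoll/threadpool path
 *   $ UV_USE_IO_URING=1 ./myapp   # opt in even on kernels rejected above
 *
 * On builds where uv__use_io_uring() returns 0 unconditionally (OHOS DFX,
 * Android, 32-bit ARM, ppc64), the variable has no effect.
 */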
526 
527 
528 static void uv__iou_init(int epollfd,
529                          struct uv__iou* iou,
530                          uint32_t entries,
531                          uint32_t flags) {
532   struct uv__io_uring_params params;
533   struct epoll_event e;
534   size_t cqlen;
535   size_t sqlen;
536   size_t maxlen;
537   size_t sqelen;
538   uint32_t i;
539   char* sq;
540   char* sqe;
541   int ringfd;
542 
543   sq = MAP_FAILED;
544   sqe = MAP_FAILED;
545 
546   if (!uv__use_io_uring())
547     return;
548 
549   /* SQPOLL required CAP_SYS_NICE until linux v5.12 relaxed that requirement.
550    * Mostly academic because we check for a v5.13 kernel afterwards anyway.
551    */
552   memset(&params, 0, sizeof(params));
553   params.flags = flags;
554 
555   if (flags & UV__IORING_SETUP_SQPOLL)
556     params.sq_thread_idle = 10;  /* milliseconds */
557 
558   /* Kernel returns a file descriptor with O_CLOEXEC flag set. */
559   ringfd = uv__io_uring_setup(entries, &params);
560   if (ringfd == -1)
561     return;
562 
563   /* IORING_FEAT_RSRC_TAGS is used to detect linux v5.13 but what we're
564    * actually detecting is whether IORING_OP_STATX works with SQPOLL.
565    */
566   if (!(params.features & UV__IORING_FEAT_RSRC_TAGS))
567     goto fail;
568 
569   /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
570   if (!(params.features & UV__IORING_FEAT_SINGLE_MMAP))
571     goto fail;
572 
573   /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
574   if (!(params.features & UV__IORING_FEAT_NODROP))
575     goto fail;
576 
577   sqlen = params.sq_off.array + params.sq_entries * sizeof(uint32_t);
578   cqlen =
579       params.cq_off.cqes + params.cq_entries * sizeof(struct uv__io_uring_cqe);
580   maxlen = sqlen < cqlen ? cqlen : sqlen;
581   sqelen = params.sq_entries * sizeof(struct uv__io_uring_sqe);
582 
583   sq = mmap(0,
584             maxlen,
585             PROT_READ | PROT_WRITE,
586             MAP_SHARED | MAP_POPULATE,
587             ringfd,
588             0);  /* IORING_OFF_SQ_RING */
589 
590   sqe = mmap(0,
591              sqelen,
592              PROT_READ | PROT_WRITE,
593              MAP_SHARED | MAP_POPULATE,
594              ringfd,
595              0x10000000ull);  /* IORING_OFF_SQES */
596 
597   if (sq == MAP_FAILED || sqe == MAP_FAILED)
598     goto fail;
599 
600   if (flags & UV__IORING_SETUP_SQPOLL) {
601     /* Only interested in completion events. To get notified when
602      * the kernel pulls items from the submission ring, add POLLOUT.
603      */
604     memset(&e, 0, sizeof(e));
605     e.events = POLLIN;
606     e.data.fd = ringfd;
607 
608     if (uv__epoll_ctl(epollfd, EPOLL_CTL_ADD, ringfd, &e))
609       goto fail;
610   }
611 
612   iou->sqhead = (uint32_t*) (sq + params.sq_off.head);
613   iou->sqtail = (uint32_t*) (sq + params.sq_off.tail);
614   iou->sqmask = *(uint32_t*) (sq + params.sq_off.ring_mask);
615   iou->sqarray = (uint32_t*) (sq + params.sq_off.array);
616   iou->sqflags = (uint32_t*) (sq + params.sq_off.flags);
617   iou->cqhead = (uint32_t*) (sq + params.cq_off.head);
618   iou->cqtail = (uint32_t*) (sq + params.cq_off.tail);
619   iou->cqmask = *(uint32_t*) (sq + params.cq_off.ring_mask);
620   iou->sq = sq;
621   iou->cqe = sq + params.cq_off.cqes;
622   iou->sqe = sqe;
623   iou->sqlen = sqlen;
624   iou->cqlen = cqlen;
625   iou->maxlen = maxlen;
626   iou->sqelen = sqelen;
627   iou->ringfd = ringfd;
628   iou->in_flight = 0;
629   iou->flags = 0;
630 
631   if (uv__kernel_version() >= /* 5.15.0 */ 0x050F00)
632     iou->flags |= UV__MKDIRAT_SYMLINKAT_LINKAT;
633 
634   for (i = 0; i <= iou->sqmask; i++)
635     iou->sqarray[i] = i;  /* Slot -> sqe identity mapping. */
636 
637   return;
638 
639 fail:
640   if (sq != MAP_FAILED)
641     munmap(sq, maxlen);
642 
643   if (sqe != MAP_FAILED)
644     munmap(sqe, sqelen);
645 
646   uv__close(ringfd);
647 }
648 
649 
650 static void uv__iou_delete(struct uv__iou* iou) {
651   if (iou->ringfd != -1) {
652     munmap(iou->sq, iou->maxlen);
653     munmap(iou->sqe, iou->sqelen);
654     uv__close(iou->ringfd);
655     iou->ringfd = -1;
656   }
657 }
658 
659 
660 int uv__platform_loop_init(uv_loop_t* loop) {
661   uv__loop_internal_fields_t* lfields;
662 
663   lfields = uv__get_internal_fields(loop);
664   lfields->ctl.ringfd = -1;
665   lfields->iou.ringfd = -1;
666 
667   loop->inotify_watchers = NULL;
668   loop->inotify_fd = -1;
669   loop->backend_fd = epoll_create1(O_CLOEXEC);
670 #ifdef USE_OHOS_DFX
671   fdsan_exchange_owner_tag(loop->backend_fd, 0, uv__get_addr_tag((void *)&loop->backend_fd));
672 #endif
673   if (loop->backend_fd == -1)
674     return UV__ERR(errno);
675 
676   uv__iou_init(loop->backend_fd, &lfields->iou, 64, UV__IORING_SETUP_SQPOLL);
677   uv__iou_init(loop->backend_fd, &lfields->ctl, 256, 0);
678   UV_LOGI("init:%{public}zu, backend_fd:%{public}d", (size_t)loop, loop->backend_fd);
679   return 0;
680 }
681 
682 
683 int uv__io_fork(uv_loop_t* loop) {
684   int err;
685   struct watcher_list* root;
686 
687   root = uv__inotify_watchers(loop)->rbh_root;
688 #ifdef USE_OHOS_DFX
689     fdsan_close_with_tag(loop->backend_fd, uv__get_addr_tag((void *)&loop->backend_fd));
690 #else
691   uv__close(loop->backend_fd);
692 #endif
693   loop->backend_fd = -1;
694 
695   /* TODO(bnoordhuis) Loses items from the submission and completion rings. */
696   uv__platform_loop_delete(loop);
697 
698   err = uv__platform_loop_init(loop);
699   if (err)
700     return err;
701 
702   return uv__inotify_fork(loop, root);
703 }
704 
705 
706 void uv__platform_loop_delete(uv_loop_t* loop) {
707   uv__loop_internal_fields_t* lfields;
708 
709   lfields = uv__get_internal_fields(loop);
710   uv__iou_delete(&lfields->ctl);
711   uv__iou_delete(&lfields->iou);
712 
713   if (loop->inotify_fd != -1) {
714     uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN);
715     uv__close(loop->inotify_fd);
716     loop->inotify_fd = -1;
717   }
718 }
719 
720 
721 struct uv__invalidate {
722   struct epoll_event (*prep)[256];
723   struct epoll_event* events;
724   int nfds;
725 };
726 
727 
728 void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
729   uv__loop_internal_fields_t* lfields;
730   struct uv__invalidate* inv;
731   struct epoll_event dummy;
732   int i;
733 
734   lfields = uv__get_internal_fields(loop);
735   inv = lfields->inv;
736 
737   /* Invalidate events with same file descriptor */
738   if (inv != NULL)
739     for (i = 0; i < inv->nfds; i++)
740       if (inv->events[i].data.fd == fd)
741         inv->events[i].data.fd = -1;
742 
743   /* Remove the file descriptor from the epoll.
744    * This avoids a problem where the same file description remains open
745    * in another process, causing repeated junk epoll events.
746    *
747    * We pass in a dummy epoll_event, to work around a bug in old kernels.
748    *
749    * Work around a bug in kernels 3.10 to 3.19 where passing a struct that
750    * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
751    */
752   memset(&dummy, 0, sizeof(dummy));
753 
754   if (inv == NULL) {
755     uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
756   } else {
757     uv__epoll_ctl_prep(loop->backend_fd,
758                        &lfields->ctl,
759                        inv->prep,
760                        EPOLL_CTL_DEL,
761                        fd,
762                        &dummy);
763   }
764 }
765 
766 
767 int uv__io_check_fd(uv_loop_t* loop, int fd) {
768   struct epoll_event e;
769   int rc;
770 
771   memset(&e, 0, sizeof(e));
772   e.events = POLLIN;
773   e.data.fd = -1;
774 
775   rc = 0;
776   if (uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
777     if (errno != EEXIST)
778       rc = UV__ERR(errno);
779 
780   if (rc == 0)
781     if (uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
782       abort();
783 
784   return rc;
785 }
786 
787 
788 /* Caller must initialize SQE and call uv__iou_submit(). */
789 static struct uv__io_uring_sqe* uv__iou_get_sqe(struct uv__iou* iou,
790                                                 uv_loop_t* loop,
791                                                 uv_fs_t* req) {
792   struct uv__io_uring_sqe* sqe;
793   uint32_t head;
794   uint32_t tail;
795   uint32_t mask;
796   uint32_t slot;
797 
798   if (iou->ringfd == -1)
799     return NULL;
800 
801   head = atomic_load_explicit((_Atomic uint32_t*) iou->sqhead,
802                               memory_order_acquire);
803   tail = *iou->sqtail;
804   mask = iou->sqmask;
805 
806   if ((head & mask) == ((tail + 1) & mask))
807     return NULL;  /* No room in ring buffer. TODO(bnoordhuis) maybe flush it? */
808 
809   slot = tail & mask;
810   sqe = iou->sqe;
811   sqe = &sqe[slot];
812   memset(sqe, 0, sizeof(*sqe));
813   sqe->user_data = (uintptr_t) req;
814 
815   /* Pacify uv_cancel(). */
816   req->work_req.loop = loop;
817   req->work_req.work = NULL;
818   req->work_req.done = NULL;
819   uv__queue_init(&req->work_req.wq);
820 
821   uv__req_register(loop, req);
822   iou->in_flight++;
823 
824   return sqe;
825 }
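/* A minimal sketch of how the uv__iou_fs_*() helpers below use this pair of
 * functions, assuming an already prepared uv_fs_t request:
 *
 *   sqe = uv__iou_get_sqe(iou, loop, req);
 *   if (sqe == NULL)
 *     return 0;                      ring full or unavailable: the caller
 *                                    falls back to the threadpool
 *   sqe->fd = req->file;             fill in the operation-specific fields
 *   sqe->opcode = UV__IORING_OP_CLOSE;
 *   uv__iou_submit(iou);             publish the new tail and wake the
 *                                    SQPOLL thread if needed
 *   return 1;                        completion arrives later via
 *                                    uv__poll_io_uring()
 */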
826 
827 
828 static void uv__iou_submit(struct uv__iou* iou) {
829   uint32_t flags;
830 
831   atomic_store_explicit((_Atomic uint32_t*) iou->sqtail,
832                         *iou->sqtail + 1,
833                         memory_order_release);
834 
835   flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
836                                memory_order_acquire);
837 
838   if (flags & UV__IORING_SQ_NEED_WAKEUP)
839     if (uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_SQ_WAKEUP))
840       if (errno != EOWNERDEAD)  /* Kernel bug. Harmless, ignore. */
841         perror("libuv: io_uring_enter(wakeup)");  /* Can't happen. */
842 }
843 
844 
845 int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req) {
846   struct uv__io_uring_sqe* sqe;
847   struct uv__iou* iou;
848   int kv;
849 
850   kv = uv__kernel_version();
851   /* Work around a poorly understood bug in older kernels where closing a file
852    * descriptor pointing to /foo/bar results in ETXTBSY errors when trying to
853    * execve("/foo/bar") later on. The bug seems to have been fixed somewhere
854    * between 5.15.85 and 5.15.90. I couldn't pinpoint the responsible commit
855    * but good candidates are the several data race fixes. Interestingly, it
856    * seems to manifest only when running under Docker so the possibility of
857    * a Docker bug can't be completely ruled out either. Yay, computers.
858    * Also, disable on non-longterm versions between 5.16.0 (non-longterm) and
859    * 6.1.0 (longterm). Starting with longterm 6.1.x, the issue seems to be
860    * solved.
861    */
862   if (kv < /* 5.15.90 */ 0x050F5A)
863     return 0;
864 
865   if (kv >= /* 5.16.0 */ 0x050A00 && kv < /* 6.1.0 */ 0x060100)
866     return 0;
867 
868 
869   iou = &uv__get_internal_fields(loop)->iou;
870 
871   sqe = uv__iou_get_sqe(iou, loop, req);
872   if (sqe == NULL)
873     return 0;
874 
875   sqe->fd = req->file;
876   sqe->opcode = UV__IORING_OP_CLOSE;
877 
878   uv__iou_submit(iou);
879 
880   return 1;
881 }
882 
883 
884 int uv__iou_fs_fsync_or_fdatasync(uv_loop_t* loop,
885                                   uv_fs_t* req,
886                                   uint32_t fsync_flags) {
887   struct uv__io_uring_sqe* sqe;
888   struct uv__iou* iou;
889 
890   iou = &uv__get_internal_fields(loop)->iou;
891 
892   sqe = uv__iou_get_sqe(iou, loop, req);
893   if (sqe == NULL)
894     return 0;
895 
896   /* Little-known fact: setting sqe->off and sqe->len turns
897    * it into an asynchronous sync_file_range() operation.
898    */
899   sqe->fd = req->file;
900   sqe->fsync_flags = fsync_flags;
901   sqe->opcode = UV__IORING_OP_FSYNC;
902 
903   uv__iou_submit(iou);
904 
905   return 1;
906 }
907 
908 
909 int uv__iou_fs_link(uv_loop_t* loop, uv_fs_t* req) {
910   struct uv__io_uring_sqe* sqe;
911   struct uv__iou* iou;
912 
913   iou = &uv__get_internal_fields(loop)->iou;
914 
915   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
916     return 0;
917 
918   sqe = uv__iou_get_sqe(iou, loop, req);
919   if (sqe == NULL)
920     return 0;
921 
922   sqe->addr = (uintptr_t) req->path;
923   sqe->fd = AT_FDCWD;
924   sqe->addr2 = (uintptr_t) req->new_path;
925   sqe->len = AT_FDCWD;
926   sqe->opcode = UV__IORING_OP_LINKAT;
927 
928   uv__iou_submit(iou);
929 
930   return 1;
931 }
932 
933 
934 int uv__iou_fs_mkdir(uv_loop_t* loop, uv_fs_t* req) {
935   struct uv__io_uring_sqe* sqe;
936   struct uv__iou* iou;
937 
938   iou = &uv__get_internal_fields(loop)->iou;
939 
940   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
941     return 0;
942 
943   sqe = uv__iou_get_sqe(iou, loop, req);
944   if (sqe == NULL)
945     return 0;
946 
947   sqe->addr = (uintptr_t) req->path;
948   sqe->fd = AT_FDCWD;
949   sqe->len = req->mode;
950   sqe->opcode = UV__IORING_OP_MKDIRAT;
951 
952   uv__iou_submit(iou);
953 
954   return 1;
955 }
956 
957 
958 int uv__iou_fs_open(uv_loop_t* loop, uv_fs_t* req) {
959   struct uv__io_uring_sqe* sqe;
960   struct uv__iou* iou;
961 
962   iou = &uv__get_internal_fields(loop)->iou;
963 
964   sqe = uv__iou_get_sqe(iou, loop, req);
965   if (sqe == NULL)
966     return 0;
967 
968   sqe->addr = (uintptr_t) req->path;
969   sqe->fd = AT_FDCWD;
970   sqe->len = req->mode;
971   sqe->opcode = UV__IORING_OP_OPENAT;
972   sqe->open_flags = req->flags | O_CLOEXEC;
973 
974   uv__iou_submit(iou);
975 
976   return 1;
977 }
978 
979 
980 int uv__iou_fs_rename(uv_loop_t* loop, uv_fs_t* req) {
981   struct uv__io_uring_sqe* sqe;
982   struct uv__iou* iou;
983 
984   iou = &uv__get_internal_fields(loop)->iou;
985 
986   sqe = uv__iou_get_sqe(iou, loop, req);
987   if (sqe == NULL)
988     return 0;
989 
990   sqe->addr = (uintptr_t) req->path;
991   sqe->fd = AT_FDCWD;
992   sqe->addr2 = (uintptr_t) req->new_path;
993   sqe->len = AT_FDCWD;
994   sqe->opcode = UV__IORING_OP_RENAMEAT;
995 
996   uv__iou_submit(iou);
997 
998   return 1;
999 }
1000 
1001 
1002 int uv__iou_fs_symlink(uv_loop_t* loop, uv_fs_t* req) {
1003   struct uv__io_uring_sqe* sqe;
1004   struct uv__iou* iou;
1005 
1006   iou = &uv__get_internal_fields(loop)->iou;
1007 
1008   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
1009     return 0;
1010 
1011   sqe = uv__iou_get_sqe(iou, loop, req);
1012   if (sqe == NULL)
1013     return 0;
1014 
1015   sqe->addr = (uintptr_t) req->path;
1016   sqe->fd = AT_FDCWD;
1017   sqe->addr2 = (uintptr_t) req->new_path;
1018   sqe->opcode = UV__IORING_OP_SYMLINKAT;
1019 
1020   uv__iou_submit(iou);
1021 
1022   return 1;
1023 }
1024 
1025 
1026 int uv__iou_fs_unlink(uv_loop_t* loop, uv_fs_t* req) {
1027   struct uv__io_uring_sqe* sqe;
1028   struct uv__iou* iou;
1029 
1030   iou = &uv__get_internal_fields(loop)->iou;
1031 
1032   sqe = uv__iou_get_sqe(iou, loop, req);
1033   if (sqe == NULL)
1034     return 0;
1035 
1036   sqe->addr = (uintptr_t) req->path;
1037   sqe->fd = AT_FDCWD;
1038   sqe->opcode = UV__IORING_OP_UNLINKAT;
1039 
1040   uv__iou_submit(iou);
1041 
1042   return 1;
1043 }
1044 
1045 
1046 int uv__iou_fs_read_or_write(uv_loop_t* loop,
1047                              uv_fs_t* req,
1048                              int is_read) {
1049   struct uv__io_uring_sqe* sqe;
1050   struct uv__iou* iou;
1051 
1052   /* If iovcnt is greater than IOV_MAX, cap it to IOV_MAX on reads and fall back
1053    * to the threadpool on writes */
1054   if (req->nbufs > IOV_MAX) {
1055     if (is_read)
1056       req->nbufs = IOV_MAX;
1057     else
1058       return 0;
1059   }
1060 
1061   iou = &uv__get_internal_fields(loop)->iou;
1062 
1063   sqe = uv__iou_get_sqe(iou, loop, req);
1064   if (sqe == NULL)
1065     return 0;
1066 
1067   sqe->addr = (uintptr_t) req->bufs;
1068   sqe->fd = req->file;
1069   sqe->len = req->nbufs;
1070   sqe->off = req->off < 0 ? -1 : req->off;
1071   sqe->opcode = is_read ? UV__IORING_OP_READV : UV__IORING_OP_WRITEV;
1072 
1073   uv__iou_submit(iou);
1074 
1075   return 1;
1076 }
1077 
1078 
1079 int uv__iou_fs_statx(uv_loop_t* loop,
1080                      uv_fs_t* req,
1081                      int is_fstat,
1082                      int is_lstat) {
1083   struct uv__io_uring_sqe* sqe;
1084   struct uv__statx* statxbuf;
1085   struct uv__iou* iou;
1086 
1087   statxbuf = uv__malloc(sizeof(*statxbuf));
1088   if (statxbuf == NULL)
1089     return 0;
1090 
1091   iou = &uv__get_internal_fields(loop)->iou;
1092 
1093   sqe = uv__iou_get_sqe(iou, loop, req);
1094   if (sqe == NULL) {
1095     uv__free(statxbuf);
1096     return 0;
1097   }
1098 
1099   req->ptr = statxbuf;
1100 
1101   sqe->addr = (uintptr_t) req->path;
1102   sqe->addr2 = (uintptr_t) statxbuf;
1103   sqe->fd = AT_FDCWD;
1104   sqe->len = 0xFFF; /* STATX_BASIC_STATS + STATX_BTIME */
1105   sqe->opcode = UV__IORING_OP_STATX;
1106 
1107   if (is_fstat) {
1108     sqe->addr = (uintptr_t) "";
1109     sqe->fd = req->file;
1110     sqe->statx_flags |= 0x1000; /* AT_EMPTY_PATH */
1111   }
1112 
1113   if (is_lstat)
1114     sqe->statx_flags |= AT_SYMLINK_NOFOLLOW;
1115 
1116   uv__iou_submit(iou);
1117 
1118   return 1;
1119 }
1120 
1121 
1122 void uv__statx_to_stat(const struct uv__statx* statxbuf, uv_stat_t* buf) {
1123   buf->st_dev = makedev(statxbuf->stx_dev_major, statxbuf->stx_dev_minor);
1124   buf->st_mode = statxbuf->stx_mode;
1125   buf->st_nlink = statxbuf->stx_nlink;
1126   buf->st_uid = statxbuf->stx_uid;
1127   buf->st_gid = statxbuf->stx_gid;
1128   buf->st_rdev = makedev(statxbuf->stx_rdev_major, statxbuf->stx_rdev_minor);
1129   buf->st_ino = statxbuf->stx_ino;
1130   buf->st_size = statxbuf->stx_size;
1131   buf->st_blksize = statxbuf->stx_blksize;
1132   buf->st_blocks = statxbuf->stx_blocks;
1133   buf->st_atim.tv_sec = statxbuf->stx_atime.tv_sec;
1134   buf->st_atim.tv_nsec = statxbuf->stx_atime.tv_nsec;
1135   buf->st_mtim.tv_sec = statxbuf->stx_mtime.tv_sec;
1136   buf->st_mtim.tv_nsec = statxbuf->stx_mtime.tv_nsec;
1137   buf->st_ctim.tv_sec = statxbuf->stx_ctime.tv_sec;
1138   buf->st_ctim.tv_nsec = statxbuf->stx_ctime.tv_nsec;
1139   buf->st_birthtim.tv_sec = statxbuf->stx_btime.tv_sec;
1140   buf->st_birthtim.tv_nsec = statxbuf->stx_btime.tv_nsec;
1141   buf->st_flags = 0;
1142   buf->st_gen = 0;
1143 }
1144 
1145 
1146 static void uv__iou_fs_statx_post(uv_fs_t* req) {
1147   struct uv__statx* statxbuf;
1148   uv_stat_t* buf;
1149 
1150   buf = &req->statbuf;
1151   statxbuf = req->ptr;
1152   req->ptr = NULL;
1153 
1154   if (req->result == 0) {
1155     uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
1156     uv__statx_to_stat(statxbuf, buf);
1157     req->ptr = buf;
1158   }
1159 
1160   uv__free(statxbuf);
1161 }
1162 
1163 
1164 static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) {
1165   struct uv__io_uring_cqe* cqe;
1166   struct uv__io_uring_cqe* e;
1167   uv_fs_t* req;
1168   uint32_t head;
1169   uint32_t tail;
1170   uint32_t mask;
1171   uint32_t i;
1172   uint32_t flags;
1173   int nevents;
1174   int rc;
1175 
1176   head = *iou->cqhead;
1177   tail = atomic_load_explicit((_Atomic uint32_t*) iou->cqtail,
1178                               memory_order_acquire);
1179   mask = iou->cqmask;
1180   cqe = iou->cqe;
1181   nevents = 0;
1182 
1183   for (i = head; i != tail; i++) {
1184     e = &cqe[i & mask];
1185 
1186     req = (uv_fs_t*) (uintptr_t) e->user_data;
1187     assert(req->type == UV_FS);
1188 
1189     uv__req_unregister(loop, req);
1190     iou->in_flight--;
1191 
1192     /* If the op is not supported by the kernel retry using the thread pool */
1193     if (e->res == -EOPNOTSUPP) {
1194       uv__fs_post(loop, req);
1195       continue;
1196     }
1197 
1198     /* io_uring stores error codes as negative numbers, same as libuv. */
1199     req->result = e->res;
1200 
1201     switch (req->fs_type) {
1202       case UV_FS_FSTAT:
1203       case UV_FS_LSTAT:
1204       case UV_FS_STAT:
1205         uv__iou_fs_statx_post(req);
1206         break;
1207       default:  /* Squelch -Wswitch warnings. */
1208         break;
1209     }
1210 
1211     uv__metrics_update_idle_time(loop);
1212     req->cb(req);
1213     nevents++;
1214   }
1215 
1216   atomic_store_explicit((_Atomic uint32_t*) iou->cqhead,
1217                         tail,
1218                         memory_order_release);
1219 
1220   /* Check whether CQEs overflowed; if so, enter the kernel to make them
1221    * available. Don't grab them immediately but in the next loop iteration to
1222    * avoid loop starvation. */
1223   flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
1224                                memory_order_acquire);
1225 
1226   if (flags & UV__IORING_SQ_CQ_OVERFLOW) {
1227     do
1228       rc = uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_GETEVENTS);
1229     while (rc == -1 && errno == EINTR);
1230 
1231     if (rc < 0)
1232       perror("libuv: io_uring_enter(getevents)");  /* Can't happen. */
1233   }
1234 
1235   uv__metrics_inc_events(loop, nevents);
1236   if (uv__get_internal_fields(loop)->current_timeout == 0)
1237     uv__metrics_inc_events_waiting(loop, nevents);
1238 }
1239 
1240 
1241 static void uv__epoll_ctl_prep(int epollfd,
1242                                struct uv__iou* ctl,
1243                                struct epoll_event (*events)[256],
1244                                int op,
1245                                int fd,
1246                                struct epoll_event* e) {
1247   struct uv__io_uring_sqe* sqe;
1248   struct epoll_event* pe;
1249   uint32_t mask;
1250   uint32_t slot;
1251   int ret = 0;
1252 
1253   if (ctl->ringfd == -1) {
1254     if (!uv__epoll_ctl(epollfd, op, fd, e))
1255       return;
1256 
1257     if (op == EPOLL_CTL_DEL)
1258       return;  /* Ignore errors, may be racing with another thread. */
1259 
1260     if (op != EPOLL_CTL_ADD) {
1261 #ifdef USE_OHOS_DFX
1262       UV_ERRNO_ABORT("errno is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1263         errno, fd, epollfd, __FILE__, __func__, __LINE__);
1264 #else
1265       abort();
1266 #endif
1267     }
1268 
1269     if (errno != EEXIST) {
1270 #ifdef USE_OHOS_DFX
1271       UV_ERRNO_ABORT("errno is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1272         errno, fd, epollfd, __FILE__, __func__, __LINE__);
1273 #else
1274       abort();
1275 #endif
1276     }
1277 
1278     /* File descriptor that's been watched before, update event mask. */
1279     ret = uv__epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e);
1280     if (!ret)
1281       return;
1282 
1283 #ifdef USE_OHOS_DFX
1284     UV_ERRNO_ABORT("errno is %d, uv__epoll_ctl ret is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1285       errno, ret, fd, epollfd, __FILE__, __func__, __LINE__);
1286 #else
1287     abort();
1288 #endif
1289   } else {
1290     mask = ctl->sqmask;
1291     slot = (*ctl->sqtail)++ & mask;
1292 
1293     pe = &(*events)[slot];
1294     *pe = *e;
1295 
1296     sqe = ctl->sqe;
1297     sqe = &sqe[slot];
1298 
1299     memset(sqe, 0, sizeof(*sqe));
1300     sqe->addr = (uintptr_t) pe;
1301     sqe->fd = epollfd;
1302     sqe->len = op;
1303     sqe->off = fd;
1304     sqe->opcode = UV__IORING_OP_EPOLL_CTL;
1305     sqe->user_data = op | slot << 2 | (int64_t) fd << 32;
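    /* user_data layout: bits 0-1 hold the epoll op, bits 2-9 the slot in the
     * 256-entry prep array, bits 32 and up the fd. uv__epoll_ctl_flush()
     * unpacks it the same way; e.g. op EPOLL_CTL_ADD (1), slot 5, fd 42
     * round-trips as 1 | 5 << 2 | 42ll << 32.
     */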
1306 
1307     if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask))
1308       uv__epoll_ctl_flush(epollfd, ctl, events);
1309   }
1310 }
1311 
1312 
1313 static void uv__epoll_ctl_flush(int epollfd,
1314                                 struct uv__iou* ctl,
1315                                 struct epoll_event (*events)[256]) {
1316   struct epoll_event oldevents[256];
1317   struct uv__io_uring_cqe* cqe;
1318   uint32_t oldslot;
1319   uint32_t slot;
1320   uint32_t n;
1321   int fd;
1322   int op;
1323   int rc;
1324 
1325   STATIC_ASSERT(sizeof(oldevents) == sizeof(*events));
1326   assert(ctl->ringfd != -1);
1327   assert(*ctl->sqhead != *ctl->sqtail);
1328 
1329   n = *ctl->sqtail - *ctl->sqhead;
1330   do
1331     rc = uv__io_uring_enter(ctl->ringfd, n, n, UV__IORING_ENTER_GETEVENTS);
1332   while (rc == -1 && errno == EINTR);
1333 
1334   if (rc < 0)
1335     perror("libuv: io_uring_enter(getevents)");  /* Can't happen. */
1336 
1337   if (rc != (int) n)
1338     abort();
1339 
1340   assert(*ctl->sqhead == *ctl->sqtail);
1341 
1342   memcpy(oldevents, *events, sizeof(*events));
1343 
1344   /* Failed submissions are either EPOLL_CTL_DEL commands for file descriptors
1345    * that have been closed, or EPOLL_CTL_ADD commands for file descriptors
1346    * that we are already watching. Ignore the former and retry the latter
1347    * with EPOLL_CTL_MOD.
1348    */
1349   while (*ctl->cqhead != *ctl->cqtail) {
1350     slot = (*ctl->cqhead)++ & ctl->cqmask;
1351 
1352     cqe = ctl->cqe;
1353     cqe = &cqe[slot];
1354 
1355     if (cqe->res == 0)
1356       continue;
1357 
1358     fd = cqe->user_data >> 32;
1359     op = 3 & cqe->user_data;
1360     oldslot = 255 & (cqe->user_data >> 2);
1361 
1362     if (op == EPOLL_CTL_DEL)
1363       continue;
1364 
1365     if (op != EPOLL_CTL_ADD)
1366       abort();
1367 
1368     if (cqe->res != -EEXIST)
1369       abort();
1370 
1371     uv__epoll_ctl_prep(epollfd,
1372                        ctl,
1373                        events,
1374                        EPOLL_CTL_MOD,
1375                        fd,
1376                        &oldevents[oldslot]);
1377   }
1378 }
1379 
1380 
1381 void uv__io_poll(uv_loop_t* loop, int timeout) {
1382   uv__loop_internal_fields_t* lfields;
1383   struct epoll_event events[1024];
1384   struct epoll_event prep[256];
1385   struct uv__invalidate inv;
1386   struct epoll_event* pe;
1387   struct epoll_event e;
1388   struct uv__iou* ctl;
1389   struct uv__iou* iou;
1390   int real_timeout;
1391   struct uv__queue* q;
1392   uv__io_t* w;
1393   sigset_t* sigmask;
1394   sigset_t sigset;
1395   uint64_t base;
1396   int have_iou_events;
1397   int have_signals;
1398   int nevents;
1399   int epollfd;
1400   int count;
1401   int nfds;
1402   int fd;
1403   int op;
1404   int i;
1405   int user_timeout;
1406   int reset_timeout;
1407 
1408   lfields = uv__get_internal_fields(loop);
1409   ctl = &lfields->ctl;
1410   iou = &lfields->iou;
1411 
1412   sigmask = NULL;
1413   if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
1414     sigemptyset(&sigset);
1415     sigaddset(&sigset, SIGPROF);
1416     sigmask = &sigset;
1417   }
1418 
1419   assert(timeout >= -1);
1420   base = loop->time;
1421   count = 48; /* Benchmarks suggest this gives the best throughput. */
1422   real_timeout = timeout;
1423 
1424   if (lfields->flags & UV_METRICS_IDLE_TIME) {
1425     reset_timeout = 1;
1426     user_timeout = timeout;
1427     timeout = 0;
1428   } else {
1429     reset_timeout = 0;
1430     user_timeout = 0;
1431   }
1432 
1433   epollfd = loop->backend_fd;
1434 
1435   memset(&e, 0, sizeof(e));
1436 
1437   while (!uv__queue_empty(&loop->watcher_queue)) {
1438     q = uv__queue_head(&loop->watcher_queue);
1439     w = uv__queue_data(q, uv__io_t, watcher_queue);
1440     uv__queue_remove(q);
1441     uv__queue_init(q);
1442 
1443     op = EPOLL_CTL_MOD;
1444     if (w->events == 0)
1445       op = EPOLL_CTL_ADD;
1446 
1447     w->events = w->pevents;
1448     e.events = w->pevents;
1449     e.data.fd = w->fd;
1450 
1451     uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e);
1452   }
1453 
1454   inv.events = events;
1455   inv.prep = &prep;
1456   inv.nfds = -1;
1457 
1458   for (;;) {
1459     if (loop->nfds == 0)
1460       if (iou->in_flight == 0)
1461         break;
1462 
1463     /* All event mask mutations should be visible to the kernel before
1464      * we enter epoll_pwait().
1465      */
1466     if (ctl->ringfd != -1)
1467       while (*ctl->sqhead != *ctl->sqtail)
1468         uv__epoll_ctl_flush(epollfd, ctl, &prep);
1469 
1470     /* Only need to set the provider_entry_time if timeout != 0. The function
1471      * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
1472      */
1473     if (timeout != 0)
1474       uv__metrics_set_provider_entry_time(loop);
1475 
1476     /* Store the current timeout in a location that's globally accessible so
1477      * other locations like uv__work_done() can determine whether the queue
1478      * of events in the callback were waiting when poll was called.
1479      */
1480     lfields->current_timeout = timeout;
1481 #ifdef USE_FFRT
1482     if (ffrt_get_cur_task() == NULL) {
1483       nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask);
1484     } else {
1485       nfds = uv__epoll_wait(events, ARRAY_SIZE(events), timeout);
1486     }
1487 #else
1488     nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask);
1489 #endif
1490 
1491     /* Update loop->time unconditionally. It's tempting to skip the update when
1492      * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
1493      * operating system didn't reschedule our process while in the syscall.
1494      */
1495     SAVE_ERRNO(uv__update_time(loop));
1496 
1497     if (nfds == -1)
1498       assert(errno == EINTR);
1499     else if (nfds == 0)
1500       /* Unlimited timeout should only return with events or signal. */
1501       assert(timeout != -1);
1502 
1503     if (nfds == 0 || nfds == -1) {
1504       if (reset_timeout != 0) {
1505         timeout = user_timeout;
1506         reset_timeout = 0;
1507       } else if (nfds == 0) {
1508         return;
1509       }
1510 
1511       /* Interrupted by a signal. Update timeout and poll again. */
1512       goto update_timeout;
1513     }
1514 
1515     have_iou_events = 0;
1516     have_signals = 0;
1517     nevents = 0;
1518 
1519     inv.nfds = nfds;
1520     lfields->inv = &inv;
1521 
1522     for (i = 0; i < nfds; i++) {
1523       pe = events + i;
1524       fd = pe->data.fd;
1525 
1526       /* Skip invalidated events, see uv__platform_invalidate_fd */
1527       if (fd == -1)
1528         continue;
1529 
1530       if (fd == iou->ringfd) {
1531         uv__poll_io_uring(loop, iou);
1532         have_iou_events = 1;
1533         continue;
1534       }
1535 
1536 #ifndef USE_OHOS_DFX
1537       assert(fd >= 0);
1538       assert((unsigned) fd < loop->nwatchers);
1539 #else
1540       if (fd < 0 || (unsigned) fd >= loop->nwatchers)
1541         continue;
1542 #endif
1543 
1544       w = loop->watchers[fd];
1545 
1546       if (w == NULL) {
1547         /* File descriptor that we've stopped watching, disarm it.
1548          *
1549          * Ignore all errors because we may be racing with another thread
1550          * when the file descriptor is closed.
1551          */
1552 #ifdef USE_FFRT
1553         if (ffrt_get_cur_task() != NULL) {
1554           UV_LOGF("fd %{public}d doesn't belong to loop %{public}zu", fd, (size_t)loop);
1555         }
1556 #endif
1557         uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe);
1558         continue;
1559       }
1560 
1561       /* Give users only events they're interested in. Prevents spurious
1562        * callbacks when previous callback invocation in this loop has stopped
1563        * the current watcher. Also, filters out events that the user has not
1564        * requested us to watch.
1565        */
1566       pe->events &= w->pevents | POLLERR | POLLHUP;
1567 
1568       /* Work around an epoll quirk where it sometimes reports just the
1569        * EPOLLERR or EPOLLHUP event.  In order to force the event loop to
1570        * move forward, we merge in the read/write events that the watcher
1571        * is interested in; uv__read() and uv__write() will then deal with
1572        * the error or hangup in the usual fashion.
1573        *
1574        * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
1575        * reads the available data, calls uv_read_stop(), then sometime later
1576        * calls uv_read_start() again.  By then, libuv has forgotten about the
1577        * hangup and the kernel won't report EPOLLIN again because there's
1578        * nothing left to read.  If anything, libuv is to blame here.  The
1579        * current hack is just a quick bandaid; to properly fix it, libuv
1580        * needs to remember the error/hangup event.  We should get that for
1581        * free when we switch over to edge-triggered I/O.
1582        */
1583       if (pe->events == POLLERR || pe->events == POLLHUP)
1584         pe->events |=
1585           w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);
1586 
1587       if (pe->events != 0) {
1588         /* Run signal watchers last.  This also affects child process watchers
1589          * because those are implemented in terms of signal watchers.
1590          */
1591         if (w == &loop->signal_io_watcher) {
1592           have_signals = 1;
1593         } else {
1594           uv__metrics_update_idle_time(loop);
1595           w->cb(loop, w, pe->events);
1596         }
1597 
1598         nevents++;
1599       }
1600     }
1601 
1602     uv__metrics_inc_events(loop, nevents);
1603     if (reset_timeout != 0) {
1604       timeout = user_timeout;
1605       reset_timeout = 0;
1606       uv__metrics_inc_events_waiting(loop, nevents);
1607     }
1608 
1609     if (have_signals != 0) {
1610       uv__metrics_update_idle_time(loop);
1611       loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
1612     }
1613 
1614     lfields->inv = NULL;
1615 
1616     if (have_iou_events != 0)
1617       break;  /* Event loop should cycle now so don't poll again. */
1618 
1619     if (have_signals != 0)
1620       break;  /* Event loop should cycle now so don't poll again. */
1621 
1622     if (nevents != 0) {
1623       if (nfds == ARRAY_SIZE(events) && --count != 0) {
1624         /* Poll for more events but don't block this time. */
1625         timeout = 0;
1626         continue;
1627       }
1628       break;
1629     }
1630 
1631 update_timeout:
1632     if (timeout == 0)
1633       break;
1634 
1635     if (timeout == -1)
1636       continue;
1637 
1638     assert(timeout > 0);
1639 
1640     real_timeout -= (loop->time - base);
1641     if (real_timeout <= 0)
1642       break;
1643 
1644     timeout = real_timeout;
1645   }
1646 
1647   if (ctl->ringfd != -1)
1648     while (*ctl->sqhead != *ctl->sqtail)
1649       uv__epoll_ctl_flush(epollfd, ctl, &prep);
1650 }
1651 
1652 uint64_t uv__hrtime(uv_clocktype_t type) {
1653   static _Atomic clock_t fast_clock_id = -1;
1654   struct timespec t;
1655   clock_t clock_id;
1656 
1657   /* Prefer CLOCK_MONOTONIC_COARSE if available but only when it has
1658    * millisecond granularity or better.  CLOCK_MONOTONIC_COARSE is
1659    * serviced entirely from the vDSO, whereas CLOCK_MONOTONIC may
1660    * decide to make a costly system call.
1661    */
1662   /* TODO(bnoordhuis) Use CLOCK_MONOTONIC_COARSE for UV_CLOCK_PRECISE
1663    * when it has microsecond granularity or better (unlikely).
1664    */
1665   clock_id = CLOCK_MONOTONIC;
1666   if (type != UV_CLOCK_FAST)
1667     goto done;
1668 
1669   clock_id = atomic_load_explicit(&fast_clock_id, memory_order_relaxed);
1670   if (clock_id != -1)
1671     goto done;
1672 
1673   clock_id = CLOCK_MONOTONIC;
1674   if (0 == clock_getres(CLOCK_MONOTONIC_COARSE, &t))
1675     if (t.tv_nsec <= 1 * 1000 * 1000)
1676       clock_id = CLOCK_MONOTONIC_COARSE;
1677 
1678   atomic_store_explicit(&fast_clock_id, clock_id, memory_order_relaxed);
1679 
1680 done:
1681 
1682   if (clock_gettime(clock_id, &t))
1683     return 0;  /* Not really possible. */
1684 
1685   return t.tv_sec * (uint64_t) 1e9 + t.tv_nsec;
1686 }
1687 
1688 
1689 int uv_resident_set_memory(size_t* rss) {
1690   char buf[1024];
1691   const char* s;
1692   ssize_t n;
1693   long val;
1694   int fd;
1695   int i;
1696 
1697   do
1698     fd = open("/proc/self/stat", O_RDONLY);
1699   while (fd == -1 && errno == EINTR);
1700 
1701   if (fd == -1)
1702     return UV__ERR(errno);
1703 
1704   do
1705     n = read(fd, buf, sizeof(buf) - 1);
1706   while (n == -1 && errno == EINTR);
1707 
1708   uv__close(fd);
1709   if (n == -1)
1710     return UV__ERR(errno);
1711   buf[n] = '\0';
1712 
1713   s = strchr(buf, ' ');
1714   if (s == NULL)
1715     goto err;
1716 
1717   s += 1;
1718   if (*s != '(')
1719     goto err;
1720 
1721   s = strchr(s, ')');
1722   if (s == NULL)
1723     goto err;
1724 
1725   for (i = 1; i <= 22; i++) {
1726     s = strchr(s + 1, ' ');
1727     if (s == NULL)
1728       goto err;
1729   }
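  /* s now points at the space before field 24 of /proc/self/stat, the
   * resident set size in pages (hence the getpagesize() scaling below).
   * The fields are skipped relative to the closing ')' because field 2,
   * the command name, may itself contain spaces.
   */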
1730 
1731   errno = 0;
1732   val = strtol(s, NULL, 10);
1733   if (errno != 0)
1734     goto err;
1735   if (val < 0)
1736     goto err;
1737 
1738   *rss = val * getpagesize();
1739   return 0;
1740 
1741 err:
1742   return UV_EINVAL;
1743 }
1744 
1745 int uv_uptime(double* uptime) {
1746   struct timespec now;
1747   char buf[128];
1748 
1749   /* Consult /proc/uptime when present (common case), or fall back to
1750    * clock_gettime. Why not always clock_gettime? It doesn't always return the
1751    * right result under OpenVZ and possibly other containerized environments.
1752    */
1753   if (0 == uv__slurp("/proc/uptime", buf, sizeof(buf)))
1754     if (1 == sscanf(buf, "%lf", uptime))
1755       return 0;
1756 
1757   if (clock_gettime(CLOCK_BOOTTIME, &now))
1758     return UV__ERR(errno);
1759 
1760   *uptime = now.tv_sec;
1761   return 0;
1762 }
1763 
1764 
1765 int uv_cpu_info(uv_cpu_info_t** ci, int* count) {
1766 #if defined(__PPC__)
1767   static const char model_marker[] = "cpu\t\t: ";
1768 #elif defined(__arm__)
1769   static const char model_marker[] = "Processor\t: ";
1770 #elif defined(__aarch64__)
1771   static const char model_marker[] = "CPU part\t: ";
1772 #elif defined(__mips__)
1773   static const char model_marker[] = "cpu model\t\t: ";
1774 #elif defined(__loongarch__)
1775   static const char model_marker[] = "cpu family\t\t: ";
1776 #else
1777   static const char model_marker[] = "model name\t: ";
1778 #endif
1779   static const char parts[] =
1780 #ifdef __aarch64__
1781     "0x811\nARM810\n"       "0x920\nARM920\n"      "0x922\nARM922\n"
1782     "0x926\nARM926\n"       "0x940\nARM940\n"      "0x946\nARM946\n"
1783     "0x966\nARM966\n"       "0xa20\nARM1020\n"      "0xa22\nARM1022\n"
1784     "0xa26\nARM1026\n"      "0xb02\nARM11 MPCore\n" "0xb36\nARM1136\n"
1785     "0xb56\nARM1156\n"      "0xb76\nARM1176\n"      "0xc05\nCortex-A5\n"
1786     "0xc07\nCortex-A7\n"    "0xc08\nCortex-A8\n"    "0xc09\nCortex-A9\n"
1787     "0xc0d\nCortex-A17\n"   /* Originally A12 */
1788     "0xc0f\nCortex-A15\n"   "0xc0e\nCortex-A17\n"   "0xc14\nCortex-R4\n"
1789     "0xc15\nCortex-R5\n"    "0xc17\nCortex-R7\n"    "0xc18\nCortex-R8\n"
1790     "0xc20\nCortex-M0\n"    "0xc21\nCortex-M1\n"    "0xc23\nCortex-M3\n"
1791     "0xc24\nCortex-M4\n"    "0xc27\nCortex-M7\n"    "0xc60\nCortex-M0+\n"
1792     "0xd01\nCortex-A32\n"   "0xd03\nCortex-A53\n"   "0xd04\nCortex-A35\n"
1793     "0xd05\nCortex-A55\n"   "0xd06\nCortex-A65\n"   "0xd07\nCortex-A57\n"
1794     "0xd08\nCortex-A72\n"   "0xd09\nCortex-A73\n"   "0xd0a\nCortex-A75\n"
1795     "0xd0b\nCortex-A76\n"   "0xd0c\nNeoverse-N1\n"  "0xd0d\nCortex-A77\n"
1796     "0xd0e\nCortex-A76AE\n" "0xd13\nCortex-R52\n"   "0xd20\nCortex-M23\n"
1797     "0xd21\nCortex-M33\n"   "0xd41\nCortex-A78\n"   "0xd42\nCortex-A78AE\n"
1798     "0xd4a\nNeoverse-E1\n"  "0xd4b\nCortex-A78C\n"
1799 #endif
1800     "";
1801   struct cpu {
1802     unsigned long long freq, user, nice, sys, idle, irq;
1803     unsigned model;
1804   };
1805   FILE* fp;
1806   char* p;
1807   int found;
1808   int n;
1809   unsigned i;
1810   unsigned cpu;
1811   unsigned maxcpu;
1812   unsigned size;
1813   unsigned long long skip;
1814   struct cpu (*cpus)[8192];  /* Kernel maximum. */
1815   struct cpu* c;
1816   struct cpu t;
1817   char (*model)[64];
1818   unsigned char bitmap[ARRAY_SIZE(*cpus) / 8];
1819   /* Assumption: even big.LITTLE systems will have only a handful
1820    * of different CPU models. Most systems will just have one.
1821    */
1822   char models[8][64];
1823   char buf[1024];
1824 
1825   memset(bitmap, 0, sizeof(bitmap));
1826   memset(models, 0, sizeof(models));
1827   snprintf(*models, sizeof(*models), "unknown");
1828   maxcpu = 0;
1829 
1830   cpus = uv__calloc(ARRAY_SIZE(*cpus), sizeof(**cpus));
1831   if (cpus == NULL)
1832     return UV_ENOMEM;
1833 
1834   fp = uv__open_file("/proc/stat");
1835   if (fp == NULL) {
1836     uv__free(cpus);
1837     return UV__ERR(errno);
1838   }
1839 
1840   if (NULL == fgets(buf, sizeof(buf), fp))
1841     abort();
1842 
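  /* The aggregate "cpu ..." line was consumed by the fgets() above.  The loop
   * below parses the per-CPU "cpuN ..." lines; the counters are in clock
   * ticks.  CPU numbering can be sparse (e.g. offline CPUs), so the bitmap
   * records which indices were actually seen.
   */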
1843   for (;;) {
1844     memset(&t, 0, sizeof(t));
1845 
1846     n = fscanf(fp, "cpu%u %llu %llu %llu %llu %llu %llu",
1847                &cpu, &t.user, &t.nice, &t.sys, &t.idle, &skip, &t.irq);
1848 
1849     if (n != 7)
1850       break;
1851 
1852     if (NULL == fgets(buf, sizeof(buf), fp))
1853       abort();
1854 
1855     if (cpu >= ARRAY_SIZE(*cpus))
1856       continue;
1857 
1858     (*cpus)[cpu] = t;
1859 
1860     bitmap[cpu >> 3] |= 1 << (cpu & 7);
1861 
1862     if (cpu >= maxcpu)
1863       maxcpu = cpu + 1;
1864   }
1865 
1866   fclose(fp);
1867 
1868   fp = uv__open_file("/proc/cpuinfo");
1869   if (fp == NULL)
1870     goto nocpuinfo;
1871 
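  /* Walk /proc/cpuinfo one "processor : N" stanza at a time and look for the
   * architecture-specific model marker defined above; stanzas are separated
   * by blank lines (see the "next:" label below).
   */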
1872   for (;;) {
1873     if (1 != fscanf(fp, "processor\t: %u\n", &cpu))
1874       break;  /* Parse error. */
1875 
1876     found = 0;
1877     while (!found && fgets(buf, sizeof(buf), fp))
1878       found = !strncmp(buf, model_marker, sizeof(model_marker) - 1);
1879 
1880     if (!found)
1881       goto next;
1882 
1883     p = buf + sizeof(model_marker) - 1;
1884     n = (int) strcspn(p, "\n");
1885 
1886     /* arm64: translate CPU part code to model name. */
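    /* The parts table is a flat string of "0xPART\nName\n" pairs: memmem()
     * locates the part code including its trailing newline, and the readable
     * name is the text that follows, up to the next newline.
     */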
1887     if (*parts) {
1888       p = memmem(parts, sizeof(parts) - 1, p, n + 1);
1889       if (p == NULL)
1890         p = "unknown";
1891       else
1892         p += n + 1;
1893       n = (int) strcspn(p, "\n");
1894     }
1895 
1896     found = 0;
1897     for (model = models; !found && model < ARRAY_END(models); model++)
1898       found = !strncmp(p, *model, strlen(*model));
1899 
1900     if (!found)
1901       goto next;
1902 
1903     if (**model == '\0')
1904       snprintf(*model, sizeof(*model), "%.*s", n, p);
1905 
1906     if (cpu < maxcpu)
1907       (*cpus)[cpu].model = model - models;
1908 
1909 next:
1910     while (fgets(buf, sizeof(buf), fp))
1911       if (*buf == '\n')
1912         break;
1913   }
1914 
1915   fclose(fp);
1916   fp = NULL;
1917 
1918 nocpuinfo:
1919 
1920   n = 0;
1921   for (cpu = 0; cpu < maxcpu; cpu++) {
1922     if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
1923       continue;
1924 
1925     n++;
1926     snprintf(buf, sizeof(buf),
1927              "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq", cpu);
1928 
1929     fp = uv__open_file(buf);
1930     if (fp == NULL)
1931       continue;
1932 
1933     if (1 != fscanf(fp, "%llu", &(*cpus)[cpu].freq))
1934       abort();
1935     fclose(fp);
1936     fp = NULL;
1937   }
1938 
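  /* A single allocation holds the n uv_cpu_info_t entries followed by a copy
   * of the models table; each entry's .model pointer is later set to point
   * into that trailing copy.
   */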
1939   size = n * sizeof(**ci) + sizeof(models);
1940   *ci = uv__malloc(size);
1941   *count = 0;
1942 
1943   if (*ci == NULL) {
1944     uv__free(cpus);
1945     return UV_ENOMEM;
1946   }
1947 
1948   *count = n;
1949   p = memcpy(*ci + n, models, sizeof(models));
1950 
1951   i = 0;
1952   for (cpu = 0; cpu < maxcpu; cpu++) {
1953     if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
1954       continue;
1955 
1956     c = *cpus + cpu;
1957 
1958     (*ci)[i++] = (uv_cpu_info_t) {
1959       .model     = p + c->model * sizeof(*model),
1960       .speed     = c->freq / 1000,
1961       /* Note: sysconf(_SC_CLK_TCK) is fixed at 100 Hz,
1962        * therefore the multiplier is always 1000/100 = 10.
1963        */
1964       .cpu_times = (struct uv_cpu_times_s) {
1965         .user = 10 * c->user,
1966         .nice = 10 * c->nice,
1967         .sys  = 10 * c->sys,
1968         .idle = 10 * c->idle,
1969         .irq  = 10 * c->irq,
1970       },
1971     };
1972   }
1973 
1974   uv__free(cpus);
1975 
1976   return 0;
1977 }
1978 
1979 
1980 static int uv__ifaddr_exclude(struct ifaddrs *ent, int exclude_type) {
1981   if (!((ent->ifa_flags & IFF_UP) && (ent->ifa_flags & IFF_RUNNING)))
1982     return 1;
1983   if (ent->ifa_addr == NULL)
1984     return 1;
1985   /*
1986    * On Linux getifaddrs returns information related to the raw underlying
1987    * devices. We're not interested in this information yet.
1988    */
1989   if (ent->ifa_addr->sa_family == PF_PACKET)
1990     return exclude_type;
1991   return !exclude_type;
1992 }
1993 
1994 int uv_interface_addresses(uv_interface_address_t** addresses, int* count) {
1995   struct ifaddrs *addrs, *ent;
1996   uv_interface_address_t* address;
1997   int i;
1998   struct sockaddr_ll *sll;
1999 
2000   *count = 0;
2001   *addresses = NULL;
2002 
2003   if (getifaddrs(&addrs))
2004     return UV__ERR(errno);
2005 
2006   /* Count the number of interfaces */
2007   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2008     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
2009       continue;
2010 
2011     (*count)++;
2012   }
2013 
2014   if (*count == 0) {
2015     freeifaddrs(addrs);
2016     return 0;
2017   }
2018 
2019   /* Make sure the memory is initialized to zero using calloc() */
2020   *addresses = uv__calloc(*count, sizeof(**addresses));
2021   if (!(*addresses)) {
2022     freeifaddrs(addrs);
2023     return UV_ENOMEM;
2024   }
2025 
2026   address = *addresses;
2027 
2028   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2029     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
2030       continue;
2031 
2032     address->name = uv__strdup(ent->ifa_name);
2033 
2034     if (ent->ifa_addr->sa_family == AF_INET6) {
2035       address->address.address6 = *((struct sockaddr_in6*) ent->ifa_addr);
2036     } else {
2037       address->address.address4 = *((struct sockaddr_in*) ent->ifa_addr);
2038     }
2039 
2040     if (ent->ifa_netmask->sa_family == AF_INET6) {
2041       address->netmask.netmask6 = *((struct sockaddr_in6*) ent->ifa_netmask);
2042     } else {
2043       address->netmask.netmask4 = *((struct sockaddr_in*) ent->ifa_netmask);
2044     }
2045 
2046     address->is_internal = !!(ent->ifa_flags & IFF_LOOPBACK);
2047 
2048     address++;
2049   }
2050 
2051   /* Fill in physical addresses for each interface */
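  /* getifaddrs() reports AF_PACKET entries separately from the AF_INET and
   * AF_INET6 entries handled above, so make a second pass to copy each
   * link-layer (MAC) address onto every matching interface, including its
   * ":<n>" aliases.
   */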
2052   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2053     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFPHYS))
2054       continue;
2055 
2056     address = *addresses;
2057 
2058     for (i = 0; i < (*count); i++) {
2059       size_t namelen = strlen(ent->ifa_name);
2060       /* Alias interfaces share the same physical address */
2061       if (strncmp(address->name, ent->ifa_name, namelen) == 0 &&
2062           (address->name[namelen] == 0 || address->name[namelen] == ':')) {
2063         sll = (struct sockaddr_ll*)ent->ifa_addr;
2064         memcpy(address->phys_addr, sll->sll_addr, sizeof(address->phys_addr));
2065       }
2066       address++;
2067     }
2068   }
2069 
2070   freeifaddrs(addrs);
2071 
2072   return 0;
2073 }
2074 
2075 
2076 void uv_free_interface_addresses(uv_interface_address_t* addresses,
2077   int count) {
2078   int i;
2079 
2080   for (i = 0; i < count; i++) {
2081     uv__free(addresses[i].name);
2082   }
2083 
2084   uv__free(addresses);
2085 }
2086 
2087 
2088 void uv__set_process_title(const char* title) {
2089 #if defined(PR_SET_NAME)
2090   prctl(PR_SET_NAME, title);  /* Only copies first 16 characters. */
2091 #endif
2092 }
2093 
2094 
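/* Returns the value of the given /proc/meminfo field converted from kB to
 * bytes, or 0 when the file cannot be read or the field is not present.
 */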
2095 static uint64_t uv__read_proc_meminfo(const char* what) {
2096   uint64_t rc;
2097   char* p;
2098   char buf[4096];  /* Large enough to hold all of /proc/meminfo. */
2099 
2100   if (uv__slurp("/proc/meminfo", buf, sizeof(buf)))
2101     return 0;
2102 
2103   p = strstr(buf, what);
2104 
2105   if (p == NULL)
2106     return 0;
2107 
2108   p += strlen(what);
2109 
2110   rc = 0;
2111   sscanf(p, "%" PRIu64 " kB", &rc);
2112 
2113   return rc * 1024;
2114 }
2115 
2116 
2117 uint64_t uv_get_free_memory(void) {
2118   struct sysinfo info;
2119   uint64_t rc;
2120 
2121   rc = uv__read_proc_meminfo("MemAvailable:");
2122 
2123   if (rc != 0)
2124     return rc;
2125 
2126   if (0 == sysinfo(&info))
2127     return (uint64_t) info.freeram * info.mem_unit;
2128 
2129   return 0;
2130 }
2131 
2132 
2133 uint64_t uv_get_total_memory(void) {
2134   struct sysinfo info;
2135   uint64_t rc;
2136 
2137   rc = uv__read_proc_meminfo("MemTotal:");
2138 
2139   if (rc != 0)
2140     return rc;
2141 
2142   if (0 == sysinfo(&info))
2143     return (uint64_t) info.totalram * info.mem_unit;
2144 
2145   return 0;
2146 }
2147 
2148 
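/* Reads a single unsigned integer from a procfs/sysfs-style file.  The
 * literal "max" (cgroup v2's "no limit" sentinel) maps to UINT64_MAX; zero
 * doubles as the error/absent value.
 */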
2149 static uint64_t uv__read_uint64(const char* filename) {
2150   char buf[32];  /* Large enough to hold an encoded uint64_t. */
2151   uint64_t rc;
2152 
2153   rc = 0;
2154   if (0 == uv__slurp(filename, buf, sizeof(buf)))
2155     if (1 != sscanf(buf, "%" PRIu64, &rc))
2156       if (0 == strcmp(buf, "max\n"))
2157         rc = UINT64_MAX;
2158 
2159   return rc;
2160 }
2161 
2162 
2163 /* Given a buffer with the contents of a cgroup v1 /proc/self/cgroup,
2164  * finds the location and length of the memory controller mount path.
2165  * This disregards the leading / for easy concatenation of paths.
2166  * Returns NULL if the memory controller wasn't found. */
2167 static char* uv__cgroup1_find_memory_controller(char buf[static 1024],
2168                                                 int* n) {
2169   char* p;
2170 
2171   /* Seek to the memory controller line. */
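  /* Each line has the form "hierarchy-id:controller-list:/path"; the match
   * below requires a line whose controller list is exactly "memory".
   */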
2172   p = strchr(buf, ':');
2173   while (p != NULL && strncmp(p, ":memory:", 8)) {
2174     p = strchr(p, '\n');
2175     if (p != NULL)
2176       p = strchr(p, ':');
2177   }
2178 
2179   if (p != NULL) {
2180     /* Determine the length of the mount path. */
2181     p = p + strlen(":memory:/");
2182     *n = (int) strcspn(p, "\n");
2183   }
2184 
2185   return p;
2186 }
2187 
2188 static void uv__get_cgroup1_memory_limits(char buf[static 1024], uint64_t* high,
2189                                           uint64_t* max) {
2190   char filename[4097];
2191   char* p;
2192   int n;
2193   uint64_t cgroup1_max;
2194 
2195   /* Find out where the controller is mounted. */
2196   p = uv__cgroup1_find_memory_controller(buf, &n);
2197   if (p != NULL) {
2198     snprintf(filename, sizeof(filename),
2199              "/sys/fs/cgroup/memory/%.*s/memory.soft_limit_in_bytes", n, p);
2200     *high = uv__read_uint64(filename);
2201 
2202     snprintf(filename, sizeof(filename),
2203              "/sys/fs/cgroup/memory/%.*s/memory.limit_in_bytes", n, p);
2204     *max = uv__read_uint64(filename);
2205 
2206     /* If the controller wasn't mounted, the reads above will have failed,
2207      * as indicated by uv__read_uint64 returning 0.
2208      */
2209      if (*high != 0 && *max != 0)
2210        goto update_limits;
2211   }
2212 
2213   /* Fall back to the limits of the global memory controller. */
2214   *high = uv__read_uint64("/sys/fs/cgroup/memory/memory.soft_limit_in_bytes");
2215   *max = uv__read_uint64("/sys/fs/cgroup/memory/memory.limit_in_bytes");
2216 
2217   /* uv__read_uint64 detects cgroup2's "max", so we need to separately detect
2218    * cgroup1's maximum value (which is derived from LONG_MAX and PAGE_SIZE).
2219    */
2220 update_limits:
2221   cgroup1_max = LONG_MAX & ~(sysconf(_SC_PAGESIZE) - 1);
2222   if (*high == cgroup1_max)
2223     *high = UINT64_MAX;
2224   if (*max == cgroup1_max)
2225     *max = UINT64_MAX;
2226 }
2227 
2228 static void uv__get_cgroup2_memory_limits(char buf[static 1024], uint64_t* high,
2229                                           uint64_t* max) {
2230   char filename[4097];
2231   char* p;
2232   int n;
2233 
2234   /* Find out where the controller is mounted. */
2235   p = buf + strlen("0::/");
2236   n = (int) strcspn(p, "\n");
2237 
2238   /* Read the memory limits of the controller. */
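  /* memory.max is the hard limit and memory.high the throttling limit; a
   * literal "max" in either file means "unlimited" and is mapped to
   * UINT64_MAX by uv__read_uint64().
   */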
2239   snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.max", n, p);
2240   *max = uv__read_uint64(filename);
2241   snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.high", n, p);
2242   *high = uv__read_uint64(filename);
2243 }
2244 
2245 static uint64_t uv__get_cgroup_constrained_memory(char buf[static 1024]) {
2246   uint64_t high;
2247   uint64_t max;
2248 
2249   /* In the case of cgroupv2, we'll only have a single entry. */
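  /* A host running purely on cgroup v2 exposes a single "0::/<path>" line in
   * /proc/self/cgroup; anything else is treated as cgroup v1 here.
   */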
2250   if (strncmp(buf, "0::/", 4))
2251     uv__get_cgroup1_memory_limits(buf, &high, &max);
2252   else
2253     uv__get_cgroup2_memory_limits(buf, &high, &max);
2254 
2255   if (high == 0 || max == 0)
2256     return 0;
2257 
2258   return high < max ? high : max;
2259 }
2260 
2261 uint64_t uv_get_constrained_memory(void) {
2262   char buf[1024];
2263 
2264   if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
2265     return 0;
2266 
2267   return uv__get_cgroup_constrained_memory(buf);
2268 }
2269 
2270 
2271 static uint64_t uv__get_cgroup1_current_memory(char buf[static 1024]) {
2272   char filename[4097];
2273   uint64_t current;
2274   char* p;
2275   int n;
2276 
2277   /* Find out where the controller is mounted. */
2278   p = uv__cgroup1_find_memory_controller(buf, &n);
2279   if (p != NULL) {
2280     snprintf(filename, sizeof(filename),
2281             "/sys/fs/cgroup/memory/%.*s/memory.usage_in_bytes", n, p);
2282     current = uv__read_uint64(filename);
2283 
2284     /* If the controller wasn't mounted, the reads above will have failed,
2285      * as indicated by uv__read_uint64 returning 0.
2286      */
2287     if (current != 0)
2288       return current;
2289   }
2290 
2291   /* Fall back to the usage of the global memory controller. */
2292   return uv__read_uint64("/sys/fs/cgroup/memory/memory.usage_in_bytes");
2293 }
2294 
2295 static uint64_t uv__get_cgroup2_current_memory(char buf[static 1024]) {
2296   char filename[4097];
2297   char* p;
2298   int n;
2299 
2300   /* Find out where the controller is mounted. */
2301   p = buf + strlen("0::/");
2302   n = (int) strcspn(p, "\n");
2303 
2304   snprintf(filename, sizeof(filename),
2305            "/sys/fs/cgroup/%.*s/memory.current", n, p);
2306   return uv__read_uint64(filename);
2307 }
2308 
2309 uint64_t uv_get_available_memory(void) {
2310   char buf[1024];
2311   uint64_t constrained;
2312   uint64_t current;
2313   uint64_t total;
2314 
2315   if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
2316     return 0;
2317 
2318   constrained = uv__get_cgroup_constrained_memory(buf);
2319   if (constrained == 0)
2320     return uv_get_free_memory();
2321 
2322   total = uv_get_total_memory();
2323   if (constrained > total)
2324     return uv_get_free_memory();
2325 
2326   /* In the case of cgroupv2, we'll only have a single entry. */
2327   if (strncmp(buf, "0::/", 4))
2328     current = uv__get_cgroup1_current_memory(buf);
2329   else
2330     current = uv__get_cgroup2_current_memory(buf);
2331 
2332   /* memory usage can be higher than the limit (for short bursts of time) */
2333   if (constrained < current)
2334     return 0;
2335 
2336   return constrained - current;
2337 }
2338 
2339 
2340 void uv_loadavg(double avg[3]) {
2341   struct sysinfo info;
2342   char buf[128];  /* Large enough to hold all of /proc/loadavg. */
2343 
2344   if (0 == uv__slurp("/proc/loadavg", buf, sizeof(buf)))
2345     if (3 == sscanf(buf, "%lf %lf %lf", &avg[0], &avg[1], &avg[2]))
2346       return;
2347 
2348   if (sysinfo(&info) < 0)
2349     return;
2350 
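  /* sysinfo() reports load averages as fixed-point numbers scaled by
   * 1 << SI_LOAD_SHIFT, i.e. 65536.
   */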
2351   avg[0] = (double) info.loads[0] / 65536.0;
2352   avg[1] = (double) info.loads[1] / 65536.0;
2353   avg[2] = (double) info.loads[2] / 65536.0;
2354 }
2355 
2356 
2357 static int compare_watchers(const struct watcher_list* a,
2358                             const struct watcher_list* b) {
2359   if (a->wd < b->wd) return -1;
2360   if (a->wd > b->wd) return 1;
2361   return 0;
2362 }
2363 
2364 
2365 static int init_inotify(uv_loop_t* loop) {
2366   int fd;
2367 
2368   if (loop->inotify_fd != -1)
2369     return 0;
2370 
2371   fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
2372   if (fd < 0)
2373     return UV__ERR(errno);
2374 
2375   loop->inotify_fd = fd;
2376   uv__io_init(&loop->inotify_read_watcher, uv__inotify_read, loop->inotify_fd);
2377   uv__io_start(loop, &loop->inotify_read_watcher, POLLIN);
2378 
2379   return 0;
2380 }
2381 
2382 
2383 static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root) {
2384   /* Open the inotify_fd, and re-arm all the inotify watchers. */
2385   int err;
2386   struct watcher_list* tmp_watcher_list_iter;
2387   struct watcher_list* watcher_list;
2388   struct watcher_list tmp_watcher_list;
2389   struct uv__queue queue;
2390   struct uv__queue* q;
2391   uv_fs_event_t* handle;
2392   char* tmp_path;
2393 
2394   if (root == NULL)
2395     return 0;
2396 
2397   /* We must restore the old watcher list to be able to close items
2398    * out of it.
2399    */
2400   loop->inotify_watchers = root;
2401 
2402   uv__queue_init(&tmp_watcher_list.watchers);
2403   /* Note that the queue we use is shared with the start and stop()
2404    * functions, making uv__queue_foreach unsafe to use. So we use the
2405    * uv__queue_move trick to safely iterate. Also don't free the watcher
2406    * list until we're done iterating. c.f. uv__inotify_read.
2407    */
2408   RB_FOREACH_SAFE(watcher_list, watcher_root,
2409                   uv__inotify_watchers(loop), tmp_watcher_list_iter) {
2410     watcher_list->iterating = 1;
2411     uv__queue_move(&watcher_list->watchers, &queue);
2412     while (!uv__queue_empty(&queue)) {
2413       q = uv__queue_head(&queue);
2414       handle = uv__queue_data(q, uv_fs_event_t, watchers);
2415       /* It's critical to keep a copy of path here, because it
2416        * will be set to NULL by stop() and then deallocated by
2417        * maybe_free_watcher_list
2418        */
2419       tmp_path = uv__strdup(handle->path);
2420       assert(tmp_path != NULL);
2421       uv__queue_remove(q);
2422       uv__queue_insert_tail(&watcher_list->watchers, q);
2423       uv_fs_event_stop(handle);
2424 
2425       uv__queue_insert_tail(&tmp_watcher_list.watchers, &handle->watchers);
2426       handle->path = tmp_path;
2427     }
2428     watcher_list->iterating = 0;
2429     maybe_free_watcher_list(watcher_list, loop);
2430   }
2431 
2432   uv__queue_move(&tmp_watcher_list.watchers, &queue);
2433   while (!uv__queue_empty(&queue)) {
2434       q = uv__queue_head(&queue);
2435       uv__queue_remove(q);
2436       handle = uv__queue_data(q, uv_fs_event_t, watchers);
2437       tmp_path = handle->path;
2438       handle->path = NULL;
2439       err = uv_fs_event_start(handle, handle->cb, tmp_path, 0);
2440       uv__free(tmp_path);
2441       if (err)
2442         return err;
2443   }
2444 
2445   return 0;
2446 }
2447 
2448 
2449 static struct watcher_list* find_watcher(uv_loop_t* loop, int wd) {
2450   struct watcher_list w;
2451   w.wd = wd;
2452   return RB_FIND(watcher_root, uv__inotify_watchers(loop), &w);
2453 }
2454 
2455 
2456 static void maybe_free_watcher_list(struct watcher_list* w, uv_loop_t* loop) {
2457   /* if the watcher_list->watchers is being iterated over, we can't free it. */
2458   if ((!w->iterating) && uv__queue_empty(&w->watchers)) {
2459     /* No watchers left for this path. Clean up. */
2460     RB_REMOVE(watcher_root, uv__inotify_watchers(loop), w);
2461     inotify_rm_watch(loop->inotify_fd, w->wd);
2462     uv__free(w);
2463   }
2464 }
2465 
2466 
2467 static void uv__inotify_read(uv_loop_t* loop,
2468                              uv__io_t* dummy,
2469                              unsigned int events) {
2470   const struct inotify_event* e;
2471   struct watcher_list* w;
2472   uv_fs_event_t* h;
2473   struct uv__queue queue;
2474   struct uv__queue* q;
2475   const char* path;
2476   ssize_t size;
2477   const char *p;
2478   /* needs to be large enough for sizeof(inotify_event) + strlen(path) */
2479   char buf[4096];
2480 
2481   for (;;) {
2482     do
2483       size = read(loop->inotify_fd, buf, sizeof(buf));
2484     while (size == -1 && errno == EINTR);
2485 
2486     if (size == -1) {
2487       assert(errno == EAGAIN || errno == EWOULDBLOCK);
2488       break;
2489     }
2490 
2491     assert(size > 0); /* pre-2.6.21 thing, size=0 == read buffer too small */
2492 
2493     /* Now we have one or more inotify_event structs. */
2494     for (p = buf; p < buf + size; p += sizeof(*e) + e->len) {
2495       e = (const struct inotify_event*) p;
2496 
2497       events = 0;
2498       if (e->mask & (IN_ATTRIB|IN_MODIFY))
2499         events |= UV_CHANGE;
2500       if (e->mask & ~(IN_ATTRIB|IN_MODIFY))
2501         events |= UV_RENAME;
2502 
2503       w = find_watcher(loop, e->wd);
2504       if (w == NULL)
2505         continue; /* Stale event, no watchers left. */
2506 
2507       /* inotify does not return the filename when monitoring a single file
2508        * for modifications. Repurpose the filename for API compatibility.
2509        * I'm not convinced this is a good thing, maybe it should go.
2510        */
2511       path = e->len ? (const char*) (e + 1) : uv__basename_r(w->path);
2512 
2513       /* We're about to iterate over the queue and call user's callbacks.
2514        * What can go wrong?
2515        * A callback could call uv_fs_event_stop()
2516        * and the queue can change under our feet.
2517        * So, we use uv__queue_move() trick to safely iterate over the queue.
2518        * And we don't free the watcher_list until we're done iterating.
2519        *
2520        * First,
2521        * tell uv_fs_event_stop() (that could be called from a user's callback)
2522        * not to free watcher_list.
2523        */
2524       w->iterating = 1;
2525       uv__queue_move(&w->watchers, &queue);
2526       while (!uv__queue_empty(&queue)) {
2527         q = uv__queue_head(&queue);
2528         h = uv__queue_data(q, uv_fs_event_t, watchers);
2529 
2530         uv__queue_remove(q);
2531         uv__queue_insert_tail(&w->watchers, q);
2532 
2533         h->cb(h, path, events, 0);
2534       }
2535       /* done iterating, time to (maybe) free empty watcher_list */
2536       w->iterating = 0;
2537       maybe_free_watcher_list(w, loop);
2538     }
2539   }
2540 }
2541 
2542 
2543 int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
2544   uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
2545   return 0;
2546 }
2547 
2548 
2549 int uv_fs_event_start(uv_fs_event_t* handle,
2550                       uv_fs_event_cb cb,
2551                       const char* path,
2552                       unsigned int flags) {
2553   struct watcher_list* w;
2554   uv_loop_t* loop;
2555   size_t len;
2556   int events;
2557   int err;
2558   int wd;
2559 
2560   if (uv__is_active(handle))
2561     return UV_EINVAL;
2562 
2563   loop = handle->loop;
2564 
2565   err = init_inotify(loop);
2566   if (err)
2567     return err;
2568 
2569   events = IN_ATTRIB
2570          | IN_CREATE
2571          | IN_MODIFY
2572          | IN_DELETE
2573          | IN_DELETE_SELF
2574          | IN_MOVE_SELF
2575          | IN_MOVED_FROM
2576          | IN_MOVED_TO;
2577 
2578   wd = inotify_add_watch(loop->inotify_fd, path, events);
2579   if (wd == -1)
2580     return UV__ERR(errno);
2581 
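  /* inotify returns the same watch descriptor when the same path is added
   * again, so all handles watching one path share a single watcher_list,
   * keyed by wd in the loop's red-black tree.
   */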
2582   w = find_watcher(loop, wd);
2583   if (w)
2584     goto no_insert;
2585 
2586   len = strlen(path) + 1;
2587   w = uv__malloc(sizeof(*w) + len);
2588   if (w == NULL)
2589     return UV_ENOMEM;
2590 
2591   w->wd = wd;
2592   w->path = memcpy(w + 1, path, len);
2593   uv__queue_init(&w->watchers);
2594   w->iterating = 0;
2595   RB_INSERT(watcher_root, uv__inotify_watchers(loop), w);
2596 
2597 no_insert:
2598   uv__handle_start(handle);
2599   uv__queue_insert_tail(&w->watchers, &handle->watchers);
2600   handle->path = w->path;
2601   handle->cb = cb;
2602   handle->wd = wd;
2603 
2604   return 0;
2605 }
2606 
2607 
2608 int uv_fs_event_stop(uv_fs_event_t* handle) {
2609   struct watcher_list* w;
2610 
2611   if (!uv__is_active(handle))
2612     return 0;
2613 
2614   w = find_watcher(handle->loop, handle->wd);
2615   assert(w != NULL);
2616 
2617   handle->wd = -1;
2618   handle->path = NULL;
2619   uv__handle_stop(handle);
2620   uv__queue_remove(&handle->watchers);
2621 
2622   maybe_free_watcher_list(w, handle->loop);
2623 
2624   return 0;
2625 }
2626 
2627 
2628 void uv__fs_event_close(uv_fs_event_t* handle) {
2629   uv_fs_event_stop(handle);
2630 }
2631