1 /* MIT License
2 *
3 * Copyright (c) 2024 Brad House
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * SPDX-License-Identifier: MIT
25 */
26
27 /* Uses an anonymous union */
28 #if defined(__clang__) || defined(__GNUC__)
29 # pragma GCC diagnostic push
30 # if defined(__clang__)
31 # pragma GCC diagnostic ignored "-Wc11-extensions"
32 # else
33 # pragma GCC diagnostic ignored "-Wpedantic"
34 # endif
35 #endif
36
37 #include "ares_private.h"
38 #include "ares_event.h"
39 #include "ares_event_win32.h"
40
41
42 #if defined(USE_WINSOCK) && defined(CARES_THREADS)
43
44 #ifdef HAVE_LIMITS_H
45 # include <limits.h>
46 #endif
47
48 /* IMPLEMENTATION NOTES
49 * ====================
50 *
51 * This implementation uses some undocumented functionality within Windows for
52 * monitoring sockets. The Ancillary Function Driver (AFD) is the low level
53 * implementation that Winsock2 sits on top of. Winsock2 unfortunately does
54 * not expose the equivalent of epoll() or kqueue(), but it is possible to
55 * access AFD directly and use along with IOCP to simulate the functionality.
56 * We want to use IOCP if possible as it gives us the ability to monitor more
57 * than just sockets (WSAPoll is not an option), and perform arbitrary callbacks
58 * which means we can hook in non-socket related events.
59 *
60 * The information for this implementation was gathered from "wepoll" and
61 * "libuv" which both use slight variants on this. We originally went with
62 * an implementation methodology more similar to "libuv", but we had a few
63 * user reports of crashes during shutdown and memory leaks due to some
64 * events not being delivered for cleanup of closed sockets.
65 *
66 * Initialization:
67 * 1. Dynamically load the NtDeviceIoControlFile, NtCreateFile, and
68 * NtCancelIoFileEx internal symbols from ntdll.dll. (Don't believe
69 * Microsoft's documentation for NtCancelIoFileEx as it documents an
70 * invalid prototype). These functions are to open a reference to the
71 * Ancillary Function Driver (AFD), and to submit and cancel POLL
72 * requests.
73 * 2. Create an IO Completion Port base handle via CreateIoCompletionPort()
74 * that all socket events will be delivered through.
75 * 3. Create a list of AFD Handles and track the number of poll requests
76 * per AFD handle. When we exceed a pre-determined limit of poll requests
77 * for a handle (128), we will automatically create a new handle. The
78 * reason behind this is NtCancelIoFileEx uses a horrible algorithm for
79 * issuing cancellations. See:
80 * https://github.com/python-trio/trio/issues/52#issuecomment-548215128
81 * 4. Create a callback to be used to be able to interrupt waiting for IOCP
82 * events, this may be called for allowing enqueuing of additional socket
83 * events or removing socket events. PostQueuedCompletionStatus() is the
84 * obvious choice. We can use the same container format, the event
85 * delivered won't have an OVERLAPPED pointer so we can differentiate from
86 * socket events. Use the container as the completion key.
87 *
88 * Socket Add:
89 * 1. Create/Allocate a container for holding metadata about a socket
90 * including:
91 * - SOCKET base_socket;
92 * - IO_STATUS_BLOCK iosb; -- Used by AFD POLL, returned as OVERLAPPED
93 * - AFD_POLL_INFO afd_poll_info; -- Used by AFD POLL
94 * - afd list node -- for tracking which AFD handle a POLL request was
95 * submitted to.
96 * 2. Call WSAIoctl(..., SIO_BASE_HANDLE, ...) to unwrap the SOCKET and get
97 * the "base socket" we can use for polling. It appears this may fail so
98 * we should call WSAIoctl(..., SIO_BSP_HANDLE_POLL, ...) as a fallback.
99 * 3. Submit AFD POLL request (see "AFD POLL Request" section)
100 * 4. Record a mapping between the "IO Status Block" and the socket container
101 * so when events are delivered we can dereference.
102 *
103 * Socket Delete:
104 * 1. Call
105 * NtCancelIoFileEx(afd, iosb, &temp_iosb);
106 * to cancel any pending operations.
107 * 2. Tag the socket container as being queued for deletion
108 * 3. Wait for an event to be delivered for the socket (cancel isn't
109 * immediate, it delivers an event so we know it is complete). Delete only once
110 * that event has been delivered. If we don't do this we could try to
111 * access free()'d memory at a later point.
112 *
113 * Socket Modify:
114 * 1. Call
115 * NtCancelIoFileEx(afd, iosb, &temp_iosb)
116 * to cancel any pending operation.
117 * 2. When the event comes through that the cancel is complete, enqueue
118 * another "AFD Poll Request" for the desired events.
119 *
120 * Event Wait:
121 * 1. Call GetQueuedCompletionStatusEx() with the base IOCP handle, a
122 * stack allocated array of OVERLAPPED_ENTRY's, and an appropriate
123 * timeout.
124 * 2. Iterate across returned events, if the lpOverlapped is NULL, then
125 * the CompletionKey is a pointer to the container registered via
126 * PostQueuedCompletionStatus(), otherwise it is the "IO Status Block"
127 * registered with the "AFD Poll Request" which needs to be dereferenced
128 * to the "socket container".
129 * 3. If it is a "socket container", disassociate it from the afd list node
130 * it was previously submitted to.
131 * 4. If it is a "socket container" check to see if we are cleaning up, if so,
132 * clean it up.
133 * 5. If it is a "socket container" that is still valid, Submit an
134 * AFD POLL Request (see "AFD POLL Request"). We must re-enable the request
135 * each time we receive a response, it is not persistent.
136 * 6. Notify of any events received as indicated in the AFD_POLL_INFO
137 * Handles[0].Events (NOTE: check NumberOfHandles > 0, and the status in
138 * the IO_STATUS_BLOCK). If we received an AFD_POLL_LOCAL_CLOSE, clean up
139 * the connection like the integrator requested it to be cleaned up.
140 *
141 * AFD Poll Request:
142 * 1. Find an afd poll handle in the list that has fewer pending requests than
143 * the limit.
144 * 2. If an afd poll handle was not associated (e.g. due to all being over
145 * limit), create a new afd poll handle by calling NtCreateFile()
146 * with path \Device\Afd , then add the AFD handle to the IO Completion
147 * Port. We can leave the completion key as blank since events for
148 * multiple sockets will be delivered through this and we need to
149 * differentiate via the OVERLAPPED member returned. Add the new AFD
150 * handle to the list of handles.
151 * 3. Initialize the AFD_POLL_INFO structure:
152 * Exclusive = FALSE; // allow multiple requests
153 * NumberOfHandles = 1;
154 * Timeout.QuadPart = LLONG_MAX;
155 * Handles[0].Handle = (HANDLE)base_socket;
156 * Handles[0].Status = 0;
157 * Handles[0].Events = AFD_POLL_LOCAL_CLOSE + additional events to wait for
158 * such as AFD_POLL_RECEIVE, etc;
159 * 4. Zero out the IO_STATUS_BLOCK structures
160 * 5. Set the "Status" member of IO_STATUS_BLOCK to STATUS_PENDING
161 * 6. Call
162 * NtDeviceIoControlFile(afd, NULL, NULL, &iosb,
163 * &iosb, IOCTL_AFD_POLL,
164 * &afd_poll_info, sizeof(afd_poll_info),
165 * &afd_poll_info, sizeof(afd_poll_info));
166 *
167 *
168 * References:
169 * - https://github.com/piscisaureus/wepoll/
170 * - https://github.com/libuv/libuv/
171 */
172
173 /* Cap the number of outstanding AFD poll requests per AFD handle due to known
174 * slowdowns with large lists and NtCancelIoFileEx() */
175 # define AFD_POLL_PER_HANDLE 128
176
177 # include <stdarg.h>
178
179 /* # define CARES_DEBUG 1 */
180
/* Allow GCC-compatible compilers to validate the debug format strings against
 * their arguments. Expands to nothing on other compilers. */
#  if defined(__GNUC__)
#    define CARES_PRINTF_LIKE(fmt, args) \
      __attribute__((format(printf, fmt, args)))
#  else
#    define CARES_PRINTF_LIKE(fmt, args)
#  endif

static void CARES_DEBUG_LOG(const char *fmt, ...) CARES_PRINTF_LIKE(1, 2);

/*! Write a printf-style diagnostic message to stderr and flush it.  Output is
 *  only produced when CARES_DEBUG is defined at compile time; otherwise the
 *  call does nothing.
 *
 *  \param[in] fmt  printf-style format string, followed by its arguments.
 */
static void CARES_DEBUG_LOG(const char *fmt, ...)
{
#  ifdef CARES_DEBUG
  va_list args;

  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  fflush(stderr);
  va_end(args);
#  else
  /* Debugging disabled, nothing to emit */
  (void)fmt;
#  endif
}
201
202 typedef struct {
203 /* Dynamically loaded symbols */
204 NtCreateFile_t NtCreateFile;
205 NtDeviceIoControlFile_t NtDeviceIoControlFile;
206 NtCancelIoFileEx_t NtCancelIoFileEx;
207
208 /* Implementation details */
209 ares_slist_t *afd_handles;
210 HANDLE iocp_handle;
211
212 /* IO_STATUS_BLOCK * -> ares_evsys_win32_eventdata_t * mapping. There is
213 * no completion key passed to IOCP with this method so we have to look
214 * up based on the lpOverlapped returned (which is mapped to IO_STATUS_BLOCK)
215 */
216 ares_htable_vpvp_t *sockets;
217
218 /* Flag about whether or not we are shutting down */
219 ares_bool_t is_shutdown;
220 } ares_evsys_win32_t;
221
/*! Lifecycle state of the request tracked by an
 *  ares_evsys_win32_eventdata_t container. */
typedef enum {
  /*! Nothing is outstanding */
  POLL_STATUS_NONE    = 0,
  /*! An AFD poll request (or a wake-up signal) has been queued */
  POLL_STATUS_PENDING = 1,
  /*! Cancellation of the outstanding poll request was requested */
  POLL_STATUS_CANCEL  = 2,
  /*! The container must be destroyed when its next event is delivered */
  POLL_STATUS_DESTROY = 3
} poll_status_t;
228
229 typedef struct {
230 /*! Pointer to parent event container */
231 ares_event_t *event;
232 /*! Socket passed in to monitor */
233 SOCKET socket;
234 /*! Base socket derived from provided socket */
235 SOCKET base_socket;
236 /*! Structure for submitting AFD POLL requests (Internals!) */
237 AFD_POLL_INFO afd_poll_info;
238 /*! Status of current polling operation */
239 poll_status_t poll_status;
240 /*! IO Status Block structure submitted with AFD POLL requests and returned
241 * with IOCP results as lpOverlapped (even though its a different structure)
242 */
243 IO_STATUS_BLOCK iosb;
244 /*! AFD handle node an outstanding poll request is associated with */
245 ares_slist_node_t *afd_handle_node;
246 /* Lock is only for PostQueuedCompletionStatus() to prevent multiple
247 * signals. Tracking via POLL_STATUS_PENDING/POLL_STATUS_NONE */
248 ares_thread_mutex_t *lock;
249 } ares_evsys_win32_eventdata_t;
250
251 static size_t ares_evsys_win32_wait(ares_event_thread_t *e,
252 unsigned long timeout_ms);
253
ares_iocpevent_signal(const ares_event_t * event)254 static void ares_iocpevent_signal(const ares_event_t *event)
255 {
256 ares_event_thread_t *e = event->e;
257 ares_evsys_win32_t *ew = e->ev_sys_data;
258 ares_evsys_win32_eventdata_t *ed = event->data;
259 ares_bool_t queue_event = ARES_FALSE;
260
261 ares_thread_mutex_lock(ed->lock);
262 if (ed->poll_status != POLL_STATUS_PENDING) {
263 ed->poll_status = POLL_STATUS_PENDING;
264 queue_event = ARES_TRUE;
265 }
266 ares_thread_mutex_unlock(ed->lock);
267
268 if (!queue_event) {
269 return;
270 }
271
272 PostQueuedCompletionStatus(ew->iocp_handle, 0, (ULONG_PTR)event->data, NULL);
273 }
274
/*! Callback run when a wake-up signal is delivered.  Clears the pending state
 *  so that ares_iocpevent_signal() may queue another wake-up.
 *
 *  \param[in] e      Event thread (unused)
 *  \param[in] fd     Socket (unused)
 *  \param[in] data   Pointer to the ares_evsys_win32_eventdata_t container
 *  \param[in] flags  Event flags (unused)
 */
static void ares_iocpevent_cb(ares_event_thread_t *e, ares_socket_t fd,
                              void *data, ares_event_flags_t flags)
{
  ares_evsys_win32_eventdata_t *evdata = data;

  /* Only the container itself is of interest */
  (void)flags;
  (void)fd;
  (void)e;

  ares_thread_mutex_lock(evdata->lock);
  evdata->poll_status = POLL_STATUS_NONE;
  ares_thread_mutex_unlock(evdata->lock);
}
286
ares_iocpevent_create(ares_event_thread_t * e)287 static ares_event_t *ares_iocpevent_create(ares_event_thread_t *e)
288 {
289 ares_event_t *event = NULL;
290 ares_status_t status;
291
292 status =
293 ares_event_update(&event, e, ARES_EVENT_FLAG_OTHER, ares_iocpevent_cb,
294 ARES_SOCKET_BAD, NULL, NULL, ares_iocpevent_signal);
295 if (status != ARES_SUCCESS) {
296 return NULL;
297 }
298
299 return event;
300 }
301
/*! Tear down the Windows event subsystem.
 *
 *  Marks the subsystem as shutting down, then keeps processing completion
 *  events until every tracked socket container has been released, and finally
 *  frees the IOCP handle, the AFD handle list, the socket table and the state
 *  structure itself.
 *
 *  \param[in] e  Event thread.  NULL, or a thread without subsystem data, is
 *                a no-op.
 */
static void ares_evsys_win32_destroy(ares_event_thread_t *e)
{
  ares_evsys_win32_t *state;

  if (e == NULL) {
    return;
  }

  CARES_DEBUG_LOG("** Win32 Event Destroy\n");

  state = e->ev_sys_data;
  if (state == NULL) {
    return;
  }

  state->is_shutdown = ARES_TRUE;

  /* Socket containers are destroyed lazily as their final events arrive, so
   * drain events until none are left in the table. */
  CARES_DEBUG_LOG(" ** waiting on %lu remaining sockets to be destroyed\n",
                  (unsigned long)ares_htable_vpvp_num_keys(state->sockets));
  while (ares_htable_vpvp_num_keys(state->sockets) > 0) {
    ares_evsys_win32_wait(e, 0);
  }
  CARES_DEBUG_LOG(" ** all sockets cleaned up\n");

  if (state->iocp_handle != NULL) {
    CloseHandle(state->iocp_handle);
  }

  ares_slist_destroy(state->afd_handles);
  ares_htable_vpvp_destroy(state->sockets);
  ares_free(state);

  e->ev_sys_data = NULL;
}
337
338 typedef struct {
339 size_t poll_cnt;
340 HANDLE afd_handle;
341 } ares_afd_handle_t;
342
ares_afd_handle_destroy(void * arg)343 static void ares_afd_handle_destroy(void *arg)
344 {
345 ares_afd_handle_t *hnd = arg;
346 if (hnd != NULL && hnd->afd_handle != NULL) {
347 CloseHandle(hnd->afd_handle);
348 }
349 ares_free(hnd);
350 }
351
ares_afd_handle_cmp(const void * data1,const void * data2)352 static int ares_afd_handle_cmp(const void *data1, const void *data2)
353 {
354 const ares_afd_handle_t *hnd1 = data1;
355 const ares_afd_handle_t *hnd2 = data2;
356
357 if (hnd1->poll_cnt > hnd2->poll_cnt) {
358 return 1;
359 }
360 if (hnd1->poll_cnt < hnd2->poll_cnt) {
361 return -1;
362 }
363 return 0;
364 }
365
/*! Initialize an OBJECT_ATTRIBUTES structure: everything is zeroed, then the
 *  length, object name and attribute flags are filled in.
 *
 *  \param[out] attr        Structure to initialize
 *  \param[in]  name        Object name to reference
 *  \param[in]  attributes  Attribute flags
 */
static void fill_object_attributes(OBJECT_ATTRIBUTES *attr,
                                   UNICODE_STRING *name, ULONG attributes)
{
  memset(attr, 0, sizeof(OBJECT_ATTRIBUTES));

  attr->Attributes = attributes;
  attr->ObjectName = name;
  attr->Length     = sizeof(OBJECT_ATTRIBUTES);
}
374
375 # define UNICODE_STRING_CONSTANT(s) \
376 { (sizeof(s) - 1) * sizeof(wchar_t), sizeof(s) * sizeof(wchar_t), L##s }
377
ares_afd_handle_create(ares_evsys_win32_t * ew)378 static ares_slist_node_t *ares_afd_handle_create(ares_evsys_win32_t *ew)
379 {
380 UNICODE_STRING afd_device_name = UNICODE_STRING_CONSTANT("\\Device\\Afd");
381 OBJECT_ATTRIBUTES afd_attributes;
382 NTSTATUS status;
383 IO_STATUS_BLOCK iosb;
384 ares_afd_handle_t *afd = ares_malloc_zero(sizeof(*afd));
385 ares_slist_node_t *node = NULL;
386 if (afd == NULL) {
387 goto fail;
388 }
389
390 /* Open a handle to the AFD subsystem */
391 fill_object_attributes(&afd_attributes, &afd_device_name, 0);
392 memset(&iosb, 0, sizeof(iosb));
393 iosb.Status = STATUS_PENDING;
394 status = ew->NtCreateFile(&afd->afd_handle, SYNCHRONIZE, &afd_attributes,
395 &iosb, NULL, 0, FILE_SHARE_READ | FILE_SHARE_WRITE,
396 FILE_OPEN, 0, NULL, 0);
397 if (status != STATUS_SUCCESS) {
398 CARES_DEBUG_LOG("** Failed to create AFD endpoint\n");
399 goto fail;
400 }
401
402 if (CreateIoCompletionPort(afd->afd_handle, ew->iocp_handle,
403 0 /* CompletionKey */, 0) == NULL) {
404 goto fail;
405 }
406
407 if (!SetFileCompletionNotificationModes(afd->afd_handle,
408 FILE_SKIP_SET_EVENT_ON_HANDLE)) {
409 goto fail;
410 }
411
412 node = ares_slist_insert(ew->afd_handles, afd);
413 if (node == NULL) {
414 goto fail;
415 }
416
417 return node;
418
419 fail:
420
421 ares_afd_handle_destroy(afd);
422 return NULL;
423 }
424
425 /* Fetch the lowest poll count entry, but if it exceeds the limit, create a
426 * new one and return that */
ares_afd_handle_fetch(ares_evsys_win32_t * ew)427 static ares_slist_node_t *ares_afd_handle_fetch(ares_evsys_win32_t *ew)
428 {
429 ares_slist_node_t *node = ares_slist_node_first(ew->afd_handles);
430 ares_afd_handle_t *afd = ares_slist_node_val(node);
431
432 if (afd != NULL && afd->poll_cnt < AFD_POLL_PER_HANDLE) {
433 return node;
434 }
435
436 return ares_afd_handle_create(ew);
437 }
438
ares_evsys_win32_init(ares_event_thread_t * e)439 static ares_bool_t ares_evsys_win32_init(ares_event_thread_t *e)
440 {
441 ares_evsys_win32_t *ew = NULL;
442 HMODULE ntdll;
443
444 CARES_DEBUG_LOG("** Win32 Event Init\n");
445
446 ew = ares_malloc_zero(sizeof(*ew));
447 if (ew == NULL) {
448 return ARES_FALSE;
449 }
450
451 e->ev_sys_data = ew;
452
453 /* All apps should have ntdll.dll already loaded, so just get a handle to
454 * this */
455 ntdll = GetModuleHandleA("ntdll.dll");
456 if (ntdll == NULL) {
457 goto fail;
458 }
459
460 # ifdef __GNUC__
461 # pragma GCC diagnostic push
462 # pragma GCC diagnostic ignored "-Wpedantic"
463 /* Without the (void *) cast we get:
464 * warning: cast between incompatible function types from 'FARPROC' {aka 'long
465 * long int (*)()'} to 'NTSTATUS (*)(...)'} [-Wcast-function-type] but with it
466 * we get: warning: ISO C forbids conversion of function pointer to object
467 * pointer type [-Wpedantic] look unsolvable short of killing the warning.
468 */
469 # endif
470
471 /* Load Internal symbols not typically accessible */
472 ew->NtCreateFile =
473 (NtCreateFile_t)(void *)GetProcAddress(ntdll, "NtCreateFile");
474 ew->NtDeviceIoControlFile = (NtDeviceIoControlFile_t)(void *)GetProcAddress(
475 ntdll, "NtDeviceIoControlFile");
476 ew->NtCancelIoFileEx =
477 (NtCancelIoFileEx_t)(void *)GetProcAddress(ntdll, "NtCancelIoFileEx");
478
479 # ifdef __GNUC__
480 # pragma GCC diagnostic pop
481 # endif
482
483 if (ew->NtCreateFile == NULL || ew->NtCancelIoFileEx == NULL ||
484 ew->NtDeviceIoControlFile == NULL) {
485 goto fail;
486 }
487
488 ew->iocp_handle = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
489 if (ew->iocp_handle == NULL) {
490 goto fail;
491 }
492
493 ew->afd_handles = ares_slist_create(
494 e->channel->rand_state, ares_afd_handle_cmp, ares_afd_handle_destroy);
495 if (ew->afd_handles == NULL) {
496 goto fail;
497 }
498
499 /* Create at least the first afd handle, so we know of any critical system
500 * issues during startup */
501 if (ares_afd_handle_create(ew) == NULL) {
502 goto fail;
503 }
504
505 e->ev_signal = ares_iocpevent_create(e);
506 if (e->ev_signal == NULL) {
507 goto fail;
508 }
509
510 ew->sockets = ares_htable_vpvp_create(NULL, NULL);
511 if (ew->sockets == NULL) {
512 goto fail;
513 }
514
515 return ARES_TRUE;
516
517 fail:
518 ares_evsys_win32_destroy(e);
519 return ARES_FALSE;
520 }
521
/*! Unwrap a socket to the "base socket" usable for AFD polling.
 *
 *  \param[in] socket  Socket provided for monitoring
 *  \return the base socket, or ARES_SOCKET_BAD if it cannot be determined
 */
static ares_socket_t ares_evsys_win32_basesocket(ares_socket_t socket)
{
  for (;;) {
    DWORD         bytes; /* Not used */
    ares_socket_t candidate = ARES_SOCKET_BAD;

    /* Preferred method: ask directly for the base handle */
    if (WSAIoctl(socket, SIO_BASE_HANDLE, NULL, 0, &candidate,
                 sizeof(candidate), &bytes, NULL, NULL) != SOCKET_ERROR &&
        candidate != ARES_SOCKET_BAD) {
      return candidate;
    }

    /* The request failed.  If this isn't a socket at all, that is critical
     * and there's nothing more to try. */
    if (GetLastError() == WSAENOTSOCK) {
      return ARES_SOCKET_BAD;
    }

    /* Work around known bug in Komodia based LSPs, use SIO_BSP_HANDLE_POLL
     * to retrieve the underlying socket, then go around again to get the
     * base socket of that one:
     * https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls
     * https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6
     */
    candidate = ARES_SOCKET_BAD;
    if (WSAIoctl(socket, SIO_BSP_HANDLE_POLL, NULL, 0, &candidate,
                 sizeof(candidate), &bytes, NULL, NULL) == SOCKET_ERROR ||
        candidate == ARES_SOCKET_BAD || candidate == socket) {
      /* No progress can be made */
      return ARES_SOCKET_BAD;
    }

    socket = candidate;
  }
}
562
ares_evsys_win32_afd_enqueue(ares_event_t * event,ares_event_flags_t flags)563 static ares_bool_t ares_evsys_win32_afd_enqueue(ares_event_t *event,
564 ares_event_flags_t flags)
565 {
566 ares_event_thread_t *e = event->e;
567 ares_evsys_win32_t *ew = e->ev_sys_data;
568 ares_evsys_win32_eventdata_t *ed = event->data;
569 ares_afd_handle_t *afd;
570 NTSTATUS status;
571
572 if (e == NULL || ed == NULL || ew == NULL) {
573 return ARES_FALSE;
574 }
575
576 /* Misuse */
577 if (ed->poll_status != POLL_STATUS_NONE) {
578 return ARES_FALSE;
579 }
580
581 ed->afd_handle_node = ares_afd_handle_fetch(ew);
582 /* System resource issue? */
583 if (ed->afd_handle_node == NULL) {
584 return ARES_FALSE;
585 }
586
587 afd = ares_slist_node_val(ed->afd_handle_node);
588
589 /* Enqueue AFD Poll */
590 ed->afd_poll_info.Exclusive = FALSE;
591 ed->afd_poll_info.NumberOfHandles = 1;
592 ed->afd_poll_info.Timeout.QuadPart = LLONG_MAX;
593 ed->afd_poll_info.Handles[0].Handle = (HANDLE)ed->base_socket;
594 ed->afd_poll_info.Handles[0].Status = 0;
595 ed->afd_poll_info.Handles[0].Events = AFD_POLL_LOCAL_CLOSE;
596
597 if (flags & ARES_EVENT_FLAG_READ) {
598 ed->afd_poll_info.Handles[0].Events |=
599 (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT |
600 AFD_POLL_ABORT);
601 }
602 if (flags & ARES_EVENT_FLAG_WRITE) {
603 ed->afd_poll_info.Handles[0].Events |=
604 (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL);
605 }
606 if (flags == 0) {
607 ed->afd_poll_info.Handles[0].Events |= AFD_POLL_DISCONNECT;
608 }
609
610 memset(&ed->iosb, 0, sizeof(ed->iosb));
611 ed->iosb.Status = STATUS_PENDING;
612
613 status = ew->NtDeviceIoControlFile(
614 afd->afd_handle, NULL, NULL, &ed->iosb, &ed->iosb, IOCTL_AFD_POLL,
615 &ed->afd_poll_info, sizeof(ed->afd_poll_info), &ed->afd_poll_info,
616 sizeof(ed->afd_poll_info));
617 if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
618 CARES_DEBUG_LOG("** afd_enqueue ed=%p FAILED\n", (void *)ed);
619 ed->afd_handle_node = NULL;
620 return ARES_FALSE;
621 }
622
623 /* Record that we submitted a poll request to this handle and tell it to
624 * re-sort the node since we changed its sort value */
625 afd->poll_cnt++;
626 ares_slist_node_reinsert(ed->afd_handle_node);
627
628 ed->poll_status = POLL_STATUS_PENDING;
629 CARES_DEBUG_LOG("++ afd_enqueue ed=%p flags=%X\n", (void *)ed,
630 (unsigned int)flags);
631 return ARES_TRUE;
632 }
633
ares_evsys_win32_afd_cancel(ares_evsys_win32_eventdata_t * ed)634 static ares_bool_t ares_evsys_win32_afd_cancel(ares_evsys_win32_eventdata_t *ed)
635 {
636 IO_STATUS_BLOCK cancel_iosb;
637 ares_evsys_win32_t *ew;
638 NTSTATUS status;
639 ares_afd_handle_t *afd;
640
641 ew = ed->event->e->ev_sys_data;
642
643 /* Misuse */
644 if (ed->poll_status != POLL_STATUS_PENDING) {
645 return ARES_FALSE;
646 }
647
648 afd = ares_slist_node_val(ed->afd_handle_node);
649
650 /* Misuse */
651 if (afd == NULL) {
652 return ARES_FALSE;
653 }
654
655 ed->poll_status = POLL_STATUS_CANCEL;
656
657 /* Not pending, nothing to do. Most likely that means there is a pending
658 * event that hasn't yet been delivered otherwise it would be re-armed
659 * already */
660 if (ed->iosb.Status != STATUS_PENDING) {
661 CARES_DEBUG_LOG("** cancel not needed for ed=%p\n", (void *)ed);
662 return ARES_FALSE;
663 }
664
665 status = ew->NtCancelIoFileEx(afd->afd_handle, &ed->iosb, &cancel_iosb);
666
667 CARES_DEBUG_LOG("** Enqueued cancel for ed=%p, status = %lX\n", (void *)ed,
668 status);
669
670 /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed
671 * just before calling NtCancelIoFileEx(), but we have not yet received the
672 * notification (but it should be queued for the next IOCP event). */
673 if (status == STATUS_SUCCESS || status == STATUS_NOT_FOUND) {
674 return ARES_TRUE;
675 }
676
677 return ARES_FALSE;
678 }
679
/*! Release an event container.
 *
 *  Socket containers are removed from the socket lookup table first.  The
 *  lock is destroyed, the owning event (if still attached) has its data
 *  pointer cleared, and the container memory is freed.
 *
 *  \param[in] ew  Event subsystem state
 *  \param[in] ed  Container to destroy.  Nothing is done if either argument
 *                 is NULL.
 */
static void ares_evsys_win32_eventdata_destroy(ares_evsys_win32_t           *ew,
                                               ares_evsys_win32_eventdata_t *ed)
{
  ares_bool_t is_socket;

  if (ew == NULL || ed == NULL) {
    return;
  }

  is_socket = (ed->socket != ARES_SOCKET_BAD) ? ARES_TRUE : ARES_FALSE;

  CARES_DEBUG_LOG("-- deleting ed=%p (%s)\n", (void *)ed,
                  is_socket ? "socket" : "data");

  /* Socket containers are tracked for deferred destruction; drop the
   * tracking entry now. */
  if (is_socket) {
    ares_htable_vpvp_remove(ew->sockets, &ed->iosb);
  }

  ares_thread_mutex_destroy(ed->lock);

  /* Make sure the owner no longer points at freed memory */
  if (ed->event != NULL) {
    ed->event->data = NULL;
  }

  ares_free(ed);
}
701
ares_evsys_win32_event_add(ares_event_t * event)702 static ares_bool_t ares_evsys_win32_event_add(ares_event_t *event)
703 {
704 ares_event_thread_t *e = event->e;
705 ares_evsys_win32_t *ew = e->ev_sys_data;
706 ares_evsys_win32_eventdata_t *ed;
707 ares_bool_t rc = ARES_FALSE;
708
709 ed = ares_malloc_zero(sizeof(*ed));
710 ed->event = event;
711 ed->socket = event->fd;
712 ed->base_socket = ARES_SOCKET_BAD;
713 event->data = ed;
714
715 CARES_DEBUG_LOG("++ add ed=%p (%s) flags=%X\n", (void *)ed,
716 (ed->socket == ARES_SOCKET_BAD) ? "data" : "socket",
717 (unsigned int)event->flags);
718
719 /* Likely a signal event, not something we will directly handle. We create
720 * the ares_evsys_win32_eventdata_t as the placeholder to use as the
721 * IOCP Completion Key */
722 if (ed->socket == ARES_SOCKET_BAD) {
723 ed->lock = ares_thread_mutex_create();
724 if (ed->lock == NULL) {
725 goto done;
726 }
727 rc = ARES_TRUE;
728 goto done;
729 }
730
731 ed->base_socket = ares_evsys_win32_basesocket(ed->socket);
732 if (ed->base_socket == ARES_SOCKET_BAD) {
733 goto done;
734 }
735
736 if (!ares_htable_vpvp_insert(ew->sockets, &ed->iosb, ed)) {
737 goto done;
738 }
739
740 if (!ares_evsys_win32_afd_enqueue(event, event->flags)) {
741 goto done;
742 }
743
744 rc = ARES_TRUE;
745
746 done:
747 if (!rc) {
748 ares_evsys_win32_eventdata_destroy(ew, ed);
749 event->data = NULL;
750 }
751 return rc;
752 }
753
/*! Stop monitoring an event.
 *
 *  Non-socket containers are destroyed immediately.  Socket containers have
 *  their outstanding poll cancelled and are tagged POLL_STATUS_DESTROY; the
 *  memory is only released later, when the resulting event is delivered.
 *
 *  \param[in] event  Event being removed
 */
static void ares_evsys_win32_event_del(ares_event_t *event)
{
  ares_evsys_win32_eventdata_t *container = event->data;

  /* Already cleaned up, likely a LOCAL_CLOSE */
  if (container == NULL) {
    return;
  }

  CARES_DEBUG_LOG("-- DELETE requested for ed=%p (%s)\n", (void *)container,
                  (container->socket != ARES_SOCKET_BAD) ? "socket" : "data");

  if (container->socket == ARES_SOCKET_BAD) {
    /* Nothing outstanding with the OS, release right away */
    ares_evsys_win32_eventdata_destroy(event->e->ev_sys_data, container);
  } else {
    /* Cancel the pending AFD Poll operation and detach from the event; the
     * container itself is destroyed once the completion arrives. */
    ares_evsys_win32_afd_cancel(container);
    container->poll_status = POLL_STATUS_DESTROY;
    container->event       = NULL;
  }

  event->data = NULL;
}
779
/*! Change the flags a socket event is monitored for.
 *
 *  Only the pending poll is cancelled here.  Once the cancellation event is
 *  delivered, event processing submits a new poll request on its own.
 *
 *  \param[in] event      Event being modified
 *  \param[in] new_flags  Requested flags (only used for debug output here)
 */
static void ares_evsys_win32_event_mod(ares_event_t      *event,
                                       ares_event_flags_t new_flags)
{
  ares_evsys_win32_eventdata_t *container = event->data;

  /* Only socket events with a live container are handled */
  if (container != NULL && event->fd != ARES_SOCKET_BAD) {
    CARES_DEBUG_LOG("** mod ed=%p new_flags=%X\n", (void *)container,
                    (unsigned int)new_flags);

    ares_evsys_win32_afd_cancel(container);
  }
}
798
ares_evsys_win32_process_other_event(ares_evsys_win32_t * ew,ares_evsys_win32_eventdata_t * ed,size_t i)799 static ares_bool_t ares_evsys_win32_process_other_event(
800 ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i)
801 {
802 ares_event_t *event;
803
804 /* NOTE: do NOT dereference 'ed' if during shutdown as this could be an
805 * invalid pointer if the signal handle was cleaned up, but there was still a
806 * pending event! */
807
808 if (ew->is_shutdown) {
809 CARES_DEBUG_LOG("\t\t** i=%lu, skip non-socket handle during shutdown\n",
810 (unsigned long)i);
811 return ARES_FALSE;
812 }
813
814 event = ed->event;
815 CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (data)\n", (unsigned long)i, (void *)ed);
816
817 event->cb(event->e, event->fd, event->data, ARES_EVENT_FLAG_OTHER);
818 return ARES_TRUE;
819 }
820
ares_evsys_win32_process_socket_event(ares_evsys_win32_t * ew,ares_evsys_win32_eventdata_t * ed,size_t i)821 static ares_bool_t ares_evsys_win32_process_socket_event(
822 ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i)
823 {
824 ares_event_flags_t flags = 0;
825 ares_event_t *event = NULL;
826 ares_afd_handle_t *afd = NULL;
827
828 /* Shouldn't be possible */
829 if (ed == NULL) {
830 CARES_DEBUG_LOG("\t\t** i=%lu, Invalid handle.\n", (unsigned long)i);
831 return ARES_FALSE;
832 }
833
834 event = ed->event;
835
836 CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (socket)\n", (unsigned long)i,
837 (void *)ed);
838
839 /* Process events */
840 if (ed->poll_status == POLL_STATUS_PENDING &&
841 ed->iosb.Status == STATUS_SUCCESS &&
842 ed->afd_poll_info.NumberOfHandles > 0) {
843 if (ed->afd_poll_info.Handles[0].Events &
844 (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT |
845 AFD_POLL_ABORT)) {
846 flags |= ARES_EVENT_FLAG_READ;
847 }
848 if (ed->afd_poll_info.Handles[0].Events &
849 (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL)) {
850 flags |= ARES_EVENT_FLAG_WRITE;
851 }
852 if (ed->afd_poll_info.Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
853 CARES_DEBUG_LOG("\t\t** ed=%p LOCAL CLOSE\n", (void *)ed);
854 ed->poll_status = POLL_STATUS_DESTROY;
855 }
856 }
857
858 CARES_DEBUG_LOG("\t\t** ed=%p, iosb status=%lX, poll_status=%d, flags=%X\n",
859 (void *)ed, (unsigned long)ed->iosb.Status,
860 (int)ed->poll_status, (unsigned int)flags);
861
862 /* Decrement poll count for AFD handle then resort, also disassociate
863 * with socket */
864 afd = ares_slist_node_val(ed->afd_handle_node);
865 afd->poll_cnt--;
866 ares_slist_node_reinsert(ed->afd_handle_node);
867 ed->afd_handle_node = NULL;
868
869 /* Pending destroy, go ahead and kill it */
870 if (ed->poll_status == POLL_STATUS_DESTROY) {
871 ares_evsys_win32_eventdata_destroy(ew, ed);
872 return ARES_FALSE;
873 }
874
875 ed->poll_status = POLL_STATUS_NONE;
876
877 /* Mask flags against current desired flags. We could have an event
878 * queued that is outdated. */
879 flags &= event->flags;
880
881 /* Don't actually do anything with the event that was delivered as we are
882 * in a shutdown/cleanup process. Mostly just handling the delayed
883 * destruction of sockets */
884 if (ew->is_shutdown) {
885 return ARES_FALSE;
886 }
887
888 /* Re-enqueue so we can get more events on the socket, we either
889 * received a real event, or a cancellation notice. Both cases we
890 * re-queue using the current configured event flags.
891 *
892 * If we can't re-enqueue, that likely means the socket has been
893 * closed, so we want to kill our reference to it
894 */
895 if (!ares_evsys_win32_afd_enqueue(event, event->flags)) {
896 ares_evsys_win32_eventdata_destroy(ew, ed);
897 return ARES_FALSE;
898 }
899
900 /* No events we recognize to deliver */
901 if (flags == 0) {
902 return ARES_FALSE;
903 }
904
905 event->cb(event->e, event->fd, event->data, flags);
906 return ARES_TRUE;
907 }
908
/*! Wait for and dispatch events from the IO completion port.
 *
 *  Up to 16 completion entries are fetched per call to
 *  GetQueuedCompletionStatusEx().  As long as a call fills the whole buffer
 *  another non-blocking fetch is made to drain whatever remains.
 *
 *  \param[in] e           Event thread
 *  \param[in] timeout_ms  Maximum time to wait in milliseconds; 0 waits
 *                         indefinitely
 *  \return number of entries for which an actual event was processed
 */
static size_t ares_evsys_win32_wait(ares_event_thread_t *e,
                                    unsigned long        timeout_ms)
{
  ares_evsys_win32_t *ew = e->ev_sys_data;
  OVERLAPPED_ENTRY    entries[16];
  const ULONG         capacity  = sizeof(entries) / sizeof(entries[0]);
  ULONG               received  = 0;
  size_t              processed = 0;
  DWORD               wait_ms   = INFINITE;

  if (timeout_ms != 0) {
    wait_ms = (DWORD)timeout_ms;
  }

  CARES_DEBUG_LOG("** Wait Enter\n");

  for (;;) {
    size_t idx;

    received = capacity;
    if (!GetQueuedCompletionStatusEx(ew->iocp_handle, entries, capacity,
                                     &received, wait_ms, FALSE)) {
      break;
    }

    /* Any further pass must return instantly if there is nothing left to
     * process */
    wait_ms = 0;

    CARES_DEBUG_LOG("\t** GetQueuedCompletionStatusEx returned %lu entries\n",
                    (unsigned long)received);

    for (idx = 0; idx < (size_t)received; idx++) {
      ares_evsys_win32_eventdata_t *ed;
      ares_bool_t                   handled;

      if (entries[idx].lpCompletionKey != 0) {
        /* Posted via PostQueuedCompletionStatus(): the completion key is the
         * container itself */
        ed      = (ares_evsys_win32_eventdata_t *)entries[idx].lpCompletionKey;
        handled = ares_evsys_win32_process_other_event(ew, ed, idx);
      } else {
        /* AFD poll completion: lpOverlapped identifies the socket container */
        ed = ares_htable_vpvp_get_direct(ew->sockets,
                                         entries[idx].lpOverlapped);
        handled = ares_evsys_win32_process_socket_event(ew, ed, idx);
      }

      /* Count only entries that resulted in actual events */
      if (handled) {
        processed++;
      }
    }

    /* A partially filled buffer means the queue has been drained */
    if (received != capacity) {
      break;
    }
  }

  CARES_DEBUG_LOG("** Wait Exit\n");

  return processed;
}
966
967 const ares_event_sys_t ares_evsys_win32 = { "win32",
968 ares_evsys_win32_init,
969 ares_evsys_win32_destroy,
970 ares_evsys_win32_event_add,
971 ares_evsys_win32_event_del,
972 ares_evsys_win32_event_mod,
973 ares_evsys_win32_wait };
974 #endif
975
976 #if defined(__clang__) || defined(__GNUC__)
977 # pragma GCC diagnostic pop
978 #endif
979