1 /*
2 * Submitted by David Pacheco (dp.spambait@gmail.com)
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Copyright (c) 2007 Sun Microsystems. All rights reserved.
29 * Use is subject to license terms.
30 */
31
32 /*
33 * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
34 * This implementation is loosely modeled after the one used for select(2) (in
35 * select.c).
36 *
37 * The outstanding events are tracked in a data structure called evport_data.
38 * Each entry in the ed_fds array corresponds to a file descriptor, and contains
39 * pointers to the read and write events that correspond to that fd. (That is,
40 * when the file is readable, the "read" event should handle it, etc.)
41 *
42 * evport_add and evport_del update this data structure. evport_dispatch uses it
43 * to determine where to callback when an event occurs (which it gets from
44 * port_getn).
45 *
46 * Helper functions are used: grow() grows the file descriptor array as
47 * necessary when large fd's come in. reassociate() takes care of maintaining
48 * the proper file-descriptor/event-port associations.
49 *
50 * As in the select(2) implementation, signals are handled by evsignal.
51 */
52
53 #ifdef HAVE_CONFIG_H
54 #include "config.h"
55 #endif
56
#include <sys/time.h>
#include <assert.h>
#include <sys/queue.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <port.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#ifdef CHECK_INVARIANTS
#include <assert.h>
#endif
72
73 #include "event.h"
74 #include "event-internal.h"
75 #include "log.h"
76 #include "evsignal.h"
77
78
/*
 * Default value for ed_nevents, i.e. the number of fd_info slots that
 * evport_init() allocates up front. The table can describe file descriptors
 * in the range [0, ed_nevents). If an event comes in for a file descriptor
 * F >= ed_nevents, evport_add() grows the array by a power-of-two factor
 * until F fits.
 */
#define DEFAULT_NFDS 16
85
86
/*
 * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on
 * any particular call. It also sizes the on-stack port_event_t list used by
 * evport_dispatch() and the ed_pending array in struct evport_data, so you can
 * speed things up by increasing this, but it will (obviously) require more
 * memory.
 */
#define EVENTS_PER_GETN 8
93
/*
 * Per-file-descriptor information about what events we're subscribed to. A
 * field is NULL when no event is subscribed for that direction.
 */

struct fd_info {
	struct event* fdi_revt; /* the event responsible for the "read" */
	struct event* fdi_wevt; /* the event responsible for the "write" */
};

/*
 * Helpers operating on a (struct fd_info *):
 *   FDI_HAS_READ / FDI_HAS_WRITE  - nonzero if a read / write event is set
 *   FDI_HAS_EVENTS                - nonzero if either one is set
 *   FDI_TO_SYSEVENTS              - the poll(2)-style mask (POLLIN and/or
 *                                   POLLOUT) matching the subscribed events
 *
 * Every expansion is fully parenthesized so the macros can be used safely
 * inside larger expressions (e.g. FDI_TO_SYSEVENTS(fdi) & POLLOUT). Note
 * that the argument is evaluated more than once.
 */
#define FDI_HAS_READ(fdi)	((fdi)->fdi_revt != NULL)
#define FDI_HAS_WRITE(fdi)	((fdi)->fdi_wevt != NULL)
#define FDI_HAS_EVENTS(fdi)	(FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
#define FDI_TO_SYSEVENTS(fdi)	((FDI_HAS_READ(fdi) ? POLLIN : 0) | \
    (FDI_HAS_WRITE(fdi) ? POLLOUT : 0))
109
/*
 * Backend state created by evport_init() and handed back to the other
 * evport_* callbacks as their opaque argument.
 */
struct evport_data {
	int ed_port; /* event port for system events */
	int ed_nevents; /* number of allocated fdi's */
	struct fd_info *ed_fds; /* allocated fdi table */
	/*
	 * fdi's that we need to reassoc: evport_dispatch() records here the
	 * fd of each event it retrieved, and re-associates those fds with the
	 * port at the start of its next invocation. A slot holding -1 is
	 * unused.
	 */
	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */
};
117
/* Backend entry points; all are private to this file. */
static void* evport_init (struct event_base *);
static int evport_add (void *, struct event *);
static int evport_del (void *, struct event *);
static int evport_dispatch (struct event_base *, void *, struct timeval *);
static void evport_dealloc (struct event_base *, void *);

/*
 * Operations table through which the rest of the library reaches this
 * backend. The initializer is positional, so the order of the entries must
 * match the member order of struct eventop (declared outside this file).
 */
const struct eventop evportops = {
	"evport",		/* backend name */
	evport_init,
	evport_add,
	evport_del,
	evport_dispatch,
	evport_dealloc,
	1 /* need reinit */
};
133
134 /*
135 * Initialize the event port implementation.
136 */
137
138 static void*
evport_init(struct event_base * base)139 evport_init(struct event_base *base)
140 {
141 struct evport_data *evpd;
142 int i;
143 /*
144 * Disable event ports when this environment variable is set
145 */
146 if (evutil_getenv("EVENT_NOEVPORT"))
147 return (NULL);
148
149 if (!(evpd = calloc(1, sizeof(struct evport_data))))
150 return (NULL);
151
152 if ((evpd->ed_port = port_create()) == -1) {
153 free(evpd);
154 return (NULL);
155 }
156
157 /*
158 * Initialize file descriptor structure
159 */
160 evpd->ed_fds = calloc(DEFAULT_NFDS, sizeof(struct fd_info));
161 if (evpd->ed_fds == NULL) {
162 close(evpd->ed_port);
163 free(evpd);
164 return (NULL);
165 }
166 evpd->ed_nevents = DEFAULT_NFDS;
167 for (i = 0; i < EVENTS_PER_GETN; i++)
168 evpd->ed_pending[i] = -1;
169
170 evsignal_init(base);
171
172 return (evpd);
173 }
174
175 #ifdef CHECK_INVARIANTS
176 /*
177 * Checks some basic properties about the evport_data structure. Because it
178 * checks all file descriptors, this function can be expensive when the maximum
179 * file descriptor ever used is rather large.
180 */
181
182 static void
check_evportop(struct evport_data * evpd)183 check_evportop(struct evport_data *evpd)
184 {
185 assert(evpd);
186 assert(evpd->ed_nevents > 0);
187 assert(evpd->ed_port > 0);
188 assert(evpd->ed_fds > 0);
189
190 /*
191 * Verify the integrity of the fd_info struct as well as the events to
192 * which it points (at least, that they're valid references and correct
193 * for their position in the structure).
194 */
195 int i;
196 for (i = 0; i < evpd->ed_nevents; ++i) {
197 struct event *ev;
198 struct fd_info *fdi;
199
200 fdi = &evpd->ed_fds[i];
201 if ((ev = fdi->fdi_revt) != NULL) {
202 assert(ev->ev_fd == i);
203 }
204 if ((ev = fdi->fdi_wevt) != NULL) {
205 assert(ev->ev_fd == i);
206 }
207 }
208 }
209
210 /*
211 * Verifies very basic integrity of a given port_event.
212 */
213 static void
check_event(port_event_t * pevt)214 check_event(port_event_t* pevt)
215 {
216 /*
217 * We've only registered for PORT_SOURCE_FD events. The only
218 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
219 * but since we're not using port_alert either, we can assume
220 * PORT_SOURCE_FD.
221 */
222 assert(pevt->portev_source == PORT_SOURCE_FD);
223 assert(pevt->portev_user == NULL);
224 }
225
226 #else
/*
 * Without CHECK_INVARIANTS the checks expand to nothing, so a call such as
 * "check_evportop(x);" is reduced to an empty statement.
 */
#define check_evportop(epop)
#define check_event(pevt)
229 #endif /* CHECK_INVARIANTS */
230
231 /*
232 * Doubles the size of the allocated file descriptor array.
233 */
234 static int
grow(struct evport_data * epdp,int factor)235 grow(struct evport_data *epdp, int factor)
236 {
237 struct fd_info *tmp;
238 int oldsize = epdp->ed_nevents;
239 int newsize = factor * oldsize;
240 assert(factor > 1);
241
242 check_evportop(epdp);
243
244 tmp = realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
245 if (NULL == tmp)
246 return -1;
247 epdp->ed_fds = tmp;
248 memset((char*) (epdp->ed_fds + oldsize), 0,
249 (newsize - oldsize)*sizeof(struct fd_info));
250 epdp->ed_nevents = newsize;
251
252 check_evportop(epdp);
253
254 return 0;
255 }
256
257
258 /*
259 * (Re)associates the given file descriptor with the event port. The OS events
260 * are specified (implicitly) from the fd_info struct.
261 */
262 static int
reassociate(struct evport_data * epdp,struct fd_info * fdip,int fd)263 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
264 {
265 int sysevents = FDI_TO_SYSEVENTS(fdip);
266
267 if (sysevents != 0) {
268 if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
269 fd, sysevents, NULL) == -1) {
270 event_warn("port_associate");
271 return (-1);
272 }
273 }
274
275 check_evportop(epdp);
276
277 return (0);
278 }
279
280 /*
281 * Main event loop - polls port_getn for some number of events, and processes
282 * them.
283 */
284
285 static int
evport_dispatch(struct event_base * base,void * arg,struct timeval * tv)286 evport_dispatch(struct event_base *base, void *arg, struct timeval *tv)
287 {
288 int i, res;
289 struct evport_data *epdp = arg;
290 port_event_t pevtlist[EVENTS_PER_GETN];
291
292 /*
293 * port_getn will block until it has at least nevents events. It will
294 * also return how many it's given us (which may be more than we asked
295 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
296 * nevents.
297 */
298 int nevents = 1;
299
300 /*
301 * We have to convert a struct timeval to a struct timespec
302 * (only difference is nanoseconds vs. microseconds). If no time-based
303 * events are active, we should wait for I/O (and tv == NULL).
304 */
305 struct timespec ts;
306 struct timespec *ts_p = NULL;
307 if (tv != NULL) {
308 ts.tv_sec = tv->tv_sec;
309 ts.tv_nsec = tv->tv_usec * 1000;
310 ts_p = &ts;
311 }
312
313 /*
314 * Before doing anything else, we need to reassociate the events we hit
315 * last time which need reassociation. See comment at the end of the
316 * loop below.
317 */
318 for (i = 0; i < EVENTS_PER_GETN; ++i) {
319 struct fd_info *fdi = NULL;
320 if (epdp->ed_pending[i] != -1) {
321 fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
322 }
323
324 if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
325 int fd = FDI_HAS_READ(fdi) ? fdi->fdi_revt->ev_fd :
326 fdi->fdi_wevt->ev_fd;
327 reassociate(epdp, fdi, fd);
328 epdp->ed_pending[i] = -1;
329 }
330 }
331
332 if ((res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
333 (unsigned int *) &nevents, ts_p)) == -1) {
334 if (errno == EINTR || errno == EAGAIN) {
335 evsignal_process(base);
336 return (0);
337 } else if (errno == ETIME) {
338 if (nevents == 0)
339 return (0);
340 } else {
341 event_warn("port_getn");
342 return (-1);
343 }
344 } else if (base->sig.evsignal_caught) {
345 evsignal_process(base);
346 }
347
348 event_debug(("%s: port_getn reports %d events", __func__, nevents));
349
350 for (i = 0; i < nevents; ++i) {
351 struct event *ev;
352 struct fd_info *fdi;
353 port_event_t *pevt = &pevtlist[i];
354 int fd = (int) pevt->portev_object;
355
356 check_evportop(epdp);
357 check_event(pevt);
358 epdp->ed_pending[i] = fd;
359
360 /*
361 * Figure out what kind of event it was
362 * (because we have to pass this to the callback)
363 */
364 res = 0;
365 if (pevt->portev_events & POLLIN)
366 res |= EV_READ;
367 if (pevt->portev_events & POLLOUT)
368 res |= EV_WRITE;
369
370 assert(epdp->ed_nevents > fd);
371 fdi = &(epdp->ed_fds[fd]);
372
373 /*
374 * We now check for each of the possible events (READ
375 * or WRITE). Then, we activate the event (which will
376 * cause its callback to be executed).
377 */
378
379 if ((res & EV_READ) && ((ev = fdi->fdi_revt) != NULL)) {
380 event_active(ev, res, 1);
381 }
382
383 if ((res & EV_WRITE) && ((ev = fdi->fdi_wevt) != NULL)) {
384 event_active(ev, res, 1);
385 }
386 } /* end of all events gotten */
387
388 check_evportop(epdp);
389
390 return (0);
391 }
392
393
394 /*
395 * Adds the given event (so that you will be notified when it happens via
396 * the callback function).
397 */
398
399 static int
evport_add(void * arg,struct event * ev)400 evport_add(void *arg, struct event *ev)
401 {
402 struct evport_data *evpd = arg;
403 struct fd_info *fdi;
404 int factor;
405
406 check_evportop(evpd);
407
408 /*
409 * Delegate, if it's not ours to handle.
410 */
411 if (ev->ev_events & EV_SIGNAL)
412 return (evsignal_add(ev));
413
414 /*
415 * If necessary, grow the file descriptor info table
416 */
417
418 factor = 1;
419 while (ev->ev_fd >= factor * evpd->ed_nevents)
420 factor *= 2;
421
422 if (factor > 1) {
423 if (-1 == grow(evpd, factor)) {
424 return (-1);
425 }
426 }
427
428 fdi = &evpd->ed_fds[ev->ev_fd];
429 if (ev->ev_events & EV_READ)
430 fdi->fdi_revt = ev;
431 if (ev->ev_events & EV_WRITE)
432 fdi->fdi_wevt = ev;
433
434 return reassociate(evpd, fdi, ev->ev_fd);
435 }
436
437 /*
438 * Removes the given event from the list of events to wait for.
439 */
440
441 static int
evport_del(void * arg,struct event * ev)442 evport_del(void *arg, struct event *ev)
443 {
444 struct evport_data *evpd = arg;
445 struct fd_info *fdi;
446 int i;
447 int associated = 1;
448
449 check_evportop(evpd);
450
451 /*
452 * Delegate, if it's not ours to handle
453 */
454 if (ev->ev_events & EV_SIGNAL) {
455 return (evsignal_del(ev));
456 }
457
458 if (evpd->ed_nevents < ev->ev_fd) {
459 return (-1);
460 }
461
462 for (i = 0; i < EVENTS_PER_GETN; ++i) {
463 if (evpd->ed_pending[i] == ev->ev_fd) {
464 associated = 0;
465 break;
466 }
467 }
468
469 fdi = &evpd->ed_fds[ev->ev_fd];
470 if (ev->ev_events & EV_READ)
471 fdi->fdi_revt = NULL;
472 if (ev->ev_events & EV_WRITE)
473 fdi->fdi_wevt = NULL;
474
475 if (associated) {
476 if (!FDI_HAS_EVENTS(fdi) &&
477 port_dissociate(evpd->ed_port, PORT_SOURCE_FD,
478 ev->ev_fd) == -1) {
479 /*
480 * Ignre EBADFD error the fd could have been closed
481 * before event_del() was called.
482 */
483 if (errno != EBADFD) {
484 event_warn("port_dissociate");
485 return (-1);
486 }
487 } else {
488 if (FDI_HAS_EVENTS(fdi)) {
489 return (reassociate(evpd, fdi, ev->ev_fd));
490 }
491 }
492 } else {
493 if (fdi->fdi_revt == NULL && fdi->fdi_wevt == NULL) {
494 evpd->ed_pending[i] = -1;
495 }
496 }
497 return 0;
498 }
499
500
501 static void
evport_dealloc(struct event_base * base,void * arg)502 evport_dealloc(struct event_base *base, void *arg)
503 {
504 struct evport_data *evpd = arg;
505
506 evsignal_dealloc(base);
507
508 close(evpd->ed_port);
509
510 if (evpd->ed_fds)
511 free(evpd->ed_fds);
512 free(evpd);
513 }
514