• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2000-2004 Niels Provos <provos@citi.umich.edu>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include <sys/types.h>
32 #include <sys/resource.h>
33 #ifdef HAVE_SYS_TIME_H
34 #include <sys/time.h>
35 #else
36 #include <sys/_libevent_time.h>
37 #endif
38 #include <sys/queue.h>
39 #include <sys/devpoll.h>
40 #include <signal.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <fcntl.h>
46 #include <errno.h>
47 #include <assert.h>
48 
49 #include "event.h"
50 #include "event-internal.h"
51 #include "evsignal.h"
52 #include "log.h"
53 
/*
 * Due to limitations in the /dev/poll interface, we need to keep track of
 * all file descriptors ourselves.  Each slot records which event, if any,
 * is registered for each direction of one descriptor.
 */
struct evdevpoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
61 
/* State kept by the /dev/poll backend. */
struct devpollop {
	struct evdevpoll *fds;	/* per-descriptor slots, indexed by fd */
	int nfds;		/* number of slots in fds */
	struct pollfd *events;	/* result buffer handed to DP_POLL */
	int nevents;		/* capacity of events; also bounds changes */
	int dpfd;		/* descriptor obtained by opening /dev/poll */
	struct pollfd *changes;	/* queued updates not yet written to dpfd */
	int nchanges;		/* number of entries queued in changes */
};
71 
/* Forward declarations of the backend entry points defined in this file. */
static void *devpoll_init(struct event_base *base);
static int devpoll_add(void *arg, struct event *ev);
static int devpoll_del(void *arg, struct event *ev);
static int devpoll_dispatch(struct event_base *base, void *arg,
    struct timeval *tv);
static void devpoll_dealloc(struct event_base *base, void *arg);
77 
78 const struct eventop devpollops = {
79 	"devpoll",
80 	devpoll_init,
81 	devpoll_add,
82 	devpoll_del,
83 	devpoll_dispatch,
84 	devpoll_dealloc,
85 	1 /* need reinit */
86 };
87 
/*
 * Table size used by devpoll_init() when RLIMIT_NOFILE cannot be queried
 * or is unlimited.
 */
#define NEVENT	32000
89 
90 static int
devpoll_commit(struct devpollop * devpollop)91 devpoll_commit(struct devpollop *devpollop)
92 {
93 	/*
94 	 * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
95 	 * Write is limited to 2GB of data, until it will fail.
96 	 */
97 	if (pwrite(devpollop->dpfd, devpollop->changes,
98 		sizeof(struct pollfd) * devpollop->nchanges, 0) == -1)
99 		return(-1);
100 
101 	devpollop->nchanges = 0;
102 	return(0);
103 }
104 
105 static int
devpoll_queue(struct devpollop * devpollop,int fd,int events)106 devpoll_queue(struct devpollop *devpollop, int fd, int events) {
107 	struct pollfd *pfd;
108 
109 	if (devpollop->nchanges >= devpollop->nevents) {
110 		/*
111 		 * Change buffer is full, must commit it to /dev/poll before
112 		 * adding more
113 		 */
114 		if (devpoll_commit(devpollop) != 0)
115 			return(-1);
116 	}
117 
118 	pfd = &devpollop->changes[devpollop->nchanges++];
119 	pfd->fd = fd;
120 	pfd->events = events;
121 	pfd->revents = 0;
122 
123 	return(0);
124 }
125 
126 static void *
devpoll_init(struct event_base * base)127 devpoll_init(struct event_base *base)
128 {
129 	int dpfd, nfiles = NEVENT;
130 	struct rlimit rl;
131 	struct devpollop *devpollop;
132 
133 	/* Disable devpoll when this environment variable is set */
134 	if (evutil_getenv("EVENT_NODEVPOLL"))
135 		return (NULL);
136 
137 	if (!(devpollop = calloc(1, sizeof(struct devpollop))))
138 		return (NULL);
139 
140 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
141 	    rl.rlim_cur != RLIM_INFINITY)
142 		nfiles = rl.rlim_cur;
143 
144 	/* Initialize the kernel queue */
145 	if ((dpfd = open("/dev/poll", O_RDWR)) == -1) {
146                 event_warn("open: /dev/poll");
147 		free(devpollop);
148 		return (NULL);
149 	}
150 
151 	devpollop->dpfd = dpfd;
152 
153 	/* Initialize fields */
154 	devpollop->events = calloc(nfiles, sizeof(struct pollfd));
155 	if (devpollop->events == NULL) {
156 		free(devpollop);
157 		close(dpfd);
158 		return (NULL);
159 	}
160 	devpollop->nevents = nfiles;
161 
162 	devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll));
163 	if (devpollop->fds == NULL) {
164 		free(devpollop->events);
165 		free(devpollop);
166 		close(dpfd);
167 		return (NULL);
168 	}
169 	devpollop->nfds = nfiles;
170 
171 	devpollop->changes = calloc(nfiles, sizeof(struct pollfd));
172 	if (devpollop->changes == NULL) {
173 		free(devpollop->fds);
174 		free(devpollop->events);
175 		free(devpollop);
176 		close(dpfd);
177 		return (NULL);
178 	}
179 
180 	evsignal_init(base);
181 
182 	return (devpollop);
183 }
184 
185 static int
devpoll_recalc(struct event_base * base,void * arg,int max)186 devpoll_recalc(struct event_base *base, void *arg, int max)
187 {
188 	struct devpollop *devpollop = arg;
189 
190 	if (max >= devpollop->nfds) {
191 		struct evdevpoll *fds;
192 		int nfds;
193 
194 		nfds = devpollop->nfds;
195 		while (nfds <= max)
196 			nfds <<= 1;
197 
198 		fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll));
199 		if (fds == NULL) {
200 			event_warn("realloc");
201 			return (-1);
202 		}
203 		devpollop->fds = fds;
204 		memset(fds + devpollop->nfds, 0,
205 		    (nfds - devpollop->nfds) * sizeof(struct evdevpoll));
206 		devpollop->nfds = nfds;
207 	}
208 
209 	return (0);
210 }
211 
212 static int
devpoll_dispatch(struct event_base * base,void * arg,struct timeval * tv)213 devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
214 {
215 	struct devpollop *devpollop = arg;
216 	struct pollfd *events = devpollop->events;
217 	struct dvpoll dvp;
218 	struct evdevpoll *evdp;
219 	int i, res, timeout = -1;
220 
221 	if (devpollop->nchanges)
222 		devpoll_commit(devpollop);
223 
224 	if (tv != NULL)
225 		timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
226 
227 	dvp.dp_fds = devpollop->events;
228 	dvp.dp_nfds = devpollop->nevents;
229 	dvp.dp_timeout = timeout;
230 
231 	res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
232 
233 	if (res == -1) {
234 		if (errno != EINTR) {
235 			event_warn("ioctl: DP_POLL");
236 			return (-1);
237 		}
238 
239 		evsignal_process(base);
240 		return (0);
241 	} else if (base->sig.evsignal_caught) {
242 		evsignal_process(base);
243 	}
244 
245 	event_debug(("%s: devpoll_wait reports %d", __func__, res));
246 
247 	for (i = 0; i < res; i++) {
248 		int which = 0;
249 		int what = events[i].revents;
250 		struct event *evread = NULL, *evwrite = NULL;
251 
252 		assert(events[i].fd < devpollop->nfds);
253 		evdp = &devpollop->fds[events[i].fd];
254 
255                 if (what & POLLHUP)
256                         what |= POLLIN | POLLOUT;
257                 else if (what & POLLERR)
258                         what |= POLLIN | POLLOUT;
259 
260 		if (what & POLLIN) {
261 			evread = evdp->evread;
262 			which |= EV_READ;
263 		}
264 
265 		if (what & POLLOUT) {
266 			evwrite = evdp->evwrite;
267 			which |= EV_WRITE;
268 		}
269 
270 		if (!which)
271 			continue;
272 
273 		if (evread != NULL && !(evread->ev_events & EV_PERSIST))
274 			event_del(evread);
275 		if (evwrite != NULL && evwrite != evread &&
276 		    !(evwrite->ev_events & EV_PERSIST))
277 			event_del(evwrite);
278 
279 		if (evread != NULL)
280 			event_active(evread, EV_READ, 1);
281 		if (evwrite != NULL)
282 			event_active(evwrite, EV_WRITE, 1);
283 	}
284 
285 	return (0);
286 }
287 
288 
289 static int
devpoll_add(void * arg,struct event * ev)290 devpoll_add(void *arg, struct event *ev)
291 {
292 	struct devpollop *devpollop = arg;
293 	struct evdevpoll *evdp;
294 	int fd, events;
295 
296 	if (ev->ev_events & EV_SIGNAL)
297 		return (evsignal_add(ev));
298 
299 	fd = ev->ev_fd;
300 	if (fd >= devpollop->nfds) {
301 		/* Extend the file descriptor array as necessary */
302 		if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1)
303 			return (-1);
304 	}
305 	evdp = &devpollop->fds[fd];
306 
307 	/*
308 	 * It's not necessary to OR the existing read/write events that we
309 	 * are currently interested in with the new event we are adding.
310 	 * The /dev/poll driver ORs any new events with the existing events
311 	 * that it has cached for the fd.
312 	 */
313 
314 	events = 0;
315 	if (ev->ev_events & EV_READ) {
316 		if (evdp->evread && evdp->evread != ev) {
317 		   /* There is already a different read event registered */
318 		   return(-1);
319 		}
320 		events |= POLLIN;
321 	}
322 
323 	if (ev->ev_events & EV_WRITE) {
324 		if (evdp->evwrite && evdp->evwrite != ev) {
325 		   /* There is already a different write event registered */
326 		   return(-1);
327 		}
328 		events |= POLLOUT;
329 	}
330 
331 	if (devpoll_queue(devpollop, fd, events) != 0)
332 		return(-1);
333 
334 	/* Update events responsible */
335 	if (ev->ev_events & EV_READ)
336 		evdp->evread = ev;
337 	if (ev->ev_events & EV_WRITE)
338 		evdp->evwrite = ev;
339 
340 	return (0);
341 }
342 
343 static int
devpoll_del(void * arg,struct event * ev)344 devpoll_del(void *arg, struct event *ev)
345 {
346 	struct devpollop *devpollop = arg;
347 	struct evdevpoll *evdp;
348 	int fd, events;
349 	int needwritedelete = 1, needreaddelete = 1;
350 
351 	if (ev->ev_events & EV_SIGNAL)
352 		return (evsignal_del(ev));
353 
354 	fd = ev->ev_fd;
355 	if (fd >= devpollop->nfds)
356 		return (0);
357 	evdp = &devpollop->fds[fd];
358 
359 	events = 0;
360 	if (ev->ev_events & EV_READ)
361 		events |= POLLIN;
362 	if (ev->ev_events & EV_WRITE)
363 		events |= POLLOUT;
364 
365 	/*
366 	 * The only way to remove an fd from the /dev/poll monitored set is
367 	 * to use POLLREMOVE by itself.  This removes ALL events for the fd
368 	 * provided so if we care about two events and are only removing one
369 	 * we must re-add the other event after POLLREMOVE.
370 	 */
371 
372 	if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
373 		return(-1);
374 
375 	if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
376 		/*
377 		 * We're not deleting all events, so we must resubmit the
378 		 * event that we are still interested in if one exists.
379 		 */
380 
381 		if ((events & POLLIN) && evdp->evwrite != NULL) {
382 			/* Deleting read, still care about write */
383 			devpoll_queue(devpollop, fd, POLLOUT);
384 			needwritedelete = 0;
385 		} else if ((events & POLLOUT) && evdp->evread != NULL) {
386 			/* Deleting write, still care about read */
387 			devpoll_queue(devpollop, fd, POLLIN);
388 			needreaddelete = 0;
389 		}
390 	}
391 
392 	if (needreaddelete)
393 		evdp->evread = NULL;
394 	if (needwritedelete)
395 		evdp->evwrite = NULL;
396 
397 	return (0);
398 }
399 
400 static void
devpoll_dealloc(struct event_base * base,void * arg)401 devpoll_dealloc(struct event_base *base, void *arg)
402 {
403 	struct devpollop *devpollop = arg;
404 
405 	evsignal_dealloc(base);
406 	if (devpollop->fds)
407 		free(devpollop->fds);
408 	if (devpollop->events)
409 		free(devpollop->events);
410 	if (devpollop->changes)
411 		free(devpollop->changes);
412 	if (devpollop->dpfd >= 0)
413 		close(devpollop->dpfd);
414 
415 	memset(devpollop, 0, sizeof(struct devpollop));
416 	free(devpollop);
417 }
418