1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
2 /* dbus-socket-set-epoll.c - a socket set implemented via Linux epoll(4)
3 *
4 * Copyright © 2011 Nokia Corporation
5 *
6 * Licensed under the Academic Free License version 2.1
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 * MA 02110-1301 USA
22 *
23 */
24
25 #include <config.h>
26 #include "dbus-socket-set.h"
27
28 #include <dbus/dbus-internals.h>
29 #include <dbus/dbus-sysdeps.h>
30
31 #ifndef __linux__
32 # error This file is for Linux epoll(4)
33 #endif
34
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <sys/epoll.h>
38 #include <unistd.h>
39
40 #ifndef DOXYGEN_SHOULD_SKIP_THIS
41
42 typedef struct {
43 DBusSocketSet parent;
44 int epfd;
45 } DBusSocketSetEpoll;
46
47 static inline DBusSocketSetEpoll *
socket_set_epoll_cast(DBusSocketSet * set)48 socket_set_epoll_cast (DBusSocketSet *set)
49 {
50 _dbus_assert (set->cls == &_dbus_socket_set_epoll_class);
51 return (DBusSocketSetEpoll *) set;
52 }
53
54 /* this is safe to call on a partially-allocated socket set */
55 static void
socket_set_epoll_free(DBusSocketSet * set)56 socket_set_epoll_free (DBusSocketSet *set)
57 {
58 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
59
60 if (self == NULL)
61 return;
62
63 if (self->epfd != -1)
64 close (self->epfd);
65
66 dbus_free (self);
67 }
68
69 DBusSocketSet *
_dbus_socket_set_epoll_new(void)70 _dbus_socket_set_epoll_new (void)
71 {
72 DBusSocketSetEpoll *self;
73
74 self = dbus_new0 (DBusSocketSetEpoll, 1);
75
76 if (self == NULL)
77 return NULL;
78
79 self->parent.cls = &_dbus_socket_set_epoll_class;
80
81 self->epfd = epoll_create1 (EPOLL_CLOEXEC);
82
83 if (self->epfd == -1)
84 {
85 int flags;
86
87 /* the size hint is ignored unless you have a rather old kernel,
88 * but must be positive on some versions, so just pick something
89 * arbitrary; it's a hint, not a limit */
90 self->epfd = epoll_create (42);
91
92 flags = fcntl (self->epfd, F_GETFD, 0);
93
94 if (flags != -1)
95 fcntl (self->epfd, F_SETFD, flags | FD_CLOEXEC);
96 }
97
98 if (self->epfd == -1)
99 {
100 socket_set_epoll_free ((DBusSocketSet *) self);
101 return NULL;
102 }
103
104 return (DBusSocketSet *) self;
105 }
106
107 static uint32_t
watch_flags_to_epoll_events(unsigned int flags)108 watch_flags_to_epoll_events (unsigned int flags)
109 {
110 uint32_t events = 0;
111
112 if (flags & DBUS_WATCH_READABLE)
113 events |= EPOLLIN;
114 if (flags & DBUS_WATCH_WRITABLE)
115 events |= EPOLLOUT;
116
117 return events;
118 }
119
120 static unsigned int
epoll_events_to_watch_flags(uint32_t events)121 epoll_events_to_watch_flags (uint32_t events)
122 {
123 short flags = 0;
124
125 if (events & EPOLLIN)
126 flags |= DBUS_WATCH_READABLE;
127 if (events & EPOLLOUT)
128 flags |= DBUS_WATCH_WRITABLE;
129 if (events & EPOLLHUP)
130 flags |= DBUS_WATCH_HANGUP;
131 if (events & EPOLLERR)
132 flags |= DBUS_WATCH_ERROR;
133
134 return flags;
135 }
136
137 static dbus_bool_t
socket_set_epoll_add(DBusSocketSet * set,int fd,unsigned int flags,dbus_bool_t enabled)138 socket_set_epoll_add (DBusSocketSet *set,
139 int fd,
140 unsigned int flags,
141 dbus_bool_t enabled)
142 {
143 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
144 struct epoll_event event;
145 int err;
146
147 event.data.fd = fd;
148
149 if (enabled)
150 {
151 event.events = watch_flags_to_epoll_events (flags);
152 }
153 else
154 {
155 /* We need to add *something* to reserve space in the kernel's data
156 * structures: see socket_set_epoll_disable for more details */
157 event.events = EPOLLET;
158 }
159
160 if (epoll_ctl (self->epfd, EPOLL_CTL_ADD, fd, &event) == 0)
161 return TRUE;
162
163 /* Anything except ENOMEM, ENOSPC means we have an internal error. */
164 err = errno;
165 switch (err)
166 {
167 case ENOMEM:
168 case ENOSPC:
169 /* be silent: this is basically OOM, which our callers are expected
170 * to cope with */
171 break;
172
173 case EBADF:
174 _dbus_warn ("Bad fd %d\n", fd);
175 break;
176
177 case EEXIST:
178 _dbus_warn ("fd %d added and then added again\n", fd);
179 break;
180
181 default:
182 _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd,
183 strerror (err));
184 break;
185 }
186
187 return FALSE;
188 }
189
190 static void
socket_set_epoll_enable(DBusSocketSet * set,int fd,unsigned int flags)191 socket_set_epoll_enable (DBusSocketSet *set,
192 int fd,
193 unsigned int flags)
194 {
195 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
196 struct epoll_event event;
197 int err;
198
199 event.data.fd = fd;
200 event.events = watch_flags_to_epoll_events (flags);
201
202 if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
203 return;
204
205 err = errno;
206
207 /* Enabling a file descriptor isn't allowed to fail, even for OOM, so we
208 * do our best to avoid all of these. */
209 switch (err)
210 {
211 case EBADF:
212 _dbus_warn ("Bad fd %d\n", fd);
213 break;
214
215 case ENOENT:
216 _dbus_warn ("fd %d enabled before it was added\n", fd);
217 break;
218
219 case ENOMEM:
220 _dbus_warn ("Insufficient memory to change watch for fd %d\n", fd);
221 break;
222
223 default:
224 _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd,
225 strerror (err));
226 break;
227 }
228 }
229
230 static void
socket_set_epoll_disable(DBusSocketSet * set,int fd)231 socket_set_epoll_disable (DBusSocketSet *set,
232 int fd)
233 {
234 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
235 struct epoll_event event;
236 int err;
237
238 /* The naive thing to do would be EPOLL_CTL_DEL, but that'll probably
239 * free resources in the kernel. When we come to do socket_set_epoll_enable,
240 * there might not be enough resources to bring it back!
241 *
242 * The next idea you might have is to set the flags to 0. However, events
243 * always trigger on EPOLLERR and EPOLLHUP, even if libdbus isn't actually
244 * delivering them to a DBusWatch. Because epoll is level-triggered by
245 * default, we'll busy-loop on an unhandled error or hangup; not good.
246 *
247 * So, let's set it to be edge-triggered: then the worst case is that
248 * we return from poll immediately on one iteration, ignore it because no
249 * watch is enabled, then go back to normal. When we re-enable a watch
250 * we'll switch back to level-triggered and be notified again (verified to
251 * work on 2.6.32). Compile this file with -DTEST_BEHAVIOUR_OF_EPOLLET for
252 * test code.
253 */
254 event.data.fd = fd;
255 event.events = EPOLLET;
256
257 if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
258 return;
259
260 err = errno;
261 _dbus_warn ("Error when trying to watch fd %d: %s\n", fd,
262 strerror (err));
263 }
264
265 static void
socket_set_epoll_remove(DBusSocketSet * set,int fd)266 socket_set_epoll_remove (DBusSocketSet *set,
267 int fd)
268 {
269 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
270 int err;
271 /* Kernels < 2.6.9 require a non-NULL struct pointer, even though its
272 * contents are ignored */
273 struct epoll_event dummy = { 0 };
274
275 if (epoll_ctl (self->epfd, EPOLL_CTL_DEL, fd, &dummy) == 0)
276 return;
277
278 err = errno;
279 _dbus_warn ("Error when trying to remove fd %d: %s\n", fd, strerror (err));
280 }
281
282 /* Optimally, this should be the same as in DBusLoop: we use it to translate
283 * between struct epoll_event and DBusSocketEvent without allocating heap
284 * memory. */
285 #define N_STACK_DESCRIPTORS 64
286
287 static int
socket_set_epoll_poll(DBusSocketSet * set,DBusSocketEvent * revents,int max_events,int timeout_ms)288 socket_set_epoll_poll (DBusSocketSet *set,
289 DBusSocketEvent *revents,
290 int max_events,
291 int timeout_ms)
292 {
293 DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
294 struct epoll_event events[N_STACK_DESCRIPTORS];
295 int n_ready;
296 int i;
297
298 _dbus_assert (max_events > 0);
299
300 n_ready = epoll_wait (self->epfd, events,
301 MIN (_DBUS_N_ELEMENTS (events), max_events),
302 timeout_ms);
303
304 if (n_ready <= 0)
305 return n_ready;
306
307 for (i = 0; i < n_ready; i++)
308 {
309 revents[i].fd = events[i].data.fd;
310 revents[i].flags = epoll_events_to_watch_flags (events[i].events);
311 }
312
313 return n_ready;
314 }
315
316 DBusSocketSetClass _dbus_socket_set_epoll_class = {
317 socket_set_epoll_free,
318 socket_set_epoll_add,
319 socket_set_epoll_remove,
320 socket_set_epoll_enable,
321 socket_set_epoll_disable,
322 socket_set_epoll_poll
323 };
324
325 #ifdef TEST_BEHAVIOUR_OF_EPOLLET
326 /* usage: cat /dev/null | ./epoll
327 *
328 * desired output:
329 * ctl ADD: 0
330 * wait for HUP, edge-triggered: 1
331 * wait for HUP again: 0
332 * ctl MOD: 0
333 * wait for HUP: 1
334 */
335
336 #include <sys/epoll.h>
337
338 #include <stdio.h>
339
/* Stand-alone check that an fd registered edge-triggered reports a hangup
 * once, stays quiet afterwards, and reports it again after being switched
 * back to level-triggered with EPOLL_CTL_MOD.
 *
 * Prints the return value of each epoll call; returns 1 if the epoll
 * instance cannot be created, 0 otherwise. */
int
main (void)
{
  /* Zero-initialised so that no indeterminate bytes (the data union is
   * never otherwise set here) are passed to the kernel. */
  struct epoll_event input = { 0 };
  struct epoll_event output = { 0 };
  int epfd = epoll_create1 (EPOLL_CLOEXEC);
  int fd = 0; /* stdin */
  int ret;

  /* Without an epoll instance every later call would just fail with
   * EBADF and print misleading results. */
  if (epfd == -1)
    {
      perror ("epoll_create1");
      return 1;
    }

  input.events = EPOLLHUP | EPOLLET;
  ret = epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &input);
  printf ("ctl ADD: %d\n", ret);

  ret = epoll_wait (epfd, &output, 1, -1);
  printf ("wait for HUP, edge-triggered: %d\n", ret);

  /* Short timeout: the edge has already been consumed, so nothing is
   * expected to be reported this time. */
  ret = epoll_wait (epfd, &output, 1, 1);
  printf ("wait for HUP again: %d\n", ret);

  /* Back to level-triggered: the still-pending hangup should be reported
   * again. */
  input.events = EPOLLHUP;
  ret = epoll_ctl (epfd, EPOLL_CTL_MOD, fd, &input);
  printf ("ctl MOD: %d\n", ret);

  ret = epoll_wait (epfd, &output, 1, -1);
  printf ("wait for HUP: %d\n", ret);

  return 0;
}
368
369 #endif /* TEST_BEHAVIOUR_OF_EPOLLET */
370
371 #endif /* !DOXYGEN_SHOULD_SKIP_THIS */
372