• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/tools/flip_server/epoll_server.h"
6 
7 #include <stdlib.h>  // for abort
8 #include <errno.h>    // for errno and strerror_r
9 #include <algorithm>
10 #include <iostream>
11 #include <utility>
12 #include <vector>
13 
14 #include "base/logging.h"
15 #include "base/timer.h"
16 
17 // Design notes: An efficient implementation of ready list has the following
18 // desirable properties:
19 //
20 // A. O(1) insertion into/removal from the list in any location.
21 // B. Once the callback is found by hash lookup using the fd, the lookup of
22 //    corresponding entry in the list is O(1).
23 // C. Safe insertion into/removal from the list during list iteration. (The
24 //    ready list's purpose is to enable completely event driven I/O model.
25 //    Thus, all the interesting bits happen in the callback. It is critical
26 //    to not place any restriction on the API during list iteration.)
27 //
28 // The current implementation achieves these goals with the following design:
29 //
30 // - The ready list is constructed as a doubly linked list to enable O(1)
31 //   insertion/removal (see man 3 queue).
32 // - The forward and backward links are directly embedded inside the
33 //   CBAndEventMask struct. This enables O(1) lookup in the list for a given
34 //   callback. (Technically, we could've used std::list of hash_set::iterator,
35 //   and keep a list::iterator in CBAndEventMask to achieve the same effect.
36 //   However, iterators have two problems: no way to portably invalidate them,
37 //   and no way to tell whether an iterator is singular or not. The only way to
38 //   overcome these issues is to keep bools in both places, but that throws off
39 //   memory alignment (up to 7 wasted bytes for each bool). The extra level of
40 //   indirection will also likely be less cache friendly. Direct manipulation
41 //   of link pointers makes it easier to retrieve the CBAndEventMask from the
42 //   list, easier to check whether an CBAndEventMask is in the list, uses less
43 //   memory (save 32 bytes/fd), and does not affect cache usage (we need to
44 //   read in the struct to use the callback anyway).)
45 // - Embed the fd directly into CBAndEventMask and switch to using hash_set.
46 //   This removes the need to store hash_map::iterator in the list just so that
47 //   we can get both the fd and the callback.
48 // - The ready list is "one shot": each entry is removed before OnEvent is
49 //   called. This removes the mutation-while-iterating problem.
50 // - Use two lists to keep track of callbacks. The ready_list_ is the one used
51 //   for registration. Before iteration, the ready_list_ is swapped into the
52 //   tmp_list_. Once iteration is done, tmp_list_ will be empty, and
53 //   ready_list_ will have all the new ready fds.
54 
// Capacity, in bytes, of the stack buffers handed to strerror_r() when an
// errno value is formatted for a log message.
namespace {
const int kErrorBufferSize = 256;
}  // namespace
57 
58 namespace net {
59 
60 // Clears the pipe and returns.  Used for waking the epoll server up.
61 class ReadPipeCallback : public EpollCallbackInterface {
62  public:
OnEvent(int fd,EpollEvent * event)63   void OnEvent(int fd, EpollEvent* event) {
64     DCHECK(event->in_events == EPOLLIN);
65     int data;
66     int data_read = 1;
67     // Read until the pipe is empty.
68     while (data_read > 0) {
69       data_read = read(fd, &data, sizeof(data));
70     }
71   }
OnShutdown(EpollServer * eps,int fd)72   void OnShutdown(EpollServer *eps, int fd) {}
OnRegistration(EpollServer *,int,int)73   void OnRegistration(EpollServer*, int, int) {}
OnModification(int,int)74   void OnModification(int, int) {}       // COV_NF_LINE
OnUnregistration(int,bool)75   void OnUnregistration(int, bool) {}    // COV_NF_LINE
76 };
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 ////////////////////////////////////////////////////////////////////////////////
80 
EpollServer()81 EpollServer::EpollServer()
82   : epoll_fd_(epoll_create(1024)),
83     timeout_in_us_(0),
84     recorded_now_in_us_(0),
85     ready_list_size_(0),
86     wake_cb_(new ReadPipeCallback),
87     read_fd_(-1),
88     write_fd_(-1),
89     in_wait_for_events_and_execute_callbacks_(false),
90     in_shutdown_(false) {
91   // ensure that the epoll_fd_ is valid.
92   CHECK_NE(epoll_fd_, -1);
93   LIST_INIT(&ready_list_);
94   LIST_INIT(&tmp_list_);
95 
96   int pipe_fds[2];
97   if (pipe(pipe_fds) < 0) {
98     // Unfortunately, it is impossible to test any such initialization in
99     // a constructor (as virtual methods do not yet work).
100     // This -could- be solved by moving initialization to an outside
101     // call...
102     int saved_errno = errno;
103     char buf[kErrorBufferSize];
104     LOG(FATAL) << "Error " << saved_errno
105                << " in pipe(): " << strerror_r(saved_errno, buf, sizeof(buf));
106   }
107   read_fd_ = pipe_fds[0];
108   write_fd_ = pipe_fds[1];
109   RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN);
110 }
111 
CleanupFDToCBMap()112 void EpollServer::CleanupFDToCBMap() {
113   FDToCBMap::iterator cb_iter = cb_map_.begin();
114   while (cb_iter != cb_map_.end()) {
115     int fd = cb_iter->fd;
116     CB* cb = cb_iter->cb;
117 
118     cb_iter->in_use = true;
119     if (cb) {
120       cb->OnShutdown(this, fd);
121     }
122 
123     cb_map_.erase(cb_iter);
124     cb_iter = cb_map_.begin();
125   }
126 }
127 
CleanupTimeToAlarmCBMap()128 void EpollServer::CleanupTimeToAlarmCBMap() {
129   TimeToAlarmCBMap::iterator erase_it;
130 
131   // Call OnShutdown() on alarms. Note that the structure of the loop
132   // is similar to the structure of loop in the function HandleAlarms()
133   for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
134        i != alarm_map_.end();
135       ) {
136     // Note that OnShutdown() can call UnregisterAlarm() on
137     // other iterators. OnShutdown() should not call UnregisterAlarm()
138     // on self because by definition the iterator is not valid any more.
139     i->second->OnShutdown(this);
140     erase_it = i;
141     ++i;
142     alarm_map_.erase(erase_it);
143   }
144 }
145 
~EpollServer()146 EpollServer::~EpollServer() {
147   DCHECK_EQ(in_shutdown_, false);
148   in_shutdown_ = true;
149 #ifdef EPOLL_SERVER_EVENT_TRACING
150   LOG(INFO) << "\n" << event_recorder_;
151 #endif
152   VLOG(2) << "Shutting down epoll server ";
153   CleanupFDToCBMap();
154 
155   LIST_INIT(&ready_list_);
156   LIST_INIT(&tmp_list_);
157 
158   CleanupTimeToAlarmCBMap();
159 
160   close(read_fd_);
161   close(write_fd_);
162   close(epoll_fd_);
163 }
164 
165 // Whether a CBAandEventMask is on the ready list is determined by a non-NULL
166 // le_prev pointer (le_next being NULL indicates end of list).
AddToReadyList(CBAndEventMask * cb_and_mask)167 inline void EpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) {
168   if (cb_and_mask->entry.le_prev == NULL) {
169     LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry);
170     ++ready_list_size_;
171   }
172 }
173 
RemoveFromReadyList(const CBAndEventMask & cb_and_mask)174 inline void EpollServer::RemoveFromReadyList(
175     const CBAndEventMask& cb_and_mask) {
176   if (cb_and_mask.entry.le_prev != NULL) {
177     LIST_REMOVE(&cb_and_mask, entry);
178     // Clean up all the ready list states. Don't bother with the other fields
179     // as they are initialized when the CBAandEventMask is added to the ready
180     // list. This saves a few cycles in the inner loop.
181     cb_and_mask.entry.le_prev = NULL;
182     --ready_list_size_;
183     if (ready_list_size_ == 0) {
184       DCHECK(ready_list_.lh_first == NULL);
185       DCHECK(tmp_list_.lh_first == NULL);
186     }
187   }
188 }
189 
RegisterFD(int fd,CB * cb,int event_mask)190 void EpollServer::RegisterFD(int fd, CB* cb, int event_mask) {
191   CHECK(cb);
192   VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask;
193   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
194   if (cb_map_.end() != fd_i) {
195     // do we just abort, or do we just unregister the other guy?
196     // for now, lets just unregister the other guy.
197 
198     // unregister any callback that may already be registered for this FD.
199     CB* other_cb = fd_i->cb;
200     if (other_cb) {
201       // Must remove from the ready list before erasing.
202       RemoveFromReadyList(*fd_i);
203       other_cb->OnUnregistration(fd, true);
204       ModFD(fd, event_mask);
205     } else {
206       // already unregistered, so just recycle the node.
207       AddFD(fd, event_mask);
208     }
209     fd_i->cb = cb;
210     fd_i->event_mask = event_mask;
211     fd_i->events_to_fake = 0;
212   } else {
213     AddFD(fd, event_mask);
214     cb_map_.insert(CBAndEventMask(cb, event_mask, fd));
215   }
216 
217 
218   // set the FD to be non-blocking.
219   SetNonblocking(fd);
220 
221   cb->OnRegistration(this, fd, event_mask);
222 }
223 
GetFlags(int fd)224 int EpollServer::GetFlags(int fd) {
225   return fcntl(fd, F_GETFL, 0);
226 }
227 
SetNonblocking(int fd)228 void EpollServer::SetNonblocking(int fd) {
229   int flags = GetFlags(fd);
230   if (flags == -1) {
231     int saved_errno = errno;
232     char buf[kErrorBufferSize];
233     LOG(FATAL) << "Error " << saved_errno
234                << " doing fcntl(" << fd << ", F_GETFL, 0): "
235                << strerror_r(saved_errno, buf, sizeof(buf));
236   }
237   if (!(flags & O_NONBLOCK)) {
238     int saved_flags = flags;
239     flags = SetFlags(fd, flags | O_NONBLOCK);
240     if (flags == -1) {
241       // bad.
242       int saved_errno = errno;
243       char buf[kErrorBufferSize];
244       LOG(FATAL) << "Error " << saved_errno
245         << " doing fcntl(" << fd << ", F_SETFL, " << saved_flags << "): "
246         << strerror_r(saved_errno, buf, sizeof(buf));
247     }
248   }
249 }
250 
epoll_wait_impl(int epfd,struct epoll_event * events,int max_events,int timeout_in_ms)251 int EpollServer::epoll_wait_impl(int epfd,
252                                  struct epoll_event* events,
253                                  int max_events,
254                                  int timeout_in_ms) {
255   return epoll_wait(epfd, events, max_events, timeout_in_ms);
256 }
257 
RegisterFDForWrite(int fd,CB * cb)258 void EpollServer::RegisterFDForWrite(int fd, CB* cb) {
259   RegisterFD(fd, cb, EPOLLOUT);
260 }
261 
RegisterFDForReadWrite(int fd,CB * cb)262 void EpollServer::RegisterFDForReadWrite(int fd, CB* cb) {
263   RegisterFD(fd, cb, EPOLLIN | EPOLLOUT);
264 }
265 
RegisterFDForRead(int fd,CB * cb)266 void EpollServer::RegisterFDForRead(int fd, CB* cb) {
267   RegisterFD(fd, cb, EPOLLIN);
268 }
269 
UnregisterFD(int fd)270 void EpollServer::UnregisterFD(int fd) {
271   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
272   if (cb_map_.end() == fd_i || fd_i->cb == NULL) {
273     // Doesn't exist in server, or has gone through UnregisterFD once and still
274     // inside the callchain of OnEvent.
275     return;
276   }
277 #ifdef EPOLL_SERVER_EVENT_TRACING
278   event_recorder_.RecordUnregistration(fd);
279 #endif
280   CB* cb = fd_i->cb;
281   // Since the links are embedded within the struct, we must remove it from the
282   // list before erasing it from the hash_set.
283   RemoveFromReadyList(*fd_i);
284   DelFD(fd);
285   cb->OnUnregistration(fd, false);
286   // fd_i->cb is NULL if that fd is unregistered inside the callchain of
287   // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent
288   // returns in order to add it to the ready list, we cannot have UnregisterFD
289   // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a
290   // condition that tells the EpollServer that this entry is unused at a later
291   // point.
292   if (!fd_i->in_use) {
293     cb_map_.erase(fd_i);
294   } else {
295     // Remove all trace of the registration, and just keep the node alive long
296     // enough so the code that calls OnEvent doesn't have to worry about
297     // figuring out whether the CBAndEventMask is valid or not.
298     fd_i->cb = NULL;
299     fd_i->event_mask = 0;
300     fd_i->events_to_fake = 0;
301   }
302 }
303 
ModifyCallback(int fd,int event_mask)304 void EpollServer::ModifyCallback(int fd, int event_mask) {
305   ModifyFD(fd, ~0, event_mask);
306 }
307 
StopRead(int fd)308 void EpollServer::StopRead(int fd) {
309   ModifyFD(fd, EPOLLIN, 0);
310 }
311 
StartRead(int fd)312 void EpollServer::StartRead(int fd) {
313   ModifyFD(fd, 0, EPOLLIN);
314 }
315 
StopWrite(int fd)316 void EpollServer::StopWrite(int fd) {
317   ModifyFD(fd, EPOLLOUT, 0);
318 }
319 
StartWrite(int fd)320 void EpollServer::StartWrite(int fd) {
321   ModifyFD(fd, 0, EPOLLOUT);
322 }
323 
HandleEvent(int fd,int event_mask)324 void EpollServer::HandleEvent(int fd, int event_mask) {
325 #ifdef EPOLL_SERVER_EVENT_TRACING
326   event_recorder_.RecordEpollEvent(fd, event_mask);
327 #endif
328   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
329   if (fd_i == cb_map_.end() || fd_i->cb == NULL) {
330     // Ignore the event.
331     // This could occur if epoll() returns a set of events, and
332     // while processing event A (earlier) we removed the callback
333     // for event B (and are now processing event B).
334     return;
335   }
336   fd_i->events_asserted = event_mask;
337   CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
338   AddToReadyList(cb_and_mask);
339 }
340 
341 class TrueFalseGuard {
342  public:
TrueFalseGuard(bool * guarded_bool)343   explicit TrueFalseGuard(bool* guarded_bool) : guarded_bool_(guarded_bool) {
344     DCHECK(guarded_bool_ != NULL);
345     DCHECK(*guarded_bool_ == false);
346     *guarded_bool_ = true;
347   }
~TrueFalseGuard()348   ~TrueFalseGuard() {
349     *guarded_bool_ = false;
350   }
351  private:
352   bool* guarded_bool_;
353 };
354 
WaitForEventsAndExecuteCallbacks()355 void EpollServer::WaitForEventsAndExecuteCallbacks() {
356   if (in_wait_for_events_and_execute_callbacks_) {
357     LOG(DFATAL) <<
358       "Attempting to call WaitForEventsAndExecuteCallbacks"
359       " when an ancestor to the current function is already"
360       " WaitForEventsAndExecuteCallbacks!";
361     // The line below is actually tested, but in coverage mode,
362     // we never see it.
363     return;  // COV_NF_LINE
364   }
365   TrueFalseGuard recursion_guard(&in_wait_for_events_and_execute_callbacks_);
366   if (alarm_map_.empty()) {
367     // no alarms, this is business as usual.
368     WaitForEventsAndCallHandleEvents(timeout_in_us_,
369                                      events_,
370                                      events_size_);
371     recorded_now_in_us_ = 0;
372     return;
373   }
374 
375   // store the 'now'. If we recomputed 'now' every iteration
376   // down below, then we might never exit that loop-- any
377   // long-running alarms might install other long-running
378   // alarms, etc. By storing it here now, we ensure that
379   // a more reasonable amount of work is done here.
380   int64 now_in_us  = NowInUsec();
381 
382   // Get the first timeout from the alarm_map where it is
383   // stored in absolute time.
384   int64 next_alarm_time_in_us =  alarm_map_.begin()->first;
385   VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us
386           << " now             = " << now_in_us
387           << " timeout_in_us = " << timeout_in_us_;
388 
389   int64 wait_time_in_us;
390   int64 alarm_timeout_in_us = next_alarm_time_in_us - now_in_us;
391 
392   // If the next alarm is sooner than the default timeout, or if there is no
393   // timeout (timeout_in_us_ == -1), wake up when the alarm should fire.
394   // Otherwise use the default timeout.
395   if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) {
396     wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64>(0));
397   } else {
398     wait_time_in_us = timeout_in_us_;
399   }
400 
401   VLOG(4) << "wait_time_in_us = " << wait_time_in_us;
402 
403   // wait for events.
404 
405   WaitForEventsAndCallHandleEvents(wait_time_in_us,
406                                    events_,
407                                    events_size_);
408   CallAndReregisterAlarmEvents();
409   recorded_now_in_us_ = 0;
410 }
411 
SetFDReady(int fd,int events_to_fake)412 void EpollServer::SetFDReady(int fd, int events_to_fake) {
413   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
414   if (cb_map_.end() != fd_i && fd_i->cb != NULL) {
415     // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring
416     // entry mutable is insufficient because LIST_HEAD_INSERT assigns the
417     // forward pointer of the list head to the current cb_and_mask, and the
418     // compiler complains that it can't assign a const T* to a T*.
419     CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
420     // Note that there is no clearly correct behavior here when
421     // cb_and_mask->events_to_fake != 0 and this function is called.
422     // Of the two operations:
423     //      cb_and_mask->events_to_fake = events_to_fake
424     //      cb_and_mask->events_to_fake |= events_to_fake
425     // the first was picked because it discourages users from calling
426     // SetFDReady repeatedly to build up the correct event set as it is more
427     // efficient to call SetFDReady once with the correct, final mask.
428     cb_and_mask->events_to_fake = events_to_fake;
429     AddToReadyList(cb_and_mask);
430   }
431 }
432 
SetFDNotReady(int fd)433 void EpollServer::SetFDNotReady(int fd) {
434   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
435   if (cb_map_.end() != fd_i) {
436     RemoveFromReadyList(*fd_i);
437   }
438 }
439 
IsFDReady(int fd) const440 bool EpollServer::IsFDReady(int fd) const {
441   FDToCBMap::const_iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
442   return (cb_map_.end() != fd_i &&
443           fd_i->cb != NULL &&
444           fd_i->entry.le_prev != NULL);
445 }
446 
VerifyReadyList() const447 void EpollServer::VerifyReadyList() const {
448   int count = 0;
449   CBAndEventMask* cur = ready_list_.lh_first;
450   for (; cur; cur = cur->entry.le_next) {
451     ++count;
452   }
453   for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) {
454     ++count;
455   }
456   CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count";
457 }
458 
RegisterAlarm(int64 timeout_time_in_us,AlarmCB * ac)459 void EpollServer::RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac) {
460   CHECK(ac);
461   if (ContainsAlarm(ac)) {
462     LOG(FATAL) << "Alarm already exists " << ac;
463   }
464   VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us;
465 
466   TimeToAlarmCBMap::iterator alarm_iter =
467       alarm_map_.insert(std::make_pair(timeout_time_in_us, ac));
468 
469   all_alarms_.insert(ac);
470   // Pass the iterator to the EpollAlarmCallbackInterface.
471   ac->OnRegistration(alarm_iter, this);
472 }
473 
474 // Unregister a specific alarm callback: iterator_token must be a
475 //  valid iterator. The caller must ensure the validity of the iterator.
UnregisterAlarm(const AlarmRegToken & iterator_token)476 void EpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) {
477   AlarmCB* cb = iterator_token->second;
478   alarm_map_.erase(iterator_token);
479   all_alarms_.erase(cb);
480   cb->OnUnregistration();
481 }
482 
NumFDsRegistered() const483 int EpollServer::NumFDsRegistered() const {
484   DCHECK(cb_map_.size() >= 1);
485   // Omit the internal FD (read_fd_)
486   return cb_map_.size() - 1;
487 }
488 
Wake()489 void EpollServer::Wake() {
490   char data = 'd';  // 'd' is for data.  It's good enough for me.
491   int rv = write(write_fd_, &data, 1);
492   DCHECK(rv == 1);
493 }
494 
NowInUsec() const495 int64 EpollServer::NowInUsec() const {
496   return base::Time::Now().ToInternalValue();
497 }
498 
ApproximateNowInUsec() const499 int64 EpollServer::ApproximateNowInUsec() const {
500   if (recorded_now_in_us_ != 0) {
501     return recorded_now_in_us_;
502   }
503   return this->NowInUsec();
504 }
505 
EventMaskToString(int event_mask)506 std::string EpollServer::EventMaskToString(int event_mask) {
507   std::string s;
508   if (event_mask & EPOLLIN) s += "EPOLLIN ";
509   if (event_mask & EPOLLPRI) s += "EPOLLPRI ";
510   if (event_mask & EPOLLOUT) s += "EPOLLOUT ";
511   if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM ";
512   if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND ";
513   if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM ";
514   if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND ";
515   if (event_mask & EPOLLMSG) s += "EPOLLMSG ";
516   if (event_mask & EPOLLERR) s += "EPOLLERR ";
517   if (event_mask & EPOLLHUP) s += "EPOLLHUP ";
518   if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT ";
519   if (event_mask & EPOLLET) s += "EPOLLET ";
520   return s;
521 }
522 
LogStateOnCrash()523 void EpollServer::LogStateOnCrash() {
524   LOG(ERROR) << "----------------------Epoll Server---------------------------";
525   LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_;
526   LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_;
527 
528   // Log sessions with alarms.
529   LOG(ERROR) << alarm_map_.size() << " alarms registered.";
530   for (TimeToAlarmCBMap::iterator it = alarm_map_.begin();
531        it != alarm_map_.end();
532        ++it) {
533     const bool skipped =
534         alarms_reregistered_and_should_be_skipped_.find(it->second)
535         != alarms_reregistered_and_should_be_skipped_.end();
536     LOG(ERROR) << "Alarm " << it->second << " registered at time " << it->first
537                << " and should be skipped = " << skipped;
538   }
539 
540   LOG(ERROR) << cb_map_.size() << " fd callbacks registered.";
541   for (FDToCBMap::iterator it = cb_map_.begin();
542        it != cb_map_.end();
543        ++it) {
544     LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask
545                << " registered with cb: " << it->cb;
546   }
547   LOG(ERROR) << "----------------------/Epoll Server--------------------------";
548 }
549 
550 
551 
552 ////////////////////////////////////////////////////////////////////////////////
553 ////////////////////////////////////////////////////////////////////////////////
554 
DelFD(int fd) const555 void EpollServer::DelFD(int fd) const {
556   struct epoll_event ee;
557   memset(&ee, 0, sizeof(ee));
558 #ifdef EPOLL_SERVER_EVENT_TRACING
559   event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD");
560 #endif
561   if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) {
562     int saved_errno = errno;
563     char buf[kErrorBufferSize];
564     LOG(FATAL) << "Epoll set removal error for fd " << fd << ": "
565                << strerror_r(saved_errno, buf, sizeof(buf));
566   }
567 }
568 
569 ////////////////////////////////////////
570 
AddFD(int fd,int event_mask) const571 void EpollServer::AddFD(int fd, int event_mask) const {
572   struct epoll_event ee;
573   memset(&ee, 0, sizeof(ee));
574   ee.events = event_mask | EPOLLERR | EPOLLHUP;
575   ee.data.fd = fd;
576 #ifdef EPOLL_SERVER_EVENT_TRACING
577   event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD");
578 #endif
579   if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) {
580     int saved_errno = errno;
581     char buf[kErrorBufferSize];
582     LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": "
583                << strerror_r(saved_errno, buf, sizeof(buf));
584   }
585 }
586 
587 ////////////////////////////////////////
588 
ModFD(int fd,int event_mask) const589 void EpollServer::ModFD(int fd, int event_mask) const {
590   struct epoll_event ee;
591   memset(&ee, 0, sizeof(ee));
592   ee.events = event_mask | EPOLLERR | EPOLLHUP;
593   ee.data.fd = fd;
594 #ifdef EPOLL_SERVER_EVENT_TRACING
595   event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD");
596 #endif
597   VLOG(3) <<  "modifying fd= " << fd << " "
598           << EventMaskToString(ee.events);
599   if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) {
600     int saved_errno = errno;
601     char buf[kErrorBufferSize];
602     LOG(FATAL) << "Epoll set modification error for fd " << fd << ": "
603                << strerror_r(saved_errno, buf, sizeof(buf));
604   }
605 }
606 
607 ////////////////////////////////////////
608 
ModifyFD(int fd,int remove_event,int add_event)609 void EpollServer::ModifyFD(int fd, int remove_event, int add_event) {
610   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
611   if (cb_map_.end() == fd_i) {
612     VLOG(2) << "Didn't find the fd " << fd << "in internal structures";
613     return;
614   }
615 
616   if (fd_i->cb != NULL) {
617     int & event_mask = fd_i->event_mask;
618     VLOG(3) << "fd= " << fd
619             << " event_mask before: " << EventMaskToString(event_mask);
620     event_mask &= ~remove_event;
621     event_mask |= add_event;
622 
623     VLOG(3) << " event_mask after: " << EventMaskToString(event_mask);
624 
625     ModFD(fd, event_mask);
626 
627     fd_i->cb->OnModification(fd, event_mask);
628   }
629 }
630 
WaitForEventsAndCallHandleEvents(int64 timeout_in_us,struct epoll_event events[],int events_size)631 void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
632                                                    struct epoll_event events[],
633                                                    int events_size) {
634   if (timeout_in_us == 0 || ready_list_.lh_first != NULL) {
635     // If ready list is not empty, then don't sleep at all.
636     timeout_in_us = 0;
637   } else if (timeout_in_us < 0) {
638     LOG(INFO) << "Negative epoll timeout: " << timeout_in_us
639               << "us; epoll will wait forever for events.";
640     // If timeout_in_us is < 0 we are supposed to Wait forever.  This means we
641     // should set timeout_in_us to -1000 so we will
642     // Wait(-1000/1000) == Wait(-1) == Wait forever.
643     timeout_in_us = -1000;
644   } else {
645     // If timeout is specified, and the ready list is empty.
646     if (timeout_in_us < 1000) {
647       timeout_in_us = 1000;
648     }
649   }
650   const int timeout_in_ms = timeout_in_us / 1000;
651   int nfds = epoll_wait_impl(epoll_fd_,
652                              events,
653                              events_size,
654                              timeout_in_ms);
655   VLOG(3) << "nfds=" << nfds;
656 
657 #ifdef EPOLL_SERVER_EVENT_TRACING
658   event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds);
659 #endif
660 
661   // If you're wondering why the NowInUsec() is recorded here, the answer is
662   // simple: If we did it before the epoll_wait_impl, then the max error for
663   // the ApproximateNowInUs() call would be as large as the maximum length of
664   // epoll_wait, which can be arbitrarily long. Since this would make
665   // ApproximateNowInUs() worthless, we instead record the time -after- we've
666   // done epoll_wait, which guarantees that the maximum error is the amount of
667   // time it takes to process all the events generated by epoll_wait.
668   recorded_now_in_us_ = NowInUsec();
669   if (nfds > 0) {
670     for (int i = 0; i < nfds; ++i) {
671       int event_mask = events[i].events;
672       int fd = events[i].data.fd;
673       HandleEvent(fd, event_mask);
674     }
675   } else if (nfds < 0) {
676     // Catch interrupted syscall and just ignore it and move on.
677     if (errno != EINTR && errno != 0) {
678       int saved_errno = errno;
679       char buf[kErrorBufferSize];
680       LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: "
681                  << strerror_r(saved_errno, buf, sizeof(buf));
682     }
683   }
684 
685   // Now run through the ready list.
686   if (ready_list_.lh_first) {
687     CallReadyListCallbacks();
688   }
689 }
690 
CallReadyListCallbacks()691 void EpollServer::CallReadyListCallbacks() {
692   // Check pre-conditions.
693   DCHECK(tmp_list_.lh_first == NULL);
694   // Swap out the ready_list_ into the tmp_list_ before traversing the list to
695   // enable SetFDReady() to just push new items into the ready_list_.
696   std::swap(ready_list_.lh_first, tmp_list_.lh_first);
697   if (tmp_list_.lh_first) {
698     tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first;
699     EpollEvent event(0, false);
700     while (tmp_list_.lh_first != NULL) {
701       DCHECK_GT(ready_list_size_, 0);
702       CBAndEventMask* cb_and_mask = tmp_list_.lh_first;
703       RemoveFromReadyList(*cb_and_mask);
704 
705       event.out_ready_mask = 0;
706       event.in_events =
707         cb_and_mask->events_asserted | cb_and_mask->events_to_fake;
708       // TODO(fenix): get rid of the two separate fields in cb_and_mask.
709       cb_and_mask->events_asserted = 0;
710       cb_and_mask->events_to_fake = 0;
711       {
712         // OnEvent() may call UnRegister, so we set in_use, here. Any
713         // UnRegister call will now simply set the cb to NULL instead of
714         // invalidating the cb_and_mask object (by deleting the object in the
715         // map to which cb_and_mask refers)
716         TrueFalseGuard in_use_guard(&(cb_and_mask->in_use));
717         cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event);
718       }
719 
720       // Since OnEvent may have called UnregisterFD, we must check here that
721       // the callback is still valid. If it isn't, then UnregisterFD *was*
722       // called, and we should now get rid of the object.
723       if (cb_and_mask->cb == NULL) {
724         cb_map_.erase(*cb_and_mask);
725       } else if (event.out_ready_mask != 0) {
726         cb_and_mask->events_to_fake = event.out_ready_mask;
727         AddToReadyList(cb_and_mask);
728       }
729     }
730   }
731   DCHECK(tmp_list_.lh_first == NULL);
732 }
733 
734 const int EpollServer::kMinimumEffectiveAlarmQuantum = 1000;
735 
736 // Alarms may be up to kMinimumEffectiveAlarmQuantum -1 us late.
DoRoundingOnNow(int64 now_in_us) const737 inline int64 EpollServer::DoRoundingOnNow(int64 now_in_us) const {
738   now_in_us /= kMinimumEffectiveAlarmQuantum;
739   now_in_us *= kMinimumEffectiveAlarmQuantum;
740   now_in_us += (2 * kMinimumEffectiveAlarmQuantum - 1);
741   return now_in_us;
742 }
743 
CallAndReregisterAlarmEvents()744 void EpollServer::CallAndReregisterAlarmEvents() {
745   int64 now_in_us = recorded_now_in_us_;
746   DCHECK_NE(0, recorded_now_in_us_);
747   now_in_us = DoRoundingOnNow(now_in_us);
748 
749   TimeToAlarmCBMap::iterator erase_it;
750 
751   // execute alarms.
752   for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
753        i != alarm_map_.end();
754       ) {
755     if (i->first > now_in_us) {
756       break;
757     }
758     AlarmCB* cb = i->second;
759     // Execute the OnAlarm() only if we did not register
760     // it in this loop itself.
761     const bool added_in_this_round =
762         alarms_reregistered_and_should_be_skipped_.find(cb)
763         != alarms_reregistered_and_should_be_skipped_.end();
764     if (added_in_this_round) {
765       ++i;
766       continue;
767     }
768     all_alarms_.erase(cb);
769     const int64 new_timeout_time_in_us = cb->OnAlarm();
770 
771     erase_it = i;
772     ++i;
773     alarm_map_.erase(erase_it);
774 
775     if (new_timeout_time_in_us > 0) {
776       // We add to hash_set only if the new timeout is <= now_in_us.
777       // if timeout is > now_in_us then we have no fear that this alarm
778       // can be reexecuted in this loop, and hence we do not need to
779       // worry about a recursive loop.
780       DVLOG(3) << "Reregistering alarm "
781                << " " << cb
782                << " " << new_timeout_time_in_us
783                << " " << now_in_us;
784       if (new_timeout_time_in_us <= now_in_us) {
785         alarms_reregistered_and_should_be_skipped_.insert(cb);
786       }
787       RegisterAlarm(new_timeout_time_in_us, cb);
788     }
789   }
790   alarms_reregistered_and_should_be_skipped_.clear();
791 }
792 
EpollAlarm()793 EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) {
794 }
795 
~EpollAlarm()796 EpollAlarm::~EpollAlarm() {
797   UnregisterIfRegistered();
798 }
799 
OnAlarm()800 int64 EpollAlarm::OnAlarm() {
801   registered_ = false;
802   return 0;
803 }
804 
OnRegistration(const EpollServer::AlarmRegToken & token,EpollServer * eps)805 void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken& token,
806                                 EpollServer* eps) {
807   DCHECK_EQ(false, registered_);
808 
809   token_ = token;
810   eps_ = eps;
811   registered_ = true;
812 }
813 
OnUnregistration()814 void EpollAlarm::OnUnregistration() {
815   registered_ = false;
816 }
817 
OnShutdown(EpollServer * eps)818 void EpollAlarm::OnShutdown(EpollServer* eps) {
819   registered_ = false;
820   eps_ = NULL;
821 }
822 
823 // If the alarm was registered, unregister it.
UnregisterIfRegistered()824 void EpollAlarm::UnregisterIfRegistered() {
825   if (!registered_) {
826     return;
827   }
828   eps_->UnregisterAlarm(token_);
829 }
830 
831 }  // namespace net
832 
833