• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
6 #define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
7 #pragma once
8 
9 #include <fcntl.h>
10 #include <sys/queue.h>
11 #include <ext/hash_map>  // it is annoying that gcc does this. oh well.
12 #include <ext/hash_set>
13 #include <map>
14 #include <string>
15 #include <utility>
16 #include <set>
17 #include <vector>
18 
19 // #define EPOLL_SERVER_EVENT_TRACING 1
20 //
21 // Defining EPOLL_SERVER_EVENT_TRACING
22 // causes code to exist which didn't before.
23 // This code tracks each event generated by the epollserver,
24 // as well as providing a per-fd-registered summary of
25 // events. Note that enabling this code vastly slows
26 // down operations, and uses substantially more
27 // memory. For these reasons, it should only be enabled when doing
28 // developer debugging at his/her workstation.
29 //
30 // A structure called 'EventRecorder' will exist when
31 // the macro is defined. See the EventRecorder class interface
32 // within the EpollServer class for more details.
33 #ifdef EPOLL_SERVER_EVENT_TRACING
34 #include <iostream>
35 #include "base/logging.h"
36 #endif
37 
38 #include "base/basictypes.h"
39 #include "base/memory/scoped_ptr.h"
40 #include <sys/epoll.h>
41 
42 namespace net {
43 
44 class EpollServer;
45 class EpollAlarmCallbackInterface;
46 class ReadPipeCallback;
47 
// Describes a single event handed to EpollCallbackInterface::OnEvent().
struct EpollEvent {
  // Args:
  //   events - the incoming event mask; stored in in_events.
  //   is_epoll_wait - accepted so existing call sites keep compiling, but it
  //                   is not stored anywhere. The name is commented out to
  //                   make that explicit and to avoid unused-parameter
  //                   warnings.
  EpollEvent(int events, bool /* is_epoll_wait */)
      : in_events(events),
        out_ready_mask(0) {
  }

  int in_events;            // incoming events
  int out_ready_mask;       // the new event mask for ready list (0 means don't
                            // get on the ready list). This field is always
                            // initialized to 0 when the event is passed to
                            // OnEvent.
};
60 
61 // Callbacks which go into EpollServers are expected to derive from this class.
62 class EpollCallbackInterface {
63  public:
64   // Summary:
65   //   Called when the callback is registered into a EpollServer.
66   // Args:
67   //   eps - the poll server into which this callback was registered
68   //   fd - the file descriptor which was registered
69   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
70   //                which was registered (and will initially be used
71   //                in the epoll() calls)
72   virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
73 
74   // Summary:
75   //   Called when the event_mask is modified (for a file-descriptor)
76   // Args:
77   //   fd - the file descriptor which was registered
78   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
79   //                which was is now curren (and will be used
80   //                in subsequent epoll() calls)
81   virtual void OnModification(int fd, int event_mask) = 0;
82 
83   // Summary:
84   //   Called whenever an event occurs on the file-descriptor.
85   //   This is where the bulk of processing is expected to occur.
86   // Args:
87   //   fd - the file descriptor which was registered
88   //   event - a struct that contains the event mask (composed of EPOLLIN,
89   //           EPOLLOUT, etc), a flag that indicates whether this is a true
90   //           epoll_wait event vs one from the ready list, and an output
91   //           parameter for OnEvent to inform the EpollServer whether to put
92   //           this fd on the ready list.
93   virtual void OnEvent(int fd, EpollEvent* event) = 0;
94 
95   // Summary:
96   //   Called when the file-descriptor is unregistered from the poll-server.
97   // Args:
98   //   fd - the file descriptor which was registered, and of this call, is now
99   //        unregistered.
100   //   replaced - If true, this callback is being replaced by another, otherwise
101   //              it is simply being removed.
102   virtual void OnUnregistration(int fd, bool replaced) = 0;
103 
104   // Summary:
105   //   Called when the epoll server is shutting down.  This is different from
106   //   OnUnregistration because the subclass may want to clean up memory.
107   //   This is called in leiu of OnUnregistration.
108   // Args:
109   //  fd - the file descriptor which was registered.
110   virtual void OnShutdown(EpollServer* eps, int fd) = 0;
111 
~EpollCallbackInterface()112   virtual ~EpollCallbackInterface() {}
113 
114  protected:
EpollCallbackInterface()115   EpollCallbackInterface() {}
116 };
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 ////////////////////////////////////////////////////////////////////////////////
120 
121 class EpollServer {
122  public:
123   typedef EpollAlarmCallbackInterface AlarmCB;
124   typedef EpollCallbackInterface CB;
125 
126   typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap;
127   typedef TimeToAlarmCBMap::iterator AlarmRegToken;
128 
129   // Summary:
130   //   Constructor:
131   //    By default, we don't wait any amount of time for events, and
132   //    we suggest to the epoll-system that we're going to use on-the-order
133   //    of 1024 FDs.
134   EpollServer();
135 
136   ////////////////////////////////////////
137 
138   // Destructor
139   virtual ~EpollServer();
140 
141   ////////////////////////////////////////
142 
143   // Summary
144   //   Register a callback to be called whenever an event contained
145   //   in the set of events included in event_mask occurs on the
146   //   file-descriptor 'fd'
147   //
148   //   Note that only one callback is allowed to be registered for
149   //   any specific file-descriptor.
150   //
151   //   If a callback is registered for a file-descriptor which has already
152   //   been registered, then the previous callback is unregistered with
153   //   the 'replaced' flag set to true. I.e. the previous callback's
154   //   OnUnregistration() function is called like so:
155   //      OnUnregistration(fd, true);
156   //
157   //  The epoll server does NOT take on ownership of the callback: the callback
158   //  creator is responsible for managing that memory.
159   //
160   // Args:
161   //   fd - a valid file-descriptor
162   //   cb - an instance of a subclass of EpollCallbackInterface
163   //   event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
164   //                the events for which the callback would like to be
165   //                called.
166   virtual void RegisterFD(int fd, CB* cb, int event_mask);
167 
168   ////////////////////////////////////////
169 
170   // Summary:
171   //   A shortcut for RegisterFD which sets things up such that the
172   //   callback is called when 'fd' is available for writing.
173   // Args:
174   //   fd - a valid file-descriptor
175   //   cb - an instance of a subclass of EpollCallbackInterface
176   virtual void RegisterFDForWrite(int fd, CB* cb);
177 
178   ////////////////////////////////////////
179 
180   // Summary:
181   //   A shortcut for RegisterFD which sets things up such that the
182   //   callback is called when 'fd' is available for reading or writing.
183   // Args:
184   //   fd - a valid file-descriptor
185   //   cb - an instance of a subclass of EpollCallbackInterface
186   virtual void RegisterFDForReadWrite(int fd, CB* cb);
187 
188   ////////////////////////////////////////
189 
190   // Summary:
191   //   A shortcut for RegisterFD which sets things up such that the
192   //   callback is called when 'fd' is available for reading.
193   // Args:
194   //   fd - a valid file-descriptor
195   //   cb - an instance of a subclass of EpollCallbackInterface
196   virtual void RegisterFDForRead(int fd, CB* cb);
197 
198   ////////////////////////////////////////
199 
200   // Summary:
201   //   Removes the FD and the associated callback from the pollserver.
202   //   If the callback is registered with other FDs, they will continue
203   //   to be processed using the callback without modification.
204   //   If the file-descriptor specified is not registered in the
205   //   epoll_server, then nothing happens as a result of this call.
206   // Args:
207   //   fd - the file-descriptor which should no-longer be monitored.
208   virtual void UnregisterFD(int fd);
209 
210   ////////////////////////////////////////
211 
212   // Summary:
213   //   Modifies the event mask for the file-descriptor, replacing
214   //   the old event_mask with the new one specified here.
215   //   If the file-descriptor specified is not registered in the
216   //   epoll_server, then nothing happens as a result of this call.
217   // Args:
218   //   fd - the fd whose event mask should be modified.
219   //   event_mask - the new event mask.
220   virtual void ModifyCallback(int fd, int event_mask);
221 
222   ////////////////////////////////////////
223 
224   // Summary:
225   //   Modifies the event mask for the file-descriptor such that we
226   //   no longer request events when 'fd' is readable.
227   //   If the file-descriptor specified is not registered in the
228   //   epoll_server, then nothing happens as a result of this call.
229   // Args:
230   //   fd - the fd whose event mask should be modified.
231   virtual void StopRead(int fd);
232 
233   ////////////////////////////////////////
234 
235   // Summary:
236   //   Modifies the event mask for the file-descriptor such that we
237   //   request events when 'fd' is readable.
238   //   If the file-descriptor specified is not registered in the
239   //   epoll_server, then nothing happens as a result of this call.
240   // Args:
241   //   fd - the fd whose event mask should be modified.
242   virtual void StartRead(int fd);
243 
244   ////////////////////////////////////////
245 
246   // Summary:
247   //   Modifies the event mask for the file-descriptor such that we
248   //   no longer request events when 'fd' is writable.
249   //   If the file-descriptor specified is not registered in the
250   //   epoll_server, then nothing happens as a result of this call.
251   // Args:
252   //   fd - the fd whose event mask should be modified.
253   virtual void StopWrite(int fd);
254 
255   ////////////////////////////////////////
256 
257   // Summary:
258   //   Modifies the event mask for the file-descriptor such that we
259   //   request events when 'fd' is writable.
260   //   If the file-descriptor specified is not registered in the
261   //   epoll_server, then nothing happens as a result of this call.
262   // Args:
263   //   fd - the fd whose event mask should be modified.
264   virtual void StartWrite(int fd);
265 
266   ////////////////////////////////////////
267 
268   // Summary:
269   //   Looks up the callback associated with the file-descriptor 'fd'.
270   //   If a callback is associated with this file-descriptor, then
271   //   its OnEvent() method is called with the file-descriptor 'fd',
272   //   and event_mask 'event_mask'
273   //
274   //   If no callback is registered for this file-descriptor, nothing
275   //   will happen as a result of this call.
276   //
277   //   This function is used internally by the EpollServer, but is
278   //   available publicly so that events might be 'faked'. Calling
279   //   this function with an fd and event_mask is equivalent (as far
280   //   as the callback is concerned) to having a real event generated
281   //   by epoll (except, of course, that read(), etc won't necessarily
282   //   be able to read anything)
283   // Args:
284   //   fd - the file-descriptor on which an event has occurred.
285   //   event_mask - a bitmask representing the events which have occurred
286   //                on/for this fd. This bitmask is composed of
287   //                POLLIN, POLLOUT, etc.
288   //
289   void HandleEvent(int fd, int event_mask);
290 
291   // Summary:
292   //   Call this when you want the pollserver to
293   //   wait for events and execute the callbacks associated with
294   //   the file-descriptors on which those events have occurred.
295   //   Depending on the value of timeout_in_us_, this may or may
296   //   not return immediately. Please reference the set_timeout_in_us()
297   //   function for the specific behaviour.
298   virtual void WaitForEventsAndExecuteCallbacks();
299 
300   // Summary:
301   //   When an fd is registered to use edge trigger notification, the ready
302   //   list can be used to simulate level trigger semantics. Edge trigger
303   //   registration doesn't send an initial event, and only rising edge (going
304   //   from blocked to unblocked) events are sent. A callback can put itself on
305   //   the ready list by calling SetFDReady() after calling RegisterFD(). The
306   //   OnEvent method of all callbacks associated with the fds on the ready
307   //   list will be called immediately after processing the events returned by
308   //   epoll_wait(). The fd is removed from the ready list before the
309   //   callback's OnEvent() method is invoked. To stay on the ready list, the
310   //   OnEvent() (or some function in that call chain) must call SetFDReady
311   //   again. When a fd is unregistered using UnregisterFD(), the fd is
312   //   automatically removed from the ready list.
313   //
314   //   When the callback for a edge triggered fd hits the falling edge (about
315   //   to block, either because of it got an EAGAIN, or had a short read/write
316   //   operation), it should remove itself from the ready list using
317   //   SetFDNotReady() (since OnEvent cannot distinguish between invocation
318   //   from the ready list vs from a normal epoll event). All four ready list
319   //   methods are safe to be called  within the context of the callbacks.
320   //
321   //   Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
322   //   that are registered with the EpollServer will be put on the ready list.
323   //   SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
324   //   doesn't know about the fd passed in.
325   //
326   //   Since the ready list cannot reliably determine proper set of events
327   //   which should be sent to the callback, SetFDReady() requests the caller
328   //   to provide the ready list with the event mask, which will be used later
329   //   when OnEvent() is invoked by the ready list. Hence, the event_mask
330   //   passed to SetFDReady() does not affect the actual epoll registration of
331   //   the fd with the kernel. If a fd is already put on the ready list, and
332   //   SetFDReady() is called again for that fd with a different event_mask,
333   //   the event_mask will be updated.
334   virtual void SetFDReady(int fd, int events_to_fake);
335 
336   virtual void SetFDNotReady(int fd);
337 
338   // Summary:
339   //   IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
340   //   debugging tools and for writing unit tests.
341   //   IsFDReady() returns whether a fd is in the ready list.
342   //   ReadyListSize() returns the number of fds on the ready list.
343   //   VerifyReadyList() checks the consistency of internal data structure. It
344   //   will CHECK if it finds an error.
345   virtual bool IsFDReady(int fd) const;
346 
ReadyListSize()347   size_t ReadyListSize() const { return ready_list_size_; }
348 
349   void VerifyReadyList() const;
350 
351   ////////////////////////////////////////
352 
353   // Summary:
354   //   Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
355   //   If the callback returns a positive number from its OnAlarm() function,
356   //   then the callback will be re-registered at that time, else the alarm
357   //   owner is responsible for freeing up memory.
358   //
359   //   Important: A given AlarmCB* can not be registered again if it is already
360   //    registered. If a user wants to register a callback again it should first
361   //    unregister the previous callback before calling RegisterAlarm again.
362   // Args:
363   //   timeout_time_in_us - the absolute time at which the alarm should go off
364   //   ac - the alarm which will be called.
365   virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
366 
367   // Summary:
368   //   Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() +
369   //   delta_in_us). While this is somewhat less accurate (see the description
370   //   for ApproximateNowInUs() to see how 'approximate'), the error is never
371   //   worse than the amount of time it takes to process all events in one
372   //   WaitForEvents.  As with 'RegisterAlarm()', if the callback returns a
373   //   positive number from its OnAlarm() function, then the callback will be
374   //   re-registered at that time, else the alarm owner is responsible for
375   //   freeing up memory.
376   //   Note that this function is purely a convienence. The
377   //   same thing may be accomplished by using RegisterAlarm with
378   //   ApproximateNowInUs() directly.
379   //
380   //   Important: A give AlarmCB* can not be registered again if it is already
381   //    registered. If a user wants to register a callback again it should first
382   //    unregister the previous callback before calling RegisterAlarm again.
383   // Args:
384   //   delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at
385   //                 which point the alarm should go off.
386   //   ac - the alarm which will be called.
RegisterAlarmApproximateDelta(int64 delta_in_us,AlarmCB * ac)387   void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
388     RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac);
389   }
390 
391   ////////////////////////////////////////
392 
393   // Summary:
394   //   Unregister  the alarm referred to by iterator_token; Callers should
395   //   be warned that a token may have become already invalid when OnAlarm()
396   //   is called, was unregistered, or OnShutdown was called on that alarm.
397   // Args:
398   //    iterator_token - iterator to the alarm callback to unregister.
399   virtual void UnregisterAlarm(
400       const EpollServer::AlarmRegToken& iterator_token);
401 
402   ////////////////////////////////////////
403 
404   // Summary:
405   //   returns the number of file-descriptors registered in this EpollServer.
406   // Returns:
407   //   number of FDs registered (discounting the internal pipe used for Wake)
408   virtual int NumFDsRegistered() const;
409 
410   // Summary:
411   //   Force the epoll server to wake up (by writing to an internal pipe).
412   virtual void Wake();
413 
414   // Summary:
415   //   Wrapper around WallTimer's NowInUsec.  We do this so that we can test
416   //   EpollServer without using the system clock (and can avoid the flakiness
417   //   that would ensue)
418   // Returns:
419   //   the current time as number of microseconds since the Unix epoch.
420   virtual int64 NowInUsec() const;
421 
422   // Summary:
423   //   Since calling NowInUsec() many thousands of times per
424   //   WaitForEventsAndExecuteCallbacks function call is, to say the least,
425   //   inefficient, we allow users to use an approximate time instead. The
426   //   time returned from this function is as accurate as NowInUsec() when
427   //   WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
428   //   callstack.
429   //   However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
430   //   this function returns the time at which the
431   //   WaitForEventsAndExecuteCallbacks function started to process events or
432   //   alarms.
433   //
434   //   Essentially, this function makes available a fast and mostly accurate
435   //   mechanism for getting the time for any function handling an event or
436   //   alarm. When functions which are not handling callbacks or alarms call
437   //   this function, they get the slow and "absolutely" accurate time.
438   //
439   //   Users should be encouraged to use this function.
440   // Returns:
441   //   the "approximate" current time as number of microseconds since the Unix
442   //   epoch.
443   virtual int64 ApproximateNowInUsec() const;
444 
445   static std::string EventMaskToString(int event_mask);
446 
447   // Summary:
448   //   Logs the state of the epoll server with LOG(ERROR).
449   void LogStateOnCrash();
450 
451   // Summary:
452   //   Set the timeout to the value specified.
453   //   If the timeout is set to a negative number,
454   //      WaitForEventsAndExecuteCallbacks() will only return when an event has
455   //      occured
456   //   If the timeout is set to zero,
457   //      WaitForEventsAndExecuteCallbacks() will return immediately
458   //   If the timeout is set to a positive number,
459   //      WaitForEventsAndExecuteCallbacks() will return when an event has
460   //      occured, or when timeout_in_us microseconds has elapsed, whichever
461   //      is first.
462   //  Args:
463   //    timeout_in_us - value specified depending on behaviour desired.
464   //                    See above.
set_timeout_in_us(int64 timeout_in_us)465   void set_timeout_in_us(int64 timeout_in_us) {
466     timeout_in_us_ = timeout_in_us;
467   }
468 
469   ////////////////////////////////////////
470 
471   // Summary:
472   //   Accessor for the current value of timeout_in_us.
timeout_in_us()473   int timeout_in_us() const { return timeout_in_us_; }
474 
475   // Summary:
476   // Returns true when the EpollServer() is being destroyed.
in_shutdown()477   bool in_shutdown() const { return in_shutdown_; }
478 
ContainsAlarm(EpollAlarmCallbackInterface * alarm)479   bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
480     return all_alarms_.find(alarm) != all_alarms_.end();
481   }
482 
483   // Summary:
484   //   A function for implementing the ready list. It invokes OnEvent for each
485   //   of the fd in the ready list, and takes care of adding them back to the
486   //   ready list if the callback requests it (by checking that out_ready_mask
487   //   is non-zero).
488   void CallReadyListCallbacks();
489 
490   // Granularity at which time moves when considering what alarms are on.
491   // See function DoRoundingOnNow() for exact usage.
492   static const int kMinimumEffectiveAlarmQuantum;
493  protected:
494 
495   virtual int GetFlags(int fd);
SetFlags(int fd,int flags)496   inline int SetFlags(int fd, int flags) {
497     return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
498   }
499 
500   virtual void SetNonblocking(int fd);
501 
502   // This exists here so that we can override this function in unittests
503   // in order to make effective mock EpollServer objects.
504   virtual int epoll_wait_impl(int epfd,
505                               struct epoll_event* events,
506                               int max_events,
507                               int timeout_in_ms);
508 
509   // this struct is used internally, and is never used by anything external
510   // to this class. Some of its members are declared mutable to get around the
511   // restriction imposed by hash_set. Since hash_set knows nothing about the
512   // objects it stores, it has to assume that every bit of the object is used
513   // in the hash function and equal_to comparison. Thus hash_set::iterator is a
514   // const iterator. In this case, the only thing that must stay constant is
515   // fd. Everything else are just along for the ride and changing them doesn't
516   // compromise the hash_set integrity.
517   struct CBAndEventMask {
CBAndEventMaskCBAndEventMask518     CBAndEventMask()
519         : cb(NULL),
520           fd(-1),
521           event_mask(0),
522           events_asserted(0),
523           events_to_fake(0),
524           in_use(false) {
525       entry.le_next = NULL;
526       entry.le_prev = NULL;
527     }
528 
CBAndEventMaskCBAndEventMask529     CBAndEventMask(EpollCallbackInterface* cb,
530                    int event_mask,
531                    int fd)
532         : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0),
533           events_to_fake(0), in_use(false) {
534       entry.le_next = NULL;
535       entry.le_prev = NULL;
536     }
537 
538     // Required operator for hash_set. Normally operator== should be a free
539     // standing function. However, since CBAndEventMask is a protected type and
540     // it will never be a base class, it makes no difference.
541     bool operator==(const CBAndEventMask& cb_and_mask) const {
542       return fd == cb_and_mask.fd;
543     }
544     // A callback. If the fd is unregistered inside the callchain of OnEvent,
545     // the cb will be set to NULL.
546     mutable EpollCallbackInterface* cb;
547 
548     mutable LIST_ENTRY(CBAndEventMask) entry;
549     // file descriptor registered with the epoll server.
550     int fd;
551     // the current event_mask registered for this callback.
552     mutable int event_mask;
553     // the event_mask that was returned by epoll
554     mutable int events_asserted;
555     // the event_mask for the ready list to use to call OnEvent.
556     mutable int events_to_fake;
557     // toggle around calls to OnEvent to tell UnregisterFD to not erase the
558     // iterator because HandleEvent is using it.
559     mutable bool in_use;
560   };
561 
562   // Custom hash function to be used by hash_set.
563   struct CBAndEventMaskHash {
operatorCBAndEventMaskHash564     size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
565       return static_cast<size_t>(cb_and_eventmask.fd);
566     }
567   };
568 
569   typedef __gnu_cxx::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
570 
571   // the following four functions are OS-specific, and are likely
572   // to be changed in a subclass if the poll/select method is changed
573   // from epoll.
574 
575   // Summary:
576   //   Deletes a file-descriptor from the set of FDs that should be
577   //   monitored with epoll.
578   //   Note that this only deals with modifying data relating -directly-
579   //   with the epoll call-- it does not modify any data within the
580   //   epoll_server.
581   // Args:
582   //   fd - the file descriptor to-be-removed from the monitoring set
583   virtual void DelFD(int fd) const;
584 
585   ////////////////////////////////////////
586 
587   // Summary:
588   //   Adds a file-descriptor to the set of FDs that should be
589   //   monitored with epoll.
590   //   Note that this only deals with modifying data relating -directly-
591   //   with the epoll call.
592   // Args:
593   //   fd - the file descriptor to-be-added to the monitoring set
594   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
595   //                 OR'd together) which will be associated with this
596   //                 FD initially.
597   virtual void AddFD(int fd, int event_mask) const;
598 
599   ////////////////////////////////////////
600 
601   // Summary:
602   //   Modifies a file-descriptor in the set of FDs that should be
603   //   monitored with epoll.
604   //   Note that this only deals with modifying data relating -directly-
605   //   with the epoll call.
606   // Args:
607   //   fd - the file descriptor to-be-added to the monitoring set
608   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
609   //                 OR'd together) which will be associated with this
610   //                 FD after this call.
611   virtual void ModFD(int fd, int event_mask) const;
612 
613   ////////////////////////////////////////
614 
615   // Summary:
616   //   Modifies the event mask associated with an FD in the set of
617   //   data needed by epoll.
618   //   Events are removed before they are added, thus, if ~0 is put
619   //   in 'remove_event', whatever is put in 'add_event' will be
620   //   the new event mask.
621   //   If the file-descriptor specified is not registered in the
622   //   epoll_server, then nothing happens as a result of this call.
623   // Args:
624   //   fd - the file descriptor whose event mask is to be modified
625   //   remove_event - the events which are to be removed from the current
626   //                  event_mask
627   //   add_event - the events which are to be added to the current event_mask
628   //
629   //
630   virtual void ModifyFD(int fd, int remove_event, int add_event);
631 
632   ////////////////////////////////////////
633 
634   // Summary:
635   //   Waits for events, and calls HandleEvent() for each
636   //   fd, event pair discovered to possibly have an event.
637   //   Note that a callback (B) may get a spurious event if
638   //   another callback (A) has closed a file-descriptor N, and
639   //   the callback (B) has a newly opened file-descriptor, which
640   //   also happens to be N.
641   virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
642                                                 struct epoll_event events[],
643                                                 int events_size);
644 
645 
646 
647   // Summary:
648   //   An internal function for implementing the ready list. It adds a fd's
649   //   CBAndEventMask to the ready list. If the fd is already on the ready
650   //   list, it is a no-op.
651   void AddToReadyList(CBAndEventMask* cb_and_mask);
652 
653   // Summary:
654   //   An internal function for implementing the ready list. It removes a fd's
655   //   CBAndEventMask from the ready list. If the fd is not on the ready list,
656   //   it is a no-op.
657   void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
658 
659   // Summary:
660   // Calls any pending alarms that should go off and reregisters them if they
661   // were recurring.
662   virtual void CallAndReregisterAlarmEvents();
663 
664   // The file-descriptor created for epolling
665   int epoll_fd_;
666 
667   // The mapping of file-descriptor to CBAndEventMasks
668   FDToCBMap cb_map_;
669 
670   // Custom hash function to be used by hash_set.
671   struct AlarmCBHash {
operatorAlarmCBHash672     size_t operator()(AlarmCB*const& p) const {
673       return reinterpret_cast<size_t>(p);
674     }
675   };
676 
677 
678   // TODO(sushantj): Having this hash_set is avoidable. We currently have it
679   // only so that we can enforce stringent checks that a caller can not register
680   // the same alarm twice. One option is to have an implementation in which
681   // this hash_set is used only in the debug mode.
682   typedef __gnu_cxx::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap;
683   AlarmCBMap all_alarms_;
684 
685   TimeToAlarmCBMap alarm_map_;
686 
687   // The amount of time in microseconds that we'll wait before returning
688   // from the WaitForEventsAndExecuteCallbacks() function.
689   // If this is positive, wait that many microseconds.
690   // If this is negative, wait forever, or for the first event that occurs
691   // If this is zero, never wait for an event.
692   int64 timeout_in_us_;
693 
694   // This is nonzero only after the invocation of epoll_wait_impl within
695   // WaitForEventsAndCallHandleEvents and before the function
696   // WaitForEventsAndExecuteCallbacks returns.  At all other times, this is
697   // zero. This enables us to have relatively accurate time returned from the
698   // ApproximateNowInUsec() function. See that function for more details.
699   int64 recorded_now_in_us_;
700 
701   // This is used to implement CallAndReregisterAlarmEvents. This stores
702   // all alarms that were reregistered because OnAlarm() returned a
703   // value > 0 and the time at which they should be executed is less than
704   // the current time.  By storing such alarms in this map we ensure
705   // that while calling CallAndReregisterAlarmEvents we do not call
706   // OnAlarm on any alarm in this set. This ensures that we do not
707   // go in an infinite loop.
708   AlarmCBMap alarms_reregistered_and_should_be_skipped_;
709 
710   LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
711   LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
712   int ready_list_size_;
713   // TODO(alyssar): make this into something that scales up.
714   static const int events_size_ = 256;
715   struct epoll_event events_[256];
716 
717   // This controls the granularity for alarms.
718   // See function CallAndReregisterAlarmEvents()
719   // TODO(sushantj): Add test for this.
720   int64 DoRoundingOnNow(int64 now_in_us) const;
721 
722 #ifdef EPOLL_SERVER_EVENT_TRACING
723   struct EventRecorder {
724    public:
EventRecorderEventRecorder725     EventRecorder() : num_records_(0), record_threshold_(10000) {}
726 
~EventRecorderEventRecorder727     ~EventRecorder() {
728       Clear();
729     }
730 
731     // When a number of events equals the record threshold,
732     // the collected data summary for all FDs will be written
733     // to LOG(INFO). Note that this does not include the
734     // individual events (if you'reinterested in those, you'll
735     // have to get at them programmatically).
736     // After any such flushing to LOG(INFO) all events will
737     // be cleared.
738     // Note that the definition of an 'event' is a bit 'hazy',
739     // as it includes the 'Unregistration' event, and perhaps
740     // others.
set_record_thresholdEventRecorder741     void set_record_threshold(int64 new_threshold) {
742       record_threshold_ = new_threshold;
743     }
744 
ClearEventRecorder745     void Clear() {
746       for (int i = 0; i < debug_events_.size(); ++i) {
747         delete debug_events_[i];
748       }
749       debug_events_.clear();
750       unregistered_fds_.clear();
751       event_counts_.clear();
752     }
753 
MaybeRecordAndClearEventRecorder754     void MaybeRecordAndClear() {
755       ++num_records_;
756       if ((num_records_ > record_threshold_) &&
757           (record_threshold_ > 0)) {
758         LOG(INFO) << "\n" << *this;
759         num_records_ = 0;
760         Clear();
761       }
762     }
763 
RecordFDMaskEventEventRecorder764     void RecordFDMaskEvent(int fd, int mask, const char* function) {
765       FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
766       debug_events_.push_back(fdmo);
767       MaybeRecordAndClear();
768     }
769 
RecordEpollWaitEventEventRecorder770     void RecordEpollWaitEvent(int timeout_in_ms,
771                               int num_events_generated) {
772       EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms,
773                                                   num_events_generated);
774       debug_events_.push_back(ewo);
775       MaybeRecordAndClear();
776     }
777 
RecordEpollEventEventRecorder778     void RecordEpollEvent(int fd, int event_mask) {
779       Events& events_for_fd = event_counts_[fd];
780       events_for_fd.AssignFromMask(event_mask);
781       MaybeRecordAndClear();
782     }
783 
784     friend ostream& operator<<(ostream& os, const EventRecorder& er) {
785       for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
786         os << "fd: " << er.unregistered_fds_[i] << "\n";
787         os << er.unregistered_fds_[i];
788       }
789       for (EventCountsMap::const_iterator i = er.event_counts_.begin();
790            i != er.event_counts_.end();
791            ++i) {
792         os << "fd: " << i->first << "\n";
793         os << i->second;
794       }
795       for (int i = 0; i < er.debug_events_.size(); ++i) {
796         os << *(er.debug_events_[i]) << "\n";
797       }
798       return os;
799     }
800 
RecordUnregistrationEventRecorder801     void RecordUnregistration(int fd) {
802       EventCountsMap::iterator i = event_counts_.find(fd);
803       if (i != event_counts_.end()) {
804         unregistered_fds_.push_back(i->second);
805         event_counts_.erase(i);
806       }
807       MaybeRecordAndClear();
808     }
809 
810    protected:
811     class DebugOutput {
812      public:
813       friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
814         debug_output.OutputToStream(os);
815         return os;
816       }
817       virtual void OutputToStream(ostream* os) const = 0;
~DebugOutputEventRecorder818       virtual ~DebugOutput() {}
819     };
820 
821     class FDMaskOutput : public DebugOutput {
822      public:
FDMaskOutputEventRecorder823       FDMaskOutput(int fd, int mask, const char* function) :
824           fd_(fd), mask_(mask), function_(function) {}
OutputToStreamEventRecorder825       virtual void OutputToStream(ostream* os) const {
826         (*os) << "func: " << function_
827               << "\tfd: " << fd_;
828         if (mask_ != 0) {
829            (*os) << "\tmask: " << EventMaskToString(mask_);
830         }
831       }
832       int fd_;
833       int mask_;
834       const char* function_;
835     };
836 
837     class EpollWaitOutput : public DebugOutput {
838      public:
EpollWaitOutputEventRecorder839       EpollWaitOutput(int timeout_in_ms,
840                       int num_events_generated) :
841           timeout_in_ms_(timeout_in_ms),
842           num_events_generated_(num_events_generated) {}
OutputToStreamEventRecorder843       virtual void OutputToStream(ostream* os) const {
844         (*os) << "timeout_in_ms: " << timeout_in_ms_
845               << "\tnum_events_generated: " << num_events_generated_;
846       }
847      protected:
848       int timeout_in_ms_;
849       int num_events_generated_;
850     };
851 
852     struct Events {
EventsEventRecorder::Events853       Events() :
854           epoll_in(0),
855           epoll_pri(0),
856           epoll_out(0),
857           epoll_rdnorm(0),
858           epoll_rdband(0),
859           epoll_wrnorm(0),
860           epoll_wrband(0),
861           epoll_msg(0),
862           epoll_err(0),
863           epoll_hup(0),
864           epoll_oneshot(0),
865           epoll_et(0) {}
866 
AssignFromMaskEventRecorder::Events867       void AssignFromMask(int event_mask) {
868         if (event_mask & EPOLLIN) ++epoll_in;
869         if (event_mask & EPOLLPRI) ++epoll_pri;
870         if (event_mask & EPOLLOUT) ++epoll_out;
871         if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
872         if (event_mask & EPOLLRDBAND) ++epoll_rdband;
873         if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
874         if (event_mask & EPOLLWRBAND) ++epoll_wrband;
875         if (event_mask & EPOLLMSG) ++epoll_msg;
876         if (event_mask & EPOLLERR) ++epoll_err;
877         if (event_mask & EPOLLHUP) ++epoll_hup;
878         if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
879         if (event_mask & EPOLLET) ++epoll_et;
880       };
881 
882       friend ostream& operator<<(ostream& os, const Events& ev) {
883         if (ev.epoll_in) {
884           os << "\t      EPOLLIN: " << ev.epoll_in << "\n";
885         }
886         if (ev.epoll_pri) {
887           os << "\t     EPOLLPRI: " << ev.epoll_pri << "\n";
888         }
889         if (ev.epoll_out) {
890           os << "\t     EPOLLOUT: " << ev.epoll_out << "\n";
891         }
892         if (ev.epoll_rdnorm) {
893           os << "\t  EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
894         }
895         if (ev.epoll_rdband) {
896           os << "\t  EPOLLRDBAND: " << ev.epoll_rdband << "\n";
897         }
898         if (ev.epoll_wrnorm) {
899           os << "\t  EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
900         }
901         if (ev.epoll_wrband) {
902           os << "\t  EPOLLWRBAND: " << ev.epoll_wrband << "\n";
903         }
904         if (ev.epoll_msg) {
905           os << "\t     EPOLLMSG: " << ev.epoll_msg << "\n";
906         }
907         if (ev.epoll_err) {
908           os << "\t     EPOLLERR: " << ev.epoll_err << "\n";
909         }
910         if (ev.epoll_hup) {
911           os << "\t     EPOLLHUP: " << ev.epoll_hup << "\n";
912         }
913         if (ev.epoll_oneshot) {
914           os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
915         }
916         if (ev.epoll_et) {
917           os << "\t      EPOLLET: " << ev.epoll_et << "\n";
918         }
919         return os;
920       }
921 
922       unsigned int epoll_in;
923       unsigned int epoll_pri;
924       unsigned int epoll_out;
925       unsigned int epoll_rdnorm;
926       unsigned int epoll_rdband;
927       unsigned int epoll_wrnorm;
928       unsigned int epoll_wrband;
929       unsigned int epoll_msg;
930       unsigned int epoll_err;
931       unsigned int epoll_hup;
932       unsigned int epoll_oneshot;
933       unsigned int epoll_et;
934     };
935 
936     std::vector<DebugOutput*> debug_events_;
937     std::vector<Events> unregistered_fds_;
938     typedef __gnu_cxx::hash_map<int, Events> EventCountsMap;
939     EventCountsMap event_counts_;
940     int64 num_records_;
941     int64 record_threshold_;
942   };
943 
ClearEventRecords()944   void ClearEventRecords() {
945     event_recorder_.Clear();
946   }
WriteEventRecords(ostream * os)947   void WriteEventRecords(ostream* os) const {
948     (*os) << event_recorder_;
949   }
950 
951   mutable EventRecorder event_recorder_;
952 
953 #endif
954 
955  private:
956   // Helper functions used in the destructor.
957   void CleanupFDToCBMap();
958   void CleanupTimeToAlarmCBMap();
959 
960   // The callback registered to the fds below.  As the purpose of their
961   // registration is to wake the epoll server it just clears the pipe and
962   // returns.
963   scoped_ptr<ReadPipeCallback> wake_cb_;
964 
965   // A pipe owned by the epoll server.  The server will be registered to listen
966   // on read_fd_ and can be woken by Wake() which writes to write_fd_.
967   int read_fd_;
968   int write_fd_;
969 
970   // This boolean is checked to see if it is false at the top of the
971   // WaitForEventsAndExecuteCallbacks function. If not, then it either returns
972   // without doing work, and logs to ERROR, or aborts the program (in
973   // DEBUG mode). If so, then it sets the bool to true, does work, and
974   // sets it back to false when done. This catches unwanted recursion.
975   bool in_wait_for_events_and_execute_callbacks_;
976 
977   // True while the EpollServer is being destroyed.
978   bool in_shutdown_;
979 
980   DISALLOW_COPY_AND_ASSIGN(EpollServer);
981 };
982 
983 class EpollAlarmCallbackInterface {
984  public:
985   // Summary:
986   //   Called when an alarm times out. Invalidates an AlarmRegToken.
987   //   WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
988   //   delete it, as the reference is no longer valid.
989   // Returns:
990   //   the unix time (in microseconds) at which this alarm should be signaled
991   //   again, or 0 if the alarm should be removed.
992   virtual int64 OnAlarm() = 0;
993 
994   // Summary:
995   //   Called when the an alarm is registered. Invalidates an AlarmRegToken.
996   // Args:
997   //   token: the iterator to the the alarm registered in the alarm map.
998   //   WARNING: this token becomes invalid when the alarm fires, is
999   //   unregistered, or OnShutdown is called on that alarm.
1000   //   eps: the epoll server the alarm is registered with.
1001   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
1002                               EpollServer* eps) = 0;
1003 
1004   // Summary:
1005   //   Called when the an alarm is unregistered.
1006   //   WARNING: It is not valid to unregister a callback and then use the token
1007   //   that was saved to refer to the callback.
1008   virtual void OnUnregistration() = 0;
1009 
1010   // Summary:
1011   //   Called when the epoll server is shutting down.
1012   //   Invalidates the AlarmRegToken that was given when this alarm was
1013   //   registered.
1014   virtual void OnShutdown(EpollServer* eps) = 0;
1015 
~EpollAlarmCallbackInterface()1016   virtual ~EpollAlarmCallbackInterface() {}
1017 
1018  protected:
EpollAlarmCallbackInterface()1019   EpollAlarmCallbackInterface() {}
1020 };
1021 
1022 // A simple alarm which unregisters itself on destruction.
1023 //
1024 // PLEASE NOTE:
1025 // Any classes overriding these functions must either call the implementation
1026 // of the parent class, or is must otherwise make sure that the 'registered_'
1027 // boolean and the token, 'token_', are updated appropriately.
1028 class EpollAlarm : public EpollAlarmCallbackInterface {
1029  public:
1030   EpollAlarm();
1031 
1032   virtual ~EpollAlarm();
1033 
1034   // Marks the alarm as unregistered and returns 0.  The return value may be
1035   // safely ignored by subclasses.
1036   virtual int64 OnAlarm();
1037 
1038   // Marks the alarm as registered, and stores the token.
1039   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
1040                               EpollServer* eps);
1041 
1042   // Marks the alarm as unregistered.
1043   virtual void OnUnregistration();
1044 
1045   // Marks the alarm as unregistered.
1046   virtual void OnShutdown(EpollServer* eps);
1047 
1048   // If the alarm was registered, unregister it.
1049   void UnregisterIfRegistered();
1050 
registered()1051   bool registered() const { return registered_; }
1052 
eps()1053   const EpollServer* eps() const { return eps_; }
1054 
1055  private:
1056   EpollServer::AlarmRegToken token_;
1057   EpollServer* eps_;
1058   bool registered_;
1059 };
1060 
1061 }  // namespace net
1062 
1063 #endif  // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
1064 
1065