• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_
6 #define NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_
7 
8 #include <fcntl.h>
9 #include <sys/queue.h>
10 #include <map>
11 #include <set>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 // #define EPOLL_SERVER_EVENT_TRACING 1
17 //
18 // Defining EPOLL_SERVER_EVENT_TRACING
19 // causes code to exist which didn't before.
20 // This code tracks each event generated by the epollserver,
21 // as well as providing a per-fd-registered summary of
22 // events. Note that enabling this code vastly slows
23 // down operations, and uses substantially more
24 // memory. For these reasons, it should only be enabled when doing
25 // developer debugging at his/her workstation.
26 //
27 // A structure called 'EventRecorder' will exist when
28 // the macro is defined. See the EventRecorder class interface
29 // within the EpollServer class for more details.
30 #ifdef EPOLL_SERVER_EVENT_TRACING
31 #include <ostream>
32 #include "base/logging.h"
33 #endif
34 
35 #include "base/basictypes.h"
36 #include "base/compiler_specific.h"
37 #include "base/containers/hash_tables.h"
38 #include "base/memory/scoped_ptr.h"
39 #include <sys/epoll.h>
40 
41 namespace net {
42 
43 class EpollServer;
44 class EpollAlarmCallbackInterface;
45 class ReadPipeCallback;
46 
// Event descriptor handed to EpollCallbackInterface::OnEvent(). It carries the
// incoming event mask and lets the callback report back whether the fd should
// be placed on the ready list.
struct EpollEvent {
  // Args:
  //   events - the incoming event mask; stored in in_events.
  //   is_epoll_wait - accepted so existing callers keep compiling, but it is
  //                   not stored anywhere in this struct (left unnamed to
  //                   avoid an unused-parameter warning).
  EpollEvent(int events, bool /* is_epoll_wait */)
      : in_events(events),
        out_ready_mask(0) {
  }

  int in_events;            // incoming events
  int out_ready_mask;       // the new event mask for ready list (0 means don't
                            // get on the ready list). This field is always
                            // initialized to 0 when the event is passed to
                            // OnEvent.
};
59 
60 // Callbacks which go into EpollServers are expected to derive from this class.
61 class EpollCallbackInterface {
62  public:
63   // Summary:
64   //   Called when the callback is registered into a EpollServer.
65   // Args:
66   //   eps - the poll server into which this callback was registered
67   //   fd - the file descriptor which was registered
68   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
69   //                which was registered (and will initially be used
70   //                in the epoll() calls)
71   virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
72 
73   // Summary:
74   //   Called when the event_mask is modified (for a file-descriptor)
75   // Args:
76   //   fd - the file descriptor which was registered
77   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
78   //                which was is now curren (and will be used
79   //                in subsequent epoll() calls)
80   virtual void OnModification(int fd, int event_mask) = 0;
81 
82   // Summary:
83   //   Called whenever an event occurs on the file-descriptor.
84   //   This is where the bulk of processing is expected to occur.
85   // Args:
86   //   fd - the file descriptor which was registered
87   //   event - a struct that contains the event mask (composed of EPOLLIN,
88   //           EPOLLOUT, etc), a flag that indicates whether this is a true
89   //           epoll_wait event vs one from the ready list, and an output
90   //           parameter for OnEvent to inform the EpollServer whether to put
91   //           this fd on the ready list.
92   virtual void OnEvent(int fd, EpollEvent* event) = 0;
93 
94   // Summary:
95   //   Called when the file-descriptor is unregistered from the poll-server.
96   // Args:
97   //   fd - the file descriptor which was registered, and of this call, is now
98   //        unregistered.
99   //   replaced - If true, this callback is being replaced by another, otherwise
100   //              it is simply being removed.
101   virtual void OnUnregistration(int fd, bool replaced) = 0;
102 
103   // Summary:
104   //   Called when the epoll server is shutting down.  This is different from
105   //   OnUnregistration because the subclass may want to clean up memory.
106   //   This is called in leiu of OnUnregistration.
107   // Args:
108   //  fd - the file descriptor which was registered.
109   virtual void OnShutdown(EpollServer* eps, int fd) = 0;
110 
~EpollCallbackInterface()111   virtual ~EpollCallbackInterface() {}
112 
113  protected:
EpollCallbackInterface()114   EpollCallbackInterface() {}
115 };
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 ////////////////////////////////////////////////////////////////////////////////
119 
120 class EpollServer {
121  public:
122   typedef EpollAlarmCallbackInterface AlarmCB;
123   typedef EpollCallbackInterface CB;
124 
125   typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap;
126   typedef TimeToAlarmCBMap::iterator AlarmRegToken;
127 
128   // Summary:
129   //   Constructor:
130   //    By default, we don't wait any amount of time for events, and
131   //    we suggest to the epoll-system that we're going to use on-the-order
132   //    of 1024 FDs.
133   EpollServer();
134 
135   ////////////////////////////////////////
136 
137   // Destructor
138   virtual ~EpollServer();
139 
140   ////////////////////////////////////////
141 
142   // Summary
143   //   Register a callback to be called whenever an event contained
144   //   in the set of events included in event_mask occurs on the
145   //   file-descriptor 'fd'
146   //
147   //   Note that only one callback is allowed to be registered for
148   //   any specific file-descriptor.
149   //
150   //   If a callback is registered for a file-descriptor which has already
151   //   been registered, then the previous callback is unregistered with
152   //   the 'replaced' flag set to true. I.e. the previous callback's
153   //   OnUnregistration() function is called like so:
154   //      OnUnregistration(fd, true);
155   //
156   //  The epoll server does NOT take on ownership of the callback: the callback
157   //  creator is responsible for managing that memory.
158   //
159   // Args:
160   //   fd - a valid file-descriptor
161   //   cb - an instance of a subclass of EpollCallbackInterface
162   //   event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
163   //                the events for which the callback would like to be
164   //                called.
165   virtual void RegisterFD(int fd, CB* cb, int event_mask);
166 
167   ////////////////////////////////////////
168 
169   // Summary:
170   //   A shortcut for RegisterFD which sets things up such that the
171   //   callback is called when 'fd' is available for writing.
172   // Args:
173   //   fd - a valid file-descriptor
174   //   cb - an instance of a subclass of EpollCallbackInterface
175   virtual void RegisterFDForWrite(int fd, CB* cb);
176 
177   ////////////////////////////////////////
178 
179   // Summary:
180   //   A shortcut for RegisterFD which sets things up such that the
181   //   callback is called when 'fd' is available for reading or writing.
182   // Args:
183   //   fd - a valid file-descriptor
184   //   cb - an instance of a subclass of EpollCallbackInterface
185   virtual void RegisterFDForReadWrite(int fd, CB* cb);
186 
187   ////////////////////////////////////////
188 
189   // Summary:
190   //   A shortcut for RegisterFD which sets things up such that the
191   //   callback is called when 'fd' is available for reading.
192   // Args:
193   //   fd - a valid file-descriptor
194   //   cb - an instance of a subclass of EpollCallbackInterface
195   virtual void RegisterFDForRead(int fd, CB* cb);
196 
197   ////////////////////////////////////////
198 
199   // Summary:
200   //   Removes the FD and the associated callback from the pollserver.
201   //   If the callback is registered with other FDs, they will continue
202   //   to be processed using the callback without modification.
203   //   If the file-descriptor specified is not registered in the
204   //   epoll_server, then nothing happens as a result of this call.
205   // Args:
206   //   fd - the file-descriptor which should no-longer be monitored.
207   virtual void UnregisterFD(int fd);
208 
209   ////////////////////////////////////////
210 
211   // Summary:
212   //   Modifies the event mask for the file-descriptor, replacing
213   //   the old event_mask with the new one specified here.
214   //   If the file-descriptor specified is not registered in the
215   //   epoll_server, then nothing happens as a result of this call.
216   // Args:
217   //   fd - the fd whose event mask should be modified.
218   //   event_mask - the new event mask.
219   virtual void ModifyCallback(int fd, int event_mask);
220 
221   ////////////////////////////////////////
222 
223   // Summary:
224   //   Modifies the event mask for the file-descriptor such that we
225   //   no longer request events when 'fd' is readable.
226   //   If the file-descriptor specified is not registered in the
227   //   epoll_server, then nothing happens as a result of this call.
228   // Args:
229   //   fd - the fd whose event mask should be modified.
230   virtual void StopRead(int fd);
231 
232   ////////////////////////////////////////
233 
234   // Summary:
235   //   Modifies the event mask for the file-descriptor such that we
236   //   request events when 'fd' is readable.
237   //   If the file-descriptor specified is not registered in the
238   //   epoll_server, then nothing happens as a result of this call.
239   // Args:
240   //   fd - the fd whose event mask should be modified.
241   virtual void StartRead(int fd);
242 
243   ////////////////////////////////////////
244 
245   // Summary:
246   //   Modifies the event mask for the file-descriptor such that we
247   //   no longer request events when 'fd' is writable.
248   //   If the file-descriptor specified is not registered in the
249   //   epoll_server, then nothing happens as a result of this call.
250   // Args:
251   //   fd - the fd whose event mask should be modified.
252   virtual void StopWrite(int fd);
253 
254   ////////////////////////////////////////
255 
256   // Summary:
257   //   Modifies the event mask for the file-descriptor such that we
258   //   request events when 'fd' is writable.
259   //   If the file-descriptor specified is not registered in the
260   //   epoll_server, then nothing happens as a result of this call.
261   // Args:
262   //   fd - the fd whose event mask should be modified.
263   virtual void StartWrite(int fd);
264 
265   ////////////////////////////////////////
266 
267   // Summary:
268   //   Looks up the callback associated with the file-descriptor 'fd'.
269   //   If a callback is associated with this file-descriptor, then
270   //   its OnEvent() method is called with the file-descriptor 'fd',
271   //   and event_mask 'event_mask'
272   //
273   //   If no callback is registered for this file-descriptor, nothing
274   //   will happen as a result of this call.
275   //
276   //   This function is used internally by the EpollServer, but is
277   //   available publicly so that events might be 'faked'. Calling
278   //   this function with an fd and event_mask is equivalent (as far
279   //   as the callback is concerned) to having a real event generated
280   //   by epoll (except, of course, that read(), etc won't necessarily
281   //   be able to read anything)
282   // Args:
283   //   fd - the file-descriptor on which an event has occurred.
284   //   event_mask - a bitmask representing the events which have occurred
285   //                on/for this fd. This bitmask is composed of
286   //                POLLIN, POLLOUT, etc.
287   //
288   void HandleEvent(int fd, int event_mask);
289 
290   // Summary:
291   //   Call this when you want the pollserver to
292   //   wait for events and execute the callbacks associated with
293   //   the file-descriptors on which those events have occurred.
294   //   Depending on the value of timeout_in_us_, this may or may
295   //   not return immediately. Please reference the set_timeout()
296   //   function for the specific behaviour.
297   virtual void WaitForEventsAndExecuteCallbacks();
298 
299   // Summary:
300   //   When an fd is registered to use edge trigger notification, the ready
301   //   list can be used to simulate level trigger semantics. Edge trigger
302   //   registration doesn't send an initial event, and only rising edge (going
303   //   from blocked to unblocked) events are sent. A callback can put itself on
304   //   the ready list by calling SetFDReady() after calling RegisterFD(). The
305   //   OnEvent method of all callbacks associated with the fds on the ready
306   //   list will be called immediately after processing the events returned by
307   //   epoll_wait(). The fd is removed from the ready list before the
308   //   callback's OnEvent() method is invoked. To stay on the ready list, the
309   //   OnEvent() (or some function in that call chain) must call SetFDReady
310   //   again. When a fd is unregistered using UnregisterFD(), the fd is
311   //   automatically removed from the ready list.
312   //
313   //   When the callback for a edge triggered fd hits the falling edge (about
314   //   to block, either because it got an EAGAIN, or had a short read/write
315   //   operation), it should remove itself from the ready list using
316   //   SetFDNotReady() (since OnEvent cannot distinguish between invocation
317   //   from the ready list vs from a normal epoll event). All four ready list
318   //   methods are safe to be called  within the context of the callbacks.
319   //
320   //   Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
321   //   that are registered with the EpollServer will be put on the ready list.
322   //   SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
323   //   doesn't know about the fd passed in.
324   //
325   //   Since the ready list cannot reliably determine proper set of events
326   //   which should be sent to the callback, SetFDReady() requests the caller
327   //   to provide the ready list with the event mask, which will be used later
328   //   when OnEvent() is invoked by the ready list. Hence, the event_mask
329   //   passed to SetFDReady() does not affect the actual epoll registration of
330   //   the fd with the kernel. If a fd is already put on the ready list, and
331   //   SetFDReady() is called again for that fd with a different event_mask,
332   //   the event_mask will be updated.
333   virtual void SetFDReady(int fd, int events_to_fake);
334 
335   virtual void SetFDNotReady(int fd);
336 
337   // Summary:
338   //   IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
339   //   debugging tools and for writing unit tests.
340   //   IsFDReady() returns whether a fd is in the ready list.
341   //   ReadyListSize() returns the number of fds on the ready list.
342   //   VerifyReadyList() checks the consistency of internal data structure. It
343   //   will CHECK if it finds an error.
344   virtual bool IsFDReady(int fd) const;
345 
ReadyListSize()346   size_t ReadyListSize() const { return ready_list_size_; }
347 
348   void VerifyReadyList() const;
349 
350   ////////////////////////////////////////
351 
352   // Summary:
353   //   Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
354   //   If the callback returns a positive number from its OnAlarm() function,
355   //   then the callback will be re-registered at that time, else the alarm
356   //   owner is responsible for freeing up memory.
357   //
358   //   Important: A given AlarmCB* can not be registered again if it is already
359   //    registered. If a user wants to register a callback again it should first
360   //    unregister the previous callback before calling RegisterAlarm again.
361   // Args:
362   //   timeout_time_in_us - the absolute time at which the alarm should go off
363   //   ac - the alarm which will be called.
364   virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
365 
366   // Summary:
367   //   Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() +
368   //   delta_in_us). While this is somewhat less accurate (see the description
369   //   for ApproximateNowInUs() to see how 'approximate'), the error is never
370   //   worse than the amount of time it takes to process all events in one
371   //   WaitForEvents.  As with 'RegisterAlarm()', if the callback returns a
372   //   positive number from its OnAlarm() function, then the callback will be
373   //   re-registered at that time, else the alarm owner is responsible for
374   //   freeing up memory.
375   //   Note that this function is purely a convienence. The
376   //   same thing may be accomplished by using RegisterAlarm with
377   //   ApproximateNowInUs() directly.
378   //
379   //   Important: A give AlarmCB* can not be registered again if it is already
380   //    registered. If a user wants to register a callback again it should first
381   //    unregister the previous callback before calling RegisterAlarm again.
382   // Args:
383   //   delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at
384   //                 which point the alarm should go off.
385   //   ac - the alarm which will be called.
RegisterAlarmApproximateDelta(int64 delta_in_us,AlarmCB * ac)386   void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
387     RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac);
388   }
389 
390   ////////////////////////////////////////
391 
392   // Summary:
393   //   Unregister  the alarm referred to by iterator_token; Callers should
394   //   be warned that a token may have become already invalid when OnAlarm()
395   //   is called, was unregistered, or OnShutdown was called on that alarm.
396   // Args:
397   //    iterator_token - iterator to the alarm callback to unregister.
398   virtual void UnregisterAlarm(
399       const EpollServer::AlarmRegToken& iterator_token);
400 
401   ////////////////////////////////////////
402 
403   // Summary:
404   //   returns the number of file-descriptors registered in this EpollServer.
405   // Returns:
406   //   number of FDs registered (discounting the internal pipe used for Wake)
407   virtual int NumFDsRegistered() const;
408 
409   // Summary:
410   //   Force the epoll server to wake up (by writing to an internal pipe).
411   virtual void Wake();
412 
413   // Summary:
414   //   Wrapper around WallTimer's NowInUsec.  We do this so that we can test
415   //   EpollServer without using the system clock (and can avoid the flakiness
416   //   that would ensue)
417   // Returns:
418   //   the current time as number of microseconds since the Unix epoch.
419   virtual int64 NowInUsec() const;
420 
421   // Summary:
422   //   Since calling NowInUsec() many thousands of times per
423   //   WaitForEventsAndExecuteCallbacks function call is, to say the least,
424   //   inefficient, we allow users to use an approximate time instead. The
425   //   time returned from this function is as accurate as NowInUsec() when
426   //   WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
427   //   callstack.
428   //   However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
429   //   this function returns the time at which the
430   //   WaitForEventsAndExecuteCallbacks function started to process events or
431   //   alarms.
432   //
433   //   Essentially, this function makes available a fast and mostly accurate
434   //   mechanism for getting the time for any function handling an event or
435   //   alarm. When functions which are not handling callbacks or alarms call
436   //   this function, they get the slow and "absolutely" accurate time.
437   //
438   //   Users should be encouraged to use this function.
439   // Returns:
440   //   the "approximate" current time as number of microseconds since the Unix
441   //   epoch.
442   virtual int64 ApproximateNowInUsec() const;
443 
444   static std::string EventMaskToString(int event_mask);
445 
446   // Summary:
447   //   Logs the state of the epoll server with LOG(ERROR).
448   void LogStateOnCrash();
449 
450   // Summary:
451   //   Set the timeout to the value specified.
452   //   If the timeout is set to a negative number,
453   //      WaitForEventsAndExecuteCallbacks() will only return when an event has
454   //      occured
455   //   If the timeout is set to zero,
456   //      WaitForEventsAndExecuteCallbacks() will return immediately
457   //   If the timeout is set to a positive number,
458   //      WaitForEventsAndExecuteCallbacks() will return when an event has
459   //      occured, or when timeout_in_us microseconds has elapsed, whichever
460   //      is first.
461   //  Args:
462   //    timeout_in_us - value specified depending on behaviour desired.
463   //                    See above.
set_timeout_in_us(int64 timeout_in_us)464   void set_timeout_in_us(int64 timeout_in_us) {
465     timeout_in_us_ = timeout_in_us;
466   }
467 
468   ////////////////////////////////////////
469 
470   // Summary:
471   //   Accessor for the current value of timeout_in_us.
timeout_in_us()472   int timeout_in_us() const { return timeout_in_us_; }
473 
474   // Summary:
475   // Returns true when the EpollServer() is being destroyed.
in_shutdown()476   bool in_shutdown() const { return in_shutdown_; }
477 
ContainsAlarm(EpollAlarmCallbackInterface * alarm)478   bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
479     return all_alarms_.find(alarm) != all_alarms_.end();
480   }
481 
482   // Summary:
483   //   A function for implementing the ready list. It invokes OnEvent for each
484   //   of the fd in the ready list, and takes care of adding them back to the
485   //   ready list if the callback requests it (by checking that out_ready_mask
486   //   is non-zero).
487   void CallReadyListCallbacks();
488 
489  protected:
490   virtual int GetFlags(int fd);
SetFlags(int fd,int flags)491   inline int SetFlags(int fd, int flags) {
492     return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
493   }
494 
495   virtual void SetNonblocking(int fd);
496 
497   // This exists here so that we can override this function in unittests
498   // in order to make effective mock EpollServer objects.
499   virtual int epoll_wait_impl(int epfd,
500                               struct epoll_event* events,
501                               int max_events,
502                               int timeout_in_ms);
503 
504   // this struct is used internally, and is never used by anything external
505   // to this class. Some of its members are declared mutable to get around the
506   // restriction imposed by hash_set. Since hash_set knows nothing about the
507   // objects it stores, it has to assume that every bit of the object is used
508   // in the hash function and equal_to comparison. Thus hash_set::iterator is a
509   // const iterator. In this case, the only thing that must stay constant is
510   // fd. Everything else are just along for the ride and changing them doesn't
511   // compromise the hash_set integrity.
512   struct CBAndEventMask {
CBAndEventMaskCBAndEventMask513     CBAndEventMask()
514         : cb(NULL),
515           fd(-1),
516           event_mask(0),
517           events_asserted(0),
518           events_to_fake(0),
519           in_use(false) {
520       entry.le_next = NULL;
521       entry.le_prev = NULL;
522     }
523 
CBAndEventMaskCBAndEventMask524     CBAndEventMask(EpollCallbackInterface* cb,
525                    int event_mask,
526                    int fd)
527         : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0),
528           events_to_fake(0), in_use(false) {
529       entry.le_next = NULL;
530       entry.le_prev = NULL;
531     }
532 
533     // Required operator for hash_set. Normally operator== should be a free
534     // standing function. However, since CBAndEventMask is a protected type and
535     // it will never be a base class, it makes no difference.
536     bool operator==(const CBAndEventMask& cb_and_mask) const {
537       return fd == cb_and_mask.fd;
538     }
539     // A callback. If the fd is unregistered inside the callchain of OnEvent,
540     // the cb will be set to NULL.
541     mutable EpollCallbackInterface* cb;
542 
543     mutable LIST_ENTRY(CBAndEventMask) entry;
544     // file descriptor registered with the epoll server.
545     int fd;
546     // the current event_mask registered for this callback.
547     mutable int event_mask;
548     // the event_mask that was returned by epoll
549     mutable int events_asserted;
550     // the event_mask for the ready list to use to call OnEvent.
551     mutable int events_to_fake;
552     // toggle around calls to OnEvent to tell UnregisterFD to not erase the
553     // iterator because HandleEvent is using it.
554     mutable bool in_use;
555   };
556 
557   // Custom hash function to be used by hash_set.
558   struct CBAndEventMaskHash {
operatorCBAndEventMaskHash559     size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
560       return static_cast<size_t>(cb_and_eventmask.fd);
561     }
562   };
563 
564   typedef base::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
565 
566   // the following four functions are OS-specific, and are likely
567   // to be changed in a subclass if the poll/select method is changed
568   // from epoll.
569 
570   // Summary:
571   //   Deletes a file-descriptor from the set of FDs that should be
572   //   monitored with epoll.
573   //   Note that this only deals with modifying data relating -directly-
574   //   with the epoll call-- it does not modify any data within the
575   //   epoll_server.
576   // Args:
577   //   fd - the file descriptor to-be-removed from the monitoring set
578   virtual void DelFD(int fd) const;
579 
580   ////////////////////////////////////////
581 
582   // Summary:
583   //   Adds a file-descriptor to the set of FDs that should be
584   //   monitored with epoll.
585   //   Note that this only deals with modifying data relating -directly-
586   //   with the epoll call.
587   // Args:
588   //   fd - the file descriptor to-be-added to the monitoring set
589   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
590   //                 OR'd together) which will be associated with this
591   //                 FD initially.
592   virtual void AddFD(int fd, int event_mask) const;
593 
594   ////////////////////////////////////////
595 
596   // Summary:
597   //   Modifies a file-descriptor in the set of FDs that should be
598   //   monitored with epoll.
599   //   Note that this only deals with modifying data relating -directly-
600   //   with the epoll call.
601   // Args:
602   //   fd - the file descriptor to-be-added to the monitoring set
603   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
604   //                 OR'd together) which will be associated with this
605   //                 FD after this call.
606   virtual void ModFD(int fd, int event_mask) const;
607 
608   ////////////////////////////////////////
609 
610   // Summary:
611   //   Modifies the event mask associated with an FD in the set of
612   //   data needed by epoll.
613   //   Events are removed before they are added, thus, if ~0 is put
614   //   in 'remove_event', whatever is put in 'add_event' will be
615   //   the new event mask.
616   //   If the file-descriptor specified is not registered in the
617   //   epoll_server, then nothing happens as a result of this call.
618   // Args:
619   //   fd - the file descriptor whose event mask is to be modified
620   //   remove_event - the events which are to be removed from the current
621   //                  event_mask
622   //   add_event - the events which are to be added to the current event_mask
623   //
624   //
625   virtual void ModifyFD(int fd, int remove_event, int add_event);
626 
627   ////////////////////////////////////////
628 
629   // Summary:
630   //   Waits for events, and calls HandleEvent() for each
631   //   fd, event pair discovered to possibly have an event.
632   //   Note that a callback (B) may get a spurious event if
633   //   another callback (A) has closed a file-descriptor N, and
634   //   the callback (B) has a newly opened file-descriptor, which
635   //   also happens to be N.
636   virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
637                                                 struct epoll_event events[],
638                                                 int events_size);
639 
640 
641 
642   // Summary:
643   //   An internal function for implementing the ready list. It adds a fd's
644   //   CBAndEventMask to the ready list. If the fd is already on the ready
645   //   list, it is a no-op.
646   void AddToReadyList(CBAndEventMask* cb_and_mask);
647 
648   // Summary:
649   //   An internal function for implementing the ready list. It removes a fd's
650   //   CBAndEventMask from the ready list. If the fd is not on the ready list,
651   //   it is a no-op.
652   void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
653 
654   // Summary:
655   // Calls any pending alarms that should go off and reregisters them if they
656   // were recurring.
657   virtual void CallAndReregisterAlarmEvents();
658 
659   // The file-descriptor created for epolling
660   int epoll_fd_;
661 
662   // The mapping of file-descriptor to CBAndEventMasks
663   FDToCBMap cb_map_;
664 
665   // Custom hash function to be used by hash_set.
666   struct AlarmCBHash {
operatorAlarmCBHash667     size_t operator()(AlarmCB*const& p) const {
668       return reinterpret_cast<size_t>(p);
669     }
670   };
671 
672 
673   // TODO(sushantj): Having this hash_set is avoidable. We currently have it
674   // only so that we can enforce stringent checks that a caller can not register
675   // the same alarm twice. One option is to have an implementation in which
676   // this hash_set is used only in the debug mode.
677   typedef base::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap;
678   AlarmCBMap all_alarms_;
679 
680   TimeToAlarmCBMap alarm_map_;
681 
682   // The amount of time in microseconds that we'll wait before returning
683   // from the WaitForEventsAndExecuteCallbacks() function.
684   // If this is positive, wait that many microseconds.
685   // If this is negative, wait forever, or for the first event that occurs
686   // If this is zero, never wait for an event.
687   int64 timeout_in_us_;
688 
689   // This is nonzero only after the invocation of epoll_wait_impl within
690   // WaitForEventsAndCallHandleEvents and before the function
691   // WaitForEventsAndExecuteCallbacks returns.  At all other times, this is
692   // zero. This enables us to have relatively accurate time returned from the
693   // ApproximateNowInUsec() function. See that function for more details.
694   int64 recorded_now_in_us_;
695 
696   // This is used to implement CallAndReregisterAlarmEvents. This stores
697   // all alarms that were reregistered because OnAlarm() returned a
698   // value > 0 and the time at which they should be executed is less than
699   // the current time.  By storing such alarms in this map we ensure
700   // that while calling CallAndReregisterAlarmEvents we do not call
701   // OnAlarm on any alarm in this set. This ensures that we do not
702   // go in an infinite loop.
703   AlarmCBMap alarms_reregistered_and_should_be_skipped_;
704 
705   LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
706   LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
707   int ready_list_size_;
// TODO(alyssar): make this into something that scales up.
// Number of entries in events_.
static const int events_size_ = 256;
// Fixed-size array of epoll_event structures. Its bound is spelled as
// events_size_ (rather than a second literal 256) so the array length and the
// constant cannot drift apart.
// NOTE(review): presumably the buffer filled by epoll_wait -- confirm in the
// .cc file.
struct epoll_event events_[events_size_];
711 
712 #ifdef EPOLL_SERVER_EVENT_TRACING
713   struct EventRecorder {
714    public:
EventRecorderEventRecorder715     EventRecorder() : num_records_(0), record_threshold_(10000) {}
716 
~EventRecorderEventRecorder717     ~EventRecorder() {
718       Clear();
719     }
720 
721     // When a number of events equals the record threshold,
722     // the collected data summary for all FDs will be written
723     // to LOG(INFO). Note that this does not include the
724     // individual events (if you'reinterested in those, you'll
725     // have to get at them programmatically).
726     // After any such flushing to LOG(INFO) all events will
727     // be cleared.
728     // Note that the definition of an 'event' is a bit 'hazy',
729     // as it includes the 'Unregistration' event, and perhaps
730     // others.
set_record_thresholdEventRecorder731     void set_record_threshold(int64 new_threshold) {
732       record_threshold_ = new_threshold;
733     }
734 
ClearEventRecorder735     void Clear() {
736       for (int i = 0; i < debug_events_.size(); ++i) {
737         delete debug_events_[i];
738       }
739       debug_events_.clear();
740       unregistered_fds_.clear();
741       event_counts_.clear();
742     }
743 
MaybeRecordAndClearEventRecorder744     void MaybeRecordAndClear() {
745       ++num_records_;
746       if ((num_records_ > record_threshold_) &&
747           (record_threshold_ > 0)) {
748         LOG(INFO) << "\n" << *this;
749         num_records_ = 0;
750         Clear();
751       }
752     }
753 
RecordFDMaskEventEventRecorder754     void RecordFDMaskEvent(int fd, int mask, const char* function) {
755       FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
756       debug_events_.push_back(fdmo);
757       MaybeRecordAndClear();
758     }
759 
RecordEpollWaitEventEventRecorder760     void RecordEpollWaitEvent(int timeout_in_ms,
761                               int num_events_generated) {
762       EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms,
763                                                   num_events_generated);
764       debug_events_.push_back(ewo);
765       MaybeRecordAndClear();
766     }
767 
RecordEpollEventEventRecorder768     void RecordEpollEvent(int fd, int event_mask) {
769       Events& events_for_fd = event_counts_[fd];
770       events_for_fd.AssignFromMask(event_mask);
771       MaybeRecordAndClear();
772     }
773 
774     friend ostream& operator<<(ostream& os, const EventRecorder& er) {
775       for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
776         os << "fd: " << er.unregistered_fds_[i] << "\n";
777         os << er.unregistered_fds_[i];
778       }
779       for (EventCountsMap::const_iterator i = er.event_counts_.begin();
780            i != er.event_counts_.end();
781            ++i) {
782         os << "fd: " << i->first << "\n";
783         os << i->second;
784       }
785       for (int i = 0; i < er.debug_events_.size(); ++i) {
786         os << *(er.debug_events_[i]) << "\n";
787       }
788       return os;
789     }
790 
RecordUnregistrationEventRecorder791     void RecordUnregistration(int fd) {
792       EventCountsMap::iterator i = event_counts_.find(fd);
793       if (i != event_counts_.end()) {
794         unregistered_fds_.push_back(i->second);
795         event_counts_.erase(i);
796       }
797       MaybeRecordAndClear();
798     }
799 
800    protected:
801     class DebugOutput {
802      public:
803       friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
804         debug_output.OutputToStream(os);
805         return os;
806       }
807       virtual void OutputToStream(ostream* os) const = 0;
~DebugOutputEventRecorder808       virtual ~DebugOutput() {}
809     };
810 
811     class FDMaskOutput : public DebugOutput {
812      public:
FDMaskOutputEventRecorder813       FDMaskOutput(int fd, int mask, const char* function) :
814           fd_(fd), mask_(mask), function_(function) {}
OutputToStreamEventRecorder815       virtual void OutputToStream(ostream* os) const {
816         (*os) << "func: " << function_
817               << "\tfd: " << fd_;
818         if (mask_ != 0) {
819            (*os) << "\tmask: " << EventMaskToString(mask_);
820         }
821       }
822       int fd_;
823       int mask_;
824       const char* function_;
825     };
826 
827     class EpollWaitOutput : public DebugOutput {
828      public:
EpollWaitOutputEventRecorder829       EpollWaitOutput(int timeout_in_ms,
830                       int num_events_generated) :
831           timeout_in_ms_(timeout_in_ms),
832           num_events_generated_(num_events_generated) {}
OutputToStreamEventRecorder833       virtual void OutputToStream(ostream* os) const {
834         (*os) << "timeout_in_ms: " << timeout_in_ms_
835               << "\tnum_events_generated: " << num_events_generated_;
836       }
837      protected:
838       int timeout_in_ms_;
839       int num_events_generated_;
840     };
841 
842     struct Events {
EventsEventRecorder::Events843       Events() :
844           epoll_in(0),
845           epoll_pri(0),
846           epoll_out(0),
847           epoll_rdnorm(0),
848           epoll_rdband(0),
849           epoll_wrnorm(0),
850           epoll_wrband(0),
851           epoll_msg(0),
852           epoll_err(0),
853           epoll_hup(0),
854           epoll_oneshot(0),
855           epoll_et(0) {}
856 
AssignFromMaskEventRecorder::Events857       void AssignFromMask(int event_mask) {
858         if (event_mask & EPOLLIN) ++epoll_in;
859         if (event_mask & EPOLLPRI) ++epoll_pri;
860         if (event_mask & EPOLLOUT) ++epoll_out;
861         if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
862         if (event_mask & EPOLLRDBAND) ++epoll_rdband;
863         if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
864         if (event_mask & EPOLLWRBAND) ++epoll_wrband;
865         if (event_mask & EPOLLMSG) ++epoll_msg;
866         if (event_mask & EPOLLERR) ++epoll_err;
867         if (event_mask & EPOLLHUP) ++epoll_hup;
868         if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
869         if (event_mask & EPOLLET) ++epoll_et;
870       };
871 
872       friend ostream& operator<<(ostream& os, const Events& ev) {
873         if (ev.epoll_in) {
874           os << "\t      EPOLLIN: " << ev.epoll_in << "\n";
875         }
876         if (ev.epoll_pri) {
877           os << "\t     EPOLLPRI: " << ev.epoll_pri << "\n";
878         }
879         if (ev.epoll_out) {
880           os << "\t     EPOLLOUT: " << ev.epoll_out << "\n";
881         }
882         if (ev.epoll_rdnorm) {
883           os << "\t  EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
884         }
885         if (ev.epoll_rdband) {
886           os << "\t  EPOLLRDBAND: " << ev.epoll_rdband << "\n";
887         }
888         if (ev.epoll_wrnorm) {
889           os << "\t  EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
890         }
891         if (ev.epoll_wrband) {
892           os << "\t  EPOLLWRBAND: " << ev.epoll_wrband << "\n";
893         }
894         if (ev.epoll_msg) {
895           os << "\t     EPOLLMSG: " << ev.epoll_msg << "\n";
896         }
897         if (ev.epoll_err) {
898           os << "\t     EPOLLERR: " << ev.epoll_err << "\n";
899         }
900         if (ev.epoll_hup) {
901           os << "\t     EPOLLHUP: " << ev.epoll_hup << "\n";
902         }
903         if (ev.epoll_oneshot) {
904           os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
905         }
906         if (ev.epoll_et) {
907           os << "\t      EPOLLET: " << ev.epoll_et << "\n";
908         }
909         return os;
910       }
911 
912       unsigned int epoll_in;
913       unsigned int epoll_pri;
914       unsigned int epoll_out;
915       unsigned int epoll_rdnorm;
916       unsigned int epoll_rdband;
917       unsigned int epoll_wrnorm;
918       unsigned int epoll_wrband;
919       unsigned int epoll_msg;
920       unsigned int epoll_err;
921       unsigned int epoll_hup;
922       unsigned int epoll_oneshot;
923       unsigned int epoll_et;
924     };
925 
926     std::vector<DebugOutput*> debug_events_;
927     std::vector<Events> unregistered_fds_;
928     typedef base::hash_map<int, Events> EventCountsMap;
929     EventCountsMap event_counts_;
930     int64 num_records_;
931     int64 record_threshold_;
932   };
933 
ClearEventRecords()934   void ClearEventRecords() {
935     event_recorder_.Clear();
936   }
WriteEventRecords(ostream * os)937   void WriteEventRecords(ostream* os) const {
938     (*os) << event_recorder_;
939   }
940 
941   mutable EventRecorder event_recorder_;
942 
943 #endif
944 
945  private:
946   // Helper functions used in the destructor.
947   void CleanupFDToCBMap();
948   void CleanupTimeToAlarmCBMap();
949 
950   // The callback registered to the fds below.  As the purpose of their
951   // registration is to wake the epoll server it just clears the pipe and
952   // returns.
953   scoped_ptr<ReadPipeCallback> wake_cb_;
954 
955   // A pipe owned by the epoll server.  The server will be registered to listen
956   // on read_fd_ and can be woken by Wake() which writes to write_fd_.
957   int read_fd_;
958   int write_fd_;
959 
960   // This boolean is checked to see if it is false at the top of the
961   // WaitForEventsAndExecuteCallbacks function. If not, then it either returns
962   // without doing work, and logs to ERROR, or aborts the program (in
963   // DEBUG mode). If so, then it sets the bool to true, does work, and
964   // sets it back to false when done. This catches unwanted recursion.
965   bool in_wait_for_events_and_execute_callbacks_;
966 
967   // True while the EpollServer is being destroyed.
968   bool in_shutdown_;
969 
970   DISALLOW_COPY_AND_ASSIGN(EpollServer);
971 };
972 
973 class EpollAlarmCallbackInterface {
974  public:
975   // Summary:
976   //   Called when an alarm times out. Invalidates an AlarmRegToken.
977   //   WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
978   //   delete it, as the reference is no longer valid.
979   // Returns:
980   //   the unix time (in microseconds) at which this alarm should be signaled
981   //   again, or 0 if the alarm should be removed.
982   virtual int64 OnAlarm() = 0;
983 
984   // Summary:
985   //   Called when the an alarm is registered. Invalidates an AlarmRegToken.
986   // Args:
987   //   token: the iterator to the the alarm registered in the alarm map.
988   //   WARNING: this token becomes invalid when the alarm fires, is
989   //   unregistered, or OnShutdown is called on that alarm.
990   //   eps: the epoll server the alarm is registered with.
991   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
992                               EpollServer* eps) = 0;
993 
994   // Summary:
995   //   Called when the an alarm is unregistered.
996   //   WARNING: It is not valid to unregister a callback and then use the token
997   //   that was saved to refer to the callback.
998   virtual void OnUnregistration() = 0;
999 
1000   // Summary:
1001   //   Called when the epoll server is shutting down.
1002   //   Invalidates the AlarmRegToken that was given when this alarm was
1003   //   registered.
1004   virtual void OnShutdown(EpollServer* eps) = 0;
1005 
~EpollAlarmCallbackInterface()1006   virtual ~EpollAlarmCallbackInterface() {}
1007 
1008  protected:
EpollAlarmCallbackInterface()1009   EpollAlarmCallbackInterface() {}
1010 };
1011 
1012 // A simple alarm which unregisters itself on destruction.
1013 //
1014 // PLEASE NOTE:
1015 // Any classes overriding these functions must either call the implementation
1016 // of the parent class, or is must otherwise make sure that the 'registered_'
1017 // boolean and the token, 'token_', are updated appropriately.
1018 class EpollAlarm : public EpollAlarmCallbackInterface {
1019  public:
1020   EpollAlarm();
1021 
1022   virtual ~EpollAlarm();
1023 
1024   // Marks the alarm as unregistered and returns 0.  The return value may be
1025   // safely ignored by subclasses.
1026   virtual int64 OnAlarm() OVERRIDE;
1027 
1028   // Marks the alarm as registered, and stores the token.
1029   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
1030                               EpollServer* eps) OVERRIDE;
1031 
1032   // Marks the alarm as unregistered.
1033   virtual void OnUnregistration() OVERRIDE;
1034 
1035   // Marks the alarm as unregistered.
1036   virtual void OnShutdown(EpollServer* eps) OVERRIDE;
1037 
1038   // If the alarm was registered, unregister it.
1039   void UnregisterIfRegistered();
1040 
registered()1041   bool registered() const { return registered_; }
1042 
eps()1043   const EpollServer* eps() const { return eps_; }
1044 
1045  private:
1046   EpollServer::AlarmRegToken token_;
1047   EpollServer* eps_;
1048   bool registered_;
1049 };
1050 
1051 }  // namespace net
1052 
1053 #endif  // NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_
1054