1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 6 #define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 7 #pragma once 8 9 #include <fcntl.h> 10 #include <sys/queue.h> 11 #include <ext/hash_map> // it is annoying that gcc does this. oh well. 12 #include <ext/hash_set> 13 #include <map> 14 #include <string> 15 #include <utility> 16 #include <set> 17 #include <vector> 18 19 // #define EPOLL_SERVER_EVENT_TRACING 1 20 // 21 // Defining EPOLL_SERVER_EVENT_TRACING 22 // causes code to exist which didn't before. 23 // This code tracks each event generated by the epollserver, 24 // as well as providing a per-fd-registered summary of 25 // events. Note that enabling this code vastly slows 26 // down operations, and uses substantially more 27 // memory. For these reasons, it should only be enabled when doing 28 // developer debugging at his/her workstation. 29 // 30 // A structure called 'EventRecorder' will exist when 31 // the macro is defined. See the EventRecorder class interface 32 // within the EpollServer class for more details. 33 #ifdef EPOLL_SERVER_EVENT_TRACING 34 #include <iostream> 35 #include "base/logging.h" 36 #endif 37 38 #include "base/basictypes.h" 39 #include "base/memory/scoped_ptr.h" 40 #include <sys/epoll.h> 41 42 namespace net { 43 44 class EpollServer; 45 class EpollAlarmCallbackInterface; 46 class ReadPipeCallback; 47 48 struct EpollEvent { EpollEventEpollEvent49 EpollEvent(int events, bool is_epoll_wait) 50 : in_events(events), 51 out_ready_mask(0) { 52 } 53 54 int in_events; // incoming events 55 int out_ready_mask; // the new event mask for ready list (0 means don't 56 // get on the ready list). This field is always 57 // initialized to 0 when the event is passed to 58 // OnEvent. 59 }; 60 61 // Callbacks which go into EpollServers are expected to derive from this class. 62 class EpollCallbackInterface { 63 public: 64 // Summary: 65 // Called when the callback is registered into a EpollServer. 66 // Args: 67 // eps - the poll server into which this callback was registered 68 // fd - the file descriptor which was registered 69 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 70 // which was registered (and will initially be used 71 // in the epoll() calls) 72 virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0; 73 74 // Summary: 75 // Called when the event_mask is modified (for a file-descriptor) 76 // Args: 77 // fd - the file descriptor which was registered 78 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 79 // which was is now curren (and will be used 80 // in subsequent epoll() calls) 81 virtual void OnModification(int fd, int event_mask) = 0; 82 83 // Summary: 84 // Called whenever an event occurs on the file-descriptor. 85 // This is where the bulk of processing is expected to occur. 86 // Args: 87 // fd - the file descriptor which was registered 88 // event - a struct that contains the event mask (composed of EPOLLIN, 89 // EPOLLOUT, etc), a flag that indicates whether this is a true 90 // epoll_wait event vs one from the ready list, and an output 91 // parameter for OnEvent to inform the EpollServer whether to put 92 // this fd on the ready list. 93 virtual void OnEvent(int fd, EpollEvent* event) = 0; 94 95 // Summary: 96 // Called when the file-descriptor is unregistered from the poll-server. 97 // Args: 98 // fd - the file descriptor which was registered, and of this call, is now 99 // unregistered. 100 // replaced - If true, this callback is being replaced by another, otherwise 101 // it is simply being removed. 102 virtual void OnUnregistration(int fd, bool replaced) = 0; 103 104 // Summary: 105 // Called when the epoll server is shutting down. This is different from 106 // OnUnregistration because the subclass may want to clean up memory. 107 // This is called in leiu of OnUnregistration. 108 // Args: 109 // fd - the file descriptor which was registered. 110 virtual void OnShutdown(EpollServer* eps, int fd) = 0; 111 ~EpollCallbackInterface()112 virtual ~EpollCallbackInterface() {} 113 114 protected: EpollCallbackInterface()115 EpollCallbackInterface() {} 116 }; 117 118 //////////////////////////////////////////////////////////////////////////////// 119 //////////////////////////////////////////////////////////////////////////////// 120 121 class EpollServer { 122 public: 123 typedef EpollAlarmCallbackInterface AlarmCB; 124 typedef EpollCallbackInterface CB; 125 126 typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap; 127 typedef TimeToAlarmCBMap::iterator AlarmRegToken; 128 129 // Summary: 130 // Constructor: 131 // By default, we don't wait any amount of time for events, and 132 // we suggest to the epoll-system that we're going to use on-the-order 133 // of 1024 FDs. 134 EpollServer(); 135 136 //////////////////////////////////////// 137 138 // Destructor 139 virtual ~EpollServer(); 140 141 //////////////////////////////////////// 142 143 // Summary 144 // Register a callback to be called whenever an event contained 145 // in the set of events included in event_mask occurs on the 146 // file-descriptor 'fd' 147 // 148 // Note that only one callback is allowed to be registered for 149 // any specific file-decriptor. 150 // 151 // If a callback is registered for a file-descriptor which has already 152 // been registered, then the previous callback is unregistered with 153 // the 'replaced' flag set to true. I.e. the previous callback's 154 // OnUnregistration() function is called like so: 155 // OnUnregistration(fd, true); 156 // 157 // The epoll server does NOT take on ownership of the callback: the callback 158 // creator is responsible for managing that memory. 159 // 160 // Args: 161 // fd - a valid file-descriptor 162 // cb - an instance of a subclass of EpollCallbackInterface 163 // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating 164 // the events for which the callback would like to be 165 // called. 166 virtual void RegisterFD(int fd, CB* cb, int event_mask); 167 168 //////////////////////////////////////// 169 170 // Summary: 171 // A shortcut for RegisterFD which sets things up such that the 172 // callback is called when 'fd' is available for writing. 173 // Args: 174 // fd - a valid file-descriptor 175 // cb - an instance of a subclass of EpollCallbackInterface 176 virtual void RegisterFDForWrite(int fd, CB* cb); 177 178 //////////////////////////////////////// 179 180 // Summary: 181 // A shortcut for RegisterFD which sets things up such that the 182 // callback is called when 'fd' is available for reading or writing. 183 // Args: 184 // fd - a valid file-descriptor 185 // cb - an instance of a subclass of EpollCallbackInterface 186 virtual void RegisterFDForReadWrite(int fd, CB* cb); 187 188 //////////////////////////////////////// 189 190 // Summary: 191 // A shortcut for RegisterFD which sets things up such that the 192 // callback is called when 'fd' is available for reading. 193 // Args: 194 // fd - a valid file-descriptor 195 // cb - an instance of a subclass of EpollCallbackInterface 196 virtual void RegisterFDForRead(int fd, CB* cb); 197 198 //////////////////////////////////////// 199 200 // Summary: 201 // Removes the FD and the associated callback from the pollserver. 202 // If the callback is registered with other FDs, they will continue 203 // to be processed using the callback without modification. 204 // If the file-descriptor specified is not registered in the 205 // epoll_server, then nothing happens as a result of this call. 206 // Args: 207 // fd - the file-descriptor which should no-longer be monitored. 208 virtual void UnregisterFD(int fd); 209 210 //////////////////////////////////////// 211 212 // Summary: 213 // Modifies the event mask for the file-descriptor, replacing 214 // the old event_mask with the new one specified here. 215 // If the file-descriptor specified is not registered in the 216 // epoll_server, then nothing happens as a result of this call. 217 // Args: 218 // fd - the fd whose event mask should be modified. 219 // event_mask - the new event mask. 220 virtual void ModifyCallback(int fd, int event_mask); 221 222 //////////////////////////////////////// 223 224 // Summary: 225 // Modifies the event mask for the file-descriptor such that we 226 // no longer request events when 'fd' is readable. 227 // If the file-descriptor specified is not registered in the 228 // epoll_server, then nothing happens as a result of this call. 229 // Args: 230 // fd - the fd whose event mask should be modified. 231 virtual void StopRead(int fd); 232 233 //////////////////////////////////////// 234 235 // Summary: 236 // Modifies the event mask for the file-descriptor such that we 237 // request events when 'fd' is readable. 238 // If the file-descriptor specified is not registered in the 239 // epoll_server, then nothing happens as a result of this call. 240 // Args: 241 // fd - the fd whose event mask should be modified. 242 virtual void StartRead(int fd); 243 244 //////////////////////////////////////// 245 246 // Summary: 247 // Modifies the event mask for the file-descriptor such that we 248 // no longer request events when 'fd' is writable. 249 // If the file-descriptor specified is not registered in the 250 // epoll_server, then nothing happens as a result of this call. 251 // Args: 252 // fd - the fd whose event mask should be modified. 253 virtual void StopWrite(int fd); 254 255 //////////////////////////////////////// 256 257 // Summary: 258 // Modifies the event mask for the file-descriptor such that we 259 // request events when 'fd' is writable. 260 // If the file-descriptor specified is not registered in the 261 // epoll_server, then nothing happens as a result of this call. 262 // Args: 263 // fd - the fd whose event mask should be modified. 264 virtual void StartWrite(int fd); 265 266 //////////////////////////////////////// 267 268 // Summary: 269 // Looks up the callback associated with the file-desriptor 'fd'. 270 // If a callback is associated with this file-descriptor, then 271 // it's OnEvent() method is called with the file-descriptor 'fd', 272 // and event_mask 'event_mask' 273 // 274 // If no callback is registered for this file-descriptor, nothing 275 // will happen as a result of this call. 276 // 277 // This function is used internally by the EpollServer, but is 278 // available publically so that events might be 'faked'. Calling 279 // this function with an fd and event_mask is equivalent (as far 280 // as the callback is concerned) to having a real event generated 281 // by epoll (except, of course, that read(), etc won't necessarily 282 // be able to read anything) 283 // Args: 284 // fd - the file-descriptor on which an event has occured. 285 // event_mask - a bitmask representing the events which have occured 286 // on/for this fd. This bitmask is composed of 287 // POLLIN, POLLOUT, etc. 288 // 289 void HandleEvent(int fd, int event_mask); 290 291 // Summary: 292 // Call this when you want the pollserver to 293 // wait for events and execute the callbacks associated with 294 // the file-descriptors on which those events have occured. 295 // Depending on the value of timeout_in_us_, this may or may 296 // not return immediately. Please reference the set_timeout() 297 // function for the specific behaviour. 298 virtual void WaitForEventsAndExecuteCallbacks(); 299 300 // Summary: 301 // When an fd is registered to use edge trigger notification, the ready 302 // list can be used to simulate level trigger semantics. Edge trigger 303 // registration doesn't send an initial event, and only rising edge (going 304 // from blocked to unblocked) events are sent. A callback can put itself on 305 // the ready list by calling SetFDReady() after calling RegisterFD(). The 306 // OnEvent method of all callbacks associated with the fds on the ready 307 // list will be called immediately after processing the events returned by 308 // epoll_wait(). The fd is removed from the ready list before the 309 // callback's OnEvent() method is invoked. To stay on the ready list, the 310 // OnEvent() (or some function in that call chain) must call SetFDReady 311 // again. When a fd is unregistered using UnregisterFD(), the fd is 312 // automatically removed from the ready list. 313 // 314 // When the callback for a edge triggered fd hits the falling edge (about 315 // to block, either because of it got an EAGAIN, or had a short read/write 316 // operation), it should remove itself from the ready list using 317 // SetFDNotReady() (since OnEvent cannot distinguish between invocation 318 // from the ready list vs from a normal epoll event). All four ready list 319 // methods are safe to be called within the context of the callbacks. 320 // 321 // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds 322 // that are registered with the EpollServer will be put on the ready list. 323 // SetFDReady() and SetFDNotReady() will do nothing if the EpollServer 324 // doesn't know about the fd passed in. 325 // 326 // Since the ready list cannot reliably determine proper set of events 327 // which should be sent to the callback, SetFDReady() requests the caller 328 // to provide the ready list with the event mask, which will be used later 329 // when OnEvent() is invoked by the ready list. Hence, the event_mask 330 // passedto SetFDReady() does not affect the actual epoll registration of 331 // the fd with the kernel. If a fd is already put on the ready list, and 332 // SetFDReady() is called again for that fd with a different event_mask, 333 // the event_mask will be updated. 334 virtual void SetFDReady(int fd, int events_to_fake); 335 336 virtual void SetFDNotReady(int fd); 337 338 // Summary: 339 // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as 340 // debugging tools and for writing unit tests. 341 // ISFDReady() returns whether a fd is in the ready list. 342 // ReadyListSize() returns the number of fds on the ready list. 343 // VerifyReadyList() checks the consistency of internal data structure. It 344 // will CHECK if it finds an error. 345 virtual bool IsFDReady(int fd) const; 346 ReadyListSize()347 size_t ReadyListSize() const { return ready_list_size_; } 348 349 void VerifyReadyList() const; 350 351 //////////////////////////////////////// 352 353 // Summary: 354 // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'. 355 // If the callback returns a positive number from its OnAlarm() function, 356 // then the callback will be re-registered at that time, else the alarm 357 // owner is responsible for freeing up memory. 358 // 359 // Important: A give AlarmCB* can not be registered again if it is already 360 // registered. If a user wants to register a callback again it should first 361 // unregister the previous callback before calling RegisterAlarm again. 362 // Args: 363 // timeout_time_in_us - the absolute time at which the alarm should go off 364 // ac - the alarm which will be called. 365 virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac); 366 367 // Summary: 368 // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() + 369 // delta_in_us). While this is somewhat less accurate (see the description 370 // for ApproximateNowInUs() to see how 'approximate'), the error is never 371 // worse than the amount of time it takes to process all events in one 372 // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a 373 // positive number from its OnAlarm() function, then the callback will be 374 // re-registered at that time, else the alarm owner is responsible for 375 // freeing up memory. 376 // Note that this function is purely a convienence. The 377 // same thing may be accomplished by using RegisterAlarm with 378 // ApproximateNowInUs() directly. 379 // 380 // Important: A give AlarmCB* can not be registered again if it is already 381 // registered. If a user wants to register a callback again it should first 382 // unregister the previous callback before calling RegisterAlarm again. 383 // Args: 384 // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at 385 // which point the alarm should go off. 386 // ac - the alarm which will be called. RegisterAlarmApproximateDelta(int64 delta_in_us,AlarmCB * ac)387 void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) { 388 RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac); 389 } 390 391 //////////////////////////////////////// 392 393 // Summary: 394 // Unregister the alarm referred to by iterator_token; Callers should 395 // be warned that a token may have become already invalid when OnAlarm() 396 // is called, was unregistered, or OnShutdown was called on that alarm. 397 // Args: 398 // iterator_token - iterator to the alarm callback to unregister. 399 virtual void UnregisterAlarm( 400 const EpollServer::AlarmRegToken& iterator_token); 401 402 //////////////////////////////////////// 403 404 // Summary: 405 // returns the number of file-descriptors registered in this EpollServer. 406 // Returns: 407 // number of FDs registered (discounting the internal pipe used for Wake) 408 virtual int NumFDsRegistered() const; 409 410 // Summary: 411 // Force the epoll server to wake up (by writing to an internal pipe). 412 virtual void Wake(); 413 414 // Summary: 415 // Wrapper around WallTimer's NowInUsec. We do this so that we can test 416 // EpollServer without using the system clock (and can avoid the flakiness 417 // that would ensue) 418 // Returns: 419 // the current time as number of microseconds since the Unix epoch. 420 virtual int64 NowInUsec() const; 421 422 // Summary: 423 // Since calling NowInUsec() many thousands of times per 424 // WaitForEventsAndExecuteCallbacks function call is, to say the least, 425 // inefficient, we allow users to use an approximate time instead. The 426 // time returned from this function is as accurate as NowInUsec() when 427 // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's 428 // callstack. 429 // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then 430 // this function returns the time at which the 431 // WaitForEventsAndExecuteCallbacks function started to process events or 432 // alarms. 433 // 434 // Essentially, this function makes available a fast and mostly accurate 435 // mechanism for getting the time for any function handling an event or 436 // alarm. When functions which are not handling callbacks or alarms call 437 // this function, they get the slow and "absolutely" accurate time. 438 // 439 // Users should be encouraged to use this function. 440 // Returns: 441 // the "approximate" current time as number of microseconds since the Unix 442 // epoch. 443 virtual int64 ApproximateNowInUsec() const; 444 445 static std::string EventMaskToString(int event_mask); 446 447 // Summary: 448 // Logs the state of the epoll server with LOG(ERROR). 449 void LogStateOnCrash(); 450 451 // Summary: 452 // Set the timeout to the value specified. 453 // If the timeout is set to a negative number, 454 // WaitForEventsAndExecuteCallbacks() will only return when an event has 455 // occured 456 // If the timeout is set to zero, 457 // WaitForEventsAndExecuteCallbacks() will return immediately 458 // If the timeout is set to a positive number, 459 // WaitForEventsAndExecuteCallbacks() will return when an event has 460 // occured, or when timeout_in_us microseconds has elapsed, whichever 461 // is first. 462 // Args: 463 // timeout_in_us - value specified depending on behaviour desired. 464 // See above. set_timeout_in_us(int64 timeout_in_us)465 void set_timeout_in_us(int64 timeout_in_us) { 466 timeout_in_us_ = timeout_in_us; 467 } 468 469 //////////////////////////////////////// 470 471 // Summary: 472 // Accessor for the current value of timeout_in_us. timeout_in_us()473 int timeout_in_us() const { return timeout_in_us_; } 474 475 // Summary: 476 // Returns true when the EpollServer() is being destroyed. in_shutdown()477 bool in_shutdown() const { return in_shutdown_; } 478 ContainsAlarm(EpollAlarmCallbackInterface * alarm)479 bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const { 480 return all_alarms_.find(alarm) != all_alarms_.end(); 481 } 482 483 // Summary: 484 // A function for implementing the ready list. It invokes OnEvent for each 485 // of the fd in the ready list, and takes care of adding them back to the 486 // ready list if the callback requests it (by checking that out_ready_mask 487 // is non-zero). 488 void CallReadyListCallbacks(); 489 490 // Granularity at which time moves when considering what alarms are on. 491 // See function: DoRoundingOnNow() on exact usage. 492 static const int kMinimumEffectiveAlarmQuantum; 493 protected: 494 495 virtual int GetFlags(int fd); SetFlags(int fd,int flags)496 inline int SetFlags(int fd, int flags) { 497 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 498 } 499 500 virtual void SetNonblocking(int fd); 501 502 // This exists here so that we can override this function in unittests 503 // in order to make effective mock EpollServer objects. 504 virtual int epoll_wait_impl(int epfd, 505 struct epoll_event* events, 506 int max_events, 507 int timeout_in_ms); 508 509 // this struct is used internally, and is never used by anything external 510 // to this class. Some of its members are declared mutable to get around the 511 // restriction imposed by hash_set. Since hash_set knows nothing about the 512 // objects it stores, it has to assume that every bit of the object is used 513 // in the hash function and equal_to comparison. Thus hash_set::iterator is a 514 // const iterator. In this case, the only thing that must stay constant is 515 // fd. Everything else are just along for the ride and changing them doesn't 516 // compromise the hash_set integrity. 517 struct CBAndEventMask { CBAndEventMaskCBAndEventMask518 CBAndEventMask() 519 : cb(NULL), 520 fd(-1), 521 event_mask(0), 522 events_asserted(0), 523 events_to_fake(0), 524 in_use(false) { 525 entry.le_next = NULL; 526 entry.le_prev = NULL; 527 } 528 CBAndEventMaskCBAndEventMask529 CBAndEventMask(EpollCallbackInterface* cb, 530 int event_mask, 531 int fd) 532 : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0), 533 events_to_fake(0), in_use(false) { 534 entry.le_next = NULL; 535 entry.le_prev = NULL; 536 } 537 538 // Required operator for hash_set. Normally operator== should be a free 539 // standing function. However, since CBAndEventMask is a protected type and 540 // it will never be a base class, it makes no difference. 541 bool operator==(const CBAndEventMask& cb_and_mask) const { 542 return fd == cb_and_mask.fd; 543 } 544 // A callback. If the fd is unregistered inside the callchain of OnEvent, 545 // the cb will be set to NULL. 546 mutable EpollCallbackInterface* cb; 547 548 mutable LIST_ENTRY(CBAndEventMask) entry; 549 // file descriptor registered with the epoll server. 550 int fd; 551 // the current event_mask registered for this callback. 552 mutable int event_mask; 553 // the event_mask that was returned by epoll 554 mutable int events_asserted; 555 // the event_mask for the ready list to use to call OnEvent. 556 mutable int events_to_fake; 557 // toggle around calls to OnEvent to tell UnregisterFD to not erase the 558 // iterator because HandleEvent is using it. 559 mutable bool in_use; 560 }; 561 562 // Custom hash function to be used by hash_set. 563 struct CBAndEventMaskHash { operatorCBAndEventMaskHash564 size_t operator()(const CBAndEventMask& cb_and_eventmask) const { 565 return static_cast<size_t>(cb_and_eventmask.fd); 566 } 567 }; 568 569 typedef __gnu_cxx::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap; 570 571 // the following four functions are OS-specific, and are likely 572 // to be changed in a subclass if the poll/select method is changed 573 // from epoll. 574 575 // Summary: 576 // Deletes a file-descriptor from the set of FDs that should be 577 // monitored with epoll. 578 // Note that this only deals with modifying data relating -directly- 579 // with the epoll call-- it does not modify any data within the 580 // epoll_server. 581 // Args: 582 // fd - the file descriptor to-be-removed from the monitoring set 583 virtual void DelFD(int fd) const; 584 585 //////////////////////////////////////// 586 587 // Summary: 588 // Adds a file-descriptor to the set of FDs that should be 589 // monitored with epoll. 590 // Note that this only deals with modifying data relating -directly- 591 // with the epoll call. 592 // Args: 593 // fd - the file descriptor to-be-added to the monitoring set 594 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 595 // OR'd together) which will be associated with this 596 // FD initially. 597 virtual void AddFD(int fd, int event_mask) const; 598 599 //////////////////////////////////////// 600 601 // Summary: 602 // Modifies a file-descriptor in the set of FDs that should be 603 // monitored with epoll. 604 // Note that this only deals with modifying data relating -directly- 605 // with the epoll call. 606 // Args: 607 // fd - the file descriptor to-be-added to the monitoring set 608 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 609 // OR'd together) which will be associated with this 610 // FD after this call. 611 virtual void ModFD(int fd, int event_mask) const; 612 613 //////////////////////////////////////// 614 615 // Summary: 616 // Modified the event mask associated with an FD in the set of 617 // data needed by epoll. 618 // Events are removed before they are added, thus, if ~0 is put 619 // in 'remove_event', whatever is put in 'add_event' will be 620 // the new event mask. 621 // If the file-descriptor specified is not registered in the 622 // epoll_server, then nothing happens as a result of this call. 623 // Args: 624 // fd - the file descriptor whose event mask is to be modified 625 // remove_event - the events which are to be removed from the current 626 // event_mask 627 // add_event - the events which are to be added to the current event_mask 628 // 629 // 630 virtual void ModifyFD(int fd, int remove_event, int add_event); 631 632 //////////////////////////////////////// 633 634 // Summary: 635 // Waits for events, and calls HandleEvents() for each 636 // fd, event pair discovered to possibly have an event. 637 // Note that a callback (B) may get a spurious event if 638 // another callback (A) has closed a file-descriptor N, and 639 // the callback (B) has a newly opened file-descriptor, which 640 // also happens to be N. 641 virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us, 642 struct epoll_event events[], 643 int events_size); 644 645 646 647 // Summary: 648 // An internal function for implementing the ready list. It adds a fd's 649 // CBAndEventMask to the ready list. If the fd is already on the ready 650 // list, it is a no-op. 651 void AddToReadyList(CBAndEventMask* cb_and_mask); 652 653 // Summary: 654 // An internal function for implementing the ready list. It remove a fd's 655 // CBAndEventMask from the ready list. If the fd is not on the ready list, 656 // it is a no-op. 657 void RemoveFromReadyList(const CBAndEventMask& cb_and_mask); 658 659 // Summary: 660 // Calls any pending alarms that should go off and reregisters them if they 661 // were recurring. 662 virtual void CallAndReregisterAlarmEvents(); 663 664 // The file-descriptor created for epolling 665 int epoll_fd_; 666 667 // The mapping of file-descriptor to CBAndEventMasks 668 FDToCBMap cb_map_; 669 670 // Custom hash function to be used by hash_set. 671 struct AlarmCBHash { operatorAlarmCBHash672 size_t operator()(AlarmCB*const& p) const { 673 return reinterpret_cast<size_t>(p); 674 } 675 }; 676 677 678 // TOOD(sushantj): Having this hash_set is avoidable. We currently have it 679 // only so that we can enforce stringent checks that a caller can not register 680 // the same alarm twice. One option is to have an implementation in which 681 // this hash_set is used only in the debug mode. 682 typedef __gnu_cxx::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap; 683 AlarmCBMap all_alarms_; 684 685 TimeToAlarmCBMap alarm_map_; 686 687 // The amount of time in microseconds that we'll wait before returning 688 // from the WaitForEventsAndExecuteCallbacks() function. 689 // If this is positive, wait that many microseconds. 690 // If this is negative, wait forever, or for the first event that occurs 691 // If this is zero, never wait for an event. 692 int64 timeout_in_us_; 693 694 // This is nonzero only after the invocation of epoll_wait_impl within 695 // WaitForEventsAndCallHandleEvents and before the function 696 // WaitForEventsAndExecuteCallbacks returns. At all other times, this is 697 // zero. This enables us to have relatively accurate time returned from the 698 // ApproximateNowInUs() function. See that function for more details. 699 int64 recorded_now_in_us_; 700 701 // This is used to implement CallAndReregisterAlarmEvents. This stores 702 // all alarms that were reregistered because OnAlarm() returned a 703 // value > 0 and the time at which they should be executed is less that 704 // the current time. By storing such alarms in this map we ensure 705 // that while calling CallAndReregisterAlarmEvents we do not call 706 // OnAlarm on any alarm in this set. This ensures that we do not 707 // go in an infinite loop. 708 AlarmCBMap alarms_reregistered_and_should_be_skipped_; 709 710 LIST_HEAD(ReadyList, CBAndEventMask) ready_list_; 711 LIST_HEAD(TmpList, CBAndEventMask) tmp_list_; 712 int ready_list_size_; 713 // TODO(alyssar): make this into something that scales up. 714 static const int events_size_ = 256; 715 struct epoll_event events_[256]; 716 717 // These controls the granularity for alarms 718 // See function CallAndReregisterAlarmEvents() 719 // TODO(sushantj): Add test for this. 720 int64 DoRoundingOnNow(int64 now_in_us) const; 721 722 #ifdef EPOLL_SERVER_EVENT_TRACING 723 struct EventRecorder { 724 public: EventRecorderEventRecorder725 EventRecorder() : num_records_(0), record_threshold_(10000) {} 726 ~EventRecorderEventRecorder727 ~EventRecorder() { 728 Clear(); 729 } 730 731 // When a number of events equals the record threshold, 732 // the collected data summary for all FDs will be written 733 // to LOG(INFO). Note that this does not include the 734 // individual events (if you'reinterested in those, you'll 735 // have to get at them programmatically). 736 // After any such flushing to LOG(INFO) all events will 737 // be cleared. 738 // Note that the definition of an 'event' is a bit 'hazy', 739 // as it includes the 'Unregistration' event, and perhaps 740 // others. set_record_thresholdEventRecorder741 void set_record_threshold(int64 new_threshold) { 742 record_threshold_ = new_threshold; 743 } 744 ClearEventRecorder745 void Clear() { 746 for (int i = 0; i < debug_events_.size(); ++i) { 747 delete debug_events_[i]; 748 } 749 debug_events_.clear(); 750 unregistered_fds_.clear(); 751 event_counts_.clear(); 752 } 753 MaybeRecordAndClearEventRecorder754 void MaybeRecordAndClear() { 755 ++num_records_; 756 if ((num_records_ > record_threshold_) && 757 (record_threshold_ > 0)) { 758 LOG(INFO) << "\n" << *this; 759 num_records_ = 0; 760 Clear(); 761 } 762 } 763 RecordFDMaskEventEventRecorder764 void RecordFDMaskEvent(int fd, int mask, const char* function) { 765 FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function); 766 debug_events_.push_back(fdmo); 767 MaybeRecordAndClear(); 768 } 769 RecordEpollWaitEventEventRecorder770 void RecordEpollWaitEvent(int timeout_in_ms, 771 int num_events_generated) { 772 EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms, 773 num_events_generated); 774 debug_events_.push_back(ewo); 775 MaybeRecordAndClear(); 776 } 777 RecordEpollEventEventRecorder778 void RecordEpollEvent(int fd, int event_mask) { 779 Events& events_for_fd = event_counts_[fd]; 780 events_for_fd.AssignFromMask(event_mask); 781 MaybeRecordAndClear(); 782 } 783 784 friend ostream& operator<<(ostream& os, const EventRecorder& er) { 785 for (int i = 0; i < er.unregistered_fds_.size(); ++i) { 786 os << "fd: " << er.unregistered_fds_[i] << "\n"; 787 os << er.unregistered_fds_[i]; 788 } 789 for (EventCountsMap::const_iterator i = er.event_counts_.begin(); 790 i != er.event_counts_.end(); 791 ++i) { 792 os << "fd: " << i->first << "\n"; 793 os << i->second; 794 } 795 for (int i = 0; i < er.debug_events_.size(); ++i) { 796 os << *(er.debug_events_[i]) << "\n"; 797 } 798 return os; 799 } 800 RecordUnregistrationEventRecorder801 void RecordUnregistration(int fd) { 802 EventCountsMap::iterator i = event_counts_.find(fd); 803 if (i != event_counts_.end()) { 804 unregistered_fds_.push_back(i->second); 805 event_counts_.erase(i); 806 } 807 MaybeRecordAndClear(); 808 } 809 810 protected: 811 class DebugOutput { 812 public: 813 friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) { 814 debug_output.OutputToStream(os); 815 return os; 816 } 817 virtual void OutputToStream(ostream* os) const = 0; ~DebugOutputEventRecorder818 virtual ~DebugOutput() {} 819 }; 820 821 class FDMaskOutput : public DebugOutput { 822 public: FDMaskOutputEventRecorder823 FDMaskOutput(int fd, int mask, const char* function) : 824 fd_(fd), mask_(mask), function_(function) {} OutputToStreamEventRecorder825 virtual void OutputToStream(ostream* os) const { 826 (*os) << "func: " << function_ 827 << "\tfd: " << fd_; 828 if (mask_ != 0) { 829 (*os) << "\tmask: " << EventMaskToString(mask_); 830 } 831 } 832 int fd_; 833 int mask_; 834 const char* function_; 835 }; 836 837 class EpollWaitOutput : public DebugOutput { 838 public: EpollWaitOutputEventRecorder839 EpollWaitOutput(int timeout_in_ms, 840 int num_events_generated) : 841 timeout_in_ms_(timeout_in_ms), 842 num_events_generated_(num_events_generated) {} OutputToStreamEventRecorder843 virtual void OutputToStream(ostream* os) const { 844 (*os) << "timeout_in_ms: " << timeout_in_ms_ 845 << "\tnum_events_generated: " << num_events_generated_; 846 } 847 protected: 848 int timeout_in_ms_; 849 int num_events_generated_; 850 }; 851 852 struct Events { EventsEventRecorder::Events853 Events() : 854 epoll_in(0), 855 epoll_pri(0), 856 epoll_out(0), 857 epoll_rdnorm(0), 858 epoll_rdband(0), 859 epoll_wrnorm(0), 860 epoll_wrband(0), 861 epoll_msg(0), 862 epoll_err(0), 863 epoll_hup(0), 864 epoll_oneshot(0), 865 epoll_et(0) {} 866 AssignFromMaskEventRecorder::Events867 void AssignFromMask(int event_mask) { 868 if (event_mask & EPOLLIN) ++epoll_in; 869 if (event_mask & EPOLLPRI) ++epoll_pri; 870 if (event_mask & EPOLLOUT) ++epoll_out; 871 if (event_mask & EPOLLRDNORM) ++epoll_rdnorm; 872 if (event_mask & EPOLLRDBAND) ++epoll_rdband; 873 if (event_mask & EPOLLWRNORM) ++epoll_wrnorm; 874 if (event_mask & EPOLLWRBAND) ++epoll_wrband; 875 if (event_mask & EPOLLMSG) ++epoll_msg; 876 if (event_mask & EPOLLERR) ++epoll_err; 877 if (event_mask & EPOLLHUP) ++epoll_hup; 878 if (event_mask & EPOLLONESHOT) ++epoll_oneshot; 879 if (event_mask & EPOLLET) ++epoll_et; 880 }; 881 882 friend ostream& operator<<(ostream& os, const Events& ev) { 883 if (ev.epoll_in) { 884 os << "\t EPOLLIN: " << ev.epoll_in << "\n"; 885 } 886 if (ev.epoll_pri) { 887 os << "\t EPOLLPRI: " << ev.epoll_pri << "\n"; 888 } 889 if (ev.epoll_out) { 890 os << "\t EPOLLOUT: " << ev.epoll_out << "\n"; 891 } 892 if (ev.epoll_rdnorm) { 893 os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n"; 894 } 895 if (ev.epoll_rdband) { 896 os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n"; 897 } 898 if (ev.epoll_wrnorm) { 899 os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n"; 900 } 901 if (ev.epoll_wrband) { 902 os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n"; 903 } 904 if (ev.epoll_msg) { 905 os << "\t EPOLLMSG: " << ev.epoll_msg << "\n"; 906 } 907 if (ev.epoll_err) { 908 os << "\t EPOLLERR: " << ev.epoll_err << "\n"; 909 } 910 if (ev.epoll_hup) { 911 os << "\t EPOLLHUP: " << ev.epoll_hup << "\n"; 912 } 913 if (ev.epoll_oneshot) { 914 os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n"; 915 } 916 if (ev.epoll_et) { 917 os << "\t EPOLLET: " << ev.epoll_et << "\n"; 918 } 919 return os; 920 } 921 922 unsigned int epoll_in; 923 unsigned int epoll_pri; 924 unsigned int epoll_out; 925 unsigned int epoll_rdnorm; 926 unsigned int epoll_rdband; 927 unsigned int epoll_wrnorm; 928 unsigned int epoll_wrband; 929 unsigned int epoll_msg; 930 unsigned int epoll_err; 931 unsigned int epoll_hup; 932 unsigned int epoll_oneshot; 933 unsigned int epoll_et; 934 }; 935 936 std::vector<DebugOutput*> debug_events_; 937 std::vector<Events> unregistered_fds_; 938 typedef __gnu_cxx::hash_map<int, Events> EventCountsMap; 939 EventCountsMap event_counts_; 940 int64 num_records_; 941 int64 record_threshold_; 942 }; 943 ClearEventRecords()944 void ClearEventRecords() { 945 event_recorder_.Clear(); 946 } WriteEventRecords(ostream * os)947 void WriteEventRecords(ostream* os) const { 948 (*os) << event_recorder_; 949 } 950 951 mutable EventRecorder event_recorder_; 952 953 #endif 954 955 private: 956 // Helper functions used in the destructor. 957 void CleanupFDToCBMap(); 958 void CleanupTimeToAlarmCBMap(); 959 960 // The callback registered to the fds below. As the purpose of their 961 // registration is to wake the epoll server it just clears the pipe and 962 // returns. 963 scoped_ptr<ReadPipeCallback> wake_cb_; 964 965 // A pipe owned by the epoll server. The server will be registered to listen 966 // on read_fd_ and can be woken by Wake() which writes to write_fd_. 967 int read_fd_; 968 int write_fd_; 969 970 // This boolean is checked to see if it is false at the top of the 971 // WaitForEventsAndExecuteCallbacks function. If not, then it either returns 972 // without doing work, and logs to ERROR, or aborts the program (in 973 // DEBUG mode). If so, then it sets the bool to true, does work, and 974 // sets it back to false when done. This catches unwanted recursion. 975 bool in_wait_for_events_and_execute_callbacks_; 976 977 // Returns true when the EpollServer() is being destroyed. 978 bool in_shutdown_; 979 980 DISALLOW_COPY_AND_ASSIGN(EpollServer); 981 }; 982 983 class EpollAlarmCallbackInterface { 984 public: 985 // Summary: 986 // Called when an alarm times out. Invalidates an AlarmRegToken. 987 // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must 988 // delete it, as the reference is no longer valid. 989 // Returns: 990 // the unix time (in microseconds) at which this alarm should be signaled 991 // again, or 0 if the alarm should be removed. 992 virtual int64 OnAlarm() = 0; 993 994 // Summary: 995 // Called when the an alarm is registered. Invalidates an AlarmRegToken. 996 // Args: 997 // token: the iterator to the the alarm registered in the alarm map. 998 // WARNING: this token becomes invalid when the alarm fires, is 999 // unregistered, or OnShutdown is called on that alarm. 1000 // eps: the epoll server the alarm is registered with. 1001 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 1002 EpollServer* eps) = 0; 1003 1004 // Summary: 1005 // Called when the an alarm is unregistered. 1006 // WARNING: It is not valid to unregister a callback and then use the token 1007 // that was saved to refer to the callback. 1008 virtual void OnUnregistration() = 0; 1009 1010 // Summary: 1011 // Called when the epoll server is shutting down. 1012 // Invalidates the AlarmRegToken that was given when this alarm was 1013 // registered. 1014 virtual void OnShutdown(EpollServer* eps) = 0; 1015 ~EpollAlarmCallbackInterface()1016 virtual ~EpollAlarmCallbackInterface() {} 1017 1018 protected: EpollAlarmCallbackInterface()1019 EpollAlarmCallbackInterface() {} 1020 }; 1021 1022 // A simple alarm which unregisters itself on destruction. 1023 // 1024 // PLEASE NOTE: 1025 // Any classes overriding these functions must either call the implementation 1026 // of the parent class, or is must otherwise make sure that the 'registered_' 1027 // boolean and the token, 'token_', are updated appropriately. 1028 class EpollAlarm : public EpollAlarmCallbackInterface { 1029 public: 1030 EpollAlarm(); 1031 1032 virtual ~EpollAlarm(); 1033 1034 // Marks the alarm as unregistered and returns 0. The return value may be 1035 // safely ignored by subclasses. 1036 virtual int64 OnAlarm(); 1037 1038 // Marks the alarm as registered, and stores the token. 1039 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 1040 EpollServer* eps); 1041 1042 // Marks the alarm as unregistered. 1043 virtual void OnUnregistration(); 1044 1045 // Marks the alarm as unregistered. 1046 virtual void OnShutdown(EpollServer* eps); 1047 1048 // If the alarm was registered, unregister it. 1049 void UnregisterIfRegistered(); 1050 registered()1051 bool registered() const { return registered_; } 1052 eps()1053 const EpollServer* eps() const { return eps_; } 1054 1055 private: 1056 EpollServer::AlarmRegToken token_; 1057 EpollServer* eps_; 1058 bool registered_; 1059 }; 1060 1061 } // namespace net 1062 1063 #endif // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 1064 1065