1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_ 6 #define NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_ 7 8 #include <fcntl.h> 9 #include <sys/queue.h> 10 #include <map> 11 #include <set> 12 #include <string> 13 #include <utility> 14 #include <vector> 15 16 // #define EPOLL_SERVER_EVENT_TRACING 1 17 // 18 // Defining EPOLL_SERVER_EVENT_TRACING 19 // causes code to exist which didn't before. 20 // This code tracks each event generated by the epollserver, 21 // as well as providing a per-fd-registered summary of 22 // events. Note that enabling this code vastly slows 23 // down operations, and uses substantially more 24 // memory. For these reasons, it should only be enabled when doing 25 // developer debugging at his/her workstation. 26 // 27 // A structure called 'EventRecorder' will exist when 28 // the macro is defined. See the EventRecorder class interface 29 // within the EpollServer class for more details. 30 #ifdef EPOLL_SERVER_EVENT_TRACING 31 #include <ostream> 32 #include "base/logging.h" 33 #endif 34 35 #include "base/basictypes.h" 36 #include "base/compiler_specific.h" 37 #include "base/containers/hash_tables.h" 38 #include "base/memory/scoped_ptr.h" 39 #include <sys/epoll.h> 40 41 namespace net { 42 43 class EpollServer; 44 class EpollAlarmCallbackInterface; 45 class ReadPipeCallback; 46 47 struct EpollEvent { EpollEventEpollEvent48 EpollEvent(int events, bool is_epoll_wait) 49 : in_events(events), 50 out_ready_mask(0) { 51 } 52 53 int in_events; // incoming events 54 int out_ready_mask; // the new event mask for ready list (0 means don't 55 // get on the ready list). This field is always 56 // initialized to 0 when the event is passed to 57 // OnEvent. 58 }; 59 60 // Callbacks which go into EpollServers are expected to derive from this class. 61 class EpollCallbackInterface { 62 public: 63 // Summary: 64 // Called when the callback is registered into a EpollServer. 65 // Args: 66 // eps - the poll server into which this callback was registered 67 // fd - the file descriptor which was registered 68 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 69 // which was registered (and will initially be used 70 // in the epoll() calls) 71 virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0; 72 73 // Summary: 74 // Called when the event_mask is modified (for a file-descriptor) 75 // Args: 76 // fd - the file descriptor which was registered 77 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 78 // which was is now curren (and will be used 79 // in subsequent epoll() calls) 80 virtual void OnModification(int fd, int event_mask) = 0; 81 82 // Summary: 83 // Called whenever an event occurs on the file-descriptor. 84 // This is where the bulk of processing is expected to occur. 85 // Args: 86 // fd - the file descriptor which was registered 87 // event - a struct that contains the event mask (composed of EPOLLIN, 88 // EPOLLOUT, etc), a flag that indicates whether this is a true 89 // epoll_wait event vs one from the ready list, and an output 90 // parameter for OnEvent to inform the EpollServer whether to put 91 // this fd on the ready list. 92 virtual void OnEvent(int fd, EpollEvent* event) = 0; 93 94 // Summary: 95 // Called when the file-descriptor is unregistered from the poll-server. 96 // Args: 97 // fd - the file descriptor which was registered, and of this call, is now 98 // unregistered. 99 // replaced - If true, this callback is being replaced by another, otherwise 100 // it is simply being removed. 101 virtual void OnUnregistration(int fd, bool replaced) = 0; 102 103 // Summary: 104 // Called when the epoll server is shutting down. This is different from 105 // OnUnregistration because the subclass may want to clean up memory. 106 // This is called in leiu of OnUnregistration. 107 // Args: 108 // fd - the file descriptor which was registered. 109 virtual void OnShutdown(EpollServer* eps, int fd) = 0; 110 ~EpollCallbackInterface()111 virtual ~EpollCallbackInterface() {} 112 113 protected: EpollCallbackInterface()114 EpollCallbackInterface() {} 115 }; 116 117 //////////////////////////////////////////////////////////////////////////////// 118 //////////////////////////////////////////////////////////////////////////////// 119 120 class EpollServer { 121 public: 122 typedef EpollAlarmCallbackInterface AlarmCB; 123 typedef EpollCallbackInterface CB; 124 125 typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap; 126 typedef TimeToAlarmCBMap::iterator AlarmRegToken; 127 128 // Summary: 129 // Constructor: 130 // By default, we don't wait any amount of time for events, and 131 // we suggest to the epoll-system that we're going to use on-the-order 132 // of 1024 FDs. 133 EpollServer(); 134 135 //////////////////////////////////////// 136 137 // Destructor 138 virtual ~EpollServer(); 139 140 //////////////////////////////////////// 141 142 // Summary 143 // Register a callback to be called whenever an event contained 144 // in the set of events included in event_mask occurs on the 145 // file-descriptor 'fd' 146 // 147 // Note that only one callback is allowed to be registered for 148 // any specific file-decriptor. 149 // 150 // If a callback is registered for a file-descriptor which has already 151 // been registered, then the previous callback is unregistered with 152 // the 'replaced' flag set to true. I.e. the previous callback's 153 // OnUnregistration() function is called like so: 154 // OnUnregistration(fd, true); 155 // 156 // The epoll server does NOT take on ownership of the callback: the callback 157 // creator is responsible for managing that memory. 158 // 159 // Args: 160 // fd - a valid file-descriptor 161 // cb - an instance of a subclass of EpollCallbackInterface 162 // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating 163 // the events for which the callback would like to be 164 // called. 165 virtual void RegisterFD(int fd, CB* cb, int event_mask); 166 167 //////////////////////////////////////// 168 169 // Summary: 170 // A shortcut for RegisterFD which sets things up such that the 171 // callback is called when 'fd' is available for writing. 172 // Args: 173 // fd - a valid file-descriptor 174 // cb - an instance of a subclass of EpollCallbackInterface 175 virtual void RegisterFDForWrite(int fd, CB* cb); 176 177 //////////////////////////////////////// 178 179 // Summary: 180 // A shortcut for RegisterFD which sets things up such that the 181 // callback is called when 'fd' is available for reading or writing. 182 // Args: 183 // fd - a valid file-descriptor 184 // cb - an instance of a subclass of EpollCallbackInterface 185 virtual void RegisterFDForReadWrite(int fd, CB* cb); 186 187 //////////////////////////////////////// 188 189 // Summary: 190 // A shortcut for RegisterFD which sets things up such that the 191 // callback is called when 'fd' is available for reading. 192 // Args: 193 // fd - a valid file-descriptor 194 // cb - an instance of a subclass of EpollCallbackInterface 195 virtual void RegisterFDForRead(int fd, CB* cb); 196 197 //////////////////////////////////////// 198 199 // Summary: 200 // Removes the FD and the associated callback from the pollserver. 201 // If the callback is registered with other FDs, they will continue 202 // to be processed using the callback without modification. 203 // If the file-descriptor specified is not registered in the 204 // epoll_server, then nothing happens as a result of this call. 205 // Args: 206 // fd - the file-descriptor which should no-longer be monitored. 207 virtual void UnregisterFD(int fd); 208 209 //////////////////////////////////////// 210 211 // Summary: 212 // Modifies the event mask for the file-descriptor, replacing 213 // the old event_mask with the new one specified here. 214 // If the file-descriptor specified is not registered in the 215 // epoll_server, then nothing happens as a result of this call. 216 // Args: 217 // fd - the fd whose event mask should be modified. 218 // event_mask - the new event mask. 219 virtual void ModifyCallback(int fd, int event_mask); 220 221 //////////////////////////////////////// 222 223 // Summary: 224 // Modifies the event mask for the file-descriptor such that we 225 // no longer request events when 'fd' is readable. 226 // If the file-descriptor specified is not registered in the 227 // epoll_server, then nothing happens as a result of this call. 228 // Args: 229 // fd - the fd whose event mask should be modified. 230 virtual void StopRead(int fd); 231 232 //////////////////////////////////////// 233 234 // Summary: 235 // Modifies the event mask for the file-descriptor such that we 236 // request events when 'fd' is readable. 237 // If the file-descriptor specified is not registered in the 238 // epoll_server, then nothing happens as a result of this call. 239 // Args: 240 // fd - the fd whose event mask should be modified. 241 virtual void StartRead(int fd); 242 243 //////////////////////////////////////// 244 245 // Summary: 246 // Modifies the event mask for the file-descriptor such that we 247 // no longer request events when 'fd' is writable. 248 // If the file-descriptor specified is not registered in the 249 // epoll_server, then nothing happens as a result of this call. 250 // Args: 251 // fd - the fd whose event mask should be modified. 252 virtual void StopWrite(int fd); 253 254 //////////////////////////////////////// 255 256 // Summary: 257 // Modifies the event mask for the file-descriptor such that we 258 // request events when 'fd' is writable. 259 // If the file-descriptor specified is not registered in the 260 // epoll_server, then nothing happens as a result of this call. 261 // Args: 262 // fd - the fd whose event mask should be modified. 263 virtual void StartWrite(int fd); 264 265 //////////////////////////////////////// 266 267 // Summary: 268 // Looks up the callback associated with the file-desriptor 'fd'. 269 // If a callback is associated with this file-descriptor, then 270 // it's OnEvent() method is called with the file-descriptor 'fd', 271 // and event_mask 'event_mask' 272 // 273 // If no callback is registered for this file-descriptor, nothing 274 // will happen as a result of this call. 275 // 276 // This function is used internally by the EpollServer, but is 277 // available publically so that events might be 'faked'. Calling 278 // this function with an fd and event_mask is equivalent (as far 279 // as the callback is concerned) to having a real event generated 280 // by epoll (except, of course, that read(), etc won't necessarily 281 // be able to read anything) 282 // Args: 283 // fd - the file-descriptor on which an event has occured. 284 // event_mask - a bitmask representing the events which have occured 285 // on/for this fd. This bitmask is composed of 286 // POLLIN, POLLOUT, etc. 287 // 288 void HandleEvent(int fd, int event_mask); 289 290 // Summary: 291 // Call this when you want the pollserver to 292 // wait for events and execute the callbacks associated with 293 // the file-descriptors on which those events have occured. 294 // Depending on the value of timeout_in_us_, this may or may 295 // not return immediately. Please reference the set_timeout() 296 // function for the specific behaviour. 297 virtual void WaitForEventsAndExecuteCallbacks(); 298 299 // Summary: 300 // When an fd is registered to use edge trigger notification, the ready 301 // list can be used to simulate level trigger semantics. Edge trigger 302 // registration doesn't send an initial event, and only rising edge (going 303 // from blocked to unblocked) events are sent. A callback can put itself on 304 // the ready list by calling SetFDReady() after calling RegisterFD(). The 305 // OnEvent method of all callbacks associated with the fds on the ready 306 // list will be called immediately after processing the events returned by 307 // epoll_wait(). The fd is removed from the ready list before the 308 // callback's OnEvent() method is invoked. To stay on the ready list, the 309 // OnEvent() (or some function in that call chain) must call SetFDReady 310 // again. When a fd is unregistered using UnregisterFD(), the fd is 311 // automatically removed from the ready list. 312 // 313 // When the callback for a edge triggered fd hits the falling edge (about 314 // to block, either because of it got an EAGAIN, or had a short read/write 315 // operation), it should remove itself from the ready list using 316 // SetFDNotReady() (since OnEvent cannot distinguish between invocation 317 // from the ready list vs from a normal epoll event). All four ready list 318 // methods are safe to be called within the context of the callbacks. 319 // 320 // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds 321 // that are registered with the EpollServer will be put on the ready list. 322 // SetFDReady() and SetFDNotReady() will do nothing if the EpollServer 323 // doesn't know about the fd passed in. 324 // 325 // Since the ready list cannot reliably determine proper set of events 326 // which should be sent to the callback, SetFDReady() requests the caller 327 // to provide the ready list with the event mask, which will be used later 328 // when OnEvent() is invoked by the ready list. Hence, the event_mask 329 // passedto SetFDReady() does not affect the actual epoll registration of 330 // the fd with the kernel. If a fd is already put on the ready list, and 331 // SetFDReady() is called again for that fd with a different event_mask, 332 // the event_mask will be updated. 333 virtual void SetFDReady(int fd, int events_to_fake); 334 335 virtual void SetFDNotReady(int fd); 336 337 // Summary: 338 // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as 339 // debugging tools and for writing unit tests. 340 // ISFDReady() returns whether a fd is in the ready list. 341 // ReadyListSize() returns the number of fds on the ready list. 342 // VerifyReadyList() checks the consistency of internal data structure. It 343 // will CHECK if it finds an error. 344 virtual bool IsFDReady(int fd) const; 345 ReadyListSize()346 size_t ReadyListSize() const { return ready_list_size_; } 347 348 void VerifyReadyList() const; 349 350 //////////////////////////////////////// 351 352 // Summary: 353 // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'. 354 // If the callback returns a positive number from its OnAlarm() function, 355 // then the callback will be re-registered at that time, else the alarm 356 // owner is responsible for freeing up memory. 357 // 358 // Important: A give AlarmCB* can not be registered again if it is already 359 // registered. If a user wants to register a callback again it should first 360 // unregister the previous callback before calling RegisterAlarm again. 361 // Args: 362 // timeout_time_in_us - the absolute time at which the alarm should go off 363 // ac - the alarm which will be called. 364 virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac); 365 366 // Summary: 367 // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() + 368 // delta_in_us). While this is somewhat less accurate (see the description 369 // for ApproximateNowInUs() to see how 'approximate'), the error is never 370 // worse than the amount of time it takes to process all events in one 371 // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a 372 // positive number from its OnAlarm() function, then the callback will be 373 // re-registered at that time, else the alarm owner is responsible for 374 // freeing up memory. 375 // Note that this function is purely a convienence. The 376 // same thing may be accomplished by using RegisterAlarm with 377 // ApproximateNowInUs() directly. 378 // 379 // Important: A give AlarmCB* can not be registered again if it is already 380 // registered. If a user wants to register a callback again it should first 381 // unregister the previous callback before calling RegisterAlarm again. 382 // Args: 383 // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at 384 // which point the alarm should go off. 385 // ac - the alarm which will be called. RegisterAlarmApproximateDelta(int64 delta_in_us,AlarmCB * ac)386 void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) { 387 RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac); 388 } 389 390 //////////////////////////////////////// 391 392 // Summary: 393 // Unregister the alarm referred to by iterator_token; Callers should 394 // be warned that a token may have become already invalid when OnAlarm() 395 // is called, was unregistered, or OnShutdown was called on that alarm. 396 // Args: 397 // iterator_token - iterator to the alarm callback to unregister. 398 virtual void UnregisterAlarm( 399 const EpollServer::AlarmRegToken& iterator_token); 400 401 //////////////////////////////////////// 402 403 // Summary: 404 // returns the number of file-descriptors registered in this EpollServer. 405 // Returns: 406 // number of FDs registered (discounting the internal pipe used for Wake) 407 virtual int NumFDsRegistered() const; 408 409 // Summary: 410 // Force the epoll server to wake up (by writing to an internal pipe). 411 virtual void Wake(); 412 413 // Summary: 414 // Wrapper around WallTimer's NowInUsec. We do this so that we can test 415 // EpollServer without using the system clock (and can avoid the flakiness 416 // that would ensue) 417 // Returns: 418 // the current time as number of microseconds since the Unix epoch. 419 virtual int64 NowInUsec() const; 420 421 // Summary: 422 // Since calling NowInUsec() many thousands of times per 423 // WaitForEventsAndExecuteCallbacks function call is, to say the least, 424 // inefficient, we allow users to use an approximate time instead. The 425 // time returned from this function is as accurate as NowInUsec() when 426 // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's 427 // callstack. 428 // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then 429 // this function returns the time at which the 430 // WaitForEventsAndExecuteCallbacks function started to process events or 431 // alarms. 432 // 433 // Essentially, this function makes available a fast and mostly accurate 434 // mechanism for getting the time for any function handling an event or 435 // alarm. When functions which are not handling callbacks or alarms call 436 // this function, they get the slow and "absolutely" accurate time. 437 // 438 // Users should be encouraged to use this function. 439 // Returns: 440 // the "approximate" current time as number of microseconds since the Unix 441 // epoch. 442 virtual int64 ApproximateNowInUsec() const; 443 444 static std::string EventMaskToString(int event_mask); 445 446 // Summary: 447 // Logs the state of the epoll server with LOG(ERROR). 448 void LogStateOnCrash(); 449 450 // Summary: 451 // Set the timeout to the value specified. 452 // If the timeout is set to a negative number, 453 // WaitForEventsAndExecuteCallbacks() will only return when an event has 454 // occured 455 // If the timeout is set to zero, 456 // WaitForEventsAndExecuteCallbacks() will return immediately 457 // If the timeout is set to a positive number, 458 // WaitForEventsAndExecuteCallbacks() will return when an event has 459 // occured, or when timeout_in_us microseconds has elapsed, whichever 460 // is first. 461 // Args: 462 // timeout_in_us - value specified depending on behaviour desired. 463 // See above. set_timeout_in_us(int64 timeout_in_us)464 void set_timeout_in_us(int64 timeout_in_us) { 465 timeout_in_us_ = timeout_in_us; 466 } 467 468 //////////////////////////////////////// 469 470 // Summary: 471 // Accessor for the current value of timeout_in_us. timeout_in_us()472 int timeout_in_us() const { return timeout_in_us_; } 473 474 // Summary: 475 // Returns true when the EpollServer() is being destroyed. in_shutdown()476 bool in_shutdown() const { return in_shutdown_; } 477 ContainsAlarm(EpollAlarmCallbackInterface * alarm)478 bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const { 479 return all_alarms_.find(alarm) != all_alarms_.end(); 480 } 481 482 // Summary: 483 // A function for implementing the ready list. It invokes OnEvent for each 484 // of the fd in the ready list, and takes care of adding them back to the 485 // ready list if the callback requests it (by checking that out_ready_mask 486 // is non-zero). 487 void CallReadyListCallbacks(); 488 489 protected: 490 virtual int GetFlags(int fd); SetFlags(int fd,int flags)491 inline int SetFlags(int fd, int flags) { 492 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 493 } 494 495 virtual void SetNonblocking(int fd); 496 497 // This exists here so that we can override this function in unittests 498 // in order to make effective mock EpollServer objects. 499 virtual int epoll_wait_impl(int epfd, 500 struct epoll_event* events, 501 int max_events, 502 int timeout_in_ms); 503 504 // this struct is used internally, and is never used by anything external 505 // to this class. Some of its members are declared mutable to get around the 506 // restriction imposed by hash_set. Since hash_set knows nothing about the 507 // objects it stores, it has to assume that every bit of the object is used 508 // in the hash function and equal_to comparison. Thus hash_set::iterator is a 509 // const iterator. In this case, the only thing that must stay constant is 510 // fd. Everything else are just along for the ride and changing them doesn't 511 // compromise the hash_set integrity. 512 struct CBAndEventMask { CBAndEventMaskCBAndEventMask513 CBAndEventMask() 514 : cb(NULL), 515 fd(-1), 516 event_mask(0), 517 events_asserted(0), 518 events_to_fake(0), 519 in_use(false) { 520 entry.le_next = NULL; 521 entry.le_prev = NULL; 522 } 523 CBAndEventMaskCBAndEventMask524 CBAndEventMask(EpollCallbackInterface* cb, 525 int event_mask, 526 int fd) 527 : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0), 528 events_to_fake(0), in_use(false) { 529 entry.le_next = NULL; 530 entry.le_prev = NULL; 531 } 532 533 // Required operator for hash_set. Normally operator== should be a free 534 // standing function. However, since CBAndEventMask is a protected type and 535 // it will never be a base class, it makes no difference. 536 bool operator==(const CBAndEventMask& cb_and_mask) const { 537 return fd == cb_and_mask.fd; 538 } 539 // A callback. If the fd is unregistered inside the callchain of OnEvent, 540 // the cb will be set to NULL. 541 mutable EpollCallbackInterface* cb; 542 543 mutable LIST_ENTRY(CBAndEventMask) entry; 544 // file descriptor registered with the epoll server. 545 int fd; 546 // the current event_mask registered for this callback. 547 mutable int event_mask; 548 // the event_mask that was returned by epoll 549 mutable int events_asserted; 550 // the event_mask for the ready list to use to call OnEvent. 551 mutable int events_to_fake; 552 // toggle around calls to OnEvent to tell UnregisterFD to not erase the 553 // iterator because HandleEvent is using it. 554 mutable bool in_use; 555 }; 556 557 // Custom hash function to be used by hash_set. 558 struct CBAndEventMaskHash { operatorCBAndEventMaskHash559 size_t operator()(const CBAndEventMask& cb_and_eventmask) const { 560 return static_cast<size_t>(cb_and_eventmask.fd); 561 } 562 }; 563 564 typedef base::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap; 565 566 // the following four functions are OS-specific, and are likely 567 // to be changed in a subclass if the poll/select method is changed 568 // from epoll. 569 570 // Summary: 571 // Deletes a file-descriptor from the set of FDs that should be 572 // monitored with epoll. 573 // Note that this only deals with modifying data relating -directly- 574 // with the epoll call-- it does not modify any data within the 575 // epoll_server. 576 // Args: 577 // fd - the file descriptor to-be-removed from the monitoring set 578 virtual void DelFD(int fd) const; 579 580 //////////////////////////////////////// 581 582 // Summary: 583 // Adds a file-descriptor to the set of FDs that should be 584 // monitored with epoll. 585 // Note that this only deals with modifying data relating -directly- 586 // with the epoll call. 587 // Args: 588 // fd - the file descriptor to-be-added to the monitoring set 589 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 590 // OR'd together) which will be associated with this 591 // FD initially. 592 virtual void AddFD(int fd, int event_mask) const; 593 594 //////////////////////////////////////// 595 596 // Summary: 597 // Modifies a file-descriptor in the set of FDs that should be 598 // monitored with epoll. 599 // Note that this only deals with modifying data relating -directly- 600 // with the epoll call. 601 // Args: 602 // fd - the file descriptor to-be-added to the monitoring set 603 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 604 // OR'd together) which will be associated with this 605 // FD after this call. 606 virtual void ModFD(int fd, int event_mask) const; 607 608 //////////////////////////////////////// 609 610 // Summary: 611 // Modified the event mask associated with an FD in the set of 612 // data needed by epoll. 613 // Events are removed before they are added, thus, if ~0 is put 614 // in 'remove_event', whatever is put in 'add_event' will be 615 // the new event mask. 616 // If the file-descriptor specified is not registered in the 617 // epoll_server, then nothing happens as a result of this call. 618 // Args: 619 // fd - the file descriptor whose event mask is to be modified 620 // remove_event - the events which are to be removed from the current 621 // event_mask 622 // add_event - the events which are to be added to the current event_mask 623 // 624 // 625 virtual void ModifyFD(int fd, int remove_event, int add_event); 626 627 //////////////////////////////////////// 628 629 // Summary: 630 // Waits for events, and calls HandleEvents() for each 631 // fd, event pair discovered to possibly have an event. 632 // Note that a callback (B) may get a spurious event if 633 // another callback (A) has closed a file-descriptor N, and 634 // the callback (B) has a newly opened file-descriptor, which 635 // also happens to be N. 636 virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us, 637 struct epoll_event events[], 638 int events_size); 639 640 641 642 // Summary: 643 // An internal function for implementing the ready list. It adds a fd's 644 // CBAndEventMask to the ready list. If the fd is already on the ready 645 // list, it is a no-op. 646 void AddToReadyList(CBAndEventMask* cb_and_mask); 647 648 // Summary: 649 // An internal function for implementing the ready list. It remove a fd's 650 // CBAndEventMask from the ready list. If the fd is not on the ready list, 651 // it is a no-op. 652 void RemoveFromReadyList(const CBAndEventMask& cb_and_mask); 653 654 // Summary: 655 // Calls any pending alarms that should go off and reregisters them if they 656 // were recurring. 657 virtual void CallAndReregisterAlarmEvents(); 658 659 // The file-descriptor created for epolling 660 int epoll_fd_; 661 662 // The mapping of file-descriptor to CBAndEventMasks 663 FDToCBMap cb_map_; 664 665 // Custom hash function to be used by hash_set. 666 struct AlarmCBHash { operatorAlarmCBHash667 size_t operator()(AlarmCB*const& p) const { 668 return reinterpret_cast<size_t>(p); 669 } 670 }; 671 672 673 // TOOD(sushantj): Having this hash_set is avoidable. We currently have it 674 // only so that we can enforce stringent checks that a caller can not register 675 // the same alarm twice. One option is to have an implementation in which 676 // this hash_set is used only in the debug mode. 677 typedef base::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap; 678 AlarmCBMap all_alarms_; 679 680 TimeToAlarmCBMap alarm_map_; 681 682 // The amount of time in microseconds that we'll wait before returning 683 // from the WaitForEventsAndExecuteCallbacks() function. 684 // If this is positive, wait that many microseconds. 685 // If this is negative, wait forever, or for the first event that occurs 686 // If this is zero, never wait for an event. 687 int64 timeout_in_us_; 688 689 // This is nonzero only after the invocation of epoll_wait_impl within 690 // WaitForEventsAndCallHandleEvents and before the function 691 // WaitForEventsAndExecuteCallbacks returns. At all other times, this is 692 // zero. This enables us to have relatively accurate time returned from the 693 // ApproximateNowInUs() function. See that function for more details. 694 int64 recorded_now_in_us_; 695 696 // This is used to implement CallAndReregisterAlarmEvents. This stores 697 // all alarms that were reregistered because OnAlarm() returned a 698 // value > 0 and the time at which they should be executed is less that 699 // the current time. By storing such alarms in this map we ensure 700 // that while calling CallAndReregisterAlarmEvents we do not call 701 // OnAlarm on any alarm in this set. This ensures that we do not 702 // go in an infinite loop. 703 AlarmCBMap alarms_reregistered_and_should_be_skipped_; 704 705 LIST_HEAD(ReadyList, CBAndEventMask) ready_list_; 706 LIST_HEAD(TmpList, CBAndEventMask) tmp_list_; 707 int ready_list_size_; 708 // TODO(alyssar): make this into something that scales up. 709 static const int events_size_ = 256; 710 struct epoll_event events_[256]; 711 712 #ifdef EPOLL_SERVER_EVENT_TRACING 713 struct EventRecorder { 714 public: EventRecorderEventRecorder715 EventRecorder() : num_records_(0), record_threshold_(10000) {} 716 ~EventRecorderEventRecorder717 ~EventRecorder() { 718 Clear(); 719 } 720 721 // When a number of events equals the record threshold, 722 // the collected data summary for all FDs will be written 723 // to LOG(INFO). Note that this does not include the 724 // individual events (if you'reinterested in those, you'll 725 // have to get at them programmatically). 726 // After any such flushing to LOG(INFO) all events will 727 // be cleared. 728 // Note that the definition of an 'event' is a bit 'hazy', 729 // as it includes the 'Unregistration' event, and perhaps 730 // others. set_record_thresholdEventRecorder731 void set_record_threshold(int64 new_threshold) { 732 record_threshold_ = new_threshold; 733 } 734 ClearEventRecorder735 void Clear() { 736 for (int i = 0; i < debug_events_.size(); ++i) { 737 delete debug_events_[i]; 738 } 739 debug_events_.clear(); 740 unregistered_fds_.clear(); 741 event_counts_.clear(); 742 } 743 MaybeRecordAndClearEventRecorder744 void MaybeRecordAndClear() { 745 ++num_records_; 746 if ((num_records_ > record_threshold_) && 747 (record_threshold_ > 0)) { 748 LOG(INFO) << "\n" << *this; 749 num_records_ = 0; 750 Clear(); 751 } 752 } 753 RecordFDMaskEventEventRecorder754 void RecordFDMaskEvent(int fd, int mask, const char* function) { 755 FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function); 756 debug_events_.push_back(fdmo); 757 MaybeRecordAndClear(); 758 } 759 RecordEpollWaitEventEventRecorder760 void RecordEpollWaitEvent(int timeout_in_ms, 761 int num_events_generated) { 762 EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms, 763 num_events_generated); 764 debug_events_.push_back(ewo); 765 MaybeRecordAndClear(); 766 } 767 RecordEpollEventEventRecorder768 void RecordEpollEvent(int fd, int event_mask) { 769 Events& events_for_fd = event_counts_[fd]; 770 events_for_fd.AssignFromMask(event_mask); 771 MaybeRecordAndClear(); 772 } 773 774 friend ostream& operator<<(ostream& os, const EventRecorder& er) { 775 for (int i = 0; i < er.unregistered_fds_.size(); ++i) { 776 os << "fd: " << er.unregistered_fds_[i] << "\n"; 777 os << er.unregistered_fds_[i]; 778 } 779 for (EventCountsMap::const_iterator i = er.event_counts_.begin(); 780 i != er.event_counts_.end(); 781 ++i) { 782 os << "fd: " << i->first << "\n"; 783 os << i->second; 784 } 785 for (int i = 0; i < er.debug_events_.size(); ++i) { 786 os << *(er.debug_events_[i]) << "\n"; 787 } 788 return os; 789 } 790 RecordUnregistrationEventRecorder791 void RecordUnregistration(int fd) { 792 EventCountsMap::iterator i = event_counts_.find(fd); 793 if (i != event_counts_.end()) { 794 unregistered_fds_.push_back(i->second); 795 event_counts_.erase(i); 796 } 797 MaybeRecordAndClear(); 798 } 799 800 protected: 801 class DebugOutput { 802 public: 803 friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) { 804 debug_output.OutputToStream(os); 805 return os; 806 } 807 virtual void OutputToStream(ostream* os) const = 0; ~DebugOutputEventRecorder808 virtual ~DebugOutput() {} 809 }; 810 811 class FDMaskOutput : public DebugOutput { 812 public: FDMaskOutputEventRecorder813 FDMaskOutput(int fd, int mask, const char* function) : 814 fd_(fd), mask_(mask), function_(function) {} OutputToStreamEventRecorder815 virtual void OutputToStream(ostream* os) const { 816 (*os) << "func: " << function_ 817 << "\tfd: " << fd_; 818 if (mask_ != 0) { 819 (*os) << "\tmask: " << EventMaskToString(mask_); 820 } 821 } 822 int fd_; 823 int mask_; 824 const char* function_; 825 }; 826 827 class EpollWaitOutput : public DebugOutput { 828 public: EpollWaitOutputEventRecorder829 EpollWaitOutput(int timeout_in_ms, 830 int num_events_generated) : 831 timeout_in_ms_(timeout_in_ms), 832 num_events_generated_(num_events_generated) {} OutputToStreamEventRecorder833 virtual void OutputToStream(ostream* os) const { 834 (*os) << "timeout_in_ms: " << timeout_in_ms_ 835 << "\tnum_events_generated: " << num_events_generated_; 836 } 837 protected: 838 int timeout_in_ms_; 839 int num_events_generated_; 840 }; 841 842 struct Events { EventsEventRecorder::Events843 Events() : 844 epoll_in(0), 845 epoll_pri(0), 846 epoll_out(0), 847 epoll_rdnorm(0), 848 epoll_rdband(0), 849 epoll_wrnorm(0), 850 epoll_wrband(0), 851 epoll_msg(0), 852 epoll_err(0), 853 epoll_hup(0), 854 epoll_oneshot(0), 855 epoll_et(0) {} 856 AssignFromMaskEventRecorder::Events857 void AssignFromMask(int event_mask) { 858 if (event_mask & EPOLLIN) ++epoll_in; 859 if (event_mask & EPOLLPRI) ++epoll_pri; 860 if (event_mask & EPOLLOUT) ++epoll_out; 861 if (event_mask & EPOLLRDNORM) ++epoll_rdnorm; 862 if (event_mask & EPOLLRDBAND) ++epoll_rdband; 863 if (event_mask & EPOLLWRNORM) ++epoll_wrnorm; 864 if (event_mask & EPOLLWRBAND) ++epoll_wrband; 865 if (event_mask & EPOLLMSG) ++epoll_msg; 866 if (event_mask & EPOLLERR) ++epoll_err; 867 if (event_mask & EPOLLHUP) ++epoll_hup; 868 if (event_mask & EPOLLONESHOT) ++epoll_oneshot; 869 if (event_mask & EPOLLET) ++epoll_et; 870 }; 871 872 friend ostream& operator<<(ostream& os, const Events& ev) { 873 if (ev.epoll_in) { 874 os << "\t EPOLLIN: " << ev.epoll_in << "\n"; 875 } 876 if (ev.epoll_pri) { 877 os << "\t EPOLLPRI: " << ev.epoll_pri << "\n"; 878 } 879 if (ev.epoll_out) { 880 os << "\t EPOLLOUT: " << ev.epoll_out << "\n"; 881 } 882 if (ev.epoll_rdnorm) { 883 os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n"; 884 } 885 if (ev.epoll_rdband) { 886 os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n"; 887 } 888 if (ev.epoll_wrnorm) { 889 os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n"; 890 } 891 if (ev.epoll_wrband) { 892 os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n"; 893 } 894 if (ev.epoll_msg) { 895 os << "\t EPOLLMSG: " << ev.epoll_msg << "\n"; 896 } 897 if (ev.epoll_err) { 898 os << "\t EPOLLERR: " << ev.epoll_err << "\n"; 899 } 900 if (ev.epoll_hup) { 901 os << "\t EPOLLHUP: " << ev.epoll_hup << "\n"; 902 } 903 if (ev.epoll_oneshot) { 904 os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n"; 905 } 906 if (ev.epoll_et) { 907 os << "\t EPOLLET: " << ev.epoll_et << "\n"; 908 } 909 return os; 910 } 911 912 unsigned int epoll_in; 913 unsigned int epoll_pri; 914 unsigned int epoll_out; 915 unsigned int epoll_rdnorm; 916 unsigned int epoll_rdband; 917 unsigned int epoll_wrnorm; 918 unsigned int epoll_wrband; 919 unsigned int epoll_msg; 920 unsigned int epoll_err; 921 unsigned int epoll_hup; 922 unsigned int epoll_oneshot; 923 unsigned int epoll_et; 924 }; 925 926 std::vector<DebugOutput*> debug_events_; 927 std::vector<Events> unregistered_fds_; 928 typedef base::hash_map<int, Events> EventCountsMap; 929 EventCountsMap event_counts_; 930 int64 num_records_; 931 int64 record_threshold_; 932 }; 933 ClearEventRecords()934 void ClearEventRecords() { 935 event_recorder_.Clear(); 936 } WriteEventRecords(ostream * os)937 void WriteEventRecords(ostream* os) const { 938 (*os) << event_recorder_; 939 } 940 941 mutable EventRecorder event_recorder_; 942 943 #endif 944 945 private: 946 // Helper functions used in the destructor. 947 void CleanupFDToCBMap(); 948 void CleanupTimeToAlarmCBMap(); 949 950 // The callback registered to the fds below. As the purpose of their 951 // registration is to wake the epoll server it just clears the pipe and 952 // returns. 953 scoped_ptr<ReadPipeCallback> wake_cb_; 954 955 // A pipe owned by the epoll server. The server will be registered to listen 956 // on read_fd_ and can be woken by Wake() which writes to write_fd_. 957 int read_fd_; 958 int write_fd_; 959 960 // This boolean is checked to see if it is false at the top of the 961 // WaitForEventsAndExecuteCallbacks function. If not, then it either returns 962 // without doing work, and logs to ERROR, or aborts the program (in 963 // DEBUG mode). If so, then it sets the bool to true, does work, and 964 // sets it back to false when done. This catches unwanted recursion. 965 bool in_wait_for_events_and_execute_callbacks_; 966 967 // Returns true when the EpollServer() is being destroyed. 968 bool in_shutdown_; 969 970 DISALLOW_COPY_AND_ASSIGN(EpollServer); 971 }; 972 973 class EpollAlarmCallbackInterface { 974 public: 975 // Summary: 976 // Called when an alarm times out. Invalidates an AlarmRegToken. 977 // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must 978 // delete it, as the reference is no longer valid. 979 // Returns: 980 // the unix time (in microseconds) at which this alarm should be signaled 981 // again, or 0 if the alarm should be removed. 982 virtual int64 OnAlarm() = 0; 983 984 // Summary: 985 // Called when the an alarm is registered. Invalidates an AlarmRegToken. 986 // Args: 987 // token: the iterator to the the alarm registered in the alarm map. 988 // WARNING: this token becomes invalid when the alarm fires, is 989 // unregistered, or OnShutdown is called on that alarm. 990 // eps: the epoll server the alarm is registered with. 991 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 992 EpollServer* eps) = 0; 993 994 // Summary: 995 // Called when the an alarm is unregistered. 996 // WARNING: It is not valid to unregister a callback and then use the token 997 // that was saved to refer to the callback. 998 virtual void OnUnregistration() = 0; 999 1000 // Summary: 1001 // Called when the epoll server is shutting down. 1002 // Invalidates the AlarmRegToken that was given when this alarm was 1003 // registered. 1004 virtual void OnShutdown(EpollServer* eps) = 0; 1005 ~EpollAlarmCallbackInterface()1006 virtual ~EpollAlarmCallbackInterface() {} 1007 1008 protected: EpollAlarmCallbackInterface()1009 EpollAlarmCallbackInterface() {} 1010 }; 1011 1012 // A simple alarm which unregisters itself on destruction. 1013 // 1014 // PLEASE NOTE: 1015 // Any classes overriding these functions must either call the implementation 1016 // of the parent class, or is must otherwise make sure that the 'registered_' 1017 // boolean and the token, 'token_', are updated appropriately. 1018 class EpollAlarm : public EpollAlarmCallbackInterface { 1019 public: 1020 EpollAlarm(); 1021 1022 virtual ~EpollAlarm(); 1023 1024 // Marks the alarm as unregistered and returns 0. The return value may be 1025 // safely ignored by subclasses. 1026 virtual int64 OnAlarm() OVERRIDE; 1027 1028 // Marks the alarm as registered, and stores the token. 1029 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 1030 EpollServer* eps) OVERRIDE; 1031 1032 // Marks the alarm as unregistered. 1033 virtual void OnUnregistration() OVERRIDE; 1034 1035 // Marks the alarm as unregistered. 1036 virtual void OnShutdown(EpollServer* eps) OVERRIDE; 1037 1038 // If the alarm was registered, unregister it. 1039 void UnregisterIfRegistered(); 1040 registered()1041 bool registered() const { return registered_; } 1042 eps()1043 const EpollServer* eps() const { return eps_; } 1044 1045 private: 1046 EpollServer::AlarmRegToken token_; 1047 EpollServer* eps_; 1048 bool registered_; 1049 }; 1050 1051 } // namespace net 1052 1053 #endif // NET_TOOLS_EPOLL_SERVER_EPOLL_SERVER_H_ 1054