1 /*
2 * nghttp2 - HTTP/2 C Library
3 *
4 * Copyright (c) 2015 Tatsuhiro Tsujikawa
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25 #include "shrpx_connection.h"
26
27 #ifdef HAVE_UNISTD_H
28 # include <unistd.h>
29 #endif // HAVE_UNISTD_H
30 #include <netinet/tcp.h>
31
32 #include <limits>
33
34 #include <openssl/err.h>
35
36 #include "shrpx_tls.h"
37 #include "shrpx_memcached_request.h"
38 #include "shrpx_log.h"
39 #include "memchunk.h"
40 #include "util.h"
41 #include "ssl_compat.h"
42
43 using namespace nghttp2;
44
45 namespace shrpx {
46
47 #if !LIBRESSL_2_7_API && !OPENSSL_1_1_API
48
BIO_get_data(BIO * bio)49 void *BIO_get_data(BIO *bio) { return bio->ptr; }
BIO_set_data(BIO * bio,void * ptr)50 void BIO_set_data(BIO *bio, void *ptr) { bio->ptr = ptr; }
BIO_set_init(BIO * bio,int init)51 void BIO_set_init(BIO *bio, int init) { bio->init = init; }
52
53 #endif // !LIBRESSL_2_7_API && !OPENSSL_1_1_API
54
Connection(struct ev_loop * loop,int fd,SSL * ssl,MemchunkPool * mcpool,ev_tstamp write_timeout,ev_tstamp read_timeout,const RateLimitConfig & write_limit,const RateLimitConfig & read_limit,IOCb writecb,IOCb readcb,TimerCb timeoutcb,void * data,size_t tls_dyn_rec_warmup_threshold,ev_tstamp tls_dyn_rec_idle_timeout,Proto proto)55 Connection::Connection(struct ev_loop *loop, int fd, SSL *ssl,
56 MemchunkPool *mcpool, ev_tstamp write_timeout,
57 ev_tstamp read_timeout,
58 const RateLimitConfig &write_limit,
59 const RateLimitConfig &read_limit, IOCb writecb,
60 IOCb readcb, TimerCb timeoutcb, void *data,
61 size_t tls_dyn_rec_warmup_threshold,
62 ev_tstamp tls_dyn_rec_idle_timeout, Proto proto)
63 : tls{DefaultMemchunks(mcpool), DefaultPeekMemchunks(mcpool),
64 DefaultMemchunks(mcpool)},
65 wlimit(loop, &wev, write_limit.rate, write_limit.burst),
66 rlimit(loop, &rev, read_limit.rate, read_limit.burst, this),
67 loop(loop),
68 data(data),
69 fd(fd),
70 tls_dyn_rec_warmup_threshold(tls_dyn_rec_warmup_threshold),
71 tls_dyn_rec_idle_timeout(tls_dyn_rec_idle_timeout),
72 proto(proto),
73 last_read(0.),
74 read_timeout(read_timeout) {
75
76 ev_io_init(&wev, writecb, fd, EV_WRITE);
77 ev_io_init(&rev, readcb, fd, EV_READ);
78
79 wev.data = this;
80 rev.data = this;
81
82 ev_timer_init(&wt, timeoutcb, 0., write_timeout);
83 ev_timer_init(&rt, timeoutcb, 0., read_timeout);
84
85 wt.data = this;
86 rt.data = this;
87
88 // set 0. to double field explicitly just in case
89 tls.last_write_idle = 0.;
90
91 if (ssl) {
92 set_ssl(ssl);
93 }
94 }
95
~Connection()96 Connection::~Connection() { disconnect(); }
97
disconnect()98 void Connection::disconnect() {
99 if (tls.ssl) {
100 SSL_set_shutdown(tls.ssl,
101 SSL_get_shutdown(tls.ssl) | SSL_RECEIVED_SHUTDOWN);
102 ERR_clear_error();
103
104 if (tls.cached_session) {
105 SSL_SESSION_free(tls.cached_session);
106 tls.cached_session = nullptr;
107 }
108
109 if (tls.cached_session_lookup_req) {
110 tls.cached_session_lookup_req->canceled = true;
111 tls.cached_session_lookup_req = nullptr;
112 }
113
114 SSL_shutdown(tls.ssl);
115 SSL_free(tls.ssl);
116 tls.ssl = nullptr;
117
118 tls.wbuf.reset();
119 tls.rbuf.reset();
120 tls.last_write_idle = 0.;
121 tls.warmup_writelen = 0;
122 tls.last_writelen = 0;
123 tls.last_readlen = 0;
124 tls.handshake_state = TLSHandshakeState::NORMAL;
125 tls.initial_handshake_done = false;
126 tls.reneg_started = false;
127 tls.sct_requested = false;
128 tls.early_data_finish = false;
129 }
130
131 if (fd != -1) {
132 shutdown(fd, SHUT_WR);
133 close(fd);
134 fd = -1;
135 }
136
137 // Stop watchers here because they could be activated in
138 // SSL_shutdown().
139 ev_timer_stop(loop, &rt);
140 ev_timer_stop(loop, &wt);
141
142 rlimit.stopw();
143 wlimit.stopw();
144 }
145
prepare_client_handshake()146 void Connection::prepare_client_handshake() {
147 SSL_set_connect_state(tls.ssl);
148 // This prevents SSL_read_early_data from being called.
149 tls.early_data_finish = true;
150 }
151
prepare_server_handshake()152 void Connection::prepare_server_handshake() {
153 SSL_set_accept_state(tls.ssl);
154 tls.server_handshake = true;
155 }
156
157 // BIO implementation is inspired by openldap implementation:
158 // http://www.openldap.org/devel/cvsweb.cgi/~checkout~/libraries/libldap/tls_o.c
159 namespace {
shrpx_bio_write(BIO * b,const char * buf,int len)160 int shrpx_bio_write(BIO *b, const char *buf, int len) {
161 if (buf == nullptr || len <= 0) {
162 return 0;
163 }
164
165 auto conn = static_cast<Connection *>(BIO_get_data(b));
166 auto &wbuf = conn->tls.wbuf;
167
168 BIO_clear_retry_flags(b);
169
170 if (conn->tls.initial_handshake_done) {
171 // After handshake finished, send |buf| of length |len| to the
172 // socket directly.
173
174 // Only when TLS session was prematurely ended before server sent
175 // all handshake message, this condition is true. This could be
176 // alert from SSL_shutdown(). Since connection is already down,
177 // just return error.
178 if (wbuf.rleft()) {
179 return -1;
180 }
181 auto nwrite = conn->write_clear(buf, len);
182 if (nwrite < 0) {
183 return -1;
184 }
185
186 if (nwrite == 0) {
187 BIO_set_retry_write(b);
188 return -1;
189 }
190
191 return nwrite;
192 }
193
194 wbuf.append(buf, len);
195
196 return len;
197 }
198 } // namespace
199
200 namespace {
shrpx_bio_read(BIO * b,char * buf,int len)201 int shrpx_bio_read(BIO *b, char *buf, int len) {
202 if (buf == nullptr || len <= 0) {
203 return 0;
204 }
205
206 auto conn = static_cast<Connection *>(BIO_get_data(b));
207 auto &rbuf = conn->tls.rbuf;
208
209 BIO_clear_retry_flags(b);
210
211 if (conn->tls.initial_handshake_done && rbuf.rleft() == 0) {
212 auto nread = conn->read_clear(buf, len);
213 if (nread < 0) {
214 return -1;
215 }
216 if (nread == 0) {
217 BIO_set_retry_read(b);
218 return -1;
219 }
220 return nread;
221 }
222
223 if (rbuf.rleft() == 0) {
224 BIO_set_retry_read(b);
225 return -1;
226 }
227
228 return rbuf.remove(buf, len);
229 }
230 } // namespace
231
232 namespace {
shrpx_bio_puts(BIO * b,const char * str)233 int shrpx_bio_puts(BIO *b, const char *str) {
234 return shrpx_bio_write(b, str, strlen(str));
235 }
236 } // namespace
237
238 namespace {
shrpx_bio_gets(BIO * b,char * buf,int len)239 int shrpx_bio_gets(BIO *b, char *buf, int len) { return -1; }
240 } // namespace
241
242 namespace {
shrpx_bio_ctrl(BIO * b,int cmd,long num,void * ptr)243 long shrpx_bio_ctrl(BIO *b, int cmd, long num, void *ptr) {
244 switch (cmd) {
245 case BIO_CTRL_FLUSH:
246 return 1;
247 }
248
249 return 0;
250 }
251 } // namespace
252
253 namespace {
shrpx_bio_create(BIO * b)254 int shrpx_bio_create(BIO *b) {
255 #if OPENSSL_1_1_API
256 BIO_set_init(b, 1);
257 #else // !OPENSSL_1_1_API
258 b->init = 1;
259 b->num = 0;
260 b->ptr = nullptr;
261 b->flags = 0;
262 #endif // !OPENSSL_1_1_API
263 return 1;
264 }
265 } // namespace
266
267 namespace {
shrpx_bio_destroy(BIO * b)268 int shrpx_bio_destroy(BIO *b) {
269 if (b == nullptr) {
270 return 0;
271 }
272
273 #if !OPENSSL_1_1_API
274 b->ptr = nullptr;
275 b->init = 0;
276 b->flags = 0;
277 #endif // !OPENSSL_1_1_API
278
279 return 1;
280 }
281 } // namespace
282
283 #if OPENSSL_1_1_API
284
create_bio_method()285 BIO_METHOD *create_bio_method() {
286 auto meth = BIO_meth_new(BIO_TYPE_FD, "nghttpx-bio");
287 BIO_meth_set_write(meth, shrpx_bio_write);
288 BIO_meth_set_read(meth, shrpx_bio_read);
289 BIO_meth_set_puts(meth, shrpx_bio_puts);
290 BIO_meth_set_gets(meth, shrpx_bio_gets);
291 BIO_meth_set_ctrl(meth, shrpx_bio_ctrl);
292 BIO_meth_set_create(meth, shrpx_bio_create);
293 BIO_meth_set_destroy(meth, shrpx_bio_destroy);
294
295 return meth;
296 }
297
298 #else // !OPENSSL_1_1_API
299
create_bio_method()300 BIO_METHOD *create_bio_method() {
301 static auto meth = new BIO_METHOD{
302 BIO_TYPE_FD, "nghttpx-bio", shrpx_bio_write,
303 shrpx_bio_read, shrpx_bio_puts, shrpx_bio_gets,
304 shrpx_bio_ctrl, shrpx_bio_create, shrpx_bio_destroy,
305 };
306
307 return meth;
308 }
309
310 #endif // !OPENSSL_1_1_API
311
set_ssl(SSL * ssl)312 void Connection::set_ssl(SSL *ssl) {
313 tls.ssl = ssl;
314
315 auto &tlsconf = get_config()->tls;
316 auto bio = BIO_new(tlsconf.bio_method);
317 BIO_set_data(bio, this);
318 SSL_set_bio(tls.ssl, bio, bio);
319 SSL_set_app_data(tls.ssl, this);
320 }
321
322 namespace {
323 // We should buffer at least full encrypted TLS record here.
324 // Theoretically, peer can send client hello in several TLS records,
325 // which could exceed this limit, but it is not portable, and we don't
326 // have to handle such exotic behaviour.
read_buffer_full(DefaultPeekMemchunks & rbuf)327 bool read_buffer_full(DefaultPeekMemchunks &rbuf) {
328 return rbuf.rleft_buffered() >= 20_k;
329 }
330 } // namespace
331
tls_handshake()332 int Connection::tls_handshake() {
333 wlimit.stopw();
334 ev_timer_stop(loop, &wt);
335
336 std::array<uint8_t, 16_k> buf;
337
338 if (ev_is_active(&rev)) {
339 auto nread = read_clear(buf.data(), buf.size());
340 if (nread < 0) {
341 if (LOG_ENABLED(INFO)) {
342 LOG(INFO) << "tls: handshake read error";
343 }
344 return -1;
345 }
346 tls.rbuf.append(buf.data(), nread);
347 if (read_buffer_full(tls.rbuf)) {
348 rlimit.stopw();
349 }
350 }
351
352 if (tls.initial_handshake_done) {
353 return write_tls_pending_handshake();
354 }
355
356 switch (tls.handshake_state) {
357 case TLSHandshakeState::WAIT_FOR_SESSION_CACHE:
358 return SHRPX_ERR_INPROGRESS;
359 case TLSHandshakeState::GOT_SESSION_CACHE: {
360 // Use the same trick invented by @kazuho in h2o project.
361
362 // Discard all outgoing data.
363 tls.wbuf.reset();
364 // Rewind buffered incoming data to replay client hello.
365 tls.rbuf.disable_peek(false);
366
367 auto ssl_ctx = SSL_get_SSL_CTX(tls.ssl);
368 auto ssl_opts = SSL_get_options(tls.ssl);
369 SSL_free(tls.ssl);
370
371 auto ssl = tls::create_ssl(ssl_ctx);
372 if (!ssl) {
373 return -1;
374 }
375 if (ssl_opts & SSL_OP_NO_TICKET) {
376 SSL_set_options(ssl, SSL_OP_NO_TICKET);
377 }
378
379 set_ssl(ssl);
380
381 SSL_set_accept_state(tls.ssl);
382
383 tls.handshake_state = TLSHandshakeState::NORMAL;
384 break;
385 }
386 case TLSHandshakeState::CANCEL_SESSION_CACHE:
387 tls.handshake_state = TLSHandshakeState::NORMAL;
388 break;
389 default:
390 break;
391 }
392
393 int rv;
394
395 ERR_clear_error();
396
397 #if OPENSSL_1_1_1_API
398 if (!tls.server_handshake || tls.early_data_finish) {
399 rv = SSL_do_handshake(tls.ssl);
400 } else {
401 auto &tlsconf = get_config()->tls;
402 for (;;) {
403 size_t nread;
404
405 rv = SSL_read_early_data(tls.ssl, buf.data(), buf.size(), &nread);
406 if (rv == SSL_READ_EARLY_DATA_ERROR) {
407 // If we have early data, and server sends ServerHello, assume
408 // that handshake is completed in server side, and start
409 // processing request. If we don't exit handshake code here,
410 // server waits for EndOfEarlyData and Finished message from
411 // client, which voids the purpose of 0-RTT data. The left
412 // over of handshake is done through write_tls or read_tls.
413 if (tlsconf.no_postpone_early_data &&
414 (tls.handshake_state == TLSHandshakeState::WRITE_STARTED ||
415 tls.wbuf.rleft()) &&
416 tls.earlybuf.rleft()) {
417 rv = 1;
418 }
419
420 break;
421 }
422
423 if (LOG_ENABLED(INFO)) {
424 LOG(INFO) << "tls: read early data " << nread << " bytes";
425 }
426
427 tls.earlybuf.append(buf.data(), nread);
428
429 if (rv == SSL_READ_EARLY_DATA_FINISH) {
430 if (LOG_ENABLED(INFO)) {
431 LOG(INFO) << "tls: read all early data; total "
432 << tls.earlybuf.rleft() << " bytes";
433 }
434 tls.early_data_finish = true;
435 // The same reason stated above.
436 if (tlsconf.no_postpone_early_data &&
437 (tls.handshake_state == TLSHandshakeState::WRITE_STARTED ||
438 tls.wbuf.rleft()) &&
439 tls.earlybuf.rleft()) {
440 rv = 1;
441 } else {
442 ERR_clear_error();
443 rv = SSL_do_handshake(tls.ssl);
444 }
445 break;
446 }
447 }
448 }
449 #else // !OPENSSL_1_1_1_API
450 rv = SSL_do_handshake(tls.ssl);
451 #endif // !OPENSSL_1_1_1_API
452
453 if (rv <= 0) {
454 auto err = SSL_get_error(tls.ssl, rv);
455 switch (err) {
456 case SSL_ERROR_WANT_READ:
457 if (read_buffer_full(tls.rbuf)) {
458 if (LOG_ENABLED(INFO)) {
459 LOG(INFO) << "tls: handshake message is too large";
460 }
461 return -1;
462 }
463 break;
464 case SSL_ERROR_WANT_WRITE:
465 break;
466 case SSL_ERROR_SSL: {
467 if (LOG_ENABLED(INFO)) {
468 LOG(INFO) << "tls: handshake libssl error: "
469 << ERR_error_string(ERR_get_error(), nullptr);
470 }
471
472 struct iovec iov[1];
473 auto iovcnt = tls.wbuf.riovec(iov, 1);
474 auto nwrite = writev_clear(iov, iovcnt);
475 if (nwrite > 0) {
476 tls.wbuf.drain(nwrite);
477 }
478
479 return SHRPX_ERR_NETWORK;
480 }
481 default:
482 if (LOG_ENABLED(INFO)) {
483 LOG(INFO) << "tls: handshake libssl error " << err;
484 }
485 return SHRPX_ERR_NETWORK;
486 }
487 }
488
489 if (tls.handshake_state == TLSHandshakeState::WAIT_FOR_SESSION_CACHE) {
490 if (LOG_ENABLED(INFO)) {
491 LOG(INFO) << "tls: handshake is still in progress";
492 }
493 return SHRPX_ERR_INPROGRESS;
494 }
495
496 // Don't send handshake data if handshake was completed in OpenSSL
497 // routine. We have to check HTTP/2 requirement if HTTP/2 was
498 // negotiated before sending finished message to the peer.
499 if (rv != 1 && tls.wbuf.rleft()) {
500 // First write indicates that resumption stuff has done.
501 if (tls.handshake_state != TLSHandshakeState::WRITE_STARTED) {
502 tls.handshake_state = TLSHandshakeState::WRITE_STARTED;
503 // If peek has already disabled, this is noop.
504 tls.rbuf.disable_peek(true);
505 }
506 std::array<struct iovec, 4> iov;
507 auto iovcnt = tls.wbuf.riovec(iov.data(), iov.size());
508 auto nwrite = writev_clear(iov.data(), iovcnt);
509 if (nwrite < 0) {
510 if (LOG_ENABLED(INFO)) {
511 LOG(INFO) << "tls: handshake write error";
512 }
513 return -1;
514 }
515 tls.wbuf.drain(nwrite);
516
517 if (tls.wbuf.rleft()) {
518 wlimit.startw();
519 ev_timer_again(loop, &wt);
520 }
521 }
522
523 if (!read_buffer_full(tls.rbuf)) {
524 // We may have stopped reading
525 rlimit.startw();
526 }
527
528 if (rv != 1) {
529 if (LOG_ENABLED(INFO)) {
530 LOG(INFO) << "tls: handshake is still in progress";
531 }
532 return SHRPX_ERR_INPROGRESS;
533 }
534
535 // Handshake was done
536
537 rv = check_http2_requirement();
538 if (rv != 0) {
539 return -1;
540 }
541
542 // Just in case
543 tls.rbuf.disable_peek(true);
544
545 tls.initial_handshake_done = true;
546
547 return write_tls_pending_handshake();
548 }
549
write_tls_pending_handshake()550 int Connection::write_tls_pending_handshake() {
551 // Send handshake data left in the buffer
552 while (tls.wbuf.rleft()) {
553 std::array<struct iovec, 4> iov;
554 auto iovcnt = tls.wbuf.riovec(iov.data(), iov.size());
555 auto nwrite = writev_clear(iov.data(), iovcnt);
556 if (nwrite < 0) {
557 if (LOG_ENABLED(INFO)) {
558 LOG(INFO) << "tls: handshake write error";
559 }
560 return -1;
561 }
562 if (nwrite == 0) {
563 wlimit.startw();
564 ev_timer_again(loop, &wt);
565
566 return SHRPX_ERR_INPROGRESS;
567 }
568 tls.wbuf.drain(nwrite);
569 }
570
571 // We have to start read watcher, since later stage of code expects
572 // this.
573 rlimit.startw();
574
575 // We may have whole request in tls.rbuf. This means that we don't
576 // get notified further read event. This is especially true for
577 // HTTP/1.1.
578 handle_tls_pending_read();
579
580 if (LOG_ENABLED(INFO)) {
581 LOG(INFO) << "SSL/TLS handshake completed";
582 nghttp2::tls::TLSSessionInfo tls_info{};
583 if (nghttp2::tls::get_tls_session_info(&tls_info, tls.ssl)) {
584 LOG(INFO) << "cipher=" << tls_info.cipher
585 << " protocol=" << tls_info.protocol
586 << " resumption=" << (tls_info.session_reused ? "yes" : "no")
587 << " session_id="
588 << util::format_hex(tls_info.session_id,
589 tls_info.session_id_length);
590 }
591 }
592
593 return 0;
594 }
595
check_http2_requirement()596 int Connection::check_http2_requirement() {
597 const unsigned char *next_proto = nullptr;
598 unsigned int next_proto_len;
599
600 #ifndef OPENSSL_NO_NEXTPROTONEG
601 SSL_get0_next_proto_negotiated(tls.ssl, &next_proto, &next_proto_len);
602 #endif // !OPENSSL_NO_NEXTPROTONEG
603 #if OPENSSL_VERSION_NUMBER >= 0x10002000L
604 if (next_proto == nullptr) {
605 SSL_get0_alpn_selected(tls.ssl, &next_proto, &next_proto_len);
606 }
607 #endif // OPENSSL_VERSION_NUMBER >= 0x10002000L
608 if (next_proto == nullptr ||
609 !util::check_h2_is_selected(StringRef{next_proto, next_proto_len})) {
610 return 0;
611 }
612 if (!nghttp2::tls::check_http2_tls_version(tls.ssl)) {
613 if (LOG_ENABLED(INFO)) {
614 LOG(INFO) << "TLSv1.2 was not negotiated. HTTP/2 must not be used.";
615 }
616 return -1;
617 }
618
619 auto check_black_list = false;
620 if (tls.server_handshake) {
621 check_black_list = !get_config()->tls.no_http2_cipher_black_list;
622 } else {
623 check_black_list = !get_config()->tls.client.no_http2_cipher_black_list;
624 }
625
626 if (check_black_list &&
627 nghttp2::tls::check_http2_cipher_black_list(tls.ssl)) {
628 if (LOG_ENABLED(INFO)) {
629 LOG(INFO) << "The negotiated cipher suite is in HTTP/2 cipher suite "
630 "black list. HTTP/2 must not be used.";
631 }
632 return -1;
633 }
634
635 return 0;
636 }
637
638 namespace {
// Write length limit, in bytes, returned by get_tls_write_limit()
// while small TLS records are in use.
constexpr size_t SHRPX_SMALL_WRITE_LIMIT{1300};
640 } // namespace
641
get_tls_write_limit()642 size_t Connection::get_tls_write_limit() {
643
644 if (tls_dyn_rec_warmup_threshold == 0) {
645 return std::numeric_limits<ssize_t>::max();
646 }
647
648 auto t = ev_now(loop);
649
650 if (tls.last_write_idle >= 0. &&
651 t - tls.last_write_idle > tls_dyn_rec_idle_timeout) {
652 // Time out, use small record size
653 tls.warmup_writelen = 0;
654 return SHRPX_SMALL_WRITE_LIMIT;
655 }
656
657 if (tls.warmup_writelen >= tls_dyn_rec_warmup_threshold) {
658 return std::numeric_limits<ssize_t>::max();
659 }
660
661 return SHRPX_SMALL_WRITE_LIMIT;
662 }
663
update_tls_warmup_writelen(size_t n)664 void Connection::update_tls_warmup_writelen(size_t n) {
665 if (tls.warmup_writelen < tls_dyn_rec_warmup_threshold) {
666 tls.warmup_writelen += n;
667 }
668 }
669
start_tls_write_idle()670 void Connection::start_tls_write_idle() {
671 if (tls.last_write_idle < 0.) {
672 tls.last_write_idle = ev_now(loop);
673 }
674 }
675
write_tls(const void * data,size_t len)676 ssize_t Connection::write_tls(const void *data, size_t len) {
677 // SSL_write requires the same arguments (buf pointer and its
678 // length) on SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE.
679 // get_write_limit() may return smaller length than previously
680 // passed to SSL_write, which violates OpenSSL assumption. To avoid
681 // this, we keep last legnth passed to SSL_write to
682 // tls.last_writelen if SSL_write indicated I/O blocking.
683 if (tls.last_writelen == 0) {
684 len = std::min(len, wlimit.avail());
685 len = std::min(len, get_tls_write_limit());
686 if (len == 0) {
687 return 0;
688 }
689 } else {
690 len = tls.last_writelen;
691 tls.last_writelen = 0;
692 }
693
694 tls.last_write_idle = -1.;
695
696 ERR_clear_error();
697
698 #if OPENSSL_1_1_1_API
699 int rv;
700 if (SSL_is_init_finished(tls.ssl)) {
701 rv = SSL_write(tls.ssl, data, len);
702 } else {
703 size_t nwrite;
704 rv = SSL_write_early_data(tls.ssl, data, len, &nwrite);
705 // Use the same semantics with SSL_write.
706 if (rv == 1) {
707 rv = nwrite;
708 }
709 }
710 #else // !OPENSSL_1_1_1_API
711 auto rv = SSL_write(tls.ssl, data, len);
712 #endif // !OPENSSL_1_1_1_API
713
714 if (rv <= 0) {
715 auto err = SSL_get_error(tls.ssl, rv);
716 switch (err) {
717 case SSL_ERROR_WANT_READ:
718 if (LOG_ENABLED(INFO)) {
719 LOG(INFO) << "Close connection due to TLS renegotiation";
720 }
721 return SHRPX_ERR_NETWORK;
722 case SSL_ERROR_WANT_WRITE:
723 tls.last_writelen = len;
724 // starting write watcher and timer is done in write_clear via
725 // bio.
726 return 0;
727 case SSL_ERROR_SSL:
728 if (LOG_ENABLED(INFO)) {
729 LOG(INFO) << "SSL_write: "
730 << ERR_error_string(ERR_get_error(), nullptr);
731 }
732 return SHRPX_ERR_NETWORK;
733 default:
734 if (LOG_ENABLED(INFO)) {
735 LOG(INFO) << "SSL_write: SSL_get_error returned " << err;
736 }
737 return SHRPX_ERR_NETWORK;
738 }
739 }
740
741 update_tls_warmup_writelen(rv);
742
743 return rv;
744 }
745
read_tls(void * data,size_t len)746 ssize_t Connection::read_tls(void *data, size_t len) {
747 ERR_clear_error();
748
749 #if OPENSSL_1_1_1_API
750 if (tls.earlybuf.rleft()) {
751 return tls.earlybuf.remove(data, len);
752 }
753 #endif // OPENSSL_1_1_1_API
754
755 // SSL_read requires the same arguments (buf pointer and its
756 // length) on SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE.
757 // rlimit_.avail() or rlimit_.avail() may return different length
758 // than the length previously passed to SSL_read, which violates
759 // OpenSSL assumption. To avoid this, we keep last legnth passed
760 // to SSL_read to tls_last_readlen_ if SSL_read indicated I/O
761 // blocking.
762 if (tls.last_readlen == 0) {
763 len = std::min(len, rlimit.avail());
764 if (len == 0) {
765 return 0;
766 }
767 } else {
768 len = tls.last_readlen;
769 tls.last_readlen = 0;
770 }
771
772 #if OPENSSL_1_1_1_API
773 if (!tls.early_data_finish) {
774 // TLSv1.3 handshake is still going on.
775 size_t nread;
776 auto rv = SSL_read_early_data(tls.ssl, data, len, &nread);
777 if (rv == SSL_READ_EARLY_DATA_ERROR) {
778 auto err = SSL_get_error(tls.ssl, rv);
779 switch (err) {
780 case SSL_ERROR_WANT_READ:
781 tls.last_readlen = len;
782 return 0;
783 case SSL_ERROR_SSL:
784 if (LOG_ENABLED(INFO)) {
785 LOG(INFO) << "SSL_read: "
786 << ERR_error_string(ERR_get_error(), nullptr);
787 }
788 return SHRPX_ERR_NETWORK;
789 default:
790 if (LOG_ENABLED(INFO)) {
791 LOG(INFO) << "SSL_read: SSL_get_error returned " << err;
792 }
793 return SHRPX_ERR_NETWORK;
794 }
795 }
796
797 if (LOG_ENABLED(INFO)) {
798 LOG(INFO) << "tls: read early data " << nread << " bytes";
799 }
800
801 if (rv == SSL_READ_EARLY_DATA_FINISH) {
802 if (LOG_ENABLED(INFO)) {
803 LOG(INFO) << "tls: read all early data";
804 }
805 tls.early_data_finish = true;
806 // We may have stopped write watcher in write_tls.
807 wlimit.startw();
808 }
809 return nread;
810 }
811 #endif // OPENSSL_1_1_1_API
812
813 auto rv = SSL_read(tls.ssl, data, len);
814
815 if (rv <= 0) {
816 auto err = SSL_get_error(tls.ssl, rv);
817 switch (err) {
818 case SSL_ERROR_WANT_READ:
819 tls.last_readlen = len;
820 return 0;
821 case SSL_ERROR_WANT_WRITE:
822 if (LOG_ENABLED(INFO)) {
823 LOG(INFO) << "Close connection due to TLS renegotiation";
824 }
825 return SHRPX_ERR_NETWORK;
826 case SSL_ERROR_ZERO_RETURN:
827 return SHRPX_ERR_EOF;
828 case SSL_ERROR_SSL:
829 if (LOG_ENABLED(INFO)) {
830 LOG(INFO) << "SSL_read: " << ERR_error_string(ERR_get_error(), nullptr);
831 }
832 return SHRPX_ERR_NETWORK;
833 default:
834 if (LOG_ENABLED(INFO)) {
835 LOG(INFO) << "SSL_read: SSL_get_error returned " << err;
836 }
837 return SHRPX_ERR_NETWORK;
838 }
839 }
840
841 return rv;
842 }
843
write_clear(const void * data,size_t len)844 ssize_t Connection::write_clear(const void *data, size_t len) {
845 len = std::min(len, wlimit.avail());
846 if (len == 0) {
847 return 0;
848 }
849
850 ssize_t nwrite;
851 while ((nwrite = write(fd, data, len)) == -1 && errno == EINTR)
852 ;
853 if (nwrite == -1) {
854 if (errno == EAGAIN || errno == EWOULDBLOCK) {
855 wlimit.startw();
856 ev_timer_again(loop, &wt);
857 return 0;
858 }
859 return SHRPX_ERR_NETWORK;
860 }
861
862 wlimit.drain(nwrite);
863
864 if (ev_is_active(&wt)) {
865 ev_timer_again(loop, &wt);
866 }
867
868 return nwrite;
869 }
870
writev_clear(struct iovec * iov,int iovcnt)871 ssize_t Connection::writev_clear(struct iovec *iov, int iovcnt) {
872 iovcnt = limit_iovec(iov, iovcnt, wlimit.avail());
873 if (iovcnt == 0) {
874 return 0;
875 }
876
877 ssize_t nwrite;
878 while ((nwrite = writev(fd, iov, iovcnt)) == -1 && errno == EINTR)
879 ;
880 if (nwrite == -1) {
881 if (errno == EAGAIN || errno == EWOULDBLOCK) {
882 wlimit.startw();
883 ev_timer_again(loop, &wt);
884 return 0;
885 }
886 return SHRPX_ERR_NETWORK;
887 }
888
889 wlimit.drain(nwrite);
890
891 if (ev_is_active(&wt)) {
892 ev_timer_again(loop, &wt);
893 }
894
895 return nwrite;
896 }
897
read_clear(void * data,size_t len)898 ssize_t Connection::read_clear(void *data, size_t len) {
899 len = std::min(len, rlimit.avail());
900 if (len == 0) {
901 return 0;
902 }
903
904 ssize_t nread;
905 while ((nread = read(fd, data, len)) == -1 && errno == EINTR)
906 ;
907 if (nread == -1) {
908 if (errno == EAGAIN || errno == EWOULDBLOCK) {
909 return 0;
910 }
911 return SHRPX_ERR_NETWORK;
912 }
913
914 if (nread == 0) {
915 return SHRPX_ERR_EOF;
916 }
917
918 rlimit.drain(nread);
919
920 return nread;
921 }
922
handle_tls_pending_read()923 void Connection::handle_tls_pending_read() {
924 if (!ev_is_active(&rev)) {
925 return;
926 }
927 rlimit.handle_tls_pending_read();
928 }
929
get_tcp_hint(TCPHint * hint) const930 int Connection::get_tcp_hint(TCPHint *hint) const {
931 #if defined(TCP_INFO) && defined(TCP_NOTSENT_LOWAT)
932 struct tcp_info tcp_info;
933 socklen_t tcp_info_len = sizeof(tcp_info);
934 int rv;
935
936 rv = getsockopt(fd, IPPROTO_TCP, TCP_INFO, &tcp_info, &tcp_info_len);
937
938 if (rv != 0) {
939 return -1;
940 }
941
942 auto avail_packets = tcp_info.tcpi_snd_cwnd > tcp_info.tcpi_unacked
943 ? tcp_info.tcpi_snd_cwnd - tcp_info.tcpi_unacked
944 : 0;
945
946 // http://www.slideshare.net/kazuho/programming-tcp-for-responsiveness
947
948 // TODO 29 (5 (header) + 8 (explicit nonce) + 16 (tag)) is TLS
949 // overhead for AES-GCM. For CHACHA20_POLY1305, it is 21 since it
950 // does not need 8 bytes explicit nonce.
951 //
952 // For TLSv1.3, AES-GCM and CHACHA20_POLY1305 overhead are now 22
953 // bytes (5 (header) + 1 (ContentType) + 16 (tag)).
954 size_t tls_overhead;
955 # ifdef TLS1_3_VERSION
956 if (SSL_version(tls.ssl) == TLS1_3_VERSION) {
957 tls_overhead = 22;
958 } else
959 # endif // TLS1_3_VERSION
960 {
961 tls_overhead = 29;
962 }
963
964 auto writable_size =
965 (avail_packets + 2) * (tcp_info.tcpi_snd_mss - tls_overhead);
966 if (writable_size > 16_k) {
967 writable_size = writable_size & ~(16_k - 1);
968 } else {
969 if (writable_size < 536) {
970 LOG(INFO) << "writable_size is too small: " << writable_size;
971 }
972 // TODO is this required?
973 writable_size = std::max(writable_size, static_cast<size_t>(536 * 2));
974 }
975
976 // if (LOG_ENABLED(INFO)) {
977 // LOG(INFO) << "snd_cwnd=" << tcp_info.tcpi_snd_cwnd
978 // << ", unacked=" << tcp_info.tcpi_unacked
979 // << ", snd_mss=" << tcp_info.tcpi_snd_mss
980 // << ", rtt=" << tcp_info.tcpi_rtt << "us"
981 // << ", rcv_space=" << tcp_info.tcpi_rcv_space
982 // << ", writable=" << writable_size;
983 // }
984
985 hint->write_buffer_size = writable_size;
986 // TODO tcpi_rcv_space is considered as rwin, is that correct?
987 hint->rwin = tcp_info.tcpi_rcv_space;
988
989 return 0;
990 #else // !defined(TCP_INFO) || !defined(TCP_NOTSENT_LOWAT)
991 return -1;
992 #endif // !defined(TCP_INFO) || !defined(TCP_NOTSENT_LOWAT)
993 }
994
again_rt(ev_tstamp t)995 void Connection::again_rt(ev_tstamp t) {
996 read_timeout = t;
997 rt.repeat = t;
998 ev_timer_again(loop, &rt);
999 last_read = ev_now(loop);
1000 }
1001
again_rt()1002 void Connection::again_rt() {
1003 rt.repeat = read_timeout;
1004 ev_timer_again(loop, &rt);
1005 last_read = ev_now(loop);
1006 }
1007
expired_rt()1008 bool Connection::expired_rt() {
1009 auto delta = read_timeout - (ev_now(loop) - last_read);
1010 if (delta < 1e-9) {
1011 return true;
1012 }
1013 rt.repeat = delta;
1014 ev_timer_again(loop, &rt);
1015 return false;
1016 }
1017
1018 } // namespace shrpx
1019