/*
 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>

#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#if INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_cc.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_seq.h>
#include <kern/task.h>
#include <libkern/OSAtomic.h>

static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index);
static uint32_t tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt);
static uint32_t tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);

extern float cbrtf(float x);

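/*
 * Callback table for the Cubic congestion-control algorithm. The TCP
 * congestion-control framework invokes each entry at the corresponding
 * connection event (init/cleanup, ack processing, entering and leaving
 * fast recovery, timeouts, and algorithm switches).
 */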
struct tcp_cc_algo tcp_cc_cubic = {
	.name = "cubic",
	.init = tcp_cubic_init,
	.cleanup = tcp_cubic_cleanup,
	.cwnd_init = tcp_cubic_cwnd_init_or_reset,
	.congestion_avd = tcp_cubic_congestion_avd,
	.ack_rcvd = tcp_cubic_ack_rcvd,
	.pre_fr = tcp_cubic_pre_fr,
	.post_fr = tcp_cubic_post_fr,
	.after_idle = tcp_cubic_cwnd_init_or_reset,
	.after_timeout = tcp_cubic_after_timeout,
	.delay_ack = tcp_cubic_delay_ack,
	.switch_to = tcp_cubic_switch_cc
};

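/*
 * Cubic constants: on packet loss the congestion window is reduced by
 * tcp_cubic_backoff (20%), i.e. beta = 0.8 of the window is retained.
 * tcp_cubic_coeff is the scaling constant C of the cubic window
 * equation, and tcp_cubic_fast_convergence_factor is the additional
 * reduction applied to cub_last_max under fast convergence (see
 * tcp_cubic_pre_fr()).
 */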
const float tcp_cubic_backoff = 0.2; /* multiplicative decrease factor */
const float tcp_cubic_coeff = 0.4;
const float tcp_cubic_fast_convergence_factor = 0.875;

SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_tcp_friendliness, CTLFLAG_RW | CTLFLAG_LOCKED,
    static int, tcp_cubic_tcp_friendliness, 0, "Enable TCP friendliness");

SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_fast_convergence, CTLFLAG_RW | CTLFLAG_LOCKED,
    static int, tcp_cubic_fast_convergence, 0, "Enable fast convergence");

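/*
 * When non-zero, tcp_cubic_use_minrtt acts as a lower bound on the rtt
 * term that tcp_cubic_update() adds to the elapsed epoch time (see the
 * max() there), expressed in the same units as the rtt.
 */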
SYSCTL_SKMEM_TCP_INT(OID_AUTO, cubic_use_minrtt, CTLFLAG_RW | CTLFLAG_LOCKED,
    static int, tcp_cubic_use_minrtt, 0, "use a min of 5 sec rtt");

static int tcp_cubic_init(struct tcpcb *tp)
{
	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);

	VERIFY(tp->t_ccstate != NULL);
	tcp_cubic_clear_state(tp);
	return (0);
}

static int tcp_cubic_cleanup(struct tcpcb *tp)
{
#pragma unused(tp)
	OSDecrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
	return (0);
}

/*
 * Initialize the congestion window at the beginning of a connection or
 * after idle time.
 */
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	tcp_cubic_clear_state(tp);
	tcp_cc_cwnd_init_or_reset(tp);
	tp->t_pipeack = 0;
	tcp_clear_pipeack_state(tp);

	/* Start counting bytes for RFC 3465 again */
	tp->t_bytes_acked = 0;

	/*
	 * The slow-start threshold could get initialized to a lower value
	 * when there is a cached value in the route metrics. In that case,
	 * the connection can enter congestion avoidance without any packet
	 * loss and Cubic will enter steady state too early. It is better
	 * to always probe to find the initial slow-start threshold.
	 */
	if (tp->t_inpcb->inp_stat->txbytes <= TCP_CC_CWND_INIT_BYTES &&
	    tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT))
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;

	/* Initialize cubic last max to the same value as ssthresh */
	tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
}

/*
 * Compute the target congestion window for the next RTT according to
 * the cubic equation when an ack is received:
 *
 *	W(t) = C(t-K)^3 + W(last_max)
 */
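/*
 * Illustrative numbers (not from the source): if the window backed off
 * to 40 segments below cub_last_max, then with C = 0.4,
 *
 *	K = cbrt(40 / 0.4) = cbrt(100) ~= 4.64 seconds,
 *
 * so the window is projected to regain the previous maximum about
 * 4.6 seconds into the epoch.
 */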
static uint32_t
tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt)
{
	float K, var;
	u_int32_t elapsed_time, win;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_ccstate->cub_last_max == 0)
		tp->t_ccstate->cub_last_max = tp->snd_ssthresh;

	if (tp->t_ccstate->cub_epoch_start == 0) {
		/*
		 * This is the beginning of a new epoch, initialize some of
		 * the variables that we need to use for computing the
		 * congestion window later.
		 */
		tp->t_ccstate->cub_epoch_start = tcp_now;
		if (tp->t_ccstate->cub_epoch_start == 0)
			tp->t_ccstate->cub_epoch_start = 1;
		if (win < tp->t_ccstate->cub_last_max) {

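			/*
			 * The cube-root computation below uses floating
			 * point; this assertion appears to guard against
			 * running it outside the kernel task's context.
			 */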
			VERIFY(current_task() == kernel_task);

			/*
			 * Compute the cubic epoch period; this is the time
			 * period that the window will take to increase to
			 * last_max again after backoff due to loss.
			 */
			K = (tp->t_ccstate->cub_last_max - win)
			    / tp->t_maxseg / tcp_cubic_coeff;
			K = cbrtf(K);
			tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ;
			/* Origin point */
			tp->t_ccstate->cub_origin_point =
			    tp->t_ccstate->cub_last_max;
		} else {
			tp->t_ccstate->cub_epoch_period = 0;
			tp->t_ccstate->cub_origin_point = win;
		}
		tp->t_ccstate->cub_target_win = 0;
	}

	VERIFY(tp->t_ccstate->cub_origin_point > 0);
	/*
	 * Compute the target window for the next RTT using the smoothed RTT
	 * as an estimate for the next RTT.
	 */
	elapsed_time = timer_diff(tcp_now, 0,
	    tp->t_ccstate->cub_epoch_start, 0);

	if (tcp_cubic_use_minrtt)
		elapsed_time += max(tcp_cubic_use_minrtt, rtt);
	else
		elapsed_time += rtt;
	var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ;
	var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg);

	tp->t_ccstate->cub_target_win =
	    (u_int32_t)(tp->t_ccstate->cub_origin_point + var);
	return (tp->t_ccstate->cub_target_win);
}

/*
 * Standard TCP utilizes bandwidth well on low-RTT, low-BDP connections
 * even when there is some packet loss. Enabling TCP mode helps Cubic
 * achieve that level of utilization.
 *
 * But if the path has a bottleneck link with a fixed-size queue and
 * fixed bandwidth, Cubic's steady-state behavior helps to reduce packet
 * loss at that link. Using the average and mean absolute deviation of
 * W(lastmax), we try to detect whether the congestion window is close to
 * the bottleneck bandwidth. In that case, disabling TCP mode helps to
 * minimize packet loss at that link.
 *
 * Disable TCP mode if W(lastmax) (the window at which the previous packet
 * loss happened) is within a small range of the computed average.
 */
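/*
 * In other words: TCP mode stays enabled for non-realtime flows whose
 * mean absolute deviation of W(lastmax) exceeds two segments, since a
 * widely scattered loss point suggests the flow is not parked at a
 * fixed-size bottleneck queue.
 */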
#define	TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > ((_tp_)->t_maxseg << 1)) ? 1 : 0)

/*
 * Compute the window growth as if standard TCP (AIMD) congestion
 * avoidance were used, with an additive increase of one packet per RTT.
 *
 * The TCP window at time t can be estimated with the following equation,
 * where beta is the fraction of the window retained after a backoff
 * (1 - tcp_cubic_backoff = 0.8):
 *
 *	W(t) <- Wmax * beta + 3 * ((1 - beta)/(1 + beta)) * t/RTT
 */
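/*
 * The implementation below approximates this by growing cub_tcp_win by
 * one segment for every window's worth of bytes acknowledged, i.e. an
 * additive increase of roughly one MSS per RTT.
 */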
static uint32_t
tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th)
{
	if (tp->t_ccstate->cub_tcp_win == 0) {
		tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd);
		tp->t_ccstate->cub_tcp_bytes_acked = 0;
	} else {
		tp->t_ccstate->cub_tcp_bytes_acked += BYTES_ACKED(th, tp);
		if (tp->t_ccstate->cub_tcp_bytes_acked >=
		    tp->t_ccstate->cub_tcp_win) {
			tp->t_ccstate->cub_tcp_bytes_acked -=
			    tp->t_ccstate->cub_tcp_win;
			tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
		}
	}
	return (tp->t_ccstate->cub_tcp_win);
}

/*
 * Handle an in-sequence ack during the congestion avoidance phase.
 */
static void
tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
{
	u_int32_t cubic_target_win, tcp_win, rtt;

	/* Do not increase the congestion window in the non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
		return;

	tp->t_bytes_acked += BYTES_ACKED(th, tp);

	rtt = get_base_rtt(tp);
	/*
	 * First compute the cubic window. If the cubic variables are not
	 * initialized (after coming out of recovery), this call will
	 * initialize them.
	 */
	cubic_target_win = tcp_cubic_update(tp, rtt);

	/* Compute the TCP window if a multiplicative decrease of 0.2 is used */
	tcp_win = tcp_cubic_tcpwin(tp, th);

	if (tp->snd_cwnd < tcp_win &&
	    (tcp_cubic_tcp_friendliness == 1 ||
	    TCP_CUBIC_ENABLE_TCPMODE(tp))) {
		/* this connection is in the TCP-friendly region */
		if (tp->t_bytes_acked >= tp->snd_cwnd) {
			tp->t_bytes_acked -= tp->snd_cwnd;
			tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale);
		}
	} else {
		if (cubic_target_win > tp->snd_cwnd) {
			/*
			 * The target window is computed for the next RTT.
			 * To reach this value, cwnd will have to be updated
			 * one segment at a time. Compute how many bytes
			 * need to be acknowledged before we can increase
			 * the cwnd by one segment.
			 */
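			/*
			 * Illustrative numbers: with cwnd = 100*MSS and a
			 * target of 110*MSS, incr_win = 100*MSS*MSS / (10*MSS)
			 * = 10*MSS, so cwnd gains one segment per ten
			 * segments acknowledged and reaches the target in
			 * roughly one RTT.
			 */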
			u_int64_t incr_win;
			incr_win = tp->snd_cwnd * tp->t_maxseg;
			incr_win /= (cubic_target_win - tp->snd_cwnd);
			if (incr_win > 0 &&
			    tp->t_bytes_acked >= incr_win) {
				tp->t_bytes_acked -= incr_win;
				tp->snd_cwnd =
				    min((tp->snd_cwnd + tp->t_maxseg),
				    TCP_MAXWIN << tp->snd_scale);
			}
		}
	}
}

static void
tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
{
	/* Do not increase the congestion window in the non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
		return;

	if (tp->snd_cwnd >= tp->snd_ssthresh) {
		/* Congestion avoidance phase */
		tcp_cubic_congestion_avd(tp, th);
	} else {
		/*
		 * Use 2*SMSS as the limit on the increment, as suggested
		 * by RFC 3465 section 2.3.
		 */
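		/*
		 * For example, a single ack that newly covers ten segments
		 * grows cwnd by at most 2*SMSS when rfc3465_lim2 is in
		 * effect, and by at most one SMSS otherwise.
		 */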
		uint32_t acked, abc_lim, incr;

		acked = BYTES_ACKED(th, tp);
		abc_lim = (tcp_do_rfc3465_lim2 &&
		    tp->snd_nxt == tp->snd_max) ?
		    2 * tp->t_maxseg : tp->t_maxseg;
		incr = min(acked, abc_lim);

		tp->snd_cwnd += incr;
		tp->snd_cwnd = min(tp->snd_cwnd,
		    TCP_MAXWIN << tp->snd_scale);
	}
}

static void
tcp_cubic_pre_fr(struct tcpcb *tp)
{
	u_int32_t win, avg;
	int32_t dev;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_target_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
		tp->t_lossflightsize = tp->snd_max - tp->snd_una;
		win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1;
	} else {
		tp->t_lossflightsize = 0;
	}
	/*
	 * Note the congestion window at which packet loss occurred as
	 * cub_last_max.
	 *
	 * If the congestion window is less than the last max window when
	 * loss occurred, it indicates that capacity available in the
	 * network has gone down. This can happen if a new flow has started
	 * and it is capturing some of the bandwidth. To reach convergence
	 * quickly, back off a little more. Disable fast convergence to
	 * disable this behavior.
	 */
	if (win < tp->t_ccstate->cub_last_max &&
	    tcp_cubic_fast_convergence == 1)
		tp->t_ccstate->cub_last_max = (u_int32_t)(win *
		    tcp_cubic_fast_convergence_factor);
	else
		tp->t_ccstate->cub_last_max = win;

	if (tp->t_ccstate->cub_last_max == 0) {
		/*
		 * If last_max is zero because snd_wnd is zero or for
		 * any other reason, initialize it to the amount of data
		 * in flight.
		 */
		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
	}

	/*
	 * Compute the average and mean absolute deviation of the
	 * window at which packet loss occurred.
	 */
	if (tp->t_ccstate->cub_avg_lastmax == 0) {
		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
	} else {
		/*
		 * The average is computed by taking 63 parts of
		 * history and one part of the most recent value.
		 */
		avg = tp->t_ccstate->cub_avg_lastmax;
		avg = (avg << 6) - avg;
		tp->t_ccstate->cub_avg_lastmax =
		    (avg + tp->t_ccstate->cub_last_max) >> 6;
	}
	/* Calculate the deviation from the average */
	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;

	/* Take the absolute value */
	if (dev < 0)
		dev = -dev;

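	/*
	 * cub_mean_dev is a smoothed estimate with a gain of 1/16:
	 * mean_dev <- (15 * mean_dev + |dev|) / 16.
	 */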
	if (tp->t_ccstate->cub_mean_dev == 0) {
		tp->t_ccstate->cub_mean_dev = dev;
	} else {
		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
		    - tp->t_ccstate->cub_mean_dev);
		tp->t_ccstate->cub_mean_dev = dev >> 4;
	}

	/* Back off the congestion window by the tcp_cubic_backoff factor */
	win = (u_int32_t)(win - (win * tcp_cubic_backoff));
	win = (win / tp->t_maxseg);
	if (win < 2)
		win = 2;
	tp->snd_ssthresh = win * tp->t_maxseg;
	tcp_cc_resize_sndbuf(tp);
}

static void
tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t flight_size = 0;

	if (SEQ_LEQ(th->th_ack, tp->snd_max))
		flight_size = tp->snd_max - th->th_ack;

	if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0) {
		u_int32_t total_rxt_size = 0, ncwnd;
		/*
		 * When SACK is enabled, the number of retransmitted bytes
		 * can be counted more accurately.
		 */
		total_rxt_size = tcp_rxtseg_total_size(tp);
		ncwnd = max(tp->t_pipeack, tp->t_lossflightsize);
		if (total_rxt_size <= ncwnd) {
			ncwnd = ncwnd - total_rxt_size;
		}

		/*
		 * To avoid sending a large burst at the end of recovery,
		 * set a max limit on ncwnd.
		 */
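		/*
		 * With the cap at 64 segments followed by the halving
		 * below, ncwnd contributes at most 32 segments here.
		 */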
		ncwnd = min(ncwnd, (tp->t_maxseg << 6));
		ncwnd = ncwnd >> 1;
		flight_size = max(ncwnd, flight_size);
	}
	/*
	 * Complete ack. The current window was inflated for fast recovery.
	 * It has to be deflated post recovery.
	 *
	 * Window inflation should have left us with approximately
	 * snd_ssthresh outstanding data. If the flight size is zero or one
	 * segment, make the congestion window at least as big as 2 segments
	 * to avoid delayed acknowledgements. This is according to RFC 6582.
	 */
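	/*
	 * For example, if nothing is outstanding (flight_size == 0), cwnd
	 * is set to 2 * t_maxseg rather than collapsing to zero.
	 */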
	if (flight_size < tp->snd_ssthresh)
		tp->snd_cwnd = max(flight_size, tp->t_maxseg)
		    + tp->t_maxseg;
	else
		tp->snd_cwnd = tp->snd_ssthresh;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_target_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
}

static void
tcp_cubic_after_timeout(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	/*
	 * Avoid adjusting the congestion window due to SYN retransmissions.
	 * If more than one byte (i.e. more than just the SYN) is
	 * outstanding, the window still needs to be adjusted.
	 */
	if (tp->t_state < TCPS_ESTABLISHED &&
	    ((int)(tp->snd_max - tp->snd_una) <= 1))
		return;

	if (!IN_FASTRECOVERY(tp)) {
		tcp_cubic_clear_state(tp);
		tcp_cubic_pre_fr(tp);
	}

	/*
	 * Close the congestion window down to one segment as a retransmit
	 * timeout might indicate severe congestion.
	 */
	tp->snd_cwnd = tp->t_maxseg;
}

static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	return (tcp_cc_delay_ack(tp, th));
}

/*
 * When switching from a different congestion-control algorithm, it is
 * better for Cubic to start fresh. The state required for the Cubic
 * calculation might be stale and might not represent the current state
 * of the network. Starting as a new connection lets it probe and learn
 * the existing network conditions.
 */
static void
tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index)
{
#pragma unused(old_cc_index)
	tcp_cubic_cwnd_init_or_reset(tp);

	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
}

static inline void tcp_cubic_clear_state(struct tcpcb *tp)
{
	tp->t_ccstate->cub_last_max = 0;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_origin_point = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
	tp->t_ccstate->cub_epoch_period = 0;
	tp->t_ccstate->cub_target_win = 0;
}