| 1 | /* |
| 2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #include <sys/param.h> |
| 30 | #include <sys/systm.h> |
| 31 | #include <sys/kernel.h> |
| 32 | #include <sys/mbuf.h> |
| 33 | #include <sys/mcache.h> |
| 34 | #include <sys/syslog.h> |
| 35 | #include <sys/socket.h> |
| 36 | #include <sys/socketvar.h> |
| 37 | #include <sys/protosw.h> |
| 38 | #include <sys/proc_internal.h> |
| 39 | |
| 40 | #include <mach/boolean.h> |
| 41 | #include <kern/zalloc.h> |
| 42 | #include <kern/locks.h> |
| 43 | |
| 44 | #include <netinet/mp_pcb.h> |
| 45 | #include <netinet/mptcp_var.h> |
| 46 | #include <netinet6/in6_pcb.h> |
| 47 | |
| 48 | static lck_grp_t *mp_lock_grp; |
| 49 | static lck_attr_t *mp_lock_attr; |
| 50 | static lck_grp_attr_t *mp_lock_grp_attr; |
| 51 | decl_lck_mtx_data(static, mp_lock); /* global MULTIPATH lock */ |
| 52 | decl_lck_mtx_data(static, mp_timeout_lock); |
| 53 | |
| 54 | static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head); |
| 55 | |
| 56 | static boolean_t mp_timeout_run; /* MP timer is scheduled to run */ |
| 57 | static boolean_t mp_garbage_collecting; |
| 58 | static boolean_t mp_ticking; |
| 59 | static void mp_sched_timeout(void); |
| 60 | static void mp_timeout(void *); |
| 61 | |
| 62 | void |
| 63 | mp_pcbinit(void) |
| 64 | { |
| 65 | static int mp_initialized = 0; |
| 66 | |
| 67 | VERIFY(!mp_initialized); |
| 68 | mp_initialized = 1; |
| 69 | |
| 70 | mp_lock_grp_attr = lck_grp_attr_alloc_init(); |
| 71 | mp_lock_grp = lck_grp_alloc_init("multipath" , mp_lock_grp_attr); |
| 72 | mp_lock_attr = lck_attr_alloc_init(); |
| 73 | lck_mtx_init(&mp_lock, mp_lock_grp, mp_lock_attr); |
| 74 | lck_mtx_init(&mp_timeout_lock, mp_lock_grp, mp_lock_attr); |
| 75 | } |
| 76 | |
| 77 | static void |
| 78 | mp_timeout(void *arg) |
| 79 | { |
| 80 | #pragma unused(arg) |
| 81 | struct mppcbinfo *mppi; |
| 82 | boolean_t t, gc; |
| 83 | uint32_t t_act = 0; |
| 84 | uint32_t gc_act = 0; |
| 85 | |
| 86 | /* |
| 87 | * Update coarse-grained networking timestamp (in sec.); the idea |
| 88 | * is to piggy-back on the timeout callout to update the counter |
| 89 | * returnable via net_uptime(). |
| 90 | */ |
| 91 | net_update_uptime(); |
| 92 | |
| 93 | lck_mtx_lock_spin(&mp_timeout_lock); |
| 94 | gc = mp_garbage_collecting; |
| 95 | mp_garbage_collecting = FALSE; |
| 96 | |
| 97 | t = mp_ticking; |
| 98 | mp_ticking = FALSE; |
| 99 | |
| 100 | if (gc || t) { |
| 101 | lck_mtx_unlock(&mp_timeout_lock); |
| 102 | |
| 103 | lck_mtx_lock(&mp_lock); |
| 104 | TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) { |
| 105 | if ((gc && mppi->mppi_gc != NULL) || |
| 106 | (t && mppi->mppi_timer != NULL)) { |
| 107 | lck_mtx_lock(&mppi->mppi_lock); |
| 108 | if (gc && mppi->mppi_gc != NULL) |
| 109 | gc_act += mppi->mppi_gc(mppi); |
| 110 | if (t && mppi->mppi_timer != NULL) |
| 111 | t_act += mppi->mppi_timer(mppi); |
| 112 | lck_mtx_unlock(&mppi->mppi_lock); |
| 113 | } |
| 114 | } |
| 115 | lck_mtx_unlock(&mp_lock); |
| 116 | |
| 117 | lck_mtx_lock_spin(&mp_timeout_lock); |
| 118 | } |
| 119 | |
| 120 | /* lock was dropped above, so check first before overriding */ |
| 121 | if (!mp_garbage_collecting) |
| 122 | mp_garbage_collecting = (gc_act != 0); |
| 123 | if (!mp_ticking) |
| 124 | mp_ticking = (t_act != 0); |
| 125 | |
| 126 | /* re-arm the timer if there's work to do */ |
| 127 | mp_timeout_run = FALSE; |
| 128 | mp_sched_timeout(); |
| 129 | lck_mtx_unlock(&mp_timeout_lock); |
| 130 | } |
| 131 | |
| 132 | static void |
| 133 | mp_sched_timeout(void) |
| 134 | { |
| 135 | LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED); |
| 136 | |
| 137 | if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) { |
| 138 | lck_mtx_convert_spin(&mp_timeout_lock); |
| 139 | mp_timeout_run = TRUE; |
| 140 | timeout(mp_timeout, NULL, hz); |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | void |
| 145 | mp_gc_sched(void) |
| 146 | { |
| 147 | lck_mtx_lock_spin(&mp_timeout_lock); |
| 148 | mp_garbage_collecting = TRUE; |
| 149 | mp_sched_timeout(); |
| 150 | lck_mtx_unlock(&mp_timeout_lock); |
| 151 | } |
| 152 | |
| 153 | void |
| 154 | mptcp_timer_sched(void) |
| 155 | { |
| 156 | lck_mtx_lock_spin(&mp_timeout_lock); |
| 157 | mp_ticking = TRUE; |
| 158 | mp_sched_timeout(); |
| 159 | lck_mtx_unlock(&mp_timeout_lock); |
| 160 | } |
| 161 | |
| 162 | void |
| 163 | mp_pcbinfo_attach(struct mppcbinfo *mppi) |
| 164 | { |
| 165 | struct mppcbinfo *mppi0; |
| 166 | |
| 167 | lck_mtx_lock(&mp_lock); |
| 168 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
| 169 | if (mppi0 == mppi) { |
| 170 | panic("%s: mppi %p already in the list\n" , |
| 171 | __func__, mppi); |
| 172 | /* NOTREACHED */ |
| 173 | } |
| 174 | } |
| 175 | TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry); |
| 176 | lck_mtx_unlock(&mp_lock); |
| 177 | } |
| 178 | |
| 179 | int |
| 180 | mp_pcbinfo_detach(struct mppcbinfo *mppi) |
| 181 | { |
| 182 | struct mppcbinfo *mppi0; |
| 183 | int error = 0; |
| 184 | |
| 185 | lck_mtx_lock(&mp_lock); |
| 186 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
| 187 | if (mppi0 == mppi) |
| 188 | break; |
| 189 | } |
| 190 | if (mppi0 != NULL) |
| 191 | TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry); |
| 192 | else |
| 193 | error = ENXIO; |
| 194 | lck_mtx_unlock(&mp_lock); |
| 195 | |
| 196 | return (error); |
| 197 | } |
| 198 | |
| 199 | int |
| 200 | mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) |
| 201 | { |
| 202 | struct mppcb *mpp = NULL; |
| 203 | int error; |
| 204 | |
| 205 | VERIFY(mpsotomppcb(so) == NULL); |
| 206 | |
| 207 | mpp = zalloc(mppi->mppi_zone); |
| 208 | if (mpp == NULL) { |
| 209 | return (ENOBUFS); |
| 210 | } |
| 211 | |
| 212 | bzero(mpp, mppi->mppi_size); |
| 213 | lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); |
| 214 | mpp->mpp_pcbinfo = mppi; |
| 215 | mpp->mpp_state = MPPCB_STATE_INUSE; |
| 216 | mpp->mpp_socket = so; |
| 217 | so->so_pcb = mpp; |
| 218 | |
| 219 | error = mptcp_sescreate(mpp); |
| 220 | if (error) { |
| 221 | lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp); |
| 222 | zfree(mppi->mppi_zone, mpp); |
| 223 | return (error); |
| 224 | } |
| 225 | |
| 226 | lck_mtx_lock(&mppi->mppi_lock); |
| 227 | mpp->mpp_flags |= MPP_ATTACHED; |
| 228 | TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); |
| 229 | mppi->mppi_count++; |
| 230 | lck_mtx_unlock(&mppi->mppi_lock); |
| 231 | |
| 232 | return (0); |
| 233 | } |
| 234 | |
| 235 | void |
| 236 | mp_pcbdetach(struct socket *mp_so) |
| 237 | { |
| 238 | struct mppcb *mpp = mpsotomppcb(mp_so); |
| 239 | |
| 240 | mpp->mpp_state = MPPCB_STATE_DEAD; |
| 241 | if (!(mp_so->so_flags & SOF_PCBCLEARING)) |
| 242 | mp_so->so_flags |= SOF_PCBCLEARING; |
| 243 | |
| 244 | mp_gc_sched(); |
| 245 | } |
| 246 | |
| 247 | void |
| 248 | mp_pcbdispose(struct mppcb *mpp) |
| 249 | { |
| 250 | struct mppcbinfo *mppi = mpp->mpp_pcbinfo; |
| 251 | |
| 252 | VERIFY(mppi != NULL); |
| 253 | |
| 254 | LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); |
| 255 | mpp_lock_assert_held(mpp); |
| 256 | |
| 257 | VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD); |
| 258 | VERIFY(mpp->mpp_flags & MPP_ATTACHED); |
| 259 | |
| 260 | mpp->mpp_flags &= ~MPP_ATTACHED; |
| 261 | TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry); |
| 262 | VERIFY(mppi->mppi_count != 0); |
| 263 | mppi->mppi_count--; |
| 264 | |
| 265 | mpp_unlock(mpp); |
| 266 | |
| 267 | #if NECP |
| 268 | necp_mppcb_dispose(mpp); |
| 269 | #endif /* NECP */ |
| 270 | |
| 271 | lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp); |
| 272 | |
| 273 | VERIFY(mpp->mpp_socket != NULL); |
| 274 | VERIFY(mpp->mpp_socket->so_usecount == 0); |
| 275 | mpp->mpp_socket->so_pcb = NULL; |
| 276 | mpp->mpp_socket = NULL; |
| 277 | |
| 278 | zfree(mppi->mppi_zone, mpp); |
| 279 | } |
| 280 | |
| 281 | static int |
| 282 | mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
| 283 | { |
| 284 | struct mptses *mpte = mpsotompte(mp_so); |
| 285 | struct sockaddr_in *sin; |
| 286 | |
| 287 | /* |
| 288 | * Do the malloc first in case it blocks. |
| 289 | */ |
| 290 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
| 291 | if (sin == NULL) |
| 292 | return (ENOBUFS); |
| 293 | bzero(sin, sizeof (*sin)); |
| 294 | sin->sin_family = AF_INET; |
| 295 | sin->sin_len = sizeof (*sin); |
| 296 | |
| 297 | if (!peer) { |
| 298 | sin->sin_port = mpte->__mpte_src_v4.sin_port; |
| 299 | sin->sin_addr = mpte->__mpte_src_v4.sin_addr; |
| 300 | } else { |
| 301 | sin->sin_port = mpte->__mpte_dst_v4.sin_port; |
| 302 | sin->sin_addr = mpte->__mpte_dst_v4.sin_addr; |
| 303 | } |
| 304 | |
| 305 | *nam = (struct sockaddr *)sin; |
| 306 | return (0); |
| 307 | } |
| 308 | |
| 309 | static int |
| 310 | mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
| 311 | { |
| 312 | struct mptses *mpte = mpsotompte(mp_so); |
| 313 | struct in6_addr addr; |
| 314 | in_port_t port; |
| 315 | |
| 316 | if (!peer) { |
| 317 | port = mpte->__mpte_src_v6.sin6_port; |
| 318 | addr = mpte->__mpte_src_v6.sin6_addr; |
| 319 | } else { |
| 320 | port = mpte->__mpte_dst_v6.sin6_port; |
| 321 | addr = mpte->__mpte_dst_v6.sin6_addr; |
| 322 | } |
| 323 | |
| 324 | *nam = in6_sockaddr(port, &addr); |
| 325 | if (*nam == NULL) |
| 326 | return (ENOBUFS); |
| 327 | |
| 328 | return (0); |
| 329 | } |
| 330 | |
| 331 | int |
| 332 | mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam) |
| 333 | { |
| 334 | struct mptses *mpte = mpsotompte(mp_so); |
| 335 | |
| 336 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) |
| 337 | return mp_getaddr_v4(mp_so, nam, false); |
| 338 | else if (mpte->mpte_src.sa_family == AF_INET6) |
| 339 | return mp_getaddr_v6(mp_so, nam, false); |
| 340 | else |
| 341 | return (EINVAL); |
| 342 | } |
| 343 | |
| 344 | int |
| 345 | mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam) |
| 346 | { |
| 347 | struct mptses *mpte = mpsotompte(mp_so); |
| 348 | |
| 349 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) |
| 350 | return mp_getaddr_v4(mp_so, nam, true); |
| 351 | else if (mpte->mpte_src.sa_family == AF_INET6) |
| 352 | return mp_getaddr_v6(mp_so, nam, true); |
| 353 | else |
| 354 | return (EINVAL); |
| 355 | } |
| 356 | |