content_filter.c source code [codebrowser/bsd/net/content_filter.c]

1	/*
2	* Copyright (c) 2013-2018 Apple Inc. All rights reserved.
3	*
4	* @APPLE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. Please obtain a copy of the License at
10	* http://www.opensource.apple.com/apsl/ and read it before using this
11	* file.
12	*
13	* The Original Code and all software distributed under the License are
14	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18	* Please see the License for the specific language governing rights and
19	* limitations under the License.
20	*
21	* @APPLE_LICENSE_HEADER_END@
22	*/
23
24	/*
25	* THEORY OF OPERATION
26	*
27	* The socket content filter subsystem provides a way for user space agents to
28	* make filtering decisions based on the content of the data being sent and
29	* received by TCP/IP sockets.
30	*
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
35	*
36	* A user space filter agent opens a kernel control socket with the name
37	* CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38	* When connected, a "struct content_filter" is created and set as the
39	* "unitinfo" of the corresponding kernel control socket instance.
40	*
41	* The socket content filter subsystem exchanges messages with the user space
42	* filter agent until an ultimate pass or drop decision is made by the
43	* user space filter agent.
44	*
45	* It should be noted that messages about many TCP/IP sockets can be multiplexed
46	* over a single kernel control socket.
47	*
48	* Notes:
49	* - The current implementation is limited to TCP sockets.
50	* - The current implementation supports up to two simultaneous content filters
51	* for the sake of simplicity of the implementation.
52	*
53	*
54	* NECP FILTER CONTROL UNIT
55	*
56	* A user space filter agent uses the Network Extension Control Policy (NECP)
57	* database to specify which TCP/IP sockets need to be filtered. The NECP
58	* criteria may be based on a variety of properties like user ID or proc UUID.
59	*
60	* The NECP "filter control unit" is used by the socket content filter subsystem
61	* to deliver the relevant TCP/IP content information to the appropriate
62	* user space filter agent via its kernel control socket instance.
63	* This works as follows:
64	*
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
67	*
68	* 2) The user space filter agent also sets its NECP filter control unit on the
69	* content filter kernel control socket via the socket option
70	* CFIL_OPT_NECP_CONTROL_UNIT.
71	*
72	* 3) The NECP database is consulted to find out if a given TCP/IP socket
73	* needs to be subjected to content filtering and returns the corresponding
74	* NECP filter control unit -- the NECP filter control unit is actually
75	* stored in the TCP/IP socket structure so the NECP lookup is really simple.
76	*
77	* 4) The NECP filter control unit is then used to find the corresponding
78	* kernel control socket instance.
79	*
80	* Note: NECP currently supports a single filter control unit per TCP/IP socket
81	* but this restriction may be soon lifted.
82	*
83	*
84	* THE MESSAGING PROTOCOL
85	*
86	* The socket content filter subsystem and a user space filter agent
87	* communicate over the kernel control socket via an asynchronous
88	* messaging protocol (this is not a request-response protocol).
89	* The socket content filter subsystem sends event messages to the user
90	* space filter agent about the TCP/IP sockets it is interested to filter.
91	* The user space filter agent sends action messages to either allow
92	* data to pass or to disallow the data flow (and drop the connection).
93	*
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
101	*
102	* Here are the kinds of content filter events:
103	* - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104	* - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105	* - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107	*
108	*
109	* EVENT MESSAGES
110	*
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112	* data that is being sent or received. The position of this span of data
113	* in the data flow is described by a set of start and end offsets. These
114	* are absolute 64 bits offsets. The first byte sent (or received) starts
115	* at offset 0 and ends at offset 1. The length of the content data
116	* is given by the difference between the end offset and the start offset.
117	*
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
121	*
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125	*
 * There are two kinds of primary content filter actions:
127	* - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128	* - CFM_OP_DROP: to shutdown socket and disallow further data flow
129	*
130	* There is also an action to mark a given client flow as already filtered
131	* at a higher level, CFM_OP_BLESS_CLIENT.
132	*
133	*
134	* ACTION MESSAGES
135	*
136	* The CFM_OP_DATA_UPDATE action messages let the user space filter
137	* agent allow data to flow up to the specified pass offset -- there
138	* is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
141	* When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142	* then the data flow becomes unrestricted.
143	*
144	* Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145	* with a pass offset smaller than the pass offset of a previous
146	* CFM_OP_DATA_UPDATE message is silently ignored.
147	*
148	* A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149	* to tell the kernel how much data it wants to see by using the peek offsets.
150	* Just like pass offsets, there is a peek offset for each direction.
151	* When a new TCP/IP socket is attached to the content filter, each peek offset
152	* is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153	* messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154	* with a greater than 0 peek offset is sent by the user space filter agent.
155	* When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156	* then the flow of update data events becomes unrestricted.
157	*
158	* Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
160	* of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161	* to set a too small peek value is silently ignored.
162	*
163	*
164	* PER SOCKET "struct cfil_info"
165	*
166	* As soon as a TCP/IP socket gets attached to a content filter, a
167	* "struct cfil_info" is created to hold the content filtering state for this
168	* socket.
169	*
170	* The content filtering state is made of the following information
171	* for each direction:
172	* - The current pass offset;
173	* - The first and last offsets of the data pending, waiting for a filtering
174	* decision;
175	* - The inject queue for data that passed the filters and that needs
176	* to be re-injected;
177	* - A content filter specific state in a set of "struct cfil_entry"
178	*
179	*
180	* CONTENT FILTER STATE "struct cfil_entry"
181	*
182	* The "struct cfil_entry" maintains the information most relevant to the
183	* message handling over a kernel control socket with a user space filter agent.
184	*
185	* The "struct cfil_entry" holds the NECP filter control unit that corresponds
186	* to the kernel control socket unit it corresponds to and also has a pointer
187	* to the corresponding "struct content_filter".
188	*
189	* For each direction, "struct cfil_entry" maintains the following information:
190	* - The pass offset
191	* - The peek offset
192	* - The offset of the last data peeked at by the filter
193	* - A queue of data that's waiting to be delivered to the user space filter
194	* agent on the kernel control socket
195	* - A queue of data for which event messages have been sent on the kernel
196	* control socket and are pending for a filtering decision.
197	*
198	*
199	* CONTENT FILTER QUEUES
200	*
201	* Data that is being filtered is steered away from the TCP/IP socket buffer
202	* and instead will sit in one of three content filter queues until the data
203	* can be re-injected into the TCP/IP socket buffer.
204	*
205	* A content filter queue is represented by "struct cfil_queue" that contains
206	* a list of mbufs and the start and end offset of the data span of
207	* the list of mbufs.
208	*
209	* The data moves into the three content filter queues according to this
210	* sequence:
211	* a) The "cfe_ctl_q" of "struct cfil_entry"
212	* b) The "cfe_pending_q" of "struct cfil_entry"
213	* c) The "cfi_inject_q" of "struct cfil_info"
214	*
215	* Note: The sequence (a),(b) may be repeated several times if there is more
216	* than one content filter attached to the TCP/IP socket.
217	*
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
221	* - The kernel control socket is flow controlled
222	*
223	* The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224	* CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and are waiting for a pass action message from the user space
226	* filter agent. An mbuf length must be fully allowed to pass to be removed
227	* from the cfe_pending_q.
228	*
229	* The "cfi_inject_q" queue holds data that has been fully allowed to pass
230	* by the user space filter agent and that needs to be re-injected into the
231	* TCP/IP socket.
232	*
233	*
234	* IMPACT ON FLOW CONTROL
235	*
 * An essential aspect of the content filter subsystem is to minimize the
237	* impact on flow control of the TCP/IP sockets being filtered.
238	*
239	* The processing overhead of the content filtering may have an effect on
240	* flow control by adding noticeable delays and cannot be eliminated --
241	* care must be taken by the user space filter agent to minimize the
242	* processing delays.
243	*
244	* The amount of data being filtered is kept in buffers while waiting for
245	* a decision by the user space filter agent. This amount of data pending
246	* needs to be subtracted from the amount of data available in the
247	* corresponding TCP/IP socket buffer. This is done by modifying
248	* sbspace() and tcp_sbspace() to account for amount of data pending
249	* in the content filter.
250	*
251	*
252	* LOCKING STRATEGY
253	*
254	* The global state of content filter subsystem is protected by a single
255	* read-write lock "cfil_lck_rw". The data flow can be done with the
256	* cfil read-write lock held as shared so it can be re-entered from multiple
257	* threads.
258	*
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260	* protected by the socket lock.
261	*
262	* A TCP/IP socket lock cannot be taken while the cfil read-write lock
263	* is held. That's why we have some sequences where we drop the cfil read-write
264	* lock before taking the TCP/IP lock.
265	*
266	* It is also important to lock the TCP/IP socket buffer while the content
267	* filter is modifying the amount of pending data. Otherwise the calculations
268	* in sbspace() and tcp_sbspace() could be wrong.
269	*
270	* The "cfil_lck_rw" protects "struct content_filter" and also the fields
271	* "cfe_link" and "cfe_filter" of "struct cfil_entry".
272	*
 * Actually "cfe_link" and "cfe_filter" are protected by both
274	* "cfil_lck_rw" and the socket lock: they may be modified only when
275	* "cfil_lck_rw" is exclusive and the socket is locked.
276	*
277	* To read the other fields of "struct content_filter" we have to take
278	* "cfil_lck_rw" in shared mode.
279	*
280	*
281	* LIMITATIONS
282	*
283	* - For TCP sockets only
284	*
285	* - Does not support TCP unordered messages
286	*/
287
288	/*
289	* TO DO LIST
290	*
291	* SOONER:
292	*
293	* Deal with OOB
294	*
295	* LATER:
296	*
297	* If support datagram, enqueue control and address mbufs as well
298	*/
299
300	#include <sys/types.h>
301	#include <sys/kern_control.h>
302	#include <sys/queue.h>
303	#include <sys/domain.h>
304	#include <sys/protosw.h>
305	#include <sys/syslog.h>
306	#include <sys/systm.h>
307	#include <sys/param.h>
308	#include <sys/mbuf.h>
309
310	#include <kern/locks.h>
311	#include <kern/zalloc.h>
312	#include <kern/debug.h>
313
314	#include <net/content_filter.h>
315
316	#include <netinet/in_pcb.h>
317	#include <netinet/tcp.h>
318	#include <netinet/tcp_var.h>
319	#include <netinet/udp.h>
320	#include <netinet/udp_var.h>
321
322	#include <string.h>
323	#include <libkern/libkern.h>
324	#include <kern/sched_prim.h>
325
/*
 * Maximum number of content filters; it sizes the per-socket
 * cfi_entries[] array in struct cfil_info.
 */
#define MAX_CONTENT_FILTER 2

struct cfil_entry;
329
330	/*
331	* The structure content_filter represents a user space content filter
332	* It's created and associated with a kernel control socket instance
333	*/
334	struct content_filter {
335	kern_ctl_ref cf_kcref;
336	u_int32_t cf_kcunit;
337	u_int32_t cf_flags;
338
339	uint32_t cf_necp_control_unit;
340
341	uint32_t cf_sock_count;
342	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
343	};
344
345	#define CFF_ACTIVE 0x01
346	#define CFF_DETACHING 0x02
347	#define CFF_FLOW_CONTROLLED 0x04
348
349	struct content_filter **content_filters = NULL;
350	uint32_t cfil_active_count = `0`; / Number of active content filters /
351	uint32_t cfil_sock_attached_count = `0`; / Number of sockets attachements /
352	uint32_t cfil_sock_udp_attached_count = `0`; / Number of UDP sockets attachements /
353	uint32_t cfil_close_wait_timeout = `1000`; / in milliseconds /
354
355	static kern_ctl_ref cfil_kctlref = NULL;
356
357	static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
358	static lck_attr_t *cfil_lck_attr = NULL;
359	static lck_grp_t *cfil_lck_grp = NULL;
360	decl_lck_rw_data(static, cfil_lck_rw);
361
362	#define CFIL_RW_LCK_MAX 8
363
364	int cfil_rw_nxt_lck = `0`;
365	void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
366
367	int cfil_rw_nxt_unlck = `0`;
368	void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
369
370	#define CONTENT_FILTER_ZONE_NAME "content_filter"
371	#define CONTENT_FILTER_ZONE_MAX 10
372	static struct zone content_filter_zone = NULL; /* zone for content_filter /
373
374
375	#define CFIL_INFO_ZONE_NAME "cfil_info"
376	#define CFIL_INFO_ZONE_MAX 1024
377	static struct zone cfil_info_zone = NULL; /* zone for cfil_info /
378
379	MBUFQ_HEAD(cfil_mqhead);
380
381	struct cfil_queue {
382	uint64_t q_start; / offset of first byte in queue /
383	uint64_t q_end; / offset of last byte in queue /
384	struct cfil_mqhead q_mq;
385	};
386
387	/*
388	* struct cfil_entry
389	*
390	* The is one entry per content filter
391	*/
392	struct cfil_entry {
393	TAILQ_ENTRY(cfil_entry) cfe_link;
394	struct content_filter *cfe_filter;
395
396	struct cfil_info *cfe_cfil_info;
397	uint32_t cfe_flags;
398	uint32_t cfe_necp_control_unit;
399	struct timeval cfe_last_event; / To user space /
400	struct timeval cfe_last_action; / From user space /
401
402	struct cfe_buf {
403	/*
404	* cfe_pending_q holds data that has been delivered to
405	* the filter and for which we are waiting for an action
406	*/
407	struct cfil_queue cfe_pending_q;
408	/*
409	* This queue is for data that has not be delivered to
410	* the content filter (new data, pass peek or flow control)
411	*/
412	struct cfil_queue cfe_ctl_q;
413
414	uint64_t cfe_pass_offset;
415	uint64_t cfe_peek_offset;
416	uint64_t cfe_peeked;
417	} cfe_snd, cfe_rcv;
418	};
419
/* CFEF_* flag bits (presumably stored in cfe_flags of struct cfil_entry) */
#define CFEF_CFIL_ATTACHED              0x0001  /* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED         0x0002  /* sock attach event was sent */
#define CFEF_DATA_START                 0x0004  /* can send data event */
#define CFEF_FLOW_CONTROLLED            0x0008  /* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN         0x0010  /* event was sent */
#define CFEF_SENT_DISCONNECT_OUT        0x0020  /* event was sent */
#define CFEF_SENT_SOCK_CLOSED           0x0040  /* closed event was sent */
#define CFEF_CFIL_DETACHED              0x0080  /* filter was detached */
428
429
/*
 * Append one entry to the per-socket operation log of "cfil":
 *  - cfi_op_time[] gets the interval (t1 - t0) converted to milliseconds
 *  - cfi_op_list[] gets "op" truncated to an unsigned char
 * Nothing is recorded once CFI_MAX_TIME_LOG_ENTRY entries are stored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe under an unbraced if/else or loop) and so that the
 * temporary does not leak into, or collide within, the caller's scope.
 */
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
	do { \
		if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
			struct timeval _tdiff; \
			timersub(t1, t0, &_tdiff); \
			(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
			(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)(op); \
			(cfil)->cfi_op_list_ctr++; \
		} \
	} while (0)
438
439	struct cfil_hash_entry;
440
441	/*
442	* struct cfil_info
443	*
444	* There is a struct cfil_info per socket
445	*/
446	struct cfil_info {
447	TAILQ_ENTRY(cfil_info) cfi_link;
448	struct socket *cfi_so;
449	uint64_t cfi_flags;
450	uint64_t cfi_sock_id;
451	struct timeval64 cfi_first_event;
452	uint32_t cfi_op_list_ctr;
453	uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; / time interval in microseconds since first event /
454	unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
455
456	struct cfi_buf {
457	/*
458	* cfi_pending_first and cfi_pending_last describe the total
459	* amount of data outstanding for all the filters on
460	* this socket and data in the flow queue
461	* cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
462	*/
463	uint64_t cfi_pending_first;
464	uint64_t cfi_pending_last;
465	uint32_t cfi_pending_mbcnt;
466	uint32_t cfi_pending_mbnum;
467	uint32_t cfi_tail_drop_cnt;
468	/*
469	* cfi_pass_offset is the minimum of all the filters
470	*/
471	uint64_t cfi_pass_offset;
472	/*
473	* cfi_inject_q holds data that needs to be re-injected
474	* into the socket after filtering and that can
475	* be queued because of flow control
476	*/
477	struct cfil_queue cfi_inject_q;
478	} cfi_snd, cfi_rcv;
479
480	struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
481	struct cfil_hash_entry *cfi_hash_entry;
482	} __attribute__((aligned(`8`)));
483
/* CFIF_* flag bits (presumably stored in cfi_flags of struct cfil_info) */
#define CFIF_DROP               0x0001  /* drop action applied */
#define CFIF_CLOSE_WAIT         0x0002  /* waiting for filter to close */
#define CFIF_SOCK_CLOSED        0x0004  /* socket is closed */
#define CFIF_RETRY_INJECT_IN    0x0010  /* inject in failed */
#define CFIF_RETRY_INJECT_OUT   0x0020  /* inject out failed */
#define CFIF_SHUT_WR            0x0040  /* shutdown write */
#define CFIF_SHUT_RD            0x0080  /* shutdown read */

/* Masks and shifts splitting a 64-bit value into two 32-bit halves */
#define CFI_MASK_GENCNT         0xFFFFFFFF00000000      /* upper 32 bits */
#define CFI_SHIFT_GENCNT        32
#define CFI_MASK_FLOWHASH       0x00000000FFFFFFFF      /* lower 32 bits */
#define CFI_SHIFT_FLOWHASH      0

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

/*
 * Run the consistency checkers only when cfil_debug is non-zero.
 * Wrapped in do { } while (0) so that an "else" following the macro in the
 * caller cannot bind to the macro's internal "if".
 */
#define CFIL_QUEUE_VERIFY(x) do { if (cfil_debug) cfil_queue_verify(x); } while (0)
#define CFIL_INFO_VERIFY(x) do { if (cfil_debug) cfil_info_verify(x); } while (0)
501
/*
 * UDP Socket Support
 */
LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define CFILHASHSIZE 16
/* Mix the flow 4-tuple into a hash value (XOR of the parts, laddr shifted) */
#define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
/* True when "so" is non-NULL and its protocol is SOCK_DGRAM / IPPROTO_UDP */
#define IS_UDP(so) ((so) && (so)->so_proto->pr_type == SOCK_DGRAM && (so)->so_proto->pr_protocol == IPPROTO_UDP)
/* True when "inp" is non-NULL and its foreign address is the wildcard/unspecified one */
#define UNCONNECTED(inp) ((inp) && ((((inp)->inp_vflag & INP_IPV4) && ((inp)->inp_faddr.s_addr == INADDR_ANY)) || \
	(((inp)->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&(inp)->in6p_faddr))))
/*
 * True when the entry for kernel control unit "kcunit" has a filter set.
 * Units are 1-based: the lower bound check keeps unit 0 from indexing
 * cfi_entries[] out of bounds.
 */
#define IS_ENTRY_ATTACHED(cfil_info, kcunit) ((cfil_info) != NULL && (kcunit) >= 1 && (kcunit) <= MAX_CONTENT_FILTER && \
	(cfil_info)->cfi_entries[(kcunit) - 1].cfe_filter != NULL)
/* True when either address matches port 53 or 5353 according to check_port() */
#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
514
515	/*
516	* UDP Garbage Collection:
517	*/
518	static struct thread *cfil_udp_gc_thread;
519	#define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
520	#define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
521	#define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
522	#define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
523
524	/*
525	* UDP flow queue thresholds
526	*/
527	#define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
528	#define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
529	#define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
530	/*
531	* UDP flow queue threshold globals:
532	*/
533	static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
534	static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
535
536	/*
537	* struct cfil_hash_entry
538	*
539	* Hash entry for cfil_info
540	*/
541	struct cfil_hash_entry {
542	LIST_ENTRY(cfil_hash_entry) cfentry_link;
543	struct cfil_info *cfentry_cfil;
544	u_short cfentry_fport;
545	u_short cfentry_lport;
546	sa_family_t cfentry_family;
547	u_int32_t cfentry_flowhash;
548	u_int32_t cfentry_lastused;
549	union {
550	/ foreign host table entry /
551	struct in_addr_4in6 addr46;
552	struct in6_addr addr6;
553	} cfentry_faddr;
554	union {
555	/ local host table entry /
556	struct in_addr_4in6 addr46;
557	struct in6_addr addr6;
558	} cfentry_laddr;
559	};
560
561	/*
562	* struct cfil_db
563	*
564	* For each UDP socket, this is a hash table maintaining all cfil_info structs
565	* keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
566	*/
567	struct cfil_db {
568	struct socket *cfdb_so;
569	uint32_t cfdb_count; / Number of total content filters /
570	struct cfilhashhead *cfdb_hashbase;
571	u_long cfdb_hashmask;
572	struct cfil_hash_entry cfdb_only_entry; /* Optimization for connected UDP /
573	};
574
575	/*
576	* CFIL specific mbuf tag:
577	* Save state of socket at the point of data entry into cfil.
578	* Use saved state for reinjection at protocol layer.
579	*/
580	struct cfil_tag {
581	union sockaddr_in_4_6 cfil_faddr;
582	uint32_t cfil_so_state_change_cnt;
583	short cfil_so_options;
584	};
585
586	#define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
587	#define CFIL_HASH_ENTRY_ZONE_MAX 1024
588	static struct zone *cfil_hash_entry_zone = NULL;
589
590	#define CFIL_DB_ZONE_NAME "cfil_db"
591	#define CFIL_DB_ZONE_MAX 1024
592	static struct zone *cfil_db_zone = NULL;
593
594	/*
595	* Statistics
596	*/
597
598	struct cfil_stats cfil_stats;
599
600	/*
601	* For troubleshooting
602	*/
603	int cfil_log_level = LOG_ERR;
604	int cfil_debug = `1`;
605
606	// Debug controls added for selective debugging.
607	// Disabled for production. If enabled,
608	// these will have performance impact
609	#define LIFECYCLE_DEBUG 0
610	#define VERDICT_DEBUG 0
611	#define DATA_DEBUG 0
612	#define SHOW_DEBUG 0
613	#define GC_DEBUG 0
614
615	/*
616	* Sysctls for logs and statistics
617	*/
618	static int sysctl_cfil_filter_list(struct sysctl_oid , void* , int*,
619	struct sysctl_req *);
620	static int sysctl_cfil_sock_list(struct sysctl_oid , void* , int*,
621	struct sysctl_req *);
622
623	SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW\|CTLFLAG_LOCKED, `0`, "cfil");
624
625	SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW\|CTLFLAG_LOCKED,
626	&cfil_log_level, `0`, "");
627
628	SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW\|CTLFLAG_LOCKED,
629	&cfil_debug, `0`, "");
630
631	SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD\|CTLFLAG_LOCKED,
632	&cfil_sock_attached_count, `0`, "");
633
634	SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD\|CTLFLAG_LOCKED,
635	&cfil_active_count, `0`, "");
636
637	SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW\|CTLFLAG_LOCKED,
638	&cfil_close_wait_timeout, `0`, "");
639
640	static int cfil_sbtrim = `1`;
641	SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW\|CTLFLAG_LOCKED,
642	&cfil_sbtrim, `0`, "");
643
644	SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD\|CTLFLAG_LOCKED,
645	`0`, `0`, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
646
647	SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD\|CTLFLAG_LOCKED,
648	`0`, `0`, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
649
650	SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD\|CTLFLAG_LOCKED,
651	&cfil_stats, cfil_stats, "");
652
653	/*
654	* Forward declaration to appease the compiler
655	*/
656	static int cfil_action_data_pass(struct socket , struct* cfil_info , uint32_t, int*,
657	uint64_t, uint64_t);
658	static int cfil_action_drop(struct socket , struct* cfil_info *, uint32_t);
659	static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
660	static int cfil_dispatch_closed_event(struct socket , struct* cfil_info , int*);
661	static int cfil_data_common(struct socket , struct* cfil_info , int, struct* sockaddr *,
662	struct mbuf , struct* mbuf *, uint32_t);
663	static int cfil_data_filter(struct socket , struct* cfil_info , uint32_t, int*,
664	struct mbuf *, uint64_t);
665	static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
666	struct in_addr, u_int16_t);
667	static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
668	struct in6_addr *, u_int16_t);
669	;
670	static int cfil_dispatch_attach_event(struct socket , struct* cfil_info *, uint32_t);
671	static void cfil_info_free(struct cfil_info *);
672	static struct cfil_info * cfil_info_alloc(struct socket , struct* cfil_hash_entry *);
673	static int cfil_info_attach_unit(struct socket , uint32_t, struct* cfil_info *);
674	static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
675	static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
676	static int cfil_service_pending_queue(struct socket , struct* cfil_info , uint32_t, int*);
677	static int cfil_data_service_ctl_q(struct socket , struct* cfil_info , uint32_t, int*);
678	static void cfil_info_verify(struct cfil_info *);
679	static int cfil_update_data_offsets(struct socket , struct* cfil_info , uint32_t, int*,
680	uint64_t, uint64_t);
681	static int cfil_acquire_sockbuf(struct socket , struct* cfil_info , int*);
682	static void cfil_release_sockbuf(struct socket , int*);
683	static int cfil_filters_attached(struct socket *);
684
685	static void cfil_rw_lock_exclusive(lck_rw_t *);
686	static void cfil_rw_unlock_exclusive(lck_rw_t *);
687	static void cfil_rw_lock_shared(lck_rw_t *);
688	static void cfil_rw_unlock_shared(lck_rw_t *);
689	static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
690	static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
691
692	static unsigned int cfil_data_length(struct mbuf , int* , int* *);
693	static errno_t cfil_db_init(struct socket *);
694	static void cfil_db_free(struct socket *so);
695	struct cfil_hash_entry cfil_db_lookup_entry(struct* cfil_db , struct* sockaddr , struct* sockaddr *);
696	struct cfil_hash_entry cfil_db_lookup_entry_with_sockid(struct* cfil_db *, u_int64_t);
697	struct cfil_hash_entry cfil_db_add_entry(struct* cfil_db , struct* sockaddr , struct* sockaddr *);
698	void cfil_db_delete_entry(struct cfil_db , struct* cfil_hash_entry *);
699	struct cfil_hash_entry cfil_sock_udp_get_flow(struct* socket , uint32_t, bool, struct* sockaddr , struct* sockaddr *);
700	struct cfil_info cfil_db_get_cfil_info(struct* cfil_db *, cfil_sock_id_t);
701	static errno_t cfil_sock_udp_handle_data(bool, struct socket , struct* sockaddr , struct* sockaddr *,
702	struct mbuf , struct* mbuf *, uint32_t);
703	static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
704	static void cfil_sock_udp_is_closed(struct socket *);
705	static int cfil_sock_udp_notify_shutdown(struct socket , int* , int, int);
706	static int cfil_sock_udp_shutdown(struct socket , int* *);
707	static void cfil_sock_udp_close_wait(struct socket *);
708	static void cfil_sock_udp_buf_update(struct sockbuf *);
709	static int cfil_filters_udp_attached(struct socket *, bool);
710	static void cfil_get_flow_address_v6(struct cfil_hash_entry , struct* inpcb *,
711	struct in6_addr , struct in6_addr ,
712	u_int16_t , u_int16_t );
713	static void cfil_get_flow_address(struct cfil_hash_entry , struct* inpcb *,
714	struct in_addr , struct* in_addr *,
715	u_int16_t , u_int16_t );
716	static void cfil_info_log(int, struct cfil_info , const* char *);
717	void cfil_filter_show(u_int32_t);
718	void cfil_info_show(void);
719	bool cfil_info_idle_timed_out(struct cfil_info , int*, u_int32_t);
720	bool cfil_info_action_timed_out(struct cfil_info , int*);
721	bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
722	struct m_tag cfil_udp_save_socket_state(struct* cfil_info , struct* mbuf *);
723	static void cfil_udp_gc_thread_func(void *, wait_result_t);
724	static void cfil_info_udp_expire(void *, wait_result_t);
725
726	bool check_port(struct sockaddr *, u_short);
727
728	/*
729	* Content filter global read write lock
730	*/
731
732	static void
733	cfil_rw_lock_exclusive(lck_rw_t *lck)
734	{
735	void *lr_saved;
736
737	lr_saved = __builtin_return_address(`0`);
738
739	lck_rw_lock_exclusive(lck);
740
741	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
742	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
743	}
744
745	static void
746	cfil_rw_unlock_exclusive(lck_rw_t *lck)
747	{
748	void *lr_saved;
749
750	lr_saved = __builtin_return_address(`0`);
751
752	lck_rw_unlock_exclusive(lck);
753
754	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
755	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
756	}
757
758	static void
759	cfil_rw_lock_shared(lck_rw_t *lck)
760	{
761	void *lr_saved;
762
763	lr_saved = __builtin_return_address(`0`);
764
765	lck_rw_lock_shared(lck);
766
767	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
768	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
769	}
770
771	static void
772	cfil_rw_unlock_shared(lck_rw_t *lck)
773	{
774	void *lr_saved;
775
776	lr_saved = __builtin_return_address(`0`);
777
778	lck_rw_unlock_shared(lck);
779
780	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
781	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
782	}
783
784	static boolean_t
785	cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
786	{
787	void *lr_saved;
788	boolean_t upgraded;
789
790	lr_saved = __builtin_return_address(`0`);
791
792	upgraded = lck_rw_lock_shared_to_exclusive(lck);
793	if (upgraded) {
794	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
795	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
796	}
797	return (upgraded);
798	}
799
800	static void
801	cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
802	{
803	void *lr_saved;
804
805	lr_saved = __builtin_return_address(`0`);
806
807	lck_rw_lock_exclusive_to_shared(lck);
808
809	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
810	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
811	}
812
813	static void
814	cfil_rw_lock_assert_held(lck_rw_t lck, int* exclusive)
815	{
816	#if !MACH_ASSERT
817	#pragma unused(lck, exclusive)
818	#endif
819	LCK_RW_ASSERT(lck,
820	exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
821	}
822
823	/*
824	* Return the number of bytes in the mbuf chain using the same
825	* method as m_length() or sballoc()
826	*
827	* Returns data len - starting from PKT start
828	* - retmbcnt - optional param to get total mbuf bytes in chain
829	* - retmbnum - optional param to get number of mbufs in chain
830	*/
831	static unsigned int
832	cfil_data_length(struct mbuf m, int* retmbcnt, int* *retmbnum)
833	{
834	struct mbuf *m0;
835	unsigned int pktlen = `0`;
836	int mbcnt;
837	int mbnum;
838
839	// Locate the start of data
840	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
841	if (m0->m_flags & M_PKTHDR)
842	break;
843	}
844	if (m0 == NULL) {
845	CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
846	return (`0`);
847	}
848	m = m0;
849
850	if (retmbcnt == NULL && retmbnum == NULL)
851	return (m_length(m));
852
853	pktlen = `0`;
854	mbcnt = `0`;
855	mbnum = `0`;
856	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
857	pktlen += m0->m_len;
858	mbnum++;
859	mbcnt += MSIZE;
860	if (m0->m_flags & M_EXT)
861	mbcnt += m0->m_ext.ext_size;
862	}
863	if (retmbcnt) {
864	*retmbcnt = mbcnt;
865	}
866	if (retmbnum) {
867	*retmbnum = mbnum;
868	}
869	return (pktlen);
870	}
871
872	static struct mbuf *
873	cfil_data_start(struct mbuf *m)
874	{
875	struct mbuf *m0;
876
877	// Locate the start of data
878	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
879	if (m0->m_flags & M_PKTHDR)
880	break;
881	}
882	return m0;
883	}
884
885	/*
886	* Common mbuf queue utilities
887	*/
888
889	static inline void
890	cfil_queue_init(struct cfil_queue *cfq)
891	{
892	cfq->q_start = `0`;
893	cfq->q_end = `0`;
894	MBUFQ_INIT(&cfq->q_mq);
895	}
896
897	static inline uint64_t
898	cfil_queue_drain(struct cfil_queue *cfq)
899	{
900	uint64_t drained = cfq->q_start - cfq->q_end;
901	cfq->q_start = `0`;
902	cfq->q_end = `0`;
903	MBUFQ_DRAIN(&cfq->q_mq);
904
905	return (drained);
906	}
907
908	/ Return 1 when empty, 0 otherwise /
909	static inline int
910	cfil_queue_empty(struct cfil_queue *cfq)
911	{
912	return (MBUFQ_EMPTY(&cfq->q_mq));
913	}
914
915	static inline uint64_t
916	cfil_queue_offset_first(struct cfil_queue *cfq)
917	{
918	return (cfq->q_start);
919	}
920
921	static inline uint64_t
922	cfil_queue_offset_last(struct cfil_queue *cfq)
923	{
924	return (cfq->q_end);
925	}
926
927	static inline uint64_t
928	cfil_queue_len(struct cfil_queue *cfq)
929	{
930	return (cfq->q_end - cfq->q_start);
931	}
932
933	/*
934	* Routines to verify some fundamental assumptions
935	*/
936
937	static void
938	cfil_queue_verify(struct cfil_queue *cfq)
939	{
940	mbuf_t chain;
941	mbuf_t m;
942	mbuf_t n;
943	uint64_t queuesize = `0`;
944
945	/ Verify offset are ordered /
946	VERIFY(cfq->q_start <= cfq->q_end);
947
948	/*
949	* When queue is empty, the offsets are equal otherwise the offsets
950	* are different
951	*/
952	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) \|\|
953	(!MBUFQ_EMPTY(&cfq->q_mq) &&
954	cfq->q_start != cfq->q_end));
955
956	MBUFQ_FOREACH(chain, &cfq->q_mq) {
957	size_t chainsize = `0`;
958	m = chain;
959	unsigned int mlen = cfil_data_length(m, NULL, NULL);
960	// skip the addr and control stuff if present
961	m = cfil_data_start(m);
962
963	if (m == NULL \|\|
964	m == (void *)M_TAG_FREE_PATTERN \|\|
965	m->m_next == (void *)M_TAG_FREE_PATTERN \|\|
966	m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
967	panic("%s - mq %p is free at %p", __func__,
968	&cfq->q_mq, m);
969	for (n = m; n != NULL; n = n->m_next) {
970	if (n->m_type != MT_DATA &&
971	n->m_type != MT_HEADER &&
972	n->m_type != MT_OOBDATA)
973	panic("%s - %p unsupported type %u", __func__,
974	n, n->m_type);
975	chainsize += n->m_len;
976	}
977	if (mlen != chainsize)
978	panic("%s - %p m_length() %u != chainsize %lu",
979	__func__, m, mlen, chainsize);
980	queuesize += chainsize;
981	}
982	if (queuesize != cfq->q_end - cfq->q_start)
983	panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
984	m, queuesize, cfq->q_end - cfq->q_start);
985	}
986
987	static void
988	cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
989	{
990	CFIL_QUEUE_VERIFY(cfq);
991
992	MBUFQ_ENQUEUE(&cfq->q_mq, m);
993	cfq->q_end += len;
994
995	CFIL_QUEUE_VERIFY(cfq);
996	}
997
998	static void
999	cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1000	{
1001	CFIL_QUEUE_VERIFY(cfq);
1002
1003	VERIFY(cfil_data_length(m, NULL, NULL) == len);
1004
1005	MBUFQ_REMOVE(&cfq->q_mq, m);
1006	MBUFQ_NEXT(m) = NULL;
1007	cfq->q_start += len;
1008
1009	CFIL_QUEUE_VERIFY(cfq);
1010	}
1011
1012	static mbuf_t
1013	cfil_queue_first(struct cfil_queue *cfq)
1014	{
1015	return (MBUFQ_FIRST(&cfq->q_mq));
1016	}
1017
1018	static mbuf_t
1019	cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1020	{
1021	#pragma unused(cfq)
1022	return (MBUFQ_NEXT(m));
1023	}
1024
1025	static void
1026	cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1027	{
1028	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1029	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1030
1031	/ Verify the queues are ordered so that pending is before ctl /
1032	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1033
1034	/ The peek offset cannot be less than the pass offset /
1035	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1036
1037	/ Make sure we've updated the offset we peeked at /
1038	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1039	}
1040
1041	static void
1042	cfil_entry_verify(struct cfil_entry *entry)
1043	{
1044	cfil_entry_buf_verify(&entry->cfe_snd);
1045	cfil_entry_buf_verify(&entry->cfe_rcv);
1046	}
1047
1048	static void
1049	cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1050	{
1051	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1052
1053	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1054	VERIFY(cfi_buf->cfi_pending_mbcnt >= `0`);
1055	}
1056
1057	static void
1058	cfil_info_verify(struct cfil_info *cfil_info)
1059	{
1060	int i;
1061
1062	if (cfil_info == NULL)
1063	return;
1064
1065	cfil_info_buf_verify(&cfil_info->cfi_snd);
1066	cfil_info_buf_verify(&cfil_info->cfi_rcv);
1067
1068	for (i = `0`; i < MAX_CONTENT_FILTER; i++)
1069	cfil_entry_verify(&cfil_info->cfi_entries[i]);
1070	}
1071
1072	static void
1073	verify_content_filter(struct content_filter *cfc)
1074	{
1075	struct cfil_entry *entry;
1076	uint32_t count = `0`;
1077
1078	VERIFY(cfc->cf_sock_count >= `0`);
1079
1080	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1081	count++;
1082	VERIFY(cfc == entry->cfe_filter);
1083	}
1084	VERIFY(count == cfc->cf_sock_count);
1085	}
1086
1087	/*
1088	* Kernel control socket callbacks
1089	*/
1090	static errno_t
1091	cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1092	void **unitinfo)
1093	{
1094	errno_t error = `0`;
1095	struct content_filter *cfc = NULL;
1096
1097	CFIL_LOG(LOG_NOTICE, "");
1098
1099	cfc = zalloc(content_filter_zone);
1100	if (cfc == NULL) {
1101	CFIL_LOG(LOG_ERR, "zalloc failed");
1102	error = ENOMEM;
1103	goto done;
1104	}
1105	bzero(cfc, sizeof(struct content_filter));
1106
1107	cfil_rw_lock_exclusive(&cfil_lck_rw);
1108	if (content_filters == NULL) {
1109	struct content_filter **tmp;
1110
1111	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1112
1113	MALLOC(tmp,
1114	struct content_filter **,
1115	MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1116	M_TEMP,
1117	M_WAITOK \| M_ZERO);
1118
1119	cfil_rw_lock_exclusive(&cfil_lck_rw);
1120
1121	if (tmp == NULL && content_filters == NULL) {
1122	error = ENOMEM;
1123	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1124	goto done;
1125	}
1126	/ Another thread may have won the race /
1127	if (content_filters != NULL)
1128	FREE(tmp, M_TEMP);
1129	else
1130	content_filters = tmp;
1131	}
1132
1133	if (sac->sc_unit == `0` \|\| sac->sc_unit > MAX_CONTENT_FILTER) {
1134	CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1135	error = EINVAL;
1136	} else if (content_filters[sac->sc_unit - `1`] != NULL) {
1137	CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1138	error = EADDRINUSE;
1139	} else {
1140	/*
1141	* kernel control socket kcunit numbers start at 1
1142	*/
1143	content_filters[sac->sc_unit - `1`] = cfc;
1144
1145	cfc->cf_kcref = kctlref;
1146	cfc->cf_kcunit = sac->sc_unit;
1147	TAILQ_INIT(&cfc->cf_sock_entries);
1148
1149	*unitinfo = cfc;
1150	cfil_active_count++;
1151	}
1152	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1153	done:
1154	if (error != `0` && cfc != NULL)
1155	zfree(content_filter_zone, cfc);
1156
1157	if (error == `0`)
1158	OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1159	else
1160	OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1161
1162	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1163	error, cfil_active_count, sac->sc_unit);
1164
1165	return (error);
1166	}
1167
1168	static errno_t
1169	cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1170	{
1171	#pragma unused(kctlref)
1172	errno_t error = `0`;
1173	struct content_filter *cfc;
1174	struct cfil_entry *entry;
1175	uint64_t sock_flow_id = `0`;
1176
1177	CFIL_LOG(LOG_NOTICE, "");
1178
1179	if (content_filters == NULL) {
1180	CFIL_LOG(LOG_ERR, "no content filter");
1181	error = EINVAL;
1182	goto done;
1183	}
1184	if (kcunit > MAX_CONTENT_FILTER) {
1185	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1186	kcunit, MAX_CONTENT_FILTER);
1187	error = EINVAL;
1188	goto done;
1189	}
1190
1191	cfc = (struct content_filter *)unitinfo;
1192	if (cfc == NULL)
1193	goto done;
1194
1195	cfil_rw_lock_exclusive(&cfil_lck_rw);
1196	if (content_filters[kcunit - `1`] != cfc \|\| cfc->cf_kcunit != kcunit) {
1197	CFIL_LOG(LOG_ERR, "bad unit info %u)",
1198	kcunit);
1199	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1200	goto done;
1201	}
1202	cfc->cf_flags \|= CFF_DETACHING;
1203	/*
1204	* Remove all sockets from the filter
1205	*/
1206	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1207	cfil_rw_lock_assert_held(&cfil_lck_rw, `1`);
1208
1209	verify_content_filter(cfc);
1210	/*
1211	* Accept all outstanding data by pushing to next filter
1212	* or back to socket
1213	*
1214	* TBD: Actually we should make sure all data has been pushed
1215	* back to socket
1216	*/
1217	if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1218	struct cfil_info *cfil_info = entry->cfe_cfil_info;
1219	struct socket *so = cfil_info->cfi_so;
1220	sock_flow_id = cfil_info->cfi_sock_id;
1221
1222	/ Need to let data flow immediately /
1223	entry->cfe_flags \|= CFEF_SENT_SOCK_ATTACHED \|
1224	CFEF_DATA_START;
1225
1226	/*
1227	* Respect locking hierarchy
1228	*/
1229	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1230
1231	socket_lock(so, `1`);
1232
1233	/*
1234	* When cfe_filter is NULL the filter is detached
1235	* and the entry has been removed from cf_sock_entries
1236	*/
1237	if ((so->so_cfil == NULL && so->so_cfil_db == NULL) \|\| entry->cfe_filter == NULL) {
1238	cfil_rw_lock_exclusive(&cfil_lck_rw);
1239	goto release;
1240	}
1241
1242	(void) cfil_action_data_pass(so, cfil_info, kcunit, `1`,
1243	CFM_MAX_OFFSET,
1244	CFM_MAX_OFFSET);
1245
1246	(void) cfil_action_data_pass(so, cfil_info, kcunit, `0`,
1247	CFM_MAX_OFFSET,
1248	CFM_MAX_OFFSET);
1249
1250	cfil_rw_lock_exclusive(&cfil_lck_rw);
1251
1252	/*
1253	* Check again to make sure if the cfil_info is still valid
1254	* as the socket may have been unlocked when when calling
1255	* cfil_acquire_sockbuf()
1256	*/
1257	if (entry->cfe_filter == NULL \|\|
1258	(so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1259	goto release;
1260	}
1261
1262	/ The filter is now detached /
1263	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
1264	#if LIFECYCLE_DEBUG
1265	cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1266	#endif
1267	CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1268	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1269	if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1270	cfil_filters_attached(so) == `0`) {
1271	CFIL_LOG(LOG_NOTICE, "so %llx waking",
1272	(uint64_t)VM_KERNEL_ADDRPERM(so));
1273	wakeup((caddr_t)cfil_info);
1274	}
1275
1276	/*
1277	* Remove the filter entry from the content filter
1278	* but leave the rest of the state intact as the queues
1279	* may not be empty yet
1280	*/
1281	entry->cfe_filter = NULL;
1282	entry->cfe_necp_control_unit = `0`;
1283
1284	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1285	cfc->cf_sock_count--;
1286	release:
1287	socket_unlock(so, `1`);
1288	}
1289	}
1290	verify_content_filter(cfc);
1291
1292	VERIFY(cfc->cf_sock_count == `0`);
1293
1294	/*
1295	* Make filter inactive
1296	*/
1297	content_filters[kcunit - `1`] = NULL;
1298	cfil_active_count--;
1299	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1300
1301	zfree(content_filter_zone, cfc);
1302	done:
1303	if (error == `0`)
1304	OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1305	else
1306	OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1307
1308	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1309	error, cfil_active_count, kcunit);
1310
1311	return (error);
1312	}
1313
1314	/*
1315	* cfil_acquire_sockbuf()
1316	*
1317	* Prevent any other thread from acquiring the sockbuf
1318	* We use sb_cfil_thread as a semaphore to prevent other threads from
1319	* messing with the sockbuf -- see sblock()
1320	* Note: We do not set SB_LOCK here because the thread may check or modify
1321	* SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1322	* sblock(), sbunlock() or sodefunct()
1323	*/
1324	static int
1325	cfil_acquire_sockbuf(struct socket so, struct* cfil_info cfil_info, int* outgoing)
1326	{
1327	thread_t tp = current_thread();
1328	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1329	lck_mtx_t *mutex_held;
1330	int error = `0`;
1331
1332	/*
1333	* Wait until no thread is holding the sockbuf and other content
1334	* filter threads have released the sockbuf
1335	*/
1336	while ((sb->sb_flags & SB_LOCK) \|\|
1337	(sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1338	if (so->so_proto->pr_getlock != NULL)
1339	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1340	else
1341	mutex_held = so->so_proto->pr_domain->dom_mtx;
1342
1343	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1344
1345	sb->sb_wantlock++;
1346	VERIFY(sb->sb_wantlock != `0`);
1347
1348	msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1349	NULL);
1350
1351	VERIFY(sb->sb_wantlock != `0`);
1352	sb->sb_wantlock--;
1353	}
1354	/*
1355	* Use reference count for repetitive calls on same thread
1356	*/
1357	if (sb->sb_cfil_refs == `0`) {
1358	VERIFY(sb->sb_cfil_thread == NULL);
1359	VERIFY((sb->sb_flags & SB_LOCK) == `0`);
1360
1361	sb->sb_cfil_thread = tp;
1362	sb->sb_flags \|= SB_LOCK;
1363	}
1364	sb->sb_cfil_refs++;
1365
1366	/ We acquire the socket buffer when we need to cleanup /
1367	if (cfil_info == NULL) {
1368	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1369	(uint64_t)VM_KERNEL_ADDRPERM(so));
1370	error = `0`;
1371	} else if (cfil_info->cfi_flags & CFIF_DROP) {
1372	CFIL_LOG(LOG_ERR, "so %llx drop set",
1373	(uint64_t)VM_KERNEL_ADDRPERM(so));
1374	error = EPIPE;
1375	}
1376
1377	return (error);
1378	}
1379
1380	static void
1381	cfil_release_sockbuf(struct socket so, int* outgoing)
1382	{
1383	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1384	thread_t tp = current_thread();
1385
1386	socket_lock_assert_owned(so);
1387
1388	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
1389	panic("%s sb_cfil_thread %p not current %p", __func__,
1390	sb->sb_cfil_thread, tp);
1391	/*
1392	* Don't panic if we are defunct because SB_LOCK has
1393	* been cleared by sodefunct()
1394	*/
1395	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
1396	panic("%s SB_LOCK not set on %p", __func__,
1397	sb);
1398	/*
1399	* We can unlock when the thread unwinds to the last reference
1400	*/
1401	sb->sb_cfil_refs--;
1402	if (sb->sb_cfil_refs == `0`) {
1403	sb->sb_cfil_thread = NULL;
1404	sb->sb_flags &= ~SB_LOCK;
1405
1406	if (sb->sb_wantlock > `0`)
1407	wakeup(&sb->sb_flags);
1408	}
1409	}
1410
1411	cfil_sock_id_t
1412	cfil_sock_id_from_socket(struct socket *so)
1413	{
1414	if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
1415	return (so->so_cfil->cfi_sock_id);
1416	else
1417	return (CFIL_SOCK_ID_NONE);
1418	}
1419
1420	static bool
1421	cfil_socket_safe_lock(struct inpcb *inp)
1422	{
1423	if (in_pcb_checkstate(inp, WNT_ACQUIRE, `0`) != WNT_STOPUSING) {
1424	socket_lock(inp->inp_socket, `1`);
1425	if (in_pcb_checkstate(inp, WNT_RELEASE, `1`) != WNT_STOPUSING) {
1426	return true;
1427	}
1428	socket_unlock(inp->inp_socket, `1`);
1429	}
1430	return false;
1431	}
1432
1433	static struct socket *
1434	cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1435	{
1436	struct socket *so = NULL;
1437	u_int64_t gencnt = cfil_sock_id >> `32`;
1438	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & `0x0ffffffff`);
1439	struct inpcb *inp = NULL;
1440	struct inpcbinfo *pcbinfo = NULL;
1441
1442	#if VERDICT_DEBUG
1443	CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1444	#endif
1445
1446	if (udp_only)
1447	goto find_udp;
1448
1449	pcbinfo = &tcbinfo;
1450	lck_rw_lock_shared(pcbinfo->ipi_lock);
1451	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1452	if (inp->inp_state != INPCB_STATE_DEAD &&
1453	inp->inp_socket != NULL &&
1454	inp->inp_flowhash == flowhash &&
1455	(inp->inp_socket->so_gencnt & `0x0ffffffff`) == gencnt &&
1456	inp->inp_socket->so_cfil != NULL) {
1457	if (cfil_socket_safe_lock(inp))
1458	so = inp->inp_socket;
1459	break;
1460	}
1461	}
1462	lck_rw_done(pcbinfo->ipi_lock);
1463	if (so != NULL) {
1464	goto done;
1465	}
1466
1467	find_udp:
1468
1469	pcbinfo = &udbinfo;
1470	lck_rw_lock_shared(pcbinfo->ipi_lock);
1471	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1472	if (inp->inp_state != INPCB_STATE_DEAD &&
1473	inp->inp_socket != NULL &&
1474	inp->inp_socket->so_cfil_db != NULL &&
1475	(inp->inp_socket->so_gencnt & `0x0ffffffff`) == gencnt) {
1476	if (cfil_socket_safe_lock(inp))
1477	so = inp->inp_socket;
1478	break;
1479	}
1480	}
1481	lck_rw_done(pcbinfo->ipi_lock);
1482
1483	done:
1484	if (so == NULL) {
1485	OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1486	CFIL_LOG(LOG_DEBUG,
1487	"no socket for sock_id %llx gencnt %llx flowhash %x",
1488	cfil_sock_id, gencnt, flowhash);
1489	}
1490
1491	return (so);
1492	}
1493
1494	static struct socket *
1495	cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1496	{
1497	struct socket *so = NULL;
1498	struct inpcb *inp = NULL;
1499	struct inpcbinfo *pcbinfo = &tcbinfo;
1500
1501	lck_rw_lock_shared(pcbinfo->ipi_lock);
1502	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1503	if (inp->inp_state != INPCB_STATE_DEAD &&
1504	inp->inp_socket != NULL &&
1505	uuid_compare(inp->necp_client_uuid, necp_client_uuid) == `0`) {
1506	*cfil_attached = (inp->inp_socket->so_cfil != NULL);
1507	if (cfil_socket_safe_lock(inp))
1508	so = inp->inp_socket;
1509	break;
1510	}
1511	}
1512	lck_rw_done(pcbinfo->ipi_lock);
1513	if (so != NULL) {
1514	goto done;
1515	}
1516
1517	pcbinfo = &udbinfo;
1518	lck_rw_lock_shared(pcbinfo->ipi_lock);
1519	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1520	if (inp->inp_state != INPCB_STATE_DEAD &&
1521	inp->inp_socket != NULL &&
1522	uuid_compare(inp->necp_client_uuid, necp_client_uuid) == `0`) {
1523	*cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1524	if (cfil_socket_safe_lock(inp))
1525	so = inp->inp_socket;
1526	break;
1527	}
1528	}
1529	lck_rw_done(pcbinfo->ipi_lock);
1530
1531	done:
1532	return (so);
1533	}
1534
1535	static errno_t
1536	cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1537	int flags)
1538	{
1539	#pragma unused(kctlref, flags)
1540	errno_t error = `0`;
1541	struct cfil_msg_hdr *msghdr;
1542	struct content_filter cfc = (struct* content_filter *)unitinfo;
1543	struct socket *so;
1544	struct cfil_msg_action *action_msg;
1545	struct cfil_entry *entry;
1546	struct cfil_info *cfil_info = NULL;
1547
1548	CFIL_LOG(LOG_INFO, "");
1549
1550	if (content_filters == NULL) {
1551	CFIL_LOG(LOG_ERR, "no content filter");
1552	error = EINVAL;
1553	goto done;
1554	}
1555	if (kcunit > MAX_CONTENT_FILTER) {
1556	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1557	kcunit, MAX_CONTENT_FILTER);
1558	error = EINVAL;
1559	goto done;
1560	}
1561
1562	if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
1563	CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
1564	error = EINVAL;
1565	goto done;
1566	}
1567	msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1568	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1569	CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1570	error = EINVAL;
1571	goto done;
1572	}
1573	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1574	CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1575	error = EINVAL;
1576	goto done;
1577	}
1578	/ Validate action operation /
1579	switch (msghdr->cfm_op) {
1580	case CFM_OP_DATA_UPDATE:
1581	OSIncrementAtomic(
1582	&cfil_stats.cfs_ctl_action_data_update);
1583	break;
1584	case CFM_OP_DROP:
1585	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1586	break;
1587	case CFM_OP_BLESS_CLIENT:
1588	if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1589	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1590	error = EINVAL;
1591	CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1592	msghdr->cfm_len,
1593	msghdr->cfm_op);
1594	goto done;
1595	}
1596	error = cfil_action_bless_client(kcunit, msghdr);
1597	goto done;
1598	default:
1599	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1600	CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1601	error = EINVAL;
1602	goto done;
1603	}
1604	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1605	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1606	error = EINVAL;
1607	CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1608	msghdr->cfm_len,
1609	msghdr->cfm_op);
1610	goto done;
1611	}
1612	cfil_rw_lock_shared(&cfil_lck_rw);
1613	if (cfc != (void *)content_filters[kcunit - `1`]) {
1614	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1615	kcunit);
1616	error = EINVAL;
1617	cfil_rw_unlock_shared(&cfil_lck_rw);
1618	goto done;
1619	}
1620	cfil_rw_unlock_shared(&cfil_lck_rw);
1621
1622	// Search for socket (TCP+UDP and lock so)
1623	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1624	if (so == NULL) {
1625	CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1626	msghdr->cfm_sock_id);
1627	error = EINVAL;
1628	goto done;
1629	}
1630
1631	cfil_info = so->so_cfil_db != NULL ?
1632	cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1633
1634	if (cfil_info == NULL) {
1635	CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1636	(uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1637	error = EINVAL;
1638	goto unlock;
1639	} else if (cfil_info->cfi_flags & CFIF_DROP) {
1640	CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1641	(uint64_t)VM_KERNEL_ADDRPERM(so));
1642	error = EINVAL;
1643	goto unlock;
1644	}
1645	entry = &cfil_info->cfi_entries[kcunit - `1`];
1646	if (entry->cfe_filter == NULL) {
1647	CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1648	(uint64_t)VM_KERNEL_ADDRPERM(so));
1649	error = EINVAL;
1650	goto unlock;
1651	}
1652
1653	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
1654	entry->cfe_flags \|= CFEF_DATA_START;
1655	else {
1656	CFIL_LOG(LOG_ERR,
1657	"so %llx attached not sent for %u",
1658	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1659	error = EINVAL;
1660	goto unlock;
1661	}
1662
1663	microuptime(&entry->cfe_last_action);
1664	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1665
1666	action_msg = (struct cfil_msg_action *)msghdr;
1667
1668	switch (msghdr->cfm_op) {
1669	case CFM_OP_DATA_UPDATE:
1670	#if VERDICT_DEBUG
1671	CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1672	(uint64_t)VM_KERNEL_ADDRPERM(so),
1673	cfil_info->cfi_sock_id,
1674	action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1675	action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1676	#endif
1677	if (action_msg->cfa_out_peek_offset != `0` \|\|
1678	action_msg->cfa_out_pass_offset != `0`)
1679	error = cfil_action_data_pass(so, cfil_info, kcunit, `1`,
1680	action_msg->cfa_out_pass_offset,
1681	action_msg->cfa_out_peek_offset);
1682	if (error == EJUSTRETURN)
1683	error = `0`;
1684	if (error != `0`)
1685	break;
1686	if (action_msg->cfa_in_peek_offset != `0` \|\|
1687	action_msg->cfa_in_pass_offset != `0`)
1688	error = cfil_action_data_pass(so, cfil_info, kcunit, `0`,
1689	action_msg->cfa_in_pass_offset,
1690	action_msg->cfa_in_peek_offset);
1691	if (error == EJUSTRETURN)
1692	error = `0`;
1693	break;
1694
1695	case CFM_OP_DROP:
1696	error = cfil_action_drop(so, cfil_info, kcunit);
1697	break;
1698
1699	default:
1700	error = EINVAL;
1701	break;
1702	}
1703	unlock:
1704	socket_unlock(so, `1`);
1705	done:
1706	mbuf_freem(m);
1707
1708	if (error == `0`)
1709	OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1710	else
1711	OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1712
1713	return (error);
1714	}
1715
1716	static errno_t
1717	cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1718	int opt, void data, size_t len)
1719	{
1720	#pragma unused(kctlref, opt)
1721	struct cfil_info *cfil_info = NULL;
1722	errno_t error = `0`;
1723	struct content_filter cfc = (struct* content_filter *)unitinfo;
1724
1725	CFIL_LOG(LOG_NOTICE, "");
1726
1727	cfil_rw_lock_shared(&cfil_lck_rw);
1728
1729	if (content_filters == NULL) {
1730	CFIL_LOG(LOG_ERR, "no content filter");
1731	error = EINVAL;
1732	goto done;
1733	}
1734	if (kcunit > MAX_CONTENT_FILTER) {
1735	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1736	kcunit, MAX_CONTENT_FILTER);
1737	error = EINVAL;
1738	goto done;
1739	}
1740	if (cfc != (void *)content_filters[kcunit - `1`]) {
1741	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1742	kcunit);
1743	error = EINVAL;
1744	goto done;
1745	}
1746	switch (opt) {
1747	case CFIL_OPT_NECP_CONTROL_UNIT:
1748	if (len < sizeof*(uint32_t)) {
1749	CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1750	error = EINVAL;
1751	goto done;
1752	}
1753	if (data != NULL) {
1754	(uint32_t )data = cfc->cf_necp_control_unit;
1755	}
1756	break;
1757	case CFIL_OPT_GET_SOCKET_INFO:
1758	if (len != sizeof(struct* cfil_opt_sock_info)) {
1759	CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1760	error = EINVAL;
1761	goto done;
1762	}
1763	if (data == NULL) {
1764	CFIL_LOG(LOG_ERR, "data not passed");
1765	error = EINVAL;
1766	goto done;
1767	}
1768
1769	struct cfil_opt_sock_info *sock_info =
1770	(struct cfil_opt_sock_info *) data;
1771
1772	// Unlock here so that we never hold both cfil_lck_rw and the
1773	// socket_lock at the same time. Otherwise, this can deadlock
1774	// because soclose() takes the socket_lock and then exclusive
1775	// cfil_lck_rw and we require the opposite order.
1776
1777	// WARNING: Be sure to never use anything protected
1778	// by cfil_lck_rw beyond this point.
1779	// WARNING: Be sure to avoid fallthrough and
1780	// goto return_already_unlocked from this branch.
1781	cfil_rw_unlock_shared(&cfil_lck_rw);
1782
1783	// Search (TCP+UDP) and lock socket
1784	struct socket *sock =
1785	cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
1786	if (sock == NULL) {
1787	#if LIFECYCLE_DEBUG
1788	CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1789	sock_info->cfs_sock_id);
1790	#endif
1791	error = ENOENT;
1792	goto return_already_unlocked;
1793	}
1794
1795	cfil_info = (sock->so_cfil_db != NULL) ?
1796	cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
1797
1798	if (cfil_info == NULL) {
1799	#if LIFECYCLE_DEBUG
1800	CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1801	(uint64_t)VM_KERNEL_ADDRPERM(sock));
1802	#endif
1803	error = EINVAL;
1804	socket_unlock(sock, `1`);
1805	goto return_already_unlocked;
1806	}
1807
1808	// Fill out family, type, and protocol
1809	sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1810	sock_info->cfs_sock_type = sock->so_proto->pr_type;
1811	sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1812
1813	// Source and destination addresses
1814	struct inpcb *inp = sotoinpcb(sock);
1815	if (inp->inp_vflag & INP_IPV6) {
1816	struct in6_addr laddr = NULL, faddr = NULL;
1817	u_int16_t lport = `0`, fport = `0`;
1818
1819	cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
1820	&laddr, &faddr, &lport, &fport);
1821	fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1822	fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1823	} else if (inp->inp_vflag & INP_IPV4) {
1824	struct in_addr laddr = {`0`}, faddr = {`0`};
1825	u_int16_t lport = `0`, fport = `0`;
1826
1827	cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
1828	&laddr, &faddr, &lport, &fport);
1829	fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1830	fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1831	}
1832
1833	// Set the pid info
1834	sock_info->cfs_pid = sock->last_pid;
1835	memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
1836
1837	if (sock->so_flags & SOF_DELEGATED) {
1838	sock_info->cfs_e_pid = sock->e_pid;
1839	memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1840	} else {
1841	sock_info->cfs_e_pid = sock->last_pid;
1842	memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1843	}
1844
1845	socket_unlock(sock, `1`);
1846
1847	goto return_already_unlocked;
1848	default:
1849	error = ENOPROTOOPT;
1850	break;
1851	}
1852	done:
1853	cfil_rw_unlock_shared(&cfil_lck_rw);
1854
1855	return (error);
1856
1857	return_already_unlocked:
1858
1859	return (error);
1860	}
1861
1862	static errno_t
1863	cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1864	int opt, void *data, size_t len)
1865	{
1866	#pragma unused(kctlref, opt)
1867	errno_t error = `0`;
1868	struct content_filter cfc = (struct* content_filter *)unitinfo;
1869
1870	CFIL_LOG(LOG_NOTICE, "");
1871
1872	cfil_rw_lock_exclusive(&cfil_lck_rw);
1873
1874	if (content_filters == NULL) {
1875	CFIL_LOG(LOG_ERR, "no content filter");
1876	error = EINVAL;
1877	goto done;
1878	}
1879	if (kcunit > MAX_CONTENT_FILTER) {
1880	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1881	kcunit, MAX_CONTENT_FILTER);
1882	error = EINVAL;
1883	goto done;
1884	}
1885	if (cfc != (void *)content_filters[kcunit - `1`]) {
1886	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1887	kcunit);
1888	error = EINVAL;
1889	goto done;
1890	}
1891	switch (opt) {
1892	case CFIL_OPT_NECP_CONTROL_UNIT:
1893	if (len < sizeof(uint32_t)) {
1894	CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1895	"len too small %lu", len);
1896	error = EINVAL;
1897	goto done;
1898	}
1899	if (cfc->cf_necp_control_unit != `0`) {
1900	CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1901	"already set %u",
1902	cfc->cf_necp_control_unit);
1903	error = EINVAL;
1904	goto done;
1905	}
1906	cfc->cf_necp_control_unit = (uint32_t )data;
1907	break;
1908	default:
1909	error = ENOPROTOOPT;
1910	break;
1911	}
1912	done:
1913	cfil_rw_unlock_exclusive(&cfil_lck_rw);
1914
1915	return (error);
1916	}
1917
1918
1919	static void
1920	cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void unitinfo, int* flags)
1921	{
1922	#pragma unused(kctlref, flags)
1923	struct content_filter cfc = (struct* content_filter *)unitinfo;
1924	struct socket *so = NULL;
1925	int error;
1926	struct cfil_entry *entry;
1927	struct cfil_info *cfil_info = NULL;
1928
1929	CFIL_LOG(LOG_INFO, "");
1930
1931	if (content_filters == NULL) {
1932	CFIL_LOG(LOG_ERR, "no content filter");
1933	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1934	return;
1935	}
1936	if (kcunit > MAX_CONTENT_FILTER) {
1937	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1938	kcunit, MAX_CONTENT_FILTER);
1939	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1940	return;
1941	}
1942	cfil_rw_lock_shared(&cfil_lck_rw);
1943	if (cfc != (void *)content_filters[kcunit - `1`]) {
1944	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1945	kcunit);
1946	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1947	goto done;
1948	}
1949	/ Let's assume the flow control is lifted /
1950	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
1951	if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
1952	cfil_rw_lock_exclusive(&cfil_lck_rw);
1953
1954	cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
1955
1956	cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
1957	LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
1958	}
1959	/*
1960	* Flow control will be raised again as soon as an entry cannot enqueue
1961	* to the kernel control socket
1962	*/
1963	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == `0`) {
1964	verify_content_filter(cfc);
1965
1966	cfil_rw_lock_assert_held(&cfil_lck_rw, `0`);
1967
1968	/ Find an entry that is flow controlled /
1969	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1970	if (entry->cfe_cfil_info == NULL \|\|
1971	entry->cfe_cfil_info->cfi_so == NULL)
1972	continue;
1973	if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == `0`)
1974	continue;
1975	}
1976	if (entry == NULL)
1977	break;
1978
1979	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
1980
1981	cfil_info = entry->cfe_cfil_info;
1982	so = cfil_info->cfi_so;
1983
1984	cfil_rw_unlock_shared(&cfil_lck_rw);
1985	socket_lock(so, `1`);
1986
1987	do {
1988	error = cfil_acquire_sockbuf(so, cfil_info, `1`);
1989	if (error == `0`)
1990	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, `1`);
1991	cfil_release_sockbuf(so, `1`);
1992	if (error != `0`)
1993	break;
1994
1995	error = cfil_acquire_sockbuf(so, cfil_info, `0`);
1996	if (error == `0`)
1997	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, `0`);
1998	cfil_release_sockbuf(so, `0`);
1999	} while (`0`);
2000
2001	socket_lock_assert_owned(so);
2002	socket_unlock(so, `1`);
2003
2004	cfil_rw_lock_shared(&cfil_lck_rw);
2005	}
2006	done:
2007	cfil_rw_unlock_shared(&cfil_lck_rw);
2008	}
2009
2010	void
2011	cfil_init(void)
2012	{
2013	struct kern_ctl_reg kern_ctl;
2014	errno_t error = `0`;
2015	vm_size_t content_filter_size = `0`; / size of content_filter /
2016	vm_size_t cfil_info_size = `0`; / size of cfil_info /
2017	vm_size_t cfil_hash_entry_size = `0`; / size of cfil_hash_entry /
2018	vm_size_t cfil_db_size = `0`; / size of cfil_db /
2019	unsigned int mbuf_limit = `0`;
2020
2021	CFIL_LOG(LOG_NOTICE, "");
2022
2023	/*
2024	* Compile time verifications
2025	*/
2026	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2027	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == `0`);
2028	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == `0`);
2029	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == `0`);
2030
2031	/*
2032	* Runtime time verifications
2033	*/
2034	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2035	sizeof(uint32_t)));
2036	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2037	sizeof(uint32_t)));
2038	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2039	sizeof(uint32_t)));
2040	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2041	sizeof(uint32_t)));
2042
2043	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2044	sizeof(uint32_t)));
2045	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2046	sizeof(uint32_t)));
2047
2048	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2049	sizeof(uint32_t)));
2050	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2051	sizeof(uint32_t)));
2052	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2053	sizeof(uint32_t)));
2054	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2055	sizeof(uint32_t)));
2056
2057	/*
2058	* Zone for content filters kernel control sockets
2059	*/
2060	content_filter_size = sizeof(struct content_filter);
2061	content_filter_zone = zinit(content_filter_size,
2062	CONTENT_FILTER_ZONE_MAX * content_filter_size,
2063	`0`,
2064	CONTENT_FILTER_ZONE_NAME);
2065	if (content_filter_zone == NULL) {
2066	panic("%s: zinit(%s) failed", __func__,
2067	CONTENT_FILTER_ZONE_NAME);
2068	/ NOTREACHED /
2069	}
2070	zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2071	zone_change(content_filter_zone, Z_EXPAND, TRUE);
2072
2073	/*
2074	* Zone for per socket content filters
2075	*/
2076	cfil_info_size = sizeof(struct cfil_info);
2077	cfil_info_zone = zinit(cfil_info_size,
2078	CFIL_INFO_ZONE_MAX * cfil_info_size,
2079	`0`,
2080	CFIL_INFO_ZONE_NAME);
2081	if (cfil_info_zone == NULL) {
2082	panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2083	/ NOTREACHED /
2084	}
2085	zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2086	zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2087
2088	/*
2089	* Zone for content filters cfil hash entries and db
2090	*/
2091	cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2092	cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2093	CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2094	`0`,
2095	CFIL_HASH_ENTRY_ZONE_NAME);
2096	if (cfil_hash_entry_zone == NULL) {
2097	panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2098	/ NOTREACHED /
2099	}
2100	zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2101	zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2102
2103	cfil_db_size = sizeof(struct cfil_db);
2104	cfil_db_zone = zinit(cfil_db_size,
2105	CFIL_DB_ZONE_MAX * cfil_db_size,
2106	`0`,
2107	CFIL_DB_ZONE_NAME);
2108	if (cfil_db_zone == NULL) {
2109	panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2110	/ NOTREACHED /
2111	}
2112	zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2113	zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2114
2115	/*
2116	* Allocate locks
2117	*/
2118	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2119	if (cfil_lck_grp_attr == NULL) {
2120	panic("%s: lck_grp_attr_alloc_init failed", __func__);
2121	/ NOTREACHED /
2122	}
2123	cfil_lck_grp = lck_grp_alloc_init("content filter",
2124	cfil_lck_grp_attr);
2125	if (cfil_lck_grp == NULL) {
2126	panic("%s: lck_grp_alloc_init failed", __func__);
2127	/ NOTREACHED /
2128	}
2129	cfil_lck_attr = lck_attr_alloc_init();
2130	if (cfil_lck_attr == NULL) {
2131	panic("%s: lck_attr_alloc_init failed", __func__);
2132	/ NOTREACHED /
2133	}
2134	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2135
2136	TAILQ_INIT(&cfil_sock_head);
2137
2138	/*
2139	* Register kernel control
2140	*/
2141	bzero(&kern_ctl, sizeof(kern_ctl));
2142	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2143	sizeof(kern_ctl.ctl_name));
2144	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED \| CTL_FLAG_REG_EXTENDED;
2145	kern_ctl.ctl_sendsize = `512` * `1024`; / enough? /
2146	kern_ctl.ctl_recvsize = `512` * `1024`; / enough? /
2147	kern_ctl.ctl_connect = cfil_ctl_connect;
2148	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2149	kern_ctl.ctl_send = cfil_ctl_send;
2150	kern_ctl.ctl_getopt = cfil_ctl_getopt;
2151	kern_ctl.ctl_setopt = cfil_ctl_setopt;
2152	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2153	error = ctl_register(&kern_ctl, &cfil_kctlref);
2154	if (error != `0`) {
2155	CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2156	return;
2157	}
2158
2159	// Spawn thread for gargage collection
2160	if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2161	&cfil_udp_gc_thread) != KERN_SUCCESS) {
2162	panic_plain("%s: Can't create UDP GC thread", __func__);
2163	/ NOTREACHED /
2164	}
2165	/ this must not fail /
2166	VERIFY(cfil_udp_gc_thread != NULL);
2167
2168	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
2169	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2170	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2171	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2172	}
2173
2174	struct cfil_info *
2175	cfil_info_alloc(struct socket so, struct* cfil_hash_entry *hash_entry)
2176	{
2177	int kcunit;
2178	struct cfil_info *cfil_info = NULL;
2179	struct inpcb *inp = sotoinpcb(so);
2180
2181	CFIL_LOG(LOG_INFO, "");
2182
2183	socket_lock_assert_owned(so);
2184
2185	cfil_info = zalloc(cfil_info_zone);
2186	if (cfil_info == NULL)
2187	goto done;
2188	bzero(cfil_info, sizeof(struct cfil_info));
2189
2190	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2191	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2192
2193	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2194	struct cfil_entry *entry;
2195
2196	entry = &cfil_info->cfi_entries[kcunit - `1`];
2197	entry->cfe_cfil_info = cfil_info;
2198
2199	/ Initialize the filter entry /
2200	entry->cfe_filter = NULL;
2201	entry->cfe_flags = `0`;
2202	entry->cfe_necp_control_unit = `0`;
2203	entry->cfe_snd.cfe_pass_offset = `0`;
2204	entry->cfe_snd.cfe_peek_offset = `0`;
2205	entry->cfe_snd.cfe_peeked = `0`;
2206	entry->cfe_rcv.cfe_pass_offset = `0`;
2207	entry->cfe_rcv.cfe_peek_offset = `0`;
2208	entry->cfe_rcv.cfe_peeked = `0`;
2209	/*
2210	* Timestamp the last action to avoid pre-maturely
2211	* triggering garbage collection
2212	*/
2213	microuptime(&entry->cfe_last_action);
2214
2215	cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2216	cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2217	cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2218	cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2219	}
2220
2221	cfil_rw_lock_exclusive(&cfil_lck_rw);
2222
2223	/*
2224	* Create a cfi_sock_id that's not the socket pointer!
2225	*/
2226
2227	if (hash_entry == NULL) {
2228	// This is the TCP case, cfil_info is tracked per socket
2229	if (inp->inp_flowhash == `0`)
2230	inp->inp_flowhash = inp_calc_flowhash(inp);
2231
2232	so->so_cfil = cfil_info;
2233	cfil_info->cfi_so = so;
2234	cfil_info->cfi_sock_id =
2235	((so->so_gencnt << `32`) \| inp->inp_flowhash);
2236	} else {
2237	// This is the UDP case, cfil_info is tracked in per-socket hash
2238	cfil_info->cfi_so = so;
2239	hash_entry->cfentry_cfil = cfil_info;
2240	cfil_info->cfi_hash_entry = hash_entry;
2241	cfil_info->cfi_sock_id = ((so->so_gencnt << `32`) \| (hash_entry->cfentry_flowhash & `0xffffffff`));
2242	CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2243	inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2244
2245	// Wake up gc thread if this is first flow added
2246	if (cfil_sock_udp_attached_count == `0`) {
2247	thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2248	}
2249
2250	cfil_sock_udp_attached_count++;
2251	}
2252
2253	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2254
2255	cfil_sock_attached_count++;
2256
2257	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2258
2259	done:
2260	if (cfil_info != NULL)
2261	OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2262	else
2263	OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2264
2265	return (cfil_info);
2266	}
2267
2268	int
2269	cfil_info_attach_unit(struct socket so, uint32_t filter_control_unit, struct* cfil_info *cfil_info)
2270	{
2271	int kcunit;
2272	int attached = `0`;
2273
2274	CFIL_LOG(LOG_INFO, "");
2275
2276	socket_lock_assert_owned(so);
2277
2278	cfil_rw_lock_exclusive(&cfil_lck_rw);
2279
2280	for (kcunit = `1`;
2281	content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2282	kcunit++) {
2283	struct content_filter *cfc = content_filters[kcunit - `1`];
2284	struct cfil_entry *entry;
2285
2286	if (cfc == NULL)
2287	continue;
2288	if (cfc->cf_necp_control_unit != filter_control_unit)
2289	continue;
2290
2291	entry = &cfil_info->cfi_entries[kcunit - `1`];
2292
2293	entry->cfe_filter = cfc;
2294	entry->cfe_necp_control_unit = filter_control_unit;
2295	TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2296	cfc->cf_sock_count++;
2297	verify_content_filter(cfc);
2298	attached = `1`;
2299	entry->cfe_flags \|= CFEF_CFIL_ATTACHED;
2300	break;
2301	}
2302
2303	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2304
2305	return (attached);
2306	}
2307
2308	static void
2309	cfil_info_free(struct cfil_info *cfil_info)
2310	{
2311	int kcunit;
2312	uint64_t in_drain = `0`;
2313	uint64_t out_drained = `0`;
2314
2315	if (cfil_info == NULL)
2316	return;
2317
2318	CFIL_LOG(LOG_INFO, "");
2319
2320	cfil_rw_lock_exclusive(&cfil_lck_rw);
2321
2322	for (kcunit = `1`;
2323	content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2324	kcunit++) {
2325	struct cfil_entry *entry;
2326	struct content_filter *cfc;
2327
2328	entry = &cfil_info->cfi_entries[kcunit - `1`];
2329
2330	/ Don't be silly and try to detach twice /
2331	if (entry->cfe_filter == NULL)
2332	continue;
2333
2334	cfc = content_filters[kcunit - `1`];
2335
2336	VERIFY(cfc == entry->cfe_filter);
2337
2338	entry->cfe_filter = NULL;
2339	entry->cfe_necp_control_unit = `0`;
2340	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2341	cfc->cf_sock_count--;
2342
2343	verify_content_filter(cfc);
2344	}
2345	if (cfil_info->cfi_hash_entry != NULL)
2346	cfil_sock_udp_attached_count--;
2347	cfil_sock_attached_count--;
2348	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2349
2350	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2351	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2352
2353	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2354	struct cfil_entry *entry;
2355
2356	entry = &cfil_info->cfi_entries[kcunit - `1`];
2357	out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2358	in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2359	out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2360	in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2361	}
2362	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2363
2364	if (out_drained)
2365	OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2366	if (in_drain)
2367	OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2368
2369	zfree(cfil_info_zone, cfil_info);
2370	}
2371
2372	/*
2373	* Entry point from Sockets layer
2374	* The socket is locked.
2375	*/
2376	errno_t
2377	cfil_sock_attach(struct socket *so)
2378	{
2379	errno_t error = `0`;
2380	uint32_t filter_control_unit;
2381
2382	socket_lock_assert_owned(so);
2383
2384	/ Limit ourselves to TCP that are not MPTCP subflows /
2385	if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2386	so->so_proto->pr_domain->dom_family != PF_INET6) \|\|
2387	so->so_proto->pr_type != SOCK_STREAM \|\|
2388	so->so_proto->pr_protocol != IPPROTO_TCP \|\|
2389	(so->so_flags & SOF_MP_SUBFLOW) != `0` \|\|
2390	(so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != `0`)
2391	goto done;
2392
2393	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2394	if (filter_control_unit == `0`)
2395	goto done;
2396
2397	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != `0`) {
2398	OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2399	goto done;
2400	}
2401	if (cfil_active_count == `0`) {
2402	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2403	goto done;
2404	}
2405	if (so->so_cfil != NULL) {
2406	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2407	CFIL_LOG(LOG_ERR, "already attached");
2408	} else {
2409	cfil_info_alloc(so, NULL);
2410	if (so->so_cfil == NULL) {
2411	error = ENOMEM;
2412	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2413	goto done;
2414	}
2415	}
2416	if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == `0`) {
2417	CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2418	filter_control_unit);
2419	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2420	goto done;
2421	}
2422	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2423	(uint64_t)VM_KERNEL_ADDRPERM(so),
2424	filter_control_unit, so->so_cfil->cfi_sock_id);
2425
2426	so->so_flags \|= SOF_CONTENT_FILTER;
2427	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2428
2429	/ Hold a reference on the socket /
2430	so->so_usecount++;
2431
2432	error = cfil_dispatch_attach_event(so, so->so_cfil, filter_control_unit);
2433	/ We can recover from flow control or out of memory errors /
2434	if (error == ENOBUFS \|\| error == ENOMEM)
2435	error = `0`;
2436	else if (error != `0`)
2437	goto done;
2438
2439	CFIL_INFO_VERIFY(so->so_cfil);
2440	done:
2441	return (error);
2442	}
2443
2444	/*
2445	* Entry point from Sockets layer
2446	* The socket is locked.
2447	*/
2448	errno_t
2449	cfil_sock_detach(struct socket *so)
2450	{
2451	if (IS_UDP(so)) {
2452	cfil_db_free(so);
2453	return (`0`);
2454	}
2455
2456	if (so->so_cfil) {
2457	if (so->so_flags & SOF_CONTENT_FILTER) {
2458	so->so_flags &= ~SOF_CONTENT_FILTER;
2459	VERIFY(so->so_usecount > `0`);
2460	so->so_usecount--;
2461	}
2462	cfil_info_free(so->so_cfil);
2463	so->so_cfil = NULL;
2464	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2465	}
2466	return (`0`);
2467	}
2468
2469	static int
2470	cfil_dispatch_attach_event(struct socket so, struct* cfil_info *cfil_info, uint32_t filter_control_unit)
2471	{
2472	errno_t error = `0`;
2473	struct cfil_entry *entry = NULL;
2474	struct cfil_msg_sock_attached msg_attached;
2475	uint32_t kcunit;
2476	struct content_filter *cfc = NULL;
2477
2478	socket_lock_assert_owned(so);
2479
2480	cfil_rw_lock_shared(&cfil_lck_rw);
2481
2482	if (so->so_proto == NULL \|\| so->so_proto->pr_domain == NULL) {
2483	error = EINVAL;
2484	goto done;
2485	}
2486	/*
2487	* Find the matching filter unit
2488	*/
2489	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2490	cfc = content_filters[kcunit - `1`];
2491
2492	if (cfc == NULL)
2493	continue;
2494	if (cfc->cf_necp_control_unit != filter_control_unit)
2495	continue;
2496	entry = &cfil_info->cfi_entries[kcunit - `1`];
2497	if (entry->cfe_filter == NULL)
2498	continue;
2499
2500	VERIFY(cfc == entry->cfe_filter);
2501
2502	break;
2503	}
2504
2505	if (entry == NULL \|\| entry->cfe_filter == NULL)
2506	goto done;
2507
2508	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
2509	goto done;
2510
2511	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
2512	(uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);
2513
2514	/ Would be wasteful to try when flow controlled /
2515	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2516	error = ENOBUFS;
2517	goto done;
2518	}
2519
2520	bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2521	msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2522	msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2523	msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2524	msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2525	msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2526
2527	msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2528	msg_attached.cfs_sock_type = so->so_proto->pr_type;
2529	msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2530	msg_attached.cfs_pid = so->last_pid;
2531	memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2532	if (so->so_flags & SOF_DELEGATED) {
2533	msg_attached.cfs_e_pid = so->e_pid;
2534	memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2535	} else {
2536	msg_attached.cfs_e_pid = so->last_pid;
2537	memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2538	}
2539
2540	#if LIFECYCLE_DEBUG
2541	CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2542	entry->cfe_cfil_info->cfi_sock_id);
2543	#endif
2544
2545	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2546	entry->cfe_filter->cf_kcunit,
2547	&msg_attached,
2548	sizeof(struct cfil_msg_sock_attached),
2549	CTL_DATA_EOR);
2550	if (error != `0`) {
2551	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2552	goto done;
2553	}
2554	microuptime(&entry->cfe_last_event);
2555	cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
2556	cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
2557
2558	entry->cfe_flags \|= CFEF_SENT_SOCK_ATTACHED;
2559	OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
2560	done:
2561
2562	/ We can recover from flow control /
2563	if (error == ENOBUFS) {
2564	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
2565	OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
2566
2567	if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2568	cfil_rw_lock_exclusive(&cfil_lck_rw);
2569
2570	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
2571
2572	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2573	} else {
2574	if (error != `0`)
2575	OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
2576
2577	cfil_rw_unlock_shared(&cfil_lck_rw);
2578	}
2579	return (error);
2580	}
2581
2582	static int
2583	cfil_dispatch_disconnect_event(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
2584	{
2585	errno_t error = `0`;
2586	struct mbuf *msg = NULL;
2587	struct cfil_entry *entry;
2588	struct cfe_buf *entrybuf;
2589	struct cfil_msg_hdr msg_disconnected;
2590	struct content_filter *cfc;
2591
2592	socket_lock_assert_owned(so);
2593
2594	cfil_rw_lock_shared(&cfil_lck_rw);
2595
2596	entry = &cfil_info->cfi_entries[kcunit - `1`];
2597	if (outgoing)
2598	entrybuf = &entry->cfe_snd;
2599	else
2600	entrybuf = &entry->cfe_rcv;
2601
2602	cfc = entry->cfe_filter;
2603	if (cfc == NULL)
2604	goto done;
2605
2606	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2607	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2608
2609	/*
2610	* Send the disconnection event once
2611	*/
2612	if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) \|\|
2613	(!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
2614	CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
2615	(uint64_t)VM_KERNEL_ADDRPERM(so));
2616	goto done;
2617	}
2618
2619	/*
2620	* We're not disconnected as long as some data is waiting
2621	* to be delivered to the filter
2622	*/
2623	if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == `0`) {
2624	CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
2625	(uint64_t)VM_KERNEL_ADDRPERM(so));
2626	error = EBUSY;
2627	goto done;
2628	}
2629	/ Would be wasteful to try when flow controlled /
2630	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2631	error = ENOBUFS;
2632	goto done;
2633	}
2634
2635	#if LIFECYCLE_DEBUG
2636	cfil_info_log(LOG_ERR, cfil_info, outgoing ?
2637	"CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2638	"CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2639	#endif
2640
2641	bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
2642	msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
2643	msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
2644	msg_disconnected.cfm_type = CFM_TYPE_EVENT;
2645	msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
2646	CFM_OP_DISCONNECT_IN;
2647	msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2648	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2649	entry->cfe_filter->cf_kcunit,
2650	&msg_disconnected,
2651	sizeof(struct cfil_msg_hdr),
2652	CTL_DATA_EOR);
2653	if (error != `0`) {
2654	CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2655	mbuf_freem(msg);
2656	goto done;
2657	}
2658	microuptime(&entry->cfe_last_event);
2659	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
2660
2661	/ Remember we have sent the disconnection message /
2662	if (outgoing) {
2663	entry->cfe_flags \|= CFEF_SENT_DISCONNECT_OUT;
2664	OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2665	} else {
2666	entry->cfe_flags \|= CFEF_SENT_DISCONNECT_IN;
2667	OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2668	}
2669	done:
2670	if (error == ENOBUFS) {
2671	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
2672	OSIncrementAtomic(
2673	&cfil_stats.cfs_disconnect_event_flow_control);
2674
2675	if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2676	cfil_rw_lock_exclusive(&cfil_lck_rw);
2677
2678	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
2679
2680	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2681	} else {
2682	if (error != `0`)
2683	OSIncrementAtomic(
2684	&cfil_stats.cfs_disconnect_event_fail);
2685
2686	cfil_rw_unlock_shared(&cfil_lck_rw);
2687	}
2688	return (error);
2689	}
2690
2691	int
2692	cfil_dispatch_closed_event(struct socket so, struct* cfil_info cfil_info, int* kcunit)
2693	{
2694	struct cfil_entry *entry;
2695	struct cfil_msg_sock_closed msg_closed;
2696	errno_t error = `0`;
2697	struct content_filter *cfc;
2698
2699	socket_lock_assert_owned(so);
2700
2701	cfil_rw_lock_shared(&cfil_lck_rw);
2702
2703	entry = &cfil_info->cfi_entries[kcunit - `1`];
2704	cfc = entry->cfe_filter;
2705	if (cfc == NULL)
2706	goto done;
2707
2708	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
2709	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2710
2711	/ Would be wasteful to try when flow controlled /
2712	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2713	error = ENOBUFS;
2714	goto done;
2715	}
2716	/*
2717	* Send a single closed message per filter
2718	*/
2719	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != `0`)
2720	goto done;
2721	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`)
2722	goto done;
2723
2724	microuptime(&entry->cfe_last_event);
2725	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
2726
2727	bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
2728	msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
2729	msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
2730	msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
2731	msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
2732	msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2733	msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
2734	msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
2735	memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY);
2736	memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY);
2737	msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
2738
2739	#if LIFECYCLE_DEBUG
2740	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
2741	#endif
2742	/ for debugging*
2743	if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2744	msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2745	}
2746	for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2747	CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2748	}
2749	*/
2750
2751	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2752	entry->cfe_filter->cf_kcunit,
2753	&msg_closed,
2754	sizeof(struct cfil_msg_sock_closed),
2755	CTL_DATA_EOR);
2756	if (error != `0`) {
2757	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
2758	error);
2759	goto done;
2760	}
2761
2762	entry->cfe_flags \|= CFEF_SENT_SOCK_CLOSED;
2763	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
2764	done:
2765	/ We can recover from flow control /
2766	if (error == ENOBUFS) {
2767	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
2768	OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
2769
2770	if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2771	cfil_rw_lock_exclusive(&cfil_lck_rw);
2772
2773	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
2774
2775	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2776	} else {
2777	if (error != `0`)
2778	OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
2779
2780	cfil_rw_unlock_shared(&cfil_lck_rw);
2781	}
2782
2783	return (error);
2784	}
2785
2786	static void
2787	fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2788	struct in6_addr *ip6, u_int16_t port)
2789	{
2790	struct sockaddr_in6 *sin6 = &sin46->sin6;
2791
2792	sin6->sin6_family = AF_INET6;
2793	sin6->sin6_len = sizeof(*sin6);
2794	sin6->sin6_port = port;
2795	sin6->sin6_addr = *ip6;
2796	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
2797	sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[`1`]);
2798	sin6->sin6_addr.s6_addr16[`1`] = `0`;
2799	}
2800	}
2801
2802	static void
2803	fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2804	struct in_addr ip, u_int16_t port)
2805	{
2806	struct sockaddr_in *sin = &sin46->sin;
2807
2808	sin->sin_family = AF_INET;
2809	sin->sin_len = sizeof(*sin);
2810	sin->sin_port = port;
2811	sin->sin_addr.s_addr = ip.s_addr;
2812	}
2813
2814	static void
2815	cfil_get_flow_address_v6(struct cfil_hash_entry entry, struct* inpcb *inp,
2816	struct in6_addr laddr, struct in6_addr faddr,
2817	u_int16_t lport, u_int16_t fport)
2818	{
2819	if (entry != NULL) {
2820	*laddr = &entry->cfentry_laddr.addr6;
2821	*faddr = &entry->cfentry_faddr.addr6;
2822	*lport = entry->cfentry_lport;
2823	*fport = entry->cfentry_fport;
2824	} else {
2825	*laddr = &inp->in6p_laddr;
2826	*faddr = &inp->in6p_faddr;
2827	*lport = inp->inp_lport;
2828	*fport = inp->inp_fport;
2829	}
2830	}
2831
2832	static void
2833	cfil_get_flow_address(struct cfil_hash_entry entry, struct* inpcb *inp,
2834	struct in_addr laddr, struct* in_addr *faddr,
2835	u_int16_t lport, u_int16_t fport)
2836	{
2837	if (entry != NULL) {
2838	*laddr = entry->cfentry_laddr.addr46.ia46_addr4;
2839	*faddr = entry->cfentry_faddr.addr46.ia46_addr4;
2840	*lport = entry->cfentry_lport;
2841	*fport = entry->cfentry_fport;
2842	} else {
2843	*laddr = inp->inp_laddr;
2844	*faddr = inp->inp_faddr;
2845	*lport = inp->inp_lport;
2846	*fport = inp->inp_fport;
2847	}
2848	}
2849
2850	static int
2851	cfil_dispatch_data_event(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
2852	struct mbuf data, unsigned* int copyoffset, unsigned int copylen)
2853	{
2854	errno_t error = `0`;
2855	struct mbuf *copy = NULL;
2856	struct mbuf *msg = NULL;
2857	unsigned int one = `1`;
2858	struct cfil_msg_data_event *data_req;
2859	size_t hdrsize;
2860	struct inpcb inp = (struct* inpcb *)so->so_pcb;
2861	struct cfil_entry *entry;
2862	struct cfe_buf *entrybuf;
2863	struct content_filter *cfc;
2864	struct timeval tv;
2865
2866	cfil_rw_lock_shared(&cfil_lck_rw);
2867
2868	entry = &cfil_info->cfi_entries[kcunit - `1`];
2869	if (outgoing)
2870	entrybuf = &entry->cfe_snd;
2871	else
2872	entrybuf = &entry->cfe_rcv;
2873
2874	cfc = entry->cfe_filter;
2875	if (cfc == NULL)
2876	goto done;
2877
2878	data = cfil_data_start(data);
2879	if (data == NULL \|\| (data->m_flags & M_PKTHDR) == `0`) {
2880	CFIL_LOG(LOG_ERR, "NOT PKTHDR");
2881	goto done;
2882	}
2883
2884	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2885	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2886
2887	socket_lock_assert_owned(so);
2888
2889	/ Would be wasteful to try /
2890	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2891	error = ENOBUFS;
2892	goto done;
2893	}
2894
2895	/ Make a copy of the data to pass to kernel control socket /
2896	copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
2897	M_COPYM_NOOP_HDR);
2898	if (copy == NULL) {
2899	CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2900	error = ENOMEM;
2901	goto done;
2902	}
2903
2904	/ We need an mbuf packet for the message header /
2905	hdrsize = sizeof(struct cfil_msg_data_event);
2906	error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
2907	if (error != `0`) {
2908	CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
2909	m_freem(copy);
2910	/*
2911	* ENOBUFS is to indicate flow control
2912	*/
2913	error = ENOMEM;
2914	goto done;
2915	}
2916	mbuf_setlen(msg, hdrsize);
2917	mbuf_pkthdr_setlen(msg, hdrsize + copylen);
2918	msg->m_next = copy;
2919	data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
2920	bzero(data_req, hdrsize);
2921	data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
2922	data_req->cfd_msghdr.cfm_version = `1`;
2923	data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
2924	data_req->cfd_msghdr.cfm_op =
2925	outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
2926	data_req->cfd_msghdr.cfm_sock_id =
2927	entry->cfe_cfil_info->cfi_sock_id;
2928	data_req->cfd_start_offset = entrybuf->cfe_peeked;
2929	data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
2930
2931	/*
2932	* TBD:
2933	* For non connected sockets need to copy addresses from passed
2934	* parameters
2935	*/
2936	if (inp->inp_vflag & INP_IPV6) {
2937	struct in6_addr laddr = NULL, faddr = NULL;
2938	u_int16_t lport = `0`, fport = `0`;
2939
2940	cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2941	&laddr, &faddr, &lport, &fport);
2942	if (outgoing) {
2943	fill_ip6_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
2944	fill_ip6_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
2945	} else {
2946	fill_ip6_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
2947	fill_ip6_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
2948	}
2949	} else if (inp->inp_vflag & INP_IPV4) {
2950	struct in_addr laddr = {`0`}, faddr = {`0`};
2951	u_int16_t lport = `0`, fport = `0`;
2952
2953	cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2954	&laddr, &faddr, &lport, &fport);
2955
2956	if (outgoing) {
2957	fill_ip_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
2958	fill_ip_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
2959	} else {
2960	fill_ip_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
2961	fill_ip_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
2962	}
2963	}
2964
2965	microuptime(&tv);
2966	CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
2967
2968	/ Pass the message to the content filter /
2969	error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
2970	entry->cfe_filter->cf_kcunit,
2971	msg, CTL_DATA_EOR);
2972	if (error != `0`) {
2973	CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2974	mbuf_freem(msg);
2975	goto done;
2976	}
2977	entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
2978	OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
2979
2980	#if VERDICT_DEBUG
2981	CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
2982	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
2983	#endif
2984
2985	done:
2986	if (error == ENOBUFS) {
2987	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
2988	OSIncrementAtomic(
2989	&cfil_stats.cfs_data_event_flow_control);
2990
2991	if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2992	cfil_rw_lock_exclusive(&cfil_lck_rw);
2993
2994	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
2995
2996	cfil_rw_unlock_exclusive(&cfil_lck_rw);
2997	} else {
2998	if (error != `0`)
2999	OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3000
3001	cfil_rw_unlock_shared(&cfil_lck_rw);
3002	}
3003	return (error);
3004	}
3005
3006	/*
3007	* Process the queue of data waiting to be delivered to content filter
3008	*/
3009	static int
3010	cfil_data_service_ctl_q(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
3011	{
3012	errno_t error = `0`;
3013	struct mbuf data, tmp = NULL;
3014	unsigned int datalen = `0`, copylen = `0`, copyoffset = `0`;
3015	struct cfil_entry *entry;
3016	struct cfe_buf *entrybuf;
3017	uint64_t currentoffset = `0`;
3018
3019	if (cfil_info == NULL)
3020	return (`0`);
3021
3022	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3023	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3024
3025	socket_lock_assert_owned(so);
3026
3027	entry = &cfil_info->cfi_entries[kcunit - `1`];
3028	if (outgoing)
3029	entrybuf = &entry->cfe_snd;
3030	else
3031	entrybuf = &entry->cfe_rcv;
3032
3033	/ Send attached message if not yet done /
3034	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`) {
3035	error = cfil_dispatch_attach_event(so, cfil_info, kcunit);
3036	if (error != `0`) {
3037	/ We can recover from flow control /
3038	if (error == ENOBUFS \|\| error == ENOMEM)
3039	error = `0`;
3040	goto done;
3041	}
3042	} else if ((entry->cfe_flags & CFEF_DATA_START) == `0`) {
3043	OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3044	goto done;
3045	}
3046
3047	#if DATA_DEBUG
3048	CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3049	entrybuf->cfe_pass_offset,
3050	entrybuf->cfe_peeked,
3051	entrybuf->cfe_peek_offset);
3052	#endif
3053
3054	/ Move all data that can pass /
3055	while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3056	entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3057	datalen = cfil_data_length(data, NULL, NULL);
3058	tmp = data;
3059
3060	if (entrybuf->cfe_ctl_q.q_start + datalen <=
3061	entrybuf->cfe_pass_offset) {
3062	/*
3063	* The first mbuf can fully pass
3064	*/
3065	copylen = datalen;
3066	} else {
3067	/*
3068	* The first mbuf can partially pass
3069	*/
3070	copylen = entrybuf->cfe_pass_offset -
3071	entrybuf->cfe_ctl_q.q_start;
3072	}
3073	VERIFY(copylen <= datalen);
3074
3075	#if DATA_DEBUG
3076	CFIL_LOG(LOG_DEBUG,
3077	"CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3078	"datalen %u copylen %u",
3079	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
3080	entrybuf->cfe_ctl_q.q_start,
3081	entrybuf->cfe_peeked,
3082	entrybuf->cfe_pass_offset,
3083	entrybuf->cfe_peek_offset,
3084	datalen, copylen);
3085	#endif
3086
3087	/*
3088	* Data that passes has been peeked at explicitly or
3089	* implicitly
3090	*/
3091	if (entrybuf->cfe_ctl_q.q_start + copylen >
3092	entrybuf->cfe_peeked)
3093	entrybuf->cfe_peeked =
3094	entrybuf->cfe_ctl_q.q_start + copylen;
3095	/*
3096	* Stop on partial pass
3097	*/
3098	if (copylen < datalen)
3099	break;
3100
3101	/ All good, move full data from ctl queue to pending queue /
3102	cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3103
3104	cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3105	if (outgoing)
3106	OSAddAtomic64(datalen,
3107	&cfil_stats.cfs_pending_q_out_enqueued);
3108	else
3109	OSAddAtomic64(datalen,
3110	&cfil_stats.cfs_pending_q_in_enqueued);
3111	}
3112	CFIL_INFO_VERIFY(cfil_info);
3113	if (tmp != NULL)
3114	CFIL_LOG(LOG_DEBUG,
3115	"%llx first %llu peeked %llu pass %llu peek %llu"
3116	"datalen %u copylen %u",
3117	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
3118	entrybuf->cfe_ctl_q.q_start,
3119	entrybuf->cfe_peeked,
3120	entrybuf->cfe_pass_offset,
3121	entrybuf->cfe_peek_offset,
3122	datalen, copylen);
3123	tmp = NULL;
3124
3125	/ Now deal with remaining data the filter wants to peek at /
3126	for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3127	currentoffset = entrybuf->cfe_ctl_q.q_start;
3128	data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3129	data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3130	currentoffset += datalen) {
3131	datalen = cfil_data_length(data, NULL, NULL);
3132	tmp = data;
3133
3134	/ We've already peeked at this mbuf /
3135	if (currentoffset + datalen <= entrybuf->cfe_peeked)
3136	continue;
3137	/*
3138	* The data in the first mbuf may have been
3139	* partially peeked at
3140	*/
3141	copyoffset = entrybuf->cfe_peeked - currentoffset;
3142	VERIFY(copyoffset < datalen);
3143	copylen = datalen - copyoffset;
3144	VERIFY(copylen <= datalen);
3145	/*
3146	* Do not copy more than needed
3147	*/
3148	if (currentoffset + copyoffset + copylen >
3149	entrybuf->cfe_peek_offset) {
3150	copylen = entrybuf->cfe_peek_offset -
3151	(currentoffset + copyoffset);
3152	}
3153
3154	#if DATA_DEBUG
3155	CFIL_LOG(LOG_DEBUG,
3156	"CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3157	"datalen %u copylen %u copyoffset %u",
3158	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
3159	currentoffset,
3160	entrybuf->cfe_peeked,
3161	entrybuf->cfe_pass_offset,
3162	entrybuf->cfe_peek_offset,
3163	datalen, copylen, copyoffset);
3164	#endif
3165
3166	/*
3167	* Stop if there is nothing more to peek at
3168	*/
3169	if (copylen == `0`)
3170	break;
3171	/*
3172	* Let the filter get a peek at this span of data
3173	*/
3174	error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3175	outgoing, data, copyoffset, copylen);
3176	if (error != `0`) {
3177	/ On error, leave data in ctl_q /
3178	break;
3179	}
3180	entrybuf->cfe_peeked += copylen;
3181	if (outgoing)
3182	OSAddAtomic64(copylen,
3183	&cfil_stats.cfs_ctl_q_out_peeked);
3184	else
3185	OSAddAtomic64(copylen,
3186	&cfil_stats.cfs_ctl_q_in_peeked);
3187
3188	/ Stop when data could not be fully peeked at /
3189	if (copylen + copyoffset < datalen)
3190	break;
3191	}
3192	CFIL_INFO_VERIFY(cfil_info);
3193	if (tmp != NULL)
3194	CFIL_LOG(LOG_DEBUG,
3195	"%llx first %llu peeked %llu pass %llu peek %llu"
3196	"datalen %u copylen %u copyoffset %u",
3197	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
3198	currentoffset,
3199	entrybuf->cfe_peeked,
3200	entrybuf->cfe_pass_offset,
3201	entrybuf->cfe_peek_offset,
3202	datalen, copylen, copyoffset);
3203
3204	/*
3205	* Process data that has passed the filter
3206	*/
3207	error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3208	if (error != `0`) {
3209	CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3210	error);
3211	goto done;
3212	}
3213
3214	/*
3215	* Dispatch disconnect events that could not be sent
3216	*/
3217	if (cfil_info == NULL)
3218	goto done;
3219	else if (outgoing) {
3220	if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3221	!(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
3222	cfil_dispatch_disconnect_event(so, cfil_info, kcunit, `1`);
3223	} else {
3224	if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3225	!(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
3226	cfil_dispatch_disconnect_event(so, cfil_info, kcunit, `0`);
3227	}
3228
3229	done:
3230	CFIL_LOG(LOG_DEBUG,
3231	"first %llu peeked %llu pass %llu peek %llu",
3232	entrybuf->cfe_ctl_q.q_start,
3233	entrybuf->cfe_peeked,
3234	entrybuf->cfe_pass_offset,
3235	entrybuf->cfe_peek_offset);
3236
3237	CFIL_INFO_VERIFY(cfil_info);
3238	return (error);
3239	}
3240
3241	/*
3242	* cfil_data_filter()
3243	*
3244	* Process data for a content filter installed on a socket
3245	*/
3246	int
3247	cfil_data_filter(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
3248	struct mbuf *data, uint64_t datalen)
3249	{
3250	errno_t error = `0`;
3251	struct cfil_entry *entry;
3252	struct cfe_buf *entrybuf;
3253
3254	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3255	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3256
3257	socket_lock_assert_owned(so);
3258
3259	entry = &cfil_info->cfi_entries[kcunit - `1`];
3260	if (outgoing)
3261	entrybuf = &entry->cfe_snd;
3262	else
3263	entrybuf = &entry->cfe_rcv;
3264
3265	/ Are we attached to the filter? /
3266	if (entry->cfe_filter == NULL) {
3267	error = `0`;
3268	goto done;
3269	}
3270
3271	/ Dispatch to filters /
3272	cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3273	if (outgoing)
3274	OSAddAtomic64(datalen,
3275	&cfil_stats.cfs_ctl_q_out_enqueued);
3276	else
3277	OSAddAtomic64(datalen,
3278	&cfil_stats.cfs_ctl_q_in_enqueued);
3279
3280	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3281	if (error != `0`) {
3282	CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3283	error);
3284	}
3285	/*
3286	* We have to return EJUSTRETURN in all cases to avoid double free
3287	* by socket layer
3288	*/
3289	error = EJUSTRETURN;
3290	done:
3291	CFIL_INFO_VERIFY(cfil_info);
3292
3293	CFIL_LOG(LOG_INFO, "return %d", error);
3294	return (error);
3295	}
3296
3297	/*
3298	* cfil_service_inject_queue() re-inject data that passed the
3299	* content filters
3300	*/
3301	static int
3302	cfil_service_inject_queue(struct socket so, struct* cfil_info cfil_info, int* outgoing)
3303	{
3304	mbuf_t data;
3305	unsigned int datalen;
3306	int mbcnt = `0`;
3307	int mbnum = `0`;
3308	errno_t error = `0`;
3309	struct cfi_buf *cfi_buf;
3310	struct cfil_queue *inject_q;
3311	int need_rwakeup = `0`;
3312	int count = `0`;
3313
3314	if (cfil_info == NULL)
3315	return (`0`);
3316
3317	socket_lock_assert_owned(so);
3318
3319	if (outgoing) {
3320	cfi_buf = &cfil_info->cfi_snd;
3321	cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3322	} else {
3323	cfi_buf = &cfil_info->cfi_rcv;
3324	cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3325	}
3326	inject_q = &cfi_buf->cfi_inject_q;
3327
3328	if (cfil_queue_empty(inject_q))
3329	return (`0`);
3330
3331	#if DATA_DEBUG \| VERDICT_DEBUG
3332	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3333	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3334	#endif
3335
3336	while ((data = cfil_queue_first(inject_q)) != NULL) {
3337	datalen = cfil_data_length(data, &mbcnt, &mbnum);
3338
3339	#if DATA_DEBUG
3340	CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3341	remote_addr_ptr ? "UNCONNECTED" : "CONNECTED",
3342	(uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3343	#endif
3344
3345	/ Remove data from queue and adjust stats /
3346	cfil_queue_remove(inject_q, data, datalen);
3347	cfi_buf->cfi_pending_first += datalen;
3348	cfi_buf->cfi_pending_mbcnt -= mbcnt;
3349	cfi_buf->cfi_pending_mbnum -= mbnum;
3350	cfil_info_buf_verify(cfi_buf);
3351
3352	if (outgoing) {
3353	error = sosend_reinject(so, NULL, data, NULL, `0`);
3354	if (error != `0`) {
3355	#if DATA_DEBUG
3356	cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
3357	CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
3358	#endif
3359	break;
3360	}
3361	// At least one injection succeeded, need to wake up pending threads.
3362	need_rwakeup = `1`;
3363	} else {
3364	data->m_flags \|= M_SKIPCFIL;
3365
3366	/*
3367	* NOTE: We currently only support TCP and UDP.
3368	* For RAWIP, MPTCP and message TCP we'll
3369	* need to call the appropriate sbappendxxx()
3370	* of fix sock_inject_data_in()
3371	*/
3372	if (IS_UDP(so) == TRUE) {
3373	if (sbappendchain(&so->so_rcv, data, `0`))
3374	need_rwakeup = `1`;
3375	} else {
3376	if (sbappendstream(&so->so_rcv, data))
3377	need_rwakeup = `1`;
3378	}
3379	}
3380
3381	if (outgoing)
3382	OSAddAtomic64(datalen,
3383	&cfil_stats.cfs_inject_q_out_passed);
3384	else
3385	OSAddAtomic64(datalen,
3386	&cfil_stats.cfs_inject_q_in_passed);
3387
3388	count++;
3389	}
3390
3391	#if DATA_DEBUG \| VERDICT_DEBUG
3392	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3393	(uint64_t)VM_KERNEL_ADDRPERM(so), count);
3394	#endif
3395
3396	/ A single wakeup is for several packets is more efficient /
3397	if (need_rwakeup) {
3398	if (outgoing == TRUE)
3399	sowwakeup(so);
3400	else
3401	sorwakeup(so);
3402	}
3403
3404	if (error != `0` && cfil_info) {
3405	if (error == ENOBUFS)
3406	OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
3407	if (error == ENOMEM)
3408	OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
3409
3410	if (outgoing) {
3411	cfil_info->cfi_flags \|= CFIF_RETRY_INJECT_OUT;
3412	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
3413	} else {
3414	cfil_info->cfi_flags \|= CFIF_RETRY_INJECT_IN;
3415	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
3416	}
3417	}
3418
3419	/*
3420	* Notify
3421	*/
3422	if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
3423	cfil_sock_notify_shutdown(so, SHUT_WR);
3424	if (cfil_sock_data_pending(&so->so_snd) == `0`)
3425	soshutdownlock_final(so, SHUT_WR);
3426	}
3427	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3428	if (cfil_filters_attached(so) == `0`) {
3429	CFIL_LOG(LOG_INFO, "so %llx waking",
3430	(uint64_t)VM_KERNEL_ADDRPERM(so));
3431	wakeup((caddr_t)cfil_info);
3432	}
3433	}
3434
3435	CFIL_INFO_VERIFY(cfil_info);
3436
3437	return (error);
3438	}
3439
3440	static int
3441	cfil_service_pending_queue(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
3442	{
3443	uint64_t passlen, curlen;
3444	mbuf_t data;
3445	unsigned int datalen;
3446	errno_t error = `0`;
3447	struct cfil_entry *entry;
3448	struct cfe_buf *entrybuf;
3449	struct cfil_queue *pending_q;
3450
3451	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3452	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3453
3454	socket_lock_assert_owned(so);
3455
3456	entry = &cfil_info->cfi_entries[kcunit - `1`];
3457	if (outgoing)
3458	entrybuf = &entry->cfe_snd;
3459	else
3460	entrybuf = &entry->cfe_rcv;
3461
3462	pending_q = &entrybuf->cfe_pending_q;
3463
3464	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
3465
3466	/*
3467	* Locate the chunks of data that we can pass to the next filter
3468	* A data chunk must be on mbuf boundaries
3469	*/
3470	curlen = `0`;
3471	while ((data = cfil_queue_first(pending_q)) != NULL) {
3472	datalen = cfil_data_length(data, NULL, NULL);
3473
3474	#if DATA_DEBUG
3475	CFIL_LOG(LOG_DEBUG,
3476	"CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3477	(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
3478	passlen, curlen);
3479	#endif
3480
3481	if (curlen + datalen > passlen)
3482	break;
3483
3484	cfil_queue_remove(pending_q, data, datalen);
3485
3486	curlen += datalen;
3487
3488	for (kcunit += `1`;
3489	kcunit <= MAX_CONTENT_FILTER;
3490	kcunit++) {
3491	error = cfil_data_filter(so, cfil_info, kcunit, outgoing,
3492	data, datalen);
3493	/ 0 means passed so we can continue /
3494	if (error != `0`)
3495	break;
3496	}
3497	/ When data has passed all filters, re-inject /
3498	if (error == `0`) {
3499	if (outgoing) {
3500	cfil_queue_enqueue(
3501	&cfil_info->cfi_snd.cfi_inject_q,
3502	data, datalen);
3503	OSAddAtomic64(datalen,
3504	&cfil_stats.cfs_inject_q_out_enqueued);
3505	} else {
3506	cfil_queue_enqueue(
3507	&cfil_info->cfi_rcv.cfi_inject_q,
3508	data, datalen);
3509	OSAddAtomic64(datalen,
3510	&cfil_stats.cfs_inject_q_in_enqueued);
3511	}
3512	}
3513	}
3514
3515	CFIL_INFO_VERIFY(cfil_info);
3516
3517	return (error);
3518	}
3519
3520	int
3521	cfil_update_data_offsets(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
3522	uint64_t pass_offset, uint64_t peek_offset)
3523	{
3524	errno_t error = `0`;
3525	struct cfil_entry *entry = NULL;
3526	struct cfe_buf *entrybuf;
3527	int updated = `0`;
3528
3529	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
3530
3531	socket_lock_assert_owned(so);
3532
3533	if (cfil_info == NULL) {
3534	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3535	(uint64_t)VM_KERNEL_ADDRPERM(so));
3536	error = `0`;
3537	goto done;
3538	} else if (cfil_info->cfi_flags & CFIF_DROP) {
3539	CFIL_LOG(LOG_ERR, "so %llx drop set",
3540	(uint64_t)VM_KERNEL_ADDRPERM(so));
3541	error = EPIPE;
3542	goto done;
3543	}
3544
3545	entry = &cfil_info->cfi_entries[kcunit - `1`];
3546	if (outgoing)
3547	entrybuf = &entry->cfe_snd;
3548	else
3549	entrybuf = &entry->cfe_rcv;
3550
3551	/ Record updated offsets for this content filter /
3552	if (pass_offset > entrybuf->cfe_pass_offset) {
3553	entrybuf->cfe_pass_offset = pass_offset;
3554
3555	if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3556	entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3557	updated = `1`;
3558	} else {
3559	CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
3560	pass_offset, entrybuf->cfe_pass_offset);
3561	}
3562	/ Filter does not want or need to see data that's allowed to pass /
3563	if (peek_offset > entrybuf->cfe_pass_offset &&
3564	peek_offset > entrybuf->cfe_peek_offset) {
3565	entrybuf->cfe_peek_offset = peek_offset;
3566	updated = `1`;
3567	}
3568	/ Nothing to do /
3569	if (updated == `0`)
3570	goto done;
3571
3572	/ Move data held in control queue to pending queue if needed /
3573	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3574	if (error != `0`) {
3575	CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3576	error);
3577	goto done;
3578	}
3579	error = EJUSTRETURN;
3580
3581	done:
3582	/*
3583	* The filter is effectively detached when pass all from both sides
3584	* or when the socket is closed and no more data is waiting
3585	* to be delivered to the filter
3586	*/
3587	if (entry != NULL &&
3588	((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
3589	entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) \|\|
3590	((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
3591	cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
3592	cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
3593	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
3594	#if LIFECYCLE_DEBUG
3595	cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3596	"CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
3597	"CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
3598	#endif
3599	CFIL_LOG(LOG_INFO, "so %llx detached %u",
3600	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3601	if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
3602	cfil_filters_attached(so) == `0`) {
3603	#if LIFECYCLE_DEBUG
3604	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
3605	#endif
3606	CFIL_LOG(LOG_INFO, "so %llx waking",
3607	(uint64_t)VM_KERNEL_ADDRPERM(so));
3608	wakeup((caddr_t)cfil_info);
3609	}
3610	}
3611	CFIL_INFO_VERIFY(cfil_info);
3612	CFIL_LOG(LOG_INFO, "return %d", error);
3613	return (error);
3614	}
3615
3616	/*
3617	* Update pass offset for socket when no data is pending
3618	*/
3619	static int
3620	cfil_set_socket_pass_offset(struct socket so, struct* cfil_info cfil_info, int* outgoing)
3621	{
3622	struct cfi_buf *cfi_buf;
3623	struct cfil_entry *entry;
3624	struct cfe_buf *entrybuf;
3625	uint32_t kcunit;
3626	uint64_t pass_offset = `0`;
3627
3628	if (cfil_info == NULL)
3629	return (`0`);
3630
3631	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
3632	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3633
3634	socket_lock_assert_owned(so);
3635
3636	if (outgoing)
3637	cfi_buf = &cfil_info->cfi_snd;
3638	else
3639	cfi_buf = &cfil_info->cfi_rcv;
3640
3641	CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
3642	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
3643	cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
3644
3645	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == `0`) {
3646	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3647	entry = &cfil_info->cfi_entries[kcunit - `1`];
3648
3649	/ Are we attached to a filter? /
3650	if (entry->cfe_filter == NULL)
3651	continue;
3652
3653	if (outgoing)
3654	entrybuf = &entry->cfe_snd;
3655	else
3656	entrybuf = &entry->cfe_rcv;
3657
3658	if (pass_offset == `0` \|\|
3659	entrybuf->cfe_pass_offset < pass_offset)
3660	pass_offset = entrybuf->cfe_pass_offset;
3661	}
3662	cfi_buf->cfi_pass_offset = pass_offset;
3663	}
3664
3665	CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
3666	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
3667
3668	return (`0`);
3669	}
3670
3671	int
3672	cfil_action_data_pass(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
3673	uint64_t pass_offset, uint64_t peek_offset)
3674	{
3675	errno_t error = `0`;
3676
3677	CFIL_LOG(LOG_INFO, "");
3678
3679	socket_lock_assert_owned(so);
3680
3681	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
3682	if (error != `0`) {
3683	CFIL_LOG(LOG_INFO, "so %llx %s dropped",
3684	(uint64_t)VM_KERNEL_ADDRPERM(so),
3685	outgoing ? "out" : "in");
3686	goto release;
3687	}
3688
3689	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
3690	pass_offset, peek_offset);
3691
3692	cfil_service_inject_queue(so, cfil_info, outgoing);
3693
3694	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
3695	release:
3696	CFIL_INFO_VERIFY(cfil_info);
3697	cfil_release_sockbuf(so, outgoing);
3698
3699	return (error);
3700	}
3701
3702
3703	static void
3704	cfil_flush_queues(struct socket so, struct* cfil_info *cfil_info)
3705	{
3706	struct cfil_entry *entry;
3707	int kcunit;
3708	uint64_t drained;
3709
3710	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| cfil_info == NULL)
3711	goto done;
3712
3713	socket_lock_assert_owned(so);
3714
3715	/*
3716	* Flush the output queues and ignore errors as long as
3717	* we are attached
3718	*/
3719	(void) cfil_acquire_sockbuf(so, cfil_info, `1`);
3720	if (cfil_info != NULL) {
3721	drained = `0`;
3722	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3723	entry = &cfil_info->cfi_entries[kcunit - `1`];
3724
3725	drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
3726	drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
3727	}
3728	drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
3729
3730	if (drained) {
3731	if (cfil_info->cfi_flags & CFIF_DROP)
3732	OSIncrementAtomic(
3733	&cfil_stats.cfs_flush_out_drop);
3734	else
3735	OSIncrementAtomic(
3736	&cfil_stats.cfs_flush_out_close);
3737	}
3738	}
3739	cfil_release_sockbuf(so, `1`);
3740
3741	/*
3742	* Flush the input queues
3743	*/
3744	(void) cfil_acquire_sockbuf(so, cfil_info, `0`);
3745	if (cfil_info != NULL) {
3746	drained = `0`;
3747	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3748	entry = &cfil_info->cfi_entries[kcunit - `1`];
3749
3750	drained += cfil_queue_drain(
3751	&entry->cfe_rcv.cfe_ctl_q);
3752	drained += cfil_queue_drain(
3753	&entry->cfe_rcv.cfe_pending_q);
3754	}
3755	drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
3756
3757	if (drained) {
3758	if (cfil_info->cfi_flags & CFIF_DROP)
3759	OSIncrementAtomic(
3760	&cfil_stats.cfs_flush_in_drop);
3761	else
3762	OSIncrementAtomic(
3763	&cfil_stats.cfs_flush_in_close);
3764	}
3765	}
3766	cfil_release_sockbuf(so, `0`);
3767	done:
3768	CFIL_INFO_VERIFY(cfil_info);
3769	}
3770
3771	int
3772	cfil_action_drop(struct socket so, struct* cfil_info *cfil_info, uint32_t kcunit)
3773	{
3774	errno_t error = `0`;
3775	struct cfil_entry *entry;
3776	struct proc *p;
3777
3778	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| cfil_info == NULL)
3779	goto done;
3780
3781	socket_lock_assert_owned(so);
3782
3783	entry = &cfil_info->cfi_entries[kcunit - `1`];
3784
3785	/ Are we attached to the filter? /
3786	if (entry->cfe_filter == NULL)
3787	goto done;
3788
3789	cfil_info->cfi_flags \|= CFIF_DROP;
3790
3791	p = current_proc();
3792
3793	/*
3794	* Force the socket to be marked defunct
3795	* (forcing fixed along with rdar://19391339)
3796	*/
3797	if (so->so_cfil_db == NULL) {
3798	error = sosetdefunct(p, so,
3799	SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER \| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
3800	FALSE);
3801
3802	/ Flush the socket buffer and disconnect /
3803	if (error == `0`)
3804	error = sodefunct(p, so,
3805	SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER \| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
3806	}
3807
3808	/ The filter is done, mark as detached /
3809	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
3810	#if LIFECYCLE_DEBUG
3811	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
3812	#endif
3813	CFIL_LOG(LOG_INFO, "so %llx detached %u",
3814	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3815
3816	/ Pending data needs to go /
3817	cfil_flush_queues(so, cfil_info);
3818
3819	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3820	if (cfil_filters_attached(so) == `0`) {
3821	CFIL_LOG(LOG_INFO, "so %llx waking",
3822	(uint64_t)VM_KERNEL_ADDRPERM(so));
3823	wakeup((caddr_t)cfil_info);
3824	}
3825	}
3826	done:
3827	return (error);
3828	}
3829
3830	int
3831	cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
3832	{
3833	errno_t error = `0`;
3834	struct cfil_info *cfil_info = NULL;
3835
3836	bool cfil_attached = false;
3837	struct cfil_msg_bless_client blessmsg = (struct* cfil_msg_bless_client *)msghdr;
3838
3839	// Search and lock socket
3840	struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
3841	if (so == NULL) {
3842	error = ENOENT;
3843	} else {
3844	// The client gets a pass automatically
3845	cfil_info = (so->so_cfil_db != NULL) ?
3846	cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
3847
3848	if (cfil_attached) {
3849	#if VERDICT_DEBUG
3850	if (cfil_info != NULL) {
3851	CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
3852	cfil_info->cfi_hash_entry ? "UDP" : "TCP",
3853	(uint64_t)VM_KERNEL_ADDRPERM(so),
3854	cfil_info->cfi_sock_id);
3855	}
3856	#endif
3857	(void)cfil_action_data_pass(so, cfil_info, kcunit, `1`, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3858	(void)cfil_action_data_pass(so, cfil_info, kcunit, `0`, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3859	} else {
3860	so->so_flags1 \|= SOF1_CONTENT_FILTER_SKIP;
3861	}
3862	socket_unlock(so, `1`);
3863	}
3864
3865	return (error);
3866	}
3867
3868	static int
3869	cfil_update_entry_offsets(struct socket so, struct* cfil_info cfil_info, int* outgoing, unsigned int datalen)
3870	{
3871	struct cfil_entry *entry;
3872	struct cfe_buf *entrybuf;
3873	uint32_t kcunit;
3874
3875	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
3876	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
3877
3878	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3879	entry = &cfil_info->cfi_entries[kcunit - `1`];
3880
3881	/ Are we attached to the filter? /
3882	if (entry->cfe_filter == NULL)
3883	continue;
3884
3885	if (outgoing)
3886	entrybuf = &entry->cfe_snd;
3887	else
3888	entrybuf = &entry->cfe_rcv;
3889
3890	entrybuf->cfe_ctl_q.q_start += datalen;
3891	entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
3892	entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
3893	if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3894	entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3895
3896	entrybuf->cfe_ctl_q.q_end += datalen;
3897
3898	entrybuf->cfe_pending_q.q_start += datalen;
3899	entrybuf->cfe_pending_q.q_end += datalen;
3900	}
3901	CFIL_INFO_VERIFY(cfil_info);
3902	return (`0`);
3903	}
3904
3905	int
3906	cfil_data_common(struct socket so, struct* cfil_info cfil_info, int* outgoing, struct sockaddr *to,
3907	struct mbuf data, struct* mbuf *control, uint32_t flags)
3908	{
3909	#pragma unused(to, control, flags)
3910	errno_t error = `0`;
3911	unsigned int datalen;
3912	int mbcnt = `0`;
3913	int mbnum = `0`;
3914	int kcunit;
3915	struct cfi_buf *cfi_buf;
3916	struct mbuf *chain = NULL;
3917
3918	if (cfil_info == NULL) {
3919	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3920	(uint64_t)VM_KERNEL_ADDRPERM(so));
3921	error = `0`;
3922	goto done;
3923	} else if (cfil_info->cfi_flags & CFIF_DROP) {
3924	CFIL_LOG(LOG_ERR, "so %llx drop set",
3925	(uint64_t)VM_KERNEL_ADDRPERM(so));
3926	error = EPIPE;
3927	goto done;
3928	}
3929
3930	datalen = cfil_data_length(data, &mbcnt, &mbnum);
3931
3932	if (outgoing)
3933	cfi_buf = &cfil_info->cfi_snd;
3934	else
3935	cfi_buf = &cfil_info->cfi_rcv;
3936
3937	cfi_buf->cfi_pending_last += datalen;
3938	cfi_buf->cfi_pending_mbcnt += mbcnt;
3939	cfi_buf->cfi_pending_mbnum += mbnum;
3940
3941	if (IS_UDP(so)) {
3942	if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max \|\|
3943	cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
3944	cfi_buf->cfi_tail_drop_cnt++;
3945	cfi_buf->cfi_pending_mbcnt -= mbcnt;
3946	cfi_buf->cfi_pending_mbnum -= mbnum;
3947	return (EPIPE);
3948	}
3949	}
3950
3951	cfil_info_buf_verify(cfi_buf);
3952
3953	#if DATA_DEBUG
3954	CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
3955	(uint64_t)VM_KERNEL_ADDRPERM(so),
3956	outgoing ? "OUT" : "IN",
3957	(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
3958	(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
3959	cfi_buf->cfi_pending_last,
3960	cfi_buf->cfi_pending_mbcnt,
3961	cfi_buf->cfi_pass_offset);
3962	#endif
3963
3964	/ Fast path when below pass offset /
3965	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
3966	cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
3967	#if DATA_DEBUG
3968	CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
3969	#endif
3970	} else {
3971	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3972	// Is cfil attached to this filter?
3973	if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
3974	if (IS_UDP(so)) {
3975	/ UDP only:*
3976	* Chain addr (incoming only TDB), control (optional) and data into one chain.
3977	* This full chain will be reinjected into socket after recieving verdict.
3978	*/
3979	(void) cfil_udp_save_socket_state(cfil_info, data);
3980	chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
3981	if (chain == NULL) {
3982	return (ENOBUFS);
3983	}
3984	data = chain;
3985	}
3986	error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
3987	datalen);
3988	}
3989	/ 0 means passed so continue with next filter /
3990	if (error != `0`)
3991	break;
3992	}
3993	}
3994
3995	/ Move cursor if no filter claimed the data /
3996	if (error == `0`) {
3997	cfi_buf->cfi_pending_first += datalen;
3998	cfi_buf->cfi_pending_mbcnt -= mbcnt;
3999	cfi_buf->cfi_pending_mbnum -= mbnum;
4000	cfil_info_buf_verify(cfi_buf);
4001	}
4002	done:
4003	CFIL_INFO_VERIFY(cfil_info);
4004
4005	return (error);
4006	}
4007
4008	/*
4009	* Callback from socket layer sosendxxx()
4010	*/
4011	int
4012	cfil_sock_data_out(struct socket so, struct* sockaddr *to,
4013	struct mbuf data, struct* mbuf *control, uint32_t flags)
4014	{
4015	int error = `0`;
4016
4017	if (IS_UDP(so)) {
4018	return (cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags));
4019	}
4020
4021	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4022	return (`0`);
4023
4024	socket_lock_assert_owned(so);
4025
4026	if (so->so_cfil->cfi_flags & CFIF_DROP) {
4027	CFIL_LOG(LOG_ERR, "so %llx drop set",
4028	(uint64_t)VM_KERNEL_ADDRPERM(so));
4029	return (EPIPE);
4030	}
4031	if (control != NULL) {
4032	CFIL_LOG(LOG_ERR, "so %llx control",
4033	(uint64_t)VM_KERNEL_ADDRPERM(so));
4034	OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4035	}
4036	if ((flags & MSG_OOB)) {
4037	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4038	(uint64_t)VM_KERNEL_ADDRPERM(so));
4039	OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4040	}
4041	if ((so->so_snd.sb_flags & SB_LOCK) == `0`)
4042	panic("so %p SB_LOCK not set", so);
4043
4044	if (so->so_snd.sb_cfil_thread != NULL)
4045	panic("%s sb_cfil_thread %p not NULL", __func__,
4046	so->so_snd.sb_cfil_thread);
4047
4048	error = cfil_data_common(so, so->so_cfil, `1`, to, data, control, flags);
4049
4050	return (error);
4051	}
4052
4053	/*
4054	* Callback from socket layer sbappendxxx()
4055	*/
4056	int
4057	cfil_sock_data_in(struct socket so, struct* sockaddr *from,
4058	struct mbuf data, struct* mbuf *control, uint32_t flags)
4059	{
4060	int error = `0`;
4061
4062	if (IS_UDP(so)) {
4063	return (cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags));
4064	}
4065
4066	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4067	return (`0`);
4068
4069	socket_lock_assert_owned(so);
4070
4071	if (so->so_cfil->cfi_flags & CFIF_DROP) {
4072	CFIL_LOG(LOG_ERR, "so %llx drop set",
4073	(uint64_t)VM_KERNEL_ADDRPERM(so));
4074	return (EPIPE);
4075	}
4076	if (control != NULL) {
4077	CFIL_LOG(LOG_ERR, "so %llx control",
4078	(uint64_t)VM_KERNEL_ADDRPERM(so));
4079	OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4080	}
4081	if (data->m_type == MT_OOBDATA) {
4082	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4083	(uint64_t)VM_KERNEL_ADDRPERM(so));
4084	OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4085	}
4086	error = cfil_data_common(so, so->so_cfil, `0`, from, data, control, flags);
4087
4088	return (error);
4089	}
4090
4091	/*
4092	* Callback from socket layer soshutdownxxx()
4093	*
4094	* We may delay the shutdown write if there's outgoing data in process.
4095	*
4096	* There is no point in delaying the shutdown read because the process
4097	* indicated that it does not want to read anymore data.
4098	*/
4099	int
4100	cfil_sock_shutdown(struct socket so, int* *how)
4101	{
4102	int error = `0`;
4103
4104	if (IS_UDP(so)) {
4105	return (cfil_sock_udp_shutdown(so, how));
4106	}
4107
4108	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4109	goto done;
4110
4111	socket_lock_assert_owned(so);
4112
4113	CFIL_LOG(LOG_INFO, "so %llx how %d",
4114	(uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4115
4116	/*
4117	* Check the state of the socket before the content filter
4118	*/
4119	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != `0`) {
4120	/ read already shut down /
4121	error = ENOTCONN;
4122	goto done;
4123	}
4124	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != `0`) {
4125	/ write already shut down /
4126	error = ENOTCONN;
4127	goto done;
4128	}
4129
4130	if ((so->so_cfil->cfi_flags & CFIF_DROP) != `0`) {
4131	CFIL_LOG(LOG_ERR, "so %llx drop set",
4132	(uint64_t)VM_KERNEL_ADDRPERM(so));
4133	goto done;
4134	}
4135
4136	/*
4137	* shutdown read: SHUT_RD or SHUT_RDWR
4138	*/
4139	if (*how != SHUT_WR) {
4140	if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4141	error = ENOTCONN;
4142	goto done;
4143	}
4144	so->so_cfil->cfi_flags \|= CFIF_SHUT_RD;
4145	cfil_sock_notify_shutdown(so, SHUT_RD);
4146	}
4147	/*
4148	* shutdown write: SHUT_WR or SHUT_RDWR
4149	*/
4150	if (*how != SHUT_RD) {
4151	if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4152	error = ENOTCONN;
4153	goto done;
4154	}
4155	so->so_cfil->cfi_flags \|= CFIF_SHUT_WR;
4156	cfil_sock_notify_shutdown(so, SHUT_WR);
4157	/*
4158	* When outgoing data is pending, we delay the shutdown at the
4159	* protocol level until the content filters give the final
4160	* verdict on the pending data.
4161	*/
4162	if (cfil_sock_data_pending(&so->so_snd) != `0`) {
4163	/*
4164	* When shutting down the read and write sides at once
4165	* we can proceed to the final shutdown of the read
4166	* side. Otherwise, we just return.
4167	*/
4168	if (*how == SHUT_WR) {
4169	error = EJUSTRETURN;
4170	} else if (*how == SHUT_RDWR) {
4171	*how = SHUT_RD;
4172	}
4173	}
4174	}
4175	done:
4176	return (error);
4177	}
4178
4179	/*
4180	* This is called when the socket is closed and there is no more
4181	* opportunity for filtering
4182	*/
4183	void
4184	cfil_sock_is_closed(struct socket *so)
4185	{
4186	errno_t error = `0`;
4187	int kcunit;
4188
4189	if (IS_UDP(so)) {
4190	cfil_sock_udp_is_closed(so);
4191	return;
4192	}
4193
4194	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4195	return;
4196
4197	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4198
4199	socket_lock_assert_owned(so);
4200
4201	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4202	/ Let the filters know of the closing /
4203	error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4204	}
4205
4206	/ Last chance to push passed data out /
4207	error = cfil_acquire_sockbuf(so, so->so_cfil, `1`);
4208	if (error == `0`)
4209	cfil_service_inject_queue(so, so->so_cfil, `1`);
4210	cfil_release_sockbuf(so, `1`);
4211
4212	so->so_cfil->cfi_flags \|= CFIF_SOCK_CLOSED;
4213
4214	/ Pending data needs to go /
4215	cfil_flush_queues(so, so->so_cfil);
4216
4217	CFIL_INFO_VERIFY(so->so_cfil);
4218	}
4219
4220	/*
4221	* This is called when the socket is disconnected so let the filters
4222	* know about the disconnection and that no more data will come
4223	*
4224	* The how parameter has the same values as soshutown()
4225	*/
4226	void
4227	cfil_sock_notify_shutdown(struct socket so, int* how)
4228	{
4229	errno_t error = `0`;
4230	int kcunit;
4231
4232	if (IS_UDP(so)) {
4233	cfil_sock_udp_notify_shutdown(so, how, `0`, `0`);
4234	return;
4235	}
4236
4237	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4238	return;
4239
4240	CFIL_LOG(LOG_INFO, "so %llx how %d",
4241	(uint64_t)VM_KERNEL_ADDRPERM(so), how);
4242
4243	socket_lock_assert_owned(so);
4244
4245	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4246	/ Disconnect incoming side /
4247	if (how != SHUT_WR)
4248	error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, `0`);
4249	/ Disconnect outgoing side /
4250	if (how != SHUT_RD)
4251	error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, `1`);
4252	}
4253	}
4254
4255	static int
4256	cfil_filters_attached(struct socket *so)
4257	{
4258	struct cfil_entry *entry;
4259	uint32_t kcunit;
4260	int attached = `0`;
4261
4262	if (IS_UDP(so)) {
4263	return cfil_filters_udp_attached(so, FALSE);
4264	}
4265
4266	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4267	return (`0`);
4268
4269	socket_lock_assert_owned(so);
4270
4271	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4272	entry = &so->so_cfil->cfi_entries[kcunit - `1`];
4273
4274	/ Are we attached to the filter? /
4275	if (entry->cfe_filter == NULL)
4276	continue;
4277	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`)
4278	continue;
4279	if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != `0`)
4280	continue;
4281	attached = `1`;
4282	break;
4283	}
4284
4285	return (attached);
4286	}
4287
4288	/*
4289	* This is called when the socket is closed and we are waiting for
4290	* the filters to gives the final pass or drop
4291	*/
4292	void
4293	cfil_sock_close_wait(struct socket *so)
4294	{
4295	lck_mtx_t *mutex_held;
4296	struct timespec ts;
4297	int error;
4298
4299	if (IS_UDP(so)) {
4300	cfil_sock_udp_close_wait(so);
4301	return;
4302	}
4303
4304	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4305	return;
4306
4307	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4308
4309	if (so->so_proto->pr_getlock != NULL)
4310	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
4311	else
4312	mutex_held = so->so_proto->pr_domain->dom_mtx;
4313	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4314
4315	while (cfil_filters_attached(so)) {
4316	/*
4317	* Notify the filters we are going away so they can detach
4318	*/
4319	cfil_sock_notify_shutdown(so, SHUT_RDWR);
4320
4321	/*
4322	* Make sure we need to wait after the filter are notified
4323	* of the disconnection
4324	*/
4325	if (cfil_filters_attached(so) == `0`)
4326	break;
4327
4328	CFIL_LOG(LOG_INFO, "so %llx waiting",
4329	(uint64_t)VM_KERNEL_ADDRPERM(so));
4330
4331	ts.tv_sec = cfil_close_wait_timeout / `1000`;
4332	ts.tv_nsec = (cfil_close_wait_timeout % `1000`) *
4333	NSEC_PER_USEC * `1000`;
4334
4335	OSIncrementAtomic(&cfil_stats.cfs_close_wait);
4336	so->so_cfil->cfi_flags \|= CFIF_CLOSE_WAIT;
4337	error = msleep((caddr_t)so->so_cfil, mutex_held,
4338	PSOCK \| PCATCH, "cfil_sock_close_wait", &ts);
4339	so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
4340
4341	CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
4342	(uint64_t)VM_KERNEL_ADDRPERM(so), (error != `0`));
4343
4344	/*
4345	* Force close in case of timeout
4346	*/
4347	if (error != `0`) {
4348	OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
4349	break;
4350	}
4351	}
4352
4353	}
4354
4355	/*
4356	* Returns the size of the data held by the content filter by using
4357	*/
4358	int32_t
4359	cfil_sock_data_pending(struct sockbuf *sb)
4360	{
4361	struct socket *so = sb->sb_so;
4362	uint64_t pending = `0`;
4363
4364	if (IS_UDP(so)) {
4365	return (cfil_sock_udp_data_pending(sb, FALSE));
4366	}
4367
4368	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil != NULL) {
4369	struct cfi_buf *cfi_buf;
4370
4371	socket_lock_assert_owned(so);
4372
4373	if ((sb->sb_flags & SB_RECV) == `0`)
4374	cfi_buf = &so->so_cfil->cfi_snd;
4375	else
4376	cfi_buf = &so->so_cfil->cfi_rcv;
4377
4378	pending = cfi_buf->cfi_pending_last -
4379	cfi_buf->cfi_pending_first;
4380
4381	/*
4382	* If we are limited by the "chars of mbufs used" roughly
4383	* adjust so we won't overcommit
4384	*/
4385	if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
4386	pending = cfi_buf->cfi_pending_mbcnt;
4387	}
4388
4389	VERIFY(pending < INT32_MAX);
4390
4391	return (int32_t)(pending);
4392	}
4393
4394	/*
4395	* Return the socket buffer space used by data being held by content filters
4396	* so processes won't clog the socket buffer
4397	*/
4398	int32_t
4399	cfil_sock_data_space(struct sockbuf *sb)
4400	{
4401	struct socket *so = sb->sb_so;
4402	uint64_t pending = `0`;
4403
4404	if (IS_UDP(so)) {
4405	return (cfil_sock_udp_data_pending(sb, TRUE));
4406	}
4407
4408	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil != NULL &&
4409	so->so_snd.sb_cfil_thread != current_thread()) {
4410	struct cfi_buf *cfi_buf;
4411
4412	socket_lock_assert_owned(so);
4413
4414	if ((sb->sb_flags & SB_RECV) == `0`)
4415	cfi_buf = &so->so_cfil->cfi_snd;
4416	else
4417	cfi_buf = &so->so_cfil->cfi_rcv;
4418
4419	pending = cfi_buf->cfi_pending_last -
4420	cfi_buf->cfi_pending_first;
4421
4422	/*
4423	* If we are limited by the "chars of mbufs used" roughly
4424	* adjust so we won't overcommit
4425	*/
4426	if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
4427	pending = cfi_buf->cfi_pending_mbcnt;
4428	}
4429
4430	VERIFY(pending < INT32_MAX);
4431
4432	return (int32_t)(pending);
4433	}
4434
4435	/*
4436	* A callback from the socket and protocol layer when data becomes
4437	* available in the socket buffer to give a chance for the content filter
4438	* to re-inject data that was held back
4439	*/
4440	void
4441	cfil_sock_buf_update(struct sockbuf *sb)
4442	{
4443	int outgoing;
4444	int error;
4445	struct socket *so = sb->sb_so;
4446
4447	if (IS_UDP(so)) {
4448	cfil_sock_udp_buf_update(sb);
4449	return;
4450	}
4451
4452	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL)
4453	return;
4454
4455	if (!cfil_sbtrim)
4456	return;
4457
4458	socket_lock_assert_owned(so);
4459
4460	if ((sb->sb_flags & SB_RECV) == `0`) {
4461	if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == `0`)
4462	return;
4463	outgoing = `1`;
4464	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
4465	} else {
4466	if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == `0`)
4467	return;
4468	outgoing = `0`;
4469	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
4470	}
4471
4472	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
4473	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4474
4475	error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
4476	if (error == `0`)
4477	cfil_service_inject_queue(so, so->so_cfil, outgoing);
4478	cfil_release_sockbuf(so, outgoing);
4479	}
4480
4481	int
4482	sysctl_cfil_filter_list(struct sysctl_oid oidp, void* arg1, int* arg2,
4483	struct sysctl_req *req)
4484	{
4485	#pragma unused(oidp, arg1, arg2)
4486	int error = `0`;
4487	size_t len = `0`;
4488	u_int32_t i;
4489
4490	/ Read only /
4491	if (req->newptr != USER_ADDR_NULL)
4492	return (EPERM);
4493
4494	cfil_rw_lock_shared(&cfil_lck_rw);
4495
4496	for (i = `0`; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
4497	struct cfil_filter_stat filter_stat;
4498	struct content_filter *cfc = content_filters[i];
4499
4500	if (cfc == NULL)
4501	continue;
4502
4503	/ If just asking for the size /
4504	if (req->oldptr == USER_ADDR_NULL) {
4505	len += sizeof(struct cfil_filter_stat);
4506	continue;
4507	}
4508
4509	bzero(&filter_stat, sizeof(struct cfil_filter_stat));
4510	filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
4511	filter_stat.cfs_filter_id = cfc->cf_kcunit;
4512	filter_stat.cfs_flags = cfc->cf_flags;
4513	filter_stat.cfs_sock_count = cfc->cf_sock_count;
4514	filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
4515
4516	error = SYSCTL_OUT(req, &filter_stat,
4517	sizeof (struct cfil_filter_stat));
4518	if (error != `0`)
4519	break;
4520	}
4521	/ If just asking for the size /
4522	if (req->oldptr == USER_ADDR_NULL)
4523	req->oldidx = len;
4524
4525	cfil_rw_unlock_shared(&cfil_lck_rw);
4526
4527	#if SHOW_DEBUG
4528	if (req->oldptr != USER_ADDR_NULL) {
4529	for (i = `1`; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
4530	cfil_filter_show(i);
4531	}
4532	}
4533	#endif
4534
4535	return (error);
4536	}
4537
4538	static int sysctl_cfil_sock_list(struct sysctl_oid oidp, void* arg1, int* arg2,
4539	struct sysctl_req *req)
4540	{
4541	#pragma unused(oidp, arg1, arg2)
4542	int error = `0`;
4543	u_int32_t i;
4544	struct cfil_info *cfi;
4545
4546	/ Read only /
4547	if (req->newptr != USER_ADDR_NULL)
4548	return (EPERM);
4549
4550	cfil_rw_lock_shared(&cfil_lck_rw);
4551
4552	/*
4553	* If just asking for the size,
4554	*/
4555	if (req->oldptr == USER_ADDR_NULL) {
4556	req->oldidx = cfil_sock_attached_count *
4557	sizeof(struct cfil_sock_stat);
4558	/ Bump the length in case new sockets gets attached /
4559	req->oldidx += req->oldidx >> `3`;
4560	goto done;
4561	}
4562
4563	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
4564	struct cfil_entry *entry;
4565	struct cfil_sock_stat stat;
4566	struct socket *so = cfi->cfi_so;
4567
4568	bzero(&stat, sizeof(struct cfil_sock_stat));
4569	stat.cfs_len = sizeof(struct cfil_sock_stat);
4570	stat.cfs_sock_id = cfi->cfi_sock_id;
4571	stat.cfs_flags = cfi->cfi_flags;
4572
4573	if (so != NULL) {
4574	stat.cfs_pid = so->last_pid;
4575	memcpy(stat.cfs_uuid, so->last_uuid,
4576	sizeof(uuid_t));
4577	if (so->so_flags & SOF_DELEGATED) {
4578	stat.cfs_e_pid = so->e_pid;
4579	memcpy(stat.cfs_e_uuid, so->e_uuid,
4580	sizeof(uuid_t));
4581	} else {
4582	stat.cfs_e_pid = so->last_pid;
4583	memcpy(stat.cfs_e_uuid, so->last_uuid,
4584	sizeof(uuid_t));
4585	}
4586
4587	stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
4588	stat.cfs_sock_type = so->so_proto->pr_type;
4589	stat.cfs_sock_protocol = so->so_proto->pr_protocol;
4590	}
4591
4592	stat.cfs_snd.cbs_pending_first =
4593	cfi->cfi_snd.cfi_pending_first;
4594	stat.cfs_snd.cbs_pending_last =
4595	cfi->cfi_snd.cfi_pending_last;
4596	stat.cfs_snd.cbs_inject_q_len =
4597	cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
4598	stat.cfs_snd.cbs_pass_offset =
4599	cfi->cfi_snd.cfi_pass_offset;
4600
4601	stat.cfs_rcv.cbs_pending_first =
4602	cfi->cfi_rcv.cfi_pending_first;
4603	stat.cfs_rcv.cbs_pending_last =
4604	cfi->cfi_rcv.cfi_pending_last;
4605	stat.cfs_rcv.cbs_inject_q_len =
4606	cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
4607	stat.cfs_rcv.cbs_pass_offset =
4608	cfi->cfi_rcv.cfi_pass_offset;
4609
4610	for (i = `0`; i < MAX_CONTENT_FILTER; i++) {
4611	struct cfil_entry_stat *estat;
4612	struct cfe_buf *ebuf;
4613	struct cfe_buf_stat *sbuf;
4614
4615	entry = &cfi->cfi_entries[i];
4616
4617	estat = &stat.ces_entries[i];
4618
4619	estat->ces_len = sizeof(struct cfil_entry_stat);
4620	estat->ces_filter_id = entry->cfe_filter ?
4621	entry->cfe_filter->cf_kcunit : `0`;
4622	estat->ces_flags = entry->cfe_flags;
4623	estat->ces_necp_control_unit =
4624	entry->cfe_necp_control_unit;
4625
4626	estat->ces_last_event.tv_sec =
4627	(int64_t)entry->cfe_last_event.tv_sec;
4628	estat->ces_last_event.tv_usec =
4629	(int64_t)entry->cfe_last_event.tv_usec;
4630
4631	estat->ces_last_action.tv_sec =
4632	(int64_t)entry->cfe_last_action.tv_sec;
4633	estat->ces_last_action.tv_usec =
4634	(int64_t)entry->cfe_last_action.tv_usec;
4635
4636	ebuf = &entry->cfe_snd;
4637	sbuf = &estat->ces_snd;
4638	sbuf->cbs_pending_first =
4639	cfil_queue_offset_first(&ebuf->cfe_pending_q);
4640	sbuf->cbs_pending_last =
4641	cfil_queue_offset_last(&ebuf->cfe_pending_q);
4642	sbuf->cbs_ctl_first =
4643	cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4644	sbuf->cbs_ctl_last =
4645	cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4646	sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4647	sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4648	sbuf->cbs_peeked = ebuf->cfe_peeked;
4649
4650	ebuf = &entry->cfe_rcv;
4651	sbuf = &estat->ces_rcv;
4652	sbuf->cbs_pending_first =
4653	cfil_queue_offset_first(&ebuf->cfe_pending_q);
4654	sbuf->cbs_pending_last =
4655	cfil_queue_offset_last(&ebuf->cfe_pending_q);
4656	sbuf->cbs_ctl_first =
4657	cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4658	sbuf->cbs_ctl_last =
4659	cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4660	sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4661	sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4662	sbuf->cbs_peeked = ebuf->cfe_peeked;
4663	}
4664	error = SYSCTL_OUT(req, &stat,
4665	sizeof (struct cfil_sock_stat));
4666	if (error != `0`)
4667	break;
4668	}
4669	done:
4670	cfil_rw_unlock_shared(&cfil_lck_rw);
4671
4672	#if SHOW_DEBUG
4673	if (req->oldptr != USER_ADDR_NULL) {
4674	cfil_info_show();
4675	}
4676	#endif
4677
4678	return (error);
4679	}
4680
4681	/*
4682	* UDP Socket Support
4683	*/
4684	static void
4685	cfil_hash_entry_log(int level, struct socket so, struct* cfil_hash_entry entry, uint64_t sockId, const* char* msg)
4686	{
4687	char local[MAX_IPv6_STR_LEN+`6`];
4688	char remote[MAX_IPv6_STR_LEN+`6`];
4689	const void *addr;
4690
4691	// No sock or not UDP, no-op
4692	if (so == NULL \|\| entry == NULL) {
4693	return;
4694	}
4695
4696	local[`0`] = remote[`0`] = `0x0`;
4697
4698	switch (entry->cfentry_family) {
4699	case AF_INET6:
4700	addr = &entry->cfentry_laddr.addr6;
4701	inet_ntop(AF_INET6, addr, local, sizeof(local));
4702	addr = &entry->cfentry_faddr.addr6;
4703	inet_ntop(AF_INET6, addr, remote, sizeof(local));
4704	break;
4705	case AF_INET:
4706	addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
4707	inet_ntop(AF_INET, addr, local, sizeof(local));
4708	addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
4709	inet_ntop(AF_INET, addr, remote, sizeof(local));
4710	break;
4711	default:
4712	return;
4713	}
4714
4715	CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
4716	msg,
4717	(uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
4718	ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
4719	}
4720
4721	static void
4722	cfil_inp_log(int level, struct socket so, const* char* msg)
4723	{
4724	struct inpcb *inp = NULL;
4725	char local[MAX_IPv6_STR_LEN+`6`];
4726	char remote[MAX_IPv6_STR_LEN+`6`];
4727	const void *addr;
4728
4729	if (so == NULL) {
4730	return;
4731	}
4732
4733	inp = sotoinpcb(so);
4734	if (inp == NULL) {
4735	return;
4736	}
4737
4738	local[`0`] = remote[`0`] = `0x0`;
4739
4740	#if INET6
4741	if (inp->inp_vflag & INP_IPV6) {
4742	addr = &inp->in6p_laddr.s6_addr32;
4743	inet_ntop(AF_INET6, addr, local, sizeof(local));
4744	addr = &inp->in6p_faddr.s6_addr32;
4745	inet_ntop(AF_INET6, addr, remote, sizeof(local));
4746	} else
4747	#endif /* INET6 */
4748	{
4749	addr = &inp->inp_laddr.s_addr;
4750	inet_ntop(AF_INET, addr, local, sizeof(local));
4751	addr = &inp->inp_faddr.s_addr;
4752	inet_ntop(AF_INET, addr, remote, sizeof(local));
4753	}
4754
4755	if (so->so_cfil != NULL)
4756	CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
4757	msg, IS_UDP(so) ? "UDP" : "TCP",
4758	(uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
4759	ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
4760	else
4761	CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
4762	msg, IS_UDP(so) ? "UDP" : "TCP",
4763	(uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
4764	ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
4765	}
4766
4767	static void
4768	cfil_info_log(int level, struct cfil_info cfil_info, const* char* msg)
4769	{
4770	if (cfil_info == NULL)
4771	return;
4772
4773	if (cfil_info->cfi_hash_entry != NULL)
4774	cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
4775	else
4776	cfil_inp_log(level, cfil_info->cfi_so, msg);
4777	}
4778
4779	errno_t
4780	cfil_db_init(struct socket *so)
4781	{
4782	errno_t error = `0`;
4783	struct cfil_db *db = NULL;
4784
4785	CFIL_LOG(LOG_INFO, "");
4786
4787	db = zalloc(cfil_db_zone);
4788	if (db == NULL) {
4789	error = ENOMEM;
4790	goto done;
4791	}
4792	bzero(db, sizeof(struct cfil_db));
4793	db->cfdb_so = so;
4794	db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
4795	if (db->cfdb_hashbase == NULL) {
4796	zfree(cfil_db_zone, db);
4797	db = NULL;
4798	error = ENOMEM;
4799	goto done;
4800	}
4801
4802	so->so_cfil_db = db;
4803
4804	done:
4805	return (error);
4806	}
4807
4808	void
4809	cfil_db_free(struct socket *so)
4810	{
4811	struct cfil_hash_entry *entry = NULL;
4812	struct cfil_hash_entry *temp_entry = NULL;
4813	struct cfilhashhead *cfilhash = NULL;
4814	struct cfil_db *db = NULL;
4815
4816	CFIL_LOG(LOG_INFO, "");
4817
4818	if (so == NULL \|\| so->so_cfil_db == NULL) {
4819	return;
4820	}
4821	db = so->so_cfil_db;
4822
4823	#if LIFECYCLE_DEBUG
4824	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
4825	(uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
4826	#endif
4827
4828	for (int i = `0`; i < CFILHASHSIZE; i++) {
4829	cfilhash = &db->cfdb_hashbase[i];
4830	LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
4831	if (entry->cfentry_cfil != NULL) {
4832	#if LIFECYCLE_DEBUG
4833	cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
4834	#endif
4835	cfil_info_free(entry->cfentry_cfil);
4836	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
4837	entry->cfentry_cfil = NULL;
4838	}
4839
4840	cfil_db_delete_entry(db, entry);
4841	if (so->so_flags & SOF_CONTENT_FILTER) {
4842	if (db->cfdb_count == `0`)
4843	so->so_flags &= ~SOF_CONTENT_FILTER;
4844	VERIFY(so->so_usecount > `0`);
4845	so->so_usecount--;
4846	}
4847	}
4848	}
4849
4850	// Make sure all entries are cleaned up!
4851	VERIFY(db->cfdb_count == `0`);
4852	#if LIFECYCLE_DEBUG
4853	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
4854	#endif
4855
4856	FREE(db->cfdb_hashbase, M_CFIL);
4857	zfree(cfil_db_zone, db);
4858	so->so_cfil_db = NULL;
4859	}
4860
4861	static bool
4862	fill_cfil_hash_entry_from_address(struct cfil_hash_entry entry, bool isLocal, struct* sockaddr *addr)
4863	{
4864	struct sockaddr_in *sin = NULL;
4865	struct sockaddr_in6 *sin6 = NULL;
4866
4867	if (entry == NULL \|\| addr == NULL) {
4868	return FALSE;
4869	}
4870
4871	switch (addr->sa_family) {
4872	case AF_INET:
4873	sin = satosin(addr);
4874	if (sin->sin_len != sizeof(*sin)) {
4875	return FALSE;
4876	}
4877	if (isLocal == TRUE) {
4878	entry->cfentry_lport = sin->sin_port;
4879	entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
4880	} else {
4881	entry->cfentry_fport = sin->sin_port;
4882	entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
4883	}
4884	entry->cfentry_family = AF_INET;
4885	return TRUE;
4886	case AF_INET6:
4887	sin6 = satosin6(addr);
4888	if (sin6->sin6_len != sizeof(*sin6)) {
4889	return FALSE;
4890	}
4891	if (isLocal == TRUE) {
4892	entry->cfentry_lport = sin6->sin6_port;
4893	entry->cfentry_laddr.addr6 = sin6->sin6_addr;
4894	} else {
4895	entry->cfentry_fport = sin6->sin6_port;
4896	entry->cfentry_faddr.addr6 = sin6->sin6_addr;
4897	}
4898	entry->cfentry_family = AF_INET6;
4899	return TRUE;
4900	default:
4901	return FALSE;
4902	}
4903	}
4904
4905	static bool
4906	fill_cfil_hash_entry_from_inp(struct cfil_hash_entry entry, bool isLocal, struct* inpcb *inp)
4907	{
4908	if (entry == NULL \|\| inp == NULL) {
4909	return FALSE;
4910	}
4911
4912	if (inp->inp_vflag & INP_IPV4) {
4913	if (isLocal == TRUE) {
4914	entry->cfentry_lport = inp->inp_lport;
4915	entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
4916	} else {
4917	entry->cfentry_fport = inp->inp_fport;
4918	entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
4919	}
4920	entry->cfentry_family = AF_INET;
4921	return TRUE;
4922	} else if (inp->inp_vflag & INP_IPV6) {
4923	if (isLocal == TRUE) {
4924	entry->cfentry_lport = inp->inp_lport;
4925	entry->cfentry_laddr.addr6 = inp->in6p_laddr;
4926	} else {
4927	entry->cfentry_fport = inp->inp_fport;
4928	entry->cfentry_faddr.addr6 = inp->in6p_faddr;
4929	}
4930	entry->cfentry_family = AF_INET6;
4931	return TRUE;
4932	}
4933	return FALSE;
4934	}
4935
4936	bool
4937	check_port(struct sockaddr *addr, u_short port)
4938	{
4939	struct sockaddr_in *sin = NULL;
4940	struct sockaddr_in6 *sin6 = NULL;
4941
4942	if (addr == NULL \|\| port == `0`) {
4943	return FALSE;
4944	}
4945
4946	switch (addr->sa_family) {
4947	case AF_INET:
4948	sin = satosin(addr);
4949	if (sin->sin_len != sizeof(*sin)) {
4950	return FALSE;
4951	}
4952	if (port == ntohs(sin->sin_port)) {
4953	return TRUE;
4954	}
4955	break;
4956	case AF_INET6:
4957	sin6 = satosin6(addr);
4958	if (sin6->sin6_len != sizeof(*sin6)) {
4959	return FALSE;
4960	}
4961	if (port == ntohs(sin6->sin6_port)) {
4962	return TRUE;
4963	}
4964	break;
4965	default:
4966	break;
4967	}
4968	return FALSE;
4969	}
4970
4971	struct cfil_hash_entry *
4972	cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
4973	{
4974	struct cfilhashhead *cfilhash = NULL;
4975	u_int32_t flowhash = (u_int32_t)(sock_id & `0x0ffffffff`);
4976	struct cfil_hash_entry *nextentry;
4977
4978	if (db == NULL \|\| db->cfdb_hashbase == NULL \|\| sock_id == `0`) {
4979	return NULL;
4980	}
4981
4982	flowhash &= db->cfdb_hashmask;
4983	cfilhash = &db->cfdb_hashbase[flowhash];
4984
4985	LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
4986	if (nextentry->cfentry_cfil != NULL &&
4987	nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
4988	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
4989	(uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
4990	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, `0`, "CFIL: UDP found entry");
4991	return nextentry;
4992	}
4993	}
4994
4995	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
4996	(uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
4997	return NULL;
4998	}
4999
5000	struct cfil_hash_entry *
5001	cfil_db_lookup_entry(struct cfil_db db, struct* sockaddr local, struct* sockaddr *remote)
5002	{
5003	struct cfil_hash_entry matchentry;
5004	struct cfil_hash_entry *nextentry = NULL;
5005	struct inpcb *inp = sotoinpcb(db->cfdb_so);
5006	u_int32_t hashkey_faddr = `0`, hashkey_laddr = `0`;
5007	int inp_hash_element = `0`;
5008	struct cfilhashhead *cfilhash = NULL;
5009
5010	CFIL_LOG(LOG_INFO, "");
5011
5012	if (inp == NULL) {
5013	goto done;
5014	}
5015
5016	if (local != NULL) {
5017	fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5018	} else {
5019	fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5020	}
5021	if (remote != NULL) {
5022	fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5023	} else {
5024	fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5025	}
5026
5027	#if INET6
5028	if (inp->inp_vflag & INP_IPV6) {
5029	hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[`3`];
5030	hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[`3`];
5031	} else
5032	#endif /* INET6 */
5033	{
5034	hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5035	hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5036	}
5037
5038	inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5039	matchentry.cfentry_lport, matchentry.cfentry_fport);
5040	inp_hash_element &= db->cfdb_hashmask;
5041
5042	cfilhash = &db->cfdb_hashbase[inp_hash_element];
5043
5044	LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5045
5046	#if INET6
5047	if ((inp->inp_vflag & INP_IPV6) &&
5048	nextentry->cfentry_lport == matchentry.cfentry_lport &&
5049	nextentry->cfentry_fport == matchentry.cfentry_fport &&
5050	IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5051	IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5052	#if DATA_DEBUG
5053	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, `0`, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5054	#endif
5055	return nextentry;
5056	} else
5057	#endif /* INET6 */
5058	if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5059	nextentry->cfentry_fport == matchentry.cfentry_fport &&
5060	nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5061	nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5062	#if DATA_DEBUG
5063	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, `0`, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5064	#endif
5065	return nextentry;
5066	}
5067	}
5068
5069	done:
5070	#if DATA_DEBUG
5071	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, `0`, "CFIL LOOKUP ENTRY: UDP no entry found");
5072	#endif
5073	return NULL;
5074	}
5075
5076	void
5077	cfil_db_delete_entry(struct cfil_db db, struct* cfil_hash_entry *hash_entry)
5078	{
5079	if (hash_entry == NULL)
5080	return;
5081
5082	LIST_REMOVE(hash_entry, cfentry_link);
5083	zfree(cfil_hash_entry_zone, hash_entry);
5084	db->cfdb_count--;
5085	if (db->cfdb_only_entry == hash_entry)
5086	db->cfdb_only_entry = NULL;
5087	}
5088
5089	struct cfil_hash_entry *
5090	cfil_db_add_entry(struct cfil_db db, struct* sockaddr local, struct* sockaddr *remote)
5091	{
5092	struct cfil_hash_entry *entry = NULL;
5093	struct inpcb *inp = sotoinpcb(db->cfdb_so);
5094	u_int32_t hashkey_faddr = `0`, hashkey_laddr = `0`;
5095	int inp_hash_element = `0`;
5096	struct cfilhashhead *cfilhash = NULL;
5097
5098	CFIL_LOG(LOG_INFO, "");
5099
5100	if (inp == NULL) {
5101	goto done;
5102	}
5103
5104	entry = zalloc(cfil_hash_entry_zone);
5105	if (entry == NULL) {
5106	goto done;
5107	}
5108	bzero(entry, sizeof(struct cfil_hash_entry));
5109
5110	if (local != NULL) {
5111	fill_cfil_hash_entry_from_address(entry, TRUE, local);
5112	} else {
5113	fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5114	}
5115	if (remote != NULL) {
5116	fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5117	} else {
5118	fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5119	}
5120	entry->cfentry_lastused = net_uptime();
5121
5122	#if INET6
5123	if (inp->inp_vflag & INP_IPV6) {
5124	hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[`3`];
5125	hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[`3`];
5126	} else
5127	#endif /* INET6 */
5128	{
5129	hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5130	hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5131	}
5132	entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5133	entry->cfentry_lport, entry->cfentry_fport);
5134	inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5135
5136	cfilhash = &db->cfdb_hashbase[inp_hash_element];
5137
5138	LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5139	db->cfdb_count++;
5140	db->cfdb_only_entry = entry;
5141	cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, `0`, "CFIL: cfil_db_add_entry: ADDED");
5142
5143	done:
5144	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5145	return entry;
5146	}
5147
5148	struct cfil_info *
5149	cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5150	{
5151	struct cfil_hash_entry *hash_entry = NULL;
5152
5153	CFIL_LOG(LOG_INFO, "");
5154
5155	if (db == NULL \|\| id == `0`) {
5156	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5157	(uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), id);
5158	return NULL;
5159	}
5160
5161	// This is an optimization for connected UDP socket which only has one flow.
5162	// No need to do the hash lookup.
5163	if (db->cfdb_count == `1`) {
5164	if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5165	db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5166	return (db->cfdb_only_entry->cfentry_cfil);
5167	}
5168	}
5169
5170	hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5171	return (hash_entry != NULL ? hash_entry->cfentry_cfil : NULL);
5172	}
5173
5174	struct cfil_hash_entry *
5175	cfil_sock_udp_get_flow(struct socket so, uint32_t filter_control_unit, bool outgoing, struct* sockaddr local, struct* sockaddr *remote)
5176	{
5177	#pragma unused(so, filter_control_unit, outgoing, local, remote)
5178	struct cfil_hash_entry *hash_entry = NULL;
5179
5180	errno_t error = `0`;
5181	socket_lock_assert_owned(so);
5182
5183	// If new socket, allocate cfil db
5184	if (so->so_cfil_db == NULL) {
5185	if (cfil_db_init(so) != `0`) {
5186	return (NULL);
5187	}
5188	}
5189
5190	// See if flow already exists.
5191	hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5192	if (hash_entry != NULL) {
5193	return (hash_entry);
5194	}
5195
5196	hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5197	if (hash_entry == NULL) {
5198	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5199	CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5200	return (NULL);
5201	}
5202
5203	if (cfil_info_alloc(so, hash_entry) == NULL \|\|
5204	hash_entry->cfentry_cfil == NULL) {
5205	cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5206	CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5207	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5208	return (NULL);
5209	}
5210
5211	#if LIFECYCLE_DEBUG
5212	cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5213	#endif
5214
5215	if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == `0`) {
5216	CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5217	filter_control_unit);
5218	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
5219	return (NULL);
5220	}
5221	CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
5222	(uint64_t)VM_KERNEL_ADDRPERM(so),
5223	filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
5224
5225	so->so_flags \|= SOF_CONTENT_FILTER;
5226	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
5227
5228	/ Hold a reference on the socket for each flow /
5229	so->so_usecount++;
5230
5231	error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
5232	/ We can recover from flow control or out of memory errors /
5233	if (error != `0` && error != ENOBUFS && error != ENOMEM)
5234	return (NULL);
5235
5236	CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
5237	return (hash_entry);
5238	}
5239
5240	errno_t
5241	cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
5242	struct sockaddr local, struct* sockaddr *remote,
5243	struct mbuf data, struct* mbuf *control, uint32_t flags)
5244	{
5245	#pragma unused(outgoing, so, local, remote, data, control, flags)
5246	errno_t error = `0`;
5247	uint32_t filter_control_unit;
5248	struct cfil_hash_entry *hash_entry = NULL;
5249	struct cfil_info *cfil_info = NULL;
5250
5251	socket_lock_assert_owned(so);
5252
5253	if (cfil_active_count == `0`) {
5254	CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
5255	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
5256	return (error);
5257	}
5258
5259	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5260	if (filter_control_unit == `0`) {
5261	CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
5262	return (error);
5263	}
5264
5265	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != `0`) {
5266	CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
5267	OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
5268	return (error);
5269	}
5270
5271	hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
5272	if (hash_entry == NULL \|\| hash_entry->cfentry_cfil == NULL) {
5273	CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
5274	return (EPIPE);
5275	}
5276	// Update last used timestamp, this is for flow Idle TO
5277	hash_entry->cfentry_lastused = net_uptime();
5278	cfil_info = hash_entry->cfentry_cfil;
5279
5280	if (cfil_info->cfi_flags & CFIF_DROP) {
5281	#if DATA_DEBUG
5282	cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, `0`, "CFIL: UDP DROP");
5283	#endif
5284	return (EPIPE);
5285	}
5286	if (control != NULL) {
5287	OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5288	}
5289	if (data->m_type == MT_OOBDATA) {
5290	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5291	(uint64_t)VM_KERNEL_ADDRPERM(so));
5292	OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5293	}
5294
5295	error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
5296
5297	return (error);
5298	}
5299
5300	/*
5301	* Go through all UDP flows for specified socket and returns TRUE if
5302	* any flow is still attached. If need_wait is TRUE, wait on first
5303	* attached flow.
5304	*/
5305	static int
5306	cfil_filters_udp_attached(struct socket *so, bool need_wait)
5307	{
5308	struct timespec ts;
5309	lck_mtx_t *mutex_held;
5310	struct cfilhashhead *cfilhash = NULL;
5311	struct cfil_db *db = NULL;
5312	struct cfil_hash_entry *hash_entry = NULL;
5313	struct cfil_hash_entry *temp_hash_entry = NULL;
5314	struct cfil_info *cfil_info = NULL;
5315	struct cfil_entry *entry = NULL;
5316	errno_t error = `0`;
5317	int kcunit;
5318	int attached = `0`;
5319
5320	socket_lock_assert_owned(so);
5321
5322	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil_db != NULL) {
5323
5324	if (so->so_proto->pr_getlock != NULL)
5325	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5326	else
5327	mutex_held = so->so_proto->pr_domain->dom_mtx;
5328	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5329
5330	db = so->so_cfil_db;
5331
5332	for (int i = `0`; i < CFILHASHSIZE; i++) {
5333	cfilhash = &db->cfdb_hashbase[i];
5334
5335	LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5336
5337	if (hash_entry->cfentry_cfil != NULL) {
5338
5339	cfil_info = hash_entry->cfentry_cfil;
5340	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5341	entry = &cfil_info->cfi_entries[kcunit - `1`];
5342
5343	/ Are we attached to the filter? /
5344	if (entry->cfe_filter == NULL) {
5345	continue;
5346	}
5347
5348	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`)
5349	continue;
5350	if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != `0`)
5351	continue;
5352
5353	attached = `1`;
5354
5355	if (need_wait == TRUE) {
5356	#if LIFECYCLE_DEBUG
5357	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
5358	#endif
5359
5360	ts.tv_sec = cfil_close_wait_timeout / `1000`;
5361	ts.tv_nsec = (cfil_close_wait_timeout % `1000`) *
5362	NSEC_PER_USEC * `1000`;
5363
5364	OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5365	cfil_info->cfi_flags \|= CFIF_CLOSE_WAIT;
5366	error = msleep((caddr_t)cfil_info, mutex_held,
5367	PSOCK \| PCATCH, "cfil_filters_udp_attached", &ts);
5368	cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
5369
5370	#if LIFECYCLE_DEBUG
5371	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
5372	#endif
5373
5374	/*
5375	* Force close in case of timeout
5376	*/
5377	if (error != `0`) {
5378	OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5379	#if LIFECYCLE_DEBUG
5380	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
5381	#endif
5382	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
5383	break;
5384	}
5385	}
5386	goto done;
5387	}
5388	}
5389	}
5390	}
5391	}
5392
5393	done:
5394	return (attached);
5395	}
5396
5397	int32_t
5398	cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
5399	{
5400	struct socket *so = sb->sb_so;
5401	struct cfi_buf *cfi_buf;
5402	uint64_t pending = `0`;
5403	uint64_t total_pending = `0`;
5404	struct cfilhashhead *cfilhash = NULL;
5405	struct cfil_db *db = NULL;
5406	struct cfil_hash_entry *hash_entry = NULL;
5407	struct cfil_hash_entry *temp_hash_entry = NULL;
5408
5409	socket_lock_assert_owned(so);
5410
5411	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil_db != NULL &&
5412	(check_thread == FALSE \|\| so->so_snd.sb_cfil_thread != current_thread())) {
5413
5414	db = so->so_cfil_db;
5415
5416	for (int i = `0`; i < CFILHASHSIZE; i++) {
5417	cfilhash = &db->cfdb_hashbase[i];
5418
5419	LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5420
5421	if (hash_entry->cfentry_cfil != NULL) {
5422	if ((sb->sb_flags & SB_RECV) == `0`)
5423	cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
5424	else
5425	cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
5426
5427	pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
5428	/*
5429	* If we are limited by the "chars of mbufs used" roughly
5430	* adjust so we won't overcommit
5431	*/
5432	if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
5433	pending = cfi_buf->cfi_pending_mbcnt;
5434
5435	total_pending += pending;
5436	}
5437	}
5438	}
5439
5440	VERIFY(total_pending < INT32_MAX);
5441	#if DATA_DEBUG
5442	CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
5443	(uint64_t)VM_KERNEL_ADDRPERM(so),
5444	total_pending, check_thread);
5445	#endif
5446	}
5447
5448	return (int32_t)(total_pending);
5449	}
5450
5451	int
5452	cfil_sock_udp_notify_shutdown(struct socket so, int* how, int drop_flag, int shut_flag)
5453	{
5454	struct cfil_info *cfil_info = NULL;
5455	struct cfilhashhead *cfilhash = NULL;
5456	struct cfil_db *db = NULL;
5457	struct cfil_hash_entry *hash_entry = NULL;
5458	struct cfil_hash_entry *temp_hash_entry = NULL;
5459	errno_t error = `0`;
5460	int done_count = `0`;
5461	int kcunit;
5462
5463	socket_lock_assert_owned(so);
5464
5465	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil_db != NULL) {
5466
5467	db = so->so_cfil_db;
5468
5469	for (int i = `0`; i < CFILHASHSIZE; i++) {
5470	cfilhash = &db->cfdb_hashbase[i];
5471
5472	LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5473
5474	if (hash_entry->cfentry_cfil != NULL) {
5475	cfil_info = hash_entry->cfentry_cfil;
5476
5477	// This flow is marked as DROP
5478	if (cfil_info->cfi_flags & drop_flag) {
5479	done_count++;
5480	continue;
5481	}
5482
5483	// This flow has been shut already, skip
5484	if (cfil_info->cfi_flags & shut_flag) {
5485	continue;
5486	}
5487	// Mark flow as shut
5488	cfil_info->cfi_flags \|= shut_flag;
5489	done_count++;
5490
5491	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5492	/ Disconnect incoming side /
5493	if (how != SHUT_WR) {
5494	error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, `0`);
5495	}
5496	/ Disconnect outgoing side /
5497	if (how != SHUT_RD) {
5498	error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, `1`);
5499	}
5500	}
5501	}
5502	}
5503	}
5504	}
5505
5506	if (done_count == `0`) {
5507	error = ENOTCONN;
5508	}
5509	return (error);
5510	}
5511
5512	int
5513	cfil_sock_udp_shutdown(struct socket so, int* *how)
5514	{
5515	int error = `0`;
5516
5517	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| (so->so_cfil_db == NULL))
5518	goto done;
5519
5520	socket_lock_assert_owned(so);
5521
5522	CFIL_LOG(LOG_INFO, "so %llx how %d",
5523	(uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5524
5525	/*
5526	* Check the state of the socket before the content filter
5527	*/
5528	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != `0`) {
5529	/ read already shut down /
5530	error = ENOTCONN;
5531	goto done;
5532	}
5533	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != `0`) {
5534	/ write already shut down /
5535	error = ENOTCONN;
5536	goto done;
5537	}
5538
5539	/*
5540	* shutdown read: SHUT_RD or SHUT_RDWR
5541	*/
5542	if (*how != SHUT_WR) {
5543	error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
5544	if (error != `0`)
5545	goto done;
5546	}
5547	/*
5548	* shutdown write: SHUT_WR or SHUT_RDWR
5549	*/
5550	if (*how != SHUT_RD) {
5551	error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
5552	if (error != `0`)
5553	goto done;
5554
5555	/*
5556	* When outgoing data is pending, we delay the shutdown at the
5557	* protocol level until the content filters give the final
5558	* verdict on the pending data.
5559	*/
5560	if (cfil_sock_data_pending(&so->so_snd) != `0`) {
5561	/*
5562	* When shutting down the read and write sides at once
5563	* we can proceed to the final shutdown of the read
5564	* side. Otherwise, we just return.
5565	*/
5566	if (*how == SHUT_WR) {
5567	error = EJUSTRETURN;
5568	} else if (*how == SHUT_RDWR) {
5569	*how = SHUT_RD;
5570	}
5571	}
5572	}
5573	done:
5574	return (error);
5575	}
5576
5577	void
5578	cfil_sock_udp_close_wait(struct socket *so)
5579	{
5580	socket_lock_assert_owned(so);
5581
5582	while (cfil_filters_udp_attached(so, FALSE)) {
5583	/*
5584	* Notify the filters we are going away so they can detach
5585	*/
5586	cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, `0`, `0`);
5587
5588	/*
5589	* Make sure we need to wait after the filter are notified
5590	* of the disconnection
5591	*/
5592	if (cfil_filters_udp_attached(so, TRUE) == `0`)
5593	break;
5594	}
5595	}
5596
5597	void
5598	cfil_sock_udp_is_closed(struct socket *so)
5599	{
5600	struct cfil_info *cfil_info = NULL;
5601	struct cfilhashhead *cfilhash = NULL;
5602	struct cfil_db *db = NULL;
5603	struct cfil_hash_entry *hash_entry = NULL;
5604	struct cfil_hash_entry *temp_hash_entry = NULL;
5605	errno_t error = `0`;
5606	int kcunit;
5607
5608	socket_lock_assert_owned(so);
5609
5610	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil_db != NULL) {
5611
5612	db = so->so_cfil_db;
5613
5614	for (int i = `0`; i < CFILHASHSIZE; i++) {
5615	cfilhash = &db->cfdb_hashbase[i];
5616
5617	LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5618	if (hash_entry->cfentry_cfil != NULL) {
5619
5620	cfil_info = hash_entry->cfentry_cfil;
5621
5622	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5623	/ Let the filters know of the closing /
5624	error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
5625	}
5626
5627	/ Last chance to push passed data out /
5628	error = cfil_acquire_sockbuf(so, cfil_info, `1`);
5629	if (error == `0`)
5630	cfil_service_inject_queue(so, cfil_info, `1`);
5631	cfil_release_sockbuf(so, `1`);
5632
5633	cfil_info->cfi_flags \|= CFIF_SOCK_CLOSED;
5634
5635	/ Pending data needs to go /
5636	cfil_flush_queues(so, cfil_info);
5637
5638	CFIL_INFO_VERIFY(cfil_info);
5639	}
5640	}
5641	}
5642	}
5643	}
5644
5645	void
5646	cfil_sock_udp_buf_update(struct sockbuf *sb)
5647	{
5648	struct cfil_info *cfil_info = NULL;
5649	struct cfilhashhead *cfilhash = NULL;
5650	struct cfil_db *db = NULL;
5651	struct cfil_hash_entry *hash_entry = NULL;
5652	struct cfil_hash_entry *temp_hash_entry = NULL;
5653	errno_t error = `0`;
5654	int outgoing;
5655	struct socket *so = sb->sb_so;
5656
5657	socket_lock_assert_owned(so);
5658
5659	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil_db != NULL) {
5660
5661	if (!cfil_sbtrim)
5662	return;
5663
5664	db = so->so_cfil_db;
5665
5666	for (int i = `0`; i < CFILHASHSIZE; i++) {
5667	cfilhash = &db->cfdb_hashbase[i];
5668
5669	LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5670	if (hash_entry->cfentry_cfil != NULL) {
5671
5672	cfil_info = hash_entry->cfentry_cfil;
5673
5674	if ((sb->sb_flags & SB_RECV) == `0`) {
5675	if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == `0`)
5676	return;
5677	outgoing = `1`;
5678	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5679	} else {
5680	if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == `0`)
5681	return;
5682	outgoing = `0`;
5683	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5684	}
5685
5686	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5687	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5688
5689	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
5690	if (error == `0`)
5691	cfil_service_inject_queue(so, cfil_info, outgoing);
5692	cfil_release_sockbuf(so, outgoing);
5693	}
5694	}
5695	}
5696	}
5697	}
5698
5699	void
5700	cfil_filter_show(u_int32_t kcunit)
5701	{
5702	struct content_filter *cfc = NULL;
5703	struct cfil_entry *entry;
5704	int count = `0`;
5705
5706	if (content_filters == NULL) {
5707	return;
5708	}
5709	if (kcunit > MAX_CONTENT_FILTER) {
5710	return;
5711	}
5712
5713	cfil_rw_lock_shared(&cfil_lck_rw);
5714
5715	if (content_filters[kcunit - `1`] == NULL) {
5716	cfil_rw_unlock_shared(&cfil_lck_rw);
5717	return;
5718	}
5719	cfc = content_filters[kcunit - `1`];
5720
5721	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
5722	kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
5723	if (cfc->cf_flags & CFF_DETACHING)
5724	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
5725	if (cfc->cf_flags & CFF_ACTIVE)
5726	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
5727	if (cfc->cf_flags & CFF_FLOW_CONTROLLED)
5728	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
5729
5730	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
5731
5732	if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
5733	struct cfil_info *cfil_info = entry->cfe_cfil_info;
5734
5735	count++;
5736
5737	if (entry->cfe_flags & CFEF_CFIL_DETACHED)
5738	cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
5739	else
5740	cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
5741	}
5742	}
5743
5744	CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
5745
5746	cfil_rw_unlock_shared(&cfil_lck_rw);
5747
5748	}
5749
5750	void
5751	cfil_info_show(void)
5752	{
5753	struct cfil_info *cfil_info;
5754	int count = `0`;
5755
5756	cfil_rw_lock_shared(&cfil_lck_rw);
5757
5758	CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
5759
5760	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
5761
5762	count++;
5763
5764	cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
5765
5766	if (cfil_info->cfi_flags & CFIF_DROP)
5767	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
5768	if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)
5769	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
5770	if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED)
5771	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
5772	if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN)
5773	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
5774	if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT)
5775	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
5776	if (cfil_info->cfi_flags & CFIF_SHUT_WR)
5777	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
5778	if (cfil_info->cfi_flags & CFIF_SHUT_RD)
5779	CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
5780	}
5781
5782	CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
5783
5784	cfil_rw_unlock_shared(&cfil_lck_rw);
5785	}
5786
5787	bool
5788	cfil_info_idle_timed_out(struct cfil_info cfil_info, int* timeout, u_int32_t current_time)
5789	{
5790	if (cfil_info && cfil_info->cfi_hash_entry &&
5791	(current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
5792	#if GC_DEBUG
5793	cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
5794	#endif
5795	return true;
5796	}
5797	return false;
5798	}
5799
5800	bool
5801	cfil_info_action_timed_out(struct cfil_info cfil_info, int* timeout)
5802	{
5803	struct cfil_entry *entry;
5804	struct timeval current_tv;
5805	struct timeval diff_time;
5806
5807	if (cfil_info == NULL)
5808	return false;
5809
5810	/*
5811	* If we have queued up more data than passed offset and we haven't received
5812	* an action from user space for a while (the user space filter might have crashed),
5813	* return action timed out.
5814	*/
5815	if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset \|\|
5816	cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
5817
5818	microuptime(&current_tv);
5819
5820	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5821	entry = &cfil_info->cfi_entries[kcunit - `1`];
5822
5823	if (entry->cfe_filter == NULL)
5824	continue;
5825
5826	if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset \|\|
5827	cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
5828	// haven't gotten an action from this filter, check timeout
5829	timersub(&current_tv, &entry->cfe_last_action, &diff_time);
5830	if (diff_time.tv_sec >= timeout) {
5831	#if GC_DEBUG
5832	cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
5833	#endif
5834	return true;
5835	}
5836	}
5837	}
5838	}
5839	return false;
5840	}
5841
5842	bool
5843	cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
5844	{
5845	if (cfil_info == NULL)
5846	return false;
5847
5848	/*
5849	* Clean up flow if it exceeded queue thresholds
5850	*/
5851	if (cfil_info->cfi_snd.cfi_tail_drop_cnt \|\|
5852	cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
5853	#if GC_DEBUG
5854	CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
5855	cfil_udp_gc_mbuf_num_max,
5856	cfil_udp_gc_mbuf_cnt_max,
5857	cfil_info->cfi_snd.cfi_tail_drop_cnt,
5858	cfil_info->cfi_rcv.cfi_tail_drop_cnt);
5859	cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
5860	#endif
5861	return true;
5862	}
5863
5864	return false;
5865	}
5866
5867	static void
5868	cfil_udp_gc_thread_sleep(bool forever)
5869	{
5870	if (forever) {
5871	(void) assert_wait((event_t) &cfil_sock_udp_attached_count,
5872	THREAD_INTERRUPTIBLE);
5873	} else {
5874	uint64_t deadline = `0`;
5875	nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
5876	clock_absolutetime_interval_to_deadline(deadline, &deadline);
5877
5878	(void) assert_wait_deadline(&cfil_sock_udp_attached_count,
5879	THREAD_INTERRUPTIBLE, deadline);
5880	}
5881	}
5882
5883	static void
5884	cfil_udp_gc_thread_func(void *v, wait_result_t w)
5885	{
5886	#pragma unused(v, w)
5887
5888	ASSERT(cfil_udp_gc_thread == current_thread());
5889	thread_set_thread_name(current_thread(), "CFIL_UPD_GC");
5890
5891	// Kick off gc shortly
5892	cfil_udp_gc_thread_sleep(false);
5893	thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
5894	/ NOTREACHED /
5895	}
5896
5897	static void
5898	cfil_info_udp_expire(void *v, wait_result_t w)
5899	{
5900	#pragma unused(v, w)
5901
5902	static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
5903	static uint32_t expired_count = `0`;
5904
5905	struct cfil_info *cfil_info;
5906	struct cfil_hash_entry *hash_entry;
5907	struct cfil_db *db;
5908	struct socket *so;
5909	u_int32_t current_time = `0`;
5910
5911	current_time = net_uptime();
5912
5913	// Get all expired UDP flow ids
5914	cfil_rw_lock_shared(&cfil_lck_rw);
5915
5916	if (cfil_sock_udp_attached_count == `0`) {
5917	cfil_rw_unlock_shared(&cfil_lck_rw);
5918	goto go_sleep;
5919	}
5920
5921	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
5922	if (expired_count >= UDP_FLOW_GC_MAX_COUNT)
5923	break;
5924
5925	if (IS_UDP(cfil_info->cfi_so)) {
5926	if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) \|\|
5927	cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) \|\|
5928	cfil_info_buffer_threshold_exceeded(cfil_info)) {
5929	expired_array[expired_count] = cfil_info->cfi_sock_id;
5930	expired_count++;
5931	}
5932	}
5933	}
5934	cfil_rw_unlock_shared(&cfil_lck_rw);
5935
5936	if (expired_count == `0`)
5937	goto go_sleep;
5938
5939	for (uint32_t i = `0`; i < expired_count; i++) {
5940
5941	// Search for socket (UDP only and lock so)
5942	so = cfil_socket_from_sock_id(expired_array[i], true);
5943	if (so == NULL) {
5944	continue;
5945	}
5946
5947	cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
5948	if (cfil_info == NULL) {
5949	goto unlock;
5950	}
5951
5952	db = so->so_cfil_db;
5953	hash_entry = cfil_info->cfi_hash_entry;
5954
5955	if (db == NULL \|\| hash_entry == NULL) {
5956	goto unlock;
5957	}
5958
5959	#if GC_DEBUG \|\| LIFECYCLE_DEBUG
5960	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
5961	#endif
5962
5963	cfil_db_delete_entry(db, hash_entry);
5964	cfil_info_free(cfil_info);
5965	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5966
5967	if (so->so_flags & SOF_CONTENT_FILTER) {
5968	if (db->cfdb_count == `0`)
5969	so->so_flags &= ~SOF_CONTENT_FILTER;
5970	VERIFY(so->so_usecount > `0`);
5971	so->so_usecount--;
5972	}
5973	unlock:
5974	socket_unlock(so, `1`);
5975	}
5976
5977	#if GC_DEBUG
5978	CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
5979	#endif
5980	expired_count = `0`;
5981
5982	go_sleep:
5983
5984	// Sleep forever (until waken up) if no more UDP flow to clean
5985	cfil_rw_lock_shared(&cfil_lck_rw);
5986	cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == `0` ? true : false);
5987	cfil_rw_unlock_shared(&cfil_lck_rw);
5988	thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
5989	/ NOTREACHED /
5990	}
5991
5992	struct m_tag *
5993	cfil_udp_save_socket_state(struct cfil_info cfil_info, struct* mbuf *m)
5994	{
5995	struct m_tag *tag = NULL;
5996	struct cfil_tag *ctag = NULL;
5997	struct cfil_hash_entry *hash_entry = NULL;
5998
5999	if (cfil_info == NULL \|\| cfil_info->cfi_so == NULL \|\|
6000	cfil_info->cfi_hash_entry == NULL \|\| m == NULL \|\| !(m->m_flags & M_PKTHDR)) {
6001	return NULL;
6002	}
6003
6004	/ Allocate a tag /
6005	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6006	sizeof(struct cfil_tag), M_DONTWAIT, m);
6007
6008	if (tag) {
6009	ctag = (struct cfil_tag*)(tag + `1`);
6010	ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6011	ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6012
6013	hash_entry = cfil_info->cfi_hash_entry;
6014	if (hash_entry->cfentry_family == AF_INET6) {
6015	fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6016	&hash_entry->cfentry_faddr.addr6,
6017	hash_entry->cfentry_fport);
6018	} else if (hash_entry->cfentry_family == AF_INET) {
6019	fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6020	hash_entry->cfentry_faddr.addr46.ia46_addr4,
6021	hash_entry->cfentry_fport);
6022	}
6023	m_tag_prepend(m, tag);
6024	return (tag);
6025	}
6026	return NULL;
6027	}
6028
6029	struct m_tag *
6030	cfil_udp_get_socket_state(struct mbuf m, uint32_t state_change_cnt, short *options,
6031	struct sockaddr **faddr)
6032	{
6033	struct m_tag *tag = NULL;
6034	struct cfil_tag *ctag = NULL;
6035
6036	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6037	if (tag) {
6038	ctag = (struct cfil_tag *)(tag + `1`);
6039	if (state_change_cnt)
6040	*state_change_cnt = ctag->cfil_so_state_change_cnt;
6041	if (options)
6042	*options = ctag->cfil_so_options;
6043	if (faddr)
6044	faddr = (struct* sockaddr *) &ctag->cfil_faddr;
6045
6046	/*
6047	* Unlink tag and hand it over to caller.
6048	* Note that caller will be responsible to free it.
6049	*/
6050	m_tag_unlink(m, tag);
6051	return tag;
6052	}
6053	return NULL;
6054	}
6055
6056
6057

Browse the source code of codebrowser/bsd/net/content_filter.c