/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/proc_reg.h>
#include <i386/cpuid.h>
#include <i386/tsc.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>

/*
 * PCID (Process Context Identifier), a.k.a. tagged TLB, support.
 * On processors with this feature, unless it is disabled via the
 * -pmap_pcid_disable boot-arg, the following algorithm is in effect:
 * Each processor maintains an array of tag refcounts, indexed by tag.
 * Each address space maintains an array of tags, indexed by CPU number.
 * Each address space also maintains a coherency vector, indexed by CPU
 * number, indicating that the TLB state for that address space has a
 * pending invalidation on that CPU.
 * On a context switch, a refcounted tag is lazily assigned to the newly
 * dispatched (CPU, address space) tuple.
 * When an inactive address space is invalidated on a remote CPU, it is
 * marked for invalidation upon the next dispatch. Some invalidations are
 * also processed at the user/kernel boundary.
 * Provisions are made for the case where a CPU is overcommitted, i.e.
 * more active address spaces exist than the number of logical tags
 * provided for by the processor architecture (currently 4096).
 * The algorithm assumes the processor remaps the logical tags
 * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10)
 */

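/*
 * pmap_pcid_ncpus: count of CPUs on which PCID has been enabled.
 * pmap_pcid_disabled: set via the -pmap_pcid_disable boot-arg.
 * pcid_data: per-CPU PCID bookkeeping, cache-line aligned to avoid
 * false sharing.
 */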
uint32_t pmap_pcid_ncpus;
boolean_t pmap_pcid_disabled = FALSE;
pcid_cdata_t pcid_data[MAX_CPUS] __attribute__((aligned(64)));

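/*
 * Per-CPU PCID configuration: honors the -pmap_pcid_disable boot-arg, checks
 * CPUID-reported PCID support, enables CR4.PCIDE (ensuring CR4.PGE is set
 * first), and wires up this CPU's PCID refcount and coherency bookkeeping.
 */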
void pmap_pcid_configure(void) {
    int ccpu = cpu_number();
    uintptr_t cr4 = get_cr4();
    boolean_t pcid_present = FALSE;

    pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
    pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
    pmap_assert(cpu_mode_is64bit());

    if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof(pmap_pcid_disabled))) {
        pmap_pcid_log("PMAP: PCID feature disabled\n");
        printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
        kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
    }
    /* no_shared_cr3 + PCID is currently unsupported. TODO: remove the no_shared_cr3 coupling. */
#if DEBUG
    if (pmap_pcid_disabled == FALSE)
        no_shared_cr3 = FALSE;
    else
        no_shared_cr3 = TRUE;
#else
    if (no_shared_cr3)
        pmap_pcid_disabled = TRUE;
#endif
    if (pmap_pcid_disabled || no_shared_cr3) {
        unsigned i;
        /* Reset PCID status, as we may have picked up strays if discovered
         * prior to platform expert initialization.
         */
        for (i = 0; i < real_ncpus; i++) {
            if (cpu_datap(i)) {
                cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
            }
            pmap_pcid_ncpus = 0;
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        return;
    }
    /* DRKTODO: assert if features haven't been discovered yet. Redundant
     * invocation of cpu_mode_init and descendants masks this for now.
     */
    if ((cpuid_features() & CPUID_FEATURE_PCID))
        pcid_present = TRUE;
    else {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
        return;
    }
    if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
        pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
        return;
    }
    if (pcid_present == TRUE) {
        pmap_pcid_log("Pre-PCID:CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);

        if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
            panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
        }
        if ((get_cr4() & CR4_PGE) == 0) {
            set_cr4(get_cr4() | CR4_PGE);
            pmap_pcid_log("Toggled PGE ON (CPU: %d)\n", ccpu);
        }
        set_cr4(get_cr4() | CR4_PCIDE);
        pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
        tlb_flush_global();
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

        if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
            pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
            cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
            &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
        cpu_datap(ccpu)->cpu_pcid_data = &pcid_data[ccpu];
        /* PCID 0 is reserved for the kernel pmap; pin its refcount so it is
         * never handed out by pmap_pcid_allocate_pcid().
         */
        cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0] = 1;
    }
}

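/*
 * Mark every per-CPU tag slot of a freshly created user pmap as unallocated;
 * tags are assigned lazily on first dispatch by pmap_pcid_activate().
 */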
void pmap_pcid_initialize(pmap_t p) {
    unsigned i;
    unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

    pmap_assert(nc >= real_ncpus);
    for (i = 0; i < nc; i++) {
        p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID;
        /* We assume here that the coherency vector is zeroed by
         * pmap_create
         */
    }
}

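/*
 * The kernel pmap is special-cased: it uses PCID 0 on every CPU (see the
 * refcount pinned in pmap_pcid_configure()), so its per-CPU tag slots are
 * simply zeroed rather than lazily allocated.
 */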
void pmap_pcid_initialize_kernel(pmap_t p) {
    unsigned i;
    unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

    for (i = 0; i < nc; i++) {
        p->pmap_pcid_cpus[i] = 0;
        /* We assume here that the coherency vector is zeroed by
         * pmap_create
         */
    }
}

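/*
 * Allocate a PCID on CPU ccpu: first try the per-CPU free hint, then linearly
 * scan the refcount array for a free tag. If no tag is free (the CPU is
 * oversubscribed), share the least-referenced tag; the resulting conflict is
 * detected at activation time and forces a non-preserving CR3 load.
 */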
pcid_t pmap_pcid_allocate_pcid(int ccpu) {
    int i;
    pcid_ref_t cur_min = 0xFF;
    uint32_t cur_min_index = ~1;
    pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0];
    pcid_ref_t old_count;

    if ((i = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint) != 0) {
        if (cpu_pcid_refcounts[i] == 0) {
            (void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
            cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = 0;
            return i;
        }
    }
    /* Linear scan to discover a free slot, with hint. Room for optimization,
     * but with intelligent prefetchers this should be adequately performant,
     * as it is invoked only on the first dispatch of a new address space onto
     * a given processor. DRKTODO: use larger loads and zero-byte discovery --
     * any pattern != ~1 should signify a free slot.
     */
    for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) {
        pcid_ref_t cur_refcount = cpu_pcid_refcounts[i];

        pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT);

        if (cur_refcount == 0) {
            (void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
            return i;
        } else {
            if (cur_refcount < cur_min) {
                cur_min_index = i;
                cur_min = cur_refcount;
            }
        }
    }
    pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID);
    /* Consider "rebalancing" tags actively in highly oversubscribed cases,
     * perhaps selecting tags with lower activity.
     */

    old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1);
    pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT);
    return (cur_min_index);
}

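/*
 * Drop tpmap's reference on the PCID it holds for CPU ccpu. If this was the
 * last reference, record the tag in the per-CPU free hint so the next
 * allocation can reuse it cheaply; also clear the last-dispatched record if
 * it still points at this pmap, so a dangling pmap pointer is not left behind.
 */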
void pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) {
    pcid_t pcid;
    pmap_t lp;
    pcid_ref_t prior_count;

    pcid = tpmap->pmap_pcid_cpus[ccpu];
    pmap_assert(pcid != PMAP_PCID_INVALID_PCID);
    if (pcid == PMAP_PCID_INVALID_PCID)
        return;

    lp = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid];
    pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID);
    pmap_assert(cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid] >= 1);

    if (lp == tpmap)
        (void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID);

    if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid], 1)) == 1) {
        cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = pcid;
    }
    pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT);
}

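/*
 * Release every per-CPU PCID still held by a pmap that is being destroyed.
 * Callers must have interrupts or preemption disabled, per the assertion below.
 */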
void pmap_destroy_pcid_sync(pmap_t p) {
    int i;
    pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
    for (i = 0; i < PMAP_PCID_MAX_CPUS; i++)
        if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID)
            pmap_pcid_deallocate_pcid(i, p);
}

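/*
 * Return the PCID to accompany a CR3 load for the given (pmap, thread, CPU)
 * tuple. For pmaps with an accessible pagezero, the kernel pmap's PCID is
 * returned unless the thread is inside a copyio window, mirroring the
 * kernel-CR3 substitution performed in pmap_pcid_activate().
 */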
pcid_t pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu) {
    pmap_t active_pmap = cpmap;

    if (__improbable(cpmap->pagezero_accessible)) {
        if ((cthread->machine.specFlags & CopyIOActive) == 0) {
            active_pmap = kernel_pmap;
        }
    }

    return active_pmap->pmap_pcid_cpus[ccpu];
}
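
/* Diagnostic counter: number of activations below that take the no-pagezero path. */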
int npz = 0;

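/*
 * Debug-only record of the most recently composed CR3/PCID value per CPU
 * (indexed modulo PCID_RECORD_SIZE); bit 63 is set when the corresponding
 * CR3 load preserved cached translations.
 */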
#if PMAP_ASSERT
#define PCID_RECORD_SIZE 128
uint64_t pcid_record_array[PCID_RECORD_SIZE];
#endif

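/*
 * Called on the context-switch path to activate tpmap on CPU ccpu: allocate a
 * PCID if none is assigned yet, detect pending invalidations and tag conflicts
 * (a reused tag last dispatched for a different pmap), and load CR3 with the
 * "preserve" flag set only when the existing tagged translations remain valid.
 * Also records the composed user CR3 in the per-CPU and shadow CPU data.
 */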
void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t copyio) {
    pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu];
    pmap_t last_pmap;
    boolean_t pcid_conflict = FALSE, pending_flush = FALSE;
    pcid_cdata_t *pcdata = cpu_datap(ccpu)->cpu_pcid_data;

    pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
    if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
        new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
    }

    pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
    cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
    cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

    pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
    if (__probable(pending_flush == FALSE)) {
        last_pmap = pcdata->cpu_pcid_last_pmap_dispatched[new_pcid];
        pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
    }
    if (__improbable(pending_flush || pcid_conflict)) {
        pmap_pcid_validate_cpu(tpmap, ccpu);
    }
    /* Consider making this a unique id */
    pcdata->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

    pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
    pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) ||
        ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
    pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
    pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
    /* Diagnostic to detect pagetable anchor corruption */
    if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
        __asm__ volatile("int3");
#endif /* PMAP_ASSERT */

    pmap_paddr_t ncr3 = tpmap->pm_cr3;

    if (__improbable(nopagezero)) {
        pending_flush = TRUE;
        if (copyio == FALSE) {
            new_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
            ncr3 = kernel_pmap->pm_cr3;
        }
        cpu_datap(ccpu)->cpu_kernel_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
        npz++;
    }

    uint64_t preserve = !(pending_flush || pcid_conflict);
    set_cr3_composed(ncr3, new_pcid, preserve);
#if DEBUG
    cpu_datap(ccpu)->cpu_pcid_last_cr3 = ncr3 | new_pcid | preserve << 63;
#endif
    /* The user CR3 carries the pmap's PCID offset by PMAP_PCID_MAX_PCID,
     * except for PCID 0 (the kernel pmap), which is left unbiased.
     */
    uint64_t spcid = (new_pcid + PMAP_PCID_MAX_PCID);
    if (new_pcid == 0) {
        spcid = 0;
    }
    uint64_t scr3 = tpmap->pm_ucr3 | spcid;

    cpu_datap(ccpu)->cpu_ucr3 = scr3;
    cpu_shadowp(ccpu)->cpu_ucr3 = scr3;

    cpu_shadowp(ccpu)->cpu_task_cr3 = ncr3 | new_pcid;

    if (!pending_flush) {
        /* We did not previously observe a pending invalidation for this
         * ASID. However, the load from the coherency vector could've been
         * reordered ahead of the store to the active_cr3 field (in the
         * context switch path, our caller). Re-consult the pending
         * invalidation vector after the CR3 write. We rely on MOV CR3's
         * documented serializing property to avoid insertion of an
         * expensive barrier. (DRK)
         */
        pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
        if (__improbable(pending_flush != 0)) {
            pmap_pcid_validate_cpu(tpmap, ccpu);
            set_cr3_composed(ncr3, new_pcid, FALSE);
        }
    }
    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
    KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}