| 1 | /* |
| 2 | * Copyright (c) 2011 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
/* Manage the kperf sampling timers, including the PET (profile-every-thread) timer */
| 30 | |
| 31 | #include <mach/mach_types.h> |
| 32 | #include <kern/cpu_data.h> /* current_thread() */ |
| 33 | #include <kern/kalloc.h> |
| 34 | #include <stdatomic.h> |
| 35 | #include <sys/errno.h> |
| 36 | #include <sys/vm.h> |
| 37 | #include <sys/ktrace.h> |
| 38 | |
| 39 | #include <machine/machine_routines.h> |
| 40 | #if defined(__x86_64__) |
| 41 | #include <i386/mp.h> |
| 42 | #endif /* defined(__x86_64__) */ |
| 43 | |
| 44 | #include <kperf/kperf.h> |
| 45 | #include <kperf/buffer.h> |
| 46 | #include <kperf/context.h> |
| 47 | #include <kperf/action.h> |
| 48 | #include <kperf/kperf_timer.h> |
| 49 | #include <kperf/kperf_arch.h> |
| 50 | #include <kperf/pet.h> |
| 51 | #include <kperf/sample.h> |
| 52 | |
/* the array of timers */
| 54 | struct kperf_timer *kperf_timerv = NULL; |
| 55 | unsigned int kperf_timerc = 0; |
| 56 | |
static unsigned int pet_timer_id = 999; /* out-of-range sentinel: no PET timer configured */
| 58 | |
| 59 | /* maximum number of timers we can construct */ |
| 60 | #define TIMER_MAX (16) |
| 61 | |
| 62 | static uint64_t min_period_abstime; |
| 63 | static uint64_t min_period_bg_abstime; |
| 64 | static uint64_t min_period_pet_abstime; |
| 65 | static uint64_t min_period_pet_bg_abstime; |
| 66 | |
| 67 | static uint64_t |
| 68 | kperf_timer_min_period_abstime(void) |
| 69 | { |
| 70 | if (ktrace_background_active()) { |
| 71 | return min_period_bg_abstime; |
| 72 | } else { |
| 73 | return min_period_abstime; |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | static uint64_t |
| 78 | kperf_timer_min_pet_period_abstime(void) |
| 79 | { |
| 80 | if (ktrace_background_active()) { |
| 81 | return min_period_pet_bg_abstime; |
| 82 | } else { |
| 83 | return min_period_pet_abstime; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | static void |
| 88 | kperf_timer_schedule(struct kperf_timer *timer, uint64_t now) |
| 89 | { |
| 90 | BUF_INFO(PERF_TM_SCHED, timer->period); |
| 91 | |
| 92 | /* if we re-programmed the timer to zero, just drop it */ |
| 93 | if (timer->period == 0) { |
| 94 | return; |
| 95 | } |
| 96 | |
| 97 | /* calculate deadline */ |
| 98 | uint64_t deadline = now + timer->period; |
| 99 | |
	/* re-arm the timer; TIMER_CALL_SYS_CRITICAL asks for no coalescing (slop) */
| 101 | timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL); |
| 102 | } |
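
/*
 * Worked example (illustrative numbers only): on a system whose
 * mach_absolute_time() timebase ticks at 24 MHz, a 1 ms period is 24,000
 * abstime ticks, so a timer that fires at abstime 1,000,000 is re-armed
 * with a deadline of 1,024,000.
 */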
| 103 | |
| 104 | static void |
| 105 | kperf_sample_cpu(struct kperf_timer *timer, bool system_sample, |
| 106 | bool only_system) |
| 107 | { |
| 108 | assert(timer != NULL); |
| 109 | |
| 110 | /* Always cut a tracepoint to show a sample event occurred */ |
| 111 | BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START, 0); |
| 112 | |
| 113 | int ncpu = cpu_number(); |
| 114 | |
| 115 | struct kperf_sample *intbuf = kperf_intr_sample_buffer(); |
| 116 | #if DEVELOPMENT || DEBUG |
| 117 | intbuf->sample_time = mach_absolute_time(); |
| 118 | #endif /* DEVELOPMENT || DEBUG */ |
| 119 | |
| 120 | /* On a timer, we can see the "real" current thread */ |
| 121 | thread_t thread = current_thread(); |
| 122 | task_t task = get_threadtask(thread); |
| 123 | struct kperf_context ctx = { |
| 124 | .cur_thread = thread, |
| 125 | .cur_task = task, |
| 126 | .cur_pid = task_pid(task), |
| 127 | .trigger_type = TRIGGER_TYPE_TIMER, |
| 128 | .trigger_id = (unsigned int)(timer - kperf_timerv), |
| 129 | }; |
| 130 | |
| 131 | if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) { |
| 132 | kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread); |
| 133 | } |
| 134 | |
| 135 | /* make sure sampling is on */ |
| 136 | unsigned int status = kperf_sampling_status(); |
| 137 | if (status == KPERF_SAMPLING_OFF) { |
| 138 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_OFF); |
| 139 | return; |
| 140 | } else if (status == KPERF_SAMPLING_SHUTDOWN) { |
| 141 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_SHUTDOWN); |
| 142 | return; |
| 143 | } |
| 144 | |
| 145 | /* call the action -- kernel-only from interrupt, pend user */ |
| 146 | int r = kperf_sample(intbuf, &ctx, timer->actionid, |
| 147 | SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) | |
| 148 | (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0)); |
| 149 | |
| 150 | /* end tracepoint is informational */ |
| 151 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r); |
| 152 | |
| 153 | (void)atomic_fetch_and_explicit(&timer->pending_cpus, |
| 154 | ~(UINT64_C(1) << ncpu), memory_order_relaxed); |
| 155 | } |
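
/*
 * For context, a sketch of the other side of the pending_cpus handshake,
 * which lives in the per-architecture kperf_mp_broadcast_other_running()
 * (illustrative, not the authoritative implementation): the broadcaster
 * sets each target CPU's bit before sending the IPI, and the bit is
 * cleared above once that CPU's sample completes:
 *
 *	atomic_fetch_or_explicit(&timer->pending_cpus,
 *	    UINT64_C(1) << target_cpu, memory_order_relaxed);
 *	// ...then IPI target_cpu, which runs kperf_ipi_handler()...
 */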
| 156 | |
| 157 | void |
| 158 | kperf_ipi_handler(void *param) |
| 159 | { |
| 160 | kperf_sample_cpu((struct kperf_timer *)param, false, false); |
| 161 | } |
| 162 | |
| 163 | static void |
| 164 | kperf_timer_handler(void *param0, __unused void *param1) |
| 165 | { |
| 166 | struct kperf_timer *timer = param0; |
| 167 | unsigned int ntimer = (unsigned int)(timer - kperf_timerv); |
| 168 | unsigned int ncpus = machine_info.logical_cpu_max; |
| 169 | bool system_only_self = true; |
| 170 | |
| 171 | if (timer->actionid == 0) { |
| 172 | return; |
| 173 | } |
| 174 | |
| 175 | timer->active = 1; |
| 176 | #if DEVELOPMENT || DEBUG |
| 177 | timer->fire_time = mach_absolute_time(); |
| 178 | #endif /* DEVELOPMENT || DEBUG */ |
| 179 | |
	/* don't IPI the other CPUs if sampling is shutting down */
| 181 | if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) { |
| 182 | goto deactivate; |
| 183 | } |
| 184 | |
| 185 | BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period, |
| 186 | timer->actionid); |
| 187 | |
| 188 | if (ntimer == pet_timer_id) { |
| 189 | kperf_pet_fire_before(); |
| 190 | |
		/* clean up the thread-on-CPUs cache */
| 192 | bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus)); |
| 193 | } |
| 194 | |
| 195 | /* |
| 196 | * IPI other cores only if the action has non-system samplers. |
| 197 | */ |
| 198 | if (kperf_action_has_non_system(timer->actionid)) { |
| 199 | /* |
| 200 | * If the core that's handling the timer is not scheduling |
| 201 | * threads, only run system samplers. |
| 202 | */ |
| 203 | system_only_self = kperf_mp_broadcast_other_running(timer); |
| 204 | } |
| 205 | kperf_sample_cpu(timer, true, system_only_self); |
| 206 | |
	/* let the PET thread rearm its timer; rearm any other timer here */
| 208 | if (ntimer == pet_timer_id) { |
| 209 | /* PET mode is responsible for rearming the timer */ |
| 210 | kperf_pet_fire_after(); |
| 211 | } else { |
| 212 | /* |
| 213 | * FIXME: Get the current time from elsewhere. The next |
| 214 | * timer's period now includes the time taken to reach this |
| 215 | * point. This causes a bias towards longer sampling periods |
| 216 | * than requested. |
| 217 | */ |
| 218 | kperf_timer_schedule(timer, mach_absolute_time()); |
| 219 | } |
| 220 | |
| 221 | deactivate: |
| 222 | timer->active = 0; |
| 223 | } |
| 224 | |
| 225 | /* program the timer from the PET thread */ |
| 226 | void |
| 227 | kperf_timer_pet_rearm(uint64_t elapsed_ticks) |
| 228 | { |
| 229 | struct kperf_timer *timer = NULL; |
| 230 | uint64_t period = 0; |
| 231 | uint64_t deadline; |
| 232 | |
	/*
	 * If pet_timer_id is out of range, PET has been disabled, so there
	 * is nothing to rearm.
	 */
| 237 | if (pet_timer_id >= kperf_timerc) { |
| 238 | return; |
| 239 | } |
| 240 | |
| 241 | unsigned int status = kperf_sampling_status(); |
	/* do not reprogram the timer if sampling is off or shutting down */
| 243 | if (status == KPERF_SAMPLING_OFF) { |
| 244 | BUF_INFO(PERF_PET_END, SAMPLE_OFF); |
| 245 | return; |
| 246 | } else if (status == KPERF_SAMPLING_SHUTDOWN) { |
| 247 | BUF_INFO(PERF_PET_END, SAMPLE_SHUTDOWN); |
| 248 | return; |
| 249 | } |
| 250 | |
| 251 | timer = &(kperf_timerv[pet_timer_id]); |
| 252 | |
| 253 | /* if we re-programmed the timer to zero, just drop it */ |
	if (timer->period == 0) {
| 255 | return; |
| 256 | } |
| 257 | |
	/* subtract the time the PET sample took, being careful not to underflow */
| 259 | if (timer->period > elapsed_ticks) { |
| 260 | period = timer->period - elapsed_ticks; |
| 261 | } |
| 262 | |
| 263 | /* make sure we don't set the next PET sample to happen too soon */ |
| 264 | if (period < min_period_pet_abstime) { |
| 265 | period = min_period_pet_abstime; |
| 266 | } |
| 267 | |
	/*
	 * The PET thread can take a while, so read the current time again
	 * rather than reusing a stale value.
	 */
| 271 | deadline = mach_absolute_time() + period; |
| 272 | |
| 273 | BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline); |
| 274 | |
	/* re-arm the timer; TIMER_CALL_SYS_CRITICAL asks for no coalescing (slop) */
	timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
| 279 | } |
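
/*
 * Worked example (illustrative numbers only): with a 10 ms PET period, if
 * the PET thread spent 3 ms sampling, the timer is re-armed 7 ms out; if
 * it spent 12 ms, the subtraction would underflow, so period stays zero
 * and is raised to min_period_pet_abstime instead.
 */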
| 280 | |
| 281 | /* turn on all the timers */ |
| 282 | void |
| 283 | kperf_timer_go(void) |
| 284 | { |
| 285 | /* get the PET thread going */ |
| 286 | if (pet_timer_id < kperf_timerc) { |
| 287 | kperf_pet_config(kperf_timerv[pet_timer_id].actionid); |
| 288 | } |
| 289 | |
| 290 | uint64_t now = mach_absolute_time(); |
| 291 | |
| 292 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
| 293 | if (kperf_timerv[i].period == 0) { |
| 294 | continue; |
| 295 | } |
| 296 | |
| 297 | kperf_timer_schedule(&(kperf_timerv[i]), now); |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | void |
| 302 | kperf_timer_stop(void) |
| 303 | { |
| 304 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
| 305 | if (kperf_timerv[i].period == 0) { |
| 306 | continue; |
| 307 | } |
| 308 | |
		/* wait for any in-flight handler to finish */
		while (kperf_timerv[i].active) {
			;
		}
| 311 | |
| 312 | timer_call_cancel(&kperf_timerv[i].tcall); |
| 313 | } |
| 314 | |
| 315 | /* wait for PET to stop, too */ |
| 316 | kperf_pet_config(0); |
| 317 | } |
| 318 | |
| 319 | unsigned int |
| 320 | kperf_timer_get_petid(void) |
| 321 | { |
| 322 | return pet_timer_id; |
| 323 | } |
| 324 | |
| 325 | int |
| 326 | kperf_timer_set_petid(unsigned int timerid) |
| 327 | { |
| 328 | if (timerid < kperf_timerc) { |
| 329 | uint64_t min_period; |
| 330 | |
| 331 | min_period = kperf_timer_min_pet_period_abstime(); |
| 332 | if (kperf_timerv[timerid].period < min_period) { |
| 333 | kperf_timerv[timerid].period = min_period; |
| 334 | } |
| 335 | kperf_pet_config(kperf_timerv[timerid].actionid); |
| 336 | } else { |
| 337 | /* clear the PET trigger if it's a bogus ID */ |
| 338 | kperf_pet_config(0); |
| 339 | } |
| 340 | |
| 341 | pet_timer_id = timerid; |
| 342 | |
| 343 | return 0; |
| 344 | } |
| 345 | |
| 346 | int |
| 347 | kperf_timer_get_period(unsigned int timerid, uint64_t *period_abstime) |
| 348 | { |
| 349 | if (timerid >= kperf_timerc) { |
| 350 | return EINVAL; |
| 351 | } |
| 352 | |
| 353 | *period_abstime = kperf_timerv[timerid].period; |
| 354 | return 0; |
| 355 | } |
| 356 | |
| 357 | int |
| 358 | kperf_timer_set_period(unsigned int timerid, uint64_t period_abstime) |
| 359 | { |
| 360 | uint64_t min_period; |
| 361 | |
| 362 | if (timerid >= kperf_timerc) { |
| 363 | return EINVAL; |
| 364 | } |
| 365 | |
| 366 | if (pet_timer_id == timerid) { |
| 367 | min_period = kperf_timer_min_pet_period_abstime(); |
| 368 | } else { |
| 369 | min_period = kperf_timer_min_period_abstime(); |
| 370 | } |
| 371 | |
| 372 | if (period_abstime > 0 && period_abstime < min_period) { |
| 373 | period_abstime = min_period; |
| 374 | } |
| 375 | |
| 376 | kperf_timerv[timerid].period = period_abstime; |
| 377 | |
| 378 | /* FIXME: re-program running timers? */ |
| 379 | |
| 380 | return 0; |
| 381 | } |
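
/*
 * Periods are in mach absolute time. A minimal caller-side sketch of
 * converting a wall-clock period first (the 5 ms value and timer ID 0 are
 * hypothetical):
 *
 *	uint64_t period_abs = 0;
 *	nanoseconds_to_absolutetime(5 * NSEC_PER_MSEC, &period_abs);
 *	int err = kperf_timer_set_period(0, period_abs);
 */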
| 382 | |
| 383 | int |
| 384 | kperf_timer_get_action(unsigned int timerid, uint32_t *action) |
| 385 | { |
| 386 | if (timerid >= kperf_timerc) { |
| 387 | return EINVAL; |
| 388 | } |
| 389 | |
| 390 | *action = kperf_timerv[timerid].actionid; |
| 391 | return 0; |
| 392 | } |
| 393 | |
| 394 | int |
| 395 | kperf_timer_set_action(unsigned int timerid, uint32_t action) |
| 396 | { |
| 397 | if (timerid >= kperf_timerc) { |
| 398 | return EINVAL; |
| 399 | } |
| 400 | |
| 401 | kperf_timerv[timerid].actionid = action; |
| 402 | return 0; |
| 403 | } |
| 404 | |
| 405 | unsigned int |
| 406 | kperf_timer_get_count(void) |
| 407 | { |
| 408 | return kperf_timerc; |
| 409 | } |
| 410 | |
| 411 | void |
| 412 | kperf_timer_reset(void) |
| 413 | { |
	kperf_timer_set_petid(999); /* out-of-range: disable PET */
| 415 | kperf_set_pet_idle_rate(KPERF_PET_DEFAULT_IDLE_RATE); |
| 416 | kperf_set_lightweight_pet(0); |
| 417 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
| 418 | kperf_timerv[i].period = 0; |
| 419 | kperf_timerv[i].actionid = 0; |
| 420 | kperf_timerv[i].pending_cpus = 0; |
| 421 | } |
| 422 | } |
| 423 | |
int
| 425 | kperf_timer_set_count(unsigned int count) |
| 426 | { |
| 427 | struct kperf_timer *new_timerv = NULL, *old_timerv = NULL; |
| 428 | unsigned int old_count; |
| 429 | |
| 430 | if (min_period_abstime == 0) { |
| 431 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime); |
| 432 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime); |
| 433 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime); |
| 434 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS, |
| 435 | &min_period_pet_bg_abstime); |
| 436 | assert(min_period_abstime > 0); |
| 437 | } |
| 438 | |
| 439 | if (count == kperf_timerc) { |
| 440 | return 0; |
| 441 | } |
| 442 | if (count > TIMER_MAX) { |
| 443 | return EINVAL; |
| 444 | } |
| 445 | |
| 446 | /* TODO: allow shrinking? */ |
| 447 | if (count < kperf_timerc) { |
| 448 | return EINVAL; |
| 449 | } |
| 450 | |
| 451 | /* |
| 452 | * Make sure kperf is initialized when creating the array for the first |
| 453 | * time. |
| 454 | */ |
| 455 | if (kperf_timerc == 0) { |
| 456 | int r; |
| 457 | |
| 458 | /* main kperf */ |
| 459 | if ((r = kperf_init())) { |
| 460 | return r; |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | /* |
| 465 | * Shut down any running timers since we will be messing with the timer |
| 466 | * call structures. |
| 467 | */ |
| 468 | kperf_timer_stop(); |
| 469 | |
| 470 | /* create a new array */ |
| 471 | new_timerv = kalloc_tag(count * sizeof(struct kperf_timer), |
| 472 | VM_KERN_MEMORY_DIAG); |
| 473 | if (new_timerv == NULL) { |
| 474 | return ENOMEM; |
| 475 | } |
| 476 | old_timerv = kperf_timerv; |
| 477 | old_count = kperf_timerc; |
| 478 | |
| 479 | if (old_timerv != NULL) { |
| 480 | bcopy(kperf_timerv, new_timerv, |
| 481 | kperf_timerc * sizeof(struct kperf_timer)); |
| 482 | } |
| 483 | |
| 484 | /* zero the new entries */ |
	bzero(&(new_timerv[old_count]),
| 486 | (count - old_count) * sizeof(struct kperf_timer)); |
| 487 | |
| 488 | /* (re-)setup the timer call info for all entries */ |
| 489 | for (unsigned int i = 0; i < count; i++) { |
| 490 | timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]); |
| 491 | } |
| 492 | |
| 493 | kperf_timerv = new_timerv; |
| 494 | kperf_timerc = count; |
| 495 | |
| 496 | if (old_timerv != NULL) { |
| 497 | kfree(old_timerv, old_count * sizeof(struct kperf_timer)); |
| 498 | } |
| 499 | |
| 500 | return 0; |
| 501 | } |
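
/*
 * A minimal configuration sketch tying these interfaces together (action
 * ID 1 and the 10 ms period are hypothetical; real configuration normally
 * arrives through the kperf sysctl interface):
 *
 *	uint64_t period_abs = 0;
 *	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &period_abs);
 *	if (kperf_timer_set_count(1) == 0) {
 *		kperf_timer_set_period(0, period_abs);
 *		kperf_timer_set_action(0, 1);
 *		kperf_timer_go();
 *	}
 */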
| 502 | |