| 1 | /* |
| 2 | * Copyright (c) 2000-2012 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | |
| 32 | /* |
| 33 | * File: i386/rtclock.c |
| 34 | * Purpose: Routines for handling the machine dependent |
| 35 | * real-time clock. Historically, this clock is |
| 36 | * generated by the Intel 8254 Programmable Interval |
| 37 | * Timer, but local apic timers are now used for |
| 38 | * this purpose with the master time reference being |
| 39 | * the cpu clock counted by the timestamp MSR. |
| 40 | */ |
| 41 | |
| 42 | |
| 43 | #include <mach/mach_types.h> |
| 44 | |
| 45 | #include <kern/cpu_data.h> |
| 46 | #include <kern/cpu_number.h> |
| 47 | #include <kern/clock.h> |
| 48 | #include <kern/host_notify.h> |
| 49 | #include <kern/macro_help.h> |
| 50 | #include <kern/misc_protos.h> |
| 51 | #include <kern/spl.h> |
| 52 | #include <kern/assert.h> |
| 53 | #include <kern/timer_queue.h> |
| 54 | #include <mach/vm_prot.h> |
| 55 | #include <vm/pmap.h> |
| 56 | #include <vm/vm_kern.h> /* for kernel_map */ |
| 57 | #include <architecture/i386/pio.h> |
| 58 | #include <i386/machine_cpu.h> |
| 59 | #include <i386/cpuid.h> |
| 60 | #include <i386/cpu_threads.h> |
| 61 | #include <i386/mp.h> |
| 62 | #include <i386/machine_routines.h> |
| 63 | #include <i386/pal_routines.h> |
| 64 | #include <i386/proc_reg.h> |
| 65 | #include <i386/misc_protos.h> |
| 66 | #include <pexpert/pexpert.h> |
| 67 | #include <machine/limits.h> |
| 68 | #include <machine/commpage.h> |
| 69 | #include <sys/kdebug.h> |
| 70 | #include <i386/tsc.h> |
| 71 | #include <i386/rtclock_protos.h> |
/* Granularity to which rtc_export_speed() rounds the exported cpu frequency. */
#define UI_CPUFREQ_ROUNDING_FACTOR	10000000

int		rtclock_init(void);

/*
 * Nanoseconds corresponding to the TSC delta since tsc_at_boot, captured by
 * rtc_set_timescale() the first time it runs (i.e. while this is still 0).
 */
uint64_t	tsc_rebase_abs_time = 0;

/* File-local helpers defined further down. */
static void	rtc_set_timescale(uint64_t cycles);
static uint64_t	rtc_export_speed(uint64_t cyc_per_sec);
| 80 | |
| 81 | void |
| 82 | rtc_timer_start(void) |
| 83 | { |
| 84 | /* |
| 85 | * Force a complete re-evaluation of timer deadlines. |
| 86 | */ |
| 87 | x86_lcpu()->rtcDeadline = EndOfAllTime; |
| 88 | timer_resync_deadlines(); |
| 89 | } |
| 90 | |
| 91 | static inline uint32_t |
| 92 | _absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs) |
| 93 | { |
| 94 | uint32_t remain; |
| 95 | *secs = abstime / (uint64_t)NSEC_PER_SEC; |
| 96 | remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC); |
| 97 | *microsecs = remain / NSEC_PER_USEC; |
| 98 | return remain; |
| 99 | } |
| 100 | |
| 101 | static inline void |
| 102 | _absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs) |
| 103 | { |
| 104 | *secs = abstime / (uint64_t)NSEC_PER_SEC; |
| 105 | *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC); |
| 106 | } |
| 107 | |
| 108 | /* |
| 109 | * Nanotime/mach_absolute_time |
| 110 | * --------------------------- |
| 111 | * The timestamp counter (TSC) - which counts cpu clock cycles and can be read |
| 112 | * efficiently by the kernel and in userspace - is the reference for all timing. |
| 113 | * The cpu clock rate is platform-dependent and may stop or be reset when the |
| 114 | * processor is napped/slept. As a result, nanotime is the software abstraction |
| 115 | * used to maintain a monotonic clock, adjusted from an outside reference as needed. |
| 116 | * |
| 117 | * The kernel maintains nanotime information recording: |
| 118 | * - the ratio of tsc to nanoseconds |
| 119 | * with this ratio expressed as a 32-bit scale and shift |
| 120 | * (power of 2 divider); |
| 121 | * - { tsc_base, ns_base } pair of corresponding timestamps. |
| 122 | * |
| 123 | * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage |
| 124 | * for the userspace nanotime routine to read. |
| 125 | * |
| 126 | * All of the routines which update the nanotime data are non-reentrant. This must |
| 127 | * be guaranteed by the caller. |
| 128 | */ |
| 129 | static inline void |
| 130 | rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp) |
| 131 | { |
| 132 | commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift); |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * rtc_nanotime_init: |
| 137 | * |
| 138 | * Intialize the nanotime info from the base time. |
| 139 | */ |
| 140 | static inline void |
| 141 | _rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base) |
| 142 | { |
| 143 | uint64_t tsc = rdtsc64(); |
| 144 | |
| 145 | _pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp); |
| 146 | } |
| 147 | |
| 148 | void |
| 149 | rtc_nanotime_init(uint64_t base) |
| 150 | { |
| 151 | _rtc_nanotime_init(&pal_rtc_nanotime_info, base); |
| 152 | rtc_nanotime_set_commpage(&pal_rtc_nanotime_info); |
| 153 | } |
| 154 | |
| 155 | /* |
| 156 | * rtc_nanotime_init_commpage: |
| 157 | * |
| 158 | * Call back from the commpage initialization to |
| 159 | * cause the commpage data to be filled in once the |
| 160 | * commpages have been created. |
| 161 | */ |
| 162 | void |
| 163 | rtc_nanotime_init_commpage(void) |
| 164 | { |
| 165 | spl_t s = splclock(); |
| 166 | |
| 167 | rtc_nanotime_set_commpage(&pal_rtc_nanotime_info); |
| 168 | splx(s); |
| 169 | } |
| 170 | |
| 171 | /* |
| 172 | * rtc_nanotime_read: |
| 173 | * |
| 174 | * Returns the current nanotime value, accessable from any |
| 175 | * context. |
| 176 | */ |
| 177 | static inline uint64_t |
| 178 | rtc_nanotime_read(void) |
| 179 | { |
| 180 | return _rtc_nanotime_read(&pal_rtc_nanotime_info); |
| 181 | } |
| 182 | |
| 183 | /* |
| 184 | * rtc_clock_napped: |
| 185 | * |
| 186 | * Invoked from power management when we exit from a low C-State (>= C4) |
| 187 | * and the TSC has stopped counting. The nanotime data is updated according |
| 188 | * to the provided value which represents the new value for nanotime. |
| 189 | */ |
| 190 | void |
| 191 | rtc_clock_napped(uint64_t base, uint64_t tsc_base) |
| 192 | { |
| 193 | pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; |
| 194 | uint64_t oldnsecs; |
| 195 | uint64_t newnsecs; |
| 196 | uint64_t tsc; |
| 197 | |
| 198 | assert(!ml_get_interrupts_enabled()); |
| 199 | tsc = rdtsc64(); |
| 200 | oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp); |
| 201 | newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp); |
| 202 | |
| 203 | /* |
| 204 | * Only update the base values if time using the new base values |
| 205 | * is later than the time using the old base values. |
| 206 | */ |
| 207 | if (oldnsecs < newnsecs) { |
| 208 | _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp); |
| 209 | rtc_nanotime_set_commpage(rntp); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | /* |
| 214 | * Invoked from power management to correct the SFLM TSC entry drift problem: |
| 215 | * a small delta is added to the tsc_base. This is equivalent to nudgin time |
| 216 | * backwards. We require this to be on the order of a TSC quantum which won't |
| 217 | * cause callers of mach_absolute_time() to see time going backwards! |
| 218 | */ |
| 219 | void |
| 220 | rtc_clock_adjust(uint64_t tsc_base_delta) |
| 221 | { |
| 222 | pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; |
| 223 | |
| 224 | assert(!ml_get_interrupts_enabled()); |
| 225 | assert(tsc_base_delta < 100ULL); /* i.e. it's small */ |
| 226 | _rtc_nanotime_adjust(tsc_base_delta, rntp); |
| 227 | rtc_nanotime_set_commpage(rntp); |
| 228 | } |
| 229 | |
| 230 | void |
| 231 | rtc_clock_stepping(__unused uint32_t new_frequency, |
| 232 | __unused uint32_t old_frequency) |
| 233 | { |
| 234 | panic("rtc_clock_stepping unsupported" ); |
| 235 | } |
| 236 | |
| 237 | void |
| 238 | rtc_clock_stepped(__unused uint32_t new_frequency, |
| 239 | __unused uint32_t old_frequency) |
| 240 | { |
| 241 | panic("rtc_clock_stepped unsupported" ); |
| 242 | } |
| 243 | |
| 244 | /* |
| 245 | * rtc_sleep_wakeup: |
| 246 | * |
| 247 | * Invoked from power management when we have awoken from a sleep (S3) |
| 248 | * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC |
| 249 | * has progressed. The nanotime data is updated based on the passed-in value. |
| 250 | * |
| 251 | * The caller must guarantee non-reentrancy. |
| 252 | */ |
| 253 | void |
| 254 | rtc_sleep_wakeup( |
| 255 | uint64_t base) |
| 256 | { |
| 257 | /* Set fixed configuration for lapic timers */ |
| 258 | rtc_timer->rtc_config(); |
| 259 | |
| 260 | /* |
| 261 | * Reset nanotime. |
| 262 | * The timestamp counter will have been reset |
| 263 | * but nanotime (uptime) marches onward. |
| 264 | */ |
| 265 | rtc_nanotime_init(base); |
| 266 | } |
| 267 | |
| 268 | void |
| 269 | rtc_decrementer_configure(void) { |
| 270 | rtc_timer->rtc_config(); |
| 271 | } |
| 272 | /* |
| 273 | * rtclock_early_init() is called very early at boot to |
| 274 | * establish mach_absolute_time() and set it to zero. |
| 275 | */ |
| 276 | void |
| 277 | rtclock_early_init(void) |
| 278 | { |
| 279 | assert(tscFreq); |
| 280 | rtc_set_timescale(tscFreq); |
| 281 | } |
| 282 | |
| 283 | /* |
| 284 | * Initialize the real-time clock device. |
| 285 | * In addition, various variables used to support the clock are initialized. |
| 286 | */ |
| 287 | int |
| 288 | rtclock_init(void) |
| 289 | { |
| 290 | uint64_t cycles; |
| 291 | |
| 292 | assert(!ml_get_interrupts_enabled()); |
| 293 | |
| 294 | if (cpu_number() == master_cpu) { |
| 295 | |
| 296 | assert(tscFreq); |
| 297 | |
| 298 | /* |
| 299 | * Adjust and set the exported cpu speed. |
| 300 | */ |
| 301 | cycles = rtc_export_speed(tscFreq); |
| 302 | |
| 303 | /* |
| 304 | * Set min/max to actual. |
| 305 | * ACPI may update these later if speed-stepping is detected. |
| 306 | */ |
| 307 | gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; |
| 308 | gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; |
| 309 | |
| 310 | rtc_timer_init(); |
| 311 | clock_timebase_init(); |
| 312 | ml_init_lock_timeout(); |
| 313 | ml_init_delay_spin_threshold(10); |
| 314 | } |
| 315 | |
| 316 | /* Set fixed configuration for lapic timers */ |
| 317 | rtc_timer->rtc_config(); |
| 318 | rtc_timer_start(); |
| 319 | |
| 320 | return (1); |
| 321 | } |
| 322 | |
| 323 | // utility routine |
| 324 | // Code to calculate how many processor cycles are in a second... |
| 325 | |
| 326 | static void |
| 327 | rtc_set_timescale(uint64_t cycles) |
| 328 | { |
| 329 | pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; |
| 330 | uint32_t shift = 0; |
| 331 | |
| 332 | /* the "scale" factor will overflow unless cycles>SLOW_TSC_THRESHOLD */ |
| 333 | |
| 334 | while ( cycles <= SLOW_TSC_THRESHOLD) { |
| 335 | shift++; |
| 336 | cycles <<= 1; |
| 337 | } |
| 338 | |
| 339 | rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); |
| 340 | |
| 341 | rntp->shift = shift; |
| 342 | |
| 343 | /* |
| 344 | * On some platforms, the TSC is not reset at warm boot. But the |
| 345 | * rebase time must be relative to the current boot so we can't use |
| 346 | * mach_absolute_time(). Instead, we convert the TSC delta since boot |
| 347 | * to nanoseconds. |
| 348 | */ |
| 349 | if (tsc_rebase_abs_time == 0) |
| 350 | tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds( |
| 351 | rdtsc64() - tsc_at_boot, rntp); |
| 352 | |
| 353 | rtc_nanotime_init(0); |
| 354 | } |
| 355 | |
| 356 | static uint64_t |
| 357 | rtc_export_speed(uint64_t cyc_per_sec) |
| 358 | { |
| 359 | pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; |
| 360 | uint64_t cycles; |
| 361 | |
| 362 | if (rntp->shift != 0 ) |
| 363 | printf("Slow TSC, rtc_nanotime.shift == %d\n" , rntp->shift); |
| 364 | |
| 365 | /* Round: */ |
| 366 | cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) |
| 367 | / UI_CPUFREQ_ROUNDING_FACTOR) |
| 368 | * UI_CPUFREQ_ROUNDING_FACTOR; |
| 369 | |
| 370 | /* |
| 371 | * Set current measured speed. |
| 372 | */ |
| 373 | if (cycles >= 0x100000000ULL) { |
| 374 | gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL; |
| 375 | } else { |
| 376 | gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles; |
| 377 | } |
| 378 | gPEClockFrequencyInfo.cpu_frequency_hz = cycles; |
| 379 | |
| 380 | kprintf("[RTCLOCK] frequency %llu (%llu)\n" , cycles, cyc_per_sec); |
| 381 | return(cycles); |
| 382 | } |
| 383 | |
| 384 | void |
| 385 | clock_get_system_microtime( |
| 386 | clock_sec_t *secs, |
| 387 | clock_usec_t *microsecs) |
| 388 | { |
| 389 | uint64_t now = rtc_nanotime_read(); |
| 390 | |
| 391 | _absolutetime_to_microtime(now, secs, microsecs); |
| 392 | } |
| 393 | |
| 394 | void |
| 395 | clock_get_system_nanotime( |
| 396 | clock_sec_t *secs, |
| 397 | clock_nsec_t *nanosecs) |
| 398 | { |
| 399 | uint64_t now = rtc_nanotime_read(); |
| 400 | |
| 401 | _absolutetime_to_nanotime(now, secs, nanosecs); |
| 402 | } |
| 403 | |
/*
 * clock_gettimeofday_set_commpage:
 *
 * Forwards all five arguments, unchanged and in order, to
 * commpage_set_timestamp().
 */
void
clock_gettimeofday_set_commpage(
	uint64_t	abstime,
	uint64_t	sec,
	uint64_t	frac,
	uint64_t	scale,
	uint64_t	tick_per_sec)
{
	commpage_set_timestamp(abstime, sec, frac, scale, tick_per_sec);
}
| 409 | |
| 410 | void |
| 411 | clock_timebase_info( |
| 412 | mach_timebase_info_t info) |
| 413 | { |
| 414 | info->numer = info->denom = 1; |
| 415 | } |
| 416 | |
| 417 | /* |
| 418 | * Real-time clock device interrupt. |
| 419 | */ |
| 420 | void |
| 421 | rtclock_intr( |
| 422 | x86_saved_state_t *tregs) |
| 423 | { |
| 424 | uint64_t rip; |
| 425 | boolean_t user_mode = FALSE; |
| 426 | |
| 427 | assert(get_preemption_level() > 0); |
| 428 | assert(!ml_get_interrupts_enabled()); |
| 429 | |
| 430 | if (is_saved_state64(tregs) == TRUE) { |
| 431 | x86_saved_state64_t *regs; |
| 432 | |
| 433 | regs = saved_state64(tregs); |
| 434 | |
| 435 | if (regs->isf.cs & 0x03) |
| 436 | user_mode = TRUE; |
| 437 | rip = regs->isf.rip; |
| 438 | } else { |
| 439 | x86_saved_state32_t *regs; |
| 440 | |
| 441 | regs = saved_state32(tregs); |
| 442 | |
| 443 | if (regs->cs & 0x03) |
| 444 | user_mode = TRUE; |
| 445 | rip = regs->eip; |
| 446 | } |
| 447 | |
| 448 | /* call the generic etimer */ |
| 449 | timer_intr(user_mode, rip); |
| 450 | } |
| 451 | |
| 452 | |
| 453 | /* |
| 454 | * Request timer pop from the hardware |
| 455 | */ |
| 456 | |
| 457 | uint64_t |
| 458 | setPop(uint64_t time) |
| 459 | { |
| 460 | uint64_t now; |
| 461 | uint64_t pop; |
| 462 | |
| 463 | /* 0 and EndOfAllTime are special-cases for "clear the timer" */ |
| 464 | if (time == 0 || time == EndOfAllTime ) { |
| 465 | time = EndOfAllTime; |
| 466 | now = 0; |
| 467 | pop = rtc_timer->rtc_set(0, 0); |
| 468 | } else { |
| 469 | now = rtc_nanotime_read(); /* The time in nanoseconds */ |
| 470 | pop = rtc_timer->rtc_set(time, now); |
| 471 | } |
| 472 | |
| 473 | /* Record requested and actual deadlines set */ |
| 474 | x86_lcpu()->rtcDeadline = time; |
| 475 | x86_lcpu()->rtcPop = pop; |
| 476 | |
| 477 | return pop - now; |
| 478 | } |
| 479 | |
/*
 * mach_absolute_time:
 *
 * Returns the current nanotime value.
 */
uint64_t
mach_absolute_time(void)
{
	uint64_t	now = rtc_nanotime_read();

	return now;
}
| 485 | |
/*
 * mach_approximate_time:
 *
 * In this implementation, identical to mach_absolute_time(): returns the
 * current nanotime value.
 */
uint64_t
mach_approximate_time(void)
{
	uint64_t	now = rtc_nanotime_read();

	return now;
}
| 491 | |
/*
 * clock_interval_to_absolutetime_interval:
 *
 * Stores interval * scale_factor in *result.  The multiplication is done
 * in 64 bits, so the product of two 32-bit operands cannot overflow.
 */
void
clock_interval_to_absolutetime_interval(uint32_t interval, uint32_t scale_factor, uint64_t *result)
{
	uint64_t	product = interval;

	product *= scale_factor;
	*result = product;
}
| 500 | |
| 501 | void |
| 502 | absolutetime_to_microtime( |
| 503 | uint64_t abstime, |
| 504 | clock_sec_t *secs, |
| 505 | clock_usec_t *microsecs) |
| 506 | { |
| 507 | _absolutetime_to_microtime(abstime, secs, microsecs); |
| 508 | } |
| 509 | |
| 510 | void |
| 511 | nanotime_to_absolutetime( |
| 512 | clock_sec_t secs, |
| 513 | clock_nsec_t nanosecs, |
| 514 | uint64_t *result) |
| 515 | { |
| 516 | *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs; |
| 517 | } |
| 518 | |
/*
 * absolutetime_to_nanoseconds:
 *
 * Identity conversion: `abstime` is copied to *result unchanged.
 */
void
absolutetime_to_nanoseconds(uint64_t abstime, uint64_t *result)
{
	uint64_t	nsecs = abstime;

	*result = nsecs;
}
| 526 | |
/*
 * nanoseconds_to_absolutetime:
 *
 * Identity conversion: `nanoseconds` is copied to *result unchanged.
 */
void
nanoseconds_to_absolutetime(uint64_t nanoseconds, uint64_t *result)
{
	uint64_t	abstime = nanoseconds;

	*result = abstime;
}
| 534 | |
/*
 * machine_delay_until:
 *
 * Spin, calling cpu_pause() on each iteration, until mach_absolute_time()
 * is no longer below `deadline`.  `interval` is unused.
 */
void
machine_delay_until(uint64_t interval, uint64_t deadline)
{
	(void)interval;

	for (;;) {
		if (mach_absolute_time() >= deadline) {
			break;
		}
		cpu_pause();
	}
}
| 545 | |