1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Kernel timekeeping code and accessor functions. Based on code from
4 * timer.c, moved in commit 8524070b7982.
5 */
6 #include <linux/timekeeper_internal.h>
7 #include <linux/module.h>
8 #include <linux/interrupt.h>
9 #include <linux/percpu.h>
10 #include <linux/init.h>
11 #include <linux/mm.h>
12 #include <linux/nmi.h>
13 #include <linux/sched.h>
14 #include <linux/sched/loadavg.h>
15 #include <linux/sched/clock.h>
16 #include <linux/syscore_ops.h>
17 #include <linux/clocksource.h>
18 #include <linux/jiffies.h>
19 #include <linux/time.h>
20 #include <linux/timex.h>
21 #include <linux/tick.h>
22 #include <linux/stop_machine.h>
23 #include <linux/pvclock_gtod.h>
24 #include <linux/compiler.h>
25 #include <linux/audit.h>
26 #include <linux/random.h>
27
28 #include "tick-internal.h"
29 #include "ntp_internal.h"
30 #include "timekeeping_internal.h"
31
32 #define TK_CLEAR_NTP (1 << 0)
33 #define TK_MIRROR (1 << 1)
34 #define TK_CLOCK_WAS_SET (1 << 2)
35
36 enum timekeeping_adv_mode {
37 /* Update timekeeper when a tick has passed */
38 TK_ADV_TICK,
39
40 /* Update timekeeper on a direct frequency change */
41 TK_ADV_FREQ
42 };
43
44 DEFINE_RAW_SPINLOCK(timekeeper_lock);
45
46 /*
47 * The most important data for readout fits into a single 64 byte
48 * cache line.
49 */
50 static struct {
51 seqcount_raw_spinlock_t seq;
52 struct timekeeper timekeeper;
53 } tk_core ____cacheline_aligned = {
54 .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
55 };
56
57 static struct timekeeper shadow_timekeeper;
58
59 /* Flag indicating whether timekeeping is suspended */
60 int __read_mostly timekeeping_suspended;
61
62 /**
63 * struct tk_fast - NMI safe timekeeper
64 * @seq: Sequence counter for protecting updates. The lowest bit
65 * is the index for the tk_read_base array
66 * @base: tk_read_base array. Access is indexed by the lowest bit of
67 * @seq.
68 *
69 * See @update_fast_timekeeper() below.
70 */
71 struct tk_fast {
72 seqcount_latch_t seq;
73 struct tk_read_base base[2];
74 };
75
76 /* Suspend-time cycles value for halted fast timekeeper. */
77 static u64 cycles_at_suspend;
78
79 static u64 dummy_clock_read(struct clocksource *cs)
80 {
81 if (timekeeping_suspended)
82 return cycles_at_suspend;
83 return local_clock();
84 }
85
86 static struct clocksource dummy_clock = {
87 .read = dummy_clock_read,
88 };
89
90 /*
91 * Boot time initialization which allows local_clock() to be utilized
92 * during early boot when clocksources are not available. local_clock()
93 * returns nanoseconds already so no conversion is required, hence mult=1
94 * and shift=0. When the first proper clocksource is installed then
95 * the fast time keepers are updated with the correct values.
96 */
97 #define FAST_TK_INIT \
98 { \
99 .clock = &dummy_clock, \
100 .mask = CLOCKSOURCE_MASK(64), \
101 .mult = 1, \
102 .shift = 0, \
103 }
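
/*
 * With these dummy values the generic conversion used later,
 * ((delta * mult) + xtime_nsec) >> shift, degenerates to plain "delta":
 * the nanoseconds returned by local_clock() are passed through unchanged
 * until a real clocksource is installed.
 */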
104
105 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
106 .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
107 .base[0] = FAST_TK_INIT,
108 .base[1] = FAST_TK_INIT,
109 };
110
111 static struct tk_fast tk_fast_raw ____cacheline_aligned = {
112 .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
113 .base[0] = FAST_TK_INIT,
114 .base[1] = FAST_TK_INIT,
115 };
116
117 static inline void tk_normalize_xtime(struct timekeeper *tk)
118 {
119 while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
120 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
121 tk->xtime_sec++;
122 }
123 while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
124 tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
125 tk->raw_sec++;
126 }
127 }
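
/*
 * Illustrative sketch of the shifted-nanosecond bookkeeping above, with
 * hypothetical numbers (a real clocksource picks its own shift):
 *
 *	shift = 8
 *	one second in shifted units = NSEC_PER_SEC << 8 = 256,000,000,000
 *
 * If accumulation leaves tkr_mono.xtime_nsec at 300,000,000,000 shifted
 * units, one shifted second is subtracted, xtime_sec is incremented and
 * 44,000,000,000 shifted units (~171.9 ms) remain.
 */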
128
129 static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
130 {
131 struct timespec64 ts;
132
133 ts.tv_sec = tk->xtime_sec;
134 ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
135 return ts;
136 }
137
138 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
139 {
140 tk->xtime_sec = ts->tv_sec;
141 tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
142 }
143
144 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
145 {
146 tk->xtime_sec += ts->tv_sec;
147 tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
148 tk_normalize_xtime(tk);
149 }
150
151 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
152 {
153 struct timespec64 tmp;
154
155 /*
156 * Verify consistency of: offset_real = -wall_to_monotonic
157 * before modifying anything
158 */
159 set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
160 -tk->wall_to_monotonic.tv_nsec);
161 WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
162 tk->wall_to_monotonic = wtm;
163 set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
164 tk->offs_real = timespec64_to_ktime(tmp);
165 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
166 }
167
168 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
169 {
170 tk->offs_boot = ktime_add(tk->offs_boot, delta);
171 /*
172 * Timespec representation for VDSO update to avoid 64bit division
173 * on every update.
174 */
175 tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
176 }
177
178 /*
179 * tk_clock_read - atomic clocksource read() helper
180 *
181 * This helper is necessary to use in the read paths because, while the
182 * seqcount ensures we don't return a bad value while structures are updated,
183 * it doesn't protect from potential crashes. There is the possibility that
184 * the tkr's clocksource may change between the read reference, and the
185 * clock reference passed to the read function. This can cause crashes if
186 * the wrong clocksource is passed to the wrong read function.
187 * This isn't necessary to use when holding the timekeeper_lock or doing
188 * a read of the fast-timekeeper tkrs (which is protected by its own locking
189 * and update logic).
190 */
191 static inline u64 tk_clock_read(const struct tk_read_base *tkr)
192 {
193 struct clocksource *clock = READ_ONCE(tkr->clock);
194
195 return clock->read(clock);
196 }
197
198 /**
199 * tk_setup_internals - Set up internals to use clocksource clock.
200 *
201 * @tk: The target timekeeper to setup.
202 * @clock: Pointer to clocksource.
203 *
204 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
205 * pair and interval request.
206 *
207 * Unless you're the timekeeping code, you should not be using this!
208 */
209 static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
210 {
211 u64 interval;
212 u64 tmp, ntpinterval;
213 struct clocksource *old_clock;
214
215 ++tk->cs_was_changed_seq;
216 old_clock = tk->tkr_mono.clock;
217 tk->tkr_mono.clock = clock;
218 tk->tkr_mono.mask = clock->mask;
219 tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
220
221 tk->tkr_raw.clock = clock;
222 tk->tkr_raw.mask = clock->mask;
223 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
224
225 /* Do the ns -> cycle conversion first, using original mult */
226 tmp = NTP_INTERVAL_LENGTH;
227 tmp <<= clock->shift;
228 ntpinterval = tmp;
229 tmp += clock->mult/2;
230 do_div(tmp, clock->mult);
231 if (tmp == 0)
232 tmp = 1;
233
234 interval = (u64) tmp;
235 tk->cycle_interval = interval;
236
237 /* Go back from cycles -> shifted ns */
238 tk->xtime_interval = interval * clock->mult;
239 tk->xtime_remainder = ntpinterval - tk->xtime_interval;
240 tk->raw_interval = interval * clock->mult;
241
242 /* if changing clocks, convert xtime_nsec shift units */
243 if (old_clock) {
244 int shift_change = clock->shift - old_clock->shift;
245 if (shift_change < 0) {
246 tk->tkr_mono.xtime_nsec >>= -shift_change;
247 tk->tkr_raw.xtime_nsec >>= -shift_change;
248 } else {
249 tk->tkr_mono.xtime_nsec <<= shift_change;
250 tk->tkr_raw.xtime_nsec <<= shift_change;
251 }
252 }
253
254 tk->tkr_mono.shift = clock->shift;
255 tk->tkr_raw.shift = clock->shift;
256
257 tk->ntp_error = 0;
258 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
259 tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
260
261 /*
262 * The timekeeper keeps its own mult values for the currently
263 * active clocksource. These value will be adjusted via NTP
264 * to counteract clock drifting.
265 */
266 tk->tkr_mono.mult = clock->mult;
267 tk->tkr_raw.mult = clock->mult;
268 tk->ntp_err_mult = 0;
269 tk->skip_second_overflow = 0;
270 }
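
/*
 * Worked example for the interval setup above, using hypothetical values:
 * a 10 MHz clocksource with shift = 23 and mult = 100 << 23 (so that
 * (cycles * mult) >> shift yields nanoseconds) and an NTP interval of
 * 10,000,000 ns:
 *
 *	cycle_interval = ((10,000,000 << 23) + mult / 2) / mult = 100,000
 *	xtime_interval = 100,000 * mult = 10,000,000 << 23
 *	xtime_remainder = 0  (the division happened to be exact)
 *
 * i.e. one NTP interval corresponds to 100,000 cycles of the clocksource,
 * and xtime_remainder would carry any rounding error of that division.
 */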
271
272 /* Timekeeper helper functions. */
273 static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
274 {
275 return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
276 }
277
278 static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
279 {
280 /* Calculate the delta since the last update_wall_time() */
281 u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
282
283 /*
284 * This detects both negative motion and the case where the delta
285 * overflows the multiplication with tkr->mult.
286 */
287 if (unlikely(delta > tkr->clock->max_cycles)) {
288 /*
289 * Handle clocksource inconsistency between CPUs to prevent
290 * time from going backwards by checking for the MSB of the
291 * mask being set in the delta.
292 */
293 if (delta & ~(mask >> 1))
294 return tkr->xtime_nsec >> tkr->shift;
295
296 return delta_to_ns_safe(tkr, delta);
297 }
298
299 return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
300 }
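
/*
 * Worked example with the same hypothetical 10 MHz clocksource
 * (mult = 100 << 23, shift = 23) and xtime_nsec == 0 for simplicity:
 * a delta of 250 cycles since the last update yields
 * (250 * mult) >> 23 = 25,000 ns, i.e. 250 * 100 ns per cycle.
 */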
301
302 static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
303 {
304 return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
305 }
306
307 /**
308 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
309 * @tkr: Timekeeping readout base from which we take the update
310 * @tkf: Pointer to NMI safe timekeeper
311 *
312 * We want to use this from any context including NMI and tracing /
313 * instrumenting the timekeeping code itself.
314 *
315 * Employ the latch technique; see @raw_write_seqcount_latch.
316 *
317 * So if a NMI hits the update of base[0] then it will use base[1]
318 * which is still consistent. In the worst case this can result in a
319 * slightly wrong timestamp (a few nanoseconds). See
320 * @ktime_get_mono_fast_ns.
321 */
322 static void update_fast_timekeeper(const struct tk_read_base *tkr,
323 struct tk_fast *tkf)
324 {
325 struct tk_read_base *base = tkf->base;
326
327 /* Force readers off to base[1] */
328 raw_write_seqcount_latch(&tkf->seq);
329
330 /* Update base[0] */
331 memcpy(base, tkr, sizeof(*base));
332
333 /* Force readers back to base[0] */
334 raw_write_seqcount_latch(&tkf->seq);
335
336 /* Update base[1] */
337 memcpy(base + 1, base, sizeof(*base));
338 }
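
/*
 * Illustrative interleaving of the latch above (sequence values are
 * examples only):
 *
 *	update_fast_timekeeper()	NMI reader
 *	seq: 0 -> 1			seq & 0x01 == 1 -> uses base[1] (old)
 *	memcpy to base[0]
 *	seq: 1 -> 2			seq & 0x01 == 0 -> uses base[0] (new)
 *	memcpy base[0] to base[1]
 *
 * A reader never sees a half-written tk_read_base; at worst it works on
 * the previous, still consistent copy.
 */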
339
340 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
341 {
342 struct tk_read_base *tkr;
343 unsigned int seq;
344 u64 now;
345
346 do {
347 seq = raw_read_seqcount_latch(&tkf->seq);
348 tkr = tkf->base + (seq & 0x01);
349 now = ktime_to_ns(tkr->base);
350 now += timekeeping_get_ns(tkr);
351 } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
352
353 return now;
354 }
355
356 /**
357 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
358 *
359 * This timestamp is not guaranteed to be monotonic across an update.
360 * The timestamp is calculated by:
361 *
362 * now = base_mono + clock_delta * slope
363 *
364 * So if the update lowers the slope, readers who are forced to the
365 * not yet updated second array are still using the old steeper slope.
366 *
367 * tmono
368 * ^
369 * | o n
370 * | o n
371 * | u
372 * | o
373 * |o
374 * |12345678---> reader order
375 *
376 * o = old slope
377 * u = update
378 * n = new slope
379 *
380 * So reader 6 will observe time going backwards versus reader 5.
381 *
382 * While other CPUs are likely to be able to observe that, the only way
383 * for a CPU local observation is when an NMI hits in the middle of
384 * the update. Timestamps taken from that NMI context might be ahead
385 * of the following timestamps. Callers need to be aware of that and
386 * deal with it.
387 */
388 u64 notrace ktime_get_mono_fast_ns(void)
389 {
390 return __ktime_get_fast_ns(&tk_fast_mono);
391 }
392 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
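
/*
 * Minimal usage sketch (hypothetical caller): because the accessor is
 * notrace and NMI safe it can be used where ktime_get() must not be,
 * e.g. from a tracer or an NMI handler:
 *
 *	u64 ts = ktime_get_mono_fast_ns();
 *
 * subject to the ordering caveats documented above.
 */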
393
394 /**
395 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
396 *
397 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
398 * conversion factor is not affected by NTP/PTP correction.
399 */
400 u64 notrace ktime_get_raw_fast_ns(void)
401 {
402 return __ktime_get_fast_ns(&tk_fast_raw);
403 }
404 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
405
406 /**
407 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
408 *
409 * To keep it NMI safe since we're accessing from tracing, we're not using a
410 * separate timekeeper with updates to monotonic clock and boot offset
411 * protected with seqcounts. This has the following minor side effects:
412 *
413 * (1) It's possible that a timestamp is taken after the boot offset is updated
414 * but before the timekeeper is updated. If this happens, the new boot offset
415 * is added to the old timekeeping making the clock appear to update slightly
416 * earlier:
417 * CPU 0 CPU 1
418 * timekeeping_inject_sleeptime64()
419 * __timekeeping_inject_sleeptime(tk, delta);
420 * timestamp();
421 * timekeeping_update(tk, TK_CLEAR_NTP...);
422 *
423 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
424 * partially updated. Since the tk->offs_boot update is a rare event, this
425 * should be a rare occurrence which postprocessing should be able to handle.
426 *
427 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
428 * apply as well.
429 */
430 u64 notrace ktime_get_boot_fast_ns(void)
431 {
432 struct timekeeper *tk = &tk_core.timekeeper;
433
434 return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
435 }
436 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
437
438 /**
439 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
440 *
441 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
442 * mono time and the TAI offset are not read atomically which may yield wrong
443 * readouts. However, an update of the TAI offset is a rare event, e.g. caused
444 * by settime or adjtimex with an offset. The user of this function has to deal
445 * with the possibility of wrong timestamps in post processing.
446 */
447 u64 notrace ktime_get_tai_fast_ns(void)
448 {
449 struct timekeeper *tk = &tk_core.timekeeper;
450
451 return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
452 }
453 EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
454
455 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
456 {
457 struct tk_read_base *tkr;
458 u64 basem, baser, delta;
459 unsigned int seq;
460
461 do {
462 seq = raw_read_seqcount_latch(&tkf->seq);
463 tkr = tkf->base + (seq & 0x01);
464 basem = ktime_to_ns(tkr->base);
465 baser = ktime_to_ns(tkr->base_real);
466 delta = timekeeping_get_ns(tkr);
467 } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
468
469 if (mono)
470 *mono = basem + delta;
471 return baser + delta;
472 }
473
474 /**
475 * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
476 *
477 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
478 */
479 u64 ktime_get_real_fast_ns(void)
480 {
481 return __ktime_get_real_fast(&tk_fast_mono, NULL);
482 }
483 EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
484
485 /**
486 * ktime_get_fast_timestamps: - NMI safe timestamps
487 * @snapshot: Pointer to timestamp storage
488 *
489 * Stores clock monotonic, boottime and realtime timestamps.
490 *
491 * Boot time is a racy access on 32bit systems if the sleep time injection
492 * happens late during resume and not in timekeeping_resume(). That could
493 * be avoided by expanding struct tk_read_base with boot offset for 32bit
494 * and adding more overhead to the update. As this is a hard-to-observe,
495 * once-per-resume event which can be filtered with reasonable effort using
496 * the accurate mono/real timestamps, it's probably not worth the trouble.
497 *
498 * Aside from that, it might be possible on 32 and 64 bit to observe the
499 * following when the sleep time injection happens late:
500 *
501 * CPU 0 CPU 1
502 * timekeeping_resume()
503 * ktime_get_fast_timestamps()
504 * mono, real = __ktime_get_real_fast()
505 * inject_sleep_time()
506 * update boot offset
507 * boot = mono + bootoffset;
508 *
509 * That means that boot time already has the sleep time adjustment, but
510 * real time does not. On the next readout both are in sync again.
511 *
512 * Preventing this for 64bit is not really feasible without destroying the
513 * careful cache layout of the timekeeper because the sequence count and
514 * struct tk_read_base would then need two cache lines instead of one.
515 *
516 * Access to the time keeper clock source is disabled across the innermost
517 * steps of suspend/resume. The accessors still work, but the timestamps
518 * are frozen until time keeping is resumed which happens very early.
519 *
520 * For regular suspend/resume there is no observable difference vs. sched
521 * clock, but it might affect some of the nasty low level debug printks.
522 *
523 * OTOH, access to sched clock is not guaranteed across suspend/resume on
524 * all systems either so it depends on the hardware in use.
525 *
526 * If that turns out to be a real problem then this could be mitigated by
527 * using sched clock in a similar way as during early boot. But it's not as
528 * trivial as on early boot because it needs some careful protection
529 * against the clock monotonic timestamp jumping backwards on resume.
530 */
531 void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
532 {
533 struct timekeeper *tk = &tk_core.timekeeper;
534
535 snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
536 snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
537 }
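
/*
 * Usage sketch (hypothetical caller, e.g. a low level debug path):
 *
 *	struct ktime_timestamps snap;
 *
 *	ktime_get_fast_timestamps(&snap);
 *
 * after which snap.mono, snap.boot and snap.real hold the monotonic,
 * boottime and realtime timestamps filled in above.
 */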
538
539 /**
540 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
541 * @tk: Timekeeper to snapshot.
542 *
543 * It generally is unsafe to access the clocksource after timekeeping has been
544 * suspended, so take a snapshot of the readout base of @tk and use it as the
545 * fast timekeeper's readout base while suspended. It will return the same
546 * number of cycles every time until timekeeping is resumed at which time the
547 * proper readout base for the fast timekeeper will be restored automatically.
548 */
549 static void halt_fast_timekeeper(const struct timekeeper *tk)
550 {
551 static struct tk_read_base tkr_dummy;
552 const struct tk_read_base *tkr = &tk->tkr_mono;
553
554 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
555 cycles_at_suspend = tk_clock_read(tkr);
556 tkr_dummy.clock = &dummy_clock;
557 tkr_dummy.base_real = tkr->base + tk->offs_real;
558 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
559
560 tkr = &tk->tkr_raw;
561 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
562 tkr_dummy.clock = &dummy_clock;
563 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
564 }
565
566 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
567
568 static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
569 {
570 raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
571 }
572
573 /**
574 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
575 * @nb: Pointer to the notifier block to register
576 */
577 int pvclock_gtod_register_notifier(struct notifier_block *nb)
578 {
579 struct timekeeper *tk = &tk_core.timekeeper;
580 unsigned long flags;
581 int ret;
582
583 raw_spin_lock_irqsave(&timekeeper_lock, flags);
584 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
585 update_pvclock_gtod(tk, true);
586 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
587
588 return ret;
589 }
590 EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
591
592 /**
593 * pvclock_gtod_unregister_notifier - unregister a pvclock
594 * timedata update listener
595 * @nb: Pointer to the notifier block to unregister
596 */
597 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
598 {
599 unsigned long flags;
600 int ret;
601
602 raw_spin_lock_irqsave(&timekeeper_lock, flags);
603 ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
604 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
605
606 return ret;
607 }
608 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
609
610 /*
611 * tk_update_leap_state - helper to update the next_leap_ktime
612 */
613 static inline void tk_update_leap_state(struct timekeeper *tk)
614 {
615 tk->next_leap_ktime = ntp_get_next_leap();
616 if (tk->next_leap_ktime != KTIME_MAX)
617 /* Convert to monotonic time */
618 tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
619 }
620
621 /*
622 * Update the ktime_t based scalar nsec members of the timekeeper
623 */
624 static inline void tk_update_ktime_data(struct timekeeper *tk)
625 {
626 u64 seconds;
627 u32 nsec;
628
629 /*
630 * The xtime based monotonic readout is:
631 * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
632 * The ktime based monotonic readout is:
633 * nsec = base_mono + now();
634 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
635 */
636 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
637 nsec = (u32) tk->wall_to_monotonic.tv_nsec;
638 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
639
640 /*
641 * The sum of the nanoseconds portions of xtime and
642 * wall_to_monotonic can be greater/equal one second. Take
643 * this into account before updating tk->ktime_sec.
644 */
645 nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
646 if (nsec >= NSEC_PER_SEC)
647 seconds++;
648 tk->ktime_sec = seconds;
649
650 /* Update the monotonic raw base */
651 tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
652 }
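
/*
 * Worked example for base_mono (hypothetical values): with
 * xtime_sec = 1000 and wall_to_monotonic = { .tv_sec = -400, .tv_nsec = 0 }
 * the base becomes (1000 - 400) * NSEC_PER_SEC = 600,000,000,000 ns, so a
 * monotonic readout is simply that base plus the nanoseconds since the
 * last update.
 */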
653
654 /* must hold timekeeper_lock */
655 static void timekeeping_update(struct timekeeper *tk, unsigned int action)
656 {
657 if (action & TK_CLEAR_NTP) {
658 tk->ntp_error = 0;
659 ntp_clear();
660 }
661
662 tk_update_leap_state(tk);
663 tk_update_ktime_data(tk);
664
665 update_vsyscall(tk);
666 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
667
668 tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
669 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
670 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
671
672 if (action & TK_CLOCK_WAS_SET)
673 tk->clock_was_set_seq++;
674 /*
675 * The mirroring of the data to the shadow-timekeeper needs
676 * to happen last here to ensure we don't over-write the
677 * timekeeper structure on the next update with stale data
678 */
679 if (action & TK_MIRROR)
680 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
681 sizeof(tk_core.timekeeper));
682 }
683
684 /**
685 * timekeeping_forward_now - update clock to the current time
686 * @tk: Pointer to the timekeeper to update
687 *
688 * Forward the current clock to update its state since the last call to
689 * update_wall_time(). This is useful before significant clock changes,
690 * as it avoids having to deal with this time offset explicitly.
691 */
692 static void timekeeping_forward_now(struct timekeeper *tk)
693 {
694 u64 cycle_now, delta;
695
696 cycle_now = tk_clock_read(&tk->tkr_mono);
697 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
698 tk->tkr_mono.clock->max_raw_delta);
699 tk->tkr_mono.cycle_last = cycle_now;
700 tk->tkr_raw.cycle_last = cycle_now;
701
702 while (delta > 0) {
703 u64 max = tk->tkr_mono.clock->max_cycles;
704 u64 incr = delta < max ? delta : max;
705
706 tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
707 tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
708 tk_normalize_xtime(tk);
709 delta -= incr;
710 }
711 }
712
713 /**
714 * ktime_get_real_ts64 - Returns the time of day in a timespec64.
715 * @ts: pointer to the timespec to be set
716 *
717 * Returns the time of day in a timespec64 (WARN if suspended).
718 */
719 void ktime_get_real_ts64(struct timespec64 *ts)
720 {
721 struct timekeeper *tk = &tk_core.timekeeper;
722 unsigned int seq;
723 u64 nsecs;
724
725 WARN_ON(timekeeping_suspended);
726
727 do {
728 seq = read_seqcount_begin(&tk_core.seq);
729
730 ts->tv_sec = tk->xtime_sec;
731 nsecs = timekeeping_get_ns(&tk->tkr_mono);
732
733 } while (read_seqcount_retry(&tk_core.seq, seq));
734
735 ts->tv_nsec = 0;
736 timespec64_add_ns(ts, nsecs);
737 }
738 EXPORT_SYMBOL(ktime_get_real_ts64);
739
740 ktime_t ktime_get(void)
741 {
742 struct timekeeper *tk = &tk_core.timekeeper;
743 unsigned int seq;
744 ktime_t base;
745 u64 nsecs;
746
747 WARN_ON(timekeeping_suspended);
748
749 do {
750 seq = read_seqcount_begin(&tk_core.seq);
751 base = tk->tkr_mono.base;
752 nsecs = timekeeping_get_ns(&tk->tkr_mono);
753
754 } while (read_seqcount_retry(&tk_core.seq, seq));
755
756 return ktime_add_ns(base, nsecs);
757 }
758 EXPORT_SYMBOL_GPL(ktime_get);
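
/*
 * Typical usage sketch (hypothetical caller) for measuring an elapsed
 * interval on the monotonic clock:
 *
 *	ktime_t start, delta;
 *
 *	start = ktime_get();
 *	do_work();
 *	delta = ktime_sub(ktime_get(), start);
 *
 * do_work() stands in for the caller's own code.
 */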
759
760 u32 ktime_get_resolution_ns(void)
761 {
762 struct timekeeper *tk = &tk_core.timekeeper;
763 unsigned int seq;
764 u32 nsecs;
765
766 WARN_ON(timekeeping_suspended);
767
768 do {
769 seq = read_seqcount_begin(&tk_core.seq);
770 nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
771 } while (read_seqcount_retry(&tk_core.seq, seq));
772
773 return nsecs;
774 }
775 EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
776
777 static ktime_t *offsets[TK_OFFS_MAX] = {
778 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
779 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
780 [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
781 };
782
783 ktime_t ktime_get_with_offset(enum tk_offsets offs)
784 {
785 struct timekeeper *tk = &tk_core.timekeeper;
786 unsigned int seq;
787 ktime_t base, *offset = offsets[offs];
788 u64 nsecs;
789
790 WARN_ON(timekeeping_suspended);
791
792 do {
793 seq = read_seqcount_begin(&tk_core.seq);
794 base = ktime_add(tk->tkr_mono.base, *offset);
795 nsecs = timekeeping_get_ns(&tk->tkr_mono);
796
797 } while (read_seqcount_retry(&tk_core.seq, seq));
798
799 return ktime_add_ns(base, nsecs);
800
801 }
802 EXPORT_SYMBOL_GPL(ktime_get_with_offset);
803
804 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
805 {
806 struct timekeeper *tk = &tk_core.timekeeper;
807 unsigned int seq;
808 ktime_t base, *offset = offsets[offs];
809 u64 nsecs;
810
811 WARN_ON(timekeeping_suspended);
812
813 do {
814 seq = read_seqcount_begin(&tk_core.seq);
815 base = ktime_add(tk->tkr_mono.base, *offset);
816 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
817
818 } while (read_seqcount_retry(&tk_core.seq, seq));
819
820 return ktime_add_ns(base, nsecs);
821 }
822 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
823
824 /**
825 * ktime_mono_to_any() - convert monotonic time to any other time
826 * @tmono: time to convert.
827 * @offs: which offset to use
828 */
829 ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
830 {
831 ktime_t *offset = offsets[offs];
832 unsigned int seq;
833 ktime_t tconv;
834
835 do {
836 seq = read_seqcount_begin(&tk_core.seq);
837 tconv = ktime_add(tmono, *offset);
838 } while (read_seqcount_retry(&tk_core.seq, seq));
839
840 return tconv;
841 }
842 EXPORT_SYMBOL_GPL(ktime_mono_to_any);
843
844 /**
845 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
846 */
847 ktime_t ktime_get_raw(void)
848 {
849 struct timekeeper *tk = &tk_core.timekeeper;
850 unsigned int seq;
851 ktime_t base;
852 u64 nsecs;
853
854 do {
855 seq = read_seqcount_begin(&tk_core.seq);
856 base = tk->tkr_raw.base;
857 nsecs = timekeeping_get_ns(&tk->tkr_raw);
858
859 } while (read_seqcount_retry(&tk_core.seq, seq));
860
861 return ktime_add_ns(base, nsecs);
862 }
863 EXPORT_SYMBOL_GPL(ktime_get_raw);
864
865 /**
866 * ktime_get_ts64 - get the monotonic clock in timespec64 format
867 * @ts: pointer to timespec variable
868 *
869 * The function calculates the monotonic clock from the realtime
870 * clock and the wall_to_monotonic offset and stores the result
871 * in normalized timespec64 format in the variable pointed to by @ts.
872 */
873 void ktime_get_ts64(struct timespec64 *ts)
874 {
875 struct timekeeper *tk = &tk_core.timekeeper;
876 struct timespec64 tomono;
877 unsigned int seq;
878 u64 nsec;
879
880 WARN_ON(timekeeping_suspended);
881
882 do {
883 seq = read_seqcount_begin(&tk_core.seq);
884 ts->tv_sec = tk->xtime_sec;
885 nsec = timekeeping_get_ns(&tk->tkr_mono);
886 tomono = tk->wall_to_monotonic;
887
888 } while (read_seqcount_retry(&tk_core.seq, seq));
889
890 ts->tv_sec += tomono.tv_sec;
891 ts->tv_nsec = 0;
892 timespec64_add_ns(ts, nsec + tomono.tv_nsec);
893 }
894 EXPORT_SYMBOL_GPL(ktime_get_ts64);
895
896 /**
897 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
898 *
899 * Returns the seconds portion of CLOCK_MONOTONIC with a single non
900 * serialized read. tk->ktime_sec is of type 'unsigned long' so this
901 * works on both 32 and 64 bit systems. On 32 bit systems the readout
902 * covers ~136 years of uptime which should be enough to prevent
903 * premature wrap arounds.
904 */
905 time64_t ktime_get_seconds(void)
906 {
907 struct timekeeper *tk = &tk_core.timekeeper;
908
909 WARN_ON(timekeeping_suspended);
910 return tk->ktime_sec;
911 }
912 EXPORT_SYMBOL_GPL(ktime_get_seconds);
913
914 /**
915 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
916 *
917 * Returns the wall clock seconds since 1970.
918 *
919 * For 64bit systems the fast access to tk->xtime_sec is preserved. On
920 * 32bit systems the access must be protected with the sequence
921 * counter to provide "atomic" access to the 64bit tk->xtime_sec
922 * value.
923 */
924 time64_t ktime_get_real_seconds(void)
925 {
926 struct timekeeper *tk = &tk_core.timekeeper;
927 time64_t seconds;
928 unsigned int seq;
929
930 if (IS_ENABLED(CONFIG_64BIT))
931 return tk->xtime_sec;
932
933 do {
934 seq = read_seqcount_begin(&tk_core.seq);
935 seconds = tk->xtime_sec;
936
937 } while (read_seqcount_retry(&tk_core.seq, seq));
938
939 return seconds;
940 }
941 EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
942
943 /**
944 * __ktime_get_real_seconds - The same as ktime_get_real_seconds
945 * but without the sequence counter protection. This internal function
946 * is called only when the timekeeping lock is already held.
947 */
948 noinstr time64_t __ktime_get_real_seconds(void)
949 {
950 struct timekeeper *tk = &tk_core.timekeeper;
951
952 return tk->xtime_sec;
953 }
954
955 /**
956 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
957 * @systime_snapshot: pointer to struct receiving the system time snapshot
958 */
959 void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
960 {
961 struct timekeeper *tk = &tk_core.timekeeper;
962 u32 mono_mult, mono_shift;
963 unsigned int seq;
964 ktime_t base_raw;
965 ktime_t base_real;
966 ktime_t base_boot;
967 u64 nsec_raw;
968 u64 nsec_real;
969 u64 now;
970
971 WARN_ON_ONCE(timekeeping_suspended);
972
973 do {
974 seq = read_seqcount_begin(&tk_core.seq);
975 now = tk_clock_read(&tk->tkr_mono);
976 systime_snapshot->cs_id = tk->tkr_mono.clock->id;
977 systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
978 systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
979 base_real = ktime_add(tk->tkr_mono.base,
980 tk_core.timekeeper.offs_real);
981 base_boot = ktime_add(tk->tkr_mono.base,
982 tk_core.timekeeper.offs_boot);
983 base_raw = tk->tkr_raw.base;
984 nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
985 nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
986 mono_mult = tk->tkr_mono.mult;
987 mono_shift = tk->tkr_mono.shift;
988 } while (read_seqcount_retry(&tk_core.seq, seq));
989
990 systime_snapshot->cycles = now;
991 systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
992 systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
993 systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
994 systime_snapshot->mono_shift = mono_shift;
995 systime_snapshot->mono_mult = mono_mult;
996 }
997 EXPORT_SYMBOL_GPL(ktime_get_snapshot);
998
999 /* Scale base by mult/div checking for overflow */
1000 static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
1001 {
1002 u64 tmp, rem;
1003
1004 tmp = div64_u64_rem(*base, div, &rem);
1005
1006 if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
1007 ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
1008 return -EOVERFLOW;
1009 tmp *= mult;
1010
1011 rem = div64_u64(rem * mult, div);
1012 *base = tmp + rem;
1013 return 0;
1014 }
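
/*
 * Worked example (hypothetical numbers): scaling *base = 1,000,003 by
 * mult/div = 250/1000 splits the value into tmp = 1000 and rem = 3,
 * verifies that both products fit into 64 bit, and returns
 * 1000 * 250 + (3 * 250) / 1000 = 250,000.
 */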
1015
1016 /**
1017 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
1018 * @history: Snapshot representing start of history
1019 * @partial_history_cycles: Cycle offset into history (fractional part)
1020 * @total_history_cycles: Total history length in cycles
1021 * @discontinuity: True indicates the clock was set during the history period
1022 * @ts: Cross timestamp that should be adjusted using
1023 * partial/total ratio
1024 *
1025 * Helper function used by get_device_system_crosststamp() to correct the
1026 * crosstimestamp corresponding to the start of the current interval to the
1027 * system counter value (timestamp point) provided by the driver. The
1028 * total_history_* quantities are the total history starting at the provided
1029 * reference point and ending at the start of the current interval. The cycle
1030 * count between the driver timestamp point and the start of the current
1031 * interval is partial_history_cycles.
1032 */
1033 static int adjust_historical_crosststamp(struct system_time_snapshot *history,
1034 u64 partial_history_cycles,
1035 u64 total_history_cycles,
1036 bool discontinuity,
1037 struct system_device_crosststamp *ts)
1038 {
1039 struct timekeeper *tk = &tk_core.timekeeper;
1040 u64 corr_raw, corr_real;
1041 bool interp_forward;
1042 int ret;
1043
1044 if (total_history_cycles == 0 || partial_history_cycles == 0)
1045 return 0;
1046
1047 /* Interpolate shortest distance from beginning or end of history */
1048 interp_forward = partial_history_cycles > total_history_cycles / 2;
1049 partial_history_cycles = interp_forward ?
1050 total_history_cycles - partial_history_cycles :
1051 partial_history_cycles;
1052
1053 /*
1054 * Scale the monotonic raw time delta by:
1055 * partial_history_cycles / total_history_cycles
1056 */
1057 corr_raw = (u64)ktime_to_ns(
1058 ktime_sub(ts->sys_monoraw, history->raw));
1059 ret = scale64_check_overflow(partial_history_cycles,
1060 total_history_cycles, &corr_raw);
1061 if (ret)
1062 return ret;
1063
1064 /*
1065 * If there is a discontinuity in the history, scale monotonic raw
1066 * correction by:
1067 * mult(real)/mult(raw) yielding the realtime correction
1068 * Otherwise, calculate the realtime correction similar to monotonic
1069 * raw calculation
1070 */
1071 if (discontinuity) {
1072 corr_real = mul_u64_u32_div
1073 (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
1074 } else {
1075 corr_real = (u64)ktime_to_ns(
1076 ktime_sub(ts->sys_realtime, history->real));
1077 ret = scale64_check_overflow(partial_history_cycles,
1078 total_history_cycles, &corr_real);
1079 if (ret)
1080 return ret;
1081 }
1082
1083 /* Fixup monotonic raw and real time values */
1084 if (interp_forward) {
1085 ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
1086 ts->sys_realtime = ktime_add_ns(history->real, corr_real);
1087 } else {
1088 ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
1089 ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
1090 }
1091
1092 return 0;
1093 }
1094
1095 /*
1096 * timestamp_in_interval - true if ts is chronologically in [start, end]
1097 *
1098 * True if ts occurs chronologically at or after start, and before or at end.
1099 */
1100 static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
1101 {
1102 if (ts >= start && ts <= end)
1103 return true;
1104 if (start > end && (ts >= start || ts <= end))
1105 return true;
1106 return false;
1107 }
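
/*
 * Example with a wrapping 32bit counter: for start = 0xFFFFF000 and
 * end = 0x00000200 the interval crosses the counter wrap, so both
 * ts = 0xFFFFFF00 and ts = 0x00000100 are inside it, while
 * ts = 0x00001000 is not.
 */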
1108
1109 static bool convert_clock(u64 *val, u32 numerator, u32 denominator)
1110 {
1111 u64 rem, res;
1112
1113 if (!numerator || !denominator)
1114 return false;
1115
1116 res = div64_u64_rem(*val, denominator, &rem) * numerator;
1117 *val = res + div_u64(rem * numerator, denominator);
1118 return true;
1119 }
1120
1121 static bool convert_base_to_cs(struct system_counterval_t *scv)
1122 {
1123 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
1124 struct clocksource_base *base;
1125 u32 num, den;
1126
1127 /* The timestamp was taken from the time keeper clock source */
1128 if (cs->id == scv->cs_id)
1129 return true;
1130
1131 /*
1132 * Check whether cs_id matches the base clock. Prevent the compiler from
1133 * re-evaluating @base as the clocksource might change concurrently.
1134 */
1135 base = READ_ONCE(cs->base);
1136 if (!base || base->id != scv->cs_id)
1137 return false;
1138
1139 num = scv->use_nsecs ? cs->freq_khz : base->numerator;
1140 den = scv->use_nsecs ? USEC_PER_SEC : base->denominator;
1141
1142 if (!convert_clock(&scv->cycles, num, den))
1143 return false;
1144
1145 scv->cycles += base->offset;
1146 return true;
1147 }
1148
1149 static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id)
1150 {
1151 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
1152 struct clocksource_base *base;
1153
1154 /*
1155 * Check whether base_id matches the base clock. Prevent the compiler from
1156 * re-evaluating @base as the clocksource might change concurrently.
1157 */
1158 base = READ_ONCE(cs->base);
1159 if (!base || base->id != base_id)
1160 return false;
1161
1162 *cycles -= base->offset;
1163 if (!convert_clock(cycles, base->denominator, base->numerator))
1164 return false;
1165 return true;
1166 }
1167
1168 static bool convert_ns_to_cs(u64 *delta)
1169 {
1170 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
1171
1172 if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta))
1173 return false;
1174
1175 *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult);
1176 return true;
1177 }
1178
1179 /**
1180 * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp
1181 * @treal: CLOCK_REALTIME timestamp to convert
1182 * @base_id: base clocksource id
1183 * @cycles: pointer to store the converted base clock timestamp
1184 *
1185 * Converts a supplied, future realtime clock value to the corresponding base clock value.
1186 *
1187 * Return: true if the conversion is successful, false otherwise.
1188 */
1189 bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles)
1190 {
1191 struct timekeeper *tk = &tk_core.timekeeper;
1192 unsigned int seq;
1193 u64 delta;
1194
1195 do {
1196 seq = read_seqcount_begin(&tk_core.seq);
1197 if ((u64)treal < tk->tkr_mono.base_real)
1198 return false;
1199 delta = (u64)treal - tk->tkr_mono.base_real;
1200 if (!convert_ns_to_cs(&delta))
1201 return false;
1202 *cycles = tk->tkr_mono.cycle_last + delta;
1203 if (!convert_cs_to_base(cycles, base_id))
1204 return false;
1205 } while (read_seqcount_retry(&tk_core.seq, seq));
1206
1207 return true;
1208 }
1209 EXPORT_SYMBOL_GPL(ktime_real_to_base_clock);
1210
1211 /**
1212 * get_device_system_crosststamp - Synchronously capture system/device timestamp
1213 * @get_time_fn: Callback to get simultaneous device time and
1214 * system counter from the device driver
1215 * @ctx: Context passed to get_time_fn()
1216 * @history_begin: Historical reference point used to interpolate system
1217 * time when counter provided by the driver is before the current interval
1218 * @xtstamp: Receives simultaneously captured system and device time
1219 *
1220 * Reads a timestamp from a device and correlates it to system time
1221 */
1222 int get_device_system_crosststamp(int (*get_time_fn)
1223 (ktime_t *device_time,
1224 struct system_counterval_t *sys_counterval,
1225 void *ctx),
1226 void *ctx,
1227 struct system_time_snapshot *history_begin,
1228 struct system_device_crosststamp *xtstamp)
1229 {
1230 struct system_counterval_t system_counterval = {};
1231 struct timekeeper *tk = &tk_core.timekeeper;
1232 u64 cycles, now, interval_start;
1233 unsigned int clock_was_set_seq = 0;
1234 ktime_t base_real, base_raw;
1235 u64 nsec_real, nsec_raw;
1236 u8 cs_was_changed_seq;
1237 unsigned int seq;
1238 bool do_interp;
1239 int ret;
1240
1241 do {
1242 seq = read_seqcount_begin(&tk_core.seq);
1243 /*
1244 * Try to synchronously capture device time and a system
1245 * counter value calling back into the device driver
1246 */
1247 ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
1248 if (ret)
1249 return ret;
1250
1251 /*
1252 * Verify that the clocksource ID associated with the captured
1253 * system counter value is the same as for the currently
1254 * installed timekeeper clocksource
1255 */
1256 if (system_counterval.cs_id == CSID_GENERIC ||
1257 !convert_base_to_cs(&system_counterval))
1258 return -ENODEV;
1259 cycles = system_counterval.cycles;
1260
1261 /*
1262 * Check whether the system counter value provided by the
1263 * device driver is on the current timekeeping interval.
1264 */
1265 now = tk_clock_read(&tk->tkr_mono);
1266 interval_start = tk->tkr_mono.cycle_last;
1267 if (!timestamp_in_interval(interval_start, now, cycles)) {
1268 clock_was_set_seq = tk->clock_was_set_seq;
1269 cs_was_changed_seq = tk->cs_was_changed_seq;
1270 cycles = interval_start;
1271 do_interp = true;
1272 } else {
1273 do_interp = false;
1274 }
1275
1276 base_real = ktime_add(tk->tkr_mono.base,
1277 tk_core.timekeeper.offs_real);
1278 base_raw = tk->tkr_raw.base;
1279
1280 nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
1281 nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
1282 } while (read_seqcount_retry(&tk_core.seq, seq));
1283
1284 xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
1285 xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
1286
1287 /*
1288 * Interpolate if necessary, adjusting back from the start of the
1289 * current interval
1290 */
1291 if (do_interp) {
1292 u64 partial_history_cycles, total_history_cycles;
1293 bool discontinuity;
1294
1295 /*
1296 * Check that the counter value is not before the provided
1297 * history reference and that the history doesn't cross a
1298 * clocksource change
1299 */
1300 if (!history_begin ||
1301 !timestamp_in_interval(history_begin->cycles,
1302 cycles, system_counterval.cycles) ||
1303 history_begin->cs_was_changed_seq != cs_was_changed_seq)
1304 return -EINVAL;
1305 partial_history_cycles = cycles - system_counterval.cycles;
1306 total_history_cycles = cycles - history_begin->cycles;
1307 discontinuity =
1308 history_begin->clock_was_set_seq != clock_was_set_seq;
1309
1310 ret = adjust_historical_crosststamp(history_begin,
1311 partial_history_cycles,
1312 total_history_cycles,
1313 discontinuity, xtstamp);
1314 if (ret)
1315 return ret;
1316 }
1317
1318 return 0;
1319 }
1320 EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
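
/*
 * Usage sketch (hypothetical driver): the callback captures the device
 * time and the corresponding system counter value as close together as
 * the hardware allows:
 *
 *	static int foo_get_time(ktime_t *device_time,
 *				struct system_counterval_t *system_counter,
 *				void *ctx)
 *	{
 *		struct foo_device *foo = ctx;
 *
 *		*device_time = foo_read_device_clock(foo);
 *		system_counter->cycles = foo_read_latched_counter(foo);
 *		system_counter->cs_id = foo->system_counter_id;
 *		return 0;
 *	}
 *
 *	err = get_device_system_crosststamp(foo_get_time, foo, NULL, &xtstamp);
 *
 * struct foo_device and the foo_*() helpers are placeholders for driver
 * specific code.
 */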
1321
1322 /**
1323 * timekeeping_clocksource_has_base - Check whether the current clocksource
1324 * is based on a given base clock
1325 * @id: base clocksource ID
1326 *
1327 * Note: The return value is a snapshot which can become invalid right
1328 * after the function returns.
1329 *
1330 * Return: true if the timekeeper clocksource has a base clock with @id,
1331 * false otherwise
1332 */
1333 bool timekeeping_clocksource_has_base(enum clocksource_ids id)
1334 {
1335 /*
1336 * This is a snapshot, so no point in using the sequence
1337 * count. Just prevent the compiler from re-evaluating @base as the
1338 * clocksource might change concurrently.
1339 */
1340 struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base);
1341
1342 return base ? base->id == id : false;
1343 }
1344 EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);
1345
1346 /**
1347 * do_settimeofday64 - Sets the time of day.
1348 * @ts: pointer to the timespec64 variable containing the new time
1349 *
1350 * Sets the time of day to the new time, updates NTP and notifies hrtimers
1351 */
1352 int do_settimeofday64(const struct timespec64 *ts)
1353 {
1354 struct timekeeper *tk = &tk_core.timekeeper;
1355 struct timespec64 ts_delta, xt;
1356 unsigned long flags;
1357 int ret = 0;
1358
1359 if (!timespec64_valid_settod(ts))
1360 return -EINVAL;
1361
1362 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1363 write_seqcount_begin(&tk_core.seq);
1364
1365 timekeeping_forward_now(tk);
1366
1367 xt = tk_xtime(tk);
1368 ts_delta = timespec64_sub(*ts, xt);
1369
1370 if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
1371 ret = -EINVAL;
1372 goto out;
1373 }
1374
1375 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
1376
1377 tk_set_xtime(tk, ts);
1378 out:
1379 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
1380
1381 write_seqcount_end(&tk_core.seq);
1382 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1383
1384 /* Signal hrtimers about time change */
1385 clock_was_set(CLOCK_SET_WALL);
1386
1387 if (!ret) {
1388 audit_tk_injoffset(ts_delta);
1389 add_device_randomness(ts, sizeof(*ts));
1390 }
1391
1392 return ret;
1393 }
1394 EXPORT_SYMBOL(do_settimeofday64);
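
/*
 * Usage sketch (hypothetical caller, e.g. code syncing system time from
 * an RTC at boot):
 *
 *	struct timespec64 ts = { .tv_sec = new_sec, .tv_nsec = 0 };
 *	int err = do_settimeofday64(&ts);
 *
 * new_sec stands in for a seconds value obtained elsewhere; the call
 * fails with -EINVAL if the value is not a valid settable time.
 */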
1395
1396 /**
1397 * timekeeping_inject_offset - Adds or subtracts from the current time.
1398 * @ts: Pointer to the timespec variable containing the offset
1399 *
1400 * Adds or subtracts an offset value from the current time.
1401 */
1402 static int timekeeping_inject_offset(const struct timespec64 *ts)
1403 {
1404 struct timekeeper *tk = &tk_core.timekeeper;
1405 unsigned long flags;
1406 struct timespec64 tmp;
1407 int ret = 0;
1408
1409 if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
1410 return -EINVAL;
1411
1412 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1413 write_seqcount_begin(&tk_core.seq);
1414
1415 timekeeping_forward_now(tk);
1416
1417 /* Make sure the proposed value is valid */
1418 tmp = timespec64_add(tk_xtime(tk), *ts);
1419 if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
1420 !timespec64_valid_settod(&tmp)) {
1421 ret = -EINVAL;
1422 goto error;
1423 }
1424
1425 tk_xtime_add(tk, ts);
1426 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
1427
1428 error: /* even if we error out, we forwarded the time, so call update */
1429 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
1430
1431 write_seqcount_end(&tk_core.seq);
1432 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1433
1434 /* Signal hrtimers about time change */
1435 clock_was_set(CLOCK_SET_WALL);
1436
1437 return ret;
1438 }
1439
1440 /*
1441 * Indicates if there is an offset between the system clock and the hardware
1442 * clock/persistent clock/rtc.
1443 */
1444 int persistent_clock_is_local;
1445
1446 /*
1447 * Adjust the time obtained from the CMOS to be UTC time instead of
1448 * local time.
1449 *
1450 * This is ugly, but preferable to the alternatives. Otherwise we
1451 * would either need to write a program to do it in /etc/rc (and risk
1452 * confusion if the program gets run more than once; it would also be
1453 * hard to make the program warp the clock precisely n hours) or
1454 * compile in the timezone information into the kernel. Bad, bad....
1455 *
1456 * - TYT, 1992-01-01
1457 *
1458 * The best thing to do is to keep the CMOS clock in universal time (UTC)
1459 * as real UNIX machines always do it. This avoids all headaches about
1460 * daylight saving times and warping kernel clocks.
1461 */
1462 void timekeeping_warp_clock(void)
1463 {
1464 if (sys_tz.tz_minuteswest != 0) {
1465 struct timespec64 adjust;
1466
1467 persistent_clock_is_local = 1;
1468 adjust.tv_sec = sys_tz.tz_minuteswest * 60;
1469 adjust.tv_nsec = 0;
1470 timekeeping_inject_offset(&adjust);
1471 }
1472 }
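
/*
 * Example: for a timezone of UTC-5, sys_tz.tz_minuteswest is 300, so the
 * clock is warped forward by 300 * 60 = 18000 seconds to convert the
 * local time read from CMOS into UTC.
 */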
1473
1474 /*
1475 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
1476 */
1477 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
1478 {
1479 tk->tai_offset = tai_offset;
1480 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
1481 }
1482
1483 /*
1484 * change_clocksource - Swaps clocksources if a new one is available
1485 *
1486 * Accumulates current time interval and initializes new clocksource
1487 */
1488 static int change_clocksource(void *data)
1489 {
1490 struct timekeeper *tk = &tk_core.timekeeper;
1491 struct clocksource *new, *old = NULL;
1492 unsigned long flags;
1493 bool change = false;
1494
1495 new = (struct clocksource *) data;
1496
1497 /*
1498 * If the cs is in module, get a module reference. Succeeds
1499 * for built-in code (owner == NULL) as well.
1500 */
1501 if (try_module_get(new->owner)) {
1502 if (!new->enable || new->enable(new) == 0)
1503 change = true;
1504 else
1505 module_put(new->owner);
1506 }
1507
1508 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1509 write_seqcount_begin(&tk_core.seq);
1510
1511 timekeeping_forward_now(tk);
1512
1513 if (change) {
1514 old = tk->tkr_mono.clock;
1515 tk_setup_internals(tk, new);
1516 }
1517
1518 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
1519
1520 write_seqcount_end(&tk_core.seq);
1521 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1522
1523 if (old) {
1524 if (old->disable)
1525 old->disable(old);
1526
1527 module_put(old->owner);
1528 }
1529
1530 return 0;
1531 }
1532
1533 /**
1534 * timekeeping_notify - Install a new clock source
1535 * @clock: pointer to the clock source
1536 *
1537 * This function is called from clocksource.c after a new, better clock
1538 * source has been registered. The caller holds the clocksource_mutex.
1539 */
1540 int timekeeping_notify(struct clocksource *clock)
1541 {
1542 struct timekeeper *tk = &tk_core.timekeeper;
1543
1544 if (tk->tkr_mono.clock == clock)
1545 return 0;
1546 stop_machine(change_clocksource, clock, NULL);
1547 tick_clock_notify();
1548 return tk->tkr_mono.clock == clock ? 0 : -1;
1549 }
1550
1551 /**
1552 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
1553 * @ts: pointer to the timespec64 to be set
1554 *
1555 * Returns the raw monotonic time (completely un-modified by ntp)
1556 */
1557 void ktime_get_raw_ts64(struct timespec64 *ts)
1558 {
1559 struct timekeeper *tk = &tk_core.timekeeper;
1560 unsigned int seq;
1561 u64 nsecs;
1562
1563 do {
1564 seq = read_seqcount_begin(&tk_core.seq);
1565 ts->tv_sec = tk->raw_sec;
1566 nsecs = timekeeping_get_ns(&tk->tkr_raw);
1567
1568 } while (read_seqcount_retry(&tk_core.seq, seq));
1569
1570 ts->tv_nsec = 0;
1571 timespec64_add_ns(ts, nsecs);
1572 }
1573 EXPORT_SYMBOL(ktime_get_raw_ts64);
1574
1575
1576 /**
1577 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
1578 */
1579 int timekeeping_valid_for_hres(void)
1580 {
1581 struct timekeeper *tk = &tk_core.timekeeper;
1582 unsigned int seq;
1583 int ret;
1584
1585 do {
1586 seq = read_seqcount_begin(&tk_core.seq);
1587
1588 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
1589
1590 } while (read_seqcount_retry(&tk_core.seq, seq));
1591
1592 return ret;
1593 }
1594
1595 /**
1596 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
1597 */
1598 u64 timekeeping_max_deferment(void)
1599 {
1600 struct timekeeper *tk = &tk_core.timekeeper;
1601 unsigned int seq;
1602 u64 ret;
1603
1604 do {
1605 seq = read_seqcount_begin(&tk_core.seq);
1606
1607 ret = tk->tkr_mono.clock->max_idle_ns;
1608
1609 } while (read_seqcount_retry(&tk_core.seq, seq));
1610
1611 return ret;
1612 }
1613
1614 /**
1615 * read_persistent_clock64 - Return time from the persistent clock.
1616 * @ts: Pointer to the storage for the readout value
1617 *
1618 * Weak dummy function for arches that do not yet support it.
1619 * Reads the time from the battery backed persistent clock.
1620 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
1621 *
1622 * XXX - Do be sure to remove it once all arches implement it.
1623 */
1624 void __weak read_persistent_clock64(struct timespec64 *ts)
1625 {
1626 ts->tv_sec = 0;
1627 ts->tv_nsec = 0;
1628 }
1629
1630 /**
1631 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
1632 * from the boot.
1633 * @wall_time: current time as returned by persistent clock
1634 * @boot_offset: offset that is defined as wall_time - boot_time
1635 *
1636 * Weak dummy function for arches that do not yet support it.
1637 *
1638 * The default function calculates offset based on the current value of
1639 * local_clock(). This way architectures that support sched_clock() but don't
1640 * support a dedicated boot time clock will provide the best estimate of the
1641 * boot time.
1642 */
1643 void __weak __init
1644 read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
1645 struct timespec64 *boot_offset)
1646 {
1647 read_persistent_clock64(wall_time);
1648 *boot_offset = ns_to_timespec64(local_clock());
1649 }
1650
1651 /*
1652 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
1653 *
1654 * The flag starts off false and is only set when a suspend reaches
1655 * timekeeping_suspend(), timekeeping_resume() sets it to false when the
1656 * timekeeper clocksource is not stopping across suspend and has been
1657 * used to update sleep time. If the timekeeper clocksource has stopped
1658 * then the flag stays true and is used by the RTC resume code to decide
1659 * whether sleeptime must be injected, in which case the flag is cleared afterwards.
1660 *
1661 * If a suspend fails before reaching timekeeping_resume() then the flag
1662 * stays false and prevents erroneous sleeptime injection.
1663 */
1664 static bool suspend_timing_needed;
1665
1666 /* Flag for if there is a persistent clock on this platform */
1667 static bool persistent_clock_exists;
1668
1669 /*
1670 * timekeeping_init - Initializes the clocksource and common timekeeping values
1671 */
1672 void __init timekeeping_init(void)
1673 {
1674 struct timespec64 wall_time, boot_offset, wall_to_mono;
1675 struct timekeeper *tk = &tk_core.timekeeper;
1676 struct clocksource *clock;
1677 unsigned long flags;
1678
1679 read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
1680 if (timespec64_valid_settod(&wall_time) &&
1681 timespec64_to_ns(&wall_time) > 0) {
1682 persistent_clock_exists = true;
1683 } else if (timespec64_to_ns(&wall_time) != 0) {
1684 pr_warn("Persistent clock returned invalid value");
1685 wall_time = (struct timespec64){0};
1686 }
1687
1688 if (timespec64_compare(&wall_time, &boot_offset) < 0)
1689 boot_offset = (struct timespec64){0};
1690
1691 /*
1692 	 * We want to set wall_to_mono so that the following is true:
1693 * wall time + wall_to_mono = boot time
1694 */
1695 wall_to_mono = timespec64_sub(boot_offset, wall_time);
1696
1697 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1698 write_seqcount_begin(&tk_core.seq);
1699 ntp_init();
1700
1701 clock = clocksource_default_clock();
1702 if (clock->enable)
1703 clock->enable(clock);
1704 tk_setup_internals(tk, clock);
1705
1706 tk_set_xtime(tk, &wall_time);
1707 tk->raw_sec = 0;
1708
1709 tk_set_wall_to_mono(tk, wall_to_mono);
1710
1711 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
1712
1713 write_seqcount_end(&tk_core.seq);
1714 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1715 }
1716
1717 /* time in seconds when suspend began for persistent clock */
1718 static struct timespec64 timekeeping_suspend_time;
1719
1720 /**
1721 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
1722 * @tk: Pointer to the timekeeper to be updated
1723 * @delta: Pointer to the delta value in timespec64 format
1724 *
1725 * Takes a timespec offset measuring a suspend interval and properly
1726 * adds the sleep offset to the timekeeping variables.
1727 */
1728 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1729 const struct timespec64 *delta)
1730 {
1731 if (!timespec64_valid_strict(delta)) {
1732 printk_deferred(KERN_WARNING
1733 "__timekeeping_inject_sleeptime: Invalid "
1734 "sleep delta value!\n");
1735 return;
1736 }
1737 tk_xtime_add(tk, delta);
1738 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
1739 tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
1740 tk_debug_account_sleep_time(delta);
1741 }
1742
1743 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
1744 /*
1745 * We have three kinds of time sources to use for sleep time
1746 * injection, the preference order is:
1747 * 1) non-stop clocksource
1748  * 2) persistent clock (i.e. RTC accessible when irqs are off)
1749 * 3) RTC
1750 *
1751  * 1) and 2) are used by timekeeping, 3) by the RTC subsystem.
1752  * If the system has neither 1) nor 2), 3) is used as the final fallback.
1753 *
1754 *
1755  * If timekeeping has injected sleeptime via either 1) or 2),
1756  * 3) becomes unnecessary, so in that case we don't need to call
1757  * rtc_resume(); this is what timekeeping_rtc_skipresume()
1758  * indicates.
1759 */
1760 bool timekeeping_rtc_skipresume(void)
1761 {
1762 return !suspend_timing_needed;
1763 }
1764
1765 /*
1766  * Whether 1) can be used is only known once timekeeping_resume()
1767  * runs, which happens after rtc_suspend(), so rtc_suspend() cannot
1768  * be skipped safely just because the system has 1).
1769  *
1770  * But if the system has 2), 2) will definitely be used, so in this
1771  * case we don't need to call rtc_suspend(); this is what
1772  * timekeeping_rtc_skipsuspend() indicates.
1773 */
1774 bool timekeeping_rtc_skipsuspend(void)
1775 {
1776 return persistent_clock_exists;
1777 }
1778
1779 /**
1780  * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
1781 * @delta: pointer to a timespec64 delta value
1782 *
1783 * This hook is for architectures that cannot support read_persistent_clock64
1784  * because their RTC/persistent clock is only accessible when irqs are enabled,
1785  * and which also don't have an effective nonstop clocksource.
1786 *
1787 * This function should only be called by rtc_resume(), and allows
1788 * a suspend offset to be injected into the timekeeping values.
1789 */
1790 void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
1791 {
1792 struct timekeeper *tk = &tk_core.timekeeper;
1793 unsigned long flags;
1794
1795 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1796 write_seqcount_begin(&tk_core.seq);
1797
1798 suspend_timing_needed = false;
1799
1800 timekeeping_forward_now(tk);
1801
1802 __timekeeping_inject_sleeptime(tk, delta);
1803
1804 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
1805
1806 write_seqcount_end(&tk_core.seq);
1807 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1808
1809 /* Signal hrtimers about time change */
1810 clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
1811 }
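/*
 * Usage sketch (illustrative): the RTC resume path is expected to
 * compute the sleep interval from the RTC readings taken at suspend and
 * resume time and inject it here, roughly:
 *
 *	if (!timekeeping_rtc_skipresume()) {
 *		struct timespec64 sleep;
 *
 *		sleep = timespec64_sub(rtc_now, rtc_at_suspend);
 *		timekeeping_inject_sleeptime64(&sleep);
 *	}
 *
 * where rtc_now and rtc_at_suspend stand in for the timestamps captured
 * by the RTC core.
 */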
1812 #endif
1813
1814 /**
1815 * timekeeping_resume - Resumes the generic timekeeping subsystem.
1816 */
1817 void timekeeping_resume(void)
1818 {
1819 struct timekeeper *tk = &tk_core.timekeeper;
1820 struct clocksource *clock = tk->tkr_mono.clock;
1821 unsigned long flags;
1822 struct timespec64 ts_new, ts_delta;
1823 u64 cycle_now, nsec;
1824 bool inject_sleeptime = false;
1825
1826 read_persistent_clock64(&ts_new);
1827
1828 clockevents_resume();
1829 clocksource_resume();
1830
1831 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1832 write_seqcount_begin(&tk_core.seq);
1833
1834 /*
1835 	 * After the system resumes, we need to calculate the suspended time
1836 	 * and compensate the OS time for it. There are 3 sources that could be
1837 * used: Nonstop clocksource during suspend, persistent clock and rtc
1838 * device.
1839 *
1840 * One specific platform may have 1 or 2 or all of them, and the
1841 * preference will be:
1842 * suspend-nonstop clocksource -> persistent clock -> rtc
1843 * The less preferred source will only be tried if there is no better
1844 * usable source. The rtc part is handled separately in rtc core code.
1845 */
1846 cycle_now = tk_clock_read(&tk->tkr_mono);
1847 nsec = clocksource_stop_suspend_timing(clock, cycle_now);
1848 if (nsec > 0) {
1849 ts_delta = ns_to_timespec64(nsec);
1850 inject_sleeptime = true;
1851 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
1852 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
1853 inject_sleeptime = true;
1854 }
1855
1856 if (inject_sleeptime) {
1857 suspend_timing_needed = false;
1858 __timekeeping_inject_sleeptime(tk, &ts_delta);
1859 }
1860
1861 /* Re-base the last cycle value */
1862 tk->tkr_mono.cycle_last = cycle_now;
1863 tk->tkr_raw.cycle_last = cycle_now;
1864
1865 tk->ntp_error = 0;
1866 timekeeping_suspended = 0;
1867 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
1868 write_seqcount_end(&tk_core.seq);
1869 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1870
1871 touch_softlockup_watchdog();
1872
1873 /* Resume the clockevent device(s) and hrtimers */
1874 tick_resume();
1875 /* Notify timerfd as resume is equivalent to clock_was_set() */
1876 timerfd_resume();
1877 }
1878
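/**
 * timekeeping_suspend - Suspend the generic timekeeping subsystem
 *
 * Snapshots the persistent clock and the current clocksource cycle value,
 * forwards the timekeeper to the current time, marks timekeeping as
 * suspended and arms the suspend timing machinery so that the sleep
 * interval can be reconstructed in timekeeping_resume().
 */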
1879 int timekeeping_suspend(void)
1880 {
1881 struct timekeeper *tk = &tk_core.timekeeper;
1882 unsigned long flags;
1883 struct timespec64 delta, delta_delta;
1884 static struct timespec64 old_delta;
1885 struct clocksource *curr_clock;
1886 u64 cycle_now;
1887
1888 read_persistent_clock64(&timekeeping_suspend_time);
1889
1890 /*
1891 	 * On some systems the persistent clock cannot be detected in
1892 	 * timekeeping_init() by its return value, so if we see a valid
1893 	 * value returned here, update the persistent_clock_exists flag.
1894 */
1895 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
1896 persistent_clock_exists = true;
1897
1898 suspend_timing_needed = true;
1899
1900 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1901 write_seqcount_begin(&tk_core.seq);
1902 timekeeping_forward_now(tk);
1903 timekeeping_suspended = 1;
1904
1905 /*
1906 * Since we've called forward_now, cycle_last stores the value
1907 * just read from the current clocksource. Save this to potentially
1908 * use in suspend timing.
1909 */
1910 curr_clock = tk->tkr_mono.clock;
1911 cycle_now = tk->tkr_mono.cycle_last;
1912 clocksource_start_suspend_timing(curr_clock, cycle_now);
1913
1914 if (persistent_clock_exists) {
1915 /*
1916 * To avoid drift caused by repeated suspend/resumes,
1917 * which each can add ~1 second drift error,
1918 * try to compensate so the difference in system time
1919 * and persistent_clock time stays close to constant.
1920 */
1921 delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
1922 delta_delta = timespec64_sub(delta, old_delta);
1923 if (abs(delta_delta.tv_sec) >= 2) {
1924 /*
1925 * if delta_delta is too large, assume time correction
1926 * has occurred and set old_delta to the current delta.
1927 */
1928 old_delta = delta;
1929 } else {
1930 			/* Otherwise try to adjust timekeeping_suspend_time to compensate */
1931 timekeeping_suspend_time =
1932 timespec64_add(timekeeping_suspend_time, delta_delta);
1933 }
1934 }
1935
1936 timekeeping_update(tk, TK_MIRROR);
1937 halt_fast_timekeeper(tk);
1938 write_seqcount_end(&tk_core.seq);
1939 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1940
1941 tick_suspend();
1942 clocksource_suspend();
1943 clockevents_suspend();
1944
1945 return 0;
1946 }
1947
1948 /* sysfs resume/suspend bits for timekeeping */
1949 static struct syscore_ops timekeeping_syscore_ops = {
1950 .resume = timekeeping_resume,
1951 .suspend = timekeeping_suspend,
1952 };
1953
1954 static int __init timekeeping_init_ops(void)
1955 {
1956 register_syscore_ops(&timekeeping_syscore_ops);
1957 return 0;
1958 }
1959 device_initcall(timekeeping_init_ops);
1960
1961 /*
1962 * Apply a multiplier adjustment to the timekeeper
1963 */
1964 static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1965 s64 offset,
1966 s32 mult_adj)
1967 {
1968 s64 interval = tk->cycle_interval;
1969
1970 if (mult_adj == 0) {
1971 return;
1972 } else if (mult_adj == -1) {
1973 interval = -interval;
1974 offset = -offset;
1975 } else if (mult_adj != 1) {
1976 interval *= mult_adj;
1977 offset *= mult_adj;
1978 }
1979
1980 /*
1981 * So the following can be confusing.
1982 *
1983 	 * To keep things simple, let's assume mult_adj == 1 for now.
1984 *
1985 * When mult_adj != 1, remember that the interval and offset values
1986 * have been appropriately scaled so the math is the same.
1987 *
1988 * The basic idea here is that we're increasing the multiplier
1989 * by one, this causes the xtime_interval to be incremented by
1990 * one cycle_interval. This is because:
1991 * xtime_interval = cycle_interval * mult
1992 * So if mult is being incremented by one:
1993 * xtime_interval = cycle_interval * (mult + 1)
1994 	 * It's the same as:
1995 * xtime_interval = (cycle_interval * mult) + cycle_interval
1996 * Which can be shortened to:
1997 * xtime_interval += cycle_interval
1998 *
1999 * So offset stores the non-accumulated cycles. Thus the current
2000 * time (in shifted nanoseconds) is:
2001 * now = (offset * adj) + xtime_nsec
2002 * Now, even though we're adjusting the clock frequency, we have
2003 * to keep time consistent. In other words, we can't jump back
2004 * in time, and we also want to avoid jumping forward in time.
2005 *
2006 * So given the same offset value, we need the time to be the same
2007 * both before and after the freq adjustment.
2008 * now = (offset * adj_1) + xtime_nsec_1
2009 * now = (offset * adj_2) + xtime_nsec_2
2010 * So:
2011 * (offset * adj_1) + xtime_nsec_1 =
2012 * (offset * adj_2) + xtime_nsec_2
2013 * And we know:
2014 * adj_2 = adj_1 + 1
2015 * So:
2016 * (offset * adj_1) + xtime_nsec_1 =
2017 * (offset * (adj_1+1)) + xtime_nsec_2
2018 * (offset * adj_1) + xtime_nsec_1 =
2019 * (offset * adj_1) + offset + xtime_nsec_2
2020 * Canceling the sides:
2021 * xtime_nsec_1 = offset + xtime_nsec_2
2022 * Which gives us:
2023 * xtime_nsec_2 = xtime_nsec_1 - offset
2024 * Which simplifies to:
2025 * xtime_nsec -= offset
2026 */
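	/*
	 * Worked example (illustrative numbers): with mult_adj == 1 and
	 * offset == 1000 unaccumulated cycles, mult grows by one,
	 * xtime_interval grows by one cycle_interval and xtime_nsec shrinks
	 * by 1000, so (offset * mult) + xtime_nsec names the same instant
	 * before and after the adjustment.
	 */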
2027 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
2028 /* NTP adjustment caused clocksource mult overflow */
2029 WARN_ON_ONCE(1);
2030 return;
2031 }
2032
2033 tk->tkr_mono.mult += mult_adj;
2034 tk->xtime_interval += interval;
2035 tk->tkr_mono.xtime_nsec -= offset;
2036 }
2037
2038 /*
2039 * Adjust the timekeeper's multiplier to the correct frequency
2040 * and also to reduce the accumulated error value.
2041 */
2042 static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
2043 {
2044 u32 mult;
2045
2046 /*
2047 * Determine the multiplier from the current NTP tick length.
2048 * Avoid expensive division when the tick length doesn't change.
2049 */
2050 if (likely(tk->ntp_tick == ntp_tick_length())) {
2051 mult = tk->tkr_mono.mult - tk->ntp_err_mult;
2052 } else {
2053 tk->ntp_tick = ntp_tick_length();
2054 mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
2055 tk->xtime_remainder, tk->cycle_interval);
2056 }
2057
2058 /*
2059 * If the clock is behind the NTP time, increase the multiplier by 1
2060 * to catch up with it. If it's ahead and there was a remainder in the
2061 * tick division, the clock will slow down. Otherwise it will stay
2062 * ahead until the tick length changes to a non-divisible value.
2063 */
2064 tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
2065 mult += tk->ntp_err_mult;
2066
2067 timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
2068
2069 if (unlikely(tk->tkr_mono.clock->maxadj &&
2070 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
2071 > tk->tkr_mono.clock->maxadj))) {
2072 printk_once(KERN_WARNING
2073 "Adjusting %s more than 11%% (%ld vs %ld)\n",
2074 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
2075 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
2076 }
2077
2078 /*
2079 * It may be possible that when we entered this function, xtime_nsec
2080 * was very small. Further, if we're slightly speeding the clocksource
2081 	 * in the code above, it's possible the required corrective factor to
2082 * xtime_nsec could cause it to underflow.
2083 *
2084 * Now, since we have already accumulated the second and the NTP
2085 * subsystem has been notified via second_overflow(), we need to skip
2086 * the next update.
2087 */
2088 if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
2089 tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
2090 tk->tkr_mono.shift;
2091 tk->xtime_sec--;
2092 tk->skip_second_overflow = 1;
2093 }
2094 }
2095
2096 /*
2097 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
2098 *
2099 * Helper function that accumulates the nsecs greater than a second
2100  * from the xtime_nsec field to the xtime_sec field.
2101 * It also calls into the NTP code to handle leapsecond processing.
2102 */
2103 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
2104 {
2105 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
2106 unsigned int clock_set = 0;
2107
2108 while (tk->tkr_mono.xtime_nsec >= nsecps) {
2109 int leap;
2110
2111 tk->tkr_mono.xtime_nsec -= nsecps;
2112 tk->xtime_sec++;
2113
2114 /*
2115 * Skip NTP update if this second was accumulated before,
2116 * i.e. xtime_nsec underflowed in timekeeping_adjust()
2117 */
2118 if (unlikely(tk->skip_second_overflow)) {
2119 tk->skip_second_overflow = 0;
2120 continue;
2121 }
2122
2123 		/* Figure out if it's a leap second and apply it if needed */
2124 leap = second_overflow(tk->xtime_sec);
2125 if (unlikely(leap)) {
2126 struct timespec64 ts;
2127
2128 tk->xtime_sec += leap;
2129
2130 ts.tv_sec = leap;
2131 ts.tv_nsec = 0;
2132 tk_set_wall_to_mono(tk,
2133 timespec64_sub(tk->wall_to_monotonic, ts));
2134
2135 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
2136
2137 clock_set = TK_CLOCK_WAS_SET;
2138 }
2139 }
2140 return clock_set;
2141 }
2142
2143 /*
2144 * logarithmic_accumulation - shifted accumulation of cycles
2145 *
2146  * This function accumulates a shifted interval of cycles into
2147  * a shifted interval of nanoseconds, allowing for an O(log)
2148  * accumulation loop.
2149 *
2150 * Returns the unconsumed cycles.
2151 */
2152 static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
2153 u32 shift, unsigned int *clock_set)
2154 {
2155 u64 interval = tk->cycle_interval << shift;
2156 u64 snsec_per_sec;
2157
2158 /* If the offset is smaller than a shifted interval, do nothing */
2159 if (offset < interval)
2160 return offset;
2161
2162 /* Accumulate one shifted interval */
2163 offset -= interval;
2164 tk->tkr_mono.cycle_last += interval;
2165 tk->tkr_raw.cycle_last += interval;
2166
2167 tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
2168 *clock_set |= accumulate_nsecs_to_secs(tk);
2169
2170 /* Accumulate raw time */
2171 tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
2172 snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
2173 while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
2174 tk->tkr_raw.xtime_nsec -= snsec_per_sec;
2175 tk->raw_sec++;
2176 }
2177
2178 /* Accumulate error between NTP and clock interval */
2179 tk->ntp_error += tk->ntp_tick << shift;
2180 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
2181 (tk->ntp_error_shift + shift);
2182
2183 return offset;
2184 }
2185
2186 /*
2187 * timekeeping_advance - Updates the timekeeper to the current time and
2188 * current NTP tick length
2189 */
2190 static bool timekeeping_advance(enum timekeeping_adv_mode mode)
2191 {
2192 struct timekeeper *real_tk = &tk_core.timekeeper;
2193 struct timekeeper *tk = &shadow_timekeeper;
2194 u64 offset;
2195 int shift = 0, maxshift;
2196 unsigned int clock_set = 0;
2197 unsigned long flags;
2198
2199 raw_spin_lock_irqsave(&timekeeper_lock, flags);
2200
2201 /* Make sure we're fully resumed: */
2202 if (unlikely(timekeeping_suspended))
2203 goto out;
2204
2205 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
2206 tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
2207 tk->tkr_mono.clock->max_raw_delta);
2208
2209 /* Check if there's really nothing to do */
2210 if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
2211 goto out;
2212
2213 /*
2214 * With NO_HZ we may have to accumulate many cycle_intervals
2215 * (think "ticks") worth of time at once. To do this efficiently,
2216 * we calculate the largest doubling multiple of cycle_intervals
2217 * that is smaller than the offset. We then accumulate that
2218 * chunk in one go, and then try to consume the next smaller
2219 * doubled multiple.
2220 */
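	/*
	 * Illustrative example: with offset at roughly 10 cycle_intervals,
	 * the initial shift below is 3 (ilog2(10 * ci) - ilog2(ci)), so the
	 * loop consumes 8 intervals in one pass, steps down through a no-op
	 * pass at shift 2, and finishes the remaining 2 intervals at shift 1.
	 */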
2221 shift = ilog2(offset) - ilog2(tk->cycle_interval);
2222 shift = max(0, shift);
2223 /* Bound shift to one less than what overflows tick_length */
2224 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
2225 shift = min(shift, maxshift);
2226 while (offset >= tk->cycle_interval) {
2227 offset = logarithmic_accumulation(tk, offset, shift,
2228 &clock_set);
2229 if (offset < tk->cycle_interval<<shift)
2230 shift--;
2231 }
2232
2233 /* Adjust the multiplier to correct NTP error */
2234 timekeeping_adjust(tk, offset);
2235
2236 /*
2237 * Finally, make sure that after the rounding
2238 * xtime_nsec isn't larger than NSEC_PER_SEC
2239 */
2240 clock_set |= accumulate_nsecs_to_secs(tk);
2241
2242 write_seqcount_begin(&tk_core.seq);
2243 /*
2244 * Update the real timekeeper.
2245 *
2246 * We could avoid this memcpy by switching pointers, but that
2247 * requires changes to all other timekeeper usage sites as
2248 * well, i.e. move the timekeeper pointer getter into the
2249 	 * spinlocked/seqcount protected sections. That would also trade
2250 	 * this memcpy under tk_core.seq for one before the update
2251 	 * starts.
2252 */
2253 timekeeping_update(tk, clock_set);
2254 memcpy(real_tk, tk, sizeof(*tk));
2255 /* The memcpy must come last. Do not put anything here! */
2256 write_seqcount_end(&tk_core.seq);
2257 out:
2258 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
2259
2260 return !!clock_set;
2261 }
2262
2263 /**
2264 * update_wall_time - Uses the current clocksource to increment the wall time
2265 *
2266 */
2267 void update_wall_time(void)
2268 {
2269 if (timekeeping_advance(TK_ADV_TICK))
2270 clock_was_set_delayed();
2271 }
2272
2273 /**
2274 * getboottime64 - Return the real time of system boot.
2275 * @ts: pointer to the timespec64 to be set
2276 *
2277 * Returns the wall-time of boot in a timespec64.
2278 *
2279 * This is based on the wall_to_monotonic offset and the total suspend
2280 * time. Calls to settimeofday will affect the value returned (which
2281 * basically means that however wrong your real time clock is at boot time,
2282 * you get the right time here).
2283 */
2284 void getboottime64(struct timespec64 *ts)
2285 {
2286 struct timekeeper *tk = &tk_core.timekeeper;
2287 ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
2288
2289 *ts = ktime_to_timespec64(t);
2290 }
2291 EXPORT_SYMBOL_GPL(getboottime64);
2292
2293 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
2294 {
2295 struct timekeeper *tk = &tk_core.timekeeper;
2296 unsigned int seq;
2297
2298 do {
2299 seq = read_seqcount_begin(&tk_core.seq);
2300
2301 *ts = tk_xtime(tk);
2302 } while (read_seqcount_retry(&tk_core.seq, seq));
2303 }
2304 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
2305
2306 void ktime_get_coarse_ts64(struct timespec64 *ts)
2307 {
2308 struct timekeeper *tk = &tk_core.timekeeper;
2309 struct timespec64 now, mono;
2310 unsigned int seq;
2311
2312 do {
2313 seq = read_seqcount_begin(&tk_core.seq);
2314
2315 now = tk_xtime(tk);
2316 mono = tk->wall_to_monotonic;
2317 } while (read_seqcount_retry(&tk_core.seq, seq));
2318
2319 set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
2320 now.tv_nsec + mono.tv_nsec);
2321 }
2322 EXPORT_SYMBOL(ktime_get_coarse_ts64);
2323
2324 /*
2325 * Must hold jiffies_lock
2326 */
2327 void do_timer(unsigned long ticks)
2328 {
2329 jiffies_64 += ticks;
2330 calc_global_load();
2331 }
2332
2333 /**
2334 * ktime_get_update_offsets_now - hrtimer helper
2335 * @cwsseq: pointer to check and store the clock was set sequence number
2336 * @offs_real: pointer to storage for monotonic -> realtime offset
2337 * @offs_boot: pointer to storage for monotonic -> boottime offset
2338 * @offs_tai: pointer to storage for monotonic -> clock tai offset
2339 *
2340 * Returns current monotonic time and updates the offsets if the
2341 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
2342 * different.
2343 *
2344 * Called from hrtimer_interrupt() or retrigger_next_event()
2345 */
2346 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
2347 ktime_t *offs_boot, ktime_t *offs_tai)
2348 {
2349 struct timekeeper *tk = &tk_core.timekeeper;
2350 unsigned int seq;
2351 ktime_t base;
2352 u64 nsecs;
2353
2354 do {
2355 seq = read_seqcount_begin(&tk_core.seq);
2356
2357 base = tk->tkr_mono.base;
2358 nsecs = timekeeping_get_ns(&tk->tkr_mono);
2359 base = ktime_add_ns(base, nsecs);
2360
2361 if (*cwsseq != tk->clock_was_set_seq) {
2362 *cwsseq = tk->clock_was_set_seq;
2363 *offs_real = tk->offs_real;
2364 *offs_boot = tk->offs_boot;
2365 *offs_tai = tk->offs_tai;
2366 }
2367
2368 /* Handle leapsecond insertion adjustments */
2369 if (unlikely(base >= tk->next_leap_ktime))
2370 *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
2371
2372 } while (read_seqcount_retry(&tk_core.seq, seq));
2373
2374 return base;
2375 }
2376
2377 /*
2378 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
2379 */
2380 static int timekeeping_validate_timex(const struct __kernel_timex *txc)
2381 {
2382 if (txc->modes & ADJ_ADJTIME) {
2383 /* singleshot must not be used with any other mode bits */
2384 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
2385 return -EINVAL;
2386 if (!(txc->modes & ADJ_OFFSET_READONLY) &&
2387 !capable(CAP_SYS_TIME))
2388 return -EPERM;
2389 } else {
2390 /* In order to modify anything, you gotta be super-user! */
2391 if (txc->modes && !capable(CAP_SYS_TIME))
2392 return -EPERM;
2393 /*
2394 * if the quartz is off by more than 10% then
2395 * something is VERY wrong!
2396 */
2397 if (txc->modes & ADJ_TICK &&
2398 (txc->tick < 900000/USER_HZ ||
2399 txc->tick > 1100000/USER_HZ))
2400 return -EINVAL;
2401 }
2402
2403 if (txc->modes & ADJ_SETOFFSET) {
2404 /* In order to inject time, you gotta be super-user! */
2405 if (!capable(CAP_SYS_TIME))
2406 return -EPERM;
2407
2408 /*
2409 * Validate if a timespec/timeval used to inject a time
2410 * offset is valid. Offsets can be positive or negative, so
2411 * we don't check tv_sec. The value of the timeval/timespec
2412 		 * is the sum of its fields, but *NOTE*:
2413 * The field tv_usec/tv_nsec must always be non-negative and
2414 * we can't have more nanoseconds/microseconds than a second.
2415 */
2416 if (txc->time.tv_usec < 0)
2417 return -EINVAL;
2418
2419 if (txc->modes & ADJ_NANO) {
2420 if (txc->time.tv_usec >= NSEC_PER_SEC)
2421 return -EINVAL;
2422 } else {
2423 if (txc->time.tv_usec >= USEC_PER_SEC)
2424 return -EINVAL;
2425 }
2426 }
2427
2428 /*
2429 * Check for potential multiplication overflows that can
2430 * only happen on 64-bit systems:
2431 */
2432 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
2433 if (LLONG_MIN / PPM_SCALE > txc->freq)
2434 return -EINVAL;
2435 if (LLONG_MAX / PPM_SCALE < txc->freq)
2436 return -EINVAL;
2437 }
2438
2439 return 0;
2440 }
2441
2442 /**
2443 * random_get_entropy_fallback - Returns the raw clock source value,
2444 * used by random.c for platforms with no valid random_get_entropy().
2445 */
2446 unsigned long random_get_entropy_fallback(void)
2447 {
2448 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
2449 struct clocksource *clock = READ_ONCE(tkr->clock);
2450
2451 if (unlikely(timekeeping_suspended || !clock))
2452 return 0;
2453 return clock->read(clock);
2454 }
2455 EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
2456
2457 /**
2458 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
2459 * @txc: Pointer to kernel_timex structure containing NTP parameters
2460 */
2461 int do_adjtimex(struct __kernel_timex *txc)
2462 {
2463 struct timekeeper *tk = &tk_core.timekeeper;
2464 struct audit_ntp_data ad;
2465 bool offset_set = false;
2466 bool clock_set = false;
2467 struct timespec64 ts;
2468 unsigned long flags;
2469 s32 orig_tai, tai;
2470 int ret;
2471
2472 /* Validate the data before disabling interrupts */
2473 ret = timekeeping_validate_timex(txc);
2474 if (ret)
2475 return ret;
2476 add_device_randomness(txc, sizeof(*txc));
2477
2478 if (txc->modes & ADJ_SETOFFSET) {
2479 struct timespec64 delta;
2480 delta.tv_sec = txc->time.tv_sec;
2481 delta.tv_nsec = txc->time.tv_usec;
2482 if (!(txc->modes & ADJ_NANO))
2483 delta.tv_nsec *= 1000;
2484 ret = timekeeping_inject_offset(&delta);
2485 if (ret)
2486 return ret;
2487
2488 offset_set = delta.tv_sec != 0;
2489 audit_tk_injoffset(delta);
2490 }
2491
2492 audit_ntp_init(&ad);
2493
2494 ktime_get_real_ts64(&ts);
2495 add_device_randomness(&ts, sizeof(ts));
2496
2497 raw_spin_lock_irqsave(&timekeeper_lock, flags);
2498 write_seqcount_begin(&tk_core.seq);
2499
2500 orig_tai = tai = tk->tai_offset;
2501 ret = __do_adjtimex(txc, &ts, &tai, &ad);
2502
2503 if (tai != orig_tai) {
2504 __timekeeping_set_tai_offset(tk, tai);
2505 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
2506 clock_set = true;
2507 }
2508 tk_update_leap_state(tk);
2509
2510 write_seqcount_end(&tk_core.seq);
2511 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
2512
2513 audit_ntp_log(&ad);
2514
2515 /* Update the multiplier immediately if frequency was set directly */
2516 if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
2517 clock_set |= timekeeping_advance(TK_ADV_FREQ);
2518
2519 if (clock_set)
2520 clock_was_set(CLOCK_SET_WALL);
2521
2522 ntp_notify_cmos_timer(offset_set);
2523
2524 return ret;
2525 }
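/*
 * Usage sketch (illustrative): stepping the clock forward by 1.5 ms from
 * kernel context would look roughly like:
 *
 *	struct __kernel_timex txc = {
 *		.modes		= ADJ_SETOFFSET | ADJ_NANO,
 *		.time.tv_sec	= 0,
 *		.time.tv_usec	= 1500000,
 *	};
 *	int ret = do_adjtimex(&txc);
 *
 * With ADJ_NANO set, time.tv_usec carries nanoseconds, so 1500000 is
 * 1.5 ms. Userspace reaches this function via the adjtimex() and
 * clock_adjtime() system calls.
 */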
2526
2527 #ifdef CONFIG_NTP_PPS
2528 /**
2529 * hardpps() - Accessor function to NTP __hardpps function
2530 * @phase_ts: Pointer to timespec64 structure representing phase timestamp
2531 * @raw_ts: Pointer to timespec64 structure representing raw timestamp
2532 */
2533 void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
2534 {
2535 unsigned long flags;
2536
2537 raw_spin_lock_irqsave(&timekeeper_lock, flags);
2538 write_seqcount_begin(&tk_core.seq);
2539
2540 __hardpps(phase_ts, raw_ts);
2541
2542 write_seqcount_end(&tk_core.seq);
2543 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
2544 }
2545 EXPORT_SYMBOL(hardpps);
2546 #endif /* CONFIG_NTP_PPS */
2547