aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry/vdso/vclock_gettime.c')
-rw-r--r-- arch/x86/entry/vdso/vclock_gettime.c 99
1 files changed, 53 insertions, 46 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index ca94fa649251..5dd363d54348 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)
36} 36}
37#endif 37#endif
38 38
39#ifdef CONFIG_PARAVIRT_CLOCK
40extern u8 pvclock_page
41 __attribute__((visibility("hidden")));
42#endif
43
39#ifndef BUILD_VDSO32 44#ifndef BUILD_VDSO32
40 45
41#include <linux/kernel.h> 46#include <linux/kernel.h>
@@ -62,63 +67,65 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
62 67
63#ifdef CONFIG_PARAVIRT_CLOCK 68#ifdef CONFIG_PARAVIRT_CLOCK
64 69
65static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) 70static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
66{ 71{
67 const struct pvclock_vsyscall_time_info *pvti_base; 72 return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
68 int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
69 int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
70
71 BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
72
73 pvti_base = (struct pvclock_vsyscall_time_info *)
74 __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
75
76 return &pvti_base[offset];
77} 73}
78 74
79static notrace cycle_t vread_pvclock(int *mode) 75static notrace cycle_t vread_pvclock(int *mode)
80{ 76{
81 const struct pvclock_vsyscall_time_info *pvti; 77 const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
82 cycle_t ret; 78 cycle_t ret;
83 u64 last; 79 u64 tsc, pvti_tsc;
84 u32 version; 80 u64 last, delta, pvti_system_time;
85 u8 flags; 81 u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
86 unsigned cpu, cpu1;
87
88 82
89 /* 83 /*
90 * Note: hypervisor must guarantee that: 84 * Note: The kernel and hypervisor must guarantee that cpu ID
91 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. 85 * number maps 1:1 to per-CPU pvclock time info.
92 * 2. that per-CPU pvclock time info is updated if the 86 *
93 * underlying CPU changes. 87 * Because the hypervisor is entirely unaware of guest userspace
94 * 3. that version is increased whenever underlying CPU 88 * preemption, it cannot guarantee that per-CPU pvclock time
94 * changes. 89 * info is updated if the underlying CPU changes or that
90 * version is increased whenever underlying CPU changes.
96 * 91 *
92 * On KVM, we are guaranteed that pvti updates for any vCPU are
93 * atomic as seen by *all* vCPUs. This is an even stronger
94 * guarantee than we get with a normal seqlock.
95 *
96 * On Xen, we don't appear to have that guarantee, but Xen still
97 * supplies a valid seqlock using the version field.
98 *
99 * We only do pvclock vdso timing at all if
100 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
101 * mean that all vCPUs have matching pvti and that the TSC is
102 * synced, so we can just look at vCPU 0's pvti.
97 */ 103 */
98 do { 104
99 cpu = __getcpu() & VGETCPU_CPU_MASK; 105 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
100 /* TODO: We can put vcpu id into higher bits of pvti.version.
101 * This will save a couple of cycles by getting rid of
102 * __getcpu() calls (Gleb).
103 */
104
105 pvti = get_pvti(cpu);
106
107 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
108
109 /*
110 * Test we're still on the cpu as well as the version.
111 * We could have been migrated just after the first
112 * vgetcpu but before fetching the version, so we
113 * wouldn't notice a version change.
114 */
115 cpu1 = __getcpu() & VGETCPU_CPU_MASK;
116 } while (unlikely(cpu != cpu1 ||
117 (pvti->pvti.version & 1) ||
118 pvti->pvti.version != version));
119
120 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
121 *mode = VCLOCK_NONE; 106 *mode = VCLOCK_NONE;
107 return 0;
108 }
109
110 do {
111 version = pvti->version;
112
113 /* This is also a read barrier, so we'll read version first. */
114 tsc = rdtsc_ordered();
115
116 pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
117 pvti_tsc_shift = pvti->tsc_shift;
118 pvti_system_time = pvti->system_time;
119 pvti_tsc = pvti->tsc_timestamp;
120
121 /* Make sure that the version double-check is last. */
122 smp_rmb();
123 } while (unlikely((version & 1) || version != pvti->version));
124
125 delta = tsc - pvti_tsc;
126 ret = pvti_system_time +
127 pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
128 pvti_tsc_shift);
122 129
123 /* refer to tsc.c read_tsc() comment for rationale */ 130 /* refer to tsc.c read_tsc() comment for rationale */
124 last = gtod->cycle_last; 131 last = gtod->cycle_last;