diff options
Diffstat (limited to 'arch/x86/entry/vdso/vclock_gettime.c')
-rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 99 |
1 files changed, 53 insertions, 46 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index ca94fa649251..5dd363d54348 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c | |||
@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void) | |||
36 | } | 36 | } |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | #ifdef CONFIG_PARAVIRT_CLOCK | ||
40 | extern u8 pvclock_page | ||
41 | __attribute__((visibility("hidden"))); | ||
42 | #endif | ||
43 | |||
39 | #ifndef BUILD_VDSO32 | 44 | #ifndef BUILD_VDSO32 |
40 | 45 | ||
41 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
@@ -62,63 +67,65 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) | |||
62 | 67 | ||
63 | #ifdef CONFIG_PARAVIRT_CLOCK | 68 | #ifdef CONFIG_PARAVIRT_CLOCK |
64 | 69 | ||
65 | static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) | 70 | static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) |
66 | { | 71 | { |
67 | const struct pvclock_vsyscall_time_info *pvti_base; | 72 | return (const struct pvclock_vsyscall_time_info *)&pvclock_page; |
68 | int idx = cpu / (PAGE_SIZE/PVTI_SIZE); | ||
69 | int offset = cpu % (PAGE_SIZE/PVTI_SIZE); | ||
70 | |||
71 | BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); | ||
72 | |||
73 | pvti_base = (struct pvclock_vsyscall_time_info *) | ||
74 | __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); | ||
75 | |||
76 | return &pvti_base[offset]; | ||
77 | } | 73 | } |
78 | 74 | ||
79 | static notrace cycle_t vread_pvclock(int *mode) | 75 | static notrace cycle_t vread_pvclock(int *mode) |
80 | { | 76 | { |
81 | const struct pvclock_vsyscall_time_info *pvti; | 77 | const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; |
82 | cycle_t ret; | 78 | cycle_t ret; |
83 | u64 last; | 79 | u64 tsc, pvti_tsc; |
84 | u32 version; | 80 | u64 last, delta, pvti_system_time; |
85 | u8 flags; | 81 | u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; |
86 | unsigned cpu, cpu1; | ||
87 | |||
88 | 82 | ||
89 | /* | 83 | /* |
90 | * Note: hypervisor must guarantee that: | 84 | * Note: The kernel and hypervisor must guarantee that cpu ID |
91 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. | 85 | * number maps 1:1 to per-CPU pvclock time info. |
92 | * 2. that per-CPU pvclock time info is updated if the | 86 | * |
93 | * underlying CPU changes. | 87 | * Because the hypervisor is entirely unaware of guest userspace |
94 | * 3. that version is increased whenever underlying CPU | 88 | * preemption, it cannot guarantee that per-CPU pvclock time |
95 | * changes. | 89 | * info is updated if the underlying CPU changes or that that |
90 | * version is increased whenever underlying CPU changes. | ||
96 | * | 91 | * |
92 | * On KVM, we are guaranteed that pvti updates for any vCPU are | ||
93 | * atomic as seen by *all* vCPUs. This is an even stronger | ||
94 | * guarantee than we get with a normal seqlock. | ||
95 | * | ||
96 | * On Xen, we don't appear to have that guarantee, but Xen still | ||
97 | * supplies a valid seqlock using the version field. | ||
98 | * | ||
99 | * We only do pvclock vdso timing at all if | ||
100 | * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to | ||
101 | * mean that all vCPUs have matching pvti and that the TSC is | ||
102 | * synced, so we can just look at vCPU 0's pvti. | ||
97 | */ | 103 | */ |
98 | do { | 104 | |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 105 | if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { |
100 | /* TODO: We can put vcpu id into higher bits of pvti.version. | ||
101 | * This will save a couple of cycles by getting rid of | ||
102 | * __getcpu() calls (Gleb). | ||
103 | */ | ||
104 | |||
105 | pvti = get_pvti(cpu); | ||
106 | |||
107 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | ||
108 | |||
109 | /* | ||
110 | * Test we're still on the cpu as well as the version. | ||
111 | * We could have been migrated just after the first | ||
112 | * vgetcpu but before fetching the version, so we | ||
113 | * wouldn't notice a version change. | ||
114 | */ | ||
115 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | ||
116 | } while (unlikely(cpu != cpu1 || | ||
117 | (pvti->pvti.version & 1) || | ||
118 | pvti->pvti.version != version)); | ||
119 | |||
120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | ||
121 | *mode = VCLOCK_NONE; | 106 | *mode = VCLOCK_NONE; |
107 | return 0; | ||
108 | } | ||
109 | |||
110 | do { | ||
111 | version = pvti->version; | ||
112 | |||
113 | /* This is also a read barrier, so we'll read version first. */ | ||
114 | tsc = rdtsc_ordered(); | ||
115 | |||
116 | pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; | ||
117 | pvti_tsc_shift = pvti->tsc_shift; | ||
118 | pvti_system_time = pvti->system_time; | ||
119 | pvti_tsc = pvti->tsc_timestamp; | ||
120 | |||
121 | /* Make sure that the version double-check is last. */ | ||
122 | smp_rmb(); | ||
123 | } while (unlikely((version & 1) || version != pvti->version)); | ||
124 | |||
125 | delta = tsc - pvti_tsc; | ||
126 | ret = pvti_system_time + | ||
127 | pvclock_scale_delta(delta, pvti_tsc_to_system_mul, | ||
128 | pvti_tsc_shift); | ||
122 | 129 | ||
123 | /* refer to tsc.c read_tsc() comment for rationale */ | 130 | /* refer to tsc.c read_tsc() comment for rationale */ |
124 | last = gtod->cycle_last; | 131 | last = gtod->cycle_last; |