[glsdk/meta-ti-glsdk.git] / recipes-kernel / linux / linux-ti33x-psp-3.2 / 3.2.12 / 0009-perf-x86-Fix-local-vs-remote-memory-events-for-NHM-W.patch
1 From 1c301a267126e6e01056eda0164a67731b738f5f Mon Sep 17 00:00:00 2001
2 From: Peter Zijlstra <a.p.zijlstra@chello.nl>
3 Date: Mon, 5 Mar 2012 23:59:25 +0100
4 Subject: [PATCH 09/42] perf/x86: Fix local vs remote memory events for
5 NHM/WSM
7 commit 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 upstream.
9 Verified using the below proglet.. before:
11 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
12 remote write
14 Performance counter stats for './numa 0':
16 2,101,554 node-stores
17 2,096,931 node-store-misses
19 5.021546079 seconds time elapsed
21 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
22 local write
24 Performance counter stats for './numa 1':
26 501,137 node-stores
27 199 node-store-misses
29 5.124451068 seconds time elapsed
31 After:
33 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
34 remote write
36 Performance counter stats for './numa 0':
38 2,107,516 node-stores
39 2,097,187 node-store-misses
41 5.012755149 seconds time elapsed
43 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
44 local write
46 Performance counter stats for './numa 1':
48 2,063,355 node-stores
49 165 node-store-misses
51 5.082091494 seconds time elapsed
53 #define _GNU_SOURCE
55 #include <sched.h>
56 #include <stdio.h>
57 #include <errno.h>
58 #include <sys/mman.h>
59 #include <sys/types.h>
60 #include <dirent.h>
61 #include <signal.h>
62 #include <unistd.h>
63 #include <numaif.h>
64 #include <stdlib.h>
66 #define SIZE (32*1024*1024)
/*
 * Set to 1 by sig_done() when SIGALRM fires; polled by the access loops in
 * main() to know when to stop.  It is written from a signal handler, so it
 * must be a volatile sig_atomic_t: that is the only object type C guarantees
 * can be safely modified asynchronously.
 */
volatile sig_atomic_t done;

/*
 * SIGALRM handler: request termination of the measurement loop.
 *
 * @sig: signal number delivered (unused).
 */
void sig_done(int sig)
{
	(void)sig;
	done = 1;
}
75 int main(int argc, char **argv)
76 {
77 cpu_set_t *mask, *mask2;
78 size_t size;
79 int i, err, t;
80 int nrcpus = 1024;
81 char *mem;
82 unsigned long nodemask = 0x01; /* node 0 */
83 DIR *node;
84 struct dirent *de;
85 int read = 0;
86 int local = 0;
88 if (argc < 2) {
89 printf("usage: %s [0-3]\n", argv[0]);
90 printf(" bit0 - local/remote\n");
91 printf(" bit1 - read/write\n");
92 exit(0);
93 }
95 switch (atoi(argv[1])) {
96 case 0:
97 printf("remote write\n");
98 break;
99 case 1:
100 printf("local write\n");
101 local = 1;
102 break;
103 case 2:
104 printf("remote read\n");
105 read = 1;
106 break;
107 case 3:
108 printf("local read\n");
109 local = 1;
110 read = 1;
111 break;
112 }
114 mask = CPU_ALLOC(nrcpus);
115 size = CPU_ALLOC_SIZE(nrcpus);
116 CPU_ZERO_S(size, mask);
118 node = opendir("/sys/devices/system/node/node0/");
119 if (!node)
120 perror("opendir");
121 while ((de = readdir(node))) {
122 int cpu;
124 if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
125 CPU_SET_S(cpu, size, mask);
126 }
127 closedir(node);
129 mask2 = CPU_ALLOC(nrcpus);
130 CPU_ZERO_S(size, mask2);
131 for (i = 0; i < size; i++)
132 CPU_SET_S(i, size, mask2);
133 CPU_XOR_S(size, mask2, mask2, mask); // invert
135 if (!local)
136 mask = mask2;
138 err = sched_setaffinity(0, size, mask);
139 if (err)
140 perror("sched_setaffinity");
142 mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
143 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
144 err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
145 if (err)
146 perror("mbind");
148 signal(SIGALRM, sig_done);
149 alarm(5);
151 if (!read) {
152 while (!done) {
153 for (i = 0; i < SIZE; i++)
154 mem[i] = 0x01;
155 }
156 } else {
157 while (!done) {
158 for (i = 0; i < SIZE; i++)
159 t += *(volatile char *)(mem + i);
160 }
161 }
163 return 0;
164 }
166 Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
167 Cc: Stephane Eranian <eranian@google.com>
168 Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org
169 Signed-off-by: Ingo Molnar <mingo@elte.hu>
170 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
171 ---
172 arch/x86/kernel/cpu/perf_event_intel.c | 17 +++++++++--------
173 1 file changed, 9 insertions(+), 8 deletions(-)
175 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
176 index 121f1be..957c216 100644
177 --- a/arch/x86/kernel/cpu/perf_event_intel.c
178 +++ b/arch/x86/kernel/cpu/perf_event_intel.c
179 @@ -389,14 +389,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
180 #define NHM_LOCAL_DRAM (1 << 14)
181 #define NHM_NON_DRAM (1 << 15)
183 -#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
184 +#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
185 +#define NHM_REMOTE (NHM_REMOTE_DRAM)
187 #define NHM_DMND_READ (NHM_DMND_DATA_RD)
188 #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
189 #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
191 #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
192 -#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
193 +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
194 #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
196 static __initconst const u64 nehalem_hw_cache_extra_regs
197 @@ -420,16 +421,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
198 },
199 [ C(NODE) ] = {
200 [ C(OP_READ) ] = {
201 - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
202 - [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
203 + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
204 + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
205 },
206 [ C(OP_WRITE) ] = {
207 - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
208 - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
209 + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
210 + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
211 },
212 [ C(OP_PREFETCH) ] = {
213 - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
214 - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
215 + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
216 + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
217 },
218 },
219 };
220 --
221 1.7.9.4