Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - glsdk/meta-ti-glsdk.git/blob - recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.12/0009-perf-x86-Fix-local-vs-remote-memory-events-for-NHM-W.patch
linux-ti33x-psp 3.2: update to 3.2.13
[glsdk/meta-ti-glsdk.git] / recipes-kernel / linux / linux-ti33x-psp-3.2 / 3.2.12 / 0009-perf-x86-Fix-local-vs-remote-memory-events-for-NHM-W.patch
1 From 1c301a267126e6e01056eda0164a67731b738f5f Mon Sep 17 00:00:00 2001
2 From: Peter Zijlstra <a.p.zijlstra@chello.nl>
3 Date: Mon, 5 Mar 2012 23:59:25 +0100
4 Subject: [PATCH 09/42] perf/x86: Fix local vs remote memory events for
5  NHM/WSM
7 commit 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 upstream.
9 Verified using the below proglet.. before:
11 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
12 remote write
14  Performance counter stats for './numa 0':
16          2,101,554 node-stores
17          2,096,931 node-store-misses
19        5.021546079 seconds time elapsed
21 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
22 local write
24  Performance counter stats for './numa 1':
26            501,137 node-stores
27                199 node-store-misses
29        5.124451068 seconds time elapsed
31 After:
33 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
34 remote write
36  Performance counter stats for './numa 0':
38          2,107,516 node-stores
39          2,097,187 node-store-misses
41        5.012755149 seconds time elapsed
43 [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
44 local write
46  Performance counter stats for './numa 1':
48          2,063,355 node-stores
49                165 node-store-misses
51        5.082091494 seconds time elapsed
53 #define _GNU_SOURCE
55 #include <sched.h>
56 #include <stdio.h>
57 #include <errno.h>
58 #include <sys/mman.h>
59 #include <sys/types.h>
60 #include <dirent.h>
61 #include <signal.h>
62 #include <unistd.h>
63 #include <numaif.h>
64 #include <stdlib.h>
66 #define SIZE (32*1024*1024)
68 volatile int done;
70 void sig_done(int sig)
71 {
72         done = 1;
73 }
75 int main(int argc, char **argv)
76 {
77         cpu_set_t *mask, *mask2;
78         size_t size;
79         int i, err, t;
80         int nrcpus = 1024;
81         char *mem;
82         unsigned long nodemask = 0x01; /* node 0 */
83         DIR *node;
84         struct dirent *de;
85         int read = 0;
86         int local = 0;
88         if (argc < 2) {
89                 printf("usage: %s [0-3]\n", argv[0]);
90                 printf("  bit0 - local/remote\n");
91                 printf("  bit1 - read/write\n");
92                 exit(0);
93         }
95         switch (atoi(argv[1])) {
96         case 0:
97                 printf("remote write\n");
98                 break;
99         case 1:
100                 printf("local write\n");
101                 local = 1;
102                 break;
103         case 2:
104                 printf("remote read\n");
105                 read = 1;
106                 break;
107         case 3:
108                 printf("local read\n");
109                 local = 1;
110                 read = 1;
111                 break;
112         }
114         mask = CPU_ALLOC(nrcpus);
115         size = CPU_ALLOC_SIZE(nrcpus);
116         CPU_ZERO_S(size, mask);
118         node = opendir("/sys/devices/system/node/node0/");
119         if (!node)
120                 perror("opendir");
121         while ((de = readdir(node))) {
122                 int cpu;
124                 if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
125                         CPU_SET_S(cpu, size, mask);
126         }
127         closedir(node);
129         mask2 = CPU_ALLOC(nrcpus);
130         CPU_ZERO_S(size, mask2);
131         for (i = 0; i < size; i++)
132                 CPU_SET_S(i, size, mask2);
133         CPU_XOR_S(size, mask2, mask2, mask); // invert
135         if (!local)
136                 mask = mask2;
138         err = sched_setaffinity(0, size, mask);
139         if (err)
140                 perror("sched_setaffinity");
142         mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
143                         MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
144         err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
145         if (err)
146                 perror("mbind");
148         signal(SIGALRM, sig_done);
149         alarm(5);
151         if (!read) {
152                 while (!done) {
153                         for (i = 0; i < SIZE; i++)
154                                 mem[i] = 0x01;
155                 }
156         } else {
157                 while (!done) {
158                         for (i = 0; i < SIZE; i++)
159                                 t += *(volatile char *)(mem + i);
160                 }
161         }
163         return 0;
164 }
166 Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
167 Cc: Stephane Eranian <eranian@google.com>
168 Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org
169 Signed-off-by: Ingo Molnar <mingo@elte.hu>
170 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
171 ---
172  arch/x86/kernel/cpu/perf_event_intel.c |   17 +++++++++--------
173  1 file changed, 9 insertions(+), 8 deletions(-)
175 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
176 index 121f1be..957c216 100644
177 --- a/arch/x86/kernel/cpu/perf_event_intel.c
178 +++ b/arch/x86/kernel/cpu/perf_event_intel.c
179 @@ -389,14 +389,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
180  #define NHM_LOCAL_DRAM         (1 << 14)
181  #define NHM_NON_DRAM           (1 << 15)
182  
183 -#define NHM_ALL_DRAM           (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
184 +#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
185 +#define NHM_REMOTE             (NHM_REMOTE_DRAM)
186  
187  #define NHM_DMND_READ          (NHM_DMND_DATA_RD)
188  #define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
189  #define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
190  
191  #define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
192 -#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
193 +#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
194  #define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
195  
196  static __initconst const u64 nehalem_hw_cache_extra_regs
197 @@ -420,16 +421,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
198   },
199   [ C(NODE) ] = {
200         [ C(OP_READ) ] = {
201 -               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
202 -               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
203 +               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
204 +               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
205         },
206         [ C(OP_WRITE) ] = {
207 -               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
208 -               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
209 +               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
210 +               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
211         },
212         [ C(OP_PREFETCH) ] = {
213 -               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
214 -               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
215 +               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
216 +               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
217         },
218   },
219  };
220 -- 
221 1.7.9.4