12ffebcaf26aba9806e83bb8c16503e21dec9e9f
1 /*
2 * This confidential and proprietary software may be used only as
3 * authorised by a licensing agreement from ARM Limited
4 * (C) COPYRIGHT 2011-2012 ARM Limited
5 * ALL RIGHTS RESERVED
6 * The entire notice above must be reproduced on all authorised
7 * copies and copies may only be made to the extent permitted
8 * by a licensing agreement from ARM Limited.
9 */
11 #include "gator.h"
13 #include <linux/module.h>
14 #include <linux/time.h>
15 #include <linux/math64.h>
16 #include <linux/slab.h>
17 #include <asm/io.h>
19 /* Mali T6xx DDK includes */
20 #include "linux/mali_linux_trace.h"
21 #include "kbase/src/common/mali_kbase.h"
22 #include "kbase/src/linux/mali_kbase_mem_linux.h"
24 #include "gator_events_mali_common.h"
/*
 * Indices of the four hardware counter blocks. The order matches the
 * order of the sections in hardware_counter_names.
 */
enum {
	JM_BLOCK     = 0,	/* Job Manager */
	TILER_BLOCK  = 1,	/* Tiler */
	SHADER_BLOCK = 2,	/* Shader Core */
	MMU_BLOCK    = 3	/* L2 and MMU */
};
/* Counters for Mali-T6xx:
 *
 *  - HW counters, 4 blocks
 *    For HW counters we need strings to create the /dev/gator/events files.
 *    Enums are not needed because the position of a HW counter name in the array is the same
 *    as the position of the corresponding value in the received block of memory.
 *    HW counters are requested by calculating a bitmask, which is then passed to the driver.
 *    Every millisecond a HW counter dump is requested, and if the previous dump has completed its values are read.
 */
/*
 * Hardware counter names.
 *
 * Four sections of 64 entries each, in the order Job Manager, Tiler,
 * Shader Core, L2/MMU. The index of a name is the counter number used by
 * the rest of this file (see GET_HW_BLOCK / GET_COUNTER_OFFSET). Slots
 * without a name hold an empty string.
 *
 * The spellings are kept exactly as they are: these strings are passed to
 * gator_mali_create_file_system(), so changing one changes what gets
 * registered under that name.
 */
static const char *const hardware_counter_names[] = {
	/* ---- Job Manager: indices 0..63 ---- */
	"", "", "", "",						/*   0..3  */
	"MESSAGES_SENT",					/*   4 */
	"MESSAGES_RECEIVED",					/*   5 */
	"GPU_ACTIVE",						/*   6 */
	"IRQ_ACTIVE",						/*   7 */
	/* job slot 0: 8..15 */
	"JS0_JOBS", "JS0_TASKS", "JS0_ACTIVE", "",
	"JS0_WAIT_READ", "JS0_WAIT_ISSUE", "JS0_WAIT_DEPEND", "JS0_WAIT_FINISH",
	/* job slot 1: 16..23 */
	"JS1_JOBS", "JS1_TASKS", "JS1_ACTIVE", "",
	"JS1_WAIT_READ", "JS1_WAIT_ISSUE", "JS1_WAIT_DEPEND", "JS1_WAIT_FINISH",
	/* job slot 2: 24..31 */
	"JS2_JOBS", "JS2_TASKS", "JS2_ACTIVE", "",
	"JS2_WAIT_READ", "JS2_WAIT_ISSUE", "JS2_WAIT_DEPEND", "JS2_WAIT_FINISH",
	/* job slot 3: 32..39 */
	"JS3_JOBS", "JS3_TASKS", "JS3_ACTIVE", "",
	"JS3_WAIT_READ", "JS3_WAIT_ISSUE", "JS3_WAIT_DEPEND", "JS3_WAIT_FINISH",
	/* job slot 4: 40..47 */
	"JS4_JOBS", "JS4_TASKS", "JS4_ACTIVE", "",
	"JS4_WAIT_READ", "JS4_WAIT_ISSUE", "JS4_WAIT_DEPEND", "JS4_WAIT_FINISH",
	/* job slot 5: 48..55 */
	"JS5_JOBS", "JS5_TASKS", "JS5_ACTIVE", "",
	"JS5_WAIT_READ", "JS5_WAIT_ISSUE", "JS5_WAIT_DEPEND", "JS5_WAIT_FINISH",
	/* job slot 6: 56..63 */
	"JS6_JOBS", "JS6_TASKS", "JS6_ACTIVE", "",
	"JS6_WAIT_READ", "JS6_WAIT_ISSUE", "JS6_WAIT_DEPEND", "JS6_WAIT_FINISH",

	/* ---- Tiler: indices 64..127 ---- */
	"", "", "",						/*  64..66 */
	"JOBS_PROCESSED",					/*  67 */
	"TRIANGLES",
	"QUADS",
	"POLYGONS",
	"POINTS",
	"LINES",
	"VCACHE_HIT",
	"VCACHE_MISS",
	"FRONT_FACING",
	"BACK_FACING",
	"PRIM_VISIBLE",
	"PRIM_CULLED",
	"PRIM_CLIPPED",						/*  79 */
	"LEVEL0", "LEVEL1", "LEVEL2", "LEVEL3",			/*  80..83 */
	"LEVEL4", "LEVEL5", "LEVEL6", "LEVEL7",			/*  84..87 */
	"COMMAND_1",						/*  88 */
	"COMMAND_2",
	"COMMAND_3",
	"COMMAND_4",
	"COMMAND_4_7",
	"COMMAND_8_15",
	"COMMAND_16_63",
	"COMMAND_64",						/*  95 */
	"COMPRESS_IN",						/*  96 */
	"COMPRESS_OUT",
	"COMPRESS_FLUSH",
	"TIMESTAMPS",
	"PCACHE_HIT",						/* 100 */
	"PCACHE_MISS",
	"PCACHE_LINE",
	"PCACHE_STALL",
	"WRBUF_HIT",						/* 104 */
	"WRBUF_MISS",
	"WRBUF_LINE",
	"WRBUF_PARTIAL",
	"WRBUF_STALL",
	"ACTIVE",						/* 109 */
	"LOADING_DESC",
	"INDEX_WAIT",
	"INDEX_RANGE_WAIT",
	"VERTEX_WAIT",
	"PCACHE_WAIT",
	"WRBUF_WAIT",
	"BUS_READ",						/* 116 */
	"BUS_WRITE",						/* 117 */
	"", "", "", "", "",					/* 118..122 */
	"UTLB_STALL",						/* 123 */
	"UTLB_REPLAY_MISS",
	"UTLB_REPLAY_FULL",
	"UTLB_NEW_MISS",
	"UTLB_HIT",						/* 127 */

	/* ---- Shader Core: indices 128..191 ---- */
	"", "", "",						/* 128..130 */
	"SHADER_CORE_ACTIVE",					/* 131 */
	"FRAG_ACTIVE",
	"FRAG_PRIMATIVES",
	"FRAG_PRIMATIVES_DROPPED",
	"FRAG_CYCLE_DESC",
	"FRAG_CYCLES_PLR",
	"FRAG_CYCLES_VERT",
	"FRAG_CYCLES_TRISETUP",
	"FRAG_CYCLES_RAST",
	"FRAG_THREADS",						/* 140 */
	"FRAG_DUMMY_THREADS",
	"FRAG_QUADS_RAST",
	"FRAG_QUADS_EZS_TEST",
	"FRAG_QUADS_EZS_KILLED",
	"FRAG_QUADS_LZS_TEST",
	"FRAG_QUADS_LZS_KILLED",
	"FRAG_CYCLE_NO_TILE",
	"FRAG_NUM_TILES",
	"FRAG_TRANS_ELIM",					/* 149 */
	"COMPUTE_ACTIVE",					/* 150 */
	"COMPUTE_TASKS",
	"COMPUTE_THREADS",
	"COMPUTE_CYCLES_DESC",
	"TRIPIPE_ACTIVE",					/* 154 */
	"ARITH_WORDS",
	"ARITH_CYCLES_REG",
	"ARITH_CYCLES_L0",
	"ARITH_FRAG_DEPEND",
	"LS_WORDS",						/* 159 */
	"LS_ISSUES",
	"LS_RESTARTS",
	"LS_REISSUES_MISS",
	"LS_REISSUES_VD",
	"LS_REISSUE_ATTRIB_MISS",
	"LS_NO_WB",
	"TEX_WORDS",						/* 166 */
	"TEX_BUBBLES",
	"TEX_WORDS_L0",
	"TEX_WORDS_DESC",
	"TEX_THREADS",
	"TEX_RECIRC_FMISS",
	"TEX_RECIRC_DESC",
	"TEX_RECIRC_MULTI",
	"TEX_RECIRC_PMISS",
	"TEX_RECIRC_CONF",					/* 175 */
	"LSC_READ_HITS",					/* 176 */
	"LSC_READ_MISSES",
	"LSC_WRITE_HITS",
	"LSC_WRITE_MISSES",
	"LSC_ATOMIC_HITS",
	"LSC_ATOMIC_MISSES",
	"LSC_LINE_FETCHES",
	"LSC_DIRTY_LINE",
	"LSC_SNOOPS",						/* 184 */
	"AXI_TLB_STALL",					/* 185 */
	"AXI_TLB_MIESS",
	"AXI_TLB_TRANSACTION",
	"LS_TLB_MISS",
	"LS_TLB_HIT",
	"AXI_BEATS_READ",
	"AXI_BEATS_WRITTEN",					/* 191 */

	/* ---- L2 and MMU: indices 192..255 ---- */
	"", "", "", "",						/* 192..195 */
	"MMU_TABLE_WALK",					/* 196 */
	"MMU_REPLAY_MISS",
	"MMU_REPLAY_FULL",
	"MMU_NEW_MISS",
	"MMU_HIT",						/* 200 */
	"", "", "", "", "", "", "",				/* 201..207 */
	"UTLB_STALL",						/* 208 */
	"UTLB_REPLAY_MISS",
	"UTLB_REPLAY_FULL",
	"UTLB_NEW_MISS",
	"UTLB_HIT",						/* 212 */
	"", "", "", "", "", "", "", "", "",			/* 213..221 */
	"L2_WRITE_BEATS",					/* 222 */
	"L2_READ_BEATS",
	"L2_ANY_LOOKUP",
	"L2_READ_LOOKUP",
	"L2_SREAD_LOOKUP",
	"L2_READ_REPLAY",
	"L2_READ_SNOOP",
	"L2_READ_HIT",
	"L2_CLEAN_MISS",					/* 230 */
	"L2_WRITE_LOOKUP",
	"L2_SWRITE_LOOKUP",
	"L2_WRITE_REPLAY",
	"L2_WRITE_SNOOP",
	"L2_WRITE_HIT",						/* 235 */
	"L2_EXT_READ_FULL",
	"L2_EXT_READ_HALF",
	"L2_EXT_WRITE_FULL",
	"L2_EXT_WRITE_HALF",
	"L2_EXT_READ",						/* 240 */
	"L2_EXT_READ_LINE",
	"L2_EXT_WRITE",
	"L2_EXT_WRITE_LINE",
	"L2_EXT_WRITE_SMALL",
	"L2_EXT_BARRIER",					/* 245 */
	"L2_EXT_AR_STALL",
	"L2_EXT_R_BUF_FULL",
	"L2_EXT_RD_BUF_FULL",
	"L2_EXT_R_RAW",
	"L2_EXT_W_STALL",					/* 250 */
	"L2_EXT_W_BUF_FULL",
	"L2_EXT_R_W_HAZARD",
	"L2_TAG_HAZARD",
	"L2_SNOOP_FULL",
	"L2_REPLAY_FULL"					/* 255 */
};
/* Number of entries in hardware_counter_names (a size_t, since it is built from sizeof) */
#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0]))

/*
 * A counter number is split into two fields:
 *   bits [7:6]  block index (JM_BLOCK .. MMU_BLOCK)
 *   bits [5:0]  offset of the counter inside its 64-entry block
 */
#define GET_HW_BLOCK(c)       (((c) >> 6) & 0x3)
#define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
318 /* Memory to dump hardware counters into */
319 static void *kernel_dump_buffer;
321 /* kbase context and device */
322 static kbase_context *kbcontext = NULL;
323 static struct kbase_device *kbdevice = NULL;
325 extern struct kbase_device *kbase_find_device(int minor);
326 static volatile bool kbase_device_busy = false;
327 static unsigned int num_hardware_counters_enabled;
329 /*
330 * gatorfs variables for counter enable state
331 */
332 static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS];
334 /* An array used to return the data we recorded
335 * as key,value pairs hence the *2
336 */
337 static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
339 static int start(void)
340 {
341 kbase_uk_hwcnt_setup setup;
342 mali_error err;
343 int cnt;
344 u16 bitmask[] = {0, 0, 0, 0};
346 /* Setup HW counters */
347 num_hardware_counters_enabled = 0;
349 if(NUMBER_OF_HARDWARE_COUNTERS != 256)
350 {
351 pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS);
352 }
354 /* Calculate enable bitmasks based on counters_enabled array */
355 for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++)
356 {
357 const mali_counter *counter = &counters[cnt];
358 if (counter->enabled)
359 {
360 int block = GET_HW_BLOCK(cnt);
361 int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
362 bitmask[block] |= (1 << enable_bit);
363 pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
364 num_hardware_counters_enabled++;
365 }
366 }
368 /* Create a kbase context for HW counters */
369 if (num_hardware_counters_enabled > 0)
370 {
371 kbdevice = kbase_find_device(-1);
373 if (kbcontext)
374 return -1;
376 kbcontext = kbase_create_context(kbdevice);
377 if (!kbcontext)
378 {
379 pr_debug("gator: Mali-T6xx: error creating kbase context\n");
380 goto out;
381 }
383 /*
384 * The amount of memory needed to store the dump (bytes)
385 * DUMP_SIZE = number of core groups
386 * * number of blocks (always 8 for midgard)
387 * * number of counters per block (always 64 for midgard)
388 * * number of bytes per counter (always 4 in midgard)
389 * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4
390 */
391 kernel_dump_buffer = kbase_va_alloc(kbcontext, 2048);
392 if (!kernel_dump_buffer)
393 {
394 pr_debug("gator: Mali-T6xx: error trying to allocate va\n");
395 goto destroy_context;
396 }
398 setup.dump_buffer = (uintptr_t)kernel_dump_buffer;
399 setup.jm_bm = bitmask[JM_BLOCK];
400 setup.tiler_bm = bitmask[TILER_BLOCK];
401 setup.shader_bm = bitmask[SHADER_BLOCK];
402 setup.mmu_l2_bm = bitmask[MMU_BLOCK];
403 /* These counters do not exist on Mali-T60x */
404 setup.l3_cache_bm = 0;
406 /* Use kbase API to enable hardware counters and provide dump buffer */
407 err = kbase_instr_hwcnt_enable(kbcontext, &setup);
408 if (err != MALI_ERROR_NONE)
409 {
410 pr_debug("gator: Mali-T6xx: can't setup hardware counters\n");
411 goto free_buffer;
412 }
413 pr_debug("gator: Mali-T6xx: hardware counters enabled\n");
414 kbase_instr_hwcnt_clear(kbcontext);
415 pr_debug("gator: Mali-T6xx: hardware counters cleared \n");
417 kbase_device_busy = false;
418 }
420 return 0;
422 free_buffer:
423 kbase_va_free(kbcontext, kernel_dump_buffer);
424 destroy_context:
425 kbase_destroy_context(kbcontext);
426 out:
427 return -1;
428 }
430 static void stop(void) {
431 unsigned int cnt;
432 kbase_context *temp_kbcontext;
434 pr_debug("gator: Mali-T6xx: stop\n");
436 /* Set all counters as disabled */
437 for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
438 counters[cnt].enabled = 0;
439 }
441 /* Destroy the context for HW counters */
442 if (num_hardware_counters_enabled > 0 && kbcontext != NULL)
443 {
444 /*
445 * Set the global variable to NULL before destroying it, because
446 * other function will check this before using it.
447 */
448 temp_kbcontext = kbcontext;
449 kbcontext = NULL;
451 kbase_instr_hwcnt_disable(temp_kbcontext);
452 kbase_va_free(temp_kbcontext, kernel_dump_buffer);
453 kbase_destroy_context(temp_kbcontext);
454 pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
455 }
456 }
458 static int read(int **buffer) {
459 int cnt;
460 int len = 0;
461 u32 value = 0;
462 mali_bool success;
464 if (smp_processor_id()!=0)
465 {
466 return 0;
467 }
469 /*
470 * Report the HW counters
471 * Only process hardware counters if at least one of the hardware counters is enabled.
472 */
473 if (num_hardware_counters_enabled > 0)
474 {
475 const unsigned int vithar_blocks[] = {
476 0x700, /* VITHAR_JOB_MANAGER, Block 0 */
477 0x400, /* VITHAR_TILER, Block 1 */
478 0x000, /* VITHAR_SHADER_CORE, Block 2 */
479 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */
480 };
482 if (!kbcontext)
483 {
484 return -1;
485 }
487 // TODO: SYMBOL_GET (all kbase functions)
488 if (kbase_instr_hwcnt_dump_complete(kbcontext, &success) == MALI_TRUE)
489 {
490 kbase_device_busy = false;
492 if (success == MALI_TRUE)
493 {
494 for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++)
495 {
496 const mali_counter *counter = &counters[cnt];
497 if (counter->enabled)
498 {
499 const int block = GET_HW_BLOCK(cnt);
500 const int counter_offset = GET_COUNTER_OFFSET(cnt);
501 const u32 *counter_block = (u32 *)((uintptr_t)kernel_dump_buffer + vithar_blocks[block]);
502 const u32 *counter_address = counter_block + counter_offset;
504 value = *counter_address;
506 if(block == SHADER_BLOCK){
507 /* (counter_address + 0x000) has already been accounted-for above. */
508 value += *(counter_address + 0x100);
509 value += *(counter_address + 0x200);
510 value += *(counter_address + 0x300);
511 }
513 counter_dump[len++] = counter->key;
514 counter_dump[len++] = value;
515 }
516 }
517 }
518 }
520 if (! kbase_device_busy)
521 {
522 kbase_device_busy = true;
523 kbase_instr_hwcnt_dump_irq(kbcontext);
524 }
525 }
527 /* Update the buffer */
528 if (buffer) {
529 *buffer = (int*) counter_dump;
530 }
532 return len;
533 }
535 static int create_files(struct super_block *sb, struct dentry *root)
536 {
537 unsigned int event;
538 /*
539 * Create the filesystem for all events
540 */
541 int counter_index = 0;
542 const char* mali_name = gator_mali_get_mali_name();
544 for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++)
545 {
546 if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event]) != 0)
547 return -1;
548 counter_index++;
549 }
551 return 0;
552 }
555 static struct gator_interface gator_events_mali_t6xx_interface = {
556 .create_files = create_files,
557 .start = start,
558 .stop = stop,
559 .read = read
560 };
562 int gator_events_mali_t6xx_hw_init(void)
563 {
564 pr_debug("gator: Mali-T6xx: sw_counters init\n");
566 gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
568 return gator_events_install(&gator_events_mali_t6xx_interface);
569 }
571 gator_events_init(gator_events_mali_t6xx_hw_init);