aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'tests/amdgpu/basic_tests.c')
-rw-r--r--tests/amdgpu/basic_tests.c1122
1 files changed, 868 insertions, 254 deletions
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index bfda21b1..1adbddd9 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -21,16 +21,13 @@
21 * 21 *
22*/ 22*/
23 23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <stdio.h> 24#include <stdio.h>
29#include <stdlib.h> 25#include <stdlib.h>
30#include <unistd.h> 26#include <unistd.h>
31#ifdef HAVE_ALLOCA_H 27#ifdef HAVE_ALLOCA_H
32# include <alloca.h> 28# include <alloca.h>
33#endif 29#endif
30#include <sys/wait.h>
34 31
35#include "CUnit/Basic.h" 32#include "CUnit/Basic.h"
36 33
@@ -40,27 +37,38 @@
40static amdgpu_device_handle device_handle; 37static amdgpu_device_handle device_handle;
41static uint32_t major_version; 38static uint32_t major_version;
42static uint32_t minor_version; 39static uint32_t minor_version;
40static uint32_t family_id;
43 41
44static void amdgpu_query_info_test(void); 42static void amdgpu_query_info_test(void);
45static void amdgpu_memory_alloc(void);
46static void amdgpu_command_submission_gfx(void); 43static void amdgpu_command_submission_gfx(void);
47static void amdgpu_command_submission_compute(void); 44static void amdgpu_command_submission_compute(void);
45static void amdgpu_command_submission_multi_fence(void);
48static void amdgpu_command_submission_sdma(void); 46static void amdgpu_command_submission_sdma(void);
49static void amdgpu_userptr_test(void); 47static void amdgpu_userptr_test(void);
50static void amdgpu_semaphore_test(void); 48static void amdgpu_semaphore_test(void);
49static void amdgpu_sync_dependency_test(void);
50static void amdgpu_bo_eviction_test(void);
51 51
52static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 52static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
53static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 53static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
54static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 54static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
55 55static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
56 unsigned ip_type,
57 int instance, int pm4_dw, uint32_t *pm4_src,
58 int res_cnt, amdgpu_bo_handle *resources,
59 struct amdgpu_cs_ib_info *ib_info,
60 struct amdgpu_cs_request *ibs_request);
61
56CU_TestInfo basic_tests[] = { 62CU_TestInfo basic_tests[] = {
57 { "Query Info Test", amdgpu_query_info_test }, 63 { "Query Info Test", amdgpu_query_info_test },
58 { "Memory alloc Test", amdgpu_memory_alloc },
59 { "Userptr Test", amdgpu_userptr_test }, 64 { "Userptr Test", amdgpu_userptr_test },
65 { "bo eviction Test", amdgpu_bo_eviction_test },
60 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 66 { "Command submission Test (GFX)", amdgpu_command_submission_gfx },
61 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 67 { "Command submission Test (Compute)", amdgpu_command_submission_compute },
68 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
62 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 69 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
63 { "SW semaphore Test", amdgpu_semaphore_test }, 70 { "SW semaphore Test", amdgpu_semaphore_test },
71 { "Sync dependency Test", amdgpu_sync_dependency_test },
64 CU_TEST_INFO_NULL, 72 CU_TEST_INFO_NULL,
65}; 73};
66#define BUFFER_SIZE (8 * 1024) 74#define BUFFER_SIZE (8 * 1024)
@@ -197,22 +205,110 @@ CU_TestInfo basic_tests[] = {
197# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 205# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
198# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 206# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
199 207
208#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \
209 (((b) & 0x1) << 26) | \
210 (((t) & 0x1) << 23) | \
211 (((s) & 0x1) << 22) | \
212 (((cnt) & 0xFFFFF) << 0))
213#define SDMA_OPCODE_COPY_SI 3
214#define SDMA_OPCODE_CONSTANT_FILL_SI 13
215#define SDMA_NOP_SI 0xf
216#define GFX_COMPUTE_NOP_SI 0x80000000
217#define PACKET3_DMA_DATA_SI 0x41
218# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27)
219 /* 0 - ME
220 * 1 - PFP
221 */
222# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20)
223 /* 0 - DST_ADDR using DAS
224 * 1 - GDS
225 * 3 - DST_ADDR using L2
226 */
227# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29)
228 /* 0 - SRC_ADDR using SAS
229 * 1 - GDS
230 * 2 - DATA
231 * 3 - SRC_ADDR using L2
232 */
233# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31)
234
235
236#define PKT3_CONTEXT_CONTROL 0x28
237#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
238#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
239#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
240
241#define PKT3_CLEAR_STATE 0x12
242
243#define PKT3_SET_SH_REG 0x76
244#define PACKET3_SET_SH_REG_START 0x00002c00
245
246#define PACKET3_DISPATCH_DIRECT 0x15
247
248
249/* gfx 8 */
250#define mmCOMPUTE_PGM_LO 0x2e0c
251#define mmCOMPUTE_PGM_RSRC1 0x2e12
252#define mmCOMPUTE_TMPRING_SIZE 0x2e18
253#define mmCOMPUTE_USER_DATA_0 0x2e40
254#define mmCOMPUTE_USER_DATA_1 0x2e41
255#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15
256#define mmCOMPUTE_NUM_THREAD_X 0x2e07
257
258
259
260#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
261 ((num & 0x0000ff00) << 8) | \
262 ((num & 0x00ff0000) >> 8) | \
263 ((num & 0x000000ff) << 24))
264
265
266/* Shader code
267 * void main()
268{
269
270 float x = some_input;
271 for (unsigned i = 0; i < 1000000; i++)
272 x = sin(x);
273
274 u[0] = 42u;
275}
276*/
277
278static uint32_t shader_bin[] = {
279 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
280 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
281 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
282 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
283};
284
285#define CODE_OFFSET 512
286#define DATA_OFFSET 1024
287
288
200int suite_basic_tests_init(void) 289int suite_basic_tests_init(void)
201{ 290{
291 struct amdgpu_gpu_info gpu_info = {0};
202 int r; 292 int r;
203 293
204 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 294 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
205 &minor_version, &device_handle); 295 &minor_version, &device_handle);
206 296
207 if (r == 0) 297 if (r) {
208 return CUE_SUCCESS;
209 else {
210 if ((r == -EACCES) && (errno == EACCES)) 298 if ((r == -EACCES) && (errno == EACCES))
211 printf("\n\nError:%s. " 299 printf("\n\nError:%s. "
212 "Hint:Try to run this test program as root.", 300 "Hint:Try to run this test program as root.",
213 strerror(errno)); 301 strerror(errno));
214 return CUE_SINIT_FAILED; 302 return CUE_SINIT_FAILED;
215 } 303 }
304
305 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
306 if (r)
307 return CUE_SINIT_FAILED;
308
309 family_id = gpu_info.family_id;
310
311 return CUE_SUCCESS;
216} 312}
217 313
218int suite_basic_tests_clean(void) 314int suite_basic_tests_clean(void)
@@ -239,53 +335,6 @@ static void amdgpu_query_info_test(void)
239 CU_ASSERT_EQUAL(r, 0); 335 CU_ASSERT_EQUAL(r, 0);
240} 336}
241 337
242static void amdgpu_memory_alloc(void)
243{
244 amdgpu_bo_handle bo;
245 amdgpu_va_handle va_handle;
246 uint64_t bo_mc;
247 int r;
248
249 /* Test visible VRAM */
250 bo = gpu_mem_alloc(device_handle,
251 4096, 4096,
252 AMDGPU_GEM_DOMAIN_VRAM,
253 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
254 &bo_mc, &va_handle);
255
256 r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
257 CU_ASSERT_EQUAL(r, 0);
258
259 /* Test invisible VRAM */
260 bo = gpu_mem_alloc(device_handle,
261 4096, 4096,
262 AMDGPU_GEM_DOMAIN_VRAM,
263 AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
264 &bo_mc, &va_handle);
265
266 r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
267 CU_ASSERT_EQUAL(r, 0);
268
269 /* Test GART Cacheable */
270 bo = gpu_mem_alloc(device_handle,
271 4096, 4096,
272 AMDGPU_GEM_DOMAIN_GTT,
273 0, &bo_mc, &va_handle);
274
275 r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
276 CU_ASSERT_EQUAL(r, 0);
277
278 /* Test GART USWC */
279 bo = gpu_mem_alloc(device_handle,
280 4096, 4096,
281 AMDGPU_GEM_DOMAIN_GTT,
282 AMDGPU_GEM_CREATE_CPU_GTT_USWC,
283 &bo_mc, &va_handle);
284
285 r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
286 CU_ASSERT_EQUAL(r, 0);
287}
288
289static void amdgpu_command_submission_gfx_separate_ibs(void) 338static void amdgpu_command_submission_gfx_separate_ibs(void)
290{ 339{
291 amdgpu_context_handle context_handle; 340 amdgpu_context_handle context_handle;
@@ -299,7 +348,7 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
299 uint32_t expired; 348 uint32_t expired;
300 amdgpu_bo_list_handle bo_list; 349 amdgpu_bo_list_handle bo_list;
301 amdgpu_va_handle va_handle, va_handle_ce; 350 amdgpu_va_handle va_handle, va_handle_ce;
302 int r; 351 int r, i = 0;
303 352
304 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 353 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
305 CU_ASSERT_EQUAL(r, 0); 354 CU_ASSERT_EQUAL(r, 0);
@@ -324,12 +373,14 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
324 373
325 /* IT_SET_CE_DE_COUNTERS */ 374 /* IT_SET_CE_DE_COUNTERS */
326 ptr = ib_result_ce_cpu; 375 ptr = ib_result_ce_cpu;
327 ptr[0] = 0xc0008900; 376 if (family_id != AMDGPU_FAMILY_SI) {
328 ptr[1] = 0; 377 ptr[i++] = 0xc0008900;
329 ptr[2] = 0xc0008400; 378 ptr[i++] = 0;
330 ptr[3] = 1; 379 }
380 ptr[i++] = 0xc0008400;
381 ptr[i++] = 1;
331 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 382 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
332 ib_info[0].size = 4; 383 ib_info[0].size = i;
333 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 384 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
334 385
335 /* IT_WAIT_ON_CE_COUNTER */ 386 /* IT_WAIT_ON_CE_COUNTER */
@@ -388,7 +439,7 @@ static void amdgpu_command_submission_gfx_shared_ib(void)
388 uint32_t expired; 439 uint32_t expired;
389 amdgpu_bo_list_handle bo_list; 440 amdgpu_bo_list_handle bo_list;
390 amdgpu_va_handle va_handle; 441 amdgpu_va_handle va_handle;
391 int r; 442 int r, i = 0;
392 443
393 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 444 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
394 CU_ASSERT_EQUAL(r, 0); 445 CU_ASSERT_EQUAL(r, 0);
@@ -407,12 +458,14 @@ static void amdgpu_command_submission_gfx_shared_ib(void)
407 458
408 /* IT_SET_CE_DE_COUNTERS */ 459 /* IT_SET_CE_DE_COUNTERS */
409 ptr = ib_result_cpu; 460 ptr = ib_result_cpu;
410 ptr[0] = 0xc0008900; 461 if (family_id != AMDGPU_FAMILY_SI) {
411 ptr[1] = 0; 462 ptr[i++] = 0xc0008900;
412 ptr[2] = 0xc0008400; 463 ptr[i++] = 0;
413 ptr[3] = 1; 464 }
465 ptr[i++] = 0xc0008400;
466 ptr[i++] = 1;
414 ib_info[0].ib_mc_address = ib_result_mc_address; 467 ib_info[0].ib_mc_address = ib_result_mc_address;
415 ib_info[0].size = 4; 468 ib_info[0].size = i;
416 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 469 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
417 470
418 ptr = (uint32_t *)ib_result_cpu + 4; 471 ptr = (uint32_t *)ib_result_cpu + 4;
@@ -467,6 +520,156 @@ static void amdgpu_command_submission_gfx_cp_copy_data(void)
467 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 520 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
468} 521}
469 522
523static void amdgpu_bo_eviction_test(void)
524{
525 const int sdma_write_length = 1024;
526 const int pm4_dw = 256;
527 amdgpu_context_handle context_handle;
528 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
529 amdgpu_bo_handle *resources;
530 uint32_t *pm4;
531 struct amdgpu_cs_ib_info *ib_info;
532 struct amdgpu_cs_request *ibs_request;
533 uint64_t bo1_mc, bo2_mc;
534 volatile unsigned char *bo1_cpu, *bo2_cpu;
535 int i, j, r, loop1, loop2;
536 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
537 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
538 struct amdgpu_heap_info vram_info, gtt_info;
539
540 pm4 = calloc(pm4_dw, sizeof(*pm4));
541 CU_ASSERT_NOT_EQUAL(pm4, NULL);
542
543 ib_info = calloc(1, sizeof(*ib_info));
544 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
545
546 ibs_request = calloc(1, sizeof(*ibs_request));
547 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
548
549 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
550 CU_ASSERT_EQUAL(r, 0);
551
552 /* prepare resource */
553 resources = calloc(4, sizeof(amdgpu_bo_handle));
554 CU_ASSERT_NOT_EQUAL(resources, NULL);
555
556 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
557 0, &vram_info);
558 CU_ASSERT_EQUAL(r, 0);
559
560 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
561 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
562 CU_ASSERT_EQUAL(r, 0);
563 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
564 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
565 CU_ASSERT_EQUAL(r, 0);
566
567 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
568 0, &gtt_info);
569 CU_ASSERT_EQUAL(r, 0);
570
571 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
572 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
573 CU_ASSERT_EQUAL(r, 0);
574 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
575 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
576 CU_ASSERT_EQUAL(r, 0);
577
578
579
580 loop1 = loop2 = 0;
581 /* run 9 circle to test all mapping combination */
582 while(loop1 < 2) {
583 while(loop2 < 2) {
584 /* allocate UC bo1for sDMA use */
585 r = amdgpu_bo_alloc_and_map(device_handle,
586 sdma_write_length, 4096,
587 AMDGPU_GEM_DOMAIN_GTT,
588 gtt_flags[loop1], &bo1,
589 (void**)&bo1_cpu, &bo1_mc,
590 &bo1_va_handle);
591 CU_ASSERT_EQUAL(r, 0);
592
593 /* set bo1 */
594 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
595
596 /* allocate UC bo2 for sDMA use */
597 r = amdgpu_bo_alloc_and_map(device_handle,
598 sdma_write_length, 4096,
599 AMDGPU_GEM_DOMAIN_GTT,
600 gtt_flags[loop2], &bo2,
601 (void**)&bo2_cpu, &bo2_mc,
602 &bo2_va_handle);
603 CU_ASSERT_EQUAL(r, 0);
604
605 /* clear bo2 */
606 memset((void*)bo2_cpu, 0, sdma_write_length);
607
608 resources[0] = bo1;
609 resources[1] = bo2;
610 resources[2] = vram_max[loop2];
611 resources[3] = gtt_max[loop2];
612
613 /* fulfill PM4: test DMA copy linear */
614 i = j = 0;
615 if (family_id == AMDGPU_FAMILY_SI) {
616 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
617 sdma_write_length);
618 pm4[i++] = 0xffffffff & bo2_mc;
619 pm4[i++] = 0xffffffff & bo1_mc;
620 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
621 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
622 } else {
623 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
624 if (family_id >= AMDGPU_FAMILY_AI)
625 pm4[i++] = sdma_write_length - 1;
626 else
627 pm4[i++] = sdma_write_length;
628 pm4[i++] = 0;
629 pm4[i++] = 0xffffffff & bo1_mc;
630 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
631 pm4[i++] = 0xffffffff & bo2_mc;
632 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
633 }
634
635 amdgpu_test_exec_cs_helper(context_handle,
636 AMDGPU_HW_IP_DMA, 0,
637 i, pm4,
638 4, resources,
639 ib_info, ibs_request);
640
641 /* verify if SDMA test result meets with expected */
642 i = 0;
643 while(i < sdma_write_length) {
644 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
645 }
646 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
647 sdma_write_length);
648 CU_ASSERT_EQUAL(r, 0);
649 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
650 sdma_write_length);
651 CU_ASSERT_EQUAL(r, 0);
652 loop2++;
653 }
654 loop2 = 0;
655 loop1++;
656 }
657 amdgpu_bo_free(vram_max[0]);
658 amdgpu_bo_free(vram_max[1]);
659 amdgpu_bo_free(gtt_max[0]);
660 amdgpu_bo_free(gtt_max[1]);
661 /* clean resources */
662 free(resources);
663 free(ibs_request);
664 free(ib_info);
665 free(pm4);
666
667 /* end of test */
668 r = amdgpu_cs_ctx_free(context_handle);
669 CU_ASSERT_EQUAL(r, 0);
670}
671
672
470static void amdgpu_command_submission_gfx(void) 673static void amdgpu_command_submission_gfx(void)
471{ 674{
472 /* write data using the CP */ 675 /* write data using the CP */
@@ -493,10 +696,19 @@ static void amdgpu_semaphore_test(void)
493 struct amdgpu_cs_fence fence_status = {0}; 696 struct amdgpu_cs_fence fence_status = {0};
494 uint32_t *ptr; 697 uint32_t *ptr;
495 uint32_t expired; 698 uint32_t expired;
699 uint32_t sdma_nop, gfx_nop;
496 amdgpu_bo_list_handle bo_list[2]; 700 amdgpu_bo_list_handle bo_list[2];
497 amdgpu_va_handle va_handle[2]; 701 amdgpu_va_handle va_handle[2];
498 int r, i; 702 int r, i;
499 703
704 if (family_id == AMDGPU_FAMILY_SI) {
705 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
706 gfx_nop = GFX_COMPUTE_NOP_SI;
707 } else {
708 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
709 gfx_nop = GFX_COMPUTE_NOP;
710 }
711
500 r = amdgpu_cs_create_semaphore(&sem); 712 r = amdgpu_cs_create_semaphore(&sem);
501 CU_ASSERT_EQUAL(r, 0); 713 CU_ASSERT_EQUAL(r, 0);
502 for (i = 0; i < 2; i++) { 714 for (i = 0; i < 2; i++) {
@@ -516,7 +728,7 @@ static void amdgpu_semaphore_test(void)
516 728
517 /* 1. same context different engine */ 729 /* 1. same context different engine */
518 ptr = ib_result_cpu[0]; 730 ptr = ib_result_cpu[0];
519 ptr[0] = SDMA_NOP; 731 ptr[0] = sdma_nop;
520 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 732 ib_info[0].ib_mc_address = ib_result_mc_address[0];
521 ib_info[0].size = 1; 733 ib_info[0].size = 1;
522 734
@@ -533,7 +745,7 @@ static void amdgpu_semaphore_test(void)
533 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 745 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
534 CU_ASSERT_EQUAL(r, 0); 746 CU_ASSERT_EQUAL(r, 0);
535 ptr = ib_result_cpu[1]; 747 ptr = ib_result_cpu[1];
536 ptr[0] = GFX_COMPUTE_NOP; 748 ptr[0] = gfx_nop;
537 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 749 ib_info[1].ib_mc_address = ib_result_mc_address[1];
538 ib_info[1].size = 1; 750 ib_info[1].size = 1;
539 751
@@ -557,7 +769,7 @@ static void amdgpu_semaphore_test(void)
557 769
558 /* 2. same engine different context */ 770 /* 2. same engine different context */
559 ptr = ib_result_cpu[0]; 771 ptr = ib_result_cpu[0];
560 ptr[0] = GFX_COMPUTE_NOP; 772 ptr[0] = gfx_nop;
561 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 773 ib_info[0].ib_mc_address = ib_result_mc_address[0];
562 ib_info[0].size = 1; 774 ib_info[0].size = 1;
563 775
@@ -574,7 +786,7 @@ static void amdgpu_semaphore_test(void)
574 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 786 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
575 CU_ASSERT_EQUAL(r, 0); 787 CU_ASSERT_EQUAL(r, 0);
576 ptr = ib_result_cpu[1]; 788 ptr = ib_result_cpu[1];
577 ptr[0] = GFX_COMPUTE_NOP; 789 ptr[0] = gfx_nop;
578 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 790 ib_info[1].ib_mc_address = ib_result_mc_address[1];
579 ib_info[1].size = 1; 791 ib_info[1].size = 1;
580 792
@@ -595,6 +807,7 @@ static void amdgpu_semaphore_test(void)
595 500000000, 0, &expired); 807 500000000, 0, &expired);
596 CU_ASSERT_EQUAL(r, 0); 808 CU_ASSERT_EQUAL(r, 0);
597 CU_ASSERT_EQUAL(expired, true); 809 CU_ASSERT_EQUAL(expired, true);
810
598 for (i = 0; i < 2; i++) { 811 for (i = 0; i < 2; i++) {
599 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 812 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
600 ib_result_mc_address[i], 4096); 813 ib_result_mc_address[i], 4096);
@@ -622,14 +835,18 @@ static void amdgpu_command_submission_compute_nop(void)
622 struct amdgpu_cs_fence fence_status; 835 struct amdgpu_cs_fence fence_status;
623 uint32_t *ptr; 836 uint32_t *ptr;
624 uint32_t expired; 837 uint32_t expired;
625 int i, r, instance; 838 int r, instance;
626 amdgpu_bo_list_handle bo_list; 839 amdgpu_bo_list_handle bo_list;
627 amdgpu_va_handle va_handle; 840 amdgpu_va_handle va_handle;
841 struct drm_amdgpu_info_hw_ip info;
842
843 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
844 CU_ASSERT_EQUAL(r, 0);
628 845
629 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 846 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
630 CU_ASSERT_EQUAL(r, 0); 847 CU_ASSERT_EQUAL(r, 0);
631 848
632 for (instance = 0; instance < 8; instance++) { 849 for (instance = 0; (1 << instance) & info.available_rings; instance++) {
633 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 850 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
634 AMDGPU_GEM_DOMAIN_GTT, 0, 851 AMDGPU_GEM_DOMAIN_GTT, 0,
635 &ib_result_handle, &ib_result_cpu, 852 &ib_result_handle, &ib_result_cpu,
@@ -641,8 +858,8 @@ static void amdgpu_command_submission_compute_nop(void)
641 CU_ASSERT_EQUAL(r, 0); 858 CU_ASSERT_EQUAL(r, 0);
642 859
643 ptr = ib_result_cpu; 860 ptr = ib_result_cpu;
644 for (i = 0; i < 16; ++i) 861 memset(ptr, 0, 16);
645 ptr[i] = 0xffff1000; 862 ptr[0]=PACKET3(PACKET3_NOP, 14);
646 863
647 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 864 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
648 ib_info.ib_mc_address = ib_result_mc_address; 865 ib_info.ib_mc_address = ib_result_mc_address;
@@ -805,9 +1022,10 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
805 struct amdgpu_cs_request *ibs_request; 1022 struct amdgpu_cs_request *ibs_request;
806 uint64_t bo_mc; 1023 uint64_t bo_mc;
807 volatile uint32_t *bo_cpu; 1024 volatile uint32_t *bo_cpu;
808 int i, j, r, loop; 1025 int i, j, r, loop, ring_id;
809 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1026 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
810 amdgpu_va_handle va_handle; 1027 amdgpu_va_handle va_handle;
1028 struct drm_amdgpu_info_hw_ip hw_ip_info;
811 1029
812 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1030 pm4 = calloc(pm4_dw, sizeof(*pm4));
813 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1031 CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -818,6 +1036,9 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
818 ibs_request = calloc(1, sizeof(*ibs_request)); 1036 ibs_request = calloc(1, sizeof(*ibs_request));
819 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1037 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
820 1038
1039 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1040 CU_ASSERT_EQUAL(r, 0);
1041
821 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1042 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
822 CU_ASSERT_EQUAL(r, 0); 1043 CU_ASSERT_EQUAL(r, 0);
823 1044
@@ -825,58 +1046,66 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
825 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1046 resources = calloc(1, sizeof(amdgpu_bo_handle));
826 CU_ASSERT_NOT_EQUAL(resources, NULL); 1047 CU_ASSERT_NOT_EQUAL(resources, NULL);
827 1048
828 loop = 0; 1049 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
829 while(loop < 2) { 1050 loop = 0;
830 /* allocate UC bo for sDMA use */ 1051 while(loop < 2) {
831 r = amdgpu_bo_alloc_and_map(device_handle, 1052 /* allocate UC bo for sDMA use */
832 sdma_write_length * sizeof(uint32_t), 1053 r = amdgpu_bo_alloc_and_map(device_handle,
833 4096, AMDGPU_GEM_DOMAIN_GTT, 1054 sdma_write_length * sizeof(uint32_t),
834 gtt_flags[loop], &bo, (void**)&bo_cpu, 1055 4096, AMDGPU_GEM_DOMAIN_GTT,
835 &bo_mc, &va_handle); 1056 gtt_flags[loop], &bo, (void**)&bo_cpu,
836 CU_ASSERT_EQUAL(r, 0); 1057 &bo_mc, &va_handle);
1058 CU_ASSERT_EQUAL(r, 0);
837 1059
838 /* clear bo */ 1060 /* clear bo */
839 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 1061 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
840
841
842 resources[0] = bo;
843
844 /* fulfill PM4: test DMA write-linear */
845 i = j = 0;
846 if (ip_type == AMDGPU_HW_IP_DMA) {
847 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
848 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
849 pm4[i++] = 0xffffffff & bo_mc;
850 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
851 pm4[i++] = sdma_write_length;
852 while(j++ < sdma_write_length)
853 pm4[i++] = 0xdeadbeaf;
854 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
855 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
856 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
857 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
858 pm4[i++] = 0xfffffffc & bo_mc;
859 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
860 while(j++ < sdma_write_length)
861 pm4[i++] = 0xdeadbeaf;
862 }
863 1062
864 amdgpu_test_exec_cs_helper(context_handle, 1063 resources[0] = bo;
865 ip_type, 0,
866 i, pm4,
867 1, resources,
868 ib_info, ibs_request);
869 1064
870 /* verify if SDMA test result meets with expected */ 1065 /* fulfill PM4: test DMA write-linear */
871 i = 0; 1066 i = j = 0;
872 while(i < sdma_write_length) { 1067 if (ip_type == AMDGPU_HW_IP_DMA) {
873 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1068 if (family_id == AMDGPU_FAMILY_SI)
874 } 1069 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1070 sdma_write_length);
1071 else
1072 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1073 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1074 pm4[i++] = 0xffffffff & bo_mc;
1075 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1076 if (family_id >= AMDGPU_FAMILY_AI)
1077 pm4[i++] = sdma_write_length - 1;
1078 else if (family_id != AMDGPU_FAMILY_SI)
1079 pm4[i++] = sdma_write_length;
1080 while(j++ < sdma_write_length)
1081 pm4[i++] = 0xdeadbeaf;
1082 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1083 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1084 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1085 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1086 pm4[i++] = 0xfffffffc & bo_mc;
1087 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1088 while(j++ < sdma_write_length)
1089 pm4[i++] = 0xdeadbeaf;
1090 }
875 1091
876 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1092 amdgpu_test_exec_cs_helper(context_handle,
877 sdma_write_length * sizeof(uint32_t)); 1093 ip_type, ring_id,
878 CU_ASSERT_EQUAL(r, 0); 1094 i, pm4,
879 loop++; 1095 1, resources,
1096 ib_info, ibs_request);
1097
1098 /* verify if SDMA test result meets with expected */
1099 i = 0;
1100 while(i < sdma_write_length) {
1101 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1102 }
1103
1104 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1105 sdma_write_length * sizeof(uint32_t));
1106 CU_ASSERT_EQUAL(r, 0);
1107 loop++;
1108 }
880 } 1109 }
881 /* clean resources */ 1110 /* clean resources */
882 free(resources); 1111 free(resources);
@@ -906,9 +1135,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
906 struct amdgpu_cs_request *ibs_request; 1135 struct amdgpu_cs_request *ibs_request;
907 uint64_t bo_mc; 1136 uint64_t bo_mc;
908 volatile uint32_t *bo_cpu; 1137 volatile uint32_t *bo_cpu;
909 int i, j, r, loop; 1138 int i, j, r, loop, ring_id;
910 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1139 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
911 amdgpu_va_handle va_handle; 1140 amdgpu_va_handle va_handle;
1141 struct drm_amdgpu_info_hw_ip hw_ip_info;
912 1142
913 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1143 pm4 = calloc(pm4_dw, sizeof(*pm4));
914 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1144 CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -919,6 +1149,9 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
919 ibs_request = calloc(1, sizeof(*ibs_request)); 1149 ibs_request = calloc(1, sizeof(*ibs_request));
920 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1150 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
921 1151
1152 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1153 CU_ASSERT_EQUAL(r, 0);
1154
922 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1155 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
923 CU_ASSERT_EQUAL(r, 0); 1156 CU_ASSERT_EQUAL(r, 0);
924 1157
@@ -926,60 +1159,86 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
926 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1159 resources = calloc(1, sizeof(amdgpu_bo_handle));
927 CU_ASSERT_NOT_EQUAL(resources, NULL); 1160 CU_ASSERT_NOT_EQUAL(resources, NULL);
928 1161
929 loop = 0; 1162 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
930 while(loop < 2) { 1163 loop = 0;
931 /* allocate UC bo for sDMA use */ 1164 while(loop < 2) {
932 r = amdgpu_bo_alloc_and_map(device_handle, 1165 /* allocate UC bo for sDMA use */
933 sdma_write_length, 4096, 1166 r = amdgpu_bo_alloc_and_map(device_handle,
934 AMDGPU_GEM_DOMAIN_GTT, 1167 sdma_write_length, 4096,
935 gtt_flags[loop], &bo, (void**)&bo_cpu, 1168 AMDGPU_GEM_DOMAIN_GTT,
936 &bo_mc, &va_handle); 1169 gtt_flags[loop], &bo, (void**)&bo_cpu,
937 CU_ASSERT_EQUAL(r, 0); 1170 &bo_mc, &va_handle);
1171 CU_ASSERT_EQUAL(r, 0);
938 1172
939 /* clear bo */ 1173 /* clear bo */
940 memset((void*)bo_cpu, 0, sdma_write_length); 1174 memset((void*)bo_cpu, 0, sdma_write_length);
941
942 resources[0] = bo;
943
944 /* fulfill PM4: test DMA const fill */
945 i = j = 0;
946 if (ip_type == AMDGPU_HW_IP_DMA) {
947 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
948 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
949 pm4[i++] = 0xffffffff & bo_mc;
950 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
951 pm4[i++] = 0xdeadbeaf;
952 pm4[i++] = sdma_write_length;
953 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
954 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
955 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
956 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
957 PACKET3_DMA_DATA_DST_SEL(0) |
958 PACKET3_DMA_DATA_SRC_SEL(2) |
959 PACKET3_DMA_DATA_CP_SYNC;
960 pm4[i++] = 0xdeadbeaf;
961 pm4[i++] = 0;
962 pm4[i++] = 0xfffffffc & bo_mc;
963 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
964 pm4[i++] = sdma_write_length;
965 }
966 1175
967 amdgpu_test_exec_cs_helper(context_handle, 1176 resources[0] = bo;
968 ip_type, 0,
969 i, pm4,
970 1, resources,
971 ib_info, ibs_request);
972 1177
973 /* verify if SDMA test result meets with expected */ 1178 /* fulfill PM4: test DMA const fill */
974 i = 0; 1179 i = j = 0;
975 while(i < (sdma_write_length / 4)) { 1180 if (ip_type == AMDGPU_HW_IP_DMA) {
976 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1181 if (family_id == AMDGPU_FAMILY_SI) {
977 } 1182 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1183 0, 0, 0,
1184 sdma_write_length / 4);
1185 pm4[i++] = 0xfffffffc & bo_mc;
1186 pm4[i++] = 0xdeadbeaf;
1187 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1188 } else {
1189 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1190 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1191 pm4[i++] = 0xffffffff & bo_mc;
1192 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1193 pm4[i++] = 0xdeadbeaf;
1194 if (family_id >= AMDGPU_FAMILY_AI)
1195 pm4[i++] = sdma_write_length - 1;
1196 else
1197 pm4[i++] = sdma_write_length;
1198 }
1199 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1200 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1201 if (family_id == AMDGPU_FAMILY_SI) {
1202 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1203 pm4[i++] = 0xdeadbeaf;
1204 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1205 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1206 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1207 PACKET3_DMA_DATA_SI_CP_SYNC;
1208 pm4[i++] = 0xffffffff & bo_mc;
1209 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1210 pm4[i++] = sdma_write_length;
1211 } else {
1212 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1213 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1214 PACKET3_DMA_DATA_DST_SEL(0) |
1215 PACKET3_DMA_DATA_SRC_SEL(2) |
1216 PACKET3_DMA_DATA_CP_SYNC;
1217 pm4[i++] = 0xdeadbeaf;
1218 pm4[i++] = 0;
1219 pm4[i++] = 0xfffffffc & bo_mc;
1220 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1221 pm4[i++] = sdma_write_length;
1222 }
1223 }
978 1224
979 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1225 amdgpu_test_exec_cs_helper(context_handle,
980 sdma_write_length); 1226 ip_type, ring_id,
981 CU_ASSERT_EQUAL(r, 0); 1227 i, pm4,
982 loop++; 1228 1, resources,
1229 ib_info, ibs_request);
1230
1231 /* verify if SDMA test result meets with expected */
1232 i = 0;
1233 while(i < (sdma_write_length / 4)) {
1234 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1235 }
1236
1237 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1238 sdma_write_length);
1239 CU_ASSERT_EQUAL(r, 0);
1240 loop++;
1241 }
983 } 1242 }
984 /* clean resources */ 1243 /* clean resources */
985 free(resources); 1244 free(resources);
@@ -1009,9 +1268,10 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1009 struct amdgpu_cs_request *ibs_request; 1268 struct amdgpu_cs_request *ibs_request;
1010 uint64_t bo1_mc, bo2_mc; 1269 uint64_t bo1_mc, bo2_mc;
1011 volatile unsigned char *bo1_cpu, *bo2_cpu; 1270 volatile unsigned char *bo1_cpu, *bo2_cpu;
1012 int i, j, r, loop1, loop2; 1271 int i, j, r, loop1, loop2, ring_id;
1013 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1272 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1014 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1273 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1274 struct drm_amdgpu_info_hw_ip hw_ip_info;
1015 1275
1016 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1276 pm4 = calloc(pm4_dw, sizeof(*pm4));
1017 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1277 CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -1022,6 +1282,9 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1022 ibs_request = calloc(1, sizeof(*ibs_request)); 1282 ibs_request = calloc(1, sizeof(*ibs_request));
1023 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1283 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1024 1284
1285 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1286 CU_ASSERT_EQUAL(r, 0);
1287
1025 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1288 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1026 CU_ASSERT_EQUAL(r, 0); 1289 CU_ASSERT_EQUAL(r, 0);
1027 1290
@@ -1029,81 +1292,111 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1029 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1292 resources = calloc(2, sizeof(amdgpu_bo_handle));
1030 CU_ASSERT_NOT_EQUAL(resources, NULL); 1293 CU_ASSERT_NOT_EQUAL(resources, NULL);
1031 1294
1032 loop1 = loop2 = 0; 1295 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1033 /* run 9 circle to test all mapping combination */ 1296 loop1 = loop2 = 0;
1034 while(loop1 < 2) { 1297 /* run 9 circle to test all mapping combination */
1035 while(loop2 < 2) { 1298 while(loop1 < 2) {
1036 /* allocate UC bo1for sDMA use */ 1299 while(loop2 < 2) {
1037 r = amdgpu_bo_alloc_and_map(device_handle, 1300 /* allocate UC bo1for sDMA use */
1038 sdma_write_length, 4096, 1301 r = amdgpu_bo_alloc_and_map(device_handle,
1039 AMDGPU_GEM_DOMAIN_GTT, 1302 sdma_write_length, 4096,
1040 gtt_flags[loop1], &bo1, 1303 AMDGPU_GEM_DOMAIN_GTT,
1041 (void**)&bo1_cpu, &bo1_mc, 1304 gtt_flags[loop1], &bo1,
1042 &bo1_va_handle); 1305 (void**)&bo1_cpu, &bo1_mc,
1043 CU_ASSERT_EQUAL(r, 0); 1306 &bo1_va_handle);
1044 1307 CU_ASSERT_EQUAL(r, 0);
1045 /* set bo1 */ 1308
1046 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 1309 /* set bo1 */
1047 1310 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1048 /* allocate UC bo2 for sDMA use */ 1311
1049 r = amdgpu_bo_alloc_and_map(device_handle, 1312 /* allocate UC bo2 for sDMA use */
1050 sdma_write_length, 4096, 1313 r = amdgpu_bo_alloc_and_map(device_handle,
1051 AMDGPU_GEM_DOMAIN_GTT, 1314 sdma_write_length, 4096,
1052 gtt_flags[loop2], &bo2, 1315 AMDGPU_GEM_DOMAIN_GTT,
1053 (void**)&bo2_cpu, &bo2_mc, 1316 gtt_flags[loop2], &bo2,
1054 &bo2_va_handle); 1317 (void**)&bo2_cpu, &bo2_mc,
1055 CU_ASSERT_EQUAL(r, 0); 1318 &bo2_va_handle);
1056 1319 CU_ASSERT_EQUAL(r, 0);
1057 /* clear bo2 */ 1320
1058 memset((void*)bo2_cpu, 0, sdma_write_length); 1321 /* clear bo2 */
1059 1322 memset((void*)bo2_cpu, 0, sdma_write_length);
1060 resources[0] = bo1; 1323
1061 resources[1] = bo2; 1324 resources[0] = bo1;
1062 1325 resources[1] = bo2;
1063 /* fulfill PM4: test DMA copy linear */ 1326
1064 i = j = 0; 1327 /* fulfill PM4: test DMA copy linear */
1065 if (ip_type == AMDGPU_HW_IP_DMA) { 1328 i = j = 0;
1066 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 1329 if (ip_type == AMDGPU_HW_IP_DMA) {
1067 pm4[i++] = sdma_write_length; 1330 if (family_id == AMDGPU_FAMILY_SI) {
1068 pm4[i++] = 0; 1331 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1069 pm4[i++] = 0xffffffff & bo1_mc; 1332 0, 0, 0,
1070 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1333 sdma_write_length);
1071 pm4[i++] = 0xffffffff & bo2_mc; 1334 pm4[i++] = 0xffffffff & bo2_mc;
1072 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1335 pm4[i++] = 0xffffffff & bo1_mc;
1073 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1336 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1074 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1337 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1075 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1338 } else {
1076 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1339 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1077 PACKET3_DMA_DATA_DST_SEL(0) | 1340 SDMA_COPY_SUB_OPCODE_LINEAR,
1078 PACKET3_DMA_DATA_SRC_SEL(0) | 1341 0);
1079 PACKET3_DMA_DATA_CP_SYNC; 1342 if (family_id >= AMDGPU_FAMILY_AI)
1080 pm4[i++] = 0xfffffffc & bo1_mc; 1343 pm4[i++] = sdma_write_length - 1;
1081 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1344 else
1082 pm4[i++] = 0xfffffffc & bo2_mc; 1345 pm4[i++] = sdma_write_length;
1083 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1346 pm4[i++] = 0;
1084 pm4[i++] = sdma_write_length; 1347 pm4[i++] = 0xffffffff & bo1_mc;
1085 } 1348 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1086 1349 pm4[i++] = 0xffffffff & bo2_mc;
1087 amdgpu_test_exec_cs_helper(context_handle, 1350 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1088 ip_type, 0, 1351 }
1089 i, pm4, 1352 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1090 2, resources, 1353 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1091 ib_info, ibs_request); 1354 if (family_id == AMDGPU_FAMILY_SI) {
1092 1355 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1093 /* verify if SDMA test result meets with expected */ 1356 pm4[i++] = 0xfffffffc & bo1_mc;
1094 i = 0; 1357 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1095 while(i < sdma_write_length) { 1358 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1096 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1359 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1360 PACKET3_DMA_DATA_SI_CP_SYNC |
1361 (0xffff00000000 & bo1_mc) >> 32;
1362 pm4[i++] = 0xfffffffc & bo2_mc;
1363 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1364 pm4[i++] = sdma_write_length;
1365 } else {
1366 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1367 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1368 PACKET3_DMA_DATA_DST_SEL(0) |
1369 PACKET3_DMA_DATA_SRC_SEL(0) |
1370 PACKET3_DMA_DATA_CP_SYNC;
1371 pm4[i++] = 0xfffffffc & bo1_mc;
1372 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1373 pm4[i++] = 0xfffffffc & bo2_mc;
1374 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1375 pm4[i++] = sdma_write_length;
1376 }
1377 }
1378
1379 amdgpu_test_exec_cs_helper(context_handle,
1380 ip_type, ring_id,
1381 i, pm4,
1382 2, resources,
1383 ib_info, ibs_request);
1384
1385 /* verify if SDMA test result meets with expected */
1386 i = 0;
1387 while(i < sdma_write_length) {
1388 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1389 }
1390 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1391 sdma_write_length);
1392 CU_ASSERT_EQUAL(r, 0);
1393 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1394 sdma_write_length);
1395 CU_ASSERT_EQUAL(r, 0);
1396 loop2++;
1097 } 1397 }
1098 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1398 loop1++;
1099 sdma_write_length);
1100 CU_ASSERT_EQUAL(r, 0);
1101 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1102 sdma_write_length);
1103 CU_ASSERT_EQUAL(r, 0);
1104 loop2++;
1105 } 1399 }
1106 loop1++;
1107 } 1400 }
1108 /* clean resources */ 1401 /* clean resources */
1109 free(resources); 1402 free(resources);
@@ -1128,6 +1421,106 @@ static void amdgpu_command_submission_sdma(void)
1128 amdgpu_command_submission_sdma_copy_linear(); 1421 amdgpu_command_submission_sdma_copy_linear();
1129} 1422}
1130 1423
1424static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1425{
1426 amdgpu_context_handle context_handle;
1427 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1428 void *ib_result_cpu, *ib_result_ce_cpu;
1429 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1430 struct amdgpu_cs_request ibs_request[2] = {0};
1431 struct amdgpu_cs_ib_info ib_info[2];
1432 struct amdgpu_cs_fence fence_status[2] = {0};
1433 uint32_t *ptr;
1434 uint32_t expired;
1435 amdgpu_bo_list_handle bo_list;
1436 amdgpu_va_handle va_handle, va_handle_ce;
1437 int r;
1438 int i = 0, ib_cs_num = 2;
1439
1440 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1441 CU_ASSERT_EQUAL(r, 0);
1442
1443 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1444 AMDGPU_GEM_DOMAIN_GTT, 0,
1445 &ib_result_handle, &ib_result_cpu,
1446 &ib_result_mc_address, &va_handle);
1447 CU_ASSERT_EQUAL(r, 0);
1448
1449 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1450 AMDGPU_GEM_DOMAIN_GTT, 0,
1451 &ib_result_ce_handle, &ib_result_ce_cpu,
1452 &ib_result_ce_mc_address, &va_handle_ce);
1453 CU_ASSERT_EQUAL(r, 0);
1454
1455 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1456 ib_result_ce_handle, &bo_list);
1457 CU_ASSERT_EQUAL(r, 0);
1458
1459 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1460
1461 /* IT_SET_CE_DE_COUNTERS */
1462 ptr = ib_result_ce_cpu;
1463 if (family_id != AMDGPU_FAMILY_SI) {
1464 ptr[i++] = 0xc0008900;
1465 ptr[i++] = 0;
1466 }
1467 ptr[i++] = 0xc0008400;
1468 ptr[i++] = 1;
1469 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1470 ib_info[0].size = i;
1471 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1472
1473 /* IT_WAIT_ON_CE_COUNTER */
1474 ptr = ib_result_cpu;
1475 ptr[0] = 0xc0008600;
1476 ptr[1] = 0x00000001;
1477 ib_info[1].ib_mc_address = ib_result_mc_address;
1478 ib_info[1].size = 2;
1479
1480 for (i = 0; i < ib_cs_num; i++) {
1481 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1482 ibs_request[i].number_of_ibs = 2;
1483 ibs_request[i].ibs = ib_info;
1484 ibs_request[i].resources = bo_list;
1485 ibs_request[i].fence_info.handle = NULL;
1486 }
1487
1488 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1489
1490 CU_ASSERT_EQUAL(r, 0);
1491
1492 for (i = 0; i < ib_cs_num; i++) {
1493 fence_status[i].context = context_handle;
1494 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1495 fence_status[i].fence = ibs_request[i].seq_no;
1496 }
1497
1498 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1499 AMDGPU_TIMEOUT_INFINITE,
1500 &expired, NULL);
1501 CU_ASSERT_EQUAL(r, 0);
1502
1503 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1504 ib_result_mc_address, 4096);
1505 CU_ASSERT_EQUAL(r, 0);
1506
1507 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1508 ib_result_ce_mc_address, 4096);
1509 CU_ASSERT_EQUAL(r, 0);
1510
1511 r = amdgpu_bo_list_destroy(bo_list);
1512 CU_ASSERT_EQUAL(r, 0);
1513
1514 r = amdgpu_cs_ctx_free(context_handle);
1515 CU_ASSERT_EQUAL(r, 0);
1516}
1517
static void amdgpu_command_submission_multi_fence(void)
{
	/* Exercise multi-fence waiting in both modes: wait-for-all first,
	 * then wait-for-any. */
	static const bool modes[] = { true, false };
	size_t k;

	for (k = 0; k < sizeof(modes) / sizeof(modes[0]); k++)
		amdgpu_command_submission_multi_fence_wait_all(modes[k]);
}
1523
1131static void amdgpu_userptr_test(void) 1524static void amdgpu_userptr_test(void)
1132{ 1525{
1133 int i, r, j; 1526 int i, r, j;
@@ -1175,15 +1568,28 @@ static void amdgpu_userptr_test(void)
1175 handle = buf_handle; 1568 handle = buf_handle;
1176 1569
1177 j = i = 0; 1570 j = i = 0;
1178 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1571
1179 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 1572 if (family_id == AMDGPU_FAMILY_SI)
1573 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1574 sdma_write_length);
1575 else
1576 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1577 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1180 pm4[i++] = 0xffffffff & bo_mc; 1578 pm4[i++] = 0xffffffff & bo_mc;
1181 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1579 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1182 pm4[i++] = sdma_write_length; 1580 if (family_id >= AMDGPU_FAMILY_AI)
1581 pm4[i++] = sdma_write_length - 1;
1582 else if (family_id != AMDGPU_FAMILY_SI)
1583 pm4[i++] = sdma_write_length;
1183 1584
1184 while (j++ < sdma_write_length) 1585 while (j++ < sdma_write_length)
1185 pm4[i++] = 0xdeadbeaf; 1586 pm4[i++] = 0xdeadbeaf;
1186 1587
1588 if (!fork()) {
1589 pm4[0] = 0x0;
1590 exit(0);
1591 }
1592
1187 amdgpu_test_exec_cs_helper(context_handle, 1593 amdgpu_test_exec_cs_helper(context_handle,
1188 AMDGPU_HW_IP_DMA, 0, 1594 AMDGPU_HW_IP_DMA, 0,
1189 i, pm4, 1595 i, pm4,
@@ -1207,4 +1613,212 @@ static void amdgpu_userptr_test(void)
1207 1613
1208 r = amdgpu_cs_ctx_free(context_handle); 1614 r = amdgpu_cs_ctx_free(context_handle);
1209 CU_ASSERT_EQUAL(r, 0); 1615 CU_ASSERT_EQUAL(r, 0);
1616
1617 wait(NULL);
1618}
1619
1620static void amdgpu_sync_dependency_test(void)
1621{
1622 amdgpu_context_handle context_handle[2];
1623 amdgpu_bo_handle ib_result_handle;
1624 void *ib_result_cpu;
1625 uint64_t ib_result_mc_address;
1626 struct amdgpu_cs_request ibs_request;
1627 struct amdgpu_cs_ib_info ib_info;
1628 struct amdgpu_cs_fence fence_status;
1629 uint32_t expired;
1630 int i, j, r;
1631 amdgpu_bo_list_handle bo_list;
1632 amdgpu_va_handle va_handle;
1633 static uint32_t *ptr;
1634 uint64_t seq_no;
1635
1636 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1637 CU_ASSERT_EQUAL(r, 0);
1638 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1639 CU_ASSERT_EQUAL(r, 0);
1640
1641 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1642 AMDGPU_GEM_DOMAIN_GTT, 0,
1643 &ib_result_handle, &ib_result_cpu,
1644 &ib_result_mc_address, &va_handle);
1645 CU_ASSERT_EQUAL(r, 0);
1646
1647 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1648 &bo_list);
1649 CU_ASSERT_EQUAL(r, 0);
1650
1651 ptr = ib_result_cpu;
1652 i = 0;
1653
1654 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
1655
1656 /* Dispatch minimal init config and verify it's executed */
1657 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1658 ptr[i++] = 0x80000000;
1659 ptr[i++] = 0x80000000;
1660
1661 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1662 ptr[i++] = 0x80000000;
1663
1664
1665 /* Program compute regs */
1666 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1667 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1668 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1669 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1670
1671
1672 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1673 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1674 /*
1675 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
1676 SGPRS = 1
1677 PRIORITY = 0
1678 FLOAT_MODE = 192 (0xc0)
1679 PRIV = 0
1680 DX10_CLAMP = 1
1681 DEBUG_MODE = 0
1682 IEEE_MODE = 0
1683 BULKY = 0
1684 CDBG_USER = 0
1685 *
1686 */
1687 ptr[i++] = 0x002c0040;
1688
1689
1690 /*
1691 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1692 USER_SGPR = 8
1693 TRAP_PRESENT = 0
1694 TGID_X_EN = 0
1695 TGID_Y_EN = 0
1696 TGID_Z_EN = 0
1697 TG_SIZE_EN = 0
1698 TIDIG_COMP_CNT = 0
1699 EXCP_EN_MSB = 0
1700 LDS_SIZE = 0
1701 EXCP_EN = 0
1702 *
1703 */
1704 ptr[i++] = 0x00000010;
1705
1706
1707/*
1708 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
1709 WAVESIZE = 0
1710 *
1711 */
1712 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1713 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
1714 ptr[i++] = 0x00000100;
1715
1716 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1717 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
1718 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
1719 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1720
1721 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1722 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
1723 ptr[i++] = 0;
1724
1725 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
1726 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
1727 ptr[i++] = 1;
1728 ptr[i++] = 1;
1729 ptr[i++] = 1;
1730
1731
1732 /* Dispatch */
1733 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1734 ptr[i++] = 1;
1735 ptr[i++] = 1;
1736 ptr[i++] = 1;
1737 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
1738
1739
1740 while (i & 7)
1741 ptr[i++] = 0xffff1000; /* type3 nop packet */
1742
1743 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1744 ib_info.ib_mc_address = ib_result_mc_address;
1745 ib_info.size = i;
1746
1747 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1748 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1749 ibs_request.ring = 0;
1750 ibs_request.number_of_ibs = 1;
1751 ibs_request.ibs = &ib_info;
1752 ibs_request.resources = bo_list;
1753 ibs_request.fence_info.handle = NULL;
1754
1755 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
1756 CU_ASSERT_EQUAL(r, 0);
1757 seq_no = ibs_request.seq_no;
1758
1759
1760
1761 /* Prepare second command with dependency on the first */
1762 j = i;
1763 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
1764 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1765 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
1766 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1767 ptr[i++] = 99;
1768
1769 while (i & 7)
1770 ptr[i++] = 0xffff1000; /* type3 nop packet */
1771
1772 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1773 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
1774 ib_info.size = i - j;
1775
1776 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1777 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1778 ibs_request.ring = 0;
1779 ibs_request.number_of_ibs = 1;
1780 ibs_request.ibs = &ib_info;
1781 ibs_request.resources = bo_list;
1782 ibs_request.fence_info.handle = NULL;
1783
1784 ibs_request.number_of_dependencies = 1;
1785
1786 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
1787 ibs_request.dependencies[0].context = context_handle[1];
1788 ibs_request.dependencies[0].ip_instance = 0;
1789 ibs_request.dependencies[0].ring = 0;
1790 ibs_request.dependencies[0].fence = seq_no;
1791
1792
1793 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
1794 CU_ASSERT_EQUAL(r, 0);
1795
1796
1797 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1798 fence_status.context = context_handle[0];
1799 fence_status.ip_type = AMDGPU_HW_IP_GFX;
1800 fence_status.ip_instance = 0;
1801 fence_status.ring = 0;
1802 fence_status.fence = ibs_request.seq_no;
1803
1804 r = amdgpu_cs_query_fence_status(&fence_status,
1805 AMDGPU_TIMEOUT_INFINITE,0, &expired);
1806 CU_ASSERT_EQUAL(r, 0);
1807
1808 /* Expect the second command to wait for shader to complete */
1809 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
1810
1811 r = amdgpu_bo_list_destroy(bo_list);
1812 CU_ASSERT_EQUAL(r, 0);
1813
1814 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1815 ib_result_mc_address, 4096);
1816 CU_ASSERT_EQUAL(r, 0);
1817
1818 r = amdgpu_cs_ctx_free(context_handle[0]);
1819 CU_ASSERT_EQUAL(r, 0);
1820 r = amdgpu_cs_ctx_free(context_handle[1]);
1821 CU_ASSERT_EQUAL(r, 0);
1822
1823 free(ibs_request.dependencies);
1210} 1824}