author     Ken Wang        2015-07-10 09:22:27 -0500
committer  Alex Deucher    2015-08-05 12:47:52 -0500
commit     926c80568691e04abdfcd21b6e9be61331e95b03 (patch)
tree       bc358b99874cfc930c8e3853b9d3db3f7b4ecee9 /amdgpu
parent     01e4546ff34a57faaefd41fce323c691902501c5 (diff)
amdgpu : move management of user fence from libdrm to UMD
Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
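With this change the user-mode driver owns the user-fence buffer object that libdrm previously allocated in amdgpu_cs_ctx_create(): the BO and a qword offset are passed in through the new fence_info member of struct amdgpu_cs_request, and the kernel-assigned sequence number comes back in the request's seq_no field instead of through a separate fences[] array. The sketch below illustrates the new calling convention; the helper name, the GFX ring choice, and the omitted BO-list and cleanup handling are illustrative only, not part of the patch.

#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Sketch only: allocate a caller-owned user-fence BO (as libdrm used to do
 * internally), submit one IB against it, and hand back the CPU mapping of
 * the fence BO plus the sequence number. */
static int submit_with_user_fence(amdgpu_device_handle dev,
				  amdgpu_context_handle context,
				  struct amdgpu_cs_ib_info *ib,
				  uint64_t **out_fence_cpu,
				  uint64_t *out_seq_no)
{
	struct amdgpu_bo_alloc_request alloc_buffer = {};
	struct amdgpu_bo_alloc_result info = {};
	struct amdgpu_cs_request request = {};
	int r;

	/* The UMD now owns the fence BO that amdgpu_cs_ctx_create() used to allocate. */
	alloc_buffer.alloc_size = 4 * 1024;
	alloc_buffer.phys_alignment = 4 * 1024;
	alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
	r = amdgpu_bo_alloc(dev, &alloc_buffer, &info);
	if (r)
		return r;
	r = amdgpu_bo_cpu_map(info.buf_handle, (void **)out_fence_cpu);
	if (r)
		return r;

	/* request.resources (BO list) setup omitted for brevity. */
	request.ip_type = AMDGPU_HW_IP_GFX;
	request.ring = 0;
	request.number_of_ibs = 1;
	request.ibs = ib;
	request.fence_info.handle = info.buf_handle;	/* new: user fence BO */
	request.fence_info.offset = 0;			/* in units of sizeof(uint64_t) */

	/* New signature: no fences[] out array, the sequence number comes
	 * back in the request itself. */
	r = amdgpu_cs_submit(context, 0, &request, 1);
	if (r)
		return r;

	*out_seq_no = request.seq_no;
	return 0;
}

A real UMD would of course allocate and map the fence BO once and reuse it across submissions rather than per call.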
Diffstat (limited to 'amdgpu')
-rw-r--r--   amdgpu/amdgpu.h            39
-rw-r--r--   amdgpu/amdgpu_cs.c        130
-rw-r--r--   amdgpu/amdgpu_internal.h    5
3 files changed, 52 insertions, 122 deletions
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 76319687..125377c2 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -311,6 +311,20 @@ struct amdgpu_cs_ib_info {
 };
 
 /**
+ * Structure describing fence information
+ *
+ * \sa amdgpu_cs_request, amdgpu_cs_query_fence,
+ * amdgpu_cs_submit(), amdgpu_cs_query_fence_status()
+*/
+struct amdgpu_cs_fence_info {
+	/** buffer object for the fence */
+	amdgpu_bo_handle handle;
+
+	/** fence offset in the unit of sizeof(uint64_t) */
+	uint64_t offset;
+};
+
+/**
  * Structure describing submission request
  *
  * \note We could have several IBs as packet. e.g. CE, CE, DE case for gfx
@@ -357,6 +371,16 @@ struct amdgpu_cs_request {
 	 * IBs to submit. Those IBs will be submit together as single entity
 	 */
 	struct amdgpu_cs_ib_info *ibs;
+
+	/**
+	 * The returned sequence number for the command submission
+	 */
+	uint64_t seq_no;
+
+	/**
+	 * The fence information
+	 */
+	struct amdgpu_cs_fence_info fence_info;
 };
 
 /**
@@ -841,22 +865,20 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
  * from the same GPU context to the same ip:ip_instance:ring will be executed in
  * order.
  *
+ * The caller can specify the user fence buffer/location with the fence_info in the
+ * cs_request.The sequence number is returned via the 'seq_no' paramter
+ * in ibs_request structure.
+ *
  *
  * \param   dev            - \c [in]  Device handle.
  *                                    See #amdgpu_device_initialize()
  * \param   context        - \c [in]  GPU Context
  * \param   flags          - \c [in]  Global submission flags
- * \param   ibs_request    - \c [in]  Pointer to submission requests.
+ * \param   ibs_request    - \c [in/out] Pointer to submission requests.
  *                                    We could submit to the several
  *                                    engines/rings simulteniously as
  *                                    'atomic' operation
  * \param   number_of_requests - \c [in] Number of submission requests
- * \param   fences         - \c [out] Pointer to array of data to get
- *                                    fences to identify submission
- *                                    requests. Timestamps are valid
- *                                    in this GPU context and could be used
- *                                    to identify/detect completion of
- *                                    submission request
  *
  * \return  0 on success\n
  *         <0 - Negative POSIX Error code
@@ -873,8 +895,7 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
 int amdgpu_cs_submit(amdgpu_context_handle context,
 		     uint64_t flags,
 		     struct amdgpu_cs_request *ibs_request,
-		     uint32_t number_of_requests,
-		     uint64_t *fences);
+		     uint32_t number_of_requests);
 
 /**
  * Query status of Command Buffer Submission
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index d9aa22d6..1978e47a 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -43,8 +43,6 @@
 int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
 			 amdgpu_context_handle *context)
 {
-	struct amdgpu_bo_alloc_request alloc_buffer = {};
-	struct amdgpu_bo_alloc_result info = {};
 	struct amdgpu_context *gpu_context;
 	union drm_amdgpu_ctx args;
 	int r;
@@ -62,44 +60,22 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
 
 	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
 	if (r)
-		goto error_mutex;
-
-	/* Create the fence BO */
-	alloc_buffer.alloc_size = 4 * 1024;
-	alloc_buffer.phys_alignment = 4 * 1024;
-	alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
-
-	r = amdgpu_bo_alloc(dev, &alloc_buffer, &info);
-	if (r)
-		goto error_fence_alloc;
-	gpu_context->fence_bo = info.buf_handle;
-
-	r = amdgpu_bo_cpu_map(gpu_context->fence_bo, &gpu_context->fence_cpu);
-	if (r)
-		goto error_fence_map;
+		goto error;
 
 	/* Create the context */
 	memset(&args, 0, sizeof(args));
 	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 	if (r)
-		goto error_kernel;
+		goto error;
 
 	gpu_context->id = args.out.alloc.ctx_id;
 	*context = (amdgpu_context_handle)gpu_context;
 
 	return 0;
 
-error_kernel:
-	amdgpu_bo_cpu_unmap(gpu_context->fence_bo);
-
-error_fence_map:
-	amdgpu_bo_free(gpu_context->fence_bo);
-
-error_fence_alloc:
+error:
 	pthread_mutex_destroy(&gpu_context->sequence_mutex);
-
-error_mutex:
 	free(gpu_context);
 	return r;
 }
@@ -120,14 +96,6 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
 	if (NULL == context)
 		return -EINVAL;
 
-	r = amdgpu_bo_cpu_unmap(context->fence_bo);
-	if (r)
-		return r;
-
-	r = amdgpu_bo_free(context->fence_bo);
-	if (r)
-		return r;
-
 	pthread_mutex_destroy(&context->sequence_mutex);
 
 	/* now deal with kernel side */
@@ -163,11 +131,6 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
 	return r;
 }
 
-static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
-{
-	return ip * AMDGPU_CS_MAX_RINGS + ring;
-}
-
 /**
  * Submit command to kernel DRM
  * \param   dev - \c [in]  Device handle
@@ -179,8 +142,7 @@ static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
  * \sa amdgpu_cs_submit()
 */
 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
-				struct amdgpu_cs_request *ibs_request,
-				uint64_t *fence)
+				struct amdgpu_cs_request *ibs_request)
 {
 	union drm_amdgpu_cs cs;
 	uint64_t *chunk_array;
@@ -188,6 +150,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	struct drm_amdgpu_cs_chunk_data *chunk_data;
 	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
 	uint32_t i, size;
+	bool user_fence;
 	int r = 0;
 
 	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
@@ -196,13 +159,15 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 		return -EINVAL;
 	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
 		return -EINVAL;
+	user_fence = (ibs_request->fence_info.handle != NULL);
 
-	size = ibs_request->number_of_ibs + 2;
+	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
 
 	chunk_array = alloca(sizeof(uint64_t) * size);
 	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
 
-	size = ibs_request->number_of_ibs + 1;
+	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);
+
 	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
 
 	memset(&cs, 0, sizeof(cs));
@@ -232,8 +197,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 
 	pthread_mutex_lock(&context->sequence_mutex);
 
-	if (ibs_request->ip_type != AMDGPU_HW_IP_UVD &&
-	    ibs_request->ip_type != AMDGPU_HW_IP_VCE) {
+	if (user_fence) {
 		i = cs.in.num_chunks++;
 
 		/* fence chunk */
@@ -243,11 +207,10 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 
 		/* fence bo handle */
-		chunk_data[i].fence_data.handle = context->fence_bo->handle;
+		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
 		/* offset */
-		chunk_data[i].fence_data.offset = amdgpu_cs_fence_index(
-			ibs_request->ip_type, ibs_request->ring);
-		chunk_data[i].fence_data.offset *= sizeof(uint64_t);
+		chunk_data[i].fence_data.offset =
+			ibs_request->fence_info.offset * sizeof(uint64_t);
 	}
 
 	if (ibs_request->number_of_dependencies) {
@@ -283,7 +246,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	if (r)
 		goto error_unlock;
 
-	*fence = cs.out.handle;
+	ibs_request->seq_no = cs.out.handle;
 
 error_unlock:
 	pthread_mutex_unlock(&context->sequence_mutex);
@@ -294,25 +257,23 @@ error_unlock:
 int amdgpu_cs_submit(amdgpu_context_handle context,
 		     uint64_t flags,
 		     struct amdgpu_cs_request *ibs_request,
-		     uint32_t number_of_requests,
-		     uint64_t *fences)
+		     uint32_t number_of_requests)
 {
 	uint32_t i;
 	int r;
+	uint64_t bo_size;
+	uint64_t bo_offset;
 
 	if (NULL == context)
 		return -EINVAL;
 	if (NULL == ibs_request)
 		return -EINVAL;
-	if (NULL == fences)
-		return -EINVAL;
 
 	r = 0;
 	for (i = 0; i < number_of_requests; i++) {
-		r = amdgpu_cs_submit_one(context, ibs_request, fences);
+		r = amdgpu_cs_submit_one(context, ibs_request);
 		if (r)
 			break;
-		fences++;
 		ibs_request++;
 	}
 
@@ -380,10 +341,6 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
 				 uint64_t flags,
 				 uint32_t *expired)
 {
-	amdgpu_context_handle context;
-	uint64_t *expired_fence;
-	unsigned ip_type, ip_instance;
-	uint32_t ring;
 	bool busy = true;
 	int r;
 
@@ -398,57 +355,14 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
 	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
 		return -EINVAL;
 
-	context = fence->context;
-	ip_type = fence->ip_type;
-	ip_instance = fence->ip_instance;
-	ring = fence->ring;
-	expired_fence = &context->expired_fences[ip_type][ip_instance][ring];
 	*expired = false;
 
-	pthread_mutex_lock(&context->sequence_mutex);
-	if (fence->fence <= *expired_fence) {
-		/* This fence value is expired already. */
-		pthread_mutex_unlock(&context->sequence_mutex);
-		*expired = true;
-		return 0;
-	}
-
-	/* Check the user fence only if the IP supports user fences. */
-	if (fence->ip_type != AMDGPU_HW_IP_UVD &&
-	    fence->ip_type != AMDGPU_HW_IP_VCE) {
-		uint64_t *signaled_fence = context->fence_cpu;
-		signaled_fence += amdgpu_cs_fence_index(ip_type, ring);
-
-		if (fence->fence <= *signaled_fence) {
-			/* This fence value is signaled already. */
-			*expired_fence = *signaled_fence;
-			pthread_mutex_unlock(&context->sequence_mutex);
-			*expired = true;
-			return 0;
-		}
-
-		/* Checking the user fence is enough. */
-		if (timeout_ns == 0) {
-			pthread_mutex_unlock(&context->sequence_mutex);
-			return 0;
-		}
-	}
+	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
+				fence->ip_instance, fence->ring,
+				fence->fence, timeout_ns, flags, &busy);
 
-	pthread_mutex_unlock(&context->sequence_mutex);
-
-	r = amdgpu_ioctl_wait_cs(context, ip_type, ip_instance, ring,
-				 fence->fence, timeout_ns,
-				 flags, &busy);
-	if (!r && !busy) {
+	if (!r && !busy)
 		*expired = true;
-		pthread_mutex_lock(&context->sequence_mutex);
-		/* The thread doesn't hold sequence_mutex. Other thread could
-		   update *expired_fence already. Check whether there is a
-		   newerly expired fence. */
-		if (fence->fence > *expired_fence)
-			*expired_fence = fence->fence;
-		pthread_mutex_unlock(&context->sequence_mutex);
-	}
 
 	return r;
 }
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
index e35923ff..bf7788dd 100644
--- a/amdgpu/amdgpu_internal.h
+++ b/amdgpu/amdgpu_internal.h
@@ -109,11 +109,6 @@ struct amdgpu_context {
 	/** Mutex for accessing fences and to maintain command submissions
 	    in good sequence. */
 	pthread_mutex_t sequence_mutex;
-	/** Buffer for user fences */
-	struct amdgpu_bo *fence_bo;
-	void *fence_cpu;
-	/** The newest expired fence for the ring of the ip blocks. */
-	uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
 	/* context id*/
 	uint32_t id;
 };
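With the expired-fence bookkeeping and the CPU-side read of the mapped fence buffer removed from amdgpu_cs_query_fence_status(), a UMD that wants the old fast path has to poll its own fence mapping before falling back to the wait ioctl. A minimal sketch of that check, mirroring the comparison the deleted libdrm code performed (the helper name is illustrative, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

/* Returns true once the GPU has written a value >= seq_no into the
 * user-fence slot.  fence_cpu is the CPU mapping of the fence BO and
 * offset is in units of sizeof(uint64_t), as in amdgpu_cs_fence_info. */
static bool user_fence_signalled(volatile uint64_t *fence_cpu,
				 uint64_t offset, uint64_t seq_no)
{
	return fence_cpu[offset] >= seq_no;
}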