diff options
author | Ken Wang | 2015-07-10 09:22:27 -0500 |
---|---|---|
committer | Alex Deucher | 2015-08-05 12:47:52 -0500 |
commit | 926c80568691e04abdfcd21b6e9be61331e95b03 (patch) | |
tree | bc358b99874cfc930c8e3853b9d3db3f7b4ecee9 /amdgpu | |
parent | 01e4546ff34a57faaefd41fce323c691902501c5 (diff) | |
download | external-libdrm-926c80568691e04abdfcd21b6e9be61331e95b03.tar.gz external-libdrm-926c80568691e04abdfcd21b6e9be61331e95b03.tar.xz external-libdrm-926c80568691e04abdfcd21b6e9be61331e95b03.zip |
amdgpu : move management of user fence from libdrm to UMD
Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Diffstat (limited to 'amdgpu')
-rw-r--r-- | amdgpu/amdgpu.h | 39 | ||||
-rw-r--r-- | amdgpu/amdgpu_cs.c | 130 | ||||
-rw-r--r-- | amdgpu/amdgpu_internal.h | 5 |
3 files changed, 52 insertions, 122 deletions
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 76319687..125377c2 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h | |||
@@ -311,6 +311,20 @@ struct amdgpu_cs_ib_info { | |||
311 | }; | 311 | }; |
312 | 312 | ||
313 | /** | 313 | /** |
314 | * Structure describing fence information | ||
315 | * | ||
316 | * \sa amdgpu_cs_request, amdgpu_cs_query_fence, | ||
317 | * amdgpu_cs_submit(), amdgpu_cs_query_fence_status() | ||
318 | */ | ||
319 | struct amdgpu_cs_fence_info { | ||
320 | /** buffer object for the fence */ | ||
321 | amdgpu_bo_handle handle; | ||
322 | |||
323 | /** fence offset in the unit of sizeof(uint64_t) */ | ||
324 | uint64_t offset; | ||
325 | }; | ||
326 | |||
327 | /** | ||
314 | * Structure describing submission request | 328 | * Structure describing submission request |
315 | * | 329 | * |
316 | * \note We could have several IBs as packet. e.g. CE, CE, DE case for gfx | 330 | * \note We could have several IBs as packet. e.g. CE, CE, DE case for gfx |
@@ -357,6 +371,16 @@ struct amdgpu_cs_request { | |||
357 | * IBs to submit. Those IBs will be submit together as single entity | 371 | * IBs to submit. Those IBs will be submit together as single entity |
358 | */ | 372 | */ |
359 | struct amdgpu_cs_ib_info *ibs; | 373 | struct amdgpu_cs_ib_info *ibs; |
374 | |||
375 | /** | ||
376 | * The returned sequence number for the command submission | ||
377 | */ | ||
378 | uint64_t seq_no; | ||
379 | |||
380 | /** | ||
381 | * The fence information | ||
382 | */ | ||
383 | struct amdgpu_cs_fence_info fence_info; | ||
360 | }; | 384 | }; |
361 | 385 | ||
362 | /** | 386 | /** |
@@ -841,22 +865,20 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context, | |||
841 | * from the same GPU context to the same ip:ip_instance:ring will be executed in | 865 | * from the same GPU context to the same ip:ip_instance:ring will be executed in |
842 | * order. | 866 | * order. |
843 | * | 867 | * |
868 | * The caller can specify the user fence buffer/location with the fence_info in the | ||
869 | * cs_request. The sequence number is returned via the 'seq_no' parameter | ||
870 | * in ibs_request structure. | ||
871 | * | ||
844 | * | 872 | * |
845 | * \param dev - \c [in] Device handle. | 873 | * \param dev - \c [in] Device handle. |
846 | * See #amdgpu_device_initialize() | 874 | * See #amdgpu_device_initialize() |
847 | * \param context - \c [in] GPU Context | 875 | * \param context - \c [in] GPU Context |
848 | * \param flags - \c [in] Global submission flags | 876 | * \param flags - \c [in] Global submission flags |
849 | * \param ibs_request - \c [in] Pointer to submission requests. | 877 | * \param ibs_request - \c [in/out] Pointer to submission requests. |
850 | * We could submit to the several | 878 | * We could submit to the several |
851 | * engines/rings simultaneously as | 879 | * engines/rings simultaneously as |
852 | * 'atomic' operation | 880 | * 'atomic' operation |
853 | * \param number_of_requests - \c [in] Number of submission requests | 881 | * \param number_of_requests - \c [in] Number of submission requests |
854 | * \param fences - \c [out] Pointer to array of data to get | ||
855 | * fences to identify submission | ||
856 | * requests. Timestamps are valid | ||
857 | * in this GPU context and could be used | ||
858 | * to identify/detect completion of | ||
859 | * submission request | ||
860 | * | 882 | * |
861 | * \return 0 on success\n | 883 | * \return 0 on success\n |
862 | * <0 - Negative POSIX Error code | 884 | * <0 - Negative POSIX Error code |
@@ -873,8 +895,7 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context, | |||
873 | int amdgpu_cs_submit(amdgpu_context_handle context, | 895 | int amdgpu_cs_submit(amdgpu_context_handle context, |
874 | uint64_t flags, | 896 | uint64_t flags, |
875 | struct amdgpu_cs_request *ibs_request, | 897 | struct amdgpu_cs_request *ibs_request, |
876 | uint32_t number_of_requests, | 898 | uint32_t number_of_requests); |
877 | uint64_t *fences); | ||
878 | 899 | ||
879 | /** | 900 | /** |
880 | * Query status of Command Buffer Submission | 901 | * Query status of Command Buffer Submission |
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c index d9aa22d6..1978e47a 100644 --- a/amdgpu/amdgpu_cs.c +++ b/amdgpu/amdgpu_cs.c | |||
@@ -43,8 +43,6 @@ | |||
43 | int amdgpu_cs_ctx_create(amdgpu_device_handle dev, | 43 | int amdgpu_cs_ctx_create(amdgpu_device_handle dev, |
44 | amdgpu_context_handle *context) | 44 | amdgpu_context_handle *context) |
45 | { | 45 | { |
46 | struct amdgpu_bo_alloc_request alloc_buffer = {}; | ||
47 | struct amdgpu_bo_alloc_result info = {}; | ||
48 | struct amdgpu_context *gpu_context; | 46 | struct amdgpu_context *gpu_context; |
49 | union drm_amdgpu_ctx args; | 47 | union drm_amdgpu_ctx args; |
50 | int r; | 48 | int r; |
@@ -62,44 +60,22 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev, | |||
62 | 60 | ||
63 | r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL); | 61 | r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL); |
64 | if (r) | 62 | if (r) |
65 | goto error_mutex; | 63 | goto error; |
66 | |||
67 | /* Create the fence BO */ | ||
68 | alloc_buffer.alloc_size = 4 * 1024; | ||
69 | alloc_buffer.phys_alignment = 4 * 1024; | ||
70 | alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; | ||
71 | |||
72 | r = amdgpu_bo_alloc(dev, &alloc_buffer, &info); | ||
73 | if (r) | ||
74 | goto error_fence_alloc; | ||
75 | gpu_context->fence_bo = info.buf_handle; | ||
76 | |||
77 | r = amdgpu_bo_cpu_map(gpu_context->fence_bo, &gpu_context->fence_cpu); | ||
78 | if (r) | ||
79 | goto error_fence_map; | ||
80 | 64 | ||
81 | /* Create the context */ | 65 | /* Create the context */ |
82 | memset(&args, 0, sizeof(args)); | 66 | memset(&args, 0, sizeof(args)); |
83 | args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; | 67 | args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; |
84 | r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); | 68 | r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); |
85 | if (r) | 69 | if (r) |
86 | goto error_kernel; | 70 | goto error; |
87 | 71 | ||
88 | gpu_context->id = args.out.alloc.ctx_id; | 72 | gpu_context->id = args.out.alloc.ctx_id; |
89 | *context = (amdgpu_context_handle)gpu_context; | 73 | *context = (amdgpu_context_handle)gpu_context; |
90 | 74 | ||
91 | return 0; | 75 | return 0; |
92 | 76 | ||
93 | error_kernel: | 77 | error: |
94 | amdgpu_bo_cpu_unmap(gpu_context->fence_bo); | ||
95 | |||
96 | error_fence_map: | ||
97 | amdgpu_bo_free(gpu_context->fence_bo); | ||
98 | |||
99 | error_fence_alloc: | ||
100 | pthread_mutex_destroy(&gpu_context->sequence_mutex); | 78 | pthread_mutex_destroy(&gpu_context->sequence_mutex); |
101 | |||
102 | error_mutex: | ||
103 | free(gpu_context); | 79 | free(gpu_context); |
104 | return r; | 80 | return r; |
105 | } | 81 | } |
@@ -120,14 +96,6 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context) | |||
120 | if (NULL == context) | 96 | if (NULL == context) |
121 | return -EINVAL; | 97 | return -EINVAL; |
122 | 98 | ||
123 | r = amdgpu_bo_cpu_unmap(context->fence_bo); | ||
124 | if (r) | ||
125 | return r; | ||
126 | |||
127 | r = amdgpu_bo_free(context->fence_bo); | ||
128 | if (r) | ||
129 | return r; | ||
130 | |||
131 | pthread_mutex_destroy(&context->sequence_mutex); | 99 | pthread_mutex_destroy(&context->sequence_mutex); |
132 | 100 | ||
133 | /* now deal with kernel side */ | 101 | /* now deal with kernel side */ |
@@ -163,11 +131,6 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context, | |||
163 | return r; | 131 | return r; |
164 | } | 132 | } |
165 | 133 | ||
166 | static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring) | ||
167 | { | ||
168 | return ip * AMDGPU_CS_MAX_RINGS + ring; | ||
169 | } | ||
170 | |||
171 | /** | 134 | /** |
172 | * Submit command to kernel DRM | 135 | * Submit command to kernel DRM |
173 | * \param dev - \c [in] Device handle | 136 | * \param dev - \c [in] Device handle |
@@ -179,8 +142,7 @@ static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring) | |||
179 | * \sa amdgpu_cs_submit() | 142 | * \sa amdgpu_cs_submit() |
180 | */ | 143 | */ |
181 | static int amdgpu_cs_submit_one(amdgpu_context_handle context, | 144 | static int amdgpu_cs_submit_one(amdgpu_context_handle context, |
182 | struct amdgpu_cs_request *ibs_request, | 145 | struct amdgpu_cs_request *ibs_request) |
183 | uint64_t *fence) | ||
184 | { | 146 | { |
185 | union drm_amdgpu_cs cs; | 147 | union drm_amdgpu_cs cs; |
186 | uint64_t *chunk_array; | 148 | uint64_t *chunk_array; |
@@ -188,6 +150,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, | |||
188 | struct drm_amdgpu_cs_chunk_data *chunk_data; | 150 | struct drm_amdgpu_cs_chunk_data *chunk_data; |
189 | struct drm_amdgpu_cs_chunk_dep *dependencies = NULL; | 151 | struct drm_amdgpu_cs_chunk_dep *dependencies = NULL; |
190 | uint32_t i, size; | 152 | uint32_t i, size; |
153 | bool user_fence; | ||
191 | int r = 0; | 154 | int r = 0; |
192 | 155 | ||
193 | if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM) | 156 | if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM) |
@@ -196,13 +159,15 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, | |||
196 | return -EINVAL; | 159 | return -EINVAL; |
197 | if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT) | 160 | if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT) |
198 | return -EINVAL; | 161 | return -EINVAL; |
162 | user_fence = (ibs_request->fence_info.handle != NULL); | ||
199 | 163 | ||
200 | size = ibs_request->number_of_ibs + 2; | 164 | size = ibs_request->number_of_ibs + (user_fence ? 2 : 1); |
201 | 165 | ||
202 | chunk_array = alloca(sizeof(uint64_t) * size); | 166 | chunk_array = alloca(sizeof(uint64_t) * size); |
203 | chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size); | 167 | chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size); |
204 | 168 | ||
205 | size = ibs_request->number_of_ibs + 1; | 169 | size = ibs_request->number_of_ibs + (user_fence ? 1 : 0); |
170 | |||
206 | chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size); | 171 | chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size); |
207 | 172 | ||
208 | memset(&cs, 0, sizeof(cs)); | 173 | memset(&cs, 0, sizeof(cs)); |
@@ -232,8 +197,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, | |||
232 | 197 | ||
233 | pthread_mutex_lock(&context->sequence_mutex); | 198 | pthread_mutex_lock(&context->sequence_mutex); |
234 | 199 | ||
235 | if (ibs_request->ip_type != AMDGPU_HW_IP_UVD && | 200 | if (user_fence) { |
236 | ibs_request->ip_type != AMDGPU_HW_IP_VCE) { | ||
237 | i = cs.in.num_chunks++; | 201 | i = cs.in.num_chunks++; |
238 | 202 | ||
239 | /* fence chunk */ | 203 | /* fence chunk */ |
@@ -243,11 +207,10 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, | |||
243 | chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; | 207 | chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; |
244 | 208 | ||
245 | /* fence bo handle */ | 209 | /* fence bo handle */ |
246 | chunk_data[i].fence_data.handle = context->fence_bo->handle; | 210 | chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle; |
247 | /* offset */ | 211 | /* offset */ |
248 | chunk_data[i].fence_data.offset = amdgpu_cs_fence_index( | 212 | chunk_data[i].fence_data.offset = |
249 | ibs_request->ip_type, ibs_request->ring); | 213 | ibs_request->fence_info.offset * sizeof(uint64_t); |
250 | chunk_data[i].fence_data.offset *= sizeof(uint64_t); | ||
251 | } | 214 | } |
252 | 215 | ||
253 | if (ibs_request->number_of_dependencies) { | 216 | if (ibs_request->number_of_dependencies) { |
@@ -283,7 +246,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, | |||
283 | if (r) | 246 | if (r) |
284 | goto error_unlock; | 247 | goto error_unlock; |
285 | 248 | ||
286 | *fence = cs.out.handle; | 249 | ibs_request->seq_no = cs.out.handle; |
287 | 250 | ||
288 | error_unlock: | 251 | error_unlock: |
289 | pthread_mutex_unlock(&context->sequence_mutex); | 252 | pthread_mutex_unlock(&context->sequence_mutex); |
@@ -294,25 +257,23 @@ error_unlock: | |||
294 | int amdgpu_cs_submit(amdgpu_context_handle context, | 257 | int amdgpu_cs_submit(amdgpu_context_handle context, |
295 | uint64_t flags, | 258 | uint64_t flags, |
296 | struct amdgpu_cs_request *ibs_request, | 259 | struct amdgpu_cs_request *ibs_request, |
297 | uint32_t number_of_requests, | 260 | uint32_t number_of_requests) |
298 | uint64_t *fences) | ||
299 | { | 261 | { |
300 | uint32_t i; | 262 | uint32_t i; |
301 | int r; | 263 | int r; |
264 | uint64_t bo_size; | ||
265 | uint64_t bo_offset; | ||
302 | 266 | ||
303 | if (NULL == context) | 267 | if (NULL == context) |
304 | return -EINVAL; | 268 | return -EINVAL; |
305 | if (NULL == ibs_request) | 269 | if (NULL == ibs_request) |
306 | return -EINVAL; | 270 | return -EINVAL; |
307 | if (NULL == fences) | ||
308 | return -EINVAL; | ||
309 | 271 | ||
310 | r = 0; | 272 | r = 0; |
311 | for (i = 0; i < number_of_requests; i++) { | 273 | for (i = 0; i < number_of_requests; i++) { |
312 | r = amdgpu_cs_submit_one(context, ibs_request, fences); | 274 | r = amdgpu_cs_submit_one(context, ibs_request); |
313 | if (r) | 275 | if (r) |
314 | break; | 276 | break; |
315 | fences++; | ||
316 | ibs_request++; | 277 | ibs_request++; |
317 | } | 278 | } |
318 | 279 | ||
@@ -380,10 +341,6 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence, | |||
380 | uint64_t flags, | 341 | uint64_t flags, |
381 | uint32_t *expired) | 342 | uint32_t *expired) |
382 | { | 343 | { |
383 | amdgpu_context_handle context; | ||
384 | uint64_t *expired_fence; | ||
385 | unsigned ip_type, ip_instance; | ||
386 | uint32_t ring; | ||
387 | bool busy = true; | 344 | bool busy = true; |
388 | int r; | 345 | int r; |
389 | 346 | ||
@@ -398,57 +355,14 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence, | |||
398 | if (fence->ring >= AMDGPU_CS_MAX_RINGS) | 355 | if (fence->ring >= AMDGPU_CS_MAX_RINGS) |
399 | return -EINVAL; | 356 | return -EINVAL; |
400 | 357 | ||
401 | context = fence->context; | ||
402 | ip_type = fence->ip_type; | ||
403 | ip_instance = fence->ip_instance; | ||
404 | ring = fence->ring; | ||
405 | expired_fence = &context->expired_fences[ip_type][ip_instance][ring]; | ||
406 | *expired = false; | 358 | *expired = false; |
407 | 359 | ||
408 | pthread_mutex_lock(&context->sequence_mutex); | 360 | r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type, |
409 | if (fence->fence <= *expired_fence) { | 361 | fence->ip_instance, fence->ring, |
410 | /* This fence value is expired already. */ | 362 | fence->fence, timeout_ns, flags, &busy); |
411 | pthread_mutex_unlock(&context->sequence_mutex); | ||
412 | *expired = true; | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | /* Check the user fence only if the IP supports user fences. */ | ||
417 | if (fence->ip_type != AMDGPU_HW_IP_UVD && | ||
418 | fence->ip_type != AMDGPU_HW_IP_VCE) { | ||
419 | uint64_t *signaled_fence = context->fence_cpu; | ||
420 | signaled_fence += amdgpu_cs_fence_index(ip_type, ring); | ||
421 | |||
422 | if (fence->fence <= *signaled_fence) { | ||
423 | /* This fence value is signaled already. */ | ||
424 | *expired_fence = *signaled_fence; | ||
425 | pthread_mutex_unlock(&context->sequence_mutex); | ||
426 | *expired = true; | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | /* Checking the user fence is enough. */ | ||
431 | if (timeout_ns == 0) { | ||
432 | pthread_mutex_unlock(&context->sequence_mutex); | ||
433 | return 0; | ||
434 | } | ||
435 | } | ||
436 | 363 | ||
437 | pthread_mutex_unlock(&context->sequence_mutex); | 364 | if (!r && !busy) |
438 | |||
439 | r = amdgpu_ioctl_wait_cs(context, ip_type, ip_instance, ring, | ||
440 | fence->fence, timeout_ns, | ||
441 | flags, &busy); | ||
442 | if (!r && !busy) { | ||
443 | *expired = true; | 365 | *expired = true; |
444 | pthread_mutex_lock(&context->sequence_mutex); | ||
445 | /* The thread doesn't hold sequence_mutex. Other thread could | ||
446 | update *expired_fence already. Check whether there is a | ||
447 | newerly expired fence. */ | ||
448 | if (fence->fence > *expired_fence) | ||
449 | *expired_fence = fence->fence; | ||
450 | pthread_mutex_unlock(&context->sequence_mutex); | ||
451 | } | ||
452 | 366 | ||
453 | return r; | 367 | return r; |
454 | } | 368 | } |
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h index e35923ff..bf7788dd 100644 --- a/amdgpu/amdgpu_internal.h +++ b/amdgpu/amdgpu_internal.h | |||
@@ -109,11 +109,6 @@ struct amdgpu_context { | |||
109 | /** Mutex for accessing fences and to maintain command submissions | 109 | /** Mutex for accessing fences and to maintain command submissions |
110 | in good sequence. */ | 110 | in good sequence. */ |
111 | pthread_mutex_t sequence_mutex; | 111 | pthread_mutex_t sequence_mutex; |
112 | /** Buffer for user fences */ | ||
113 | struct amdgpu_bo *fence_bo; | ||
114 | void *fence_cpu; | ||
115 | /** The newest expired fence for the ring of the ip blocks. */ | ||
116 | uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; | ||
117 | /* context id*/ | 112 | /* context id*/ |
118 | uint32_t id; | 113 | uint32_t id; |
119 | }; | 114 | }; |