-rw-r--r--  Makefile.am                  |    5
-rw-r--r--  Makefile.sources             |    1
-rw-r--r--  amdgpu/Makefile.am           |   54
-rw-r--r--  amdgpu/amdgpu.h              | 1276
-rw-r--r--  amdgpu/amdgpu_bo.c           |  626
-rw-r--r--  amdgpu/amdgpu_cs.c           |  981
-rw-r--r--  amdgpu/amdgpu_device.c       |  241
-rw-r--r--  amdgpu/amdgpu_gpu_info.c     |  275
-rw-r--r--  amdgpu/amdgpu_internal.h     |  208
-rw-r--r--  amdgpu/amdgpu_vamgr.c        |  169
-rw-r--r--  amdgpu/libdrm_amdgpu.pc.in   |   10
-rw-r--r--  amdgpu/util_hash.c           |  382
-rw-r--r--  amdgpu/util_hash.h           |   99
-rw-r--r--  amdgpu/util_hash_table.c     |  257
-rw-r--r--  amdgpu/util_hash_table.h     |   65
-rw-r--r--  configure.ac                 |   19
-rw-r--r--  include/drm/amdgpu_drm.h     |  590
17 files changed, 5258 insertions(+), 0 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 13df80c5..cb808ff9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -57,6 +57,10 @@ if HAVE_RADEON
 RADEON_SUBDIR = radeon
 endif
 
+if HAVE_AMDGPU
+AMDGPU_SUBDIR = amdgpu
+endif
+
 if HAVE_OMAP
 OMAP_SUBDIR = omap
 endif
@@ -85,6 +89,7 @@ SUBDIRS = \
 	$(INTEL_SUBDIR) \
 	$(NOUVEAU_SUBDIR) \
 	$(RADEON_SUBDIR) \
+	$(AMDGPU_SUBDIR) \
 	$(OMAP_SUBDIR) \
 	$(EXYNOS_SUBDIR) \
 	$(FREEDRENO_SUBDIR) \
diff --git a/Makefile.sources b/Makefile.sources
index ab1172d2..a77f48de 100644
--- a/Makefile.sources
+++ b/Makefile.sources
@@ -28,6 +28,7 @@ LIBDRM_INCLUDE_H_FILES := \
 	include/drm/qxl_drm.h \
 	include/drm/r128_drm.h \
 	include/drm/radeon_drm.h \
+	include/drm/amdgpu_drm.h \
 	include/drm/savage_drm.h \
 	include/drm/sis_drm.h \
 	include/drm/tegra_drm.h \
diff --git a/amdgpu/Makefile.am b/amdgpu/Makefile.am
new file mode 100644
index 00000000..82e78c7f
--- /dev/null
+++ b/amdgpu/Makefile.am
@@ -0,0 +1,54 @@
1# Copyright © 2008 Jérôme Glisse
2#
3# Permission is hereby granted, free of charge, to any person obtaining a
4# copy of this software and associated documentation files (the "Software"),
5# to deal in the Software without restriction, including without limitation
6# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7# and/or sell copies of the Software, and to permit persons to whom the
8# Software is furnished to do so, subject to the following conditions:
9#
10# The above copyright notice and this permission notice (including the next
11# paragraph) shall be included in all copies or substantial portions of the
12# Software.
13#
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21#
22# Authors:
23# Jérôme Glisse <glisse@freedesktop.org>
24
25AM_CFLAGS = \
26 $(WARN_CFLAGS) -Wno-switch-enum \
27 -I$(top_srcdir) \
28 $(PTHREADSTUBS_CFLAGS) \
29 -I$(top_srcdir)/include/drm
30
31libdrm_amdgpu_la_LTLIBRARIES = libdrm_amdgpu.la
32libdrm_amdgpu_ladir = $(libdir)
33libdrm_amdgpu_la_LDFLAGS = -version-number 1:0:0 -no-undefined
34libdrm_amdgpu_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@
35
36libdrm_amdgpu_la_SOURCES = \
37 amdgpu.h \
38 amdgpu_bo.c \
39 amdgpu_cs.c \
40 amdgpu_device.c \
41 amdgpu_gpu_info.c \
42 amdgpu_internal.h \
43 amdgpu_vamgr.c \
44 util_hash.c \
45 util_hash.h \
46 util_hash_table.c \
47 util_hash_table.h
48
49libdrm_amdgpuincludedir = ${includedir}/libdrm
50libdrm_amdgpuinclude_HEADERS = \
51 amdgpu.h
52
53pkgconfigdir = @pkgconfigdir@
54pkgconfig_DATA = libdrm_amdgpu.pc
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
new file mode 100644
index 00000000..11a86eff
--- /dev/null
+++ b/amdgpu/amdgpu.h
@@ -0,0 +1,1276 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24/**
25 * \file amdgpu.h
26 *
27 * Declare public libdrm_amdgpu API
28 *
29 * This file defines the API exposed by the libdrm_amdgpu library.
30 * Users who want to use libdrm_amdgpu functionality must include
31 * this file.
32 *
33 */
34#ifndef _AMDGPU_H_
35#define _AMDGPU_H_
36
37#include <stdint.h>
38#include <stdbool.h>
39
40struct drm_amdgpu_info_hw_ip;
41
42/*--------------------------------------------------------------------------*/
43/* --------------------------- Defines ------------------------------------ */
44/*--------------------------------------------------------------------------*/
45
46/**
47 * Maximum number of Command Buffers (IBs) which can be sent to a single
48 * hardware IP in one submission, to accommodate CE/DE requirements
49 *
50 * \sa amdgpu_cs_ib_info
51*/
52#define AMDGPU_CS_MAX_IBS_PER_SUBMIT 4
53
54/**
55 * Special timeout value meaning that the timeout is infinite
56 */
57#define AMDGPU_TIMEOUT_INFINITE 0xffffffffffffffffull
58
59/**
60 * The special flag for GFX submission to identify that this is a CE IB
61 * \sa amdgpu_cs_ib_info
62*/
63#define AMDGPU_CS_GFX_IB_CE 0x1
64
65/**
66 * The special flag to mark that this IB will be re-used
67 * by the client and should not be automatically returned
68 * to the free pool by libdrm_amdgpu when the submission is completed.
69 *
70 * \sa amdgpu_cs_ib_info
71*/
72#define AMDGPU_CS_REUSE_IB 0x2
73
74/**
75 * The special resource flag for IB submission.
76 * When VRAM is full, some resources may be moved to GTT to make room
77 * for other resources which want to be in VRAM. This flag affects the order
78 * in which resources are moved back to VRAM until no space is left there.
79 * The resources with the highest priority will be moved first.
80 * The value can be between 0 and 15, inclusive.
81 */
82#define AMDGPU_IB_RESOURCE_PRIORITY(x) ((x) & 0xf)
83
84
85/*--------------------------------------------------------------------------*/
86/* ----------------------------- Enums ------------------------------------ */
87/*--------------------------------------------------------------------------*/
88
89/**
90 * Enum describing possible handle types
91 *
92 * \sa amdgpu_bo_import, amdgpu_bo_export
93 *
94*/
95enum amdgpu_bo_handle_type {
96 /** GEM flink name (needs DRM authentication, used by DRI2) */
97 amdgpu_bo_handle_type_gem_flink_name = 0,
98
99 /** KMS handle which is used by all driver ioctls */
100 amdgpu_bo_handle_type_kms = 1,
101
102 /** DMA-buf fd handle */
103 amdgpu_bo_handle_type_dma_buf_fd = 2
104};
105
106/**
107 * Enum describing possible context reset states
108 *
109 * \sa amdgpu_cs_query_reset_state()
110 *
111*/
112enum amdgpu_cs_ctx_reset_state {
113 /** No reset was detected */
114 amdgpu_cs_reset_no_error = 0,
115
116	/** Reset/TDR was detected and this context caused it */
117 amdgpu_cs_reset_guilty = 1,
118
119	/** Reset/TDR was detected and was caused by another context */
120 amdgpu_cs_reset_innocent = 2,
121
122	/** Reset/TDR was detected but its cause is unknown */
123 amdgpu_cs_reset_unknown = 3
124};
125
126/**
127 * For performance reasons and to simplify logic libdrm_amdgpu will handle
128 * IBs only some pre-defined sizes.
129 *
130 * \sa amdgpu_cs_alloc_ib()
131 */
132enum amdgpu_cs_ib_size {
133 amdgpu_cs_ib_size_4K = 1,
134 amdgpu_cs_ib_size_16K = 2,
135 amdgpu_cs_ib_size_32K = 3,
136 amdgpu_cs_ib_size_64K = 4,
137 amdgpu_cs_ib_size_128K = 5
138};
139
140/** The number of different IB sizes */
141#define AMDGPU_CS_IB_SIZE_NUM 6
142
143
144/*--------------------------------------------------------------------------*/
145/* -------------------------- Datatypes ----------------------------------- */
146/*--------------------------------------------------------------------------*/
147
148/**
149 * Define opaque pointer to context associated with fd.
150 * This context will be returned as the result of
151 * "initialize" function and should be pass as the first
152 * parameter to any API call
153 */
154typedef struct amdgpu_device *amdgpu_device_handle;
155
156/**
157 * Define GPU Context type as pointer to opaque structure
158 * Example of GPU Context is the "rendering" context associated
159 * with OpenGL context (glCreateContext)
160 */
161typedef struct amdgpu_context *amdgpu_context_handle;
162
163/**
164 * Define handle for amdgpu resources: buffer, GDS, etc.
165 */
166typedef struct amdgpu_bo *amdgpu_bo_handle;
167
168/**
169 * Define handle to be used when dealing with command
170 * buffers (a.k.a. ibs)
171 *
172 */
173typedef struct amdgpu_ib *amdgpu_ib_handle;
174
175
176/*--------------------------------------------------------------------------*/
177/* -------------------------- Structures ---------------------------------- */
178/*--------------------------------------------------------------------------*/
179
180/**
181 * Structure describing memory allocation request
182 *
183 * \sa amdgpu_bo_alloc()
184 *
185*/
186struct amdgpu_bo_alloc_request {
187	/** Requested allocation size. It must be aligned correctly. */
188 uint64_t alloc_size;
189
190 /**
191 * It may be required to have some specific alignment requirements
192 * for physical back-up storage (e.g. for displayable surface).
193 * If 0 there is no special alignment requirement
194 */
195 uint64_t phys_alignment;
196
197 /**
198 * UMD should specify where to allocate memory and how it
199 * will be accessed by the CPU.
200 */
201 uint32_t preferred_heap;
202
203 /** Additional flags passed on allocation */
204 uint64_t flags;
205};
206
207/**
208 * Structure describing the result of a memory allocation request
209 *
210 * \sa amdgpu_bo_alloc()
211*/
212struct amdgpu_bo_alloc_result {
213 /** Assigned virtual MC Base Address */
214 uint64_t virtual_mc_base_address;
215
216 /** Handle of allocated memory to be used by the given process only. */
217 amdgpu_bo_handle buf_handle;
218};
219
220/**
221 * Special UMD specific information associated with buffer.
222 *
223 * It may be needed to pass some buffer characteristics as part
224 * of buffer sharing. Such information is defined by the UMD and is
225 * opaque for libdrm_amdgpu as well as for the kernel driver.
226 *
227 * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info,
228 * amdgpu_bo_import(), amdgpu_bo_export
229 *
230*/
231struct amdgpu_bo_metadata {
232 /** Special flag associated with surface */
233 uint64_t flags;
234
235 /**
236 * ASIC-specific tiling information (also used by DCE).
237 * The encoding is defined by the AMDGPU_TILING_* definitions.
238 */
239 uint64_t tiling_info;
240
241 /** Size of metadata associated with the buffer, in bytes. */
242 uint32_t size_metadata;
243
244 /** UMD specific metadata. Opaque for kernel */
245 uint32_t umd_metadata[64];
246};
247
248/**
249 * Structure describing an allocated buffer. The client may need
250 * to query such information as part of the buffer-sharing mechanism
251 *
252 * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info(),
253 * amdgpu_bo_import(), amdgpu_bo_export()
254*/
255struct amdgpu_bo_info {
256 /** Allocated memory size */
257 uint64_t alloc_size;
258
259 /**
260 * It may be required to have some specific alignment requirements
261 * for physical back-up storage.
262 */
263 uint64_t phys_alignment;
264
265 /**
266 * Assigned virtual MC Base Address.
267 * \note This information will be returned only if this buffer was
268 * allocated in the same process otherwise 0 will be returned.
269 */
270 uint64_t virtual_mc_base_address;
271
272 /** Heap where to allocate memory. */
273 uint32_t preferred_heap;
274
275 /** Additional allocation flags. */
276 uint64_t alloc_flags;
277
278 /** Metadata associated with buffer if any. */
279 struct amdgpu_bo_metadata metadata;
280};
281
282/**
283 * Structure with information about "imported" buffer
284 *
285 * \sa amdgpu_bo_import()
286 *
287 */
288struct amdgpu_bo_import_result {
289 /** Handle of memory/buffer to use */
290 amdgpu_bo_handle buf_handle;
291
292 /** Buffer size */
293 uint64_t alloc_size;
294
295 /** Assigned virtual MC Base Address */
296 uint64_t virtual_mc_base_address;
297};
298
299
300/**
301 *
302 * Structure to describe GDS partitioning information.
303 * \note OA and GWS resources are associated with GDS partition
304 *
305 * \sa amdgpu_gpu_resource_query_gds_info
306 *
307*/
308struct amdgpu_gds_resource_info {
309 uint32_t gds_gfx_partition_size;
310 uint32_t compute_partition_size;
311 uint32_t gds_total_size;
312 uint32_t gws_per_gfx_partition;
313 uint32_t gws_per_compute_partition;
314 uint32_t oa_per_gfx_partition;
315 uint32_t oa_per_compute_partition;
316};
317
318
319
320/**
321 * Structure describing result of request to allocate GDS
322 *
323 * \sa amdgpu_gpu_resource_gds_alloc
324 *
325*/
326struct amdgpu_gds_alloc_info {
327 /** Handle assigned to gds allocation */
328 amdgpu_bo_handle resource_handle;
329
330 /** How much was really allocated */
331 uint32_t gds_memory_size;
332
333 /** Number of GWS resources allocated */
334 uint32_t gws;
335
336 /** Number of OA resources allocated */
337 uint32_t oa;
338};
339
340/**
341 * Structure describing an allocated command buffer (a.k.a. IB)
342 *
343 * \sa amdgpu_cs_alloc_ib()
344 *
345*/
346struct amdgpu_cs_ib_alloc_result {
347 /** IB allocation handle */
348 amdgpu_ib_handle handle;
349
350 /** Assigned GPU VM MC Address of command buffer */
351 uint64_t mc_address;
352
353 /** Address to be used for CPU access */
354 void *cpu;
355};
356
357/**
358 * Structure describing IB
359 *
360 * \sa amdgpu_cs_request, amdgpu_cs_submit()
361 *
362*/
363struct amdgpu_cs_ib_info {
364 /** Special flags */
365 uint64_t flags;
366
367 /** Handle of command buffer */
368 amdgpu_ib_handle ib_handle;
369
370 /**
371 * Size of Command Buffer to be submitted.
372 * - The size is in units of dwords (4 bytes).
373 * - Must be less than or equal to the size of the allocated IB
374 * - Could be 0
375 */
376 uint32_t size;
377};
378
379/**
380 * Structure describing submission request
381 *
382 * \note We could have several IBs per submission, e.g. the CE, CE, DE case for gfx
383 *
384 * \sa amdgpu_cs_submit()
385*/
386struct amdgpu_cs_request {
387 /** Specify flags with additional information */
388 uint64_t flags;
389
390 /** Specify HW IP block type to which to send the IB. */
391 unsigned ip_type;
392
393 /** IP instance index if there are several IPs of the same type. */
394 unsigned ip_instance;
395
396 /**
397 * Specify ring index of the IP. We could have several rings
398 * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
399 */
400 uint32_t ring;
401
402 /**
403 * Specify number of resource handles passed.
404 * Size of 'handles' array
405 *
406 */
407 uint32_t number_of_resources;
408
409 /** Array of resources used by submission. */
410 amdgpu_bo_handle *resources;
411
412 /** Array of resources flags. This is optional and can be NULL. */
413 uint8_t *resource_flags;
414
415 /** Number of IBs to submit in the field ibs. */
416 uint32_t number_of_ibs;
417
418 /**
419 * IBs to submit. Those IBs will be submitted together as a single entity
420 */
421 struct amdgpu_cs_ib_info *ibs;
422};
423
424/**
425 * Structure describing request to check submission state using fence
426 *
427 * \sa amdgpu_cs_query_fence_status()
428 *
429*/
430struct amdgpu_cs_query_fence {
431
432 /** In which context IB was sent to execution */
433 amdgpu_context_handle context;
434
435 /** Timeout in nanoseconds. */
436 uint64_t timeout_ns;
437
438 /** To which HW IP type the fence belongs */
439 unsigned ip_type;
440
441 /** IP instance index if there are several IPs of the same type. */
442 unsigned ip_instance;
443
444 /** Ring index of the HW IP */
445 uint32_t ring;
446
447 /** Flags */
448 uint64_t flags;
449
450 /** Specify fence for which we need to check
451 * submission status.*/
452 uint64_t fence;
453};
454
455/**
456 * Structure which provides information about GPU VM MC Address space
457 * alignment requirements
458 *
459 * \sa amdgpu_query_buffer_size_alignment
460 */
461struct amdgpu_buffer_size_alignments {
462 /** Size alignment requirement for allocation in
463 * local memory */
464 uint64_t size_local;
465
466 /**
467 * Size alignment requirement for allocation in remote memory
468 */
469 uint64_t size_remote;
470};
471
472
473/**
474 * Structure which provides information about a heap
475 *
476 * \sa amdgpu_query_heap_info()
477 *
478 */
479struct amdgpu_heap_info {
480 /** Theoretical max. available memory in the given heap */
481 uint64_t heap_size;
482
483 /**
484 * Number of bytes allocated in the heap. This includes all processes
485 * and private allocations in the kernel. It changes when new buffers
486 * are allocated, freed, and moved. It cannot be larger than
487 * heap_size.
488 */
489 uint64_t heap_usage;
490
491 /**
492 * Theoretically possible max. size of a buffer which
493 * could be allocated in the given heap
494 */
495 uint64_t max_allocation;
496};
497
498
499
500/**
501 * Describes GPU h/w info needed for correct UMD initialization
502 *
503 * \sa amdgpu_query_gpu_info()
504*/
505struct amdgpu_gpu_info {
506 /** Asic id */
507 uint32_t asic_id;
508 /**< Chip revision */
509 uint32_t chip_rev;
510 /** Chip external revision */
511 uint32_t chip_external_rev;
512 /** Family ID */
513 uint32_t family_id;
514 /** Special flags */
515 uint64_t ids_flags;
516 /** max engine clock*/
517 uint64_t max_engine_clk;
518 /** number of shader engines */
519 uint32_t num_shader_engines;
520 /** number of shader arrays per engine */
521 uint32_t num_shader_arrays_per_engine;
522 /** Number of available good shader pipes */
523 uint32_t avail_quad_shader_pipes;
524	/** Max. number of shader pipes (including good and bad pipes) */
525 uint32_t max_quad_shader_pipes;
526 /** Number of parameter cache entries per shader quad pipe */
527 uint32_t cache_entries_per_quad_pipe;
528	/** Number of available graphics contexts */
529 uint32_t num_hw_gfx_contexts;
530 /** Number of render backend pipes */
531 uint32_t rb_pipes;
532 /** Active render backend pipe number */
533 uint32_t active_rb_pipes;
534 /** Enabled render backend pipe mask */
535 uint32_t enabled_rb_pipes_mask;
536 /** Frequency of GPU Counter */
537 uint32_t gpu_counter_freq;
538 /** CC_RB_BACKEND_DISABLE.BACKEND_DISABLE per SE */
539 uint32_t backend_disable[4];
540 /** Value of MC_ARB_RAMCFG register*/
541 uint32_t mc_arb_ramcfg;
542 /** Value of GB_ADDR_CONFIG */
543 uint32_t gb_addr_cfg;
544 /** Values of the GB_TILE_MODE0..31 registers */
545 uint32_t gb_tile_mode[32];
546 /** Values of GB_MACROTILE_MODE0..15 registers */
547 uint32_t gb_macro_tile_mode[16];
548 /** Value of PA_SC_RASTER_CONFIG register per SE */
549 uint32_t pa_sc_raster_cfg[4];
550 /** Value of PA_SC_RASTER_CONFIG_1 register per SE */
551 uint32_t pa_sc_raster_cfg1[4];
552 /* CU info */
553 uint32_t cu_active_number;
554 uint32_t cu_ao_mask;
555 uint32_t cu_bitmap[4][4];
556};
557
558
559/*--------------------------------------------------------------------------*/
560/*------------------------- Functions --------------------------------------*/
561/*--------------------------------------------------------------------------*/
562
563/*
564 * Initialization / Cleanup
565 *
566*/
567
568
569/**
570 *
571 * \param fd - \c [in] File descriptor for AMD GPU device
572 * received previously as the result of
573 * e.g. drmOpen() call.
574 * For legacy fd type, the DRI2/DRI3 authentication
575 * should be done before calling this function.
576 * \param major_version - \c [out] Major version of library. It is assumed
577 * that adding new functionality will cause
578 * an increase in the major version
579 * \param minor_version - \c [out] Minor version of library
580 * \param device_handle - \c [out] Pointer to opaque context which should
581 * be passed as the first parameter on each
582 * API call
583 *
584 *
585 * \return 0 on success\n
586 * >0 - AMD specific error code\n
587 * <0 - Negative POSIX Error code
588 *
589 *
590 * \sa amdgpu_device_deinitialize()
591*/
592int amdgpu_device_initialize(int fd,
593 uint32_t *major_version,
594 uint32_t *minor_version,
595 amdgpu_device_handle *device_handle);
596
597
598
599/**
600 *
601 * When access to the library is not needed any more, this
602 * function must be called, giving an opportunity to clean up any
603 * resources if needed.
604 *
605 * \param device_handle - \c [in] Context associated with file
606 * descriptor for AMD GPU device
607 * received previously as the
608 * result e.g. of drmOpen() call.
609 *
610 * \return 0 on success\n
611 * >0 - AMD specific error code\n
612 * <0 - Negative POSIX Error code
613 *
614 * \sa amdgpu_device_initialize()
615 *
616*/
617int amdgpu_device_deinitialize(amdgpu_device_handle device_handle);
618
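A minimal usage sketch of the two calls above, assuming the fd comes from
drmOpen()/drmClose() in xf86drm.h; error handling is application specific.

	int fd = drmOpen("amdgpu", NULL);	/* or an fd received via DRI2/DRI3 */
	uint32_t major, minor;
	amdgpu_device_handle dev;

	if (fd >= 0 && amdgpu_device_initialize(fd, &major, &minor, &dev) == 0) {
		/* ... use "dev" with the calls declared below ... */
		amdgpu_device_deinitialize(dev);
		drmClose(fd);
	}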
619
620/*
621 * Memory Management
622 *
623*/
624
625/**
626 * Allocate memory to be used by UMD for GPU related operations
627 *
628 * \param dev - \c [in] Device handle.
629 * See #amdgpu_device_initialize()
630 * \param alloc_buffer - \c [in] Pointer to the structure describing an
631 * allocation request
632 * \param info - \c [out] Pointer to structure which return
633 * information about allocated memory
634 *
635 * \return 0 on success\n
636 * >0 - AMD specific error code\n
637 * <0 - Negative POSIX Error code
638 *
639 * \sa amdgpu_bo_free()
640*/
641int amdgpu_bo_alloc(amdgpu_device_handle dev,
642 struct amdgpu_bo_alloc_request *alloc_buffer,
643 struct amdgpu_bo_alloc_result *info);
644
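A minimal allocation sketch for amdgpu_bo_alloc(), assuming "dev" from the
initialization sketch above; AMDGPU_GEM_DOMAIN_VRAM comes from amdgpu_drm.h
and the size/alignment values are arbitrary examples.

	struct amdgpu_bo_alloc_request req = {
		.alloc_size = 64 * 1024,
		.phys_alignment = 4096,
		.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM,
	};
	struct amdgpu_bo_alloc_result res;

	if (amdgpu_bo_alloc(dev, &req, &res) == 0) {
		/* res.buf_handle and res.virtual_mc_base_address are now valid */
	}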
645/**
646 * Associate opaque data with buffer to be queried by another UMD
647 *
648 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
649 * \param buf_handle - \c [in] Buffer handle
650 * \param info - \c [in] Metadata to associated with buffer
651 *
652 * \return 0 on success\n
653 * >0 - AMD specific error code\n
654 * <0 - Negative POSIX Error code
655*/
656int amdgpu_bo_set_metadata(amdgpu_bo_handle buf_handle,
657 struct amdgpu_bo_metadata *info);
658
659/**
660 * Query buffer information including metadata previously associated with
661 * buffer.
662 *
663 * \param dev - \c [in] Device handle.
664 * See #amdgpu_device_initialize()
665 * \param buf_handle - \c [in] Buffer handle
666 * \param info - \c [out] Structure describing buffer
667 *
668 * \return 0 on success\n
669 * >0 - AMD specific error code\n
670 * <0 - Negative POSIX Error code
671 *
672 * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc()
673*/
674int amdgpu_bo_query_info(amdgpu_bo_handle buf_handle,
675 struct amdgpu_bo_info *info);
676
677/**
678 * Allow others to get access to buffer
679 *
680 * \param dev - \c [in] Device handle.
681 * See #amdgpu_device_initialize()
682 * \param buf_handle - \c [in] Buffer handle
683 * \param type - \c [in] Type of handle requested
684 * \param shared_handle - \c [out] Special "shared" handle
685 *
686 * \return 0 on success\n
687 * >0 - AMD specific error code\n
688 * <0 - Negative POSIX Error code
689 *
690 * \sa amdgpu_bo_import()
691 *
692*/
693int amdgpu_bo_export(amdgpu_bo_handle buf_handle,
694 enum amdgpu_bo_handle_type type,
695 uint32_t *shared_handle);
696
697/**
698 * Request access to "shared" buffer
699 *
700 * \param dev - \c [in] Device handle.
701 * See #amdgpu_device_initialize()
702 * \param type - \c [in] Type of handle requested
703 * \param shared_handle - \c [in] Shared handle received as the result of
704 * an "export" operation
705 * \param output - \c [out] Pointer to structure with information
706 * about imported buffer
707 *
708 * \return 0 on success\n
709 * >0 - AMD specific error code\n
710 * <0 - Negative POSIX Error code
711 *
712 * \note Buffer must be "imported" only using new "fd" (different from
713 * one used by "exporter").
714 *
715 * \sa amdgpu_bo_export()
716 *
717*/
718int amdgpu_bo_import(amdgpu_device_handle dev,
719 enum amdgpu_bo_handle_type type,
720 uint32_t shared_handle,
721 struct amdgpu_bo_import_result *output);
722
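A sharing sketch, assuming "res" from the allocation sketch above and a
hypothetical second device handle "other_dev" opened by the importing side;
passing the fd between processes is omitted.

	uint32_t shared_fd;
	struct amdgpu_bo_import_result imported;

	/* exporting side */
	amdgpu_bo_export(res.buf_handle, amdgpu_bo_handle_type_dma_buf_fd,
			 &shared_fd);

	/* importing side, after receiving the fd (e.g. over a UNIX socket) */
	amdgpu_bo_import(other_dev, amdgpu_bo_handle_type_dma_buf_fd,
			 shared_fd, &imported);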
723/**
724 * Free previously allocated memory
725 *
726 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
727 * \param buf_handle - \c [in] Buffer handle to free
728 *
729 * \return 0 on success\n
730 * >0 - AMD specific error code\n
731 * <0 - Negative POSIX Error code
732 *
733 * \note In the case of memory shared between different applications, all
734 * resources will be "physically" freed only when all such applications
735 * have been terminated
736 * \note It is the UMD's responsibility to 'free' the buffer only when there
737 * is no more GPU access
738 *
739 * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc()
740 *
741*/
742int amdgpu_bo_free(amdgpu_bo_handle buf_handle);
743
744/**
745 * Request CPU access to GPU accessible memory
746 *
747 * \param buf_handle - \c [in] Buffer handle
748 * \param cpu - \c [out] CPU address to be used for access
749 *
750 * \return 0 on success\n
751 * >0 - AMD specific error code\n
752 * <0 - Negative POSIX Error code
753 *
754 * \sa amdgpu_bo_cpu_unmap()
755 *
756*/
757int amdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void **cpu);
758
759/**
760 * Release CPU access to GPU memory
761 *
762 * \param buf_handle - \c [in] Buffer handle
763 *
764 * \return 0 on success\n
765 * >0 - AMD specific error code\n
766 * <0 - Negative POSIX Error code
767 *
768 * \sa amdgpu_bo_cpu_map()
769 *
770*/
771int amdgpu_bo_cpu_unmap(amdgpu_bo_handle buf_handle);
772
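A CPU access sketch, assuming "res" from the allocation sketch above: map the
buffer, fill it from the CPU, then drop the mapping again.

	void *cpu;

	if (amdgpu_bo_cpu_map(res.buf_handle, &cpu) == 0) {
		memset(cpu, 0, 64 * 1024);	/* CPU writes land in the BO */
		amdgpu_bo_cpu_unmap(res.buf_handle);
	}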
773
774/**
775 * Wait until a buffer is not used by the device.
776 *
777 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
778 * \param buf_handle - \c [in] Buffer handle.
779 * \param timeout_ns - Timeout in nanoseconds.
780 * \param buffer_busy - 0 if buffer is idle, all GPU access was completed
781 * and no GPU access is scheduled.
782 * 1 if GPU access is in flight or scheduled
783 *
784 * \return 0 - on success
785 * <0 - AMD specific error code
786 */
787int amdgpu_bo_wait_for_idle(amdgpu_bo_handle buf_handle,
788 uint64_t timeout_ns,
789 bool *buffer_busy);
790
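A non-blocking idle check sketch, assuming "res" from the allocation sketch
above; a zero timeout just queries the current state.

	bool busy;

	if (amdgpu_bo_wait_for_idle(res.buf_handle, 0, &busy) == 0 && !busy) {
		/* no GPU access pending or scheduled, safe to recycle the buffer */
	}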
791
792/*
793 * Special GPU Resources
794 *
795*/
796
797
798
799/**
800 * Query information about GDS
801 *
802 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
803 * \param gds_info - \c [out] Pointer to structure to get GDS information
804 *
805 * \return 0 on success\n
806 * >0 - AMD specific error code\n
807 * <0 - Negative POSIX Error code
808 *
809*/
810int amdgpu_gpu_resource_query_gds_info(amdgpu_device_handle dev,
811 struct amdgpu_gds_resource_info *
812 gds_info);
813
814
815/**
816 * Allocate GDS partitions
817 *
818 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
819 * \param gds_size - \c [in] Size of gds allocation. Must be aligned
820 * accordingly.
821 * \param alloc_info - \c [out] Pointer to structure to receive information
822 * about allocation
823 *
824 * \return 0 on success\n
825 * >0 - AMD specific error code\n
826 * <0 - Negative POSIX Error code
827 *
828 *
829*/
830int amdgpu_gpu_resource_gds_alloc(amdgpu_device_handle dev,
831 uint32_t gds_size,
832 struct amdgpu_gds_alloc_info *alloc_info);
833
834
835
836
837/**
838 * Release GDS resource. When GDS and associated resources are not needed
839 * any more, the UMD should free them
840 *
841 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
842 * \param handle - \c [in] Handle assigned to GDS allocation
843 *
844 * \return 0 on success\n
845 * >0 - AMD specific error code\n
846 * <0 - Negative POSIX Error code
847 *
848*/
849int amdgpu_gpu_resource_gds_free(amdgpu_bo_handle handle);
850
851
852
853/*
854 * GPU Execution context
855 *
856*/
857
858/**
859 * Create GPU execution Context
860 *
861 * For the purpose of the GPU Scheduler and GPU Robustness extensions it is
862 * necessary to have information to identify rendering/compute contexts.
863 * It also may be needed to associate some specific requirements with such
864 * contexts. Kernel driver will guarantee that submission from the same
865 * context will always be executed in order (first come, first serve).
866 *
867 *
868 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
869 * \param context - \c [out] GPU Context handle
870 *
871 * \return 0 on success\n
872 * >0 - AMD specific error code\n
873 * <0 - Negative POSIX Error code
874 *
875 * \sa amdgpu_cs_ctx_free()
876 *
877*/
878int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
879 amdgpu_context_handle *context);
880
881/**
882 *
883 * Destroy GPU execution context when not needed any more
884 *
885 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
886 * \param context - \c [in] GPU Context handle
887 *
888 * \return 0 on success\n
889 * >0 - AMD specific error code\n
890 * <0 - Negative POSIX Error code
891 *
892 * \sa amdgpu_cs_ctx_create()
893 *
894*/
895int amdgpu_cs_ctx_free(amdgpu_device_handle dev,
896 amdgpu_context_handle context);
897
898/**
899 * Query reset state for the specific GPU Context
900 *
901 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
902 * \param context - \c [in] GPU Context handle
903 * \param state - \c [out] Reset state status
904 *
905 * \return 0 on success\n
906 * >0 - AMD specific error code\n
907 * <0 - Negative POSIX Error code
908 *
909 * \sa amdgpu_cs_ctx_create()
910 *
911*/
912int amdgpu_cs_query_reset_state(amdgpu_device_handle dev,
913 amdgpu_context_handle context,
914 enum amdgpu_cs_ctx_reset_state *state);
915
916
917/*
918 * Command Buffers Management
919 *
920*/
921
922
923/**
924 * Allocate memory to be filled with PM4 packets and to serve as the first
925 * entry point of execution (a.k.a. Indirect Buffer)
926 *
927 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
928 * \param context - \c [in] GPU Context which will use IB
929 * \param ib_size - \c [in] Size of allocation
930 * \param output - \c [out] Pointer to structure to get information about
931 * allocated IB
932 *
933 * \return 0 on success\n
934 * >0 - AMD specific error code\n
935 * <0 - Negative POSIX Error code
936 *
937 * \sa amdgpu_cs_free_ib()
938 *
939*/
940int amdgpu_cs_alloc_ib(amdgpu_device_handle dev,
941 amdgpu_context_handle context,
942 enum amdgpu_cs_ib_size ib_size,
943 struct amdgpu_cs_ib_alloc_result *output);
944
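An IB setup sketch, assuming "dev" from the initialization sketch above:
create a context, allocate a 4K IB and write packets through the returned CPU
pointer (the packet contents are engine specific and omitted here).

	amdgpu_context_handle ctx;
	struct amdgpu_cs_ib_alloc_result ib;

	amdgpu_cs_ctx_create(dev, &ctx);
	amdgpu_cs_alloc_ib(dev, ctx, amdgpu_cs_ib_size_4K, &ib);
	/* write PM4 dwords to ib.cpu; the GPU sees them at ib.mc_address */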
945/**
946 * If the UMD has allocated IBs which are not needed any more, then those IBs
947 * must be explicitly freed
948 *
949 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
950 * \param context - \c [in] GPU Context containing IB
951 * \param handle - \c [in] IB handle
952 *
953 * \return 0 on success\n
954 * >0 - AMD specific error code\n
955 * <0 - Negative POSIX Error code
956 *
957 * \note Libdrm_amdgpu will guarantee that it will correctly detect when it
958 * is safe to return IB to free pool
959 *
960 * \sa amdgpu_cs_alloc_ib()
961 *
962*/
963int amdgpu_cs_free_ib(amdgpu_device_handle dev,
964 amdgpu_context_handle context,
965 amdgpu_ib_handle handle);
966
967/**
968 * Send request to submit command buffers to hardware.
969 *
970 * The kernel driver could use the GPU Scheduler to decide when to physically
971 * send this request to the hardware. Accordingly, this request could be put
972 * in a queue and sent for execution later. The only guarantee is that requests
973 * from the same GPU context to the same ip:ip_instance:ring will be executed in
974 * order.
975 *
976 *
977 * \param dev - \c [in] Device handle.
978 * See #amdgpu_device_initialize()
979 * \param context - \c [in] GPU Context
980 * \param flags - \c [in] Global submission flags
981 * \param ibs_request - \c [in] Pointer to submission requests.
982 * We could submit to several
983 * engines/rings simultaneously as an
984 * 'atomic' operation
985 * \param number_of_requests - \c [in] Number of submission requests
986 * \param fences - \c [out] Pointer to array of data to get
987 * fences to identify submission
988 * requests. Timestamps are valid
989 * in this GPU context and could be used
990 * to identify/detect completion of
991 * submission request
992 *
993 * \return 0 on success\n
994 * >0 - AMD specific error code\n
995 * <0 - Negative POSIX Error code
996 *
997 * \note It is assumed that by default an IB will be returned to the free pool
998 * automatically by libdrm_amdgpu when the submission has completed.
999 * It is possible for the UMD to decide to re-use the same IB; in
1000 * this case it should be explicitly freed.\n
1001 * Accordingly, by default, after submission the UMD should not touch the passed
1002 * IBs. If the UMD needs to re-use an IB then the special flag AMDGPU_CS_REUSE_IB
1003 * must be passed.
1004 *
1005 * \note It is required to pass a correct resource list with the buffer handles
1006 * which will be accessed by the command buffers from the submission.
1007 * This will allow the kernel driver to correctly implement "paging".
1008 * Failure to do so will have unpredictable results.
1009 *
1010 * \sa amdgpu_cs_alloc_ib(), amdgpu_cs_free_ib(),
1011 * amdgpu_cs_query_fence_status()
1012 *
1013*/
1014int amdgpu_cs_submit(amdgpu_device_handle dev,
1015 amdgpu_context_handle context,
1016 uint64_t flags,
1017 struct amdgpu_cs_request *ibs_request,
1018 uint32_t number_of_requests,
1019 uint64_t *fences);
1020
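A single-IB submission sketch, assuming "ctx", "ib" and "res" from the sketches
above, a hypothetical "ndw" holding the number of dwords written into the IB,
and AMDGPU_HW_IP_GFX from amdgpu_drm.h.

	struct amdgpu_cs_ib_info ib_info = {
		.ib_handle = ib.handle,
		.size = ndw,
	};
	struct amdgpu_cs_request request = {
		.ip_type = AMDGPU_HW_IP_GFX,
		.ring = 0,
		.number_of_resources = 1,
		.resources = &res.buf_handle,
		.number_of_ibs = 1,
		.ibs = &ib_info,
	};
	uint64_t fence;

	amdgpu_cs_submit(dev, ctx, 0, &request, 1, &fence);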
1021/**
1022 * Query status of Command Buffer Submission
1023 *
1024 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1025 * \param fence - \c [in] Structure describing fence to query
1026 * \param expired - \c [out] If fence expired or not.\n
1027 * 0 - if fence is not expired\n
1028 * !0 - otherwise
1029 *
1030 * \return 0 on success\n
1031 * >0 - AMD specific error code\n
1032 * <0 - Negative POSIX Error code
1033 *
1034 * \note If the UMD wants only to check the operation status and return
1035 * immediately, then a timeout value of 0 must be passed. In this case success
1036 * will be returned if the submission was completed, or a timeout error
1037 * code otherwise.
1038 *
1039 * \sa amdgpu_cs_submit()
1040*/
1041int amdgpu_cs_query_fence_status(amdgpu_device_handle dev,
1042 struct amdgpu_cs_query_fence *fence,
1043 uint32_t *expired);
1044
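A fence wait sketch, assuming "ctx" and "fence" from the submission sketch
above: block until the submitted IB has retired.

	struct amdgpu_cs_query_fence query = {
		.context = ctx,
		.timeout_ns = AMDGPU_TIMEOUT_INFINITE,
		.ip_type = AMDGPU_HW_IP_GFX,
		.ring = 0,
		.fence = fence,
	};
	uint32_t expired = 0;

	amdgpu_cs_query_fence_status(dev, &query, &expired);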
1045
1046/*
1047 * Query / Info API
1048 *
1049*/
1050
1051
1052/**
1053 * Query allocation size alignments
1054 *
1055 * The UMD should query information about GPU VM MC size alignment requirements
1056 * to be able to correctly choose the required allocation size and implement
1057 * internal optimizations if needed.
1058 *
1059 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1060 * \param info - \c [out] Pointer to structure to get size alignment
1061 * requirements
1062 *
1063 * \return 0 on success\n
1064 * >0 - AMD specific error code\n
1065 * <0 - Negative POSIX Error code
1066 *
1067*/
1068int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
1069 struct amdgpu_buffer_size_alignments
1070 *info);
1071
1072
1073
1074/**
1075 * Query firmware versions
1076 *
1077 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1078 * \param fw_type - \c [in] AMDGPU_INFO_FW_*
1079 * \param ip_instance - \c [in] Index of the IP block of the same type.
1080 * \param index - \c [in] Index of the engine. (for SDMA and MEC)
1081 * \param version - \c [out] Pointer to the "version" return value
1082 * \param feature - \c [out] Pointer to the "feature" return value
1083 *
1084 * \return 0 on success\n
1085 * >0 - AMD specific error code\n
1086 * <0 - Negative POSIX Error code
1087 *
1088*/
1089int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type,
1090 unsigned ip_instance, unsigned index,
1091 uint32_t *version, uint32_t *feature);
1092
1093
1094
1095/**
1096 * Query the number of HW IP instances of a certain type.
1097 *
1098 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1099 * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
1100 * \param count - \c [out] Pointer to structure to get information
1101 *
1102 * \return 0 on success\n
1103 * >0 - AMD specific error code\n
1104 * <0 - Negative POSIX Error code
1105*/
1106int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type,
1107 uint32_t *count);
1108
1109
1110
1111/**
1112 * Query engine information
1113 *
1114 * This query allows the UMD to query information about different engines and their
1115 * capabilities.
1116 *
1117 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1118 * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
1119 * \param ip_instance - \c [in] Index of the IP block of the same type.
1120 * \param info - \c [out] Pointer to structure to get information
1121 *
1122 * \return 0 on success\n
1123 * >0 - AMD specific error code\n
1124 * <0 - Negative POSIX Error code
1125*/
1126int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
1127 unsigned ip_instance,
1128 struct drm_amdgpu_info_hw_ip *info);
1129
1130
1131
1132
1133/**
1134 * Query heap information
1135 *
1136 * This query allows the UMD to query potentially available memory resources and
1137 * adjust its logic if necessary.
1138 *
1139 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1140 * \param heap - \c [in] Heap type
1141 * \param info - \c [out] Pointer to structure to get needed information
1142 *
1143 * \return 0 on success\n
1144 * >0 - AMD specific error code\n
1145 * <0 - Negative POSIX Error code
1146 *
1147*/
1148int amdgpu_query_heap_info(amdgpu_device_handle dev,
1149 uint32_t heap,
1150 uint32_t flags,
1151 struct amdgpu_heap_info *info);
1152
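A heap query sketch, assuming "dev" from the initialization sketch above and
AMDGPU_GEM_DOMAIN_VRAM (from amdgpu_drm.h) as the heap selector; the exact set
of accepted heap values is defined by the kernel interface.

	struct amdgpu_heap_info vram;

	if (amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram) == 0) {
		/* vram.heap_size, vram.heap_usage, vram.max_allocation are valid */
	}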
1153
1154
1155/**
1156 * Get the CRTC ID from the mode object ID
1157 *
1158 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1159 * \param id - \c [in] Mode object ID
1160 * \param result - \c [out] Pointer to the CRTC ID
1161 *
1162 * \return 0 on success\n
1163 * >0 - AMD specific error code\n
1164 * <0 - Negative POSIX Error code
1165 *
1166*/
1167int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id,
1168 int32_t *result);
1169
1170
1171
1172/**
1173 * Query GPU H/w Info
1174 *
1175 * Query hardware specific information
1176 *
1177 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1178 * \param info - \c [out] Pointer to structure which returns the needed
1179 * information
1180 *
1181 * \return 0 on success\n
1182 * >0 - AMD specific error code\n
1183 * <0 - Negative POSIX Error code
1184 *
1185*/
1186int amdgpu_query_gpu_info(amdgpu_device_handle dev,
1187 struct amdgpu_gpu_info *info);
1188
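A device query sketch, assuming "dev" from the initialization sketch above;
this is typically done once at start-up and cached by the UMD.

	struct amdgpu_gpu_info gpu_info;

	if (amdgpu_query_gpu_info(dev, &gpu_info) == 0) {
		/* e.g. gpu_info.family_id, gpu_info.num_shader_engines and
		 * gpu_info.max_engine_clk describe the ASIC */
	}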
1189
1190
1191/**
1192 * Query hardware or driver information.
1193 *
1194 * The return size is query-specific and depends on the "info_id" parameter.
1195 * No more than "size" bytes is returned.
1196 *
1197 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1198 * \param info_id - \c [in] AMDGPU_INFO_*
1199 * \param size - \c [in] Size of the returned value.
1200 * \param value - \c [out] Pointer to the return value.
1201 *
1202 * \return 0 on success\n
1203 * >0 - AMD specific error code\n
1204 * <0 - Negative POSIX error code
1205 *
1206*/
1207int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
1208 unsigned size, void *value);
1209
1210
1211
1212/**
1213 * Read a set of consecutive memory-mapped registers.
1214 * Not all registers are allowed to be read by userspace.
1215 *
1216 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
1217 * \param dword_offset - \c [in] Register offset in dwords
1218 * \param count - \c [in] The number of registers to read starting
1219 * from the offset
1220 * \param instance - \c [in] GRBM_GFX_INDEX selector. It may have other
1221 * uses. Set it to 0xffffffff if unsure.
1222 * \param flags - \c [in] Flags with additional information.
1223 * \param values - \c [out] The pointer to return values.
1224 *
1225 * \return 0 on success\n
1226 * >0 - AMD specific error code\n
1227 * <0 - Negative POSIX error code
1228 *
1229*/
1230int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset,
1231 unsigned count, uint32_t instance, uint32_t flags,
1232 uint32_t *values);
1233
1234
1235
1236/**
1237 * Request GPU access to user allocated memory e.g. via "malloc"
1238 *
1239 * \param dev - [in] Device handle. See #amdgpu_device_initialize()
1240 * \param cpu - [in] CPU address of user allocated memory which we
1241 * want to map to GPU address space (make GPU accessible)
1242 * (This address must be correctly aligned).
1243 * \param size - [in] Size of allocation (must be correctly aligned)
1244 * \param amdgpu_bo_alloc_result - [out] Handle of allocation to be passed as resource
1245 * on submission and be used in other operations.(e.g. for VA submission)
1246 * (Temporarily, amdgpu_bo_alloc_result is defined as the parameter to return the mc address.)
1247 *
1248 *
1249 * \return 0 on success
1250 * >0 - AMD specific error code
1251 * <0 - Negative POSIX Error code
1252 *
1253 *
1254 * \note
1255 * This call doesn't guarantee that such memory will be persistently
1256 * "locked" / made non-pageable. The purpose of this call is to provide an
1257 * opportunity for the GPU to get access to this resource during submission.
1258 *
1259 * The maximum amount of memory which could be mapped in this call depends
1260 * on whether overcommit is disabled or not. If overcommit is disabled then the
1261 * max. amount of memory to be pinned will be limited by the "free" size left in
1262 * the total amount of memory which could be locked simultaneously ("GART" size).
1263 *
1264 * The supported (theoretical) max. size of a mapping is restricted only by
1265 * the "GART" size.
1266 *
1267 * It is the responsibility of the caller to correctly specify access rights
1268 * on VA assignment.
1269*/
1270int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
1271 void *cpu,
1272 uint64_t size,
1273 struct amdgpu_bo_alloc_result *info);
1274
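A userptr sketch, assuming "dev" from the initialization sketch above: make a
page-aligned, malloc-style allocation visible to the GPU. The 4096-byte page
size and the 1 MiB size are assumptions for illustration.

	void *ptr = NULL;
	struct amdgpu_bo_alloc_result user_bo;

	if (posix_memalign(&ptr, 4096, 1 << 20) == 0)
		amdgpu_create_bo_from_user_mem(dev, ptr, 1 << 20, &user_bo);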
1275
1276#endif /* _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
new file mode 100644
index 00000000..8ba45929
--- /dev/null
+++ b/amdgpu/amdgpu_bo.c
@@ -0,0 +1,626 @@
1/*
2 * Copyright © 2014 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <errno.h>
32#include <fcntl.h>
33#include <unistd.h>
34#include <sys/ioctl.h>
35#include <sys/mman.h>
36#include <sys/time.h>
37
38#include "libdrm_macros.h"
39#include "xf86drm.h"
40#include "amdgpu_drm.h"
41#include "amdgpu_internal.h"
42#include "util_hash_table.h"
43
44static void amdgpu_close_kms_handle(amdgpu_device_handle dev,
45 uint32_t handle)
46{
47 struct drm_gem_close args = {};
48
49 args.handle = handle;
50 drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
51}
52
53void amdgpu_bo_free_internal(amdgpu_bo_handle bo)
54{
55 /* Remove the buffer from the hash tables. */
56 pthread_mutex_lock(&bo->dev->bo_table_mutex);
57 util_hash_table_remove(bo->dev->bo_handles,
58 (void*)(uintptr_t)bo->handle);
59 if (bo->flink_name) {
60 util_hash_table_remove(bo->dev->bo_flink_names,
61 (void*)(uintptr_t)bo->flink_name);
62 }
63 pthread_mutex_unlock(&bo->dev->bo_table_mutex);
64
65 /* Release CPU access. */
66 if (bo->cpu_map_count > 0) {
67 bo->cpu_map_count = 1;
68 amdgpu_bo_cpu_unmap(bo);
69 }
70
71 amdgpu_close_kms_handle(bo->dev, bo->handle);
72 pthread_mutex_destroy(&bo->cpu_access_mutex);
73 amdgpu_vamgr_free_va(&bo->dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size);
74 free(bo);
75}
76
77int amdgpu_bo_alloc(amdgpu_device_handle dev,
78 struct amdgpu_bo_alloc_request *alloc_buffer,
79 struct amdgpu_bo_alloc_result *info)
80{
81 struct amdgpu_bo *bo;
82 union drm_amdgpu_gem_create args;
83 unsigned heap = alloc_buffer->preferred_heap;
84 int r = 0;
85
86 /* It's an error if the heap is not specified */
87 if (!(heap & (AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM)))
88 return -EINVAL;
89
90 bo = calloc(1, sizeof(struct amdgpu_bo));
91 if (!bo)
92 return -ENOMEM;
93
94 atomic_set(&bo->refcount, 1);
95 bo->dev = dev;
96 bo->alloc_size = alloc_buffer->alloc_size;
97
98 memset(&args, 0, sizeof(args));
99 args.in.bo_size = alloc_buffer->alloc_size;
100 args.in.alignment = alloc_buffer->phys_alignment;
101
102 /* Set the placement. */
103 args.in.domains = heap & AMDGPU_GEM_DOMAIN_MASK;
104 args.in.domain_flags = alloc_buffer->flags & AMDGPU_GEM_CREATE_CPU_GTT_MASK;
105
106 /* Allocate the buffer with the preferred heap. */
107 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_CREATE,
108 &args, sizeof(args));
109 if (r) {
110 free(bo);
111 return r;
112 }
113
114 bo->handle = args.out.handle;
115
116 pthread_mutex_init(&bo->cpu_access_mutex, NULL);
117
118 /* map the buffer to the GPU virtual address space */
119 {
120 union drm_amdgpu_gem_va va;
121
122 memset(&va, 0, sizeof(va));
123
124 bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, alloc_buffer->alloc_size, alloc_buffer->phys_alignment);
125
126 va.in.handle = bo->handle;
127 va.in.operation = AMDGPU_VA_OP_MAP;
128 va.in.flags = AMDGPU_VM_PAGE_READABLE |
129 AMDGPU_VM_PAGE_WRITEABLE |
130 AMDGPU_VM_PAGE_EXECUTABLE;
131 va.in.va_address = bo->virtual_mc_base_address;
132
133 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
134 if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) {
135 amdgpu_bo_free_internal(bo);
136 return r;
137 }
138 pthread_mutex_lock(&dev->bo_table_mutex);
139
140 util_hash_table_set(dev->bo_vas,
141 (void*)(uintptr_t)bo->virtual_mc_base_address, bo);
142 pthread_mutex_unlock(&dev->bo_table_mutex);
143 }
144
145 info->buf_handle = bo;
146 info->virtual_mc_base_address = bo->virtual_mc_base_address;
147 return 0;
148}
149
150int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,
151 struct amdgpu_bo_metadata *info)
152{
153 struct drm_amdgpu_gem_metadata args = {};
154
155 args.handle = bo->handle;
156 args.op = AMDGPU_GEM_METADATA_OP_SET_METADATA;
157 args.data.flags = info->flags;
158 args.data.tiling_info = info->tiling_info;
159
160 if (info->size_metadata > sizeof(args.data.data))
161 return -EINVAL;
162
163 if (info->size_metadata) {
164 args.data.data_size_bytes = info->size_metadata;
165 memcpy(args.data.data, info->umd_metadata, info->size_metadata);
166 }
167
168 return drmCommandWriteRead(bo->dev->fd,
169 DRM_AMDGPU_GEM_METADATA,
170 &args, sizeof(args));
171}
172
173int amdgpu_bo_query_info(amdgpu_bo_handle bo,
174 struct amdgpu_bo_info *info)
175{
176 struct drm_amdgpu_gem_metadata metadata = {};
177 struct drm_amdgpu_gem_create_in bo_info = {};
178 struct drm_amdgpu_gem_op gem_op = {};
179 int r;
180
181 /* Query metadata. */
182 metadata.handle = bo->handle;
183 metadata.op = AMDGPU_GEM_METADATA_OP_GET_METADATA;
184
185 r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_METADATA,
186 &metadata, sizeof(metadata));
187 if (r)
188 return r;
189
190 if (metadata.data.data_size_bytes >
191 sizeof(info->metadata.umd_metadata))
192 return -EINVAL;
193
194 /* Query buffer info. */
195 gem_op.handle = bo->handle;
196 gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO;
197 gem_op.value = (intptr_t)&bo_info;
198
199 r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_OP,
200 &gem_op, sizeof(gem_op));
201 if (r)
202 return r;
203
204 memset(info, 0, sizeof(*info));
205 info->alloc_size = bo_info.bo_size;
206 info->phys_alignment = bo_info.alignment;
207 info->virtual_mc_base_address = bo->virtual_mc_base_address;
208 info->preferred_heap = bo_info.domains;
209 info->alloc_flags = bo_info.domain_flags;
210 info->metadata.flags = metadata.data.flags;
211 info->metadata.tiling_info = metadata.data.tiling_info;
212
213 info->metadata.size_metadata = metadata.data.data_size_bytes;
214 if (metadata.data.data_size_bytes > 0)
215 memcpy(info->metadata.umd_metadata, metadata.data.data,
216 metadata.data.data_size_bytes);
217
218 return 0;
219}
220
221static void amdgpu_add_handle_to_table(amdgpu_bo_handle bo)
222{
223 pthread_mutex_lock(&bo->dev->bo_table_mutex);
224 util_hash_table_set(bo->dev->bo_handles,
225 (void*)(uintptr_t)bo->handle, bo);
226 pthread_mutex_unlock(&bo->dev->bo_table_mutex);
227}
228
229static int amdgpu_bo_export_flink(amdgpu_bo_handle bo)
230{
231 struct drm_gem_flink flink;
232 int fd, dma_fd;
233 uint32_t handle;
234 int r;
235
236 fd = bo->dev->fd;
237 handle = bo->handle;
238 if (bo->flink_name)
239 return 0;
240
241
242 if (bo->dev->flink_fd != bo->dev->fd) {
243 r = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC,
244 &dma_fd);
245 if (!r) {
246 r = drmPrimeFDToHandle(bo->dev->flink_fd, dma_fd, &handle);
247 close(dma_fd);
248 }
249 if (r)
250 return r;
251 fd = bo->dev->flink_fd;
252 }
253 memset(&flink, 0, sizeof(flink));
254 flink.handle = handle;
255
256 r = drmIoctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
257 if (r)
258 return r;
259
260 bo->flink_name = flink.name;
261
262 if (bo->dev->flink_fd != bo->dev->fd) {
263 struct drm_gem_close args = {};
264 args.handle = handle;
265 drmIoctl(bo->dev->flink_fd, DRM_IOCTL_GEM_CLOSE, &args);
266 }
267
268 pthread_mutex_lock(&bo->dev->bo_table_mutex);
269 util_hash_table_set(bo->dev->bo_flink_names,
270 (void*)(uintptr_t)bo->flink_name,
271 bo);
272 pthread_mutex_unlock(&bo->dev->bo_table_mutex);
273
274 return 0;
275}
276
277int amdgpu_bo_export(amdgpu_bo_handle bo,
278 enum amdgpu_bo_handle_type type,
279 uint32_t *shared_handle)
280{
281 int r;
282
283 switch (type) {
284 case amdgpu_bo_handle_type_gem_flink_name:
285 r = amdgpu_bo_export_flink(bo);
286 if (r)
287 return r;
288
289 *shared_handle = bo->flink_name;
290 return 0;
291
292 case amdgpu_bo_handle_type_kms:
293 r = amdgpu_bo_export_flink(bo);
294 if (r)
295 return r;
296
297 amdgpu_add_handle_to_table(bo);
298 *shared_handle = bo->handle;
299 return 0;
300
301 case amdgpu_bo_handle_type_dma_buf_fd:
302 amdgpu_add_handle_to_table(bo);
303 return drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC,
304 (int*)shared_handle);
305 }
306 return -EINVAL;
307}
308
309int amdgpu_bo_import(amdgpu_device_handle dev,
310 enum amdgpu_bo_handle_type type,
311 uint32_t shared_handle,
312 struct amdgpu_bo_import_result *output)
313{
314 struct drm_gem_open open_arg = {};
315 union drm_amdgpu_gem_va va;
316 struct amdgpu_bo *bo = NULL;
317 int r;
318 int dma_fd;
319 uint64_t dma_buf_size = 0;
320
321 /* Convert a DMA buf handle to a KMS handle now. */
322 if (type == amdgpu_bo_handle_type_dma_buf_fd) {
323 uint32_t handle;
324 off_t size;
325
326 /* Get a KMS handle. */
327 r = drmPrimeFDToHandle(dev->fd, shared_handle, &handle);
328 if (r) {
329 return r;
330 }
331
332 /* Query the buffer size. */
333 size = lseek(shared_handle, 0, SEEK_END);
334 if (size == (off_t)-1) {
335 amdgpu_close_kms_handle(dev, handle);
336 return -errno;
337 }
338 lseek(shared_handle, 0, SEEK_SET);
339
340 dma_buf_size = size;
341 shared_handle = handle;
342 }
343
344 /* We must maintain a list of pairs <handle, bo>, so that we always
345 * return the same amdgpu_bo instance for the same handle. */
346 pthread_mutex_lock(&dev->bo_table_mutex);
347
348 /* If we have already created a buffer with this handle, find it. */
349 switch (type) {
350 case amdgpu_bo_handle_type_gem_flink_name:
351 bo = util_hash_table_get(dev->bo_flink_names,
352 (void*)(uintptr_t)shared_handle);
353 break;
354
355 case amdgpu_bo_handle_type_dma_buf_fd:
356 bo = util_hash_table_get(dev->bo_handles,
357 (void*)(uintptr_t)shared_handle);
358 break;
359
360 case amdgpu_bo_handle_type_kms:
361 /* Importing a KMS handle in not allowed. */
362 pthread_mutex_unlock(&dev->bo_table_mutex);
363 return -EPERM;
364
365 default:
366 pthread_mutex_unlock(&dev->bo_table_mutex);
367 return -EINVAL;
368 }
369
370 if (bo) {
371 pthread_mutex_unlock(&dev->bo_table_mutex);
372
373 /* The buffer already exists, just bump the refcount. */
374 atomic_inc(&bo->refcount);
375
376 output->buf_handle = bo;
377 output->alloc_size = bo->alloc_size;
378 output->virtual_mc_base_address =
379 bo->virtual_mc_base_address;
380 return 0;
381 }
382
383 bo = calloc(1, sizeof(struct amdgpu_bo));
384 if (!bo) {
385 pthread_mutex_unlock(&dev->bo_table_mutex);
386 if (type == amdgpu_bo_handle_type_dma_buf_fd) {
387 amdgpu_close_kms_handle(dev, shared_handle);
388 }
389 return -ENOMEM;
390 }
391
392 /* Open the handle. */
393 switch (type) {
394 case amdgpu_bo_handle_type_gem_flink_name:
395 open_arg.name = shared_handle;
396 r = drmIoctl(dev->flink_fd, DRM_IOCTL_GEM_OPEN, &open_arg);
397 if (r) {
398 free(bo);
399 pthread_mutex_unlock(&dev->bo_table_mutex);
400 return r;
401 }
402
403 bo->handle = open_arg.handle;
404 if (dev->flink_fd != dev->fd) {
405 r = drmPrimeHandleToFD(dev->flink_fd, bo->handle, DRM_CLOEXEC, &dma_fd);
406 if (r) {
407 free(bo);
408 pthread_mutex_unlock(&dev->bo_table_mutex);
409 return r;
410 }
411 r = drmPrimeFDToHandle(dev->fd, dma_fd, &bo->handle );
412
413 close(dma_fd);
414
415 if (r) {
416 free(bo);
417 pthread_mutex_unlock(&dev->bo_table_mutex);
418 return r;
419 }
420 }
421 bo->flink_name = shared_handle;
422 bo->alloc_size = open_arg.size;
423 util_hash_table_set(dev->bo_flink_names,
424 (void*)(uintptr_t)bo->flink_name, bo);
425 break;
426
427 case amdgpu_bo_handle_type_dma_buf_fd:
428 bo->handle = shared_handle;
429 bo->alloc_size = dma_buf_size;
430 break;
431
432 case amdgpu_bo_handle_type_kms:
433 assert(0); /* unreachable */
434 }
435
436 /* Initialize it. */
437 atomic_set(&bo->refcount, 1);
438 bo->dev = dev;
439 pthread_mutex_init(&bo->cpu_access_mutex, NULL);
440
441 bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, bo->alloc_size, 1 << 20);
442
443 memset(&va, 0, sizeof(va));
444 va.in.handle = bo->handle;
445 va.in.operation = AMDGPU_VA_OP_MAP;
446 va.in.va_address = bo->virtual_mc_base_address;
447 va.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
448 AMDGPU_VM_PAGE_EXECUTABLE;
449
450 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
451 if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) {
452 pthread_mutex_unlock(&dev->bo_table_mutex);
453 amdgpu_vamgr_free_va(&dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size);
454 amdgpu_bo_reference(&bo, NULL);
455 return r;
456 }
457
458 util_hash_table_set(dev->bo_vas,
459 (void*)(uintptr_t)bo->virtual_mc_base_address, bo);
460 util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
461 pthread_mutex_unlock(&dev->bo_table_mutex);
462
463 output->buf_handle = bo;
464 output->alloc_size = bo->alloc_size;
465 output->virtual_mc_base_address = bo->virtual_mc_base_address;
466 return 0;
467}
468
469int amdgpu_bo_free(amdgpu_bo_handle buf_handle)
470{
471 /* Just drop the reference. */
472 amdgpu_bo_reference(&buf_handle, NULL);
473 return 0;
474}
475
476int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
477{
478 union drm_amdgpu_gem_mmap args;
479 void *ptr;
480 int r;
481
482 pthread_mutex_lock(&bo->cpu_access_mutex);
483
484 if (bo->cpu_ptr) {
485 /* already mapped */
486 assert(bo->cpu_map_count > 0);
487 bo->cpu_map_count++;
488 *cpu = bo->cpu_ptr;
489 pthread_mutex_unlock(&bo->cpu_access_mutex);
490 return 0;
491 }
492
493 assert(bo->cpu_map_count == 0);
494
495 memset(&args, 0, sizeof(args));
496
497 /* Query the buffer address (args.addr_ptr).
498 * The kernel driver ignores the offset and size parameters. */
499 args.in.handle = bo->handle;
500
501 r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args,
502 sizeof(args));
503 if (r) {
504 pthread_mutex_unlock(&bo->cpu_access_mutex);
505 return r;
506 }
507
508 /* Map the buffer. */
509 ptr = drm_mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED,
510 bo->dev->fd, args.out.addr_ptr);
511 if (ptr == MAP_FAILED) {
512 pthread_mutex_unlock(&bo->cpu_access_mutex);
513 return -errno;
514 }
515
516 bo->cpu_ptr = ptr;
517 bo->cpu_map_count = 1;
518 pthread_mutex_unlock(&bo->cpu_access_mutex);
519
520 *cpu = ptr;
521 return 0;
522}
523
524int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo)
525{
526 int r;
527
528 pthread_mutex_lock(&bo->cpu_access_mutex);
529 assert(bo->cpu_map_count >= 0);
530
531 if (bo->cpu_map_count == 0) {
532 /* not mapped */
533 pthread_mutex_unlock(&bo->cpu_access_mutex);
534 return -EBADMSG;
535 }
536
537 bo->cpu_map_count--;
538 if (bo->cpu_map_count > 0) {
539 /* mapped multiple times */
540 pthread_mutex_unlock(&bo->cpu_access_mutex);
541 return 0;
542 }
543
544 r = drm_munmap(bo->cpu_ptr, bo->alloc_size) == 0 ? 0 : -errno;
545 bo->cpu_ptr = NULL;
546 pthread_mutex_unlock(&bo->cpu_access_mutex);
547 return r;
548}
549
550int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
551 struct amdgpu_buffer_size_alignments *info)
552{
553 info->size_local = dev->dev_info.pte_fragment_size;
554 info->size_remote = dev->dev_info.gart_page_size;
555 return 0;
556}
557
558int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo,
559 uint64_t timeout_ns,
560 bool *busy)
561{
562 union drm_amdgpu_gem_wait_idle args;
563 int r;
564
565 memset(&args, 0, sizeof(args));
566 args.in.handle = bo->handle;
567 args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
568
569 r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE,
570 &args, sizeof(args));
571
572 if (r == 0) {
573 *busy = args.out.status;
574 return 0;
575 } else {
576 fprintf(stderr, "amdgpu: GEM_WAIT_IDLE failed with %i\n", r);
577 return r;
578 }
579}
580
581int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
582 void *cpu,
583 uint64_t size,
584 struct amdgpu_bo_alloc_result *info)
585{
586 int r;
587 struct amdgpu_bo *bo;
588 struct drm_amdgpu_gem_userptr args;
589 union drm_amdgpu_gem_va va;
590
591 memset(&args, 0, sizeof(args));
592 args.addr = (uint64_t)cpu;
593 args.flags = AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_REGISTER;
594 args.size = size;
595 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_USERPTR,
596 &args, sizeof(args));
597 if (r)
598 return r;
599
600 bo = calloc(1, sizeof(struct amdgpu_bo));
601 if (!bo)
602 return -ENOMEM;
603
604 atomic_set(&bo->refcount, 1);
605 bo->dev = dev;
606 bo->alloc_size = size;
607 bo->handle = args.handle;
608 bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, size, 4 * 1024);
609
610 memset(&va, 0, sizeof(va));
611 va.in.handle = bo->handle;
612 va.in.operation = AMDGPU_VA_OP_MAP;
613 va.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
614 AMDGPU_VM_PAGE_EXECUTABLE;
615 va.in.va_address = bo->virtual_mc_base_address;
616 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
617 if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) {
618 amdgpu_bo_free_internal(bo);
619 return r;
620 }
621 util_hash_table_set(dev->bo_vas,
622 (void*)(uintptr_t)bo->virtual_mc_base_address, bo);
623 info->buf_handle = bo;
624 info->virtual_mc_base_address = bo->virtual_mc_base_address;
625 return r;
626}
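
[Editor's note] The buffer-object entry points above combine into a short allocate/map/use/release cycle. A minimal sketch, assuming a valid amdgpu_device_handle named dev, the usual libc headers, and omitting most error handling (amdgpu_bo_alloc is declared in amdgpu.h as part of this commit):

	/* Hedged sketch: dev is assumed to be an initialized amdgpu_device_handle. */
	struct amdgpu_bo_alloc_request req = {0};
	struct amdgpu_bo_alloc_result res;
	void *cpu;
	int r;

	req.alloc_size = 64 * 1024;
	req.phys_alignment = 4 * 1024;
	req.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;

	r = amdgpu_bo_alloc(dev, &req, &res);            /* res.virtual_mc_base_address holds the GPU VA */
	if (!r)
		r = amdgpu_bo_cpu_map(res.buf_handle, &cpu); /* nested maps only bump cpu_map_count */
	if (!r) {
		memset(cpu, 0, req.alloc_size);
		amdgpu_bo_cpu_unmap(res.buf_handle);
		amdgpu_bo_free(res.buf_handle);              /* drops the reference taken at allocation */
	}
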
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
new file mode 100644
index 00000000..614904da
--- /dev/null
+++ b/amdgpu/amdgpu_cs.c
@@ -0,0 +1,981 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23#include <stdlib.h>
24#include <stdio.h>
25#include <string.h>
26#include <errno.h>
27#include <pthread.h>
28#include <sched.h>
29#include <sys/ioctl.h>
30
31#include "xf86drm.h"
32#include "amdgpu_drm.h"
33#include "amdgpu_internal.h"
34
35/**
36 * Create an IB buffer.
37 *
38 * \param dev - \c [in] Device handle
39 * \param context - \c [in] GPU Context
40 * \param ib_size - \c [in] Size of allocation
41 * \param ib - \c [out] return the pointer to the created IB buffer
42 *
43 * \return 0 on success otherwise POSIX Error code
44*/
45static int amdgpu_cs_create_ib(amdgpu_device_handle dev,
46 amdgpu_context_handle context,
47 enum amdgpu_cs_ib_size ib_size,
48 amdgpu_ib_handle *ib)
49{
50 struct amdgpu_bo_alloc_request alloc_buffer;
51 struct amdgpu_bo_alloc_result info;
52 int r;
53 void *cpu;
54 struct amdgpu_ib *new_ib;
55
56 memset(&alloc_buffer, 0, sizeof(alloc_buffer));
57
58 switch (ib_size) {
59 case amdgpu_cs_ib_size_4K:
60 alloc_buffer.alloc_size = 4 * 1024;
61 break;
62 case amdgpu_cs_ib_size_16K:
63 alloc_buffer.alloc_size = 16 * 1024;
64 break;
65 case amdgpu_cs_ib_size_32K:
66 alloc_buffer.alloc_size = 32 * 1024;
67 break;
68 case amdgpu_cs_ib_size_64K:
69 alloc_buffer.alloc_size = 64 * 1024;
70 break;
71 case amdgpu_cs_ib_size_128K:
72 alloc_buffer.alloc_size = 128 * 1024;
73 break;
74 default:
75 return -EINVAL;
76 }
77
78 alloc_buffer.phys_alignment = 4 * 1024;
79
80 alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
81
82 r = amdgpu_bo_alloc(dev,
83 &alloc_buffer,
84 &info);
85 if (r)
86 return r;
87
88 r = amdgpu_bo_cpu_map(info.buf_handle, &cpu);
89 if (r) {
90 amdgpu_bo_free(info.buf_handle);
91 return r;
92 }
93
94 new_ib = malloc(sizeof(struct amdgpu_ib));
95 if (NULL == new_ib) {
96 amdgpu_bo_cpu_unmap(info.buf_handle);
97 amdgpu_bo_free(info.buf_handle);
98 return -ENOMEM;
99 }
100
101 new_ib->buf_handle = info.buf_handle;
102 new_ib->cpu = cpu;
103 new_ib->virtual_mc_base_address = info.virtual_mc_base_address;
104 new_ib->ib_size = ib_size;
105 *ib = new_ib;
106 return 0;
107}
108
109/**
110 * Destroy an IB buffer.
111 *
112 * \param dev - \c [in] Device handle
113 * \param ib - \c [in] the IB buffer
114 *
115 * \return 0 on success otherwise POSIX Error code
116*/
117static int amdgpu_cs_destroy_ib(amdgpu_device_handle dev,
118 amdgpu_ib_handle ib)
119{
120 int r;
121 r = amdgpu_bo_cpu_unmap(ib->buf_handle);
122 if (r)
123 return r;
124
125 r = amdgpu_bo_free(ib->buf_handle);
126 if (r)
127 return r;
128
129 free(ib);
130 return 0;
131}
132
133/**
134 * Initialize IB pools to empty.
135 *
136 * \param context - \c [in] GPU Context
137 *
138 * \return 0 on success otherwise POSIX Error code
139*/
140static int amdgpu_cs_init_ib_pool(amdgpu_context_handle context)
141{
142 int i;
143 int r;
144
145 r = pthread_mutex_init(&context->pool_mutex, NULL);
146 if (r)
147 return r;
148
149 for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++)
150 LIST_INITHEAD(&context->ib_pools[i]);
151
152 return 0;
153}
154
155/**
156 * Allocate an IB buffer from IB pools.
157 *
158 * \param dev - \c [in] Device handle
159 * \param context - \c [in] GPU Context
160 * \param ib_size - \c [in] Size of allocation
161 * \param ib - \c [out] return the pointer to the allocated IB buffer
162 *
163 * \return 0 on success otherwise POSIX Error code
164*/
165static int amdgpu_cs_alloc_from_ib_pool(amdgpu_device_handle dev,
166 amdgpu_context_handle context,
167 enum amdgpu_cs_ib_size ib_size,
168 amdgpu_ib_handle *ib)
169{
170 int r;
171 struct list_head *head;
172 head = &context->ib_pools[ib_size];
173
174 r = -ENOMEM;
175 pthread_mutex_lock(&context->pool_mutex);
176 if (!LIST_IS_EMPTY(head)) {
177 *ib = LIST_ENTRY(struct amdgpu_ib, head->next, list_node);
178 LIST_DEL(&(*ib)->list_node);
179 r = 0;
180 }
181 pthread_mutex_unlock(&context->pool_mutex);
182
183 return r;
184}
185
186/**
187 * Free an IB buffer to IB pools.
188 *
189 * \param context - \c [in] GPU Context
190 * \param ib - \c [in] the IB buffer
191 *
192 * \return N/A
193*/
194static void amdgpu_cs_free_to_ib_pool(amdgpu_context_handle context,
195 amdgpu_ib_handle ib)
196{
197 struct list_head *head;
198 head = &context->ib_pools[ib->ib_size];
199 pthread_mutex_lock(&context->pool_mutex);
200 LIST_ADD(&ib->list_node, head);
201 pthread_mutex_unlock(&context->pool_mutex);
202 return;
203}
204
205/**
206 * Destroy all IB buffers in pools
207 *
208 * \param dev - \c [in] Device handle
209 * \param context - \c [in] GPU Context
210 *
211 * \return 0 on success otherwise POSIX Error code
212*/
213static int amdgpu_cs_destroy_ib_pool(amdgpu_device_handle dev,
214 amdgpu_context_handle context)
215{
216 int i;
217 int r;
218 struct list_head *head;
219 struct amdgpu_ib *next;
220 struct amdgpu_ib *storage;
221
222 r = 0;
223 pthread_mutex_lock(&context->pool_mutex);
224 for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) {
225 head = &context->ib_pools[i];
226 LIST_FOR_EACH_ENTRY_SAFE(next, storage, head, list_node) {
227 r = amdgpu_cs_destroy_ib(dev, next);
228 if (r)
229 break;
230 }
231 }
232 pthread_mutex_unlock(&context->pool_mutex);
233 pthread_mutex_destroy(&context->pool_mutex);
234 return r;
235}
236
237/**
238 * Initialize pending IB lists
239 *
240 * \param context - \c [in] GPU Context
241 *
242 * \return 0 on success otherwise POSIX Error code
243*/
244static int amdgpu_cs_init_pendings(amdgpu_context_handle context)
245{
246 unsigned ip, inst;
247 uint32_t ring;
248 int r;
249
250 r = pthread_mutex_init(&context->pendings_mutex, NULL);
251 if (r)
252 return r;
253
254 for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
255 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
256 for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
257 LIST_INITHEAD(&context->pendings[ip][inst][ring]);
258
259 LIST_INITHEAD(&context->freed);
260 return 0;
261}
262
263/**
264 * Free pending IBs
265 *
266 * \param dev - \c [in] Device handle
267 * \param context - \c [in] GPU Context
268 *
269 * \return 0 on success otherwise POSIX Error code
270*/
271static int amdgpu_cs_destroy_pendings(amdgpu_device_handle dev,
272 amdgpu_context_handle context)
273{
274 int ip, inst;
275 uint32_t ring;
276 int r;
277 struct amdgpu_ib *next;
278 struct amdgpu_ib *s;
279 struct list_head *head;
280
281 r = 0;
282 pthread_mutex_lock(&context->pendings_mutex);
283 for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
284 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
285 for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) {
286 head = &context->pendings[ip][inst][ring];
287 LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
288 r = amdgpu_cs_destroy_ib(dev, next);
289 if (r)
290 break;
291 }
292 }
293
294 head = &context->freed;
295 LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
296 r = amdgpu_cs_destroy_ib(dev, next);
297 if (r)
298 break;
299 }
300
301 pthread_mutex_unlock(&context->pendings_mutex);
302 pthread_mutex_destroy(&context->pendings_mutex);
303 return r;
304}
305
306/**
307 * Add IB to pending IB lists without holding sequence_mutex.
308 *
309 * \param context - \c [in] GPU Context
310 * \param ib - \c [in] ib to be added to pending lists
311 * \param ip - \c [in] hw ip block
312 * \param ip_instance - \c [in] instance of the hw ip block
313 * \param ring - \c [in] Ring of hw ip
314 *
315 * \return N/A
316*/
317static void amdgpu_cs_add_pending(amdgpu_context_handle context,
318 amdgpu_ib_handle ib,
319 unsigned ip, unsigned ip_instance,
320 uint32_t ring)
321{
322 struct list_head *head;
323 pthread_mutex_lock(&context->pendings_mutex);
324 head = &context->pendings[ip][ip_instance][ring];
325 LIST_ADDTAIL(&ib->list_node, head);
326 pthread_mutex_unlock(&context->pendings_mutex);
327 return;
328}
329
330/**
331 * Garbage collector on a pending IB list without holding pendings_mutex.
332 * This function by itself is not multithread safe.
333 *
334 * \param context - \c [in] GPU Context
335 * \param ip - \c [in] hw ip block
336 * \param ip_instance - \c [in] instance of the hw ip block
337 * \param ring - \c [in] Ring of hw ip
338 * \param expired_fence - \c [in] fence expired
339 *
340 * \return N/A
341 * \note Hold pendings_mutex before calling this function.
342*/
343static void amdgpu_cs_pending_gc_not_safe(amdgpu_context_handle context,
344 unsigned ip, unsigned ip_instance,
345 uint32_t ring,
346 uint64_t expired_fence)
347{
348 struct list_head *head;
349 struct amdgpu_ib *next;
350 struct amdgpu_ib *s;
351 int r;
352
353 head = &context->pendings[ip][ip_instance][ring];
354 LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node)
355 if (next->cs_handle <= expired_fence) {
356 LIST_DEL(&next->list_node);
357 amdgpu_cs_free_to_ib_pool(context, next);
358 } else {
359 /* The pending list is a sorted list.
360 There is no need to continue. */
361 break;
362 }
363
364 /* walk the freed list as well */
365 head = &context->freed;
366 LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
367 bool busy;
368
369 r = amdgpu_bo_wait_for_idle(next->buf_handle, 0, &busy);
370 if (r || busy)
371 break;
372
373 LIST_DEL(&next->list_node);
374 amdgpu_cs_free_to_ib_pool(context, next);
375 }
376
377 return;
378}
379
380/**
381 * Garbage collector on a pending IB list
382 *
383 * \param context - \c [in] GPU Context
384 * \param ip - \c [in] hw ip block
385 * \param ip_instance - \c [in] instance of the hw ip block
386 * \param ring - \c [in] Ring of hw ip
387 * \param expired_fence - \c [in] fence expired
388 *
389 * \return N/A
390*/
391static void amdgpu_cs_pending_gc(amdgpu_context_handle context,
392 unsigned ip, unsigned ip_instance,
393 uint32_t ring,
394 uint64_t expired_fence)
395{
396 pthread_mutex_lock(&context->pendings_mutex);
397 amdgpu_cs_pending_gc_not_safe(context, ip, ip_instance, ring,
398 expired_fence);
399 pthread_mutex_unlock(&context->pendings_mutex);
400 return;
401}
402
403/**
404 * Garbage collector on all pending IB lists
405 *
406 * \param context - \c [in] GPU Context
407 *
408 * \return N/A
409*/
410static void amdgpu_cs_all_pending_gc(amdgpu_context_handle context)
411{
412 unsigned ip, inst;
413 uint32_t ring;
414 uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
415
416 pthread_mutex_lock(&context->sequence_mutex);
417 for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
418 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
419 for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
420 expired_fences[ip][inst][ring] =
421 context->expired_fences[ip][inst][ring];
422 pthread_mutex_unlock(&context->sequence_mutex);
423
424 pthread_mutex_lock(&context->pendings_mutex);
425 for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
426 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
427 for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
428 amdgpu_cs_pending_gc_not_safe(context, ip, inst, ring,
429 expired_fences[ip][inst][ring]);
430 pthread_mutex_unlock(&context->pendings_mutex);
431}
432
433/**
434 * Allocate an IB buffer
435 * If there is no free IB buffer in pools, create one.
436 *
437 * \param dev - \c [in] Device handle
438 * \param context - \c [in] GPU Context
439 * \param ib_size - \c [in] Size of allocation
440 * \param ib - \c [out] return the pointer to the allocated IB buffer
441 *
442 * \return 0 on success otherwise POSIX Error code
443*/
444static int amdgpu_cs_alloc_ib_local(amdgpu_device_handle dev,
445 amdgpu_context_handle context,
446 enum amdgpu_cs_ib_size ib_size,
447 amdgpu_ib_handle *ib)
448{
449 int r;
450
451 r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib);
452 if (!r)
453 return r;
454
455 amdgpu_cs_all_pending_gc(context);
456
457 /* Retry to allocate from free IB pools after garbage collector. */
458 r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib);
459 if (!r)
460 return r;
461
462 /* There is no suitable IB in free pools. Create one. */
463 r = amdgpu_cs_create_ib(dev, context, ib_size, ib);
464 return r;
465}
466
467int amdgpu_cs_alloc_ib(amdgpu_device_handle dev,
468 amdgpu_context_handle context,
469 enum amdgpu_cs_ib_size ib_size,
470 struct amdgpu_cs_ib_alloc_result *output)
471{
472 int r;
473 amdgpu_ib_handle ib;
474
475 if (NULL == dev)
476 return -EINVAL;
477 if (NULL == context)
478 return -EINVAL;
479 if (NULL == output)
480 return -EINVAL;
481 if (ib_size >= AMDGPU_CS_IB_SIZE_NUM)
482 return -EINVAL;
483
484 r = amdgpu_cs_alloc_ib_local(dev, context, ib_size, &ib);
485 if (!r) {
486 output->handle = ib;
487 output->cpu = ib->cpu;
488 output->mc_address = ib->virtual_mc_base_address;
489 }
490
491 return r;
492}
493
494int amdgpu_cs_free_ib(amdgpu_device_handle dev,
495 amdgpu_context_handle context,
496 amdgpu_ib_handle handle)
497{
498 if (NULL == dev)
499 return -EINVAL;
500 if (NULL == context)
501 return -EINVAL;
502 if (NULL == handle)
503 return -EINVAL;
504
505 pthread_mutex_lock(&context->pendings_mutex);
506 LIST_ADD(&handle->list_node, &context->freed);
507 pthread_mutex_unlock(&context->pendings_mutex);
508 return 0;
509}
510
511/**
512 * Create command submission context
513 *
514 * \param dev - \c [in] amdgpu device handle
515 * \param context - \c [out] amdgpu context handle
516 *
517 * \return 0 on success otherwise POSIX Error code
518*/
519int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
520 amdgpu_context_handle *context)
521{
522 struct amdgpu_context *gpu_context;
523 union drm_amdgpu_ctx args;
524 int r;
525
526 if (NULL == dev)
527 return -EINVAL;
528 if (NULL == context)
529 return -EINVAL;
530
531 gpu_context = calloc(1, sizeof(struct amdgpu_context));
532 if (NULL == gpu_context)
533 return -ENOMEM;
534
535 r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
536 if (r)
537 goto error_mutex;
538
539 r = amdgpu_cs_init_ib_pool(gpu_context);
540 if (r)
541 goto error_pool;
542
543 r = amdgpu_cs_init_pendings(gpu_context);
544 if (r)
545 goto error_pendings;
546
547 r = amdgpu_cs_alloc_ib_local(dev, gpu_context, amdgpu_cs_ib_size_4K,
548 &gpu_context->fence_ib);
549 if (r)
550 goto error_fence_ib;
551
552
553 memset(&args, 0, sizeof(args));
554 args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
555 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
556 if (r)
557 goto error_kernel;
558
559 gpu_context->id = args.out.alloc.ctx_id;
560 *context = (amdgpu_context_handle)gpu_context;
561
562 return 0;
563
564error_kernel:
565 amdgpu_cs_free_ib(dev, gpu_context, gpu_context->fence_ib);
566
567error_fence_ib:
568 amdgpu_cs_destroy_pendings(dev, gpu_context);
569
570error_pendings:
571 amdgpu_cs_destroy_ib_pool(dev, gpu_context);
572
573error_pool:
574 pthread_mutex_destroy(&gpu_context->sequence_mutex);
575
576error_mutex:
577 free(gpu_context);
578 return r;
579}
580
581/**
582 * Release command submission context
583 *
584 * \param dev - \c [in] amdgpu device handle
585 * \param context - \c [in] amdgpu context handle
586 *
587 * \return 0 on success otherwise POSIX Error code
588*/
589int amdgpu_cs_ctx_free(amdgpu_device_handle dev,
590 amdgpu_context_handle context)
591{
592 int r;
593 union drm_amdgpu_ctx args;
594
595 if (NULL == dev)
596 return -EINVAL;
597 if (NULL == context)
598 return -EINVAL;
599
600 r = amdgpu_cs_free_ib(dev, context, context->fence_ib);
601 if (r)
602 return r;
603
604 r = amdgpu_cs_destroy_pendings(dev, context);
605 if (r)
606 return r;
607
608 r = amdgpu_cs_destroy_ib_pool(dev, context);
609 if (r)
610 return r;
611
612 pthread_mutex_destroy(&context->sequence_mutex);
613
614 /* now deal with kernel side */
615 memset(&args, 0, sizeof(args));
616 args.in.op = AMDGPU_CTX_OP_FREE_CTX;
617 args.in.ctx_id = context->id;
618 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
619
620 free(context);
621
622 return r;
623}
624
625static int amdgpu_cs_create_bo_list(amdgpu_device_handle dev,
626 amdgpu_context_handle context,
627 struct amdgpu_cs_request *request,
628 amdgpu_ib_handle fence_ib,
629 uint32_t *handle)
630{
631 struct drm_amdgpu_bo_list_entry *list;
632 union drm_amdgpu_bo_list args;
633 unsigned num_resources;
634 unsigned i;
635 int r;
636
637 num_resources = request->number_of_resources;
638 if (fence_ib)
639 ++num_resources;
640
641 list = alloca(sizeof(struct drm_amdgpu_bo_list_entry) * num_resources);
642
643 memset(&args, 0, sizeof(args));
644 args.in.operation = AMDGPU_BO_LIST_OP_CREATE;
645 args.in.bo_number = num_resources;
646 args.in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
647 args.in.bo_info_ptr = (uint64_t)(uintptr_t)list;
648
649 for (i = 0; i < request->number_of_resources; i++) {
650 list[i].bo_handle = request->resources[i]->handle;
651 if (request->resource_flags)
652 list[i].bo_priority = request->resource_flags[i];
653 else
654 list[i].bo_priority = 0;
655 }
656
657 if (fence_ib)
658 list[i].bo_handle = fence_ib->buf_handle->handle;
659
660 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST,
661 &args, sizeof(args));
662 if (r)
663 return r;
664
665 *handle = args.out.list_handle;
666 return 0;
667}
668
669static int amdgpu_cs_free_bo_list(amdgpu_device_handle dev, uint32_t handle)
670{
671 union drm_amdgpu_bo_list args;
672 int r;
673
674 memset(&args, 0, sizeof(args));
675 args.in.operation = AMDGPU_BO_LIST_OP_DESTROY;
676 args.in.list_handle = handle;
677
678 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST,
679 &args, sizeof(args));
680
681 return r;
682}
683
684static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
685{
686 return ip * AMDGPU_CS_MAX_RINGS + ring;
687}
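
[Editor's note] As a worked illustration of the slot computed above (with AMDGPU_CS_MAX_RINGS defined as 8 in amdgpu_internal.h): a fence for IP type 0 on ring 2 uses slot 0 * 8 + 2 = 2, which amdgpu_cs_submit_one() below scales by sizeof(uint64_t) to byte offset 16 inside the per-context fence IB.
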
688
689/**
690 * Submit command to kernel DRM
691 * \param dev - \c [in] Device handle
692 * \param context - \c [in] GPU Context
693 * \param ibs_request - \c [in] Pointer to submission requests
694 * \param fence - \c [out] return fence for this submission
695 *
696 * \return 0 on success otherwise POSIX Error code
697 * \sa amdgpu_cs_submit()
698*/
699static int amdgpu_cs_submit_one(amdgpu_device_handle dev,
700 amdgpu_context_handle context,
701 struct amdgpu_cs_request *ibs_request,
702 uint64_t *fence)
703{
704 int r;
705 uint32_t i, size;
706 union drm_amdgpu_cs cs;
707 uint64_t *chunk_array;
708 struct drm_amdgpu_cs_chunk *chunks;
709 struct drm_amdgpu_cs_chunk_data *chunk_data;
710
711 if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
712 return -EINVAL;
713 if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
714 return -EINVAL;
715 if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
716 return -EINVAL;
717
718 size = (ibs_request->number_of_ibs + 1) * ((sizeof(uint64_t) +
719 sizeof(struct drm_amdgpu_cs_chunk) +
720 sizeof(struct drm_amdgpu_cs_chunk_data)) +
721 ibs_request->number_of_resources + 1) *
722 sizeof(struct drm_amdgpu_bo_list_entry);
723 chunk_array = malloc(size);
724 if (NULL == chunk_array)
725 return -ENOMEM;
726 memset(chunk_array, 0, size);
727
728 chunks = (struct drm_amdgpu_cs_chunk *)(chunk_array + ibs_request->number_of_ibs + 1);
729 chunk_data = (struct drm_amdgpu_cs_chunk_data *)(chunks + ibs_request->number_of_ibs + 1);
730
731 memset(&cs, 0, sizeof(cs));
732 cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
733 cs.in.ctx_id = context->id;
734 cs.in.num_chunks = ibs_request->number_of_ibs;
735 /* IB chunks */
736 for (i = 0; i < ibs_request->number_of_ibs; i++) {
737 struct amdgpu_cs_ib_info *ib;
738 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
739 chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
740 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
741 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
742
743 ib = &ibs_request->ibs[i];
744
745 chunk_data[i].ib_data.handle = ib->ib_handle->buf_handle->handle;
746 chunk_data[i].ib_data.va_start = ib->ib_handle->virtual_mc_base_address;
747 chunk_data[i].ib_data.ib_bytes = ib->size * 4;
748 chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
749 chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
750 chunk_data[i].ib_data.ring = ibs_request->ring;
751
752 if (ib->flags & AMDGPU_CS_GFX_IB_CE)
753 chunk_data[i].ib_data.flags = AMDGPU_IB_FLAG_CE;
754 }
755
756 r = amdgpu_cs_create_bo_list(dev, context, ibs_request, NULL,
757 &cs.in.bo_list_handle);
758 if (r)
759 goto error_unlock;
760
761 pthread_mutex_lock(&context->sequence_mutex);
762
763 if (ibs_request->ip_type != AMDGPU_HW_IP_UVD &&
764 ibs_request->ip_type != AMDGPU_HW_IP_VCE) {
765 i = cs.in.num_chunks++;
766
767 /* fence chunk */
768 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
769 chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
770 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
771 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
772
773 /* fence bo handle */
774 chunk_data[i].fence_data.handle = context->fence_ib->buf_handle->handle;
775 /* offset */
776 chunk_data[i].fence_data.offset = amdgpu_cs_fence_index(
777 ibs_request->ip_type, ibs_request->ring);
778 chunk_data[i].fence_data.offset *= sizeof(uint64_t);
779 }
780
781 r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
782 &cs, sizeof(cs));
783 if (r)
784 goto error_unlock;
785
786
787	/* Hold sequence_mutex while adding records to the pending list,
788	   so the pending list stays sorted by fence value. */
789
790 for (i = 0; i < ibs_request->number_of_ibs; i++) {
791 struct amdgpu_cs_ib_info *ib;
792
793 ib = &ibs_request->ibs[i];
794 if (ib->flags & AMDGPU_CS_REUSE_IB)
795 continue;
796
797 ib->ib_handle->cs_handle = cs.out.handle;
798
799 amdgpu_cs_add_pending(context, ib->ib_handle, ibs_request->ip_type,
800 ibs_request->ip_instance,
801 ibs_request->ring);
802 }
803
804 *fence = cs.out.handle;
805
806 pthread_mutex_unlock(&context->sequence_mutex);
807
808 r = amdgpu_cs_free_bo_list(dev, cs.in.bo_list_handle);
809 if (r)
810 goto error_free;
811
812 free(chunk_array);
813 return 0;
814
815error_unlock:
816 pthread_mutex_unlock(&context->sequence_mutex);
817
818error_free:
819 free(chunk_array);
820 return r;
821}
822
823int amdgpu_cs_submit(amdgpu_device_handle dev,
824 amdgpu_context_handle context,
825 uint64_t flags,
826 struct amdgpu_cs_request *ibs_request,
827 uint32_t number_of_requests,
828 uint64_t *fences)
829{
830 int r;
831 uint32_t i;
832
833 if (NULL == dev)
834 return -EINVAL;
835 if (NULL == context)
836 return -EINVAL;
837 if (NULL == ibs_request)
838 return -EINVAL;
839 if (NULL == fences)
840 return -EINVAL;
841
842 r = 0;
843 for (i = 0; i < number_of_requests; i++) {
844 r = amdgpu_cs_submit_one(dev, context, ibs_request, fences);
845 if (r)
846 break;
847 fences++;
848 ibs_request++;
849 }
850
851 return r;
852}
853
854/**
855 * Calculate absolute timeout.
856 *
857 * \param timeout - \c [in] timeout in nanoseconds.
858 *
859 * \return absolute timeout in nanoseconds
860*/
861uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
862{
863 int r;
864
865 if (timeout != AMDGPU_TIMEOUT_INFINITE) {
866 struct timespec current;
867 r = clock_gettime(CLOCK_MONOTONIC, &current);
868 if (r)
869 return r;
870
871 timeout += ((uint64_t)current.tv_sec) * 1000000000ull;
872 timeout += current.tv_nsec;
873 }
874 return timeout;
875}
876
877static int amdgpu_ioctl_wait_cs(amdgpu_device_handle dev,
878 unsigned ip,
879 unsigned ip_instance,
880 uint32_t ring,
881 uint64_t handle,
882 uint64_t timeout_ns,
883 bool *busy)
884{
885 union drm_amdgpu_wait_cs args;
886 int r;
887
888 memset(&args, 0, sizeof(args));
889 args.in.handle = handle;
890 args.in.ip_type = ip;
891 args.in.ip_instance = ip_instance;
892 args.in.ring = ring;
893 args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
894
895 /* Handle errors manually here because of timeout */
896 r = ioctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
897 if (r == -1 && (errno == EINTR || errno == EAGAIN)) {
898 *busy = true;
899 return 0;
900 } else if (r)
901 return -errno;
902
903 *busy = args.out.status;
904 return 0;
905}
906
907int amdgpu_cs_query_fence_status(amdgpu_device_handle dev,
908 struct amdgpu_cs_query_fence *fence,
909 uint32_t *expired)
910{
911 amdgpu_context_handle context;
912 uint64_t *signaled_fence;
913 uint64_t *expired_fence;
914 unsigned ip_type, ip_instance;
915 uint32_t ring;
916 bool busy = true;
917 int r;
918
919 if (NULL == dev)
920 return -EINVAL;
921 if (NULL == fence)
922 return -EINVAL;
923 if (NULL == expired)
924 return -EINVAL;
925 if (NULL == fence->context)
926 return -EINVAL;
927 if (fence->ip_type >= AMDGPU_HW_IP_NUM)
928 return -EINVAL;
929 if (fence->ring >= AMDGPU_CS_MAX_RINGS)
930 return -EINVAL;
931
932 context = fence->context;
933 ip_type = fence->ip_type;
934 ip_instance = fence->ip_instance;
935 ring = fence->ring;
936 signaled_fence = context->fence_ib->cpu;
937 signaled_fence += amdgpu_cs_fence_index(ip_type, ring);
938 expired_fence = &context->expired_fences[ip_type][ip_instance][ring];
939 *expired = false;
940
941 pthread_mutex_lock(&context->sequence_mutex);
942 if (fence->fence <= *expired_fence) {
943 /* This fence value is expired already. */
944 pthread_mutex_unlock(&context->sequence_mutex);
945 *expired = true;
946 return 0;
947 }
948
949 if (fence->fence <= *signaled_fence) {
950 /* This fence value is signaled already. */
951 *expired_fence = *signaled_fence;
952 pthread_mutex_unlock(&context->sequence_mutex);
953 amdgpu_cs_pending_gc(context, ip_type, ip_instance, ring,
954 fence->fence);
955 *expired = true;
956 return 0;
957 }
958
959 pthread_mutex_unlock(&context->sequence_mutex);
960
961 r = amdgpu_ioctl_wait_cs(dev, ip_type, ip_instance, ring,
962 fence->fence, fence->timeout_ns, &busy);
963 if (!r && !busy) {
964 *expired = true;
965 pthread_mutex_lock(&context->sequence_mutex);
966		/* The thread doesn't hold sequence_mutex. Another thread could
967		   have updated *expired_fence already. Check whether there is a
968		   newly expired fence. */
969 if (fence->fence > *expired_fence) {
970 *expired_fence = fence->fence;
971 pthread_mutex_unlock(&context->sequence_mutex);
972 amdgpu_cs_pending_gc(context, ip_type, ip_instance,
973 ring, fence->fence);
974 } else {
975 pthread_mutex_unlock(&context->sequence_mutex);
976 }
977 }
978
979 return r;
980}
981
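
[Editor's note] Taken together, the context, IB and submission paths above support a simple submit-and-wait loop. A minimal sketch under stated assumptions: dev is an initialized amdgpu_device_handle, data_bo is an amdgpu_bo_handle the command stream references, num_dwords is a hypothetical IB length, and the PM4 contents written through ib.cpu are elided:

	/* Hedged sketch, not a complete program. */
	amdgpu_context_handle ctx;
	struct amdgpu_cs_ib_alloc_result ib;
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_request request = {0};
	struct amdgpu_cs_query_fence fence = {0};
	uint64_t fence_value;
	uint32_t expired, num_dwords = 16;
	int r;

	r = amdgpu_cs_ctx_create(dev, &ctx);
	if (!r)
		r = amdgpu_cs_alloc_ib(dev, ctx, amdgpu_cs_ib_size_4K, &ib);
	if (!r) {
		/* ... write num_dwords dwords of packets through ib.cpu ... */
		ib_info.ib_handle = ib.handle;
		ib_info.size = num_dwords;               /* counted in dwords */

		request.ip_type = AMDGPU_HW_IP_GFX;
		request.ring = 0;
		request.number_of_resources = 1;
		request.resources = &data_bo;
		request.number_of_ibs = 1;
		request.ibs = &ib_info;

		r = amdgpu_cs_submit(dev, ctx, 0, &request, 1, &fence_value);
	}
	if (!r) {
		fence.context = ctx;
		fence.ip_type = AMDGPU_HW_IP_GFX;
		fence.ring = 0;
		fence.fence = fence_value;
		fence.timeout_ns = AMDGPU_TIMEOUT_INFINITE;
		r = amdgpu_cs_query_fence_status(dev, &fence, &expired);
	}
	amdgpu_cs_ctx_free(dev, ctx);
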
diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c
new file mode 100644
index 00000000..c610fd38
--- /dev/null
+++ b/amdgpu/amdgpu_device.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24/**
25 * \file amdgpu_device.c
26 *
27 * Implementation of functions for AMD GPU device
28 *
29 *
30 */
31
32#include <sys/stat.h>
33#include <errno.h>
34#include <string.h>
35#include <stdio.h>
36#include <stdlib.h>
37
38#include "xf86drm.h"
39#include "amdgpu_drm.h"
40#include "amdgpu_internal.h"
41#include "util_hash_table.h"
42
43#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
44#define UINT_TO_PTR(x) ((void *)((intptr_t)(x)))
45#define RENDERNODE_MINOR_MASK 0xff7f
46
47pthread_mutex_t fd_mutex = PTHREAD_MUTEX_INITIALIZER;
48static struct util_hash_table *fd_tab;
49
50static unsigned handle_hash(void *key)
51{
52 return PTR_TO_UINT(key);
53}
54
55static int handle_compare(void *key1, void *key2)
56{
57 return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
58}
59
60static unsigned fd_hash(void *key)
61{
62 int fd = PTR_TO_UINT(key);
63 struct stat stat;
64 fstat(fd, &stat);
65
66 if (!S_ISCHR(stat.st_mode))
67 return stat.st_dev ^ stat.st_ino;
68 else
69 return stat.st_dev ^ (stat.st_rdev & RENDERNODE_MINOR_MASK);
70}
71
72static int fd_compare(void *key1, void *key2)
73{
74 int fd1 = PTR_TO_UINT(key1);
75 int fd2 = PTR_TO_UINT(key2);
76 struct stat stat1, stat2;
77 fstat(fd1, &stat1);
78 fstat(fd2, &stat2);
79
80 if (!S_ISCHR(stat1.st_mode) || !S_ISCHR(stat2.st_mode))
81 return stat1.st_dev != stat2.st_dev ||
82 stat1.st_ino != stat2.st_ino;
83 else
84 return major(stat1.st_rdev) != major(stat2.st_rdev) ||
85 (minor(stat1.st_rdev) & RENDERNODE_MINOR_MASK) !=
86 (minor(stat2.st_rdev) & RENDERNODE_MINOR_MASK);
87}
88
89/**
90* Get whether the fd is authenticated,
91*
92* \param fd - \c [in] File descriptor for AMD GPU device
93* \param auth - \c [out] Pointer to output whether the fd is authenticated
94* For a render node fd, auth is set to 0
95* For a legacy fd, the authentication status is queried from the kernel
96*
97* \return 0 on success\n
98* >0 - AMD specific error code\n
99* <0 - Negative POSIX Error code
100*/
101static int amdgpu_get_auth(int fd, int *auth)
102{
103 int r = 0;
104 drm_client_t client;
105
106 if (drmGetNodeTypeFromFd(fd) == DRM_NODE_RENDER)
107 *auth = 0;
108 else {
109 client.idx = 0;
110 r = drmIoctl(fd, DRM_IOCTL_GET_CLIENT, &client);
111 if (!r)
112 *auth = client.auth;
113 }
114 return r;
115}
116
117int amdgpu_device_initialize(int fd,
118 uint32_t *major_version,
119 uint32_t *minor_version,
120 amdgpu_device_handle *device_handle)
121{
122 struct amdgpu_device *dev;
123 drmVersionPtr version;
124 int r;
125 int flag_auth = 0;
126	int flag_authexist = 0;
127 uint32_t accel_working;
128
129 *device_handle = NULL;
130
131 pthread_mutex_lock(&fd_mutex);
132 if (!fd_tab)
133 fd_tab = util_hash_table_create(fd_hash, fd_compare);
134 r = amdgpu_get_auth(fd, &flag_auth);
135 if (r) {
136 pthread_mutex_unlock(&fd_mutex);
137 return r;
138 }
139 dev = util_hash_table_get(fd_tab, UINT_TO_PTR(fd));
140 if (dev) {
141 r = amdgpu_get_auth(dev->fd, &flag_authexist);
142 if (r) {
143 pthread_mutex_unlock(&fd_mutex);
144 return r;
145 }
146 if ((flag_auth) && (!flag_authexist)) {
147 dev->flink_fd = fd;
148 }
149 *major_version = dev->major_version;
150 *minor_version = dev->minor_version;
151 amdgpu_device_reference(device_handle, dev);
152 pthread_mutex_unlock(&fd_mutex);
153 return 0;
154 }
155
156 dev = calloc(1, sizeof(struct amdgpu_device));
157 if (!dev) {
158 pthread_mutex_unlock(&fd_mutex);
159 return -ENOMEM;
160 }
161
162 atomic_set(&dev->refcount, 1);
163
164 version = drmGetVersion(fd);
165 if (version->version_major != 3) {
166 fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
167 "only compatible with 3.x.x.\n",
168 __func__,
169 version->version_major,
170 version->version_minor,
171 version->version_patchlevel);
172 drmFreeVersion(version);
173 r = -EBADF;
174 goto cleanup;
175 }
176
177 dev->fd = fd;
178 dev->flink_fd = fd;
179 dev->major_version = version->version_major;
180 dev->minor_version = version->version_minor;
181 drmFreeVersion(version);
182
183 dev->bo_flink_names = util_hash_table_create(handle_hash,
184 handle_compare);
185 dev->bo_handles = util_hash_table_create(handle_hash, handle_compare);
186 dev->bo_vas = util_hash_table_create(handle_hash, handle_compare);
187 pthread_mutex_init(&dev->bo_table_mutex, NULL);
188
189 /* Check if acceleration is working. */
190 r = amdgpu_query_info(dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working);
191 if (r)
192 goto cleanup;
193 if (!accel_working) {
194 r = -EBADF;
195 goto cleanup;
196 }
197
198 r = amdgpu_query_gpu_info_init(dev);
199 if (r)
200 goto cleanup;
201
202 amdgpu_vamgr_init(dev);
203
204 *major_version = dev->major_version;
205 *minor_version = dev->minor_version;
206 *device_handle = dev;
207 util_hash_table_set(fd_tab, UINT_TO_PTR(fd), dev);
208 pthread_mutex_unlock(&fd_mutex);
209
210 return 0;
211
212cleanup:
213 free(dev);
214 pthread_mutex_unlock(&fd_mutex);
215 return r;
216}
217
218void amdgpu_device_free_internal(amdgpu_device_handle dev)
219{
220 util_hash_table_destroy(dev->bo_flink_names);
221 util_hash_table_destroy(dev->bo_handles);
222 util_hash_table_destroy(dev->bo_vas);
223 pthread_mutex_destroy(&dev->bo_table_mutex);
224 pthread_mutex_destroy(&(dev->vamgr.bo_va_mutex));
225 util_hash_table_remove(fd_tab, UINT_TO_PTR(dev->fd));
226 free(dev);
227}
228
229int amdgpu_device_deinitialize(amdgpu_device_handle dev)
230{
231 amdgpu_device_reference(&dev, NULL);
232 return 0;
233}
234
235void amdgpu_device_reference(struct amdgpu_device **dst,
236 struct amdgpu_device *src)
237{
238 if (update_references(&(*dst)->refcount, &src->refcount))
239 amdgpu_device_free_internal(*dst);
240 *dst = src;
241}
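
[Editor's note] Initialization above deduplicates devices per DRM node via fd_tab, so a typical caller only needs to open a node and hand over the fd. A minimal sketch; the render-node path is an illustrative value, not something this library mandates:

	/* Hedged sketch: assumes <fcntl.h>, <unistd.h> and <errno.h>; the device path is illustrative. */
	uint32_t drm_major, drm_minor;
	amdgpu_device_handle dev;
	int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
	int r = (fd < 0) ? -errno : amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
	if (r == 0) {
		/* ... use dev; a second initialize() on the same node returns the same amdgpu_device ... */
		amdgpu_device_deinitialize(dev);
	}
	if (fd >= 0)
		close(fd);
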
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
new file mode 100644
index 00000000..0b777316
--- /dev/null
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -0,0 +1,275 @@
1/*
2 * Copyright © 2014 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#include <errno.h>
25#include <string.h>
26
27#include "amdgpu.h"
28#include "amdgpu_drm.h"
29#include "amdgpu_internal.h"
30#include "xf86drm.h"
31
32int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
33 unsigned size, void *value)
34{
35 struct drm_amdgpu_info request;
36
37 memset(&request, 0, sizeof(request));
38 request.return_pointer = (uintptr_t)value;
39 request.return_size = size;
40 request.query = info_id;
41
42 return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
43 sizeof(struct drm_amdgpu_info));
44}
45
46int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id,
47 int32_t *result)
48{
49 struct drm_amdgpu_info request;
50
51 memset(&request, 0, sizeof(request));
52 request.return_pointer = (uintptr_t)result;
53 request.return_size = sizeof(*result);
54 request.query = AMDGPU_INFO_CRTC_FROM_ID;
55 request.mode_crtc.id = id;
56
57 return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
58 sizeof(struct drm_amdgpu_info));
59}
60
61int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset,
62 unsigned count, uint32_t instance, uint32_t flags,
63 uint32_t *values)
64{
65 struct drm_amdgpu_info request;
66
67 memset(&request, 0, sizeof(request));
68 request.return_pointer = (uintptr_t)values;
69 request.return_size = count * sizeof(uint32_t);
70 request.query = AMDGPU_INFO_READ_MMR_REG;
71 request.read_mmr_reg.dword_offset = dword_offset;
72 request.read_mmr_reg.count = count;
73 request.read_mmr_reg.instance = instance;
74 request.read_mmr_reg.flags = flags;
75
76 return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
77 sizeof(struct drm_amdgpu_info));
78}
79
80int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type,
81 uint32_t *count)
82{
83 struct drm_amdgpu_info request;
84
85 memset(&request, 0, sizeof(request));
86 request.return_pointer = (uintptr_t)count;
87 request.return_size = sizeof(*count);
88 request.query = AMDGPU_INFO_HW_IP_COUNT;
89 request.query_hw_ip.type = type;
90
91 return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
92 sizeof(struct drm_amdgpu_info));
93}
94
95int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
96 unsigned ip_instance,
97 struct drm_amdgpu_info_hw_ip *info)
98{
99 struct drm_amdgpu_info request;
100
101 memset(&request, 0, sizeof(request));
102 request.return_pointer = (uintptr_t)info;
103 request.return_size = sizeof(*info);
104 request.query = AMDGPU_INFO_HW_IP_INFO;
105 request.query_hw_ip.type = type;
106 request.query_hw_ip.ip_instance = ip_instance;
107
108 return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
109 sizeof(struct drm_amdgpu_info));
110}
111
112int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type,
113 unsigned ip_instance, unsigned index,
114 uint32_t *version, uint32_t *feature)
115{
116 struct drm_amdgpu_info request;
117 struct drm_amdgpu_info_firmware firmware;
118 int r;
119
120 memset(&request, 0, sizeof(request));
121 request.return_pointer = (uintptr_t)&firmware;
122 request.return_size = sizeof(firmware);
123 request.query = AMDGPU_INFO_FW_VERSION;
124 request.query_fw.fw_type = fw_type;
125 request.query_fw.ip_instance = ip_instance;
126 request.query_fw.index = index;
127
128 r = drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
129 sizeof(struct drm_amdgpu_info));
130 if (r)
131 return r;
132
133 *version = firmware.ver;
134 *feature = firmware.feature;
135 return 0;
136}
137
138int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
139{
140 int r, i;
141
142 r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(dev->dev_info),
143 &dev->dev_info);
144 if (r)
145 return r;
146
147 dev->info.asic_id = dev->dev_info.device_id;
148 dev->info.chip_rev = dev->dev_info.chip_rev;
149 dev->info.chip_external_rev = dev->dev_info.external_rev;
150 dev->info.family_id = dev->dev_info.family;
151 dev->info.max_engine_clk = dev->dev_info.max_engine_clock;
152 dev->info.gpu_counter_freq = dev->dev_info.gpu_counter_freq;
153 dev->info.enabled_rb_pipes_mask = dev->dev_info.enabled_rb_pipes_mask;
154 dev->info.rb_pipes = dev->dev_info.num_rb_pipes;
155 dev->info.ids_flags = dev->dev_info.ids_flags;
156 dev->info.num_hw_gfx_contexts = dev->dev_info.num_hw_gfx_contexts;
157 dev->info.num_shader_engines = dev->dev_info.num_shader_engines;
158 dev->info.num_shader_arrays_per_engine =
159 dev->dev_info.num_shader_arrays_per_engine;
160
161 for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
162 unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
163 (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
164 AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
165
166 r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
167 &dev->info.backend_disable[i]);
168 if (r)
169 return r;
170 /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
171 dev->info.backend_disable[i] =
172 (dev->info.backend_disable[i] >> 16) & 0xff;
173
174 r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
175 &dev->info.pa_sc_raster_cfg[i]);
176 if (r)
177 return r;
178
179 r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
180 &dev->info.pa_sc_raster_cfg1[i]);
181 if (r)
182 return r;
183 }
184
185 r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
186 dev->info.gb_tile_mode);
187 if (r)
188 return r;
189
190 r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
191 dev->info.gb_macro_tile_mode);
192 if (r)
193 return r;
194
195 r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
196 &dev->info.gb_addr_cfg);
197 if (r)
198 return r;
199
200 r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
201 &dev->info.mc_arb_ramcfg);
202 if (r)
203 return r;
204
205 dev->info.cu_active_number = dev->dev_info.cu_active_number;
206 dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask;
207 memcpy(&dev->info.cu_bitmap[0][0], &dev->dev_info.cu_bitmap[0][0], sizeof(dev->info.cu_bitmap));
208
209 /* TODO: info->max_quad_shader_pipes is not set */
210 /* TODO: info->avail_quad_shader_pipes is not set */
211 /* TODO: info->cache_entries_per_quad_pipe is not set */
212 /* TODO: info->active_rb_pipes is not set */
213 return 0;
214}
215
216int amdgpu_query_gpu_info(amdgpu_device_handle dev,
217 struct amdgpu_gpu_info *info)
218{
219 /* Get ASIC info*/
220 *info = dev->info;
221
222 return 0;
223}
224
225int amdgpu_query_heap_info(amdgpu_device_handle dev,
226 uint32_t heap,
227 uint32_t flags,
228 struct amdgpu_heap_info *info)
229{
230 struct drm_amdgpu_info_vram_gtt vram_gtt_info;
231 int r;
232
233 r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_GTT,
234 sizeof(vram_gtt_info), &vram_gtt_info);
235 if (r)
236 return r;
237
238 /* Get heap information */
239 switch (heap) {
240 case AMDGPU_GEM_DOMAIN_VRAM:
241 /* query visible only vram heap */
242 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
243 info->heap_size = vram_gtt_info.vram_cpu_accessible_size;
244 else /* query total vram heap */
245 info->heap_size = vram_gtt_info.vram_size;
246
247 info->max_allocation = vram_gtt_info.vram_cpu_accessible_size;
248
249 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
250 r = amdgpu_query_info(dev, AMDGPU_INFO_VIS_VRAM_USAGE,
251 sizeof(info->heap_usage),
252 &info->heap_usage);
253 else
254 r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_USAGE,
255 sizeof(info->heap_usage),
256 &info->heap_usage);
257 if (r)
258 return r;
259 break;
260 case AMDGPU_GEM_DOMAIN_GTT:
261 info->heap_size = vram_gtt_info.gtt_size;
262 info->max_allocation = vram_gtt_info.vram_cpu_accessible_size;
263
264 r = amdgpu_query_info(dev, AMDGPU_INFO_GTT_USAGE,
265 sizeof(info->heap_usage),
266 &info->heap_usage);
267 if (r)
268 return r;
269 break;
270 default:
271 return -EINVAL;
272 }
273
274 return 0;
275}
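
[Editor's note] The query helpers above are typically used right after device initialization to size resources. A small sketch, assuming a valid dev and the usual libc headers:

	/* Hedged sketch: prints a few fields from the cached device info and the visible VRAM heap. */
	struct amdgpu_gpu_info gpu_info;
	struct amdgpu_heap_info vram;
	int r;

	r = amdgpu_query_gpu_info(dev, &gpu_info);       /* returns the copy cached at init time */
	if (!r)
		r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
					   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &vram);
	if (!r)
		printf("family %u: visible VRAM %llu of %llu bytes in use\n",
		       gpu_info.family_id,
		       (unsigned long long)vram.heap_usage,
		       (unsigned long long)vram.heap_size);
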
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
new file mode 100644
index 00000000..8346f16b
--- /dev/null
+++ b/amdgpu/amdgpu_internal.h
@@ -0,0 +1,208 @@
1/*
2 * Copyright © 2014 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#ifndef _AMDGPU_INTERNAL_H_
25#define _AMDGPU_INTERNAL_H_
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include <assert.h>
32#include <pthread.h>
33#include "xf86atomic.h"
34#include "amdgpu.h"
35#include "util_double_list.h"
36
37#define AMDGPU_CS_MAX_RINGS 8
38
39struct amdgpu_bo_va_hole {
40 struct list_head list;
41 uint64_t offset;
42 uint64_t size;
43};
44
45struct amdgpu_bo_va_mgr {
46 /* the start virtual address */
47 uint64_t va_offset;
48 struct list_head va_holes;
49 pthread_mutex_t bo_va_mutex;
50 uint32_t va_alignment;
51};
52
53struct amdgpu_device {
54 atomic_t refcount;
55 int fd;
56 int flink_fd;
57 unsigned major_version;
58 unsigned minor_version;
59
60 /** List of buffer handles. Protected by bo_table_mutex. */
61 struct util_hash_table *bo_handles;
62 /** List of buffer GEM flink names. Protected by bo_table_mutex. */
63 struct util_hash_table *bo_flink_names;
64 /** List of buffer virtual memory ranges. Protected by bo_table_mutex. */
65 struct util_hash_table *bo_vas;
66 /** This protects all hash tables. */
67 pthread_mutex_t bo_table_mutex;
68 struct amdgpu_bo_va_mgr vamgr;
69 struct drm_amdgpu_info_device dev_info;
70 struct amdgpu_gpu_info info;
71};
72
73struct amdgpu_bo {
74 atomic_t refcount;
75 struct amdgpu_device *dev;
76
77 uint64_t alloc_size;
78 uint64_t virtual_mc_base_address;
79
80 uint32_t handle;
81 uint32_t flink_name;
82
83 pthread_mutex_t cpu_access_mutex;
84 void *cpu_ptr;
85 int cpu_map_count;
86};
87
88/*
89 * There are three mutexes.
90 * To avoid deadlock, only hold the mutexes in this order:
91 * sequence_mutex -> pendings_mutex -> pool_mutex.
92*/
93struct amdgpu_context {
94 /** Mutex for accessing fences and to maintain command submissions
95 and pending lists in good sequence. */
96 pthread_mutex_t sequence_mutex;
97 /** Buffer for user fences */
98 struct amdgpu_ib *fence_ib;
99 /** The newest expired fence for the ring of the ip blocks. */
100 uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
101 /** Mutex for accessing pendings list. */
102 pthread_mutex_t pendings_mutex;
103 /** Pending IBs. */
104 struct list_head pendings[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
105 /** Freed IBs not yet in pool */
106 struct list_head freed;
107 /** Mutex for accessing free ib pool. */
108 pthread_mutex_t pool_mutex;
109 /** Internal free IB pools. */
110 struct list_head ib_pools[AMDGPU_CS_IB_SIZE_NUM];
111 /* context id*/
112 uint32_t id;
113};
114
115struct amdgpu_ib {
116 struct list_head list_node;
117 amdgpu_bo_handle buf_handle;
118 void *cpu;
119 uint64_t virtual_mc_base_address;
120 enum amdgpu_cs_ib_size ib_size;
121 uint64_t cs_handle;
122};
123
124/**
125 * Functions.
126 */
127
128void amdgpu_device_free_internal(amdgpu_device_handle dev);
129
130void amdgpu_bo_free_internal(amdgpu_bo_handle bo);
131
132void amdgpu_vamgr_init(struct amdgpu_device *dev);
133
134uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr,
135 uint64_t size, uint64_t alignment);
136
137void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va,
138 uint64_t size);
139
140int amdgpu_query_gpu_info_init(amdgpu_device_handle dev);
141
142uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout);
143
144/**
145 * Inline functions.
146 */
147
148/**
149 * Increment src and decrement dst as if we were updating references
150 * for an assignment between 2 pointers of some objects.
151 *
152 * \return true if dst is 0
153 */
154static inline bool update_references(atomic_t *dst, atomic_t *src)
155{
156 if (dst != src) {
157 /* bump src first */
158 if (src) {
159 assert(atomic_read(src) > 0);
160 atomic_inc(src);
161 }
162 if (dst) {
163 assert(atomic_read(dst) > 0);
164 return atomic_dec_and_test(dst);
165 }
166 }
167 return false;
168}
169
170/**
171 * Assignment between two amdgpu_bo pointers with reference counting.
172 *
173 * Usage:
174 * struct amdgpu_bo *dst = ... , *src = ...;
175 *
176 * dst = src;
177 * // No reference counting. Only use this when you need to move
178 * // a reference from one pointer to another.
179 *
180 * amdgpu_bo_reference(&dst, src);
181 * // Reference counters are updated. dst is decremented and src is
182 * // incremented. dst is freed if its reference counter is 0.
183 */
184static inline void amdgpu_bo_reference(struct amdgpu_bo **dst,
185 struct amdgpu_bo *src)
186{
187 if (update_references(&(*dst)->refcount, &src->refcount))
188 amdgpu_bo_free_internal(*dst);
189 *dst = src;
190}
191
192/**
193 * Assignment between two amdgpu_device pointers with reference counting.
194 *
195 * Usage:
196 * struct amdgpu_device *dst = ... , *src = ...;
197 *
198 * dst = src;
199 * // No reference counting. Only use this when you need to move
200 * // a reference from one pointer to another.
201 *
202 * amdgpu_device_reference(&dst, src);
203 * // Reference counters are updated. dst is decremented and src is
204 * // incremented. dst is freed if its reference counter is 0.
205 */
206void amdgpu_device_reference(struct amdgpu_device **dst,
207 struct amdgpu_device *src);
208#endif
diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c
new file mode 100644
index 00000000..23359122
--- /dev/null
+++ b/amdgpu/amdgpu_vamgr.c
@@ -0,0 +1,169 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdlib.h>
25#include <string.h>
26#include "amdgpu.h"
27#include "amdgpu_drm.h"
28#include "amdgpu_internal.h"
29#include "util_math.h"
30
31void amdgpu_vamgr_init(struct amdgpu_device *dev)
32{
33 struct amdgpu_bo_va_mgr *vamgr = &dev->vamgr;
34
35 vamgr->va_offset = dev->dev_info.virtual_address_offset;
36 vamgr->va_alignment = dev->dev_info.virtual_address_alignment;
37
38 list_inithead(&vamgr->va_holes);
39 pthread_mutex_init(&vamgr->bo_va_mutex, NULL);
40}
41
42uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr,
43 uint64_t size, uint64_t alignment)
44{
45 struct amdgpu_bo_va_hole *hole, *n;
46 uint64_t offset = 0, waste = 0;
47
48 alignment = MAX2(alignment, mgr->va_alignment);
49 size = ALIGN(size, mgr->va_alignment);
50
51 pthread_mutex_lock(&mgr->bo_va_mutex);
52	/* TODO: use a more appropriate way to track the holes */
53 /* first look for a hole */
54 LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
55 offset = hole->offset;
56 waste = offset % alignment;
57 waste = waste ? alignment - waste : 0;
58 offset += waste;
59 if (offset >= (hole->offset + hole->size)) {
60 continue;
61 }
62 if (!waste && hole->size == size) {
63 offset = hole->offset;
64 list_del(&hole->list);
65 free(hole);
66 pthread_mutex_unlock(&mgr->bo_va_mutex);
67 return offset;
68 }
69 if ((hole->size - waste) > size) {
70 if (waste) {
71 n = calloc(1,
72 sizeof(struct amdgpu_bo_va_hole));
73 n->size = waste;
74 n->offset = hole->offset;
75 list_add(&n->list, &hole->list);
76 }
77 hole->size -= (size + waste);
78 hole->offset += size + waste;
79 pthread_mutex_unlock(&mgr->bo_va_mutex);
80 return offset;
81 }
82 if ((hole->size - waste) == size) {
83 hole->size = waste;
84 pthread_mutex_unlock(&mgr->bo_va_mutex);
85 return offset;
86 }
87 }
88
89 offset = mgr->va_offset;
90 waste = offset % alignment;
91 waste = waste ? alignment - waste : 0;
92 if (waste) {
93 n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
94 n->size = waste;
95 n->offset = offset;
96 list_add(&n->list, &mgr->va_holes);
97 }
98 offset += waste;
99 mgr->va_offset += size + waste;
100 pthread_mutex_unlock(&mgr->bo_va_mutex);
101 return offset;
102}
103
104void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va,
105 uint64_t size)
106{
107 struct amdgpu_bo_va_hole *hole;
108
109 size = ALIGN(size, mgr->va_alignment);
110
111 pthread_mutex_lock(&mgr->bo_va_mutex);
112 if ((va + size) == mgr->va_offset) {
113 mgr->va_offset = va;
114 /* Delete uppermost hole if it reaches the new top */
115 if (!LIST_IS_EMPTY(&mgr->va_holes)) {
116 hole = container_of(mgr->va_holes.next, hole, list);
117 if ((hole->offset + hole->size) == va) {
118 mgr->va_offset = hole->offset;
119 list_del(&hole->list);
120 free(hole);
121 }
122 }
123 } else {
124 struct amdgpu_bo_va_hole *next;
125
126 hole = container_of(&mgr->va_holes, hole, list);
127 LIST_FOR_EACH_ENTRY(next, &mgr->va_holes, list) {
128 if (next->offset < va)
129 break;
130 hole = next;
131 }
132
133 if (&hole->list != &mgr->va_holes) {
134 /* Grow upper hole if it's adjacent */
135 if (hole->offset == (va + size)) {
136 hole->offset = va;
137 hole->size += size;
138 /* Merge lower hole if it's adjacent */
139 if (next != hole
140 && &next->list != &mgr->va_holes
141 && (next->offset + next->size) == va) {
142 next->size += hole->size;
143 list_del(&hole->list);
144 free(hole);
145 }
146 goto out;
147 }
148 }
149
150 /* Grow lower hole if it's adjacent */
151 if (next != hole && &next->list != &mgr->va_holes &&
152 (next->offset + next->size) == va) {
153 next->size += size;
154 goto out;
155 }
156
 157		/* FIXME: on allocation failure we just lose virtual address space;
 158		 * maybe print a warning.
 159		 */
160 next = calloc(1, sizeof(struct amdgpu_bo_va_hole));
161 if (next) {
162 next->size = size;
163 next->offset = va;
164 list_add(&next->list, &hole->list);
165 }
166 }
167out:
168 pthread_mutex_unlock(&mgr->bo_va_mutex);
169}
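
For orientation, a minimal sketch of how the rest of the library is expected to drive this manager (illustrative only, not part of this patch; the dev->vamgr field follows amdgpu_internal.h as used in amdgpu_vamgr_init() above). amdgpu_vamgr_find_va() does a first-fit walk over the hole list, rounding the start up to the requested alignment and otherwise bumping va_offset, while amdgpu_vamgr_free_va() returns the range and merges it with adjacent holes:

#include <stdint.h>
#include "amdgpu_internal.h"

static void va_roundtrip_example(struct amdgpu_device *dev)
{
	/* An alignment smaller than mgr->va_alignment is bumped up by find_va. */
	uint64_t size = 2 * 1024 * 1024;
	uint64_t va = amdgpu_vamgr_find_va(&dev->vamgr, size, 4096);

	/* ... map a buffer at [va, va + size) ... */

	/* Hand the range back; neighbouring holes are coalesced. */
	amdgpu_vamgr_free_va(&dev->vamgr, va, size);
}
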
diff --git a/amdgpu/libdrm_amdgpu.pc.in b/amdgpu/libdrm_amdgpu.pc.in
new file mode 100644
index 00000000..417865e5
--- /dev/null
+++ b/amdgpu/libdrm_amdgpu.pc.in
@@ -0,0 +1,10 @@
1prefix=@prefix@
2exec_prefix=@exec_prefix@
3libdir=@libdir@
4includedir=@includedir@
5
6Name: libdrm_amdgpu
7Description: Userspace interface to kernel DRM services for amdgpu
8Version: @PACKAGE_VERSION@
9Libs: -L${libdir} -ldrm_amdgpu
10Cflags: -I${includedir} -I${includedir}/libdrm
diff --git a/amdgpu/util_hash.c b/amdgpu/util_hash.c
new file mode 100644
index 00000000..b1e12c4c
--- /dev/null
+++ b/amdgpu/util_hash.c
@@ -0,0 +1,382 @@
1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Zack Rusin <zackr@vmware.com>
31 */
32
33#include "util_hash.h"
34
35#include <stdlib.h>
36#include <assert.h>
37
 38#define MAX(a, b) (((a) > (b)) ? (a) : (b))
39
40static const int MinNumBits = 4;
41
42static const unsigned char prime_deltas[] = {
43 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3,
44 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0
45};
46
47static int primeForNumBits(int numBits)
48{
49 return (1 << numBits) + prime_deltas[numBits];
50}
51
52/* Returns the smallest integer n such that
53 primeForNumBits(n) >= hint.
54*/
55static int countBits(int hint)
56{
57 int numBits = 0;
58 int bits = hint;
59
60 while (bits > 1) {
61 bits >>= 1;
62 numBits++;
63 }
64
65 if (numBits >= (int)sizeof(prime_deltas)) {
66 numBits = sizeof(prime_deltas) - 1;
67 } else if (primeForNumBits(numBits) < hint) {
68 ++numBits;
69 }
70 return numBits;
71}
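
To make the sizing rule above concrete (a worked example, not part of this patch): a hint of 100 entries gives numBits = 6 from the shift loop, but primeForNumBits(6) = 64 + 3 = 67 is still below the hint, so countBits() bumps the result to 7, where primeForNumBits(7) = 131.

/* Worked example, not part of the patch; these checks would hold if
 * dropped into this file next to the static helpers above. */
#include <assert.h>

static void check_bucket_sizing(void)
{
	assert(primeForNumBits(6) == 67);	/* (1 << 6) + prime_deltas[6] */
	assert(primeForNumBits(7) == 131);	/* (1 << 7) + prime_deltas[7] */
	assert(countBits(100) == 7);		/* 67 < 100 <= 131 */
}
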
72
73struct util_node {
74 struct util_node *next;
75 unsigned key;
76 void *value;
77};
78
79struct util_hash_data {
80 struct util_node *fakeNext;
81 struct util_node **buckets;
82 int size;
83 int nodeSize;
84 short userNumBits;
85 short numBits;
86 int numBuckets;
87};
88
89struct util_hash {
90 union {
91 struct util_hash_data *d;
92 struct util_node *e;
93 } data;
94};
95
96static void *util_data_allocate_node(struct util_hash_data *hash)
97{
98 return malloc(hash->nodeSize);
99}
100
101static void util_free_node(struct util_node *node)
102{
103 free(node);
 104}