diff options
author | Michel Thierry | 2015-09-03 09:23:58 -0500 |
---|---|---|
committer | Kristian Høgsberg Kristensen | 2015-12-14 13:31:19 -0600 |
commit | 3350add5cc166b5d0e829377747bf6a94a4b0c09 (patch) | |
tree | a6c3216732feb4c8ceac5c3e0da69a096d4b7851 | |
parent | 7d74a83d22e694b2cd71e40992fd5a970d227e32 (diff) | |
download | external-libdrm-3350add5cc166b5d0e829377747bf6a94a4b0c09.tar.gz external-libdrm-3350add5cc166b5d0e829377747bf6a94a4b0c09.tar.xz external-libdrm-3350add5cc166b5d0e829377747bf6a94a4b0c09.zip |
intel: 48b ppgtt support (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag)
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.
Specifically, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32 bits.
The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.
Callers to the existing drm_intel_bo_emit_reloc function should set the
use_48b_address_range flag beforehand, in order to use full ppgtt range.
v2: Make set/clear functions nops on pre-gen8 platforms, and use them
internally in emit_reloc functions (Ben)
s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.
v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type
before enabling set/clear function, print full offsets in debug
statements, using port of lower_32_bits and upper_32_bits from linux
kernel (Michał)
References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
-rw-r--r-- | intel/intel_bufmgr.c | 11 | ||||
-rw-r--r-- | intel/intel_bufmgr.h | 1 | ||||
-rw-r--r-- | intel/intel_bufmgr_gem.c | 88 | ||||
-rw-r--r-- | intel/intel_bufmgr_priv.h | 14 |
4 files changed, 95 insertions, 19 deletions
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c index 14ea9f9f..0856e600 100644 --- a/intel/intel_bufmgr.c +++ b/intel/intel_bufmgr.c | |||
@@ -293,6 +293,17 @@ drm_intel_bo_madvise(drm_intel_bo *bo, int madv) | |||
293 | } | 293 | } |
294 | 294 | ||
295 | int | 295 | int |
296 | drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) | ||
297 | { | ||
298 | if (bo->bufmgr->bo_use_48b_address_range) { | ||
299 | bo->bufmgr->bo_use_48b_address_range(bo, enable); | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | return -ENODEV; | ||
304 | } | ||
305 | |||
306 | int | ||
296 | drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) | 307 | drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) |
297 | { | 308 | { |
298 | return bo->bufmgr->bo_references(bo, target_bo); | 309 | return bo->bufmgr->bo_references(bo, target_bo); |
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 95eecb80..a14c78fc 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h | |||
@@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, | |||
164 | int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); | 164 | int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); |
165 | int drm_intel_bo_busy(drm_intel_bo *bo); | 165 | int drm_intel_bo_busy(drm_intel_bo *bo); |
166 | int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); | 166 | int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); |
167 | int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable); | ||
167 | 168 | ||
168 | int drm_intel_bo_disable_reuse(drm_intel_bo *bo); | 169 | int drm_intel_bo_disable_reuse(drm_intel_bo *bo); |
169 | int drm_intel_bo_is_reusable(drm_intel_bo *bo); | 170 | int drm_intel_bo_is_reusable(drm_intel_bo *bo); |
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 63122d09..1eae898a 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c | |||
@@ -83,6 +83,22 @@ | |||
83 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 83 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
84 | #define MAX2(A, B) ((A) > (B) ? (A) : (B)) | 84 | #define MAX2(A, B) ((A) > (B) ? (A) : (B)) |
85 | 85 | ||
86 | /** | ||
87 | * upper_32_bits - return bits 32-63 of a number | ||
88 | * @n: the number we're accessing | ||
89 | * | ||
90 | * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress | ||
91 | * the "right shift count >= width of type" warning when that quantity is | ||
92 | * 32-bits. | ||
93 | */ | ||
94 | #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) | ||
95 | |||
96 | /** | ||
97 | * lower_32_bits - return bits 0-31 of a number | ||
98 | * @n: the number we're accessing | ||
99 | */ | ||
100 | #define lower_32_bits(n) ((__u32)(n)) | ||
101 | |||
86 | typedef struct _drm_intel_bo_gem drm_intel_bo_gem; | 102 | typedef struct _drm_intel_bo_gem drm_intel_bo_gem; |
87 | 103 | ||
88 | struct drm_intel_gem_bo_bucket { | 104 | struct drm_intel_gem_bo_bucket { |
@@ -237,6 +253,15 @@ struct _drm_intel_bo_gem { | |||
237 | bool is_userptr; | 253 | bool is_userptr; |
238 | 254 | ||
239 | /** | 255 | /** |
256 | * Boolean of whether this buffer can be placed in the full 48-bit | ||
257 | * address range on gen8+. | ||
258 | * | ||
259 | * By default, buffers will be kept in a 32-bit range, unless this | ||
260 | * flag is explicitly set. | ||
261 | */ | ||
262 | bool use_48b_address_range; | ||
263 | |||
264 | /** | ||
240 | * Size in bytes of this buffer and its relocation descendents. | 265 | * Size in bytes of this buffer and its relocation descendents. |
241 | * | 266 | * |
242 | * Used to avoid costly tree walking in | 267 | * Used to avoid costly tree walking in |
@@ -400,14 +425,16 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) | |||
400 | drm_intel_bo_gem *target_gem = | 425 | drm_intel_bo_gem *target_gem = |
401 | (drm_intel_bo_gem *) target_bo; | 426 | (drm_intel_bo_gem *) target_bo; |
402 | 427 | ||
403 | DBG("%2d: %d (%s)@0x%08llx -> " | 428 | DBG("%2d: %d (%s)@0x%08x %08x -> " |
404 | "%d (%s)@0x%08lx + 0x%08x\n", | 429 | "%d (%s)@0x%08x %08x + 0x%08x\n", |
405 | i, | 430 | i, |
406 | bo_gem->gem_handle, bo_gem->name, | 431 | bo_gem->gem_handle, bo_gem->name, |
407 | (unsigned long long)bo_gem->relocs[j].offset, | 432 | upper_32_bits(bo_gem->relocs[j].offset), |
433 | lower_32_bits(bo_gem->relocs[j].offset), | ||
408 | target_gem->gem_handle, | 434 | target_gem->gem_handle, |
409 | target_gem->name, | 435 | target_gem->name, |
410 | target_bo->offset64, | 436 | upper_32_bits(target_bo->offset64), |
437 | lower_32_bits(target_bo->offset64), | ||
411 | bo_gem->relocs[j].delta); | 438 | bo_gem->relocs[j].delta); |
412 | } | 439 | } |
413 | } | 440 | } |
@@ -473,11 +500,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) | |||
473 | drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; | 500 | drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; |
474 | drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; | 501 | drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; |
475 | int index; | 502 | int index; |
503 | int flags = 0; | ||
504 | |||
505 | if (need_fence) | ||
506 | flags |= EXEC_OBJECT_NEEDS_FENCE; | ||
507 | if (bo_gem->use_48b_address_range) | ||
508 | flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; | ||
476 | 509 | ||
477 | if (bo_gem->validate_index != -1) { | 510 | if (bo_gem->validate_index != -1) { |
478 | if (need_fence) | 511 | bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; |
479 | bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= | ||
480 | EXEC_OBJECT_NEEDS_FENCE; | ||
481 | return; | 512 | return; |
482 | } | 513 | } |
483 | 514 | ||
@@ -506,13 +537,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) | |||
506 | bufmgr_gem->exec2_objects[index].alignment = bo->align; | 537 | bufmgr_gem->exec2_objects[index].alignment = bo->align; |
507 | bufmgr_gem->exec2_objects[index].offset = 0; | 538 | bufmgr_gem->exec2_objects[index].offset = 0; |
508 | bufmgr_gem->exec_bos[index] = bo; | 539 | bufmgr_gem->exec_bos[index] = bo; |
509 | bufmgr_gem->exec2_objects[index].flags = 0; | 540 | bufmgr_gem->exec2_objects[index].flags = flags; |
510 | bufmgr_gem->exec2_objects[index].rsvd1 = 0; | 541 | bufmgr_gem->exec2_objects[index].rsvd1 = 0; |
511 | bufmgr_gem->exec2_objects[index].rsvd2 = 0; | 542 | bufmgr_gem->exec2_objects[index].rsvd2 = 0; |
512 | if (need_fence) { | ||
513 | bufmgr_gem->exec2_objects[index].flags |= | ||
514 | EXEC_OBJECT_NEEDS_FENCE; | ||
515 | } | ||
516 | bufmgr_gem->exec_count++; | 543 | bufmgr_gem->exec_count++; |
517 | } | 544 | } |
518 | 545 | ||
@@ -785,6 +812,7 @@ retry: | |||
785 | bo_gem->used_as_reloc_target = false; | 812 | bo_gem->used_as_reloc_target = false; |
786 | bo_gem->has_error = false; | 813 | bo_gem->has_error = false; |
787 | bo_gem->reusable = true; | 814 | bo_gem->reusable = true; |
815 | bo_gem->use_48b_address_range = false; | ||
788 | 816 | ||
789 | drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); | 817 | drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); |
790 | 818 | ||
@@ -931,6 +959,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, | |||
931 | bo_gem->used_as_reloc_target = false; | 959 | bo_gem->used_as_reloc_target = false; |
932 | bo_gem->has_error = false; | 960 | bo_gem->has_error = false; |
933 | bo_gem->reusable = false; | 961 | bo_gem->reusable = false; |
962 | bo_gem->use_48b_address_range = false; | ||
934 | 963 | ||
935 | drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); | 964 | drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); |
936 | 965 | ||
@@ -1086,6 +1115,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, | |||
1086 | bo_gem->bo.handle = open_arg.handle; | 1115 | bo_gem->bo.handle = open_arg.handle; |
1087 | bo_gem->global_name = handle; | 1116 | bo_gem->global_name = handle; |
1088 | bo_gem->reusable = false; | 1117 | bo_gem->reusable = false; |
1118 | bo_gem->use_48b_address_range = false; | ||
1089 | 1119 | ||
1090 | memclear(get_tiling); | 1120 | memclear(get_tiling); |
1091 | get_tiling.handle = bo_gem->gem_handle; | 1121 | get_tiling.handle = bo_gem->gem_handle; |
@@ -1935,6 +1965,13 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, | |||
1935 | return 0; | 1965 | return 0; |
1936 | } | 1966 | } |
1937 | 1967 | ||
1968 | static void | ||
1969 | drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) | ||
1970 | { | ||
1971 | drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; | ||
1972 | bo_gem->use_48b_address_range = enable; | ||
1973 | } | ||
1974 | |||
1938 | static int | 1975 | static int |
1939 | drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, | 1976 | drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, |
1940 | drm_intel_bo *target_bo, uint32_t target_offset, | 1977 | drm_intel_bo *target_bo, uint32_t target_offset, |
@@ -2078,10 +2115,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) | |||
2078 | 2115 | ||
2079 | /* Update the buffer offset */ | 2116 | /* Update the buffer offset */ |
2080 | if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { | 2117 | if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { |
2081 | DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", | 2118 | DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", |
2082 | bo_gem->gem_handle, bo_gem->name, bo->offset64, | 2119 | bo_gem->gem_handle, bo_gem->name, |
2083 | (unsigned long long)bufmgr_gem->exec_objects[i]. | 2120 | upper_32_bits(bo->offset64), |
2084 | offset); | 2121 | lower_32_bits(bo->offset64), |
2122 | upper_32_bits(bufmgr_gem->exec_objects[i].offset), | ||
2123 | lower_32_bits(bufmgr_gem->exec_objects[i].offset)); | ||
2085 | bo->offset64 = bufmgr_gem->exec_objects[i].offset; | 2124 | bo->offset64 = bufmgr_gem->exec_objects[i].offset; |
2086 | bo->offset = bufmgr_gem->exec_objects[i].offset; | 2125 | bo->offset = bufmgr_gem->exec_objects[i].offset; |
2087 | } | 2126 | } |
@@ -2099,9 +2138,12 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) | |||
2099 | 2138 | ||
2100 | /* Update the buffer offset */ | 2139 | /* Update the buffer offset */ |
2101 | if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { | 2140 | if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { |
2102 | DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", | 2141 | DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", |
2103 | bo_gem->gem_handle, bo_gem->name, bo->offset64, | 2142 | bo_gem->gem_handle, bo_gem->name, |
2104 | (unsigned long long)bufmgr_gem->exec2_objects[i].offset); | 2143 | upper_32_bits(bo->offset64), |
2144 | lower_32_bits(bo->offset64), | ||
2145 | upper_32_bits(bufmgr_gem->exec2_objects[i].offset), | ||
2146 | lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); | ||
2105 | bo->offset64 = bufmgr_gem->exec2_objects[i].offset; | 2147 | bo->offset64 = bufmgr_gem->exec2_objects[i].offset; |
2106 | bo->offset = bufmgr_gem->exec2_objects[i].offset; | 2148 | bo->offset = bufmgr_gem->exec2_objects[i].offset; |
2107 | } | 2149 | } |
@@ -2486,6 +2528,7 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s | |||
2486 | bo_gem->used_as_reloc_target = false; | 2528 | bo_gem->used_as_reloc_target = false; |
2487 | bo_gem->has_error = false; | 2529 | bo_gem->has_error = false; |
2488 | bo_gem->reusable = false; | 2530 | bo_gem->reusable = false; |
2531 | bo_gem->use_48b_address_range = false; | ||
2489 | 2532 | ||
2490 | DRMINITLISTHEAD(&bo_gem->vma_list); | 2533 | DRMINITLISTHEAD(&bo_gem->vma_list); |
2491 | DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); | 2534 | DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); |
@@ -3283,6 +3326,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) | |||
3283 | } | 3326 | } |
3284 | } | 3327 | } |
3285 | 3328 | ||
3329 | if (bufmgr_gem->gen >= 8) { | ||
3330 | gp.param = I915_PARAM_HAS_ALIASING_PPGTT; | ||
3331 | ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); | ||
3332 | if (ret == 0 && *gp.value == 3) | ||
3333 | bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; | ||
3334 | } | ||
3335 | |||
3286 | /* Let's go with one relocation per every 2 dwords (but round down a bit | 3336 | /* Let's go with one relocation per every 2 dwords (but round down a bit |
3287 | * since a power of two will mean an extra page allocation for the reloc | 3337 | * since a power of two will mean an extra page allocation for the reloc |
3288 | * buffer). | 3338 | * buffer). |
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h index 59ebd186..5c17ffbe 100644 --- a/intel/intel_bufmgr_priv.h +++ b/intel/intel_bufmgr_priv.h | |||
@@ -152,6 +152,20 @@ struct _drm_intel_bufmgr { | |||
152 | void (*destroy) (drm_intel_bufmgr *bufmgr); | 152 | void (*destroy) (drm_intel_bufmgr *bufmgr); |
153 | 153 | ||
154 | /** | 154 | /** |
155 | * Indicate if the buffer can be placed anywhere in the full ppgtt | ||
156 | * address range (2^48). | ||
157 | * | ||
158 | * Any resource used with flat/heapless (0x00000000-0xfffff000) | ||
159 | * General State Heap (GSH) or Instruction State Heap (ISH) must | ||
160 | * be in a 32-bit range. 48-bit range will only be used when explicitly | ||
161 | * requested. | ||
162 | * | ||
163 | * \param bo Buffer to set the use_48b_address_range flag. | ||
164 | * \param enable The flag value. | ||
165 | */ | ||
166 | void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable); | ||
167 | |||
168 | /** | ||
155 | * Add relocation entry in reloc_buf, which will be updated with the | 169 | * Add relocation entry in reloc_buf, which will be updated with the |
156 | * target buffer's real offset on command submission. | 170 | * target buffer's real offset on command submission. |
157 | * | 171 | * |