diff options
author | Pauli Nieminen | 2009-08-29 04:08:57 -0500 |
---|---|---|
committer | Pauli Nieminen | 2010-03-17 05:42:21 -0500 |
commit | 966c9907c040b4fe4b288b4a9d82598797aee743 (patch) | |
tree | 59a9f29ede30ab039a3a4151d2a9b1a9d30796cc /radeon | |
parent | 21105bc186d188f0bfc2f41c52b4b0ceb6742cf5 (diff) | |
download | libdrm-966c9907c040b4fe4b288b4a9d82598797aee743.tar.gz libdrm-966c9907c040b4fe4b288b4a9d82598797aee743.tar.xz libdrm-966c9907c040b4fe4b288b4a9d82598797aee743.zip |
libdrm_radeon: Optimize cs_gem_reloc to do less looping.
bo->reloc_in_cs is checked to see whether the bo is already in the cs. Adding and
removing the reference in the bo is done with atomic operations to allow parallel
access to a bo from multiple contexts.
The cs->id generation code guarantees that there are no duplicate ids, which limits
the number of cs->ids to 32. If there are more cs objects, the rest will get id 0.
V2:
- Fix configure to check for atomic operations when only libdrm_radeon is selected.
- Make atomic operations private to libdrm.
This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
to 2.6%.
Tested-by: Michel Dänzer <michel@daenzer.net>
Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
Diffstat (limited to 'radeon')
-rw-r--r-- | radeon/radeon_bo_gem.c | 9 | ||||
-rw-r--r-- | radeon/radeon_bo_gem.h | 1 | ||||
-rw-r--r-- | radeon/radeon_cs.c | 6 | ||||
-rw-r--r-- | radeon/radeon_cs.h | 2 | ||||
-rw-r--r-- | radeon/radeon_cs_gem.c | 123 | ||||
-rw-r--r-- | radeon/radeon_cs_int.h | 1 |
6 files changed, 110 insertions, 32 deletions
diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c index bc8058d8..081ccb9f 100644 --- a/radeon/radeon_bo_gem.c +++ b/radeon/radeon_bo_gem.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <sys/mman.h> | 39 | #include <sys/mman.h> |
40 | #include <errno.h> | 40 | #include <errno.h> |
41 | #include "xf86drm.h" | 41 | #include "xf86drm.h" |
42 | #include "xf86atomic.h" | ||
42 | #include "drm.h" | 43 | #include "drm.h" |
43 | #include "radeon_drm.h" | 44 | #include "radeon_drm.h" |
44 | #include "radeon_bo.h" | 45 | #include "radeon_bo.h" |
@@ -49,6 +50,7 @@ struct radeon_bo_gem { | |||
49 | struct radeon_bo_int base; | 50 | struct radeon_bo_int base; |
50 | uint32_t name; | 51 | uint32_t name; |
51 | int map_count; | 52 | int map_count; |
53 | atomic_t reloc_in_cs; | ||
52 | void *priv_ptr; | 54 | void *priv_ptr; |
53 | }; | 55 | }; |
54 | 56 | ||
@@ -80,6 +82,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, | |||
80 | bo->base.domains = domains; | 82 | bo->base.domains = domains; |
81 | bo->base.flags = flags; | 83 | bo->base.flags = flags; |
82 | bo->base.ptr = NULL; | 84 | bo->base.ptr = NULL; |
85 | atomic_set(&bo->reloc_in_cs, 0); | ||
83 | bo->map_count = 0; | 86 | bo->map_count = 0; |
84 | if (handle) { | 87 | if (handle) { |
85 | struct drm_gem_open open_arg; | 88 | struct drm_gem_open open_arg; |
@@ -309,6 +312,12 @@ uint32_t radeon_gem_name_bo(struct radeon_bo *bo) | |||
309 | return bo_gem->name; | 312 | return bo_gem->name; |
310 | } | 313 | } |
311 | 314 | ||
315 | void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo) | ||
316 | { | ||
317 | struct radeon_bo_gem *bo_gem = (struct radeon_bo_gem*)bo; | ||
318 | return &bo_gem->reloc_in_cs; | ||
319 | } | ||
320 | |||
312 | int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name) | 321 | int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name) |
313 | { | 322 | { |
314 | struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; | 323 | struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; |
diff --git a/radeon/radeon_bo_gem.h b/radeon/radeon_bo_gem.h index c56c58e9..0af8610b 100644 --- a/radeon/radeon_bo_gem.h +++ b/radeon/radeon_bo_gem.h | |||
@@ -38,6 +38,7 @@ struct radeon_bo_manager *radeon_bo_manager_gem_ctor(int fd); | |||
38 | void radeon_bo_manager_gem_dtor(struct radeon_bo_manager *bom); | 38 | void radeon_bo_manager_gem_dtor(struct radeon_bo_manager *bom); |
39 | 39 | ||
40 | uint32_t radeon_gem_name_bo(struct radeon_bo *bo); | 40 | uint32_t radeon_gem_name_bo(struct radeon_bo *bo); |
41 | void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo); | ||
41 | int radeon_gem_set_domain(struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain); | 42 | int radeon_gem_set_domain(struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain); |
42 | int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name); | 43 | int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name); |
43 | #endif | 44 | #endif |
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c index cc9be398..d0e922be 100644 --- a/radeon/radeon_cs.c +++ b/radeon/radeon_cs.c | |||
@@ -88,3 +88,9 @@ void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *d | |||
88 | csi->space_flush_fn = fn; | 88 | csi->space_flush_fn = fn; |
89 | csi->space_flush_data = data; | 89 | csi->space_flush_data = data; |
90 | } | 90 | } |
91 | |||
92 | uint32_t radeon_cs_get_id(struct radeon_cs *cs) | ||
93 | { | ||
94 | struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; | ||
95 | return csi->id; | ||
96 | } | ||
diff --git a/radeon/radeon_cs.h b/radeon/radeon_cs.h index 49d5d9a6..7f6ee68b 100644 --- a/radeon/radeon_cs.h +++ b/radeon/radeon_cs.h | |||
@@ -85,7 +85,7 @@ extern int radeon_cs_write_reloc(struct radeon_cs *cs, | |||
85 | uint32_t read_domain, | 85 | uint32_t read_domain, |
86 | uint32_t write_domain, | 86 | uint32_t write_domain, |
87 | uint32_t flags); | 87 | uint32_t flags); |
88 | 88 | extern uint32_t radeon_cs_get_id(struct radeon_cs *cs); | |
89 | /* | 89 | /* |
90 | * add a persistent BO to the list | 90 | * add a persistent BO to the list |
91 | * a persistent BO is one that will be referenced across flushes, | 91 | * a persistent BO is one that will be referenced across flushes, |
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c index 45a219c3..28ef5f64 100644 --- a/radeon/radeon_cs_gem.c +++ b/radeon/radeon_cs_gem.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <assert.h> | 32 | #include <assert.h> |
33 | #include <errno.h> | 33 | #include <errno.h> |
34 | #include <stdlib.h> | 34 | #include <stdlib.h> |
35 | #include <pthread.h> | ||
35 | #include <sys/mman.h> | 36 | #include <sys/mman.h> |
36 | #include <sys/ioctl.h> | 37 | #include <sys/ioctl.h> |
37 | #include "radeon_cs.h" | 38 | #include "radeon_cs.h" |
@@ -41,6 +42,7 @@ | |||
41 | #include "radeon_bo_gem.h" | 42 | #include "radeon_bo_gem.h" |
42 | #include "drm.h" | 43 | #include "drm.h" |
43 | #include "xf86drm.h" | 44 | #include "xf86drm.h" |
45 | #include "xf86atomic.h" | ||
44 | #include "radeon_drm.h" | 46 | #include "radeon_drm.h" |
45 | 47 | ||
46 | struct radeon_cs_manager_gem { | 48 | struct radeon_cs_manager_gem { |
@@ -68,6 +70,50 @@ struct cs_gem { | |||
68 | struct radeon_bo_int **relocs_bo; | 70 | struct radeon_bo_int **relocs_bo; |
69 | }; | 71 | }; |
70 | 72 | ||
73 | static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER; | ||
74 | static uint32_t cs_id_source = 0; | ||
75 | |||
/**
 * Return a mask in which only the lowest clear bit of n is set.
 *
 * The result is undefined if called with ~0: there is no clear bit, and
 * __builtin_ctz(0) is undefined.
 */
static uint32_t get_first_zero(const uint32_t n)
{
    /*
     * __builtin_ctz returns the number of trailing zeros, so applied to ~n
     * it gives the index of the lowest zero bit of n. The value being
     * shifted must be unsigned: with a plain int, index 31 would shift
     * into the sign bit, which is undefined behavior.
     */
    return (uint32_t)1 << __builtin_ctz(~n);
}
84 | |||
85 | /** | ||
86 | * Returns a free id for cs. | ||
87 | * If there is no free id we return zero | ||
88 | **/ | ||
89 | static uint32_t generate_id(void) | ||
90 | { | ||
91 | uint32_t r = 0; | ||
92 | pthread_mutex_lock( &id_mutex ); | ||
93 | /* check for free ids */ | ||
94 | if (cs_id_source != ~r) { | ||
95 | /* find first zero bit */ | ||
96 | r = get_first_zero(cs_id_source); | ||
97 | |||
98 | /* set id as reserved */ | ||
99 | cs_id_source |= r; | ||
100 | } | ||
101 | pthread_mutex_unlock( &id_mutex ); | ||
102 | return r; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * Free the id for later reuse | ||
107 | **/ | ||
108 | static void free_id(uint32_t id) | ||
109 | { | ||
110 | pthread_mutex_lock( &id_mutex ); | ||
111 | |||
112 | cs_id_source &= ~id; | ||
113 | |||
114 | pthread_mutex_unlock( &id_mutex ); | ||
115 | } | ||
116 | |||
71 | static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm, | 117 | static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm, |
72 | uint32_t ndw) | 118 | uint32_t ndw) |
73 | { | 119 | { |
@@ -90,6 +136,7 @@ static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm, | |||
90 | } | 136 | } |
91 | csg->base.relocs_total_size = 0; | 137 | csg->base.relocs_total_size = 0; |
92 | csg->base.crelocs = 0; | 138 | csg->base.crelocs = 0; |
139 | csg->base.id = generate_id(); | ||
93 | csg->nrelocs = 4096 / (4 * 4) ; | 140 | csg->nrelocs = 4096 / (4 * 4) ; |
94 | csg->relocs_bo = (struct radeon_bo_int**)calloc(1, | 141 | csg->relocs_bo = (struct radeon_bo_int**)calloc(1, |
95 | csg->nrelocs*sizeof(void*)); | 142 | csg->nrelocs*sizeof(void*)); |
@@ -141,38 +188,45 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs, | |||
141 | if (write_domain == RADEON_GEM_DOMAIN_CPU) { | 188 | if (write_domain == RADEON_GEM_DOMAIN_CPU) { |
142 | return -EINVAL; | 189 | return -EINVAL; |
143 | } | 190 | } |
144 | /* check if bo is already referenced */ | 191 | /* use bit field hash function to determine |
145 | for(i = 0; i < cs->crelocs; i++) { | 192 | if this bo is for sure not in this cs.*/ |
146 | idx = i * RELOC_SIZE; | 193 | if ((atomic_read((atomic_t *)radeon_gem_get_reloc_in_cs(bo)) & cs->id)) { |
147 | reloc = (struct cs_reloc_gem*)&csg->relocs[idx]; | 194 | /* check if bo is already referenced. |
148 | if (reloc->handle == bo->handle) { | 195 | * Scanning from end to begin reduces cycles with mesa because |
149 | /* Check domains must be in read or write. As we check already | 196 | * it often relocates same shared dma bo again. */ |
150 | * checked that in argument one of the read or write domain was | 197 | for(i = cs->crelocs; i != 0;) { |
151 | * set we only need to check that if previous reloc as the read | 198 | --i; |
152 | * domain set then the read_domain should also be set for this | 199 | idx = i * RELOC_SIZE; |
153 | * new relocation. | 200 | reloc = (struct cs_reloc_gem*)&csg->relocs[idx]; |
154 | */ | 201 | if (reloc->handle == bo->handle) { |
155 | /* the DDX expects to read and write from same pixmap */ | 202 | /* Check domains must be in read or write. As we check already |
156 | if (write_domain && (reloc->read_domain & write_domain)) { | 203 | * checked that in argument one of the read or write domain was |
157 | reloc->read_domain = 0; | 204 | * set we only need to check that if previous reloc as the read |
158 | reloc->write_domain = write_domain; | 205 | * domain set then the read_domain should also be set for this |
159 | } else if (read_domain & reloc->write_domain) { | 206 | * new relocation. |
160 | reloc->read_domain = 0; | 207 | */ |
161 | } else { | 208 | /* the DDX expects to read and write from same pixmap */ |
162 | if (write_domain != reloc->write_domain) | 209 | if (write_domain && (reloc->read_domain & write_domain)) { |
163 | return -EINVAL; | 210 | reloc->read_domain = 0; |
164 | if (read_domain != reloc->read_domain) | 211 | reloc->write_domain = write_domain; |
165 | return -EINVAL; | 212 | } else if (read_domain & reloc->write_domain) { |
213 | reloc->read_domain = 0; | ||
214 | } else { | ||
215 | if (write_domain != reloc->write_domain) | ||
216 | return -EINVAL; | ||
217 | if (read_domain != reloc->read_domain) | ||
218 | return -EINVAL; | ||
219 | } | ||
220 | |||
221 | reloc->read_domain |= read_domain; | ||
222 | reloc->write_domain |= write_domain; | ||
223 | /* update flags */ | ||
224 | reloc->flags |= (flags & reloc->flags); | ||
225 | /* write relocation packet */ | ||
226 | radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); | ||
227 | radeon_cs_write_dword((struct radeon_cs *)cs, idx); | ||
228 | return 0; | ||
166 | } | 229 | } |
167 | |||
168 | reloc->read_domain |= read_domain; | ||
169 | reloc->write_domain |= write_domain; | ||
170 | /* update flags */ | ||
171 | reloc->flags |= (flags & reloc->flags); | ||
172 | /* write relocation packet */ | ||
173 | radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); | ||
174 | radeon_cs_write_dword((struct radeon_cs *)cs, idx); | ||
175 | return 0; | ||
176 | } | 230 | } |
177 | } | 231 | } |
178 | /* new relocation */ | 232 | /* new relocation */ |
@@ -203,6 +257,8 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs, | |||
203 | reloc->flags = flags; | 257 | reloc->flags = flags; |
204 | csg->chunks[1].length_dw += RELOC_SIZE; | 258 | csg->chunks[1].length_dw += RELOC_SIZE; |
205 | radeon_bo_ref(bo); | 259 | radeon_bo_ref(bo); |
260 | /* bo might be referenced from another context so have to use atomic opertions */ | ||
261 | atomic_add((atomic_t *)radeon_gem_get_reloc_in_cs(bo), cs->id); | ||
206 | cs->relocs_total_size += boi->size; | 262 | cs->relocs_total_size += boi->size; |
207 | radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); | 263 | radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000); |
208 | radeon_cs_write_dword((struct radeon_cs *)cs, idx); | 264 | radeon_cs_write_dword((struct radeon_cs *)cs, idx); |
@@ -288,6 +344,8 @@ static int cs_gem_emit(struct radeon_cs_int *cs) | |||
288 | &csg->cs, sizeof(struct drm_radeon_cs)); | 344 | &csg->cs, sizeof(struct drm_radeon_cs)); |
289 | for (i = 0; i < csg->base.crelocs; i++) { | 345 | for (i = 0; i < csg->base.crelocs; i++) { |
290 | csg->relocs_bo[i]->space_accounted = 0; | 346 | csg->relocs_bo[i]->space_accounted = 0; |
347 | /* bo might be referenced from another context so have to use atomic opertions */ | ||
348 | atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id); | ||
291 | radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); | 349 | radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); |
292 | csg->relocs_bo[i] = NULL; | 350 | csg->relocs_bo[i] = NULL; |
293 | } | 351 | } |
@@ -302,6 +360,7 @@ static int cs_gem_destroy(struct radeon_cs_int *cs) | |||
302 | { | 360 | { |
303 | struct cs_gem *csg = (struct cs_gem*)cs; | 361 | struct cs_gem *csg = (struct cs_gem*)cs; |
304 | 362 | ||
363 | free_id(cs->id); | ||
305 | free(csg->relocs_bo); | 364 | free(csg->relocs_bo); |
306 | free(cs->relocs); | 365 | free(cs->relocs); |
307 | free(cs->packets); | 366 | free(cs->packets); |
@@ -317,6 +376,8 @@ static int cs_gem_erase(struct radeon_cs_int *cs) | |||
317 | if (csg->relocs_bo) { | 376 | if (csg->relocs_bo) { |
318 | for (i = 0; i < csg->base.crelocs; i++) { | 377 | for (i = 0; i < csg->base.crelocs; i++) { |
319 | if (csg->relocs_bo[i]) { | 378 | if (csg->relocs_bo[i]) { |
379 | /* bo might be referenced from another context so have to use atomic opertions */ | ||
380 | atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id); | ||
320 | radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); | 381 | radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]); |
321 | csg->relocs_bo[i] = NULL; | 382 | csg->relocs_bo[i] = NULL; |
322 | } | 383 | } |
diff --git a/radeon/radeon_cs_int.h b/radeon/radeon_cs_int.h index 8ba76bf9..6cee5742 100644 --- a/radeon/radeon_cs_int.h +++ b/radeon/radeon_cs_int.h | |||
@@ -28,6 +28,7 @@ struct radeon_cs_int { | |||
28 | int bo_count; | 28 | int bo_count; |
29 | void (*space_flush_fn)(void *); | 29 | void (*space_flush_fn)(void *); |
30 | void *space_flush_data; | 30 | void *space_flush_data; |
31 | uint32_t id; | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | /* cs functions */ | 34 | /* cs functions */ |