intel/context: create/destroy implementation

[glsdk/libdrm.git] / intel / intel_bufmgr_gem.c
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c

index d56593a56d6447891641f60fa15e11ac56ece4b1..a957c2855dabd2bbbf906f78b030b6c5549a963b 100644 (file)
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1,7 +1,7 @@
  /**************************************************************************
   *
   * Copyright © 2007 Red Hat Inc.
- * Copyright © 2007 Intel Corporation
+ * Copyright © 2007-2012 Intel Corporation
   * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
   * All Rights Reserved.
   *
@@ -119,6 +119,7 @@ typedef struct _drm_intel_bufmgr_gem {
         unsigned int has_blt : 1;
         unsigned int has_relaxed_fencing : 1;
         unsigned int has_llc : 1;
+       unsigned int has_wait_timeout : 1;
         unsigned int bo_reuse : 1;
         unsigned int no_exec : 1;
         bool fenced_relocs;
@@ -221,6 +222,9 @@ struct _drm_intel_bo_gem {
         bool mapped_cpu_write;
  
         uint32_t aub_offset;
+
+       drm_intel_aub_annotation *aub_annotations;
+       unsigned aub_annotation_count;
  };
  
  static unsigned int
@@ -735,6 +739,8 @@ retry:
         bo_gem->used_as_reloc_target = false;
         bo_gem->has_error = false;
         bo_gem->reusable = true;
+       bo_gem->aub_annotations = NULL;
+       bo_gem->aub_annotation_count = 0;
  
         drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
  
@@ -926,6 +932,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
                 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
                     bo_gem->gem_handle, bo_gem->name, strerror(errno));
         }
+       free(bo_gem->aub_annotations);
         free(bo);
  }
  
@@ -1189,15 +1196,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
         return 0;
  }
  
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int
+map_gtt(drm_intel_bo *bo)
  {
         drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
         drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-       struct drm_i915_gem_set_domain set_domain;
         int ret;
  
-       pthread_mutex_lock(&bufmgr_gem->lock);
-
         if (bo_gem->map_count++ == 0)
                 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
  
@@ -1223,7 +1228,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
                             strerror(errno));
                         if (--bo_gem->map_count == 0)
                                 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-                       pthread_mutex_unlock(&bufmgr_gem->lock);
                         return ret;
                 }
  
@@ -1240,7 +1244,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
                             strerror(errno));
                         if (--bo_gem->map_count == 0)
                                 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-                       pthread_mutex_unlock(&bufmgr_gem->lock);
                         return ret;
                 }
         }
@@ -1250,7 +1253,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
         DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
             bo_gem->gtt_virtual);
  
-       /* Now move it to the GTT domain so that the CPU caches are flushed */
+       return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       struct drm_i915_gem_set_domain set_domain;
+       int ret;
+
+       pthread_mutex_lock(&bufmgr_gem->lock);
+
+       ret = map_gtt(bo);
+       if (ret) {
+               pthread_mutex_unlock(&bufmgr_gem->lock);
+               return ret;
+       }
+
+       /* Now move it to the GTT domain so that the GPU and CPU
+        * caches are flushed and the GPU isn't actively using the
+        * buffer.
+        *
+        * The pagefault handler does this domain change for us when
+        * it has unbound the BO from the GTT, but it's up to us to
+        * tell it when we're about to use things if we had done
+        * rendering and it still happens to be bound to the GTT.
+        */
         VG_CLEAR(set_domain);
         set_domain.handle = bo_gem->gem_handle;
         set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -1271,6 +1300,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
         return 0;
  }
  
+/**
+ * Maps the buffer object through the GTT in the same way as
+ * drm_intel_gem_bo_map_gtt(), except that it does not wait for the GPU
+ * to finish reading from or rendering to the buffer.
+ *
+ * This backs GL_ARB_map_buffer_range: an application creates a buffer,
+ * maps it, fills part of it, issues a draw, and then maps it again
+ * asking for no synchronization.  By doing so it promises not to
+ * overwrite data the GPU is still using (or accepts that rendering is
+ * undefined if it does).
+ *
+ * Returns 0 on success, or the error code from the underlying mapping
+ * call on failure.
+ */
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       int status;
+
+       if (bufmgr_gem->has_llc) {
+               /* CPU cache is coherent with the GTT: set up the
+                * mapping under the bufmgr lock and skip the domain
+                * change that would stall on the GPU.
+                */
+               pthread_mutex_lock(&bufmgr_gem->lock);
+               status = map_gtt(bo);
+               pthread_mutex_unlock(&bufmgr_gem->lock);
+               return status;
+       }
+
+       /* Without a coherent CPU cache we must fall back to a regular
+        * synchronized mapping.  We do not track whether the buffer was
+        * last touched on the CPU via drm_intel_bo_map or
+        * drm_intel_gem_bo_map_gtt, so an unsynchronized map could
+        * corrupt the buffer even when the user does reasonable things.
+        */
+       return drm_intel_gem_bo_map_gtt(bo);
+}
+
  static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
  {
         drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
@@ -1414,6 +1479,58 @@ drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
         drm_intel_gem_bo_start_gtt_access(bo, 1);
  }
  
+/**
+ * Waits on a BO for the given amount of time.
+ *
+ * @bo: buffer object to wait for
+ * @timeout_ns: amount of time to wait in nanoseconds.
+ *   If value is less than 0, an infinite wait will occur.
+ *
+ * Returns 0 if the wait was successful, i.e. the last batch referencing the
+ * object has completed within the allotted time. Otherwise some negative
+ * return value describes the error. Of particular interest is -ETIME when the
+ * wait has failed to yield the desired result.
+ *
+ * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
+ * the operation to give up after a certain amount of time. Another subtle
+ * difference is the internal locking semantics are different (this variant does
+ * not hold the lock for the duration of the wait). This makes the wait subject
+ * to a larger userspace race window.
+ *
+ * The implementation shall wait until the object is no longer actively
+ * referenced within a batch buffer at the time of the call. The wait will
+ * not guarantee that the buffer is not re-issued via another thread, or a
+ * flinked handle. Userspace must make sure this race does not occur if such
+ * precision is important.
+ *
+ * When the kernel does not support timed waits, the timeout cannot be
+ * honoured: any non-zero @timeout_ns degrades to an unbounded
+ * drm_intel_gem_bo_wait_rendering() and returns 0, while a zero @timeout_ns
+ * becomes a single busy query returning -ETIME if the BO is still busy.
+ */
+int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       struct drm_i915_gem_wait wait;
+       int ret;
+
+       if (!bufmgr_gem->has_wait_timeout) {
+               DBG("%s:%d: Timed wait is not supported. Falling back to "
+                   "infinite wait\n", __FILE__, __LINE__);
+               if (timeout_ns) {
+                       drm_intel_gem_bo_wait_rendering(bo);
+                       return 0;
+               } else {
+                       return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
+               }
+       }
+
+       /* Zero the whole request so that no uninitialized stack bytes
+        * are handed to the kernel alongside the fields set below.
+        */
+       memset(&wait, 0, sizeof(wait));
+       wait.bo_handle = bo_gem->gem_handle;
+       wait.timeout_ns = timeout_ns;
+       wait.flags = 0;
+       ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
+       if (ret == -1)
+               return -errno;
+
+       return ret;
+}
+
  /**
   * Sets the object to the GTT read and possibly write domain, used by the X
   * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
@@ -1822,26 +1939,58 @@ aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
         aub_write_bo_data(bo, offset, size);
  }
  
+/**
+ * Break up large objects into multiple writes.  Otherwise a 128kb VBO
+ * would overflow the 16 bits of size field in the packet header and
+ * everything goes badly after that.
+ */
  static void
-aub_write_bo(drm_intel_bo *bo)
+aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+                           uint32_t offset, uint32_t size)
  {
         uint32_t block_size;
-       uint32_t offset;
+       uint32_t sub_offset;
  
-       aub_bo_get_address(bo);
-
-       /* Break up large objects into multiple writes.  Otherwise a
-        * 128kb VBO would overflow the 16 bits of size field in the
-        * packet header and everything goes badly after that.
-        */
-       for (offset = 0; offset < bo->size; offset += block_size) {
-               block_size = bo->size - offset;
+       for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
+               block_size = size - sub_offset;
  
                 if (block_size > 8 * 4096)
                         block_size = 8 * 4096;
  
-               aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
-                                     offset, block_size);
+               aub_write_trace_block(bo, type, subtype, offset + sub_offset,
+                                     block_size);
+       }
+}
+
+static void
+aub_write_bo(drm_intel_bo *bo)
+{
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       uint32_t offset = 0;
+       unsigned i;
+
+       aub_bo_get_address(bo);
+
+       /* Write out each annotated section separately. */
+       for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
+               drm_intel_aub_annotation *annotation =
+                       &bo_gem->aub_annotations[i];
+               uint32_t ending_offset = annotation->ending_offset;
+               if (ending_offset > bo->size)
+                       ending_offset = bo->size;
+               if (ending_offset > offset) {
+                       aub_write_large_trace_block(bo, annotation->type,
+                                                   annotation->subtype,
+                                                   offset,
+                                                   ending_offset - offset);
+                       offset = ending_offset;
+               }
+       }
+
+       /* Write out any remaining unannotated data */
+       if (offset < bo->size) {
+               aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
+                                           offset, bo->size - offset);
         }
  }
  
@@ -1931,23 +2080,31 @@ aub_exec(drm_intel_bo *bo, int ring_flag, int used)
         drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
         drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
         int i;
+       bool batch_buffer_needs_annotations;
  
         if (!bufmgr_gem->aub_file)
                 return;
  
-       /* Write out all but the batchbuffer to AUB memory */
-       for (i = 0; i < bufmgr_gem->exec_count - 1; i++) {
-               if (bufmgr_gem->exec_bos[i] != bo)
-                       aub_write_bo(bufmgr_gem->exec_bos[i]);
+       /* If batch buffer is not annotated, annotate it the best we
+        * can.
+        */
+       batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
+       if (batch_buffer_needs_annotations) {
+               drm_intel_aub_annotation annotations[2] = {
+                       { AUB_TRACE_TYPE_BATCH, 0, used },
+                       { AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
+               };
+               drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
         }
  
-       aub_bo_get_address(bo);
+       /* Write out all buffers to AUB memory */
+       for (i = 0; i < bufmgr_gem->exec_count; i++) {
+               aub_write_bo(bufmgr_gem->exec_bos[i]);
+       }
  
-       /* Dump the batchbuffer. */
-       aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0,
-                             0, used);
-       aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
-                             used, bo->size - used);
+       /* Remove any annotations we added */
+       if (batch_buffer_needs_annotations)
+               drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
  
         /* Dump ring buffer */
         aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
@@ -2030,9 +2187,9 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
  }
  
  static int
-drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
-                       drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
-                       unsigned int flags)
+do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
+        drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
+        unsigned int flags)
  {
         drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
         struct drm_i915_gem_execbuffer2 execbuf;
@@ -2074,7 +2231,10 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
         execbuf.DR1 = 0;
         execbuf.DR4 = DR4;
         execbuf.flags = flags;
-       execbuf.rsvd1 = 0;
+       if (ctx == NULL)
+               i915_execbuffer2_set_context_id(execbuf, 0);
+       else
+               i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
         execbuf.rsvd2 = 0;
  
         aub_exec(bo, flags, used);
@@ -2122,9 +2282,24 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
                        drm_clip_rect_t *cliprects, int num_cliprects,
                        int DR4)
  {
-       return drm_intel_gem_bo_mrb_exec2(bo, used,
-                                       cliprects, num_cliprects, DR4,
-                                       I915_EXEC_RENDER);
+       return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
+                       I915_EXEC_RENDER);
+}
+
+/**
+ * Executes a batch buffer with caller-supplied execbuffer flags and no
+ * context.  Thin wrapper that forwards every argument to do_exec2(),
+ * passing NULL for the context.
+ */
+static int
+drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
+                       drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
+                       unsigned int flags)
+{
+       drm_intel_context *no_ctx = NULL;
+
+       return do_exec2(bo, used, no_ctx, cliprects, num_cliprects, DR4,
+                       flags);
+}
+
+/**
+ * Executes a batch buffer within the given context.
+ *
+ * Forwards to do_exec2() with no cliprects and a DR4 value of 0.  A
+ * NULL \c ctx is accepted; do_exec2() then submits with context id 0.
+ */
+int
+drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
+                             int used, unsigned int flags)
+{
+       drm_clip_rect_t *no_cliprects = NULL;
+
+       return do_exec2(bo, used, ctx, no_cliprects, 0, 0, flags);
  }
  
  static int
@@ -2589,6 +2764,7 @@ get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
                 }
         }
  
+       VG_CLEAR(devid);
         VG_CLEAR(gp);
         gp.param = I915_PARAM_CHIPSET_ID;
         gp.value = &devid;
@@ -2664,6 +2840,92 @@ drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
         }
  }
  
+/**
+ * Creates a new context on the DRM device behind \c bufmgr.
+ *
+ * \param bufmgr GEM buffer manager whose file descriptor is used to
+ * issue DRM_IOCTL_I915_GEM_CONTEXT_CREATE.
+ *
+ * \return a heap-allocated drm_intel_context carrying the context id
+ * returned by the kernel, or NULL if either the allocation or the
+ * ioctl fails.  Release it with drm_intel_gem_context_destroy().
+ */
+drm_intel_context *
+drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+       struct drm_i915_gem_context_create create;
+       drm_intel_context *context;
+       int ret;
+
+       /* Allocate the wrapper first: if this fails there is nothing to
+        * undo, whereas failing after the ioctl would strand a context
+        * the kernel has already created.
+        */
+       context = calloc(1, sizeof(*context));
+       if (context == NULL)
+               return NULL;
+
+       /* Zero the whole request so that no uninitialized stack bytes
+        * (including any padding/reserved fields) reach the kernel.
+        */
+       memset(&create, 0, sizeof(create));
+       ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
+       if (ret != 0) {
+               fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
+                       strerror(errno));
+               free(context);
+               return NULL;
+       }
+
+       context->ctx_id = create.ctx_id;
+       context->bufmgr = bufmgr;
+
+       return context;
+}
+
+/**
+ * Destroys a context created by drm_intel_gem_context_create().
+ *
+ * \param ctx context to destroy; NULL is accepted and ignored.
+ *
+ * Issues DRM_IOCTL_I915_GEM_CONTEXT_DESTROY for the context id and then
+ * frees \c ctx.  An ioctl failure is reported on stderr, but \c ctx is
+ * freed regardless, so it must not be used after this call.
+ */
+void
+drm_intel_gem_context_destroy(drm_intel_context *ctx)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem;
+       struct drm_i915_gem_context_destroy destroy;
+       int ret;
+
+       if (ctx == NULL)
+               return;
+
+       bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
+
+       /* Zero the whole request so that no uninitialized stack bytes
+        * (including any padding/reserved fields) reach the kernel.
+        */
+       memset(&destroy, 0, sizeof(destroy));
+       destroy.ctx_id = ctx->ctx_id;
+       ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
+                      &destroy);
+       if (ret != 0)
+               fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
+                       strerror(errno));
+
+       free(ctx);
+}
+
+
+/**
+ * Annotate the given bo for use in aub dumping.
+ *
+ * \param annotations is an array of drm_intel_aub_annotation objects
+ * describing the type of data in various sections of the bo.  Each
+ * element of the array specifies the type and subtype of a section of
+ * the bo, and the past-the-end offset of that section.  The elements
+ * of \c annotations must be sorted so that ending_offset is
+ * increasing.
+ *
+ * \param count is the number of elements in the \c annotations array.
+ * If \c count is zero, then \c annotations will not be dereferenced.
+ *
+ * Annotations are copied into a private data structure, so caller may
+ * re-use the memory pointed to by \c annotations after the call
+ * returns.
+ *
+ * Annotations are stored for the lifetime of the bo; to reset to the
+ * default state (no annotations), call this function with a \c count
+ * of zero.
+ *
+ * If the private copy cannot be allocated (out of memory, or \c count
+ * so large that its byte size is not representable), any previously
+ * stored annotations are discarded and the bo is left unannotated.
+ */
+void
+drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
+                                        drm_intel_aub_annotation *annotations,
+                                        unsigned count)
+{
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       drm_intel_aub_annotation *new_annotations = NULL;
+       size_t size = 0;
+
+       /* Compute the byte size in size_t and refuse counts whose size
+        * would wrap; otherwise the array could end up shorter than the
+        * count recorded below and later be read past its end.
+        */
+       if (count > 0 && count <= SIZE_MAX / sizeof(*annotations)) {
+               size = sizeof(*annotations) * count;
+               new_annotations = realloc(bo_gem->aub_annotations, size);
+       }
+
+       if (new_annotations == NULL) {
+               /* Reset request or allocation failure.  On realloc
+                * failure the old array is still valid, so it is
+                * released here.
+                */
+               free(bo_gem->aub_annotations);
+               bo_gem->aub_annotations = NULL;
+               bo_gem->aub_annotation_count = 0;
+               return;
+       }
+
+       memcpy(new_annotations, annotations, size);
+       bo_gem->aub_annotations = new_annotations;
+       bo_gem->aub_annotation_count = count;
+}
+
  /**
   * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
   * and manage map buffer objections.
@@ -2732,6 +2994,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
                 bufmgr_gem->gtt_size -= 256*1024*1024;
         }
  
+       VG_CLEAR(gp);
         gp.value = &tmp;
  
         gp.param = I915_PARAM_HAS_EXECBUF2;
@@ -2751,9 +3014,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
         ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
         bufmgr_gem->has_relaxed_fencing = ret == 0;
  
+       gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
+       ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+       bufmgr_gem->has_wait_timeout = ret == 0;
+
         gp.param = I915_PARAM_HAS_LLC;
         ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-       if (ret == -EINVAL) {
+       if (ret != 0) {
                 /* Kernel does not supports HAS_LLC query, fallback to GPU
                  * generation detection and assume that we have LLC on GEN6/7
                  */