i965/sync: Implement DRI2_Fence extension

This enables EGL_KHR_fence_sync and EGL_KHR_wait_sync.
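
As an illustration (not part of the original commit message; error handling
is omitted and an initialized EGLDisplay `dpy` with a current context is
assumed), an application exercises these two extensions roughly like so:

  #include <EGL/egl.h>
  #include <EGL/eglext.h>

  static void
  fence_sync_example(EGLDisplay dpy)
  {
     PFNEGLCREATESYNCKHRPROC create_sync = (PFNEGLCREATESYNCKHRPROC)
        eglGetProcAddress("eglCreateSyncKHR");
     PFNEGLCLIENTWAITSYNCKHRPROC client_wait_sync = (PFNEGLCLIENTWAITSYNCKHRPROC)
        eglGetProcAddress("eglClientWaitSyncKHR");
     PFNEGLDESTROYSYNCKHRPROC destroy_sync = (PFNEGLDESTROYSYNCKHRPROC)
        eglGetProcAddress("eglDestroySyncKHR");

     /* Insert a fence behind the GL commands issued so far; on i965 this
      * now lands in intel_dri_create_fence() below. */
     EGLSyncKHR sync = create_sync(dpy, EGL_SYNC_FENCE_KHR, NULL);

     /* Flush, then block until the fence signals; this lands in
      * intel_dri_client_wait_sync(). EGL_KHR_wait_sync's eglWaitSyncKHR
      * would land in intel_dri_server_wait_sync() instead. */
     client_wait_sync(dpy, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
                      EGL_FOREVER_KHR);

     destroy_sync(dpy, sync);
  }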

Below are the piglit results before and after this patch. There are no
regressions, and several tests improve from 'skip' to 'pass'. Of the
EGL_KHR_fence_sync tests, two of the multithreaded tests still skip; all
other tests pass.

  cmdline: piglit run -p gbm -t sync tests/quick.py
  mesa: master@1ac7db0
  piglit: 4069bec
  hw: Ivybridge

        | before after
  ------+-------------
   pass |     32    46
   fail |      0     0
  crash |      0     0
   skip |     35    21
  total |     67    67

v2:
  - Set fence->signalled = true in brw_fence_has_completed() too.

Reviewed-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Author: Chad Versace
Date:   2015-05-05 19:05:32 -07:00
commit c636284ee8
parent 2516d835b1

4 files changed, 160 insertions(+), 41 deletions(-)

docs/relnotes/10.6.0.html

@@ -60,8 +60,8 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_viewport_array, GL_AMD_vertex_shader_viewport_index on i965/gen6</li>
 <li>GL_EXT_draw_buffers2 on freedreno</li>
 <li>GL_OES_EGL_sync on all drivers</li>
-<li>EGL_KHR_fence_sync on freedreno, nv50, nvc0, r600, radeonsi</li>
-<li>EGL_KHR_wait_sync on freedreno, nv50, nvc0, r600, radeonsi</li>
+<li>EGL_KHR_fence_sync on i965, freedreno, nv50, nvc0, r600, radeonsi</li>
+<li>EGL_KHR_wait_sync on i965, freedreno, nv50, nvc0, r600, radeonsi</li>
 <li>EGL_KHR_cl_event2 on freedreno, nv50, nvc0, r600, radeonsi</li>
 <li>GL_AMD_performance_monitor on nvc0</li>

src/mesa/drivers/dri/i965/intel_screen.c

@@ -909,6 +909,7 @@ static const __DRIrobustnessExtension dri2Robustness = {
 
 static const __DRIextension *intelScreenExtensions[] = {
    &intelTexBufferExtension.base,
+   &intelFenceExtension.base,
    &intelFlushExtension.base,
    &intelImageExtension.base,
    &intelRendererQueryExtension.base,
@@ -918,6 +919,7 @@ static const __DRIextension *intelScreenExtensions[] = {
 
 static const __DRIextension *intelRobustScreenExtensions[] = {
    &intelTexBufferExtension.base,
+   &intelFenceExtension.base,
    &intelFlushExtension.base,
    &intelImageExtension.base,
    &intelRendererQueryExtension.base,

src/mesa/drivers/dri/i965/brw_context.h

@@ -30,6 +30,9 @@
 #include <stdbool.h>
 #include <sys/time.h>
+
+#include <GL/internal/dri_interface.h>
+
 #include "dri_util.h"
 #include "intel_bufmgr.h"
 #include "brw_device_info.h"
@@ -76,6 +79,7 @@ extern void intelDestroyContext(__DRIcontext * driContextPriv);
 extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
 
 PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void);
+extern const __DRI2fenceExtension intelFenceExtension;
 
 extern GLboolean
 intelMakeCurrent(__DRIcontext * driContextPriv,

src/mesa/drivers/dri/i965/intel_syncobj.c

@@ -25,11 +25,11 @@
  *
  */
 
-/** @file intel_syncobj.c
+/**
+ * \file
+ * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
  *
- * Support for ARB_sync
- *
- * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
  * reference on it. We can then check for completion or wait for completion
  * using the normal buffer object mechanisms. This does mean that if an
  * application is using many sync objects, it will emit small batchbuffers
@@ -44,13 +44,94 @@
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
 
+struct brw_fence {
+   /** The fence waits for completion of this batch. */
+   drm_intel_bo *batch_bo;
+
+   bool signalled;
+};
+
 struct intel_gl_sync_object {
    struct gl_sync_object Base;
 
-   /** Batch associated with this sync object */
-   drm_intel_bo *bo;
+   struct brw_fence fence;
 };
 
+static void
+brw_fence_finish(struct brw_fence *fence)
+{
+   if (fence->batch_bo)
+      drm_intel_bo_unreference(fence->batch_bo);
+}
+
+static void
+brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
+{
+   assert(!fence->batch_bo);
+   assert(!fence->signalled);
+
+   intel_batchbuffer_emit_mi_flush(brw);
+   fence->batch_bo = brw->batch.bo;
+   drm_intel_bo_reference(fence->batch_bo);
+   intel_batchbuffer_flush(brw);
+}
+
+static bool
+brw_fence_has_completed(struct brw_fence *fence)
+{
+   if (fence->signalled)
+      return true;
+
+   if (fence->batch_bo && !drm_intel_bo_busy(fence->batch_bo)) {
+      drm_intel_bo_unreference(fence->batch_bo);
+      fence->batch_bo = NULL;
+      fence->signalled = true;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Return true if the function successfully signals or has already signalled.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+                      uint64_t timeout)
+{
+   if (fence->signalled)
+      return true;
+
+   assert(fence->batch_bo);
+
+   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
+    * immediately for timeouts <= 0. The best we can do is to clamp the
+    * timeout to INT64_MAX. This limits the maximum timeout from 584 years to
+    * 292 years - likely not a big deal.
+    */
+   if (timeout > INT64_MAX)
+      timeout = INT64_MAX;
+
+   if (drm_intel_gem_bo_wait(fence->batch_bo, timeout) != 0)
+      return false;
+
+   fence->signalled = true;
+   drm_intel_bo_unreference(fence->batch_bo);
+   fence->batch_bo = NULL;
+
+   return true;
+}
+
+static void
+brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
+{
+   /* We have nothing to do for WaitSync. Our GL command stream is sequential,
+    * so given that the sync object has already flushed the batchbuffer, any
+    * batchbuffers coming after this waitsync will naturally not occur until
+    * the previous one is done.
+    */
+}
+
 static struct gl_sync_object *
 intel_gl_new_sync_object(struct gl_context *ctx, GLuint id)
 {
@@ -68,9 +149,7 @@ intel_gl_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s)
 {
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   if (sync->bo)
-      drm_intel_bo_unreference(sync->bo);
-
+   brw_fence_finish(&sync->fence);
    free(sync);
 }
@@ -81,56 +160,37 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
    struct brw_context *brw = brw_context(ctx);
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
    assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
-   intel_batchbuffer_emit_mi_flush(brw);
 
-   sync->bo = brw->batch.bo;
-   drm_intel_bo_reference(sync->bo);
-
-   intel_batchbuffer_flush(brw);
+   brw_fence_insert(brw, &sync->fence);
 }
 
 static void
 intel_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                           GLbitfield flags, GLuint64 timeout)
 {
+   struct brw_context *brw = brw_context(ctx);
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
-    * immediately for timeouts <= 0. The best we can do is to clamp the
-    * timeout to INT64_MAX. This limits the maximum timeout from 584 years to
-    * 292 years - likely not a big deal.
-    */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) {
+   if (brw_fence_client_wait(brw, &sync->fence, timeout))
       s->StatusFlag = 1;
-      drm_intel_bo_unreference(sync->bo);
-      sync->bo = NULL;
-   }
 }
 
-/* We have nothing to do for WaitSync. Our GL command stream is sequential,
- * so given that the sync object has already flushed the batchbuffer,
- * any batchbuffers coming after this waitsync will naturally not occur until
- * the previous one is done.
- */
 static void
 intel_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                           GLbitfield flags, GLuint64 timeout)
 {
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
+
+   brw_fence_server_wait(brw, &sync->fence);
 }
 
-static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
+static void
+intel_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
-      drm_intel_bo_unreference(sync->bo);
-      sync->bo = NULL;
+   if (brw_fence_has_completed(&sync->fence))
       s->StatusFlag = 1;
-   }
 }
 
 void
@@ -143,3 +203,56 @@ intel_init_syncobj_functions(struct dd_function_table *functions)
    functions->ClientWaitSync = intel_gl_client_wait_sync;
    functions->ServerWaitSync = intel_gl_server_wait_sync;
 }
+
+static void *
+intel_dri_create_fence(__DRIcontext *ctx)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence;
+
+   fence = calloc(1, sizeof(*fence));
+   if (!fence)
+      return NULL;
+
+   brw_fence_insert(brw, fence);
+
+   return fence;
+}
+
+static void
+intel_dri_destroy_fence(__DRIscreen *screen, void *driver_fence)
+{
+   struct brw_fence *fence = driver_fence;
+
+   brw_fence_finish(fence);
+   free(fence);
+}
+
+static GLboolean
+intel_dri_client_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags,
+                           uint64_t timeout)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence = driver_fence;
+
+   return brw_fence_client_wait(brw, fence, timeout);
+}
+
+static void
+intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence = driver_fence;
+
+   brw_fence_server_wait(brw, fence);
+}
+
+const __DRI2fenceExtension intelFenceExtension = {
+   .base = { __DRI2_FENCE, 1 },
+
+   .create_fence = intel_dri_create_fence,
+   .destroy_fence = intel_dri_destroy_fence,
+   .client_wait_sync = intel_dri_client_wait_sync,
+   .server_wait_sync = intel_dri_server_wait_sync,
+   .get_fence_from_cl_event = NULL,
+};
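
For illustration (not code from this commit; `ext`, `screen`, and `ctx` are
placeholder names), this is roughly how a loader such as the EGL driver
drives the vtable above:

  #include <GL/gl.h>
  #include <GL/internal/dri_interface.h>

  /* Wait for all GL commands issued so far on `ctx` to complete. */
  static GLboolean
  wait_for_idle(const __DRI2fenceExtension *ext,
                __DRIscreen *screen, __DRIcontext *ctx)
  {
     /* create_fence() flushes the batchbuffer, much like glFenceSync. */
     void *fence = ext->create_fence(ctx);
     if (!fence)
        return GL_FALSE;

     /* A timeout above INT64_MAX is clamped inside brw_fence_client_wait();
      * the flags argument is unused by this driver. */
     GLboolean ok = ext->client_wait_sync(ctx, fence, 0, ~0ull);

     ext->destroy_fence(screen, fence);
     return ok;
  }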