From 3e424bcdfcef19682f9b651f7c1a04e32f18be5c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 11 May 2020 13:28:58 -0700
Subject: [PATCH] freedreno: Split the fd_batch_resource_used by read vs write.

This is for an optimization I plan in a following commit.  I found I had
to add likely()s to avoid a perf regression from branch prediction.

On the drawoverhead 8 UBOs test, the HW can't quite keep up with the CPU,
but if I set nohw then this change is 1.32023% +/- 0.373053% (n=10).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4996>
---
 .../drivers/freedreno/a6xx/fd6_blitter.c      |   4 +-
 .../drivers/freedreno/freedreno_batch.c       | 118 +++++++++++-------
 .../drivers/freedreno/freedreno_batch.h       |   3 +-
 .../drivers/freedreno/freedreno_draw.c        |   4 +-
 .../drivers/freedreno/freedreno_query_acc.c   |   2 +-
 5 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
index 82b0aff3ae9..9b5231d55e3 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
@@ -643,8 +643,8 @@ handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 
 	fd_screen_lock(ctx->screen);
 
-	fd_batch_resource_used(batch, fd_resource(info->src.resource), false);
-	fd_batch_resource_used(batch, fd_resource(info->dst.resource), true);
+	fd_batch_resource_read(batch, fd_resource(info->src.resource));
+	fd_batch_resource_write(batch, fd_resource(info->dst.resource));
 
 	fd_screen_unlock(ctx->screen);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index fcb11546b81..a653e0d91a8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -392,57 +392,17 @@ flush_write_batch(struct fd_resource *rsc)
 	fd_batch_reference_locked(&b, NULL);
 }
 
-void
-fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write)
+static bool
+fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc)
 {
-	fd_screen_assert_locked(batch->ctx->screen);
+	return rsc->batch_mask & (1 << batch->idx);
+}
 
-	if (rsc->stencil)
-		fd_batch_resource_used(batch, rsc->stencil, write);
+static void
+fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
+{
 
-	DBG("%p: %s %p", batch, write ? "write" : "read", rsc);
-
-	if (write)
-		rsc->valid = true;
-
-	/* note, invalidate write batch, to avoid further writes to rsc
-	 * resulting in a write-after-read hazard.
-	 */
-
-	if (write) {
-		/* if we are pending read or write by any other batch: */
-		if (rsc->batch_mask & ~(1 << batch->idx)) {
-			struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-			struct fd_batch *dep;
-
-			if (rsc->write_batch && rsc->write_batch != batch)
-				flush_write_batch(rsc);
-
-			foreach_batch(dep, cache, rsc->batch_mask) {
-				struct fd_batch *b = NULL;
-				if (dep == batch)
-					continue;
-				/* note that batch_add_dep could flush and unref dep, so
-				 * we need to hold a reference to keep it live for the
-				 * fd_bc_invalidate_batch()
-				 */
-				fd_batch_reference(&b, dep);
-				fd_batch_add_dep(batch, b);
-				fd_bc_invalidate_batch(b, false);
-				fd_batch_reference_locked(&b, NULL);
-			}
-		}
-		fd_batch_reference_locked(&rsc->write_batch, batch);
-	} else {
-		/* If reading a resource pending a write, go ahead and flush the
-		 * writer.  This avoids situations where we end up having to
-		 * flush the current batch in _resource_used()
-		 */
-		if (rsc->write_batch && rsc->write_batch != batch)
-			flush_write_batch(rsc);
-	}
-
-	if (rsc->batch_mask & (1 << batch->idx)) {
+	if (likely(fd_batch_references_resource(batch, rsc))) {
 		debug_assert(_mesa_set_search(batch->resources, rsc));
 		return;
 	}
@@ -453,6 +413,68 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool wri
 	rsc->batch_mask |= (1 << batch->idx);
 }
 
+void
+fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
+{
+	fd_screen_assert_locked(batch->ctx->screen);
+
+	if (rsc->stencil)
+		fd_batch_resource_write(batch, rsc->stencil);
+
+	DBG("%p: write %p", batch, rsc);
+
+	rsc->valid = true;
+
+	/* note, invalidate write batch, to avoid further writes to rsc
+	 * resulting in a write-after-read hazard.
+	 */
+	/* if we are pending read or write by any other batch: */
+	if (unlikely(rsc->batch_mask & ~(1 << batch->idx))) {
+		struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
+		struct fd_batch *dep;
+
+		if (rsc->write_batch && rsc->write_batch != batch)
+			flush_write_batch(rsc);
+
+		foreach_batch(dep, cache, rsc->batch_mask) {
+			struct fd_batch *b = NULL;
+			if (dep == batch)
+				continue;
+			/* note that batch_add_dep could flush and unref dep, so
+			 * we need to hold a reference to keep it live for the
+			 * fd_bc_invalidate_batch()
+			 */
+			fd_batch_reference(&b, dep);
+			fd_batch_add_dep(batch, b);
+			fd_bc_invalidate_batch(b, false);
+			fd_batch_reference_locked(&b, NULL);
+		}
+	}
+	fd_batch_reference_locked(&rsc->write_batch, batch);
+
+	fd_batch_add_resource(batch, rsc);
+}
+
+void
+fd_batch_resource_read(struct fd_batch *batch, struct fd_resource *rsc)
+{
+	fd_screen_assert_locked(batch->ctx->screen);
+
+	if (rsc->stencil)
+		fd_batch_resource_read(batch, rsc->stencil);
+
+	DBG("%p: read %p", batch, rsc);
+
+	/* If reading a resource pending a write, go ahead and flush the
+	 * writer.  This avoids situations where we end up having to
+	 * flush the current batch in _resource_used()
+	 */
+	if (unlikely(rsc->write_batch && rsc->write_batch != batch))
+		flush_write_batch(rsc);
+
+	fd_batch_add_resource(batch, rsc);
+}
+
 void
 fd_batch_check_size(struct fd_batch *batch)
 {
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 479d78d5eca..8ec3700b7ad 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -256,7 +256,8 @@ struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
 void fd_batch_reset(struct fd_batch *batch);
 void fd_batch_flush(struct fd_batch *batch);
 void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep);
-void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write);
+void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc);
+void fd_batch_resource_read(struct fd_batch *batch, struct fd_resource *rsc);
 void fd_batch_check_size(struct fd_batch *batch);
 
 /* not called directly: */
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index b36e89dcb01..d3d68a14131 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -47,7 +47,7 @@ resource_read(struct fd_batch *batch, struct pipe_resource *prsc)
 {
 	if (!prsc)
 		return;
-	fd_batch_resource_used(batch, fd_resource(prsc), false);
+	fd_batch_resource_read(batch, fd_resource(prsc));
 }
 
 static void
@@ -55,7 +55,7 @@ resource_written(struct fd_batch *batch, struct pipe_resource *prsc)
 {
 	if (!prsc)
 		return;
-	fd_batch_resource_used(batch, fd_resource(prsc), true);
+	fd_batch_resource_write(batch, fd_resource(prsc));
 }
 
 static void
diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.c b/src/gallium/drivers/freedreno/freedreno_query_acc.c
index 89312ed14c1..888f7763d85 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_acc.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_acc.c
@@ -88,7 +88,7 @@ fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch)
 	p->resume(aq, aq->batch);
 
 	fd_screen_lock(batch->ctx->screen);
-	fd_batch_resource_used(batch, fd_resource(aq->prsc), true);
+	fd_batch_resource_write(batch, fd_resource(aq->prsc));
 	fd_screen_unlock(batch->ctx->screen);
 }