nv50: fake enough resume support pre-nva0 to pass gles3 requirements

GLES3 supports pause/resume of transform feedback (xfb). However, since there's no geometry shader support in ES3, it's a lot easier to figure out the offsets to use. This makes it work for the dEQP tests at least. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Reviewed-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8346>
2021-01-06 00:48:23 -05:00 · 2021-01-06 00:48:23 -05:00 · c0171c4626
parent e0a2af3325
commit c0171c4626
4 changed files with 33 additions and 6 deletions
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -161,6 +161,11 @@ struct nv50_context {
   uint8_t num_so_targets;
   uint8_t so_targets_dirty;
   struct pipe_stream_output_target *so_target[4];
+   /* Keeps track of how many bytes of each SO target are used. Normally this
+    * doesn't work in the presence of GS, but it only needs to work for ES 3.0,
+    * which doesn't have GS or any other oddities. Only used pre-NVA0.
+    */
+   uint32_t so_used[4];

   struct pipe_framebuffer_state framebuffer;
   struct pipe_blend_color blend_colour;
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -696,11 +696,17 @@ nv50_stream_output_validate(struct nv50_context *nv50)

      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;

-      if (n == 4 && !targ->clean)
-         nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
+      uint32_t so_used = 0;
+
+      if (!targ->clean) {
+         if (n == 4)
+            nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
+         else
+            so_used = nv50->so_used[i];
+      }
      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
-      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
-      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset + so_used);
+      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset + so_used);
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);
@@ -714,9 +720,10 @@ nv50_stream_output_validate(struct nv50_context *nv50)
            targ->clean = false;
         }
      } else {
-         const unsigned limit = targ->pipe.buffer_size /
+         const unsigned limit = (targ->pipe.buffer_size - so_used) /
            (so->stride[i] * nv50->state.prim_size);
         prims = MIN2(prims, limit);
+         targ->clean = false;
      }
      targ->stride = so->stride[i];
      BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -1207,8 +1207,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
         serialize = false;
      }

-      if (targets[i] && !append)
+      if (targets[i] && !append) {
         nv50_so_target(targets[i])->clean = true;
+         nv50->so_used[i] = 0;
+      }

      pipe_so_target_reference(&nv50->so_target[i], targets[i]);
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -23,6 +23,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
+#include "util/u_prim.h"
 #include "util/format/u_format.h"
 #include "translate/translate.h"

@@ -845,6 +846,18 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
      PUSH_DATA (push, 0x00010000 * !!nv50->state.mul_zero_wins);
   }

+   /* Make pausing/resuming streamout work well enough for ES 3.0 pre-NVA0:
+    * count the vertices each draw emits when the vertex shader has SO outputs.
+    */
+   if (nv50->screen->base.class_3d < NVA0_3D_CLASS &&
+       nv50->vertprog->pipe.stream_output.num_outputs) {
+      for (int i = 0; i < nv50->num_so_targets; i++) {
+         nv50->so_used[i] += info->instance_count *
+            u_stream_outputs_for_vertices(info->mode, draws[0].count) *
+            nv50->vertprog->pipe.stream_output.stride[i] * 4;
+      }
+   }
+
   if (nv50->vbo_fifo) {
      nv50_push_vbo(nv50, info, indirect, &draws[0]);
      goto cleanup;