From 26d1886a7cbb018e78854e838649099681e8f927 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 13 Jul 2021 16:38:12 -0400 Subject: [PATCH] nine: optimize texture binds a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this can just iterate over the mask of active textures instead of always iterating over and rebinding all textures Acked-by: Marek Olšák Reviewed-by: Part-of: --- src/gallium/frontends/nine/nine_state.c | 57 +++++++++++++++---------- src/gallium/frontends/nine/nine_state.h | 2 + 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/gallium/frontends/nine/nine_state.c b/src/gallium/frontends/nine/nine_state.c index 86ed76602f0..8ef71b15afb 100644 --- a/src/gallium/frontends/nine/nine_state.c +++ b/src/gallium/frontends/nine/nine_state.c @@ -995,30 +995,24 @@ update_textures_and_samplers(struct NineDevice9 *device) struct nine_context *context = &device->context; struct pipe_context *pipe = context->pipe; struct pipe_sampler_view *view[NINE_MAX_SAMPLERS]; - unsigned num_textures; - unsigned i; + unsigned num_textures = 0; boolean commit_samplers; uint16_t sampler_mask = context->ps ? context->ps->sampler_mask : device->ff.ps->sampler_mask; - /* TODO: Can we reduce iterations here ? */ - commit_samplers = FALSE; + uint16_t prev_mask = context->bound_samplers_mask_ps; context->bound_samplers_mask_ps = 0; - for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) { + const uint16_t ps_mask = sampler_mask | context->enabled_samplers_mask_ps; + /* iterate over extant+enabled mask */ + u_foreach_bit(i, ps_mask) { const unsigned s = NINE_SAMPLER_PS(i); int sRGB; - if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) { - view[i] = NULL; - continue; - } - - if (context->texture[s].enabled) { + if (context->enabled_samplers_mask_ps & BITFIELD_BIT(i)) { sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0; view[i] = context->texture[s].view[sRGB]; - num_textures = i + 1; if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) { context->changed.sampler[s] = 0; @@ -1033,7 +1027,6 @@ update_textures_and_samplers(struct NineDevice9 *device) * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ view[i] = device->dummy_sampler_view; - num_textures = i + 1; cso_single_sampler(context->cso, PIPE_SHADER_FRAGMENT, s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state); @@ -1041,32 +1034,34 @@ update_textures_and_samplers(struct NineDevice9 *device) commit_samplers = TRUE; context->changed.sampler[s] = ~0; } + num_textures = i + 1; context->bound_samplers_mask_ps |= (1 << s); } + /* fill in unused samplers */ + u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~ps_mask) + view[i] = NULL; - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, num_textures, 0, view); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, num_textures, + /* unbind trailing based on bitcount of shifted mask */ + util_bitcount(prev_mask >> num_textures), view); if (commit_samplers) cso_single_sampler_done(context->cso, PIPE_SHADER_FRAGMENT); commit_samplers = FALSE; sampler_mask = context->programmable_vs ? context->vs->sampler_mask : 0; + prev_mask = context->bound_samplers_mask_vs; context->bound_samplers_mask_vs = 0; - for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) { + const uint16_t vs_mask = sampler_mask | context->enabled_samplers_mask_vs; + u_foreach_bit(i, vs_mask) { const unsigned s = NINE_SAMPLER_VS(i); int sRGB; - if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) { - view[i] = NULL; - continue; - } - if (context->texture[s].enabled) { sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0; view[i] = context->texture[s].view[sRGB]; - num_textures = i + 1; if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) { context->changed.sampler[s] = 0; @@ -1081,7 +1076,6 @@ update_textures_and_samplers(struct NineDevice9 *device) * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ view[i] = device->dummy_sampler_view; - num_textures = i + 1; cso_single_sampler(context->cso, PIPE_SHADER_VERTEX, s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state); @@ -1089,11 +1083,17 @@ update_textures_and_samplers(struct NineDevice9 *device) commit_samplers = TRUE; context->changed.sampler[s] = ~0; } + num_textures = i + 1; context->bound_samplers_mask_vs |= (1 << i); } + /* fill in unused samplers */ + u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~vs_mask) + view[i] = NULL; - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, num_textures, 0, view); + pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, num_textures, + /* unbind trailing based on bitcount of shifted mask */ + util_bitcount(prev_mask >> num_textures), view); if (commit_samplers) cso_single_sampler_done(context->cso, PIPE_SHADER_VERTEX); @@ -1475,6 +1475,17 @@ CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply, uint fetch4_compatible = (fetch4_shadow_enabled >> 2) & 1; context->texture[stage].enabled = enabled; + if (enabled) { + if (stage < NINE_MAX_SAMPLERS_PS) + context->enabled_samplers_mask_ps |= BITFIELD_BIT(stage - NINE_SAMPLER_PS(0)); + else if (stage >= NINE_SAMPLER_VS(0)) + context->enabled_samplers_mask_vs |= BITFIELD_BIT(stage - NINE_SAMPLER_VS(0)); + } else { + if (stage < NINE_MAX_SAMPLERS_PS) + context->enabled_samplers_mask_ps &= ~BITFIELD_BIT(stage - NINE_SAMPLER_PS(0)); + else if (stage >= NINE_SAMPLER_VS(0)) + context->enabled_samplers_mask_vs &= ~BITFIELD_BIT(stage - NINE_SAMPLER_VS(0)); + } context->samplers_shadow &= ~(1 << stage); context->samplers_shadow |= shadow << stage; context->samplers_fetch4 &= ~(1 << stage); diff --git a/src/gallium/frontends/nine/nine_state.h b/src/gallium/frontends/nine/nine_state.h index dcb66ca87f8..0da60dbaf8a 100644 --- a/src/gallium/frontends/nine/nine_state.h +++ b/src/gallium/frontends/nine/nine_state.h @@ -302,7 +302,9 @@ struct nine_context { uint32_t samplers_fetch4; uint8_t bound_samplers_mask_vs; + uint8_t enabled_samplers_mask_vs; uint16_t bound_samplers_mask_ps; + uint16_t enabled_samplers_mask_ps; int dummy_vbo_bound_at; /* -1 = not bound , >= 0 = bound index */ boolean vbo_bound_done;