anv: Emulate texture swizzle in the shader when needed
Now that we have the descriptor buffer mechanism, emulated texture swizzle can be implemented in a very non-invasive way. Previous attempts all tried to extend the push constant based image param mechanism which was gross. This could, in theory, be done much faster with a magic back-end instruction which does indirect MOVs but Vulkan on IVB is already so slow this isn't going to matter much. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104355 Cc: "19.1" <mesa-stable@lists.freedesktop.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
parent
ea479fdc1d
commit
d2aa65eb18
|
@ -103,6 +103,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
||||||
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
|
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
|
||||||
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
|
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
|
||||||
|
|
||||||
|
/* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader */
|
||||||
|
if (device->info.gen == 7 && !device->info.is_haswell &&
|
||||||
|
(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
|
||||||
|
type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
|
||||||
|
data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE;
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,6 +129,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
|
||||||
if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
|
if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
|
||||||
size += sizeof(struct anv_address_range_descriptor);
|
size += sizeof(struct anv_address_range_descriptor);
|
||||||
|
|
||||||
|
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)
|
||||||
|
size += sizeof(struct anv_texture_swizzle_descriptor);
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1184,6 +1193,26 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
||||||
|
|
||||||
anv_descriptor_set_write_image_param(desc_map, image_param);
|
anv_descriptor_set_write_image_param(desc_map, image_param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) {
|
||||||
|
assert(!(bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE));
|
||||||
|
assert(image_view);
|
||||||
|
struct anv_texture_swizzle_descriptor desc_data[3];
|
||||||
|
memset(desc_data, 0, sizeof(desc_data));
|
||||||
|
|
||||||
|
for (unsigned p = 0; p < image_view->n_planes; p++) {
|
||||||
|
desc_data[p] = (struct anv_texture_swizzle_descriptor) {
|
||||||
|
.swizzle = {
|
||||||
|
(uint8_t)image_view->planes[p].isl.swizzle.r,
|
||||||
|
(uint8_t)image_view->planes[p].isl.swizzle.g,
|
||||||
|
(uint8_t)image_view->planes[p].isl.swizzle.b,
|
||||||
|
(uint8_t)image_view->planes[p].isl.swizzle.a,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
memcpy(desc_map, desc_data,
|
||||||
|
MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -1278,6 +1278,10 @@ anv_image_fill_surface_state(struct anv_device *device,
|
||||||
if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
|
if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
|
||||||
view.swizzle = anv_swizzle_for_render(view.swizzle);
|
view.swizzle = anv_swizzle_for_render(view.swizzle);
|
||||||
|
|
||||||
|
/* On Ivy Bridge and Bay Trail we do the swizzle in the shader */
|
||||||
|
if (device->info.gen == 7 && !device->info.is_haswell)
|
||||||
|
view.swizzle = ISL_SWIZZLE_IDENTITY;
|
||||||
|
|
||||||
/* If this is a HiZ buffer we can sample from with a programmable clear
|
/* If this is a HiZ buffer we can sample from with a programmable clear
|
||||||
* value (SKL+), define the clear value to the optimal constant.
|
* value (SKL+), define the clear value to the optimal constant.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -899,13 +899,98 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
|
||||||
return plane;
|
return plane;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static nir_ssa_def *
|
||||||
|
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
|
||||||
|
unsigned start, unsigned end)
|
||||||
|
{
|
||||||
|
if (start == end - 1) {
|
||||||
|
return srcs[start];
|
||||||
|
} else {
|
||||||
|
unsigned mid = start + (end - start) / 2;
|
||||||
|
return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
|
||||||
|
build_def_array_select(b, srcs, idx, start, mid),
|
||||||
|
build_def_array_select(b, srcs, idx, mid, end));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
|
||||||
|
struct apply_pipeline_layout_state *state)
|
||||||
|
{
|
||||||
|
assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
|
||||||
|
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
|
||||||
|
nir_tex_instr_is_query(tex) ||
|
||||||
|
tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
|
||||||
|
(tex->is_shadow && tex->is_new_style_shadow))
|
||||||
|
return;
|
||||||
|
|
||||||
|
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
||||||
|
assert(deref_src_idx >= 0);
|
||||||
|
|
||||||
|
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
|
||||||
|
UNUSED nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||||
|
|
||||||
|
UNUSED unsigned set = var->data.descriptor_set;
|
||||||
|
UNUSED unsigned binding = var->data.binding;
|
||||||
|
UNUSED const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||||
|
&state->layout->set[set].layout->binding[binding];
|
||||||
|
assert(bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE);
|
||||||
|
|
||||||
|
nir_builder *b = &state->builder;
|
||||||
|
b->cursor = nir_before_instr(&tex->instr);
|
||||||
|
|
||||||
|
const unsigned plane_offset =
|
||||||
|
plane * sizeof(struct anv_texture_swizzle_descriptor);
|
||||||
|
nir_ssa_def *swiz =
|
||||||
|
build_descriptor_load(deref, plane_offset, 1, 32, state);
|
||||||
|
|
||||||
|
b->cursor = nir_after_instr(&tex->instr);
|
||||||
|
|
||||||
|
assert(tex->dest.ssa.bit_size == 32);
|
||||||
|
assert(tex->dest.ssa.num_components == 4);
|
||||||
|
|
||||||
|
/* Initializing to undef is ok; nir_opt_undef will clean it up. */
|
||||||
|
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||||
|
nir_ssa_def *comps[8];
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
|
||||||
|
comps[i] = undef;
|
||||||
|
|
||||||
|
comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
|
||||||
|
if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
|
||||||
|
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
|
||||||
|
else
|
||||||
|
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
|
||||||
|
comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
|
||||||
|
comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
|
||||||
|
comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
|
||||||
|
comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
|
||||||
|
|
||||||
|
nir_ssa_def *swiz_comps[4];
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
|
||||||
|
swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
|
||||||
|
}
|
||||||
|
nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
|
||||||
|
|
||||||
|
/* Rewrite uses before we insert so we don't rewrite this use */
|
||||||
|
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
|
||||||
|
nir_src_for_ssa(swiz_tex_res),
|
||||||
|
swiz_tex_res->parent_instr);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
|
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
|
||||||
{
|
{
|
||||||
state->builder.cursor = nir_before_instr(&tex->instr);
|
|
||||||
|
|
||||||
unsigned plane = tex_instr_get_and_remove_plane_src(tex);
|
unsigned plane = tex_instr_get_and_remove_plane_src(tex);
|
||||||
|
|
||||||
|
/* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
|
||||||
|
* before we lower the derefs away so we can still find the descriptor.
|
||||||
|
*/
|
||||||
|
if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
|
||||||
|
lower_gen7_tex_swizzle(tex, plane, state);
|
||||||
|
|
||||||
|
state->builder.cursor = nir_before_instr(&tex->instr);
|
||||||
|
|
||||||
lower_tex_deref(tex, nir_tex_src_texture_deref,
|
lower_tex_deref(tex, nir_tex_src_texture_deref,
|
||||||
&tex->texture_index, plane, state);
|
&tex->texture_index, plane, state);
|
||||||
|
|
||||||
|
|
|
@ -1548,6 +1548,17 @@ struct anv_sampled_image_descriptor {
|
||||||
uint32_t sampler;
|
uint32_t sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Struct representing the texture swizzle stored in a descriptor
 *
 * One of these is stored per image plane for bindings that carry
 * ANV_DESCRIPTOR_TEXTURE_SWIZZLE.
 */
struct anv_texture_swizzle_descriptor {
   /** Texture swizzle
    *
    * One channel-select value per component, stored in r, g, b, a order.
    *
    * See also nir_intrinsic_channel_select_intel
    */
   uint8_t swizzle[4];

   /** Unused padding to ensure the struct is a multiple of 64 bits */
   uint32_t _pad;
};
|
||||||
|
|
||||||
/** Struct representing a storage image descriptor */
|
/** Struct representing a storage image descriptor */
|
||||||
struct anv_storage_image_descriptor {
|
struct anv_storage_image_descriptor {
|
||||||
/** Bindless image handles
|
/** Bindless image handles
|
||||||
|
@ -1589,6 +1600,8 @@ enum anv_descriptor_data {
|
||||||
ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
|
ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
|
||||||
/** Storage image handles */
|
/** Storage image handles */
|
||||||
ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
|
ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
|
||||||
|
/** Texture swizzle, applied in the shader */
|
||||||
|
ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
|
||||||
};
|
};
|
||||||
|
|
||||||
struct anv_descriptor_set_binding_layout {
|
struct anv_descriptor_set_binding_layout {
|
||||||
|
|
Loading…
Reference in New Issue