tu: Rewrite border color handling

Emit a single table of all possible Vulkan border colors up front, and
then index into it using the Vulkan enum directly. In fact this seems to
be the entire point of separating out border colors in the first place.

In addition to being simpler and having less CPU overhead, and fixing
cases where more than one sampler uses border color, this paves the way
for bindless samplers because the existing approach isn't great for
bindless.

Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4200>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4200>
This commit is contained in:
Connor Abbott 2020-03-12 12:39:16 +01:00 committed by Marge Bot
parent f6dad10d04
commit 3349fe9a26
5 changed files with 115 additions and 159 deletions

View File

@ -1163,6 +1163,9 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_regs(cs,
A6XX_RB_LRZ_CNTL(0));
tu_cs_emit_regs(cs,
A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &cmd->device->border_color));
tu_cs_sanity_check(cs);
}
@ -2236,6 +2239,9 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
}
tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->device->border_color,
MSM_SUBMIT_BO_READ);
for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
@ -2745,7 +2751,7 @@ struct tu_draw_state_group
struct tu_cs_entry ib;
};
const static struct tu_sampler*
const static void *
sampler_ptr(struct tu_descriptor_state *descriptors_state,
const struct tu_descriptor_map *map, unsigned i,
unsigned array_index)
@ -2759,20 +2765,18 @@ sampler_ptr(struct tu_descriptor_state *descriptors_state,
&set->layout->binding[map->binding[i]];
if (layout->immutable_samplers_offset) {
const struct tu_sampler *immutable_samplers =
const uint32_t *immutable_samplers =
tu_immutable_samplers(set->layout, layout);
return &immutable_samplers[array_index];
return &immutable_samplers[array_index * A6XX_TEX_SAMP_DWORDS];
}
switch (layout->type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
return &set->mapped_ptr[layout->offset / 4];
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS +
array_index *
(A6XX_TEX_CONST_DWORDS +
sizeof(struct tu_sampler) / 4)];
return &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS +
array_index * (A6XX_TEX_CONST_DWORDS + A6XX_TEX_SAMP_DWORDS)];
default:
unreachable("unimplemented descriptor type");
break;
@ -2807,7 +2811,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
array_index *
(A6XX_TEX_CONST_DWORDS +
sizeof(struct tu_sampler) / 4)],
A6XX_TEX_SAMP_DWORDS)],
A6XX_TEX_CONST_DWORDS * 4);
break;
default:
@ -3094,7 +3098,6 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
struct tu_descriptor_state *descriptors_state,
gl_shader_stage type,
struct tu_cs_entry *entry,
bool *needs_border,
bool is_sysmem)
{
struct tu_cs *draw_state = &cmd->sub_cs;
@ -3135,12 +3138,11 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
int sampler_index = 0;
for (unsigned i = 0; i < link->sampler_map.num; i++) {
for (int j = 0; j < link->sampler_map.array_size[i]; j++) {
const struct tu_sampler *sampler = sampler_ptr(descriptors_state,
&link->sampler_map,
i, j);
const uint32_t *sampler = sampler_ptr(descriptors_state,
&link->sampler_map,
i, j);
memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS * sampler_index++],
sampler->state, sizeof(sampler->state));
*needs_border |= sampler->needs_border;
sampler, A6XX_TEX_SAMP_DWORDS * 4);
}
}
}
@ -3314,105 +3316,6 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
return VK_SUCCESS;
}
/*
 * One border-color record plus the table of records for the fixed Vulkan
 * border colors.
 *
 * The struct is packed and its members add up to 128 bytes;
 * tu6_emit_border_color() statically asserts that size and copies whole
 * records with memcpy(), so member order and padding must not change.
 *
 * Each record holds several differently-sized members for the same color.
 * NOTE(review): the member names suggest one slot per texel encoding
 * (32-bit float, 16-bit unorm/snorm/float, packed 565/5551/4444, 8-bit,
 * 10:10:10:2, depth, sRGB) -- confirm against hardware documentation.
 */
struct PACKED bcolor_entry {
uint32_t fp32[4];
uint16_t ui16[4];
int16_t si16[4];
uint16_t fp16[4];
uint16_t rgb565;
uint16_t rgb5a1;
uint16_t rgba4;
uint8_t __pad0[2];
uint8_t ui8[4];
int8_t si8[4];
uint32_t rgb10a2;
uint32_t z24; /* also s8? */
uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
uint8_t __pad1[56];
} border_color[] = {
/* The table is indexed directly by VkBorderColor via designated initializers. */
/* Both transparent-black variants are all zeros. */
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {},
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {},
/*
 * Only the fourth component (index 3) and the matching bits of the packed
 * members are set. 0x3f800000 is 1.0 as IEEE-754 binary32 and 0x3c00 is 1.0
 * as binary16.
 */
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = {
.fp32[3] = 0x3f800000,
.ui16[3] = 0xffff,
.si16[3] = 0x7fff,
.fp16[3] = 0x3c00,
.rgb5a1 = 0x8000,
.rgba4 = 0xf000,
.ui8[3] = 0xff,
.si8[3] = 0x7f,
.rgb10a2 = 0xc0000000,
.srgb[3] = 0x3c00,
},
/*
 * Integer variant: only the fp32/fp16 members are written, with the integer
 * value 1. NOTE(review): presumably integer formats read those members as
 * raw integers -- confirm.
 */
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = {
.fp32[3] = 1,
.fp16[3] = 1,
},
/*
 * Every component is set to its maximum / 1.0 encoding. "[0 ... 3]" is the
 * GNU C range-designator extension and initializes all four elements.
 */
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = {
.fp32[0 ... 3] = 0x3f800000,
.ui16[0 ... 3] = 0xffff,
.si16[0 ... 3] = 0x7fff,
.fp16[0 ... 3] = 0x3c00,
.rgb565 = 0xffff,
.rgb5a1 = 0xffff,
.rgba4 = 0xffff,
.ui8[0 ... 3] = 0xff,
.si8[0 ... 3] = 0x7f,
.rgb10a2 = 0xffffffff,
.z24 = 0xffffff,
.srgb[0 ... 3] = 0x3c00,
},
/* Integer variant: integer 1 in all four fp32/fp16 components. */
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = {
.fp32[0 ... 3] = 1,
.fp16[0 ... 3] = 1,
},
};
/*
 * Build a border-color table for the currently bound graphics pipeline and
 * point the hardware at it.
 *
 * Allocates one 128-byte record per vertex- and fragment-stage sampler
 * descriptor from cmd->sub_cs, fills it with the border_color[] record
 * selected by each sampler's `border` field (all VS samplers first, then all
 * FS samplers), and writes the allocation's GPU address to
 * SP_TP_BORDER_COLOR_BASE_ADDR through `cs`.
 *
 * Returns the tu_cs_alloc() error if the allocation fails, otherwise
 * VK_SUCCESS. Only the graphics bind point is consulted.
 */
static VkResult
tu6_emit_border_color(struct tu_cmd_buffer *cmd,
struct tu_cs *cs)
{
/* The literal 128 used below is the size of one bcolor_entry. */
STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
const struct tu_pipeline *pipeline = cmd->state.pipeline;
struct tu_descriptor_state *descriptors_state =
&cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
const struct tu_descriptor_map *vs_sampler =
&pipeline->program.link[MESA_SHADER_VERTEX].sampler_map;
const struct tu_descriptor_map *fs_sampler =
&pipeline->program.link[MESA_SHADER_FRAGMENT].sampler_map;
struct ts_cs_memory ptr;
/*
 * One record per sampler descriptor across both stages.
 * NOTE(review): 128 / 4 is presumably the record size in dwords -- confirm
 * against tu_cs_alloc()'s parameter meaning.
 */
VkResult result = tu_cs_alloc(&cmd->sub_cs,
vs_sampler->num_desc + fs_sampler->num_desc,
128 / 4,
&ptr);
if (result != VK_SUCCESS)
return result;
/* Vertex-stage samplers: copy each sampler's record, in map order. */
for (unsigned i = 0; i < vs_sampler->num; i++) {
for (unsigned j = 0; j < vs_sampler->array_size[i]; j++) {
const struct tu_sampler *sampler = sampler_ptr(descriptors_state,
vs_sampler, i, j);
memcpy(ptr.map, &border_color[sampler->border], 128);
/* Advance to the next record; assumes ptr.map is a dword pointer -- TODO confirm. */
ptr.map += 128 / 4;
}
}
/* Fragment-stage samplers follow directly after the vertex-stage records. */
for (unsigned i = 0; i < fs_sampler->num; i++) {
for (unsigned j = 0; j < fs_sampler->array_size[i]; j++) {
const struct tu_sampler *sampler = sampler_ptr(descriptors_state,
fs_sampler, i, j);
memcpy(ptr.map, &border_color[sampler->border], 128);
ptr.map += 128 / 4;
}
}
/* ptr.iova was not modified by the loops; it is emitted as a 64-bit value (two dwords). */
tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
tu_cs_emit_qw(cs, ptr.iova);
return VK_SUCCESS;
}
static void
tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
@ -3612,12 +3515,10 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
if (cmd->state.dirty &
(TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) {
bool needs_border = false;
struct tu_cs_entry vs_tex, fs_tex_sysmem, fs_tex_gmem, fs_ibo;
result = tu6_emit_textures(cmd, pipeline, descriptors_state,
MESA_SHADER_VERTEX, &vs_tex, &needs_border,
false);
MESA_SHADER_VERTEX, &vs_tex, false);
if (result != VK_SUCCESS)
return result;
@ -3627,14 +3528,12 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
* attachments.
*/
result = tu6_emit_textures(cmd, pipeline, descriptors_state,
MESA_SHADER_FRAGMENT, &fs_tex_sysmem,
&needs_border, true);
MESA_SHADER_FRAGMENT, &fs_tex_sysmem, true);
if (result != VK_SUCCESS)
return result;
result = tu6_emit_textures(cmd, pipeline, descriptors_state,
MESA_SHADER_FRAGMENT, &fs_tex_gmem,
&needs_border, false);
MESA_SHADER_FRAGMENT, &fs_tex_gmem, false);
if (result != VK_SUCCESS)
return result;
@ -3667,12 +3566,6 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
.enable_mask = ENABLE_DRAW,
.ib = fs_ibo,
};
if (needs_border) {
result = tu6_emit_border_color(cmd, cs);
if (result != VK_SUCCESS)
return result;
}
}
struct tu_cs_entry vs_params;
@ -4055,9 +3948,8 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
tu_emit_compute_driver_params(cs, pipeline, info);
bool needs_border;
result = tu6_emit_textures(cmd, pipeline, descriptors_state,
MESA_SHADER_COMPUTE, &ib, &needs_border, false);
MESA_SHADER_COMPUTE, &ib, false);
if (result != VK_SUCCESS) {
cmd->record_result = result;
return;
@ -4066,9 +3958,6 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
if (ib.size)
tu_cs_emit_ib(cs, &ib);
if (needs_border)
tu_finishme("compute border color");
result = tu6_emit_ibo(cmd, pipeline, descriptors_state, MESA_SHADER_COMPUTE, &ib);
if (result != VK_SUCCESS) {
cmd->record_result = result;

View File

@ -96,10 +96,10 @@ descriptor_size(enum VkDescriptorType type)
/* We may need the IBO or the TEX representation, or both. */
return A6XX_TEX_CONST_DWORDS * 4 * 2;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
/* texture const + tu_sampler struct (includes border color) */
return A6XX_TEX_CONST_DWORDS * 4 + sizeof(struct tu_sampler);
/* texture const + texture sampler */
return (A6XX_TEX_CONST_DWORDS + A6XX_TEX_SAMP_DWORDS) * 4;
case VK_DESCRIPTOR_TYPE_SAMPLER:
return sizeof(struct tu_sampler);
return A6XX_TEX_SAMP_DWORDS * 4;
default:
unreachable("unknown descriptor type\n");
return 0;
@ -136,7 +136,7 @@ tu_CreateDescriptorSetLayout(
uint32_t samplers_offset = sizeof(struct tu_descriptor_set_layout) +
(max_binding + 1) * sizeof(set_layout->binding[0]);
uint32_t size = samplers_offset + immutable_sampler_count * sizeof(struct tu_sampler);
uint32_t size = samplers_offset + immutable_sampler_count * A6XX_TEX_SAMP_DWORDS * 4;
set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

View File

@ -92,11 +92,10 @@ struct tu_pipeline_layout
unsigned char sha1[20];
};
static inline const struct tu_sampler*
static inline const uint32_t *
tu_immutable_samplers(const struct tu_descriptor_set_layout *set,
const struct tu_descriptor_set_binding_layout *binding)
{
return (struct tu_sampler *) ((const char *) set +
binding->immutable_samplers_offset);
return (void *) ((const char *) set + binding->immutable_samplers_offset);
}
#endif /* TU_DESCRIPTOR_SET_H */

View File

@ -1059,6 +1059,61 @@ tu_get_device_extension_index(const char *name)
return -1;
}
/*
 * One border-color record plus the table of records for the fixed Vulkan
 * border colors.
 *
 * The struct is packed and its members add up to 128 bytes, which
 * tu_CreateDevice() statically asserts. tu_CreateDevice() copies the whole
 * table into the device->border_color buffer object, and tu_init_sampler()
 * stores (borderColor * sizeof(struct bcolor_entry)) as the sampler's border
 * color offset. The entries must therefore stay in VkBorderColor order and
 * the record size must not change.
 *
 * NOTE(review): the member names suggest one slot per texel encoding
 * (32-bit float, 16-bit unorm/snorm/float, packed 565/5551/4444, 8-bit,
 * 10:10:10:2, depth, sRGB) -- confirm against hardware documentation.
 * NOTE(review): the table is only read here but has external linkage and is
 * not const; consider "static const" if no other translation unit refers to
 * border_color -- confirm before changing.
 */
struct PACKED bcolor_entry {
uint32_t fp32[4];
uint16_t ui16[4];
int16_t si16[4];
uint16_t fp16[4];
uint16_t rgb565;
uint16_t rgb5a1;
uint16_t rgba4;
uint8_t __pad0[2];
uint8_t ui8[4];
int8_t si8[4];
uint32_t rgb10a2;
uint32_t z24; /* also s8? */
uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
uint8_t __pad1[56];
} border_color[] = {
/* Both transparent-black variants are all zeros. */
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {},
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {},
/*
 * Only the fourth component (index 3) and the matching bits of the packed
 * members are set. 0x3f800000 is 1.0 as IEEE-754 binary32 and 0x3c00 is 1.0
 * as binary16.
 */
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = {
.fp32[3] = 0x3f800000,
.ui16[3] = 0xffff,
.si16[3] = 0x7fff,
.fp16[3] = 0x3c00,
.rgb5a1 = 0x8000,
.rgba4 = 0xf000,
.ui8[3] = 0xff,
.si8[3] = 0x7f,
.rgb10a2 = 0xc0000000,
.srgb[3] = 0x3c00,
},
/*
 * Integer variant: only the fp32/fp16 members are written, with the integer
 * value 1. NOTE(review): presumably integer formats read those members as
 * raw integers -- confirm.
 */
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = {
.fp32[3] = 1,
.fp16[3] = 1,
},
/*
 * Every component is set to its maximum / 1.0 encoding. "[0 ... 3]" is the
 * GNU C range-designator extension and initializes all four elements.
 */
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = {
.fp32[0 ... 3] = 0x3f800000,
.ui16[0 ... 3] = 0xffff,
.si16[0 ... 3] = 0x7fff,
.fp16[0 ... 3] = 0x3c00,
.rgb565 = 0xffff,
.rgb5a1 = 0xffff,
.rgba4 = 0xffff,
.ui8[0 ... 3] = 0xff,
.si8[0 ... 3] = 0x7f,
.rgb10a2 = 0xffffffff,
.z24 = 0xffffff,
.srgb[0 ... 3] = 0x3c00,
},
/* Integer variant: integer 1 in all four fp32/fp16 components. */
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = {
.fp32[0 ... 3] = 1,
.fp16[0 ... 3] = 1,
},
};
VkResult
tu_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
@ -1154,6 +1209,17 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_vsc_data2;
STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
result = tu_bo_init_new(device, &device->border_color, sizeof(border_color));
if (result != VK_SUCCESS)
goto fail_border_color;
result = tu_bo_map(device, &device->border_color);
if (result != VK_SUCCESS)
goto fail_border_color_map;
memcpy(device->border_color.map, border_color, sizeof(border_color));
VkPipelineCacheCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
ci.pNext = NULL;
@ -1172,6 +1238,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
return VK_SUCCESS;
fail_pipeline_cache:
fail_border_color_map:
tu_bo_finish(device, &device->border_color);
fail_border_color:
tu_bo_finish(device, &device->vsc_data2);
fail_vsc_data2:
@ -1961,7 +2031,7 @@ tu_DestroyFramebuffer(VkDevice _device,
}
static enum a6xx_tex_clamp
tu6_tex_wrap(VkSamplerAddressMode address_mode, bool *needs_border)
tu6_tex_wrap(VkSamplerAddressMode address_mode)
{
switch (address_mode) {
case VK_SAMPLER_ADDRESS_MODE_REPEAT:
@ -1971,7 +2041,6 @@ tu6_tex_wrap(VkSamplerAddressMode address_mode, bool *needs_border)
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
return A6XX_TEX_CLAMP_TO_EDGE;
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
*needs_border = true;
return A6XX_TEX_CLAMP_TO_BORDER;
case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
@ -2011,34 +2080,35 @@ tu_init_sampler(struct tu_device *device,
unsigned aniso = pCreateInfo->anisotropyEnable ?
util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
bool needs_border = false;
sampler->state[0] =
sampler->descriptor[0] =
COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
A6XX_TEX_SAMP_0_ANISO(aniso) |
A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
sampler->state[1] =
sampler->descriptor[1] =
/* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
A6XX_TEX_SAMP_1_MIN_LOD(pCreateInfo->minLod) |
A6XX_TEX_SAMP_1_MAX_LOD(pCreateInfo->maxLod) |
COND(pCreateInfo->compareEnable,
A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
sampler->state[2] = 0;
sampler->state[3] = 0;
/* This is an offset into the border_color BO, which we fill with all the
* possible Vulkan border colors in the correct order, so we can just use
* the Vulkan enum with no translation necessary.
*/
sampler->descriptor[2] =
A6XX_TEX_SAMP_2_BCOLOR_OFFSET((unsigned) pCreateInfo->borderColor *
sizeof(struct bcolor_entry));
sampler->descriptor[3] = 0;
/* TODO:
* A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
* border color
*/
sampler->needs_border = needs_border;
sampler->border = pCreateInfo->borderColor;
}
VkResult

View File

@ -496,6 +496,8 @@ struct tu_device
uint32_t vsc_data_pitch;
uint32_t vsc_data2_pitch;
struct tu_bo border_color;
struct list_head shader_slabs;
mtx_t shader_slab_mutex;
@ -1487,12 +1489,8 @@ struct tu_image_view
uint32_t storage_descriptor[A6XX_TEX_CONST_DWORDS];
};
struct tu_sampler
{
uint32_t state[A6XX_TEX_SAMP_DWORDS];
bool needs_border;
VkBorderColor border;
struct tu_sampler {
uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
};
VkResult