radeonsi: merge sampler and image descriptor lists into one
Sampler slots: slot[8], .. slot[39] (ascending). Image slots: slot[7], .. slot[0] (descending). Each image occupies 1/2 of a slot, so there are 16 images in total; counted in 1/2-slot increments, the image layout is therefore slot[15], .. slot[0]. Updating image slot 2n+i (i <= 1) also dirties and re-uploads slot 2n+!i, because both halves share one full slot. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
5df24c3fa6
commit
f07c15ef80
|
@ -99,17 +99,14 @@ static void si_init_descriptors(struct si_descriptors *desc,
|
|||
unsigned shader_userdata_index,
|
||||
unsigned element_dw_size,
|
||||
unsigned num_elements,
|
||||
const uint32_t *null_descriptor,
|
||||
unsigned *ce_offset)
|
||||
{
|
||||
int i;
|
||||
|
||||
assert(num_elements <= sizeof(desc->dirty_mask)*8);
|
||||
|
||||
desc->list = CALLOC(num_elements, element_dw_size * 4);
|
||||
desc->element_dw_size = element_dw_size;
|
||||
desc->num_elements = num_elements;
|
||||
desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
|
||||
desc->dirty_mask = u_bit_consecutive64(0, num_elements);
|
||||
desc->shader_userdata_offset = shader_userdata_index * 4;
|
||||
|
||||
if (ce_offset) {
|
||||
|
@ -119,14 +116,6 @@ static void si_init_descriptors(struct si_descriptors *desc,
|
|||
/* make sure that ce_offset stays 32 byte aligned */
|
||||
*ce_offset += align(element_dw_size * num_elements * 4, 32);
|
||||
}
|
||||
|
||||
/* Initialize the array to NULL descriptors if the element size is 8. */
|
||||
if (null_descriptor) {
|
||||
assert(element_dw_size % 8 == 0);
|
||||
for (i = 0; i < num_elements * element_dw_size / 8; i++)
|
||||
memcpy(desc->list + i * 8, null_descriptor,
|
||||
8 * 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_release_descriptors(struct si_descriptors *desc)
|
||||
|
@ -219,8 +208,8 @@ static bool si_upload_descriptors(struct si_context *sctx,
|
|||
|
||||
while(desc->dirty_mask) {
|
||||
int begin, count;
|
||||
u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
|
||||
&count);
|
||||
u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin,
|
||||
&count);
|
||||
|
||||
begin *= desc->element_dw_size;
|
||||
count *= desc->element_dw_size;
|
||||
|
@ -273,16 +262,16 @@ si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc
|
|||
/* SAMPLER VIEWS */
|
||||
|
||||
static unsigned
|
||||
si_sampler_descriptors_idx(unsigned shader)
|
||||
si_sampler_and_image_descriptors_idx(unsigned shader)
|
||||
{
|
||||
return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
|
||||
SI_SHADER_DESCS_SAMPLERS;
|
||||
SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
|
||||
}
|
||||
|
||||
static struct si_descriptors *
|
||||
si_sampler_descriptors(struct si_context *sctx, unsigned shader)
|
||||
si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
|
||||
{
|
||||
return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
|
||||
return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
|
||||
}
|
||||
|
||||
static void si_release_sampler_views(struct si_sampler_views *views)
|
||||
|
@ -474,8 +463,9 @@ static void si_set_sampler_view(struct si_context *sctx,
|
|||
{
|
||||
struct si_sampler_views *views = &sctx->samplers[shader].views;
|
||||
struct si_sampler_view *rview = (struct si_sampler_view*)view;
|
||||
struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
|
||||
uint32_t *desc = descs->list + slot * 16;
|
||||
struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
|
||||
unsigned desc_slot = si_get_sampler_slot(slot);
|
||||
uint32_t *desc = descs->list + desc_slot * 16;
|
||||
|
||||
if (views->views[slot] == view && !disallow_early_out)
|
||||
return;
|
||||
|
@ -549,8 +539,8 @@ static void si_set_sampler_view(struct si_context *sctx,
|
|||
views->enabled_mask &= ~(1u << slot);
|
||||
}
|
||||
|
||||
descs->dirty_mask |= 1u << slot;
|
||||
sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
|
||||
descs->dirty_mask |= 1ull << desc_slot;
|
||||
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
}
|
||||
|
||||
static bool is_compressed_colortex(struct r600_texture *rtex)
|
||||
|
@ -656,19 +646,6 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
|
|||
|
||||
/* IMAGE VIEWS */
|
||||
|
||||
static unsigned
|
||||
si_image_descriptors_idx(unsigned shader)
|
||||
{
|
||||
return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
|
||||
SI_SHADER_DESCS_IMAGES;
|
||||
}
|
||||
|
||||
static struct si_descriptors*
|
||||
si_image_descriptors(struct si_context *sctx, unsigned shader)
|
||||
{
|
||||
return &sctx->descriptors[si_image_descriptors_idx(shader)];
|
||||
}
|
||||
|
||||
static void
|
||||
si_release_image_views(struct si_images_info *images)
|
||||
{
|
||||
|
@ -704,15 +681,17 @@ si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
|
|||
struct si_images_info *images = &ctx->images[shader];
|
||||
|
||||
if (images->enabled_mask & (1u << slot)) {
|
||||
struct si_descriptors *descs = si_image_descriptors(ctx, shader);
|
||||
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
|
||||
unsigned desc_slot = si_get_image_slot(slot);
|
||||
|
||||
pipe_resource_reference(&images->views[slot].resource, NULL);
|
||||
images->compressed_colortex_mask &= ~(1 << slot);
|
||||
|
||||
memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
|
||||
memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
|
||||
images->enabled_mask &= ~(1u << slot);
|
||||
descs->dirty_mask |= 1u << slot;
|
||||
ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
|
||||
/* two 8-dword images share one 16-dword slot */
|
||||
descs->dirty_mask |= 1u << (desc_slot / 2);
|
||||
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -735,9 +714,10 @@ static void si_set_shader_image(struct si_context *ctx,
|
|||
{
|
||||
struct si_screen *screen = ctx->screen;
|
||||
struct si_images_info *images = &ctx->images[shader];
|
||||
struct si_descriptors *descs = si_image_descriptors(ctx, shader);
|
||||
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
|
||||
struct r600_resource *res;
|
||||
uint32_t *desc = descs->list + slot * 8;
|
||||
unsigned desc_slot = si_get_image_slot(slot);
|
||||
uint32_t *desc = descs->list + desc_slot * 8;
|
||||
|
||||
if (!view || !view->resource) {
|
||||
si_disable_shader_image(ctx, shader, slot);
|
||||
|
@ -831,8 +811,9 @@ static void si_set_shader_image(struct si_context *ctx,
|
|||
}
|
||||
|
||||
images->enabled_mask |= 1u << slot;
|
||||
descs->dirty_mask |= 1u << slot;
|
||||
ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
|
||||
/* two 8-dword images share one 16-dword slot */
|
||||
descs->dirty_mask |= 1u << (desc_slot / 2);
|
||||
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
|
||||
/* Since this can flush, it must be done after enabled_mask is updated. */
|
||||
si_sampler_view_add_buffer(ctx, &res->b.b,
|
||||
|
@ -895,7 +876,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
|
|||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_textures_info *samplers = &sctx->samplers[shader];
|
||||
struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
|
||||
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
|
||||
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
|
||||
int i;
|
||||
|
||||
|
@ -904,6 +885,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
|
|||
|
||||
for (i = 0; i < count; i++) {
|
||||
unsigned slot = start + i;
|
||||
unsigned desc_slot = si_get_sampler_slot(slot);
|
||||
|
||||
if (!sstates[i] ||
|
||||
sstates[i] == samplers->views.sampler_states[slot])
|
||||
|
@ -923,9 +905,9 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
|
|||
((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
|
||||
continue;
|
||||
|
||||
memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
|
||||
desc->dirty_mask |= 1u << slot;
|
||||
sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
|
||||
memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
|
||||
desc->dirty_mask |= 1ull << desc_slot;
|
||||
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -948,7 +930,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
|
|||
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
|
||||
|
||||
si_init_descriptors(descs, shader_userdata_index, 4,
|
||||
num_buffers, NULL, ce_offset);
|
||||
num_buffers, ce_offset);
|
||||
}
|
||||
|
||||
static void si_release_buffer_resources(struct si_buffer_resources *buffers,
|
||||
|
@ -1711,19 +1693,21 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
|
|||
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
|
||||
struct si_sampler_views *views = &sctx->samplers[shader].views;
|
||||
struct si_descriptors *descs =
|
||||
si_sampler_descriptors(sctx, shader);
|
||||
si_sampler_and_image_descriptors(sctx, shader);
|
||||
unsigned mask = views->enabled_mask;
|
||||
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
if (views->views[i]->texture == buf) {
|
||||
unsigned desc_slot = si_get_sampler_slot(i);
|
||||
|
||||
si_desc_reset_buffer_offset(ctx,
|
||||
descs->list +
|
||||
i * 16 + 4,
|
||||
desc_slot * 16 + 4,
|
||||
old_va, buf);
|
||||
descs->dirty_mask |= 1u << i;
|
||||
descs->dirty_mask |= 1ull << desc_slot;
|
||||
sctx->descriptors_dirty |=
|
||||
1u << si_sampler_descriptors_idx(shader);
|
||||
1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rbuffer, RADEON_USAGE_READ,
|
||||
|
@ -1739,22 +1723,25 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
|
|||
for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
|
||||
struct si_images_info *images = &sctx->images[shader];
|
||||
struct si_descriptors *descs =
|
||||
si_image_descriptors(sctx, shader);
|
||||
si_sampler_and_image_descriptors(sctx, shader);
|
||||
unsigned mask = images->enabled_mask;
|
||||
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
|
||||
if (images->views[i].resource == buf) {
|
||||
unsigned desc_slot = si_get_image_slot(i);
|
||||
|
||||
if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
|
||||
si_mark_image_range_valid(&images->views[i]);
|
||||
|
||||
si_desc_reset_buffer_offset(
|
||||
ctx, descs->list + i * 8 + 4,
|
||||
ctx, descs->list + desc_slot * 8 + 4,
|
||||
old_va, buf);
|
||||
descs->dirty_mask |= 1u << i;
|
||||
/* two 8-dword images share one 16-dword slot */
|
||||
descs->dirty_mask |= 1u << (desc_slot / 2);
|
||||
sctx->descriptors_dirty |=
|
||||
1u << si_image_descriptors_idx(shader);
|
||||
1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
|
||||
radeon_add_to_buffer_list_check_mem(
|
||||
&sctx->b, &sctx->b.gfx, rbuffer,
|
||||
|
@ -2016,12 +2003,11 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||
* Rarely used descriptors don't use CE RAM.
|
||||
*/
|
||||
bool big_ce = sctx->b.chip_class <= VI;
|
||||
bool images_use_ce = big_ce;
|
||||
bool const_and_shaderbufs_use_ce = big_ce ||
|
||||
i == PIPE_SHADER_VERTEX ||
|
||||
i == PIPE_SHADER_FRAGMENT;
|
||||
bool samplers_use_ce = big_ce ||
|
||||
i == PIPE_SHADER_FRAGMENT;
|
||||
bool samplers_and_images_use_ce = big_ce ||
|
||||
i == PIPE_SHADER_FRAGMENT;
|
||||
|
||||
si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
|
||||
si_const_and_shader_buffer_descriptors(sctx, i),
|
||||
|
@ -2035,21 +2021,19 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||
RADEON_PRIO_CONST_BUFFER,
|
||||
const_and_shaderbufs_use_ce ? &ce_offset : NULL);
|
||||
|
||||
si_init_descriptors(si_sampler_descriptors(sctx, i),
|
||||
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
|
||||
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
|
||||
SI_SGPR_SAMPLERS,
|
||||
16, SI_NUM_SAMPLERS,
|
||||
null_texture_descriptor,
|
||||
samplers_use_ce ? &ce_offset : NULL);
|
||||
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
|
||||
si_init_descriptors(desc,
|
||||
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
|
||||
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
|
||||
SI_SGPR_SAMPLERS_AND_IMAGES,
|
||||
16, SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS,
|
||||
samplers_and_images_use_ce ? &ce_offset : NULL);
|
||||
|
||||
si_init_descriptors(si_image_descriptors(sctx, i),
|
||||
gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
|
||||
gfx9_gs ? GFX9_SGPR_GS_IMAGES :
|
||||
SI_SGPR_IMAGES,
|
||||
8, SI_NUM_IMAGES,
|
||||
null_image_descriptor,
|
||||
images_use_ce ? &ce_offset : NULL);
|
||||
int j;
|
||||
for (j = 0; j < SI_NUM_IMAGES; j++)
|
||||
memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
|
||||
for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
|
||||
memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
|
||||
}
|
||||
|
||||
si_init_buffer_resources(&sctx->rw_buffers,
|
||||
|
@ -2061,7 +2045,7 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||
RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
|
||||
&ce_offset);
|
||||
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
|
||||
4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
|
||||
4, SI_NUM_VERTEX_BUFFERS, NULL);
|
||||
|
||||
sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
|
||||
|
||||
|
|
|
@ -2800,9 +2800,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
|
|||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
|
||||
8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
|
||||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
|
||||
8 + GFX9_SGPR_TCS_SAMPLERS);
|
||||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
|
||||
8 + GFX9_SGPR_TCS_IMAGES);
|
||||
8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
|
||||
|
||||
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
|
||||
ret = si_insert_input_ret_float(ctx, ret,
|
||||
|
@ -2827,9 +2825,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
|||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
|
||||
8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
|
||||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
|
||||
8 + GFX9_SGPR_GS_SAMPLERS);
|
||||
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
|
||||
8 + GFX9_SGPR_GS_IMAGES);
|
||||
8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
|
||||
|
||||
unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
|
||||
for (unsigned i = 0; i < 5; i++) {
|
||||
|
@ -4061,13 +4057,12 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
|
|||
{
|
||||
params[(*num_params)++] = si_const_array(ctx->v4i32,
|
||||
SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
|
||||
params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
|
||||
params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
|
||||
params[(*num_params)++] = si_const_array(ctx->v8i32,
|
||||
SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2);
|
||||
|
||||
if (assign_params) {
|
||||
ctx->param_const_and_shader_buffers = *num_params - 3;
|
||||
ctx->param_samplers = *num_params - 2;
|
||||
ctx->param_images = *num_params - 1;
|
||||
ctx->param_const_and_shader_buffers = *num_params - 2;
|
||||
ctx->param_samplers_and_images = *num_params - 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6666,7 +6661,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
|
|||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i32;
|
||||
params[num_params++] = ctx->i32;
|
||||
params[num_params++] = ctx->i32;
|
||||
|
@ -6680,7 +6674,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
|
|||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[num_params++] = ctx->i64;
|
||||
params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
|
||||
params[num_params++] = ctx->i32;
|
||||
params[num_params++] = ctx->i32;
|
||||
|
@ -7038,8 +7031,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
|
|||
/* Declare input SGPRs. */
|
||||
params[ctx->param_rw_buffers = num_params++] = ctx->i64;
|
||||
params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
|
||||
params[ctx->param_samplers = num_params++] = ctx->i64;
|
||||
params[ctx->param_images = num_params++] = ctx->i64;
|
||||
params[ctx->param_samplers_and_images = num_params++] = ctx->i64;
|
||||
assert(num_params == SI_PARAM_ALPHA_REF);
|
||||
params[SI_PARAM_ALPHA_REF] = ctx->f32;
|
||||
last_sgpr = SI_PARAM_ALPHA_REF;
|
||||
|
|
|
@ -159,10 +159,8 @@ enum {
|
|||
SI_SGPR_RW_BUFFERS_HI,
|
||||
SI_SGPR_CONST_AND_SHADER_BUFFERS,
|
||||
SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
|
||||
SI_SGPR_SAMPLERS, /* images & sampler states interleaved */
|
||||
SI_SGPR_SAMPLERS_HI,
|
||||
SI_SGPR_IMAGES,
|
||||
SI_SGPR_IMAGES_HI,
|
||||
SI_SGPR_SAMPLERS_AND_IMAGES,
|
||||
SI_SGPR_SAMPLERS_AND_IMAGES_HI,
|
||||
SI_NUM_RESOURCE_SGPRS,
|
||||
|
||||
/* all VS variants */
|
||||
|
@ -197,19 +195,15 @@ enum {
|
|||
GFX9_SGPR_unused_to_align_the_next_pointer,
|
||||
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
|
||||
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
|
||||
GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */
|
||||
GFX9_SGPR_TCS_SAMPLERS_HI,
|
||||
GFX9_SGPR_TCS_IMAGES,
|
||||
GFX9_SGPR_TCS_IMAGES_HI,
|
||||
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES,
|
||||
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI,
|
||||
GFX9_TCS_NUM_USER_SGPR,
|
||||
|
||||
/* GFX9: Merged ES-GS (VS-GS or TES-GS). */
|
||||
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
|
||||
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
|
||||
GFX9_SGPR_GS_SAMPLERS,
|
||||
GFX9_SGPR_GS_SAMPLERS_HI,
|
||||
GFX9_SGPR_GS_IMAGES,
|
||||
GFX9_SGPR_GS_IMAGES_HI,
|
||||
GFX9_SGPR_GS_SAMPLERS_AND_IMAGES,
|
||||
GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI,
|
||||
GFX9_GS_NUM_USER_SGPR,
|
||||
|
||||
/* GS limits */
|
||||
|
@ -223,7 +217,7 @@ enum {
|
|||
|
||||
/* LLVM function parameter indices */
|
||||
enum {
|
||||
SI_NUM_RESOURCE_PARAMS = 4,
|
||||
SI_NUM_RESOURCE_PARAMS = 3,
|
||||
|
||||
/* PS only parameters */
|
||||
SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
|
||||
|
|
|
@ -110,8 +110,7 @@ struct si_shader_context {
|
|||
/* Parameter indices for LLVMGetParam. */
|
||||
int param_rw_buffers;
|
||||
int param_const_and_shader_buffers;
|
||||
int param_samplers;
|
||||
int param_images;
|
||||
int param_samplers_and_images;
|
||||
/* Common inputs for merged shaders. */
|
||||
int param_merged_wave_info;
|
||||
int param_merged_scratch_offset;
|
||||
|
|
|
@ -180,7 +180,7 @@ image_fetch_rsrc(
|
|||
{
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
|
||||
ctx->param_images);
|
||||
ctx->param_samplers_and_images);
|
||||
LLVMValueRef index;
|
||||
bool dcc_off = is_store;
|
||||
|
||||
|
@ -191,7 +191,8 @@ image_fetch_rsrc(
|
|||
unsigned images_writemask = info->images_store |
|
||||
info->images_atomic;
|
||||
|
||||
index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
|
||||
index = LLVMConstInt(ctx->i32,
|
||||
si_get_image_slot(image->Register.Index), 0);
|
||||
|
||||
if (images_writemask & (1 << image->Register.Index))
|
||||
dcc_off = true;
|
||||
|
@ -208,6 +209,9 @@ image_fetch_rsrc(
|
|||
index = si_get_bounded_indirect_index(ctx, &image->Indirect,
|
||||
image->Register.Index,
|
||||
SI_NUM_IMAGES);
|
||||
index = LLVMBuildSub(ctx->gallivm.builder,
|
||||
LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
|
||||
index, "");
|
||||
}
|
||||
|
||||
*rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
|
||||
|
@ -1181,7 +1185,7 @@ static void tex_fetch_ptrs(
|
|||
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers);
|
||||
LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
|
||||
const struct tgsi_full_instruction *inst = emit_data->inst;
|
||||
const struct tgsi_full_src_register *reg;
|
||||
unsigned target = inst->Texture.Texture;
|
||||
|
@ -1196,8 +1200,11 @@ static void tex_fetch_ptrs(
|
|||
&reg->Indirect,
|
||||
reg->Register.Index,
|
||||
SI_NUM_SAMPLERS);
|
||||
index = LLVMBuildAdd(ctx->gallivm.builder, index,
|
||||
LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
|
||||
} else {
|
||||
index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
|
||||
index = LLVMConstInt(ctx->i32,
|
||||
si_get_sampler_slot(reg->Register.Index), 0);
|
||||
}
|
||||
|
||||
if (target == TGSI_TEXTURE_BUFFER)
|
||||
|
|
|
@ -196,8 +196,7 @@ enum {
|
|||
*/
|
||||
enum {
|
||||
SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
|
||||
SI_SHADER_DESCS_SAMPLERS,
|
||||
SI_SHADER_DESCS_IMAGES,
|
||||
SI_SHADER_DESCS_SAMPLERS_AND_IMAGES,
|
||||
SI_NUM_SHADER_DESCS,
|
||||
};
|
||||
|
||||
|
@ -229,7 +228,7 @@ struct si_descriptors {
|
|||
unsigned ce_offset;
|
||||
|
||||
/* elements of the list that are changed and need to be uploaded */
|
||||
unsigned dirty_mask;
|
||||
uint64_t dirty_mask;
|
||||
|
||||
/* Whether CE is used to upload this descriptor array. */
|
||||
bool uses_ce;
|
||||
|
@ -387,4 +386,16 @@ static inline unsigned si_get_shaderbuf_slot(unsigned slot)
|
|||
return SI_NUM_SHADER_BUFFERS - 1 - slot;
|
||||
}
|
||||
|
||||
/*
 * Translate a sampler index into its element index in the combined
 * sampler-and-image descriptor list.
 *
 * The returned value counts 16-dword list elements (callers index the list
 * with "desc_slot * 16"). The first SI_NUM_IMAGES / 2 elements are taken by
 * images, so sampler 0 starts right after them.
 */
static inline unsigned si_get_sampler_slot(unsigned slot)
|
||||
{
|
||||
/* samplers are in slots [8..39], ascending; slots [0..7] hold the images */
|
||||
return SI_NUM_IMAGES / 2 + slot;
|
||||
}
|
||||
|
||||
/*
 * Translate an image index into its position in the combined
 * sampler-and-image descriptor list.
 *
 * The returned value counts 8-dword image descriptors (callers index the
 * list with "desc_slot * 8"), i.e. half of a 16-dword list element, which is
 * why callers mark bit "desc_slot / 2" in the dirty mask. Image 0 maps to
 * the highest position, so image indices descend as list positions ascend.
 */
static inline unsigned si_get_image_slot(unsigned slot)
|
||||
{
|
||||
/* images are in half-slots [15..0] (sampler slots [7..0]), descending */
|
||||
return SI_NUM_IMAGES - 1 - slot;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue