radeonsi: bump SI_NUM_SHADER_BUFFERS to 32
Some app uses more than 8 SSBOs (https://gitlab.freedesktop.org/mesa/mesa/-/issues/2946), so increase SI_NUM_SHADER_BUFFERS to 32 (which allows 16 SSBOs). Since we're now using a 64-bit number to track buffers, we could bump SI_NUM_SHADER_BUFFERS to 48, but that would conflict with Mesa's MAX_COMBINED_ATOMIC_BUFFERS limit (= 90). Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2122 Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5632>
This commit is contained in:
parent
7e8cfc0add
commit
5a05f9714b
|
@ -796,8 +796,13 @@ static void si_dump_descriptors(struct si_context *sctx, enum pipe_shader_type p
|
||||||
enabled_constbuf =
|
enabled_constbuf =
|
||||||
sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS;
|
sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS;
|
||||||
enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
|
enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
|
||||||
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
|
u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS);
|
||||||
enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >> (32 - SI_NUM_SHADER_BUFFERS);
|
enabled_shaderbuf = 0;
|
||||||
|
for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) {
|
||||||
|
enabled_shaderbuf |=
|
||||||
|
(sctx->const_and_shader_buffers[processor].enabled_mask &
|
||||||
|
1llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i;
|
||||||
|
}
|
||||||
enabled_samplers = sctx->samplers[processor].enabled_mask;
|
enabled_samplers = sctx->samplers[processor].enabled_mask;
|
||||||
enabled_images = sctx->images[processor].enabled_mask;
|
enabled_images = sctx->images[processor].enabled_mask;
|
||||||
}
|
}
|
||||||
|
|
|
@ -897,12 +897,12 @@ void si_update_ps_colorbuf0_slot(struct si_context *sctx)
|
||||||
pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
|
pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
|
||||||
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ,
|
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ,
|
||||||
RADEON_PRIO_SHADER_RW_IMAGE);
|
RADEON_PRIO_SHADER_RW_IMAGE);
|
||||||
buffers->enabled_mask |= 1u << slot;
|
buffers->enabled_mask |= 1llu << slot;
|
||||||
} else {
|
} else {
|
||||||
/* Clear the descriptor. */
|
/* Clear the descriptor. */
|
||||||
memset(descs->list + slot * 4, 0, 8 * 4);
|
memset(descs->list + slot * 4, 0, 8 * 4);
|
||||||
pipe_resource_reference(&buffers->buffers[slot], NULL);
|
pipe_resource_reference(&buffers->buffers[slot], NULL);
|
||||||
buffers->enabled_mask &= ~(1u << slot);
|
buffers->enabled_mask &= ~(1llu << slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
||||||
|
@ -985,15 +985,15 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers,
|
||||||
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
||||||
struct si_buffer_resources *buffers)
|
struct si_buffer_resources *buffers)
|
||||||
{
|
{
|
||||||
unsigned mask = buffers->enabled_mask;
|
uint64_t mask = buffers->enabled_mask;
|
||||||
|
|
||||||
/* Add buffers to the CS. */
|
/* Add buffers to the CS. */
|
||||||
while (mask) {
|
while (mask) {
|
||||||
int i = u_bit_scan(&mask);
|
int i = u_bit_scan64(&mask);
|
||||||
|
|
||||||
radeon_add_to_buffer_list(
|
radeon_add_to_buffer_list(
|
||||||
sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]),
|
sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]),
|
||||||
buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
|
buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
|
||||||
i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf);
|
i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1001,13 +1001,13 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
||||||
static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
|
static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
|
||||||
struct si_buffer_resources *buffers)
|
struct si_buffer_resources *buffers)
|
||||||
{
|
{
|
||||||
unsigned mask = buffers->enabled_mask;
|
uint64_t mask = buffers->enabled_mask;
|
||||||
|
|
||||||
while (mask) {
|
while (mask) {
|
||||||
int i = u_bit_scan(&mask);
|
int i = u_bit_scan64(&mask);
|
||||||
|
|
||||||
/* only check for reads */
|
/* only check for reads */
|
||||||
if ((buffers->writable_mask & (1u << i)) == 0 &&
|
if ((buffers->writable_mask & (1llu << i)) == 0 &&
|
||||||
(si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED))
|
(si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1238,11 +1238,11 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res
|
||||||
buffers->offsets[slot] = buffer_offset;
|
buffers->offsets[slot] = buffer_offset;
|
||||||
radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
|
radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
|
||||||
buffers->priority_constbuf, true);
|
buffers->priority_constbuf, true);
|
||||||
buffers->enabled_mask |= 1u << slot;
|
buffers->enabled_mask |= 1llu << slot;
|
||||||
} else {
|
} else {
|
||||||
/* Clear the descriptor. */
|
/* Clear the descriptor. */
|
||||||
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
|
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
|
||||||
buffers->enabled_mask &= ~(1u << slot);
|
buffers->enabled_mask &= ~(1llu << slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
||||||
|
@ -1292,8 +1292,8 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resou
|
||||||
if (!sbuffer || !sbuffer->buffer) {
|
if (!sbuffer || !sbuffer->buffer) {
|
||||||
pipe_resource_reference(&buffers->buffers[slot], NULL);
|
pipe_resource_reference(&buffers->buffers[slot], NULL);
|
||||||
memset(desc, 0, sizeof(uint32_t) * 4);
|
memset(desc, 0, sizeof(uint32_t) * 4);
|
||||||
buffers->enabled_mask &= ~(1u << slot);
|
buffers->enabled_mask &= ~(1llu << slot);
|
||||||
buffers->writable_mask &= ~(1u << slot);
|
buffers->writable_mask &= ~(1llu << slot);
|
||||||
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1320,12 +1320,12 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resou
|
||||||
radeon_add_to_gfx_buffer_list_check_mem(
|
radeon_add_to_gfx_buffer_list_check_mem(
|
||||||
sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true);
|
sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true);
|
||||||
if (writable)
|
if (writable)
|
||||||
buffers->writable_mask |= 1u << slot;
|
buffers->writable_mask |= 1llu << slot;
|
||||||
else
|
else
|
||||||
buffers->writable_mask &= ~(1u << slot);
|
buffers->writable_mask &= ~(1llu << slot);
|
||||||
|
|
||||||
buffers->enabled_mask |= 1u << slot;
|
buffers->enabled_mask |= 1llu << slot;
|
||||||
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
sctx->descriptors_dirty |= 1lu << descriptors_idx;
|
||||||
|
|
||||||
util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
|
util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
|
||||||
sbuffer->buffer_offset + sbuffer->buffer_size);
|
sbuffer->buffer_offset + sbuffer->buffer_size);
|
||||||
|
@ -1469,11 +1469,11 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource
|
||||||
pipe_resource_reference(&buffers->buffers[slot], buffer);
|
pipe_resource_reference(&buffers->buffers[slot], buffer);
|
||||||
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE,
|
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE,
|
||||||
buffers->priority);
|
buffers->priority);
|
||||||
buffers->enabled_mask |= 1u << slot;
|
buffers->enabled_mask |= 1llu << slot;
|
||||||
} else {
|
} else {
|
||||||
/* Clear the descriptor. */
|
/* Clear the descriptor. */
|
||||||
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
|
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
|
||||||
buffers->enabled_mask &= ~(1u << slot);
|
buffers->enabled_mask &= ~(1llu << slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
||||||
|
@ -1557,14 +1557,14 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
|
||||||
* If buf == NULL, reset all descriptors.
|
* If buf == NULL, reset all descriptors.
|
||||||
*/
|
*/
|
||||||
static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
|
static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
|
||||||
unsigned descriptors_idx, unsigned slot_mask,
|
unsigned descriptors_idx, uint64_t slot_mask,
|
||||||
struct pipe_resource *buf, enum radeon_bo_priority priority)
|
struct pipe_resource *buf, enum radeon_bo_priority priority)
|
||||||
{
|
{
|
||||||
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
||||||
unsigned mask = buffers->enabled_mask & slot_mask;
|
uint64_t mask = buffers->enabled_mask & slot_mask;
|
||||||
|
|
||||||
while (mask) {
|
while (mask) {
|
||||||
unsigned i = u_bit_scan(&mask);
|
unsigned i = u_bit_scan64(&mask);
|
||||||
struct pipe_resource *buffer = buffers->buffers[i];
|
struct pipe_resource *buffer = buffers->buffers[i];
|
||||||
|
|
||||||
if (buffer && (!buf || buffer == buf)) {
|
if (buffer && (!buf || buffer == buf)) {
|
||||||
|
@ -1573,7 +1573,7 @@ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_
|
||||||
|
|
||||||
radeon_add_to_gfx_buffer_list_check_mem(
|
radeon_add_to_gfx_buffer_list_check_mem(
|
||||||
sctx, si_resource(buffer),
|
sctx, si_resource(buffer),
|
||||||
buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
|
buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
|
||||||
priority, true);
|
priority, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1646,7 +1646,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
|
||||||
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
|
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
|
||||||
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
||||||
si_const_and_shader_buffer_descriptors_idx(shader),
|
si_const_and_shader_buffer_descriptors_idx(shader),
|
||||||
u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
|
u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
|
||||||
buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
|
buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1654,7 +1654,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
|
||||||
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
|
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
|
||||||
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
||||||
si_const_and_shader_buffer_descriptors_idx(shader),
|
si_const_and_shader_buffer_descriptors_idx(shader),
|
||||||
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), buf,
|
u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
|
||||||
sctx->const_and_shader_buffers[shader].priority);
|
sctx->const_and_shader_buffers[shader].priority);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -482,7 +482,7 @@ struct si_shader_selector {
|
||||||
uint64_t inputs_read; /* "get_unique_index" bits */
|
uint64_t inputs_read; /* "get_unique_index" bits */
|
||||||
|
|
||||||
/* bitmasks of used descriptor slots */
|
/* bitmasks of used descriptor slots */
|
||||||
uint32_t active_const_and_shader_buffers;
|
uint64_t active_const_and_shader_buffers;
|
||||||
uint64_t active_samplers_and_images;
|
uint64_t active_samplers_and_images;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
#define SI_NUM_CONST_BUFFERS 16
|
#define SI_NUM_CONST_BUFFERS 16
|
||||||
#define SI_NUM_IMAGES 16
|
#define SI_NUM_IMAGES 16
|
||||||
#define SI_NUM_IMAGE_SLOTS (SI_NUM_IMAGES * 2) /* the second half are FMASK slots */
|
#define SI_NUM_IMAGE_SLOTS (SI_NUM_IMAGES * 2) /* the second half are FMASK slots */
|
||||||
#define SI_NUM_SHADER_BUFFERS 16
|
#define SI_NUM_SHADER_BUFFERS 32
|
||||||
|
|
||||||
struct si_screen;
|
struct si_screen;
|
||||||
struct si_shader;
|
struct si_shader;
|
||||||
|
@ -457,8 +457,8 @@ struct si_buffer_resources {
|
||||||
enum radeon_bo_priority priority_constbuf : 6;
|
enum radeon_bo_priority priority_constbuf : 6;
|
||||||
|
|
||||||
/* The i-th bit is set if that element is enabled (non-NULL resource). */
|
/* The i-th bit is set if that element is enabled (non-NULL resource). */
|
||||||
unsigned enabled_mask;
|
uint64_t enabled_mask;
|
||||||
unsigned writable_mask;
|
uint64_t writable_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define si_pm4_state_changed(sctx, member) \
|
#define si_pm4_state_changed(sctx, member) \
|
||||||
|
@ -571,7 +571,7 @@ void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
|
||||||
struct util_queue_fence *ready_fence,
|
struct util_queue_fence *ready_fence,
|
||||||
struct si_compiler_ctx_state *compiler_ctx_state, void *job,
|
struct si_compiler_ctx_state *compiler_ctx_state, void *job,
|
||||||
util_queue_execute_func execute);
|
util_queue_execute_func execute);
|
||||||
void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const_and_shader_buffers,
|
void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers,
|
||||||
uint64_t *samplers_and_images);
|
uint64_t *samplers_and_images);
|
||||||
int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_state *state,
|
int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_state *state,
|
||||||
struct si_compiler_ctx_state *compiler_state,
|
struct si_compiler_ctx_state *compiler_state,
|
||||||
|
@ -602,13 +602,13 @@ void si_init_streamout_functions(struct si_context *sctx);
|
||||||
|
|
||||||
static inline unsigned si_get_constbuf_slot(unsigned slot)
|
static inline unsigned si_get_constbuf_slot(unsigned slot)
|
||||||
{
|
{
|
||||||
/* Constant buffers are in slots [16..31], ascending */
|
/* Constant buffers are in slots [32..47], ascending */
|
||||||
return SI_NUM_SHADER_BUFFERS + slot;
|
return SI_NUM_SHADER_BUFFERS + slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned si_get_shaderbuf_slot(unsigned slot)
|
static inline unsigned si_get_shaderbuf_slot(unsigned slot)
|
||||||
{
|
{
|
||||||
/* shader buffers are in slots [15..0], descending */
|
/* shader buffers are in slots [31..0], descending */
|
||||||
return SI_NUM_SHADER_BUFFERS - 1 - slot;
|
return SI_NUM_SHADER_BUFFERS - 1 - slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2542,7 +2542,7 @@ void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return descriptor slot usage masks from the given shader info. */
|
/* Return descriptor slot usage masks from the given shader info. */
|
||||||
void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const_and_shader_buffers,
|
void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers,
|
||||||
uint64_t *samplers_and_images)
|
uint64_t *samplers_and_images)
|
||||||
{
|
{
|
||||||
unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers;
|
unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers;
|
||||||
|
@ -2556,7 +2556,7 @@ void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const
|
||||||
|
|
||||||
/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
|
/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
|
||||||
start = si_get_shaderbuf_slot(num_shaderbufs - 1);
|
start = si_get_shaderbuf_slot(num_shaderbufs - 1);
|
||||||
*const_and_shader_buffers = u_bit_consecutive(start, num_shaderbufs + num_constbufs);
|
*const_and_shader_buffers = u_bit_consecutive64(start, num_shaderbufs + num_constbufs);
|
||||||
|
|
||||||
/* The layout is:
|
/* The layout is:
|
||||||
* - fmask[last] ... fmask[0] go to [15-last .. 15]
|
* - fmask[last] ... fmask[0] go to [15-last .. 15]
|
||||||
|
|
Loading…
Reference in New Issue