radeonsi: use a clever alignment for constant buffer uploads
This results in a very tiny decrease in lgkm wait cycles.
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
620aded541
commit
6b73aafceb
|
@ -201,6 +201,7 @@ struct radeon_info {
|
|||
uint32_t ce_fw_version;
|
||||
uint32_t vce_harvest_config;
|
||||
uint32_t clock_crystal_freq;
|
||||
uint32_t tcc_cache_line_size;
|
||||
|
||||
/* Kernel info. */
|
||||
uint32_t drm_major; /* version */
|
||||
|
|
|
@ -1047,7 +1047,9 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
|
|||
{
|
||||
void *tmp;
|
||||
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0, size, 256, const_offset,
|
||||
u_upload_alloc(sctx->b.b.stream_uploader, 0, size,
|
||||
si_optimal_tcc_alignment(sctx, size),
|
||||
const_offset,
|
||||
(struct pipe_resource**)rbuffer, &tmp);
|
||||
if (*rbuffer)
|
||||
util_memcpy_cpu_to_le32(tmp, ptr, size);
|
||||
|
|
|
@ -512,4 +512,19 @@ static inline bool si_vs_exports_prim_id(struct si_shader *shader)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Choose the alignment passed to u_upload_alloc for an upload of
 * upload_size bytes.
 *
 * sctx:        context whose screen info supplies tcc_cache_line_size.
 * upload_size: size of the upload in bytes.
 *
 * Returns the smaller of util_next_power_of_two(upload_size) and
 * tcc_cache_line_size.
 */
static inline unsigned
|
||||
si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
|
||||
{
|
||||
unsigned alignment, tcc_cache_line_size;
|
||||
|
||||
/* If the upload size is less than the cache line size (e.g. 16, 32),
|
||||
* the whole thing will fit into a cache line if we align it to its size
|
||||
* (taken through util_next_power_of_two, so the alignment is a power of two).
|
||||
* The idea is that multiple small uploads can share a cache line.
|
||||
* If the upload size is greater, align it to the cache line size.
|
||||
*/
|
||||
alignment = util_next_power_of_two(upload_size);
|
||||
tcc_cache_line_size = sctx->screen->b.info.tcc_cache_line_size;
|
||||
/* Never align more strictly than one cache line. */
return MIN2(alignment, tcc_cache_line_size);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -345,6 +345,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
|
|||
ws->info.has_userptr = true;
|
||||
ws->info.num_render_backends = ws->amdinfo.rb_pipes;
|
||||
ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
|
||||
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
|
||||
ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
|
||||
ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
|
||||
ws->info.has_virtual_memory = true;
|
||||
|
|
|
@ -524,6 +524,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
|
||||
(ws->info.family == CHIP_HAWAII &&
|
||||
ws->accel_working2 < 3);
|
||||
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
|
||||
|
||||
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;
|
||||
|
||||
|
|
Loading…
Reference in New Issue