etnaviv/nn: Pipe through input/accumulation buffer depth from hwdb

Stop hard-coding the accumulation buffer depth and input buffer depth to
the values for VIPNano-QI. This allows calculating correct tile sizes
for other cores.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28956>
This commit is contained in:
Philipp Zabel 2024-04-26 16:57:01 +02:00 committed by Marge Bot
parent e82d70d472
commit 0554d11f1e
5 changed files with 17 additions and 6 deletions

View File

@ -92,6 +92,8 @@ struct etna_core_npu_info {
unsigned on_chip_sram_size; /* Size of on-chip SRAM */
unsigned axi_sram_size; /* Size of SRAM behind AXI */
unsigned nn_zrl_bits; /* Number of bits for zero run-length compression */
unsigned nn_input_buffer_depth; /* Input buffer size, determines tile size */
unsigned nn_accum_buffer_depth; /* Accumulation buffer size, determines tile size */
};
struct etna_core_info {

View File

@ -112,6 +112,8 @@ etna_query_feature_db(struct etna_core_info *info)
info->npu.on_chip_sram_size = db->VIP_SRAM_SIZE;
info->npu.axi_sram_size = db->AXI_SRAM_SIZE;
info->npu.nn_zrl_bits = db->NN_ZRL_BITS;
info->npu.nn_accum_buffer_depth = db->NNAccumBufferDepth;
info->npu.nn_input_buffer_depth = db->NNInputBufferDepth;
}
return true;

View File

@ -157,6 +157,10 @@ struct etna_specs {
unsigned axi_sram_size;
/* Number of bits for zero run-length compression */
unsigned nn_zrl_bits;
/* Input buffer size, determines tile size */
unsigned nn_input_buffer_depth;
/* Accumulation buffer size, determines tile size */
unsigned nn_accum_buffer_depth;
};
/* Compiled Gallium state. All the different compiled state atoms are woven

View File

@ -510,20 +510,19 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
operation->weight_scale);
}
#define ACCUM_BUFFER_DEPTH 64
#define INPUT_BUFFER_DEPTH 12
#define MAX_TILE_WIDTH 64
static unsigned
calc_superblocks(struct etna_context *ctx, const struct etna_operation *operation, unsigned tile_y, unsigned interleave_mode)
{
unsigned nn_core_count = ctx->screen->specs.nn_core_count;
unsigned nn_accum_buffer_depth = ctx->screen->specs.nn_accum_buffer_depth;
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
unsigned kernels_per_core = DIV_ROUND_UP(output_channels, nn_core_count);
unsigned foo = (ACCUM_BUFFER_DEPTH * interleave_mode) / tile_y;
unsigned foo = (nn_accum_buffer_depth * interleave_mode) / tile_y;
if (operation->weight_width == 1)
foo = MIN2(foo, ACCUM_BUFFER_DEPTH / 3);
foo = MIN2(foo, nn_accum_buffer_depth / 3);
foo = MIN2(foo, kernels_per_core);
foo = MIN2(foo, 127);
@ -591,6 +590,8 @@ calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsigned *inp
static unsigned
calculate_tiling(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
{
unsigned nn_input_buffer_depth = ctx->screen->specs.nn_input_buffer_depth;
unsigned nn_accum_buffer_depth = ctx->screen->specs.nn_accum_buffer_depth;
unsigned input_width = operation->input_width;
unsigned input_height = operation->input_height;
unsigned input_channels = operation->input_channels;
@ -614,8 +615,8 @@ calculate_tiling(struct etna_context *ctx, const struct etna_operation *operatio
tile_width = MIN2(output_width, 64);
interleave_mode = calc_interleave_mode(tile_width, operation->weight_height);
tile_height = INPUT_BUFFER_DEPTH * interleave_mode - operation->weight_height + 1;
tile_height = MIN2(tile_height, interleave_mode * ACCUM_BUFFER_DEPTH);
tile_height = nn_input_buffer_depth * interleave_mode - operation->weight_height + 1;
tile_height = MIN2(tile_height, interleave_mode * nn_accum_buffer_depth);
tile_height = MIN2(tile_height, output_height);
if (operation->stride > 1 && tile_height % 2 > 0)

View File

@ -870,6 +870,8 @@ etna_get_specs(struct etna_screen *screen)
screen->specs.on_chip_sram_size = info->npu.on_chip_sram_size;
screen->specs.axi_sram_size = info->npu.axi_sram_size;
screen->specs.nn_zrl_bits = info->npu.nn_zrl_bits;
screen->specs.nn_input_buffer_depth = info->npu.nn_input_buffer_depth;
screen->specs.nn_accum_buffer_depth = info->npu.nn_accum_buffer_depth;
if (etna_core_has_feature(info, ETNA_FEATURE_NN_XYDP0))
screen->specs.nn_core_version = 8;