etnaviv/nn: Fix calculation of remaining out channels

We were miscounting the output channels left over for the last superblock
when the number of output channels isn't evenly divisible by the number of
superblocks.

MobileNetV1: 9.991ms -> 9.991ms
SSDLite MobileDet: 32.692ms -> 27ms

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28879>
This commit is contained in:
Tomeu Vizoso 2024-02-09 16:26:39 +01:00 committed by Marge Bot
parent baebd6f43d
commit c75b512673
1 changed files with 3 additions and 7 deletions

View File

@ -532,10 +532,6 @@ calc_superblocks(struct etna_context *ctx, const struct etna_operation *operatio
unsigned num_kernels = DIV_ROUND_UP(output_channels, kernels_per_core * nn_core_count);
unsigned superblocks = DIV_ROUND_UP(DIV_ROUND_UP(output_channels, nn_core_count), num_kernels);
/* TODO: Remove this once we support superblocks that don't divide output_channels in the compressed buffer */
while(output_channels % superblocks)
superblocks++;
return superblocks;
}
@ -984,7 +980,7 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co
unsigned kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks);
if (superblock == superblocks - 1)
kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks;
kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1);
for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) {
unsigned out_channel = core * kernels_in_superblock + kernel + superblock * DIV_ROUND_UP(kernels_per_core, superblocks) * cores_used;
@ -1063,7 +1059,7 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne
unsigned kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks);
if (superblock == superblocks - 1)
kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks;
kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1);
for (unsigned z = 0; z < input_channels; z++) {
for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) {
@ -1148,7 +1144,7 @@ write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned
unsigned kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks);
if (superblock == superblocks - 1)
kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks;
kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1);
for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) {
unsigned out_channel = core * kernels_in_superblock + kernel + superblock * DIV_ROUND_UP(kernels_per_core, superblocks) * cores_used;