asahi: simplify image atomic lowering

Do more calculation in the preamble so we can do less pointer chasing and keep
everything within our 64-bit budget.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29179>
Alyssa Rosenzweig 2024-04-27 21:18:30 -04:00 committed by Marge Bot
parent 9069ac986b
commit cccf0609a6
6 changed files with 66 additions and 103 deletions
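Concretely: the old scheme stored a pointer in the spare last 8 bytes of the 24-byte PBE descriptor, pointing at a separately allocated ATOMIC_SOFTWARE descriptor, so every image atomic began with a dependent 64-bit load just to find its metadata. The new scheme packs the metadata itself into those 8 bytes. A minimal sketch of the difference, with hypothetical helper names (the real descriptors are bit-packed rather than byte-addressed, so this is illustrative only):

#include <stdint.h>
#include <string.h>

/* Before: the sideband held a pointer, so finding the metadata took an
 * extra dependent load before any texel address could be computed. */
static uint64_t sideband_old(const uint8_t pbe[24])
{
   uint64_t meta_ptr;
   memcpy(&meta_ptr, pbe + 16, sizeof(meta_ptr)); /* load #1: the pointer */

   uint64_t base;
   memcpy(&base, (const void *)(uintptr_t)meta_ptr, sizeof(base)); /* load #2 */
   return base;
}

/* After: the same 8 bytes are the metadata, packed into the 64-bit budget
 * the commit message mentions; no second load is needed. */
static uint64_t sideband_new(const uint8_t pbe[24])
{
   uint64_t packed;
   memcpy(&packed, pbe + 16, sizeof(packed));
   return packed; /* tile size, strides, sample count unpack from here */
}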

File 1 of 6

@@ -204,36 +204,6 @@
     <value name="4" value="1"/>
   </enum>
 
-  <!-- Beginning of software-defined metadata used to implement image atomics
-       and multisampled image writes. For atomics, information could be
-       recovered from the PBE descriptor, but it is convenient to compute on
-       the CPU where we have ail.
-
-       This descriptor (or a pointer to it, if the extended form is used) is
-       stashed in the last 8 bytes of a PBE descriptor, ordinarily used for
-       acceleration buffers or linear layer stride. We forbid atomics and
-       multisampled writes to compressed images and linear 2D arrays, leaving
-       this area free.
-    -->
-  <struct name="Atomic (software)" size="16">
-    <field name="Base" size="33" start="0" type="address" modifier="shr(7)"/>
-    <field name="Tile width" size="3" start="33" type="uint" modifier="log2" default="1"/>
-    <field name="Tile height" size="3" start="36" type="uint" modifier="log2" default="1"/>
-    <field name="Tiles per row" size="15" start="39" type="uint"/>
-    <field name="Sample count" size="2" start="54" type="uint" modifier="log2" default="1"/>
-
-    <!-- Extended fields begin here -->
-    <field name="Layer stride (pixels)" size="32" start="64" type="uint"/>
-  </struct>
-
-  <struct name="PBE Buffer (software)" size="8">
-    <!-- Not necessarily aligned -->
-    <field name="Base" size="40" start="0" type="address"/>
-  </struct>
-  <!-- End of software-defined data structures -->
 
   <struct name="PBE" size="24">
     <field name="Dimension" size="4" start="0" type="Texture dimension" default="2D"/>
     <field name="Layout" size="2" start="4" type="Layout"/>
@@ -279,8 +249,16 @@
     <field name="Depth (linear)" size="11" start="128" type="uint" modifier="minus(1)" default="1"/>
     <field name="Layer stride (linear)" size="27" start="139" type="uint" modifier="shr(7)"/>
 
-    <!-- If neither, for software use -->
-    <field name="Software-defined" size="64" start="128" type="hex"/>
+    <!-- If neither, for software use to accelerate atomics and multisampled
+         writes. These are forbidden for compressed images and linear 2D
+         arrays, leaving this area free.
+      -->
+    <field name="Level offset (sw)" size="27" start="128" type="uint" modifier="shr(7)"/>
+    <field name="Aligned width MSAA (sw)" size="15" start="128" type="uint"/>
+    <field name="Tile width (sw)" size="3" start="155" type="uint" modifier="log2" default="1"/>
+    <field name="Tile height (sw)" size="3" start="158" type="uint" modifier="log2" default="1"/>
+    <field name="Layer stride (sw)" size="27" start="161" type="uint" modifier="shr(7)"/>
+    <field name="Sample count log2 (sw)" size="2" start="188" type="uint"/>
   </struct>
 
   <struct name="Texture" size="24">
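A quick audit of the new layout against the 64-bit budget: the software-defined fields live in bits 128..191 of the 192-bit PBE descriptor, and the 15-bit MSAA width aliases the level offset (an MSAA image has a single level, and the shader only adds the level offset on the non-MSAA path). A sketch of the arithmetic, with sizes transcribed from the XML above:

/* Sizes transcribed from the fields above; positions relative to bit 128. */
enum {
   LEVEL_OFFSET_BITS = 27, /* bits 128..154; aliased by the 15-bit
                              "Aligned width MSAA", also at bit 128 */
   TILE_WIDTH_BITS   = 3,  /* bits 155..157, log2-encoded */
   TILE_HEIGHT_BITS  = 3,  /* bits 158..160, log2-encoded */
   LAYER_STRIDE_BITS = 27, /* bits 161..187, stored >> 7 */
   SAMPLES_LOG2_BITS = 2,  /* bits 188..189 */
};

_Static_assert(LEVEL_OFFSET_BITS + TILE_WIDTH_BITS + TILE_HEIGHT_BITS +
                  LAYER_STRIDE_BITS + SAMPLES_LOG2_BITS <= 64,
               "software-defined metadata fits the 64-bit sideband");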

File 2 of 6

@@ -490,19 +490,13 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
                     (dim == GLSL_SAMPLER_DIM_CUBE) ||
                     (dim == GLSL_SAMPLER_DIM_3D);
 
-   /* The last 8 bytes of the 24-byte PBE descriptor point to the
-    * software-defined atomic descriptor. Grab the address.
-    */
-   nir_def *meta_meta_ptr = nir_iadd_imm(b, desc_address, 16);
-   nir_def *meta_ptr = nir_load_global_constant(b, meta_meta_ptr, 8, 1, 64);
-
    if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
      return nir_channel(b, coord, 0);
    } else if (dim == GLSL_SAMPLER_DIM_BUF) {
-      return libagx_buffer_texel_address(b, meta_ptr, coord, blocksize_B);
+      return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B);
    } else {
      return libagx_image_texel_address(
-         b, meta_ptr, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
+         b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
          nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D),
          nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
          nir_imm_bool(b, return_index));
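The effect on the generated NIR: the lowering no longer emits a 64-bit load of the sideband pointer ahead of each texel-address computation; the address chain now starts at desc_address, which the shader already has in hand. And since the mode flags are nir_imm_bool immediates, the is_1d/is_msaa/layered branches inside the libagx helpers fold away once those helpers are inlined. Roughly, for a plain single-sampled 2D image (a sketch reusing the names from the diff above, with the surrounding lowering elided):

/* Sketch: what a plain 2D image atomic's address computation reduces to.
 * No metadata pointer chase remains; only the PBE descriptor is read. */
nir_def *addr = libagx_image_texel_address(
   b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
   nir_imm_bool(b, false) /* is_1d */, nir_imm_bool(b, false) /* is_msaa */,
   nir_imm_bool(b, false) /* layered */, nir_imm_bool(b, false) /* index */);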

File 3 of 6

@@ -6,12 +6,6 @@
 #include "geometry.h"
 
-static uint
-align(uint x, uint y)
-{
-   return (x + y - 1) & ~(y - 1);
-}
-
 /* Compatible with util/u_math.h */
 static inline uint
 util_logbase2_ceil(uint n)

File 4 of 6

@@ -44,6 +44,12 @@ uint ballot(bool cond);
 #define AGX_STATIC_ASSERT(_COND)                                               \
    typedef char static_assertion_##__line__[(_COND) ? 1 : -1]
 
+static inline uint
+align(uint x, uint y)
+{
+   return (x + y - 1) & ~(y - 1);
+}
+
 #endif
 
 #endif
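The helper moves out of geometry.cl into the shared libagx.h so the new texture.cl code can use it too. The bit trick assumes y is a power of two, which holds here because tile dimensions are stored log2-encoded in the descriptor. A self-contained host-side check (align_up is a local copy, renamed to avoid clashing with the header):

#include <assert.h>

/* Local copy of the align() helper above, for a host-side check. */
static unsigned align_up(unsigned x, unsigned y)
{
   return (x + y - 1) & ~(y - 1);
}

int main(void)
{
   assert(align_up(100, 32) == 128); /* 100 + 31 = 131; clear low 5 bits */
   assert(align_up(128, 32) == 128); /* aligned values are fixed points */
   assert(align_up(1, 64) == 64);
   return 0;
}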

File 5 of 6

@@ -113,7 +113,7 @@ libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
 
 static uint32_t
 calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
-                               uint16_t tile_h_px, uint32_t width_tl)
+                               uint16_t tile_h_px, uint32_t aligned_width_px)
 {
    /* Modulo by the tile width/height to get the offsets within the tile */
    ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
@@ -131,7 +131,7 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
     * tile height =
     * align_down(y, tile height) * width_tl * tile width
     */
-   uint32_t tile_row_start_px = tile_px.y * width_tl * tile_w_px;
+   uint32_t tile_row_start_px = tile_px.y * aligned_width_px;
 
    /* tile column start (px) =
    * (x // tile width) * (# of pix/tile) =
@@ -145,12 +145,12 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
 }
 
 uint64_t
-libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr,
+libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
                            uint4 coord, uint sample_idx,
                            uint bytes_per_sample_B, bool is_1d, bool is_msaa,
                            bool is_layered, bool return_index)
 {
-   agx_unpack(NULL, ptr, ATOMIC_SOFTWARE, d);
+   agx_unpack(NULL, ptr, PBE, d);
 
    /* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
     * known use cases. So we're twiddled in this path, unless we're handling a
@@ -162,30 +162,41 @@ libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr
    if (is_1d) {
       total_px = coord.x;
    } else {
-      total_px =
-         calculate_twiddled_coordinates(convert_ushort2(coord.xy), d.tile_width,
-                                        d.tile_height, d.tiles_per_row);
+      uint aligned_width_px;
+      if (is_msaa) {
+         aligned_width_px = d.aligned_width_msaa_sw;
+      } else {
+         uint width_px = max(d.width >> d.level, 1u);
+         aligned_width_px = align(width_px, d.tile_width_sw);
+      }
+
+      total_px = calculate_twiddled_coordinates(
+         convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
+         aligned_width_px);
    }
 
-   if (is_layered)
-      total_px += coord[is_1d ? 1 : 2] * d.layer_stride_pixels;
+   uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;
 
-   uint sample_count = is_msaa ? d.sample_count : 1;
-   uint total_sa = (total_px * d.sample_count) + sample_idx;
+   if (is_layered) {
+      total_px += coord[is_1d ? 1 : 2] *
+                  ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
+   }
+
+   uint total_sa = (total_px << samples_log2) + sample_idx;
 
    if (return_index)
       return total_sa;
    else
-      return d.base + (uint64_t)(total_sa * bytes_per_sample_B);
+      return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
+             (uint64_t)(total_sa * bytes_per_sample_B);
 }
 
 uint64_t
-libagx_buffer_texel_address(
-   constant const struct agx_pbe_buffer_software_packed *ptr, uint4 coord,
-   uint bytes_per_pixel_B)
+libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
+                            uint4 coord, uint bytes_per_pixel_B)
 {
-   agx_unpack(NULL, ptr, PBE_BUFFER_SOFTWARE, d);
-   return d.base + (uint64_t)(coord.x * bytes_per_pixel_B);
+   agx_unpack(NULL, ptr, PBE, d);
+   return d.buffer + (uint64_t)(coord.x * bytes_per_pixel_B);
 }
 
 /* Buffer texture lowerings */
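The crux of the texture.cl change is that tiles-per-row was redundant: for power-of-two tile widths, tiles_per_row * tile_width equals the tile-aligned width, which the shader can recompute from fields the PBE descriptor already carries (width and level), or read directly from the sideband in the MSAA case. A worked check of that identity, with arbitrary example numbers:

#include <assert.h>
#include <stdint.h>

static uint32_t align_up(uint32_t x, uint32_t y) { return (x + y - 1) & ~(y - 1); }
static uint32_t div_round_up(uint32_t x, uint32_t y) { return (x + y - 1) / y; }

int main(void)
{
   /* Example: 50 px wide mip level, 16x16 px tiles, texel at y = 35. */
   uint32_t width_px = 50, tile_w = 16, tile_h = 16, y = 35;

   /* Old: tile_px.y * width_tl * tile_w, with width_tl packed on the CPU. */
   uint32_t old_start = (y / tile_h) * div_round_up(width_px, tile_w) * tile_w;

   /* New: tile_px.y * aligned_width_px, recomputed in the shader. */
   uint32_t new_start = (y / tile_h) * align_up(width_px, tile_w);

   assert(old_start == 128 && new_start == 128); /* 2 tile rows * 64 px/row */
   return 0;
}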

File 6 of 6

@@ -1174,41 +1174,6 @@ sampler_view_for_surface(struct pipe_surface *surf)
    };
 }
 
-static void
-agx_pack_image_atomic_data(void *packed, struct pipe_image_view *view)
-{
-   struct agx_resource *tex = agx_resource(view->resource);
-
-   if (tex->base.target == PIPE_BUFFER) {
-      agx_pack(packed, PBE_BUFFER_SOFTWARE, cfg) {
-         cfg.base = tex->bo->ptr.gpu + view->u.buf.offset;
-      }
-   } else if (tex->layout.writeable_image) {
-      unsigned level = view->u.tex.level;
-      unsigned blocksize_B = util_format_get_blocksize(tex->layout.format);
-
-      agx_pack(packed, ATOMIC_SOFTWARE, cfg) {
-         cfg.base =
-            tex->bo->ptr.gpu +
-            ail_get_layer_level_B(&tex->layout, view->u.tex.first_layer, level);
-
-         cfg.sample_count = MAX2(util_res_sample_count(view->resource), 1);
-
-         if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
-            struct ail_tile tile_size = tex->layout.tilesize_el[level];
-            cfg.tile_width = tile_size.width_el;
-            cfg.tile_height = tile_size.height_el;
-
-            unsigned width_el = u_minify(tex->base.width0, level);
-            cfg.tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el);
-
-            cfg.layer_stride_pixels = DIV_ROUND_UP(
-               tex->layout.layer_stride_B, blocksize_B * cfg.sample_count);
-         }
-      }
-   }
-}
-
 static bool
 target_is_array(enum pipe_texture_target target)
 {
@@ -1355,12 +1320,27 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out,
       /* When the descriptor isn't extended architecturally, we can use the
       * last 8 bytes as a sideband. We use it to provide metadata for image
       * atomics.
       */
-      if (!cfg.extended) {
-         struct agx_ptr desc =
-            agx_pool_alloc_aligned(&batch->pool, AGX_ATOMIC_SOFTWARE_LENGTH, 8);
+      if (!cfg.extended && tex->layout.writeable_image &&
+          tex->base.target != PIPE_BUFFER) {
 
-         agx_pack_image_atomic_data(desc.cpu, view);
-         cfg.software_defined = desc.gpu;
+         if (util_res_sample_count(&tex->base) > 1) {
+            cfg.aligned_width_msaa_sw =
+               align(u_minify(view->resource->width0, level),
+                     tex->layout.tilesize_el[level].width_el);
+         } else {
+            cfg.level_offset_sw =
+               ail_get_level_offset_B(&tex->layout, cfg.level);
+         }
+
+         cfg.sample_count_log2_sw = util_logbase2(tex->base.nr_samples);
+
+         if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
+            struct ail_tile tile_size = tex->layout.tilesize_el[level];
+            cfg.tile_width_sw = tile_size.width_el;
+            cfg.tile_height_sw = tile_size.height_el;
+            cfg.layer_stride_sw = tex->layout.layer_stride_B;
+         }
       }
    };
 }
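One unit change worth noting: the layer stride is now packed in bytes (layer_stride_sw) rather than pre-divided into pixels on the CPU, and the shader converts with a divide and a shift. For the 128-byte-aligned strides the descriptor can encode (the field is stored shifted right by 7), the truncating divide agrees with the old DIV_ROUND_UP packing, as a quick check with example numbers shows:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Example: 64 KiB layer stride, 4 B/sample texels, 4 samples. */
   uint32_t layer_stride_B = 65536, blocksize_B = 4, samples_log2 = 2;
   uint32_t samples = 1u << samples_log2;

   /* Old CPU-side packing: layer stride in pixels, rounded up. */
   uint32_t old_px = (layer_stride_B + blocksize_B * samples - 1) /
                     (blocksize_B * samples);

   /* New shader-side conversion, as in libagx_image_texel_address:
    * (d.layer_stride_sw / bytes_per_sample_B) >> samples_log2. */
   uint32_t new_px = (layer_stride_B / blocksize_B) >> samples_log2;

   assert(old_px == 4096 && new_px == 4096);
   return 0;
}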