mirror of https://gitlab.freedesktop.org/mesa/mesa
asahi: simplify image atomic lowering
Do more calculation in the preamble so we can do less pointer chasing and keep everything within our 64-bit budget. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29179>
This commit is contained in:
parent
9069ac986b
commit
cccf0609a6
|
@ -204,36 +204,6 @@
|
|||
<value name="4" value="1"/>
|
||||
</enum>
|
||||
|
||||
<!-- Beginning of software-defined metadata used to implement image atomics
|
||||
and multisampled image writes. For atomics, information could be
|
||||
recovered from the PBE descriptor, but it is convenient to compute on the
|
||||
CPU where we have ail.
|
||||
|
||||
This descriptor (or a pointer to it, if the extended form is used) is stashed
|
||||
in the last 8 bytes of a PBE descriptor. That space is ordinarily used for
|
||||
acceleration buffers or linear layer stride. We forbid atomics and
|
||||
multisampled writes to compressed images and linear 2D arrays, leaving this
|
||||
area free.
|
||||
-->
|
||||
|
||||
<struct name="Atomic (software)" size="16">
|
||||
<field name="Base" size="33" start="0" type="address" modifier="shr(7)"/>
|
||||
<field name="Tile width" size="3" start="33" type="uint" modifier="log2" default="1"/>
|
||||
<field name="Tile height" size="3" start="36" type="uint" modifier="log2" default="1"/>
|
||||
<field name="Tiles per row" size="15" start="39" type="uint"/>
|
||||
<field name="Sample count" size="2" start="54" type="uint" modifier="log2" default="1"/>
|
||||
|
||||
<!-- Extended fields begin here -->
|
||||
<field name="Layer stride (pixels)" size="32" start="64" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="PBE Buffer (software)" size="8">
|
||||
<!-- Not necessarily aligned -->
|
||||
<field name="Base" size="40" start="0" type="address"/>
|
||||
</struct>
|
||||
|
||||
<!-- End of software defined data structures -->
|
||||
|
||||
<struct name="PBE" size="24">
|
||||
<field name="Dimension" size="4" start="0" type="Texture dimension" default="2D"/>
|
||||
<field name="Layout" size="2" start="4" type="Layout"/>
|
||||
|
@ -279,8 +249,16 @@
|
|||
<field name="Depth (linear)" size="11" start="128" type="uint" modifier="minus(1)" default="1"/>
|
||||
<field name="Layer stride (linear)" size="27" start="139" type="uint" modifier="shr(7)"/>
|
||||
|
||||
<!-- If neither, for software use -->
|
||||
<field name="Software-defined" size="64" start="128" type="hex"/>
|
||||
<!-- If neither, for software use to accelerate atomics and multisampled
|
||||
writes. These are forbidden for compressed images and linear 2D arrays,
|
||||
leaving this area free.
|
||||
-->
|
||||
<field name="Level offset (sw)" size="27" start="128" type="uint" modifier="shr(7)"/>
|
||||
<field name="Aligned width MSAA (sw)" size="15" start="128" type="uint"/>
|
||||
<field name="Tile width (sw)" size="3" start="155" type="uint" modifier="log2" default="1"/>
|
||||
<field name="Tile height (sw)" size="3" start="158" type="uint" modifier="log2" default="1"/>
|
||||
<field name="Layer stride (sw)" size="27" start="161" type="uint" modifier="shr(7)"/>
|
||||
<field name="Sample count log2 (sw)" size="2" start="188" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="Texture" size="24">
|
||||
|
|
|
@ -490,19 +490,13 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
(dim == GLSL_SAMPLER_DIM_CUBE) ||
|
||||
(dim == GLSL_SAMPLER_DIM_3D);
|
||||
|
||||
/* The last 8 bytes of the 24-byte PBE descriptor point to the
|
||||
* software-defined atomic descriptor. Grab the address.
|
||||
*/
|
||||
nir_def *meta_meta_ptr = nir_iadd_imm(b, desc_address, 16);
|
||||
nir_def *meta_ptr = nir_load_global_constant(b, meta_meta_ptr, 8, 1, 64);
|
||||
|
||||
if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
|
||||
return nir_channel(b, coord, 0);
|
||||
} else if (dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
return libagx_buffer_texel_address(b, meta_ptr, coord, blocksize_B);
|
||||
return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B);
|
||||
} else {
|
||||
return libagx_image_texel_address(
|
||||
b, meta_ptr, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
|
||||
b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
|
||||
nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D),
|
||||
nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
|
||||
nir_imm_bool(b, return_index));
|
||||
|
|
|
@ -6,12 +6,6 @@
|
|||
|
||||
#include "geometry.h"
|
||||
|
||||
/* Round x up to a multiple of y.
 *
 * The result is computed by adding y - 1 and masking off the low bits with
 * ~(y - 1), so it is only a correct round-up when y is a power of two.
 */
static uint
|
||||
align(uint x, uint y)
|
||||
{
|
||||
return (x + y - 1) & ~(y - 1);
|
||||
}
|
||||
|
||||
/* Compatible with util/u_math.h */
|
||||
static inline uint
|
||||
util_logbase2_ceil(uint n)
|
||||
|
|
|
@ -44,6 +44,12 @@ uint ballot(bool cond);
|
|||
#define AGX_STATIC_ASSERT(_COND) \
|
||||
typedef char static_assertion_##__line__[(_COND) ? 1 : -1]
|
||||
|
||||
/* Round x up to a multiple of y.
 *
 * The result is computed by adding y - 1 and masking off the low bits with
 * ~(y - 1), so it is only a correct round-up when y is a power of two.
 */
static inline uint
|
||||
align(uint x, uint y)
|
||||
{
|
||||
return (x + y - 1) & ~(y - 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -113,7 +113,7 @@ libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
|
|||
|
||||
static uint32_t
|
||||
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
|
||||
uint16_t tile_h_px, uint32_t width_tl)
|
||||
uint16_t tile_h_px, uint32_t aligned_width_px)
|
||||
{
|
||||
/* Modulo by the tile width/height to get the offsets within the tile */
|
||||
ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
|
||||
|
@ -131,7 +131,7 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
|
|||
* tile height =
|
||||
* align_down(y, tile height) * aligned_width_px (aligned_width_px = width_tl * tile width)
|
||||
*/
|
||||
uint32_t tile_row_start_px = tile_px.y * width_tl * tile_w_px;
|
||||
uint32_t tile_row_start_px = tile_px.y * aligned_width_px;
|
||||
|
||||
/* tile column start (px) =
|
||||
* (x // tile width) * (# of pix/tile) =
|
||||
|
@ -145,12 +145,12 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
|
|||
}
|
||||
|
||||
uint64_t
|
||||
libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr,
|
||||
libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
|
||||
uint4 coord, uint sample_idx,
|
||||
uint bytes_per_sample_B, bool is_1d, bool is_msaa,
|
||||
bool is_layered, bool return_index)
|
||||
{
|
||||
agx_unpack(NULL, ptr, ATOMIC_SOFTWARE, d);
|
||||
agx_unpack(NULL, ptr, PBE, d);
|
||||
|
||||
/* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
|
||||
* known use cases. So we're twiddled in this path, unless we're handling a
|
||||
|
@ -162,30 +162,41 @@ libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr
|
|||
if (is_1d) {
|
||||
total_px = coord.x;
|
||||
} else {
|
||||
total_px =
|
||||
calculate_twiddled_coordinates(convert_ushort2(coord.xy), d.tile_width,
|
||||
d.tile_height, d.tiles_per_row);
|
||||
uint aligned_width_px;
|
||||
if (is_msaa) {
|
||||
aligned_width_px = d.aligned_width_msaa_sw;
|
||||
} else {
|
||||
uint width_px = max(d.width >> d.level, 1u);
|
||||
aligned_width_px = align(width_px, d.tile_width_sw);
|
||||
}
|
||||
|
||||
total_px = calculate_twiddled_coordinates(
|
||||
convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
|
||||
aligned_width_px);
|
||||
}
|
||||
|
||||
if (is_layered)
|
||||
total_px += coord[is_1d ? 1 : 2] * d.layer_stride_pixels;
|
||||
uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;
|
||||
|
||||
uint sample_count = is_msaa ? d.sample_count : 1;
|
||||
uint total_sa = (total_px * d.sample_count) + sample_idx;
|
||||
if (is_layered) {
|
||||
total_px += coord[is_1d ? 1 : 2] *
|
||||
((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
|
||||
}
|
||||
|
||||
uint total_sa = (total_px << samples_log2) + sample_idx;
|
||||
|
||||
if (return_index)
|
||||
return total_sa;
|
||||
else
|
||||
return d.base + (uint64_t)(total_sa * bytes_per_sample_B);
|
||||
return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
|
||||
(uint64_t)(total_sa * bytes_per_sample_B);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
libagx_buffer_texel_address(
|
||||
constant const struct agx_pbe_buffer_software_packed *ptr, uint4 coord,
|
||||
uint bytes_per_pixel_B)
|
||||
libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
|
||||
uint4 coord, uint bytes_per_pixel_B)
|
||||
{
|
||||
agx_unpack(NULL, ptr, PBE_BUFFER_SOFTWARE, d);
|
||||
return d.base + (uint64_t)(coord.x * bytes_per_pixel_B);
|
||||
agx_unpack(NULL, ptr, PBE, d);
|
||||
return d.buffer + (uint64_t)(coord.x * bytes_per_pixel_B);
|
||||
}
|
||||
|
||||
/* Buffer texture lowerings */
|
||||
|
|
|
@ -1174,41 +1174,6 @@ sampler_view_for_surface(struct pipe_surface *surf)
|
|||
};
|
||||
}
|
||||
|
||||
/* Pack the software-defined descriptor for an image view into `packed`.
 *
 * Resources whose target is PIPE_BUFFER get a PBE_BUFFER_SOFTWARE record that
 * holds only a base address. Any other resource is packed as ATOMIC_SOFTWARE,
 * but only when its layout is flagged writeable_image; if neither condition
 * holds, this function writes nothing to `packed`.
 */
static void
|
||||
agx_pack_image_atomic_data(void *packed, struct pipe_image_view *view)
|
||||
{
|
||||
struct agx_resource *tex = agx_resource(view->resource);
|
||||
|
||||
if (tex->base.target == PIPE_BUFFER) {
|
||||
agx_pack(packed, PBE_BUFFER_SOFTWARE, cfg) {
|
||||
/* GPU address of the BO plus the view's buffer offset */
cfg.base = tex->bo->ptr.gpu + view->u.buf.offset;
|
||||
}
|
||||
} else if (tex->layout.writeable_image) {
|
||||
unsigned level = view->u.tex.level;
|
||||
unsigned blocksize_B = util_format_get_blocksize(tex->layout.format);
|
||||
|
||||
agx_pack(packed, ATOMIC_SOFTWARE, cfg) {
|
||||
/* GPU address of the BO plus the value ail_get_layer_level_B returns for
 * the view's first layer and mip level.
 */
cfg.base =
|
||||
tex->bo->ptr.gpu +
|
||||
ail_get_layer_level_B(&tex->layout, view->u.tex.first_layer, level);
|
||||
|
||||
/* Never pack a sample count below 1 */
cfg.sample_count = MAX2(util_res_sample_count(view->resource), 1);
|
||||
|
||||
/* Tile size, tiles per row and layer stride are only filled in for
 * twiddled layouts.
 */
if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
|
||||
struct ail_tile tile_size = tex->layout.tilesize_el[level];
|
||||
cfg.tile_width = tile_size.width_el;
|
||||
cfg.tile_height = tile_size.height_el;
|
||||
|
||||
/* Tiles needed to cover the minified width of this level, rounded up */
unsigned width_el = u_minify(tex->base.width0, level);
|
||||
cfg.tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el);
|
||||
|
||||
/* Layer stride in bytes divided by (block size * sample count),
 * rounded up.
 */
cfg.layer_stride_pixels = DIV_ROUND_UP(
|
||||
tex->layout.layer_stride_B, blocksize_B * cfg.sample_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
target_is_array(enum pipe_texture_target target)
|
||||
{
|
||||
|
@ -1355,12 +1320,27 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out,
|
|||
/* When the descriptor isn't extended architecturally, we can use the last
|
||||
* 8 bytes as a sideband. We use it to provide metadata for image atomics.
|
||||
*/
|
||||
if (!cfg.extended) {
|
||||
struct agx_ptr desc =
|
||||
agx_pool_alloc_aligned(&batch->pool, AGX_ATOMIC_SOFTWARE_LENGTH, 8);
|
||||
if (!cfg.extended && tex->layout.writeable_image &&
|
||||
tex->base.target != PIPE_BUFFER) {
|
||||
|
||||
agx_pack_image_atomic_data(desc.cpu, view);
|
||||
cfg.software_defined = desc.gpu;
|
||||
if (util_res_sample_count(&tex->base) > 1) {
|
||||
cfg.aligned_width_msaa_sw =
|
||||
align(u_minify(view->resource->width0, level),
|
||||
tex->layout.tilesize_el[level].width_el);
|
||||
} else {
|
||||
cfg.level_offset_sw =
|
||||
ail_get_level_offset_B(&tex->layout, cfg.level);
|
||||
}
|
||||
|
||||
cfg.sample_count_log2_sw = util_logbase2(tex->base.nr_samples);
|
||||
|
||||
if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
|
||||
struct ail_tile tile_size = tex->layout.tilesize_el[level];
|
||||
cfg.tile_width_sw = tile_size.width_el;
|
||||
cfg.tile_height_sw = tile_size.height_el;
|
||||
|
||||
cfg.layer_stride_sw = tex->layout.layer_stride_B;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue