lima: wire up MSAA 4x support

Utgard supports MSAA 4x, so wire it up.

The RSW bits were already reverse-engineered by Luc; the only remaining
parts were storing non-resolved buffers, reloading them (including for
depth/stencil), and doing the MSAA resolve.

To store a non-resolved buffer, we need to set the mrt_pitch and mrt_bits
registers in WB. To resolve a non-resolved buffer, we need to reload
it into individual samples and then write it out with mrt_bits = 0; this
is now done by the lima blitter.

We also need to do resolve on transfer_map() of multi-sampled buffers,
so utilize u_transfer_helper for that.

As a side fix, it turns out that our wb_reg definition wasn't correct:
'zero' isn't always zero. It is set if we need to swap channels, and
it goes before mrt_bits. mrt_bits actually enables multiple MRTs,
so this commit renames 'zero' to 'flags' and changes its position.

If mrt_bits == 0 and MSAA is enabled, the GPU does the resolve
in place. To expose this functionality, we set PIPE_CAP_SURFACE_SAMPLE_COUNT.

Fixes dEQP-GLES2.functional.multisample.*

Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13963>
This commit is contained in:
Vasily Khoruzhick 2021-11-25 23:56:41 -08:00 committed by Marge Bot
parent f93bee19d9
commit 24be011901
15 changed files with 153 additions and 88 deletions

View File

@ -322,7 +322,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_EXT_memory_object DONE (radeonsi, i965/gen7+, llvmpipe, zink)
GL_EXT_memory_object_fd DONE (radeonsi, i965/gen7+, llvmpipe, zink)
GL_EXT_memory_object_win32 DONE (zink)
GL_EXT_multisampled_render_to_texture DONE (freedreno/a6xx, panfrost, zink)
GL_EXT_multisampled_render_to_texture DONE (freedreno/a6xx, panfrost, zink, lima)
GL_EXT_render_snorm DONE (i965, r600, radeonsi, softpipe, zink)
GL_EXT_semaphore DONE (radeonsi, i965/gen7+, zink)
GL_EXT_semaphore_fd DONE (radeonsi, i965/gen7+, zink)

View File

@ -32,7 +32,9 @@ lima_pack_blit_cmd(struct lima_job *job,
const struct pipe_box *src,
const struct pipe_box *dst,
unsigned filter,
bool scissor)
bool scissor,
unsigned sample_mask,
unsigned mrt_idx)
{
#define lima_blit_render_state_offset 0x0000
#define lima_blit_gl_pos_offset 0x0040
@ -63,7 +65,7 @@ lima_pack_blit_cmd(struct lima_job *job,
.depth_range = 0xffff0000,
.stencil_front = 0x00000007,
.stencil_back = 0x00000007,
.multi_sample = 0x0000f007,
.multi_sample = 0x00000007,
.shader_address = reload_shader_va | reload_shader_first_instr_size,
.varying_types = 0x00000001,
.textures_address = va + lima_blit_tex_array_offset,
@ -71,6 +73,8 @@ lima_pack_blit_cmd(struct lima_job *job,
.varyings_address = va + lima_blit_varying_offset,
};
reload_render_state.multi_sample |= (sample_mask << 12);
if (job->key.cbuf) {
fb_width = job->key.cbuf->width;
fb_height = job->key.cbuf->height;
@ -98,7 +102,8 @@ lima_pack_blit_cmd(struct lima_job *job,
lima_tex_desc *td = cpu + lima_blit_tex_desc_offset;
memset(td, 0, lima_min_tex_desc_size);
lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, first_layer);
lima_texture_desc_set_res(ctx, td, psurf->texture, level, level,
first_layer, mrt_idx);
td->format = lima_format_get_texel_reload(psurf->format);
td->unnorm_coords = 1;
td->sampler_dim = LIMA_SAMPLER_DIM_2D;
@ -272,9 +277,19 @@ lima_do_blit(struct pipe_context *pctx,
_mesa_hash_table_insert(ctx->write_jobs, &dst_res->base, job);
lima_job_add_bo(job, LIMA_PIPE_PP, dst_res->bo, LIMA_SUBMIT_BO_WRITE);
lima_pack_blit_cmd(job, &job->plbu_cmd_array,
src_surf, &info->src.box,
&info->dst.box, info->filter, true);
if (info->src.resource->nr_samples > 1) {
for (int i = 0; i < MIN2(info->src.resource->nr_samples, LIMA_MAX_SAMPLES); i++) {
lima_pack_blit_cmd(job, &job->plbu_cmd_array,
src_surf, &info->src.box,
&info->dst.box, info->filter, true,
1 << i, i);
}
} else {
lima_pack_blit_cmd(job, &job->plbu_cmd_array,
src_surf, &info->src.box,
&info->dst.box, info->filter, true,
0xf, 0);
}
bool tile_aligned = false;

View File

@ -19,7 +19,9 @@ lima_pack_blit_cmd(struct lima_job *job,
const struct pipe_box *src,
const struct pipe_box *dst,
unsigned filter,
bool scissor);
bool scissor,
unsigned sample_mask,
unsigned mrt_idx);
bool lima_do_blit(struct pipe_context *ctx,
const struct pipe_blit_info *blit_info);

View File

@ -215,6 +215,8 @@ lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
return NULL;
}
ctx->sample_mask = (1 << LIMA_MAX_SAMPLES) - 1;
ctx->base.screen = pscreen;
ctx->base.destroy = lima_context_destroy;
ctx->base.set_debug_callback = lima_set_debug_callback;

View File

@ -204,6 +204,7 @@ struct lima_context {
LIMA_CONTEXT_DIRTY_CLIP = (1 << 15),
LIMA_CONTEXT_DIRTY_UNCOMPILED_VS = (1 << 16),
LIMA_CONTEXT_DIRTY_UNCOMPILED_FS = (1 << 17),
LIMA_CONTEXT_DIRTY_SAMPLE_MASK = (1 << 18),
} dirty;
struct u_upload_mgr *uploader;
@ -233,6 +234,9 @@ struct lima_context {
struct lima_texture_stateobj tex_stateobj;
struct lima_pp_stream_state pp_stream;
#define LIMA_MAX_SAMPLES 4
unsigned sample_mask;
unsigned min_index;
unsigned max_index;

View File

@ -675,13 +675,18 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
/* need more investigation */
if (info->mode == PIPE_PRIM_POINTS)
render->multi_sample = 0x0000F000;
render->multi_sample = 0x00000000;
else if (info->mode < PIPE_PRIM_TRIANGLES)
render->multi_sample = 0x0000F400;
render->multi_sample = 0x00000400;
else
render->multi_sample = 0x0000F800;
render->multi_sample = 0x00000800;
if (ctx->framebuffer.base.samples)
render->multi_sample |= 0x68;
if (ctx->blend->base.alpha_to_coverage)
render->multi_sample |= (1 << 7);
if (ctx->blend->base.alpha_to_one)
render->multi_sample |= (1 << 8);
render->multi_sample |= (ctx->sample_mask << 12);
/* Set gl_FragColor register, need to specify it 4 times */
render->multi_sample |= (fs->state.frag_color0_reg << 28) |
@ -716,7 +721,8 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
if (fs->state.uses_discard ||
ctx->zsa->base.alpha_enabled ||
fs->state.frag_depth_reg != -1) {
fs->state.frag_depth_reg != -1 ||
ctx->blend->base.alpha_to_coverage) {
early_z = false;
pixel_kill = false;
}

View File

@ -73,9 +73,9 @@ struct lima_pp_wb_reg {
uint32_t downsample_factor;
uint32_t pixel_layout;
uint32_t pitch;
uint32_t flags;
uint32_t mrt_bits;
uint32_t mrt_pitch;
uint32_t zero;
uint32_t unused0;
uint32_t unused1;
uint32_t unused2;

View File

@ -34,6 +34,7 @@
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "lima_screen.h"
#include "lima_context.h"
@ -359,6 +360,7 @@ static void
lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
{
struct lima_job_fb_info *fb = &job->fb;
struct lima_context *ctx = job->ctx;
struct pipe_box src = {
.x = 0,
.y = 0,
@ -372,9 +374,20 @@ lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
.width = fb->width,
.height = fb->height,
};
lima_pack_blit_cmd(job, &job->plbu_cmd_head,
psurf, &src, &dst,
PIPE_TEX_FILTER_NEAREST, false);
if (ctx->framebuffer.base.samples > 1) {
for (int i = 0; i < LIMA_MAX_SAMPLES; i++) {
lima_pack_blit_cmd(job, &job->plbu_cmd_head,
psurf, &src, &dst,
PIPE_TEX_FILTER_NEAREST, false,
(1 << i), i);
}
} else {
lima_pack_blit_cmd(job, &job->plbu_cmd_head,
psurf, &src, &dst,
PIPE_TEX_FILTER_NEAREST, false,
0xf, 0);
}
}
static void
@ -396,8 +409,9 @@ lima_pack_head_plbu_cmd(struct lima_job *job)
PLBU_CMD_END();
if (lima_fb_cbuf_needs_reload(job))
if (lima_fb_cbuf_needs_reload(job)) {
lima_pack_reload_plbu_cmd(job, job->key.cbuf);
}
if (lima_fb_zsbuf_needs_reload(job))
lima_pack_reload_plbu_cmd(job, job->key.zsbuf);
@ -733,7 +747,13 @@ lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx)
wb[wb_idx].pixel_layout = 0x0;
wb[wb_idx].pitch = res->levels[level].stride / 8;
}
wb[wb_idx].mrt_bits = 0;
wb[wb_idx].flags = 0;
unsigned nr_samples = zsbuf->nr_samples ?
zsbuf->nr_samples : MAX2(1, zsbuf->texture->nr_samples);
if (nr_samples > 1) {
wb[wb_idx].mrt_pitch = res->mrt_pitch;
wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples);
}
}
static void
@ -762,7 +782,13 @@ lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg,
wb[wb_idx].pixel_layout = 0x0;
wb[wb_idx].pitch = res->levels[level].stride / 8;
}
wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
wb[wb_idx].flags = swap_channels ? 0x4 : 0x0;
unsigned nr_samples = cbuf->nr_samples ?
cbuf->nr_samples : MAX2(1, cbuf->texture->nr_samples);
if (nr_samples > 1) {
wb[wb_idx].mrt_pitch = res->mrt_pitch;
wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples);
}
}
static void

View File

@ -589,13 +589,21 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
fprintf(fp, ": unknown");
if ((*value & 0x00000078) == 0x00000068)
fprintf(fp, ", fb_samples */\n");
fprintf(fp, ", msaa */\n");
else if ((*value & 0x00000078) == 0x00000000)
fprintf(fp, " */\n");
else
fprintf(fp, ", UNKNOWN\n");
fprintf(fp, ", UNKNOWN */\n");
fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
fprintf(fp, ": sample_mask: 0x%.x", ((*value & 0xf000) >> 12));
if ((*value & (1 << 7)))
fprintf(fp, ", alpha_to_coverage");
if ((*value & (1 << 8)))
fprintf(fp, ", alpha_to_one");
fprintf(fp, " */\n");
fprintf(fp, "\t\t\t\t\t\t/* %s(4)", render_state_infos[i].info);
fprintf(fp, ", register for gl_FragColor: $%d $%d $%d $%d */\n",
(*value & 0xf0000000) >> 28,
(*value & 0x0f000000) >> 24,

View File

@ -30,6 +30,7 @@
#include "util/u_debug.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_transfer_helper.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/u_drm.h"
@ -88,14 +89,14 @@ lima_resource_create_scanout(struct pipe_screen *pscreen,
static uint32_t
setup_miptree(struct lima_resource *res,
unsigned width0, unsigned height0,
bool should_align_dimensions)
unsigned width0, unsigned height0)
{
struct pipe_resource *pres = &res->base;
unsigned level;
unsigned width = width0;
unsigned height = height0;
unsigned depth = pres->depth0;
unsigned nr_samples = MAX2(pres->nr_samples, 1);
uint32_t size = 0;
for (level = 0; level <= pres->last_level; level++) {
@ -104,13 +105,8 @@ setup_miptree(struct lima_resource *res,
unsigned aligned_width;
unsigned aligned_height;
if (should_align_dimensions) {
aligned_width = align(width, 16);
aligned_height = align(height, 16);
} else {
aligned_width = width;
aligned_height = height;
}
aligned_width = align(width, 16);
aligned_height = align(height, 16);
stride = util_format_get_stride(pres->format, aligned_width);
actual_level_size = stride *
@ -125,27 +121,25 @@ setup_miptree(struct lima_resource *res,
if (util_format_is_compressed(pres->format))
res->levels[level].layer_stride /= 4;
/* The start address of each level except the last level
* must be 64-aligned in order to be able to pass the
* addresses to the hardware. */
if (level != pres->last_level)
size += align(actual_level_size, 64);
else
size += actual_level_size; /* Save some memory */
size += align(actual_level_size, 64);
width = u_minify(width, 1);
height = u_minify(height, 1);
depth = u_minify(depth, 1);
}
if (nr_samples > 1)
res->mrt_pitch = size;
size *= nr_samples;
return size;
}
static struct pipe_resource *
lima_resource_create_bo(struct pipe_screen *pscreen,
const struct pipe_resource *templat,
unsigned width, unsigned height,
bool should_align_dimensions)
unsigned width, unsigned height)
{
struct lima_screen *screen = lima_screen(pscreen);
struct lima_resource *res;
@ -161,7 +155,7 @@ lima_resource_create_bo(struct pipe_screen *pscreen,
pres = &res->base;
uint32_t size = setup_miptree(res, width, height, should_align_dimensions);
uint32_t size = setup_miptree(res, width, height);
size = align(size, LIMA_PAGE_SIZE);
res->bo = lima_bo_create(screen, size, 0);
@ -182,7 +176,6 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
struct lima_screen *screen = lima_screen(pscreen);
bool should_tile = lima_debug & LIMA_DEBUG_NO_TILING ? false : true;
unsigned width, height;
bool should_align_dimensions;
bool has_user_modifiers = true;
if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID)
@ -204,24 +197,14 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
modifiers, count))
should_tile = false;
if (should_tile || (templat->bind & PIPE_BIND_RENDER_TARGET) ||
(templat->bind & PIPE_BIND_DEPTH_STENCIL)) {
should_align_dimensions = true;
width = align(templat->width0, 16);
height = align(templat->height0, 16);
}
else {
should_align_dimensions = false;
width = templat->width0;
height = templat->height0;
}
width = align(templat->width0, 16);
height = align(templat->height0, 16);
struct pipe_resource *pres;
if (screen->ro && (templat->bind & PIPE_BIND_SCANOUT))
pres = lima_resource_create_scanout(pscreen, templat, width, height);
else
pres = lima_resource_create_bo(pscreen, templat, width, height,
should_align_dimensions);
pres = lima_resource_create_bo(pscreen, templat, width, height);
if (pres) {
struct lima_resource *res = lima_resource(pres);
@ -544,18 +527,6 @@ lima_resource_set_damage_region(struct pipe_screen *pscreen,
damage->num_region = nrects;
}
void
lima_resource_screen_init(struct lima_screen *screen)
{
screen->base.resource_create = lima_resource_create;
screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers;
screen->base.resource_from_handle = lima_resource_from_handle;
screen->base.resource_destroy = lima_resource_destroy;
screen->base.resource_get_handle = lima_resource_get_handle;
screen->base.resource_get_param = lima_resource_get_param;
screen->base.set_damage_region = lima_resource_set_damage_region;
}
static struct pipe_surface *
lima_surface_create(struct pipe_context *pctx,
struct pipe_resource *pres,
@ -578,6 +549,7 @@ lima_surface_create(struct pipe_context *pctx,
psurf->format = surf_tmpl->format;
psurf->width = u_minify(pres->width0, level);
psurf->height = u_minify(pres->height0, level);
psurf->nr_samples = surf_tmpl->nr_samples;
psurf->u.tex.level = level;
psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
@ -715,14 +687,6 @@ lima_transfer_map(struct pipe_context *pctx,
}
}
static void
lima_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
{
}
static bool
lima_should_convert_linear(struct lima_resource *res,
struct pipe_transfer *ptrans)
@ -756,9 +720,11 @@ lima_should_convert_linear(struct lima_resource *res,
}
static void
lima_transfer_unmap_inner(struct lima_context *ctx,
struct pipe_transfer *ptrans)
lima_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
{
struct lima_context *ctx = lima_context(pctx);
struct lima_resource *res = lima_resource(ptrans->resource);
struct lima_transfer *trans = lima_transfer(ptrans);
struct lima_bo *bo = res->bo;
@ -814,7 +780,9 @@ lima_transfer_unmap(struct pipe_context *pctx,
struct lima_transfer *trans = lima_transfer(ptrans);
struct lima_resource *res = lima_resource(ptrans->resource);
lima_transfer_unmap_inner(ctx, ptrans);
struct pipe_box box;
u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
lima_transfer_flush_region(pctx, ptrans, &box);
if (trans->staging)
free(trans->staging);
panfrost_minmax_cache_invalidate(res->index_cache, ptrans);
@ -925,7 +893,33 @@ lima_texture_subdata(struct pipe_context *pctx,
if (!lima_bo_map(res->bo))
return;
lima_transfer_unmap_inner(ctx, &t.base);
struct pipe_box tbox;
u_box_2d(0, 0, t.base.box.width, t.base.box.height, &tbox);
lima_transfer_flush_region(pctx, &t.base, &tbox);
}
/*
 * Table of lima's own resource and transfer callbacks. It is handed to
 * u_transfer_helper_create() in lima_resource_screen_init().
 */
static const struct u_transfer_vtbl transfer_vtbl = {
.resource_create = lima_resource_create,
.resource_destroy = lima_resource_destroy,
.transfer_map = lima_transfer_map,
.transfer_unmap = lima_transfer_unmap,
.transfer_flush_region = lima_transfer_flush_region,
};
/*
 * Install lima's resource callbacks on the screen and create the
 * u_transfer_helper instance stored in screen->base.transfer_helper.
 */
void
lima_resource_screen_init(struct lima_screen *screen)
{
screen->base.resource_create = lima_resource_create;
screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers;
screen->base.resource_from_handle = lima_resource_from_handle;
screen->base.resource_destroy = lima_resource_destroy;
screen->base.resource_get_handle = lima_resource_get_handle;
screen->base.resource_get_param = lima_resource_get_param;
screen->base.set_damage_region = lima_resource_set_damage_region;
/* NOTE(review): the single `true` below presumably selects the helper's
 * MSAA-map handling (resolve on map of multi-sampled resources); confirm
 * the positional meaning against u_transfer_helper.h. */
screen->base.transfer_helper = u_transfer_helper_create(&transfer_vtbl,
false, false,
false, true,
false);
}
void
@ -944,11 +938,11 @@ lima_resource_context_init(struct lima_context *ctx)
ctx->base.blit = lima_blit;
ctx->base.buffer_map = lima_transfer_map;
ctx->base.texture_map = lima_transfer_map;
ctx->base.transfer_flush_region = lima_transfer_flush_region;
ctx->base.buffer_unmap = lima_transfer_unmap;
ctx->base.texture_unmap = lima_transfer_unmap;
ctx->base.buffer_map = u_transfer_helper_transfer_map;
ctx->base.texture_map = u_transfer_helper_transfer_map;
ctx->base.transfer_flush_region = u_transfer_helper_transfer_flush_region;
ctx->base.buffer_unmap = u_transfer_helper_transfer_unmap;
ctx->base.texture_unmap = u_transfer_helper_transfer_unmap;
ctx->base.flush_resource = lima_flush_resource;
}

View File

@ -55,6 +55,7 @@ struct lima_resource {
struct renderonly_scanout *scanout;
struct lima_bo *bo;
struct panfrost_minmax_cache *index_cache;
uint32_t mrt_pitch;
bool tiled;
bool modifier_constant;
unsigned full_updates;

View File

@ -108,6 +108,7 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
return 1;
/* Unimplemented, but for exporting OpenGL 2.0 */
@ -334,7 +335,7 @@ lima_screen_is_format_supported(struct pipe_screen *pscreen,
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
/* be able to support 16, now limit to 4 */
/* Utgard supports 16x, but for now limit it to 4x */
if (sample_count > 1 && sample_count != 4)
return false;

View File

/*
 * Record a new sample mask in the lima context and flag it as dirty.
 * NOTE(review): presumably installed as the pipe_context set_sample_mask
 * hook; the registration is not visible here.
 */
static void
lima_set_sample_mask(struct pipe_context *pctx,
unsigned sample_mask)
{
struct lima_context *ctx = lima_context(pctx);
/* Keep only the low LIMA_MAX_SAMPLES bits of the requested mask. */
ctx->sample_mask = sample_mask & ((1 << LIMA_MAX_SAMPLES) - 1);
/* Mark the sample-mask state as changed. */
ctx->dirty |= LIMA_CONTEXT_DIRTY_SAMPLE_MASK;
}
void

View File

@ -71,7 +71,8 @@ lima_texture_desc_set_va(lima_tex_desc *desc,
void
lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
struct pipe_resource *prsc,
unsigned first_level, unsigned last_level, unsigned first_layer)
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned mrt_idx)
{
unsigned width, height, depth, layout, i;
struct lima_resource *lima_res = lima_resource(prsc);
@ -102,7 +103,9 @@ lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
uint32_t base_va = lima_res->bo->va;
/* attach first level */
uint32_t first_va = base_va + lima_res->levels[first_level].offset + first_layer * lima_res->levels[first_level].layer_stride;
uint32_t first_va = base_va + lima_res->levels[first_level].offset +
first_layer * lima_res->levels[first_level].layer_stride +
mrt_idx * lima_res->mrt_pitch;
desc->va_s.va_0 = first_va >> 6;
desc->va_s.layout = layout;
@ -255,7 +258,7 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample
desc->lod_bias += lod_bias_delta;
lima_texture_desc_set_res(ctx, desc, texture->base.texture,
first_level, last_level, first_layer);
first_level, last_level, first_layer, 0);
}
static unsigned

View File

@ -101,7 +101,7 @@ typedef struct __attribute__((__packed__)) {
void lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
struct pipe_resource *prsc,
unsigned first_level, unsigned last_level,
unsigned first_layer);
unsigned first_layer, unsigned mrt_idx);
void lima_update_textures(struct lima_context *ctx);