ir3, fd, tu: Copy misc. info from ir3_shader to ir3_shader_variant

The shader won't be available for deserialized variants, so all the info
we need for compiling variants has to be included in the variant itself.
Most of the things we dug out of the shader were various bits from
nir_shader_info, which we now copy into ir3_shader_variant.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16147>
This commit is contained in:
Connor Abbott 2022-02-16 12:42:45 +01:00 committed by Marge Bot
parent 3e30608ceb
commit f45c86dfb7
20 changed files with 159 additions and 80 deletions

View File

@ -116,9 +116,9 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
const struct ir3_compiler *compiler = v->shader->compiler;
/* If the user forced a particular wavesize respect that. */
if (v->shader->real_wavesize == IR3_SINGLE_ONLY)
if (v->real_wavesize == IR3_SINGLE_ONLY)
return false;
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY)
if (v->real_wavesize == IR3_DOUBLE_ONLY)
return true;
/* We can't support more than compiler->branchstack_size diverging threads
@ -220,9 +220,9 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
*/
if (v->has_barrier && (max_waves < waves_per_wg)) {
mesa_loge(
"Compute shader (%s:%s) which has workgroup barrier cannot be used "
"Compute shader (%s) which has workgroup barrier cannot be used "
"because it's impossible to have enough concurrent waves.",
v->shader->nir->info.name, v->shader->nir->info.label);
v->name);
exit(1);
}
}
@ -381,7 +381,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
compiler, regs_count, info->double_threadsize);
info->max_waves = MIN2(reg_independent_max_waves, reg_dependent_max_waves);
assert(info->max_waves <= v->shader->compiler->max_waves);
assert(info->max_waves <= v->compiler->max_waves);
}
static struct ir3_register *

View File

@ -2071,7 +2071,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
*/
ctx->so->constlen =
MAX2(ctx->so->constlen,
ctx->so->shader->num_reserved_user_consts +
ctx->so->num_reserved_user_consts +
const_state->ubo_state.size / 16);
}
break;

View File

@ -155,7 +155,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
if (shader_debug_enabled(so->type)) {
mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
so->shader->nir->info.name);
so->name);
nir_log_shaderi(ctx->s);
}

View File

@ -848,7 +848,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
const_state->num_ubos = nir->info.num_ubos;
debug_assert((const_state->ubo_state.size % 16) == 0);
unsigned constoff = v->shader->num_reserved_user_consts +
unsigned constoff = v->num_reserved_user_consts +
const_state->ubo_state.size / 16 +
const_state->preamble_size;
unsigned ptrsz = ir3_pointer_size(compiler);
@ -866,7 +866,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
if (v->type == MESA_SHADER_KERNEL) {
const_state->offsets.kernel_params = constoff;
constoff += align(v->shader->cs.req_input_mem, 4) / 4;
constoff += align(v->cs.req_input_mem, 4) / 4;
}
if (const_state->num_driver_params > 0) {

View File

@ -449,7 +449,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
* first.
*/
uint32_t offset = v->shader->num_reserved_user_consts * 16;
uint32_t offset = v->num_reserved_user_consts * 16;
for (uint32_t i = 0; i < state->num_enabled; i++) {
uint32_t range_size = state->range[i].end - state->range[i].start;
@ -458,7 +458,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
assert(offset <= max_upload);
offset += range_size;
}
state->size = offset - v->shader->num_reserved_user_consts * 16;
state->size = offset - v->num_reserved_user_consts * 16;
}
bool

View File

@ -305,7 +305,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
/* First, lower load/store_preamble. */
const struct ir3_const_state *const_state = ir3_const_state(v);
unsigned preamble_base = v->shader->num_reserved_user_consts * 4 +
unsigned preamble_base = v->num_reserved_user_consts * 4 +
const_state->ubo_state.size / 4;
unsigned preamble_size = const_state->preamble_size * 4;

View File

@ -2572,7 +2572,7 @@ ir3_ra(struct ir3_shader_variant *v)
* because on some gens the register file is not big enough to hold a
* double-size wave with all 48 registers in use.
*/
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) {
if (v->real_wavesize == IR3_DOUBLE_ONLY) {
limit_pressure.full =
MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
}

View File

@ -271,7 +271,7 @@ assemble_variant(struct ir3_shader_variant *v)
fprintf(stream,
"Native code%s for unnamed %s shader %s with sha1 %s:\n",
shader_overridden ? " (overridden)" : "", ir3_shader_stage(v),
v->shader->nir->info.name, sha1buf);
v->name, sha1buf);
if (v->shader->type == MESA_SHADER_FRAGMENT)
fprintf(stream, "SIMD0\n");
ir3_shader_disasm(v, v->bin, stream);
@ -328,6 +328,7 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
return NULL;
v->id = ++shader->variant_count;
v->shader_id = shader->id;
v->shader = shader;
v->binning_pass = !!nonbinning;
v->nonbinning = nonbinning;
@ -336,6 +337,46 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
v->mergedregs = shader->compiler->gen >= 6;
v->stream_output = shader->stream_output;
v->name = ralloc_strdup(v, shader->nir->info.name);
struct shader_info *info = &shader->nir->info;
switch (v->type) {
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
v->tess.primitive_mode = info->tess._primitive_mode;
v->tess.tcs_vertices_out = info->tess.tcs_vertices_out;
v->tess.spacing = info->tess.spacing;
v->tess.ccw = info->tess.ccw;
v->tess.point_mode = info->tess.point_mode;
break;
case MESA_SHADER_GEOMETRY:
v->gs.output_primitive = info->gs.output_primitive;
v->gs.vertices_out = info->gs.vertices_out;
v->gs.invocations = info->gs.invocations;
v->gs.vertices_in = info->gs.vertices_in;
break;
case MESA_SHADER_FRAGMENT:
v->fs.early_fragment_tests = info->fs.early_fragment_tests;
v->fs.color_is_dual_source = info->fs.color_is_dual_source;
break;
case MESA_SHADER_COMPUTE:
v->cs.req_input_mem = shader->cs.req_input_mem;
v->cs.req_local_mem = shader->cs.req_local_mem;
break;
default:
break;
}
v->num_ssbos = info->num_ssbos;
v->num_ibos = info->num_ssbos + info->num_images;
v->num_reserved_user_consts = shader->num_reserved_user_consts;
v->api_wavesize = shader->api_wavesize;
v->real_wavesize = shader->real_wavesize;
if (!v->binning_pass)
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
@ -779,19 +820,19 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
fprintf(
out,
"; %s prog %d/%d: %u instr, %u nops, %u non-nops, %u mov, %u cov, %u dwords\n",
type, so->shader->id, so->id, so->info.instrs_count, so->info.nops_count,
type, so->shader_id, so->id, so->info.instrs_count, so->info.nops_count,
so->info.instrs_count - so->info.nops_count, so->info.mov_count,
so->info.cov_count, so->info.sizedwords);
fprintf(out,
"; %s prog %d/%d: %u last-baryf, %d half, %d full, %u constlen\n",
type, so->shader->id, so->id, so->info.last_baryf,
type, so->shader_id, so->id, so->info.last_baryf,
so->info.max_half_reg + 1, so->info.max_reg + 1, so->constlen);
fprintf(
out,
"; %s prog %d/%d: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, \n",
type, so->shader->id, so->id, so->info.instrs_per_cat[0],
type, so->shader_id, so->id, so->info.instrs_per_cat[0],
so->info.instrs_per_cat[1], so->info.instrs_per_cat[2],
so->info.instrs_per_cat[3], so->info.instrs_per_cat[4],
so->info.instrs_per_cat[5], so->info.instrs_per_cat[6],
@ -800,7 +841,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
fprintf(
out,
"; %s prog %d/%d: %u sstall, %u (ss), %u systall, %u (sy), %d loops\n",
type, so->shader->id, so->id, so->info.sstall, so->info.ss,
type, so->shader_id, so->id, so->info.sstall, so->info.ss,
so->info.systall, so->info.sy, so->loops);
/* print shader type specific info: */

View File

@ -508,6 +508,9 @@ struct ir3_shader_variant {
/* variant id (for debug) */
uint32_t id;
/* id of the shader the variant came from (for debug) */
uint32_t shader_id;
struct ir3_shader_key key;
/* vertex shaders can have an extra version for hwbinning pass,
@ -528,6 +531,8 @@ struct ir3_shader_variant {
gl_shader_stage type;
struct ir3_shader *shader;
char *name;
/* variant's copy of nir->constant_data (since we don't track the NIR in
* the variant, and shader->nir is before the opt pass). Moves to v->bin
* after assembly.
@ -731,6 +736,51 @@ struct ir3_shader_variant {
/* Important for compute shader to determine max reg footprint */
bool has_barrier;
/* The offset where images start in the IBO array. */
unsigned num_ssbos;
/* The total number of SSBOs and images, i.e. the number of hardware IBOs. */
unsigned num_ibos;
unsigned num_reserved_user_consts;
union {
struct {
enum tess_primitive_mode primitive_mode;
/** The number of vertices in the TCS output patch. */
uint8_t tcs_vertices_out;
unsigned spacing:2; /*gl_tess_spacing*/
/** Is the vertex order counterclockwise? */
bool ccw:1;
bool point_mode:1;
} tess;
struct {
/** The output primitive type */
uint16_t output_primitive;
/** The maximum number of vertices the geometry shader might write. */
uint16_t vertices_out;
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
uint8_t invocations;
/** The number of vertices received per input primitive (max. 6) */
uint8_t vertices_in:3;
} gs;
struct {
bool early_fragment_tests : 1;
bool color_is_dual_source : 1;
} fs;
struct {
unsigned req_input_mem;
unsigned req_local_mem;
} cs;
};
enum ir3_wavesize_option api_wavesize, real_wavesize;
/* For when we don't have a shader, variant's copy of streamout state */
struct ir3_stream_output_info stream_output;
@ -1094,11 +1144,7 @@ ir3_shader_halfregs(const struct ir3_shader_variant *v)
static inline uint32_t
ir3_shader_nibo(const struct ir3_shader_variant *v)
{
/* The dummy variant used in binning mode won't have an actual shader. */
if (!v->shader)
return 0;
return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images;
return v->num_ibos;
}
static inline uint32_t

View File

@ -1181,10 +1181,8 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_VPC_CNTL_0_VIEWIDLOC(linkage.viewid_loc));
if (hs) {
shader_info *hs_info = &hs->shader->nir->info;
tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
tu_cs_emit(cs, hs_info->tess.tcs_vertices_out);
tu_cs_emit(cs, hs->tess.tcs_vertices_out);
/* Total attribute slots in HS incoming patch. */
tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
@ -1194,10 +1192,10 @@ tu6_emit_vpc(struct tu_cs *cs,
const uint32_t max_wave_input_size = 64;
/* note: if HS is really just the VS extended, then this
* should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
* should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out)
* however that doesn't match the blob, and fails some dEQP tests.
*/
uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out;
uint32_t max_prims_per_wave =
max_wave_input_size * wavesize / (vs->output_size * patch_control_points);
prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);
@ -1211,22 +1209,21 @@ tu6_emit_vpc(struct tu_cs *cs,
/* In SPIR-V generated from GLSL, the tessellation primitive params are
* specified in the tess eval shader, but in SPIR-V generated from
* HLSL, they are specified in the tess control shader. */
shader_info *tess_info =
ds->shader->nir->info.tess.spacing == TESS_SPACING_UNSPECIFIED ?
&hs->shader->nir->info : &ds->shader->nir->info;
const struct ir3_shader_variant *tess =
ds->tess.spacing == TESS_SPACING_UNSPECIFIED ? hs : ds;
tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_CNTL, 1);
uint32_t output;
if (tess_info->tess.point_mode)
if (tess->tess.point_mode)
output = TESS_POINTS;
else if (tess_info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
else if (tess->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES)
output = TESS_LINES;
else if (tess_info->tess.ccw)
else if (tess->tess.ccw)
output = TESS_CCW_TRIS;
else
output = TESS_CW_TRIS;
enum a6xx_tess_spacing spacing;
switch (tess_info->tess.spacing) {
switch (tess->tess.spacing) {
case TESS_SPACING_EQUAL:
spacing = TESS_EQUAL;
break;
@ -1257,11 +1254,11 @@ tu6_emit_vpc(struct tu_cs *cs,
} else {
tu6_emit_link_map(cs, vs, gs, SB6_GS_SHADER);
}
vertices_out = gs->shader->nir->info.gs.vertices_out - 1;
output = primitive_to_tess(gs->shader->nir->info.gs.output_primitive);
invocations = gs->shader->nir->info.gs.invocations - 1;
vertices_out = gs->gs.vertices_out - 1;
output = primitive_to_tess(gs->gs.output_primitive);
invocations = gs->gs.invocations - 1;
/* Size of per-primitive allocation in ldlw memory in vec4s. */
vec4_size = gs->shader->nir->info.gs.vertices_in *
vec4_size = gs->gs.vertices_in *
DIV_ROUND_UP(prev_stage_output_size, 4);
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
@ -1578,9 +1575,9 @@ tu6_emit_fs_outputs(struct tu_cs *cs,
if (pipeline) {
pipeline->lrz.fs_has_kill = fs->has_kill;
pipeline->lrz.early_fragment_tests = fs->shader->nir->info.fs.early_fragment_tests;
pipeline->lrz.early_fragment_tests = fs->fs.early_fragment_tests;
if ((fs->shader && !fs->shader->nir->info.fs.early_fragment_tests) &&
if (!fs->fs.early_fragment_tests &&
(fs->no_earlyz || fs->has_kill || fs->writes_pos || fs->writes_stencilref || no_earlyz || fs->writes_smask)) {
pipeline->lrz.force_late_z = true;
}
@ -1601,7 +1598,7 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
struct tu_device *dev = cs->device;
uint32_t num_vertices =
hs ? cps_per_patch : gs->shader->nir->info.gs.vertices_in;
hs ? cps_per_patch : gs->gs.vertices_in;
uint32_t vs_params[4] = {
vs->output_size * num_vertices * 4, /* vs primitive stride */
@ -1641,13 +1638,13 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
hs_param_dwords, hs_params);
if (gs)
num_vertices = gs->shader->nir->info.gs.vertices_in;
num_vertices = gs->gs.vertices_in;
uint32_t ds_params[8] = {
ds->output_size * num_vertices * 4, /* ds primitive stride */
ds->output_size * 4, /* ds vertex stride */
hs->output_size, /* hs vertex stride (dwords) */
hs->shader->nir->info.tess.tcs_vertices_out,
hs->tess.tcs_vertices_out,
tess_param_iova,
tess_param_iova >> 32,
tess_factor_iova,

View File

@ -710,7 +710,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
bool fragz = fp->no_earlyz || fp->has_kill || fp->writes_pos;
bool latez = fp->shader && !fp->shader->nir->info.fs.early_fragment_tests && fragz;
bool latez = !fp->fs.early_fragment_tests && fragz;
bool clamp = !ctx->rasterizer->depth_clip_near;
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);

View File

@ -247,6 +247,6 @@ fd4_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (m->image_to_tex[index] != IBO_INVALID)
emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader);
emit_image_ssbo(ring, v->num_ssbos + index, &img, shader);
}
}

View File

@ -226,7 +226,7 @@ fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
translate_image(&img, &so->si[index]);
emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img,
emit_image_ssbo(ring, v->num_ssbos + index, &img,
shader);
}
}

View File

@ -56,8 +56,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos +
v->shader->nir->info.num_images) |
A6XX_SP_CS_CONFIG_NIBO(ir3_shader_nibo(v)) |
A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */
OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */
@ -70,7 +69,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
uint32_t shared_size = MAX2(((int)v->shader->cs.req_local_mem - 1) / 1024, 1);
uint32_t shared_size = MAX2(((int)v->cs.req_local_mem - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);

View File

@ -131,7 +131,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
*/
unsigned num_vertices = emit->hs
? emit->patch_vertices
: emit->gs->shader->nir->info.gs.vertices_in;
: emit->gs->gs.vertices_in;
uint32_t vs_params[4] = {
emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
@ -150,13 +150,13 @@ fd6_build_tess_consts(struct fd6_emit *emit)
ARRAY_SIZE(hs_params));
if (emit->gs)
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
num_vertices = emit->gs->gs.vertices_in;
uint32_t ds_params[4] = {
emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
emit->ds->output_size * 4, /* ds vertex stride */
emit->hs->output_size, /* hs vertex stride (dwords) */
emit->hs->shader->nir->info.tess.tcs_vertices_out};
emit->hs->tess.tcs_vertices_out};
emit_stage_tess_consts(constobj, emit->ds, ds_params,
ARRAY_SIZE(ds_params));
@ -176,7 +176,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
0,
};
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
num_vertices = emit->gs->gs.vertices_in;
emit_stage_tess_consts(constobj, emit->gs, gs_params,
ARRAY_SIZE(gs_params));
}

View File

@ -586,7 +586,7 @@ compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
const struct ir3_shader_variant *fs = emit->fs;
if (fs->shader->nir->info.fs.early_fragment_tests)
if (fs->fs.early_fragment_tests)
return A6XX_EARLY_Z;
if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled ||

View File

@ -156,20 +156,19 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *state = fd_submit_new_ringbuffer(
ctx->batch->submit,
(v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images) * 16 *
4,
ir3_shader_nibo(v) * 16 * 4,
FD_RINGBUFFER_STREAMING);
assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
uint32_t descriptor[FDL6_TEX_CONST_DWORDS];
for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) {
for (unsigned i = 0; i < v->num_ssbos; i++) {
fd6_ssbo_descriptor(ctx, &bufso->sb[i], descriptor);
fd6_emit_single_plane_descriptor(state, bufso->sb[i].buffer, descriptor);
}
for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) {
fd6_emit_image_descriptor(ctx, state, &imgso->si[i], true);
for (unsigned i = v->num_ssbos; i < v->num_ibos; i++) {
fd6_emit_image_descriptor(ctx, state, &imgso->si[i - v->num_ssbos], true);
}
return state;

View File

@ -98,7 +98,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
#ifdef DEBUG
/* Name should generally match what you get with MESA_SHADER_CAPTURE_PATH: */
const char *name = so->shader->nir->info.name;
const char *name = so->name;
if (name)
fd_emit_string5(ring, name, strlen(name));
#endif
@ -552,7 +552,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
bool fs_has_dual_src_color =
!binning_pass && fs->shader->nir->info.fs.color_is_dual_source;
!binning_pass && fs->fs.color_is_dual_source;
OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1);
OUT_RING(ring,
@ -719,15 +719,14 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
fd6_emit_immediates(ctx->screen, ds, ring);
fd6_emit_link_map(ctx->screen, hs, ds, ring);
shader_info *hs_info = &hs->shader->nir->info;
OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
OUT_RING(ring, hs_info->tess.tcs_vertices_out);
OUT_RING(ring, hs->tess.tcs_vertices_out);
if (ctx->screen->info->a6xx.tess_use_shared) {
unsigned hs_input_size = 6 + (3 * (vs->output_size - 1));
unsigned wave_input_size =
MIN2(64, DIV_ROUND_UP(hs_input_size * 4,
hs_info->tess.tcs_vertices_out));
hs->tess.tcs_vertices_out));
OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1);
OUT_RING(ring, hs_input_size);
@ -736,7 +735,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_RING(ring, wave_input_size);
} else {
uint32_t hs_input_size =
hs_info->tess.tcs_vertices_out * vs->output_size / 4;
hs->tess.tcs_vertices_out * vs->output_size / 4;
/* Total attribute slots in HS incoming patch. */
OUT_PKT4(ring, REG_A6XX_PC_HS_INPUT_SIZE, 1);
@ -744,13 +743,13 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
const uint32_t wavesize = 64;
const uint32_t max_wave_input_size = 64;
const uint32_t patch_control_points = hs_info->tess.tcs_vertices_out;
const uint32_t patch_control_points = hs->tess.tcs_vertices_out;
/* note: if HS is really just the VS extended, then this
* should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out)
* however that doesn't match the blob, and fails some dEQP tests.
*/
uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out;
uint32_t max_prims_per_wave = max_wave_input_size * wavesize /
(vs->output_size * patch_control_points);
prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);
@ -763,20 +762,19 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_RING(ring, wave_input_size);
}
shader_info *ds_info = &ds->shader->nir->info;
OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1);
uint32_t output;
if (ds_info->tess.point_mode)
if (ds->tess.point_mode)
output = TESS_POINTS;
else if (ds_info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
else if (ds->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES)
output = TESS_LINES;
else if (ds_info->tess.ccw)
else if (ds->tess.ccw)
output = TESS_CCW_TRIS;
else
output = TESS_CW_TRIS;
OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING(
fd6_gl2spacing(ds_info->tess.spacing)) |
fd6_gl2spacing(ds->tess.spacing)) |
A6XX_PC_TESS_CNTL_OUTPUT(output));
OUT_PKT4(ring, REG_A6XX_VPC_DS_CLIP_CNTL, 1);
@ -1006,7 +1004,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
uint32_t output;
switch (gs->shader->nir->info.gs.output_primitive) {
switch (gs->gs.output_primitive) {
case SHADER_PRIM_POINTS:
output = TESS_POINTS;
break;
@ -1021,10 +1019,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
}
OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(
gs->shader->nir->info.gs.vertices_out - 1) |
gs->gs.vertices_out - 1) |
A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(
gs->shader->nir->info.gs.invocations - 1));
gs->gs.invocations - 1));
OUT_PKT4(ring, REG_A6XX_GRAS_GS_CL_CNTL, 1);
OUT_RING(ring, A6XX_GRAS_GS_CL_CNTL_CLIP_MASK(clip_mask) |
@ -1041,7 +1039,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs;
/* Size of per-primitive allocation in ldlw memory in vec4s. */
uint32_t vec4_size = gs->shader->nir->info.gs.vertices_in *
uint32_t vec4_size = gs->gs.vertices_in *
DIV_ROUND_UP(prev->output_size, 4);
OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));

View File

@ -444,7 +444,7 @@ emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
if (v->constlen > offset) {
ring_wfi(ctx->batch, ring);
emit_const_user(ring, v, offset * 4,
align(v->shader->cs.req_input_mem, 4),
align(v->cs.req_input_mem, 4),
info->input);
}
}

View File

@ -102,14 +102,13 @@ dump_shader_info(struct ir3_shader_variant *v,
static void
upload_shader_variant(struct ir3_shader_variant *v)
{
struct shader_info *info = &v->shader->nir->info;
struct ir3_compiler *compiler = v->shader->compiler;
assert(!v->bo);
v->bo =
fd_bo_new(compiler->dev, v->info.size, FD_BO_NOMAP,
"%s:%s", ir3_shader_stage(v), info->name);
"%s:%s", ir3_shader_stage(v), v->name);
/* Always include shaders in kernel crash dumps. */
fd_bo_mark_for_dump(v->bo);