i965: Implement ARB_shader_stencil_export (gen9+)

v2: Remove useless source_stencil_to_render_target (Ken)
    Squash in the actual packing function, which itself went through a v2:
    - Move the definition of the OPCODE outside of the FB_WRITE opcodes (Matt)
    - Reorder the regioning to be in VWH order (Matt)
    - Don't retype src in the backend, just assert instead (Matt)
    - Rename the debug prints to something better (Matt)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Ben Widawsky 2015-10-20 14:29:39 -07:00
parent 5fa7114652
commit 1db44252d0
9 changed files with 98 additions and 3 deletions

View File

@ -335,6 +335,7 @@ struct brw_wm_prog_data {
} binding_table;
uint8_t computed_depth_mode;
bool computed_stencil;
bool early_fragment_tests;
bool no_8;

View File

@ -919,6 +919,7 @@ enum opcode {
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
FS_OPCODE_PACK_STENCIL_REF,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
@ -1330,6 +1331,7 @@ enum fb_write_logical_srcs {
FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
};

View File

@ -3357,6 +3357,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
@ -3449,6 +3450,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
length++;
}
if (src_stencil.file != BAD_FILE) {
assert(devinfo->gen >= 9);
assert(bld.dispatch_width() != 16);
sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.exec_all().annotate("FB write OS")
.emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
retype(src_stencil, BRW_REGISTER_TYPE_UB));
length++;
}
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
@ -5223,6 +5235,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
prog_data->computed_stencil =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;

View File

@ -337,6 +337,7 @@ public:
int *push_constant_loc;
fs_reg frag_depth;
fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
@ -427,6 +428,8 @@ private:
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
struct brw_reg src);
void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,

View File

@ -317,6 +317,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_imm_ud(inst->target));
}
/* Flag that the shader-computed stencil is sent to the render target
 * (bit 14 of the first payload dword).
 */
if (prog_data->computed_stencil) {
brw_OR(p,
vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
brw_imm_ud(0x1 << 14));
}
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@ -436,6 +444,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
/**
 * Emit the byte MOV that packs per-slot stencil reference values for a
 * render target write (the code generated for FS_OPCODE_PACK_STENCIL_REF).
 *
 * \param inst  the IR instruction being generated (not used here)
 * \param dst   destination register; written through a UB-typed view
 * \param src   source register; must already be typed UB
 *
 * Only valid for SIMD8 dispatch on gen9+; both are asserted.
 */
void
fs_generator::generate_stencil_ref_packing(fs_inst *inst,
                                           struct brw_reg dst,
                                           struct brw_reg src)
{
   /* Preconditions.  The source is deliberately not retyped here: the
    * caller has to hand us a byte-typed register.
    */
   assert(dispatch_width == 8);
   assert(devinfo->gen >= 9);
   assert(src.type == BRW_REGISTER_TYPE_UB);

   /* The stencil update consists of 8 slots, one byte each.  Presumably to
    * save memory bandwidth, the stencil reference values written by the FS
    * must be packed into 2 dwords (reasonable, given that a stencil value is
    * at most 1 byte and this is a SIMD8 send): slots 0-3 live in dw0 and
    * slots 4-7 in dw1.
    *
    * The spec is confusing on this point.  The payload definition of
    * MDP_RTW_S8 (Message Data Payload for Render Target Writes with
    * Stencil 8b) makes the stencil value look like dw4.0-dw4.7, but dw4 is
    * of type MDPR_STENCIL (Message Data Payload Register), which is the
    * packed layout described above and drawn here:
    *
    *      31                             0
    *      --------------------------------
    * DW   |                              |
    * 2-7  |           IGNORED            |
    *      |                              |
    *      --------------------------------
    * DW1  |  STC  |  STC  |  STC  |  STC |
    *      | slot7 | slot6 | slot5 | slot4|
    *      --------------------------------
    * DW0  |  STC  |  STC  |  STC  |  STC |
    *      | slot3 | slot2 | slot1 | slot0|
    *      --------------------------------
    */

   /* Read the source through a <4;1,0> byte region, i.e. one byte out of
    * every four.  NOTE(review): presumably this picks the low byte of each
    * channel's dword -- confirm against the layout of the stencil source.
    */
   src.hstride = BRW_HORIZONTAL_STRIDE_0;
   src.width = BRW_WIDTH_1;
   src.vstride = BRW_VERTICAL_STRIDE_4;

   /* Write the selected bytes through a byte-typed view of dst. */
   const struct brw_reg byte_dst = retype(dst, BRW_REGISTER_TYPE_UB);
   brw_MOV(p, byte_dst, src);
}
void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
@ -2182,6 +2231,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_barrier(inst, src[0]);
break;
case FS_OPCODE_PACK_STENCIL_REF:
generate_stencil_ref_packing(inst, dst, src[0]);
break;
default:
unreachable("Unsupported opcode");

View File

@ -114,6 +114,8 @@ fs_visitor::nir_setup_outputs()
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
} else if (var->data.location == FRAG_RESULT_STENCIL) {
this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {

View File

@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
fs_reg src_depth;
fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@ -706,9 +706,12 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
src_stencil = frag_stencil;
const fs_reg sources[] = {
color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
fs_reg(components)
color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
sample_mask, fs_reg(components)
};
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
@ -741,6 +744,16 @@ fs_visitor::emit_fb_writes()
no16("Missing support for simd16 depth writes on gen6\n");
}
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
/* From the 'Render Target Write message' section of the docs:
* "Output Stencil is not supported with SIMD16 Render Target Write
* Messages."
*
* FINISHME: split 16 into 2 8s
*/
no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
}
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");

View File

@ -295,6 +295,8 @@ brw_instruction_name(enum opcode op)
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
case FS_OPCODE_PACK_STENCIL_REF:
return "pack_stencil_ref";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:

View File

@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
if (prog_data->computed_stencil) {
assert(brw->gen >= 9);
dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
}
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);