intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
Jason Ekstrand 2018-10-30 15:47:39 -05:00 committed by Jason Ekstrand
parent f547cebbe0
commit 8514eba693
4 changed files with 177 additions and 142 deletions

View File

@ -4551,6 +4551,66 @@ is_high_sampler(const struct gen_device_info *devinfo, const fs_reg &sampler)
return sampler.file != IMM || sampler.ud >= 16;
}
static unsigned
sampler_msg_type(const gen_device_info *devinfo,
opcode opcode, bool shadow_compare)
{
assert(devinfo->gen >= 5);
switch (opcode) {
case SHADER_OPCODE_TEX:
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE :
GEN5_SAMPLER_MESSAGE_SAMPLE;
case FS_OPCODE_TXB:
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE :
GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
case SHADER_OPCODE_TXL:
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE :
GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
case SHADER_OPCODE_TXL_LZ:
return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_IMAGE_SIZE:
return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
case SHADER_OPCODE_TXD:
assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell);
return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE :
GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
case SHADER_OPCODE_TXF:
return GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
case SHADER_OPCODE_TXF_LZ:
assert(devinfo->gen >= 9);
return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
case SHADER_OPCODE_TXF_CMS:
return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS :
GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
case SHADER_OPCODE_TXF_UMS:
assert(devinfo->gen >= 7);
return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
case SHADER_OPCODE_TXF_MCS:
assert(devinfo->gen >= 7);
return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
case SHADER_OPCODE_LOD:
return GEN5_SAMPLER_MESSAGE_LOD;
case SHADER_OPCODE_TG4:
assert(devinfo->gen >= 7);
return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C :
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
break;
case SHADER_OPCODE_TG4_OFFSET:
assert(devinfo->gen >= 7);
return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C :
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
case SHADER_OPCODE_SAMPLEINFO:
return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
default:
unreachable("not reached");
}
}
static void
lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &coordinate,
@ -4566,6 +4626,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
unsigned grad_components)
{
const gen_device_info *devinfo = bld.shader->devinfo;
const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data;
unsigned reg_width = bld.dispatch_width() / 8;
unsigned header_size = 0, length = 0;
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
@ -4792,14 +4853,81 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
/* Generate the SEND. */
inst->opcode = op;
inst->src[0] = src_payload;
inst->src[1] = surface;
inst->src[2] = sampler;
inst->resize_sources(3);
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = mlen;
inst->header_size = header_size;
const unsigned msg_type =
sampler_msg_type(devinfo, op, inst->shadow_compare);
const unsigned simd_mode =
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
BRW_SAMPLER_SIMD_MODE_SIMD16;
uint32_t base_binding_table_index;
switch (op) {
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
base_binding_table_index = prog_data->binding_table.gather_texture_start;
break;
case SHADER_OPCODE_IMAGE_SIZE:
base_binding_table_index = prog_data->binding_table.image_start;
break;
default:
base_binding_table_index = prog_data->binding_table.texture_start;
break;
}
inst->sfid = BRW_SFID_SAMPLER;
if (surface.file == IMM && sampler.file == IMM) {
inst->desc = brw_sampler_desc(devinfo,
surface.ud + base_binding_table_index,
sampler.ud % 16,
msg_type,
simd_mode,
0 /* return_format unused on gen7+ */);
inst->src[0] = brw_imm_ud(0);
} else {
/* Immediate portion of the descriptor */
inst->desc = brw_sampler_desc(devinfo,
0, /* surface */
0, /* sampler */
msg_type,
simd_mode,
0 /* return_format unused on gen7+ */);
const fs_builder ubld = bld.group(1, 0).exec_all();
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
if (surface.equals(sampler)) {
/* This case is common in GL */
ubld.MUL(desc, surface, brw_imm_ud(0x101));
} else {
if (sampler.file == IMM) {
ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
} else {
ubld.SHL(desc, sampler, brw_imm_ud(8));
ubld.OR(desc, desc, surface);
}
}
if (base_binding_table_index)
ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index));
ubld.AND(desc, desc, brw_imm_ud(0xfff));
inst->src[0] = component(desc, 0);
}
inst->src[1] = brw_imm_ud(0); /* ex_desc */
inst->src[2] = src_payload;
inst->resize_sources(3);
if (inst->eot) {
/* EOT sampler messages don't make sense to split because it would
* involve ending half of the thread early.
*/
assert(inst->group == 0);
/* We need to use SENDC for EOT sampler messages */
inst->check_tdr = true;
inst->send_has_side_effects = true;
}
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
}

View File

@ -421,7 +421,7 @@ private:
void generate_barrier(fs_inst *inst, struct brw_reg src);
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
void generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
struct brw_reg sampler_index);
void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,

View File

@ -965,10 +965,11 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
}
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
struct brw_reg sampler_index)
{
assert(devinfo->gen < 7);
assert(inst->size_written % REG_SIZE == 0);
int msg_type = -1;
uint32_t simd_mode;
@ -1037,71 +1038,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
case SHADER_OPCODE_TXL_LZ:
assert(devinfo->gen >= 9);
if (inst->shadow_compare) {
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ;
} else {
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
}
break;
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_IMAGE_SIZE:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered in NIR */
assert(devinfo->gen >= 8 || devinfo->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
}
assert(!inst->shadow_compare);
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
break;
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_LZ:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
break;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
else
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_UMS:
assert(devinfo->gen >= 7);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
break;
case SHADER_OPCODE_TXF_MCS:
assert(devinfo->gen >= 7);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_LOD:
msg_type = GEN5_SAMPLER_MESSAGE_LOD;
break;
case SHADER_OPCODE_TG4:
if (inst->shadow_compare) {
assert(devinfo->gen >= 7);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
} else {
assert(devinfo->gen >= 6);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
}
break;
case SHADER_OPCODE_TG4_OFFSET:
assert(devinfo->gen >= 7);
if (inst->shadow_compare) {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
} else {
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
}
assert(devinfo->gen == 6);
assert(!inst->shadow_compare);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
break;
case SHADER_OPCODE_SAMPLEINFO:
msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
@ -1180,16 +1136,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
dst = vec16(dst);
}
assert(devinfo->gen < 7 || inst->header_size == 0 ||
src.file == BRW_GENERAL_REGISTER_FILE);
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->header_size != 0 && devinfo->gen < 7) {
struct brw_reg src = brw_null_reg();
if (inst->header_size != 0) {
if (devinfo->gen < 6 && !inst->offset) {
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@ -1218,83 +1172,28 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
uint32_t base_binding_table_index;
switch (inst->opcode) {
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
base_binding_table_index = prog_data->binding_table.gather_texture_start;
break;
case SHADER_OPCODE_IMAGE_SIZE:
base_binding_table_index = prog_data->binding_table.image_start;
break;
default:
base_binding_table_index = prog_data->binding_table.texture_start;
break;
}
if (surface_index.file == BRW_IMMEDIATE_VALUE &&
sampler_index.file == BRW_IMMEDIATE_VALUE) {
uint32_t surface = surface_index.ud;
uint32_t sampler = sampler_index.ud;
assert(surface_index.file == BRW_IMMEDIATE_VALUE);
assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
surface + base_binding_table_index,
sampler % 16,
msg_type,
inst->size_written / REG_SIZE,
inst->mlen,
inst->header_size != 0,
simd_mode,
return_format);
} else {
/* Non-const sampler index */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
if (brw_regs_equal(&surface_reg, &sampler_reg)) {
brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
} else {
if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
} else {
brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
brw_OR(p, addr, addr, surface_reg);
}
}
if (base_binding_table_index)
brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);
/* dst = send(offset, a0.0 | <descriptor>) */
brw_send_indirect_message(
p, BRW_SFID_SAMPLER, dst, src, addr,
brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE,
inst->header_size) |
brw_sampler_desc(devinfo,
0 /* surface */,
0 /* sampler */,
msg_type,
simd_mode,
return_format));
/* visitor knows more than we do about the surface limit required,
* so has already done marking.
*/
}
if (is_combined_send) {
brw_inst_set_eot(p->devinfo, brw_last_inst, true);
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
}
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
surface_index.ud + base_binding_table_index,
sampler_index.ud % 16,
msg_type,
inst->size_written / REG_SIZE,
inst->mlen,
inst->header_size != 0,
simd_mode,
return_format);
}
@ -2170,23 +2069,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXL_LZ:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
generate_tex(inst, dst, src[0], src[1], src[2]);
break;
case SHADER_OPCODE_IMAGE_SIZE:
generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
assert(inst->src[0].file == BAD_FILE);
generate_tex(inst, dst, src[1], src[2]);
break;
case FS_OPCODE_DDX_COARSE:

View File

@ -416,6 +416,23 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_SEND:
switch (inst->sfid) {
case BRW_SFID_SAMPLER: {
unsigned msg_type = (inst->desc >> 12) & 0x1f;
switch (msg_type) {
case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
/* See also SHADER_OPCODE_TXS */
latency = 100;
break;
default:
/* See also SHADER_OPCODE_TEX */
latency = 200;
break;
}
break;
}
case GEN6_SFID_DATAPORT_RENDER_CACHE:
switch ((inst->desc >> 14) & 0x1f) {
case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: