intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
f547cebbe0
commit
8514eba693
|
@ -4551,6 +4551,66 @@ is_high_sampler(const struct gen_device_info *devinfo, const fs_reg &sampler)
|
|||
return sampler.file != IMM || sampler.ud >= 16;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
sampler_msg_type(const gen_device_info *devinfo,
|
||||
opcode opcode, bool shadow_compare)
|
||||
{
|
||||
assert(devinfo->gen >= 5);
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_TEX:
|
||||
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE :
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE;
|
||||
case FS_OPCODE_TXB:
|
||||
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE :
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
|
||||
case SHADER_OPCODE_TXL:
|
||||
return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE :
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
|
||||
GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
||||
case SHADER_OPCODE_TXD:
|
||||
assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell);
|
||||
return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE :
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
case SHADER_OPCODE_TXF:
|
||||
return GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
assert(devinfo->gen >= 9);
|
||||
return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
assert(devinfo->gen >= 9);
|
||||
return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS :
|
||||
GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
assert(devinfo->gen >= 7);
|
||||
return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
assert(devinfo->gen >= 7);
|
||||
return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
||||
case SHADER_OPCODE_LOD:
|
||||
return GEN5_SAMPLER_MESSAGE_LOD;
|
||||
case SHADER_OPCODE_TG4:
|
||||
assert(devinfo->gen >= 7);
|
||||
return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C :
|
||||
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
assert(devinfo->gen >= 7);
|
||||
return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C :
|
||||
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
const fs_reg &coordinate,
|
||||
|
@ -4566,6 +4626,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
unsigned grad_components)
|
||||
{
|
||||
const gen_device_info *devinfo = bld.shader->devinfo;
|
||||
const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data;
|
||||
unsigned reg_width = bld.dispatch_width() / 8;
|
||||
unsigned header_size = 0, length = 0;
|
||||
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
|
||||
|
@ -4792,14 +4853,81 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
|
||||
|
||||
/* Generate the SEND. */
|
||||
inst->opcode = op;
|
||||
inst->src[0] = src_payload;
|
||||
inst->src[1] = surface;
|
||||
inst->src[2] = sampler;
|
||||
inst->resize_sources(3);
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = mlen;
|
||||
inst->header_size = header_size;
|
||||
|
||||
const unsigned msg_type =
|
||||
sampler_msg_type(devinfo, op, inst->shadow_compare);
|
||||
const unsigned simd_mode =
|
||||
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
|
||||
BRW_SAMPLER_SIMD_MODE_SIMD16;
|
||||
|
||||
uint32_t base_binding_table_index;
|
||||
switch (op) {
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
base_binding_table_index = prog_data->binding_table.gather_texture_start;
|
||||
break;
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
base_binding_table_index = prog_data->binding_table.image_start;
|
||||
break;
|
||||
default:
|
||||
base_binding_table_index = prog_data->binding_table.texture_start;
|
||||
break;
|
||||
}
|
||||
|
||||
inst->sfid = BRW_SFID_SAMPLER;
|
||||
if (surface.file == IMM && sampler.file == IMM) {
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
surface.ud + base_binding_table_index,
|
||||
sampler.ud % 16,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
} else {
|
||||
/* Immediate portion of the descriptor */
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
0, /* surface */
|
||||
0, /* sampler */
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
const fs_builder ubld = bld.group(1, 0).exec_all();
|
||||
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
if (surface.equals(sampler)) {
|
||||
/* This case is common in GL */
|
||||
ubld.MUL(desc, surface, brw_imm_ud(0x101));
|
||||
} else {
|
||||
if (sampler.file == IMM) {
|
||||
ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
|
||||
} else {
|
||||
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
||||
ubld.OR(desc, desc, surface);
|
||||
}
|
||||
}
|
||||
if (base_binding_table_index)
|
||||
ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index));
|
||||
ubld.AND(desc, desc, brw_imm_ud(0xfff));
|
||||
|
||||
inst->src[0] = component(desc, 0);
|
||||
}
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
|
||||
inst->src[2] = src_payload;
|
||||
inst->resize_sources(3);
|
||||
|
||||
if (inst->eot) {
|
||||
/* EOT sampler messages don't make sense to split because it would
|
||||
* involve ending half of the thread early.
|
||||
*/
|
||||
assert(inst->group == 0);
|
||||
/* We need to use SENDC for EOT sampler messages */
|
||||
inst->check_tdr = true;
|
||||
inst->send_has_side_effects = true;
|
||||
}
|
||||
|
||||
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
|
||||
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
|
||||
}
|
||||
|
|
|
@ -421,7 +421,7 @@ private:
|
|||
void generate_barrier(fs_inst *inst, struct brw_reg src);
|
||||
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||
void generate_tex(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg surface_index,
|
||||
struct brw_reg sampler_index);
|
||||
void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
|
||||
|
|
|
@ -965,10 +965,11 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
|
|||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg surface_index,
|
||||
struct brw_reg sampler_index)
|
||||
{
|
||||
assert(devinfo->gen < 7);
|
||||
assert(inst->size_written % REG_SIZE == 0);
|
||||
int msg_type = -1;
|
||||
uint32_t simd_mode;
|
||||
|
@ -1037,71 +1038,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
|
|||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
assert(devinfo->gen >= 9);
|
||||
if (inst->shadow_compare) {
|
||||
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ;
|
||||
} else {
|
||||
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
||||
break;
|
||||
case SHADER_OPCODE_TXD:
|
||||
if (inst->shadow_compare) {
|
||||
/* Gen7.5+. Otherwise, lowered in NIR */
|
||||
assert(devinfo->gen >= 8 || devinfo->is_haswell);
|
||||
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
|
||||
} else {
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
}
|
||||
assert(!inst->shadow_compare);
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
assert(devinfo->gen >= 9);
|
||||
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
assert(devinfo->gen >= 9);
|
||||
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
if (devinfo->gen >= 7)
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
|
||||
else
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
assert(devinfo->gen >= 7);
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
assert(devinfo->gen >= 7);
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
break;
|
||||
case SHADER_OPCODE_LOD:
|
||||
msg_type = GEN5_SAMPLER_MESSAGE_LOD;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4:
|
||||
if (inst->shadow_compare) {
|
||||
assert(devinfo->gen >= 7);
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
|
||||
} else {
|
||||
assert(devinfo->gen >= 6);
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
}
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
assert(devinfo->gen >= 7);
|
||||
if (inst->shadow_compare) {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
|
||||
} else {
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
||||
}
|
||||
assert(devinfo->gen == 6);
|
||||
assert(!inst->shadow_compare);
|
||||
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
break;
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
|
||||
|
@ -1180,16 +1136,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
|
|||
dst = vec16(dst);
|
||||
}
|
||||
|
||||
assert(devinfo->gen < 7 || inst->header_size == 0 ||
|
||||
src.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* Load the message header if present. If there's a texture offset,
|
||||
* we need to set it up explicitly and load the offset bitfield.
|
||||
* Otherwise, we can use an implied move from g0 to the first message reg.
|
||||
*/
|
||||
if (inst->header_size != 0 && devinfo->gen < 7) {
|
||||
struct brw_reg src = brw_null_reg();
|
||||
if (inst->header_size != 0) {
|
||||
if (devinfo->gen < 6 && !inst->offset) {
|
||||
/* Set up an implied move from g0 to the MRF. */
|
||||
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
@ -1218,83 +1172,28 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
|
|||
uint32_t base_binding_table_index;
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
base_binding_table_index = prog_data->binding_table.gather_texture_start;
|
||||
break;
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
base_binding_table_index = prog_data->binding_table.image_start;
|
||||
break;
|
||||
default:
|
||||
base_binding_table_index = prog_data->binding_table.texture_start;
|
||||
break;
|
||||
}
|
||||
|
||||
if (surface_index.file == BRW_IMMEDIATE_VALUE &&
|
||||
sampler_index.file == BRW_IMMEDIATE_VALUE) {
|
||||
uint32_t surface = surface_index.ud;
|
||||
uint32_t sampler = sampler_index.ud;
|
||||
assert(surface_index.file == BRW_IMMEDIATE_VALUE);
|
||||
assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
|
||||
|
||||
brw_SAMPLE(p,
|
||||
retype(dst, BRW_REGISTER_TYPE_UW),
|
||||
inst->base_mrf,
|
||||
src,
|
||||
surface + base_binding_table_index,
|
||||
sampler % 16,
|
||||
msg_type,
|
||||
inst->size_written / REG_SIZE,
|
||||
inst->mlen,
|
||||
inst->header_size != 0,
|
||||
simd_mode,
|
||||
return_format);
|
||||
} else {
|
||||
/* Non-const sampler index */
|
||||
|
||||
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
|
||||
struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
|
||||
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
|
||||
if (brw_regs_equal(&surface_reg, &sampler_reg)) {
|
||||
brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
|
||||
} else {
|
||||
if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
|
||||
brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
|
||||
} else {
|
||||
brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
|
||||
brw_OR(p, addr, addr, surface_reg);
|
||||
}
|
||||
}
|
||||
if (base_binding_table_index)
|
||||
brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
|
||||
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
/* dst = send(offset, a0.0 | <descriptor>) */
|
||||
brw_send_indirect_message(
|
||||
p, BRW_SFID_SAMPLER, dst, src, addr,
|
||||
brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE,
|
||||
inst->header_size) |
|
||||
brw_sampler_desc(devinfo,
|
||||
0 /* surface */,
|
||||
0 /* sampler */,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
return_format));
|
||||
|
||||
/* visitor knows more than we do about the surface limit required,
|
||||
* so has already done marking.
|
||||
*/
|
||||
}
|
||||
|
||||
if (is_combined_send) {
|
||||
brw_inst_set_eot(p->devinfo, brw_last_inst, true);
|
||||
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
|
||||
}
|
||||
brw_SAMPLE(p,
|
||||
retype(dst, BRW_REGISTER_TYPE_UW),
|
||||
inst->base_mrf,
|
||||
src,
|
||||
surface_index.ud + base_binding_table_index,
|
||||
sampler_index.ud % 16,
|
||||
msg_type,
|
||||
inst->size_written / REG_SIZE,
|
||||
inst->mlen,
|
||||
inst->header_size != 0,
|
||||
simd_mode,
|
||||
return_format);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2170,23 +2069,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_UMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
generate_tex(inst, dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
|
||||
assert(inst->src[0].file == BAD_FILE);
|
||||
generate_tex(inst, dst, src[1], src[2]);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_DDX_COARSE:
|
||||
|
|
|
@ -416,6 +416,23 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
|||
|
||||
case SHADER_OPCODE_SEND:
|
||||
switch (inst->sfid) {
|
||||
case BRW_SFID_SAMPLER: {
|
||||
unsigned msg_type = (inst->desc >> 12) & 0x1f;
|
||||
switch (msg_type) {
|
||||
case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
|
||||
case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
|
||||
/* See also SHADER_OPCODE_TXS */
|
||||
latency = 100;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* See also SHADER_OPCODE_TEX */
|
||||
latency = 200;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case GEN6_SFID_DATAPORT_RENDER_CACHE:
|
||||
switch ((inst->desc >> 14) & 0x1f) {
|
||||
case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE:
|
||||
|
|
Loading…
Reference in New Issue