From d8a3294ac21741c3a78eef72b832902e15fbd948 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 17 May 2016 23:18:38 -0700 Subject: [PATCH] i965/fs: Hide varying pull constant load message setup behind logical opcode. This will allow the SIMD lowering pass to split 32-wide varying pull constant loads (not natively supported by the hardware) into 16-wide instructions. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 50 +++++++++++-------- src/mesa/drivers/dri/i965/brw_fs.h | 6 +-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- .../drivers/dri/i965/brw_fs_generator.cpp | 9 +--- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + 6 files changed, 39 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 31b33367727..432a1aec0df 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1116,6 +1116,7 @@ enum opcode { FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, + FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_GET_BUFFER_SIZE, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 71df1e57fa3..336806b9e51 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -188,32 +188,16 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, scale = 2; } - enum opcode op; - if (devinfo->gen >= 7) - op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; - else - op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; - /* The pull load message will load a vec4 (16 bytes). If we are loading * a double this means we are only loading 2 elements worth of data. * We also want to use a 32-bit data type for the dst of the load operation * so other parts of the driver don't get confused about the size of the * result. */ - int regs_written = 4 * (bld.dispatch_width() / 8) * scale; - fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), - BRW_REGISTER_TYPE_F); - fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); - inst->regs_written = regs_written; - - if (devinfo->gen < 7) { - inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen); - inst->header_size = 1; - if (devinfo->gen == 4) - inst->mlen = 3; - else - inst->mlen = 1 + bld.dispatch_width() / 8; - } + fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * scale); + fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, + vec4_result, surf_index, vec4_offset); + inst->regs_written = 4 * (bld.dispatch_width() / 8) * scale; if (type_sz(dst.type) == 8) { assert(scale == 1); @@ -4439,6 +4423,28 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, delete[] components; } +static void +lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst) +{ + const brw_device_info *devinfo = bld.shader->devinfo; + + if (devinfo->gen >= 7) { + inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + + } else { + const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen), + BRW_REGISTER_TYPE_UD); + + bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]); + + inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; + inst->resize_sources(1); + inst->base_mrf = payload.nr; + inst->header_size = 1; + inst->mlen = 1 + inst->exec_size / 8; + } +} + bool fs_visitor::lower_logical_sends() { @@ -4544,6 +4550,10 @@ fs_visitor::lower_logical_sends() ibld.sample_mask_reg()); break; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + lower_varying_pull_constant_logical_send(ibld, inst); + break; + default: continue; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 062fcd54592..75759b7ba26 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -463,9 +463,9 @@ private: struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset); - void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); + void generate_varying_pull_constant_load(fs_inst *inst, + struct brw_reg dst, + struct brw_reg index); void generate_varying_pull_constant_load_gen7(fs_inst *inst, struct brw_reg dst, struct brw_reg index, diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index b17a082780a..99121c503f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -72,8 +72,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0a847f8637f..d979518c922 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1347,8 +1347,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, void fs_generator::generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) + struct brw_reg index) { assert(devinfo->gen < 7); /* Should use the gen7 variant. */ assert(inst->header_size != 0); @@ -1380,10 +1379,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; } - struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1), - BRW_REGISTER_TYPE_D); - brw_MOV(p, offset_mrf, offset); - struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); @@ -2186,7 +2181,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: - generate_varying_pull_constant_load(inst, dst, src[0], src[1]); + generate_varying_pull_constant_load(inst, dst, src[0]); break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d24db58a046..551d9c23926 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -348,6 +348,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op) return "varying_pull_const"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: return "varying_pull_const_gen7"; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + return "varying_pull_const_logical"; case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: return "mov_dispatch_to_flags";