diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 34a88ac89e2..efaa7e461c7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -902,8 +902,6 @@ fs_inst::size_read(int arg) const break; case FS_OPCODE_FB_READ: - case SHADER_OPCODE_URB_READ_LOGICAL: - case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: if (arg == 0) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 35a50e838a8..319567c9bdb 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2619,11 +2619,15 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, fs_reg indirect_offset = get_nir_src(offset_src); if (nir_src_is_const(offset_src)) { + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; + /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); - inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle); + inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, + ARRAY_SIZE(srcs)); inst->size_written = read_components * tmp.component_size(inst->exec_size); for (unsigned i = 0; i < num_components; i++) { @@ -2631,7 +2635,8 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, offset(tmp, bld, i + first_component)); } } else { - inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle); + inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, srcs, + ARRAY_SIZE(srcs)); inst->size_written = num_components * dst.component_size(inst->exec_size); } @@ -2639,14 +2644,16 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, inst->mlen = 1; } else { /* Indirect indexing - use per-slot offsets as well. */ - const fs_reg srcs[] = { icp_handle, indirect_offset }; unsigned read_components = num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; + srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; + if (first_component != 0) { inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp, - payload); + srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * tmp.component_size(inst->exec_size); for (unsigned i = 0; i < num_components; i++) { @@ -2654,7 +2661,8 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, offset(tmp, bld, i + first_component)); } } else { - inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, payload); + inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, + srcs, ARRAY_SIZE(srcs)); inst->size_written = num_components * dst.component_size(inst->exec_size); } @@ -2923,38 +2931,42 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle; + if (indirect_offset.file == BAD_FILE) { /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); - inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle); + inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs, + ARRAY_SIZE(srcs)); for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), offset(tmp, bld, i + first_component)); } } else { - inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle); + inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, srcs, + ARRAY_SIZE(srcs)); } inst->offset = imm_offset; inst->mlen = 1; } else { /* Indirect indexing - use per-slot offsets as well. */ - const fs_reg srcs[] = { icp_handle, indirect_offset }; - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; + if (first_component != 0) { unsigned read_components = num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp, - payload); + srcs, ARRAY_SIZE(srcs)); for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), offset(tmp, bld, i + first_component)); } } else { inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, - payload); + srcs, ARRAY_SIZE(srcs)); } inst->offset = imm_offset; inst->mlen = 2; @@ -2993,12 +3005,15 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, bld.MOV(patch_handle, output_handles); { + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = patch_handle; + if (first_component != 0) { unsigned read_components = instr->num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, - patch_handle); + srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE; for (unsigned i = 0; i < instr->num_components; i++) { bld.MOV(offset(dst, bld, i), @@ -3006,7 +3021,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } else { inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, - patch_handle); + srcs, ARRAY_SIZE(srcs)); inst->size_written = instr->num_components * REG_SIZE; } inst->offset = imm_offset; @@ -3014,15 +3029,16 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } else { /* Indirect indexing - use per-slot offsets as well. */ - const fs_reg srcs[] = { output_handles, indirect_offset }; - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = output_handles; + srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; + if (first_component != 0) { unsigned read_components = instr->num_components + first_component; fs_reg tmp = bld.vgrf(dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp, - payload); + srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE; for (unsigned i = 0; i < instr->num_components; i++) { bld.MOV(offset(dst, bld, i), @@ -3030,7 +3046,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } else { inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, - payload); + srcs, ARRAY_SIZE(srcs)); inst->size_written = instr->num_components * REG_SIZE; } inst->offset = imm_offset; @@ -3151,18 +3167,16 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, (imm_offset / 2) + 1); } else { /* Replicate the patch handle to all enabled channels */ - const fs_reg srcs[] = { - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD) - }; - fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - bld.LOAD_PAYLOAD(patch_handle, srcs, ARRAY_SIZE(srcs), 0); + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); if (first_component != 0) { unsigned read_components = instr->num_components + first_component; fs_reg tmp = bld.vgrf(dest.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, - patch_handle); + srcs, ARRAY_SIZE(srcs)); inst->size_written = read_components * REG_SIZE; for (unsigned i = 0; i < instr->num_components; i++) { bld.MOV(offset(dest, bld, i), @@ -3170,7 +3184,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, } } else { inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dest, - patch_handle); + srcs, ARRAY_SIZE(srcs)); inst->size_written = instr->num_components * REG_SIZE; } inst->mlen = 1; @@ -3184,26 +3198,25 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, * two double components. */ unsigned num_components = instr->num_components; - const fs_reg srcs[] = { - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), - indirect_offset - }; - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); + srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; if (first_component != 0) { unsigned read_components = num_components + first_component; fs_reg tmp = bld.vgrf(dest.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp, - payload); + srcs, ARRAY_SIZE(srcs)); for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dest, bld, i), offset(tmp, bld, i + first_component)); } } else { inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dest, - payload); + srcs, ARRAY_SIZE(srcs)); } inst->mlen = 2; inst->offset = imm_offset; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 0ebc9984b1e..2ccd083f576 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -36,13 +36,24 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst, { const intel_device_info *devinfo = bld.shader->devinfo; - assert(inst->size_written % REG_SIZE == 0); - assert(inst->src[0].type == BRW_REGISTER_TYPE_UD); - assert(inst->src[0].file == FIXED_GRF || inst->src[0].file == VGRF); + assert(inst->header_size == 0); + + fs_reg *payload_sources = new fs_reg[inst->mlen]; + fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(inst->mlen), + BRW_REGISTER_TYPE_F); + + unsigned header_size = 0; + payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE]; + if (per_slot_present) + payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; + + bld.LOAD_PAYLOAD(payload, payload_sources, inst->mlen, header_size); + + delete [] payload_sources; inst->opcode = SHADER_OPCODE_SEND; - inst->header_size = 1; + inst->header_size = header_size; inst->sfid = BRW_SFID_URB; inst->desc = brw_urb_desc(devinfo, @@ -55,13 +66,11 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst, inst->ex_mlen = 0; inst->send_is_volatile = true; - fs_reg tmp = inst->src[0]; - inst->resize_sources(4); inst->src[0] = brw_imm_ud(0); /* desc */ inst->src[1] = brw_imm_ud(0); /* ex_desc */ - inst->src[2] = tmp; + inst->src[2] = payload; inst->src[3] = brw_null_reg(); } diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 6a8872cebe9..65f6164de00 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -1037,8 +1037,11 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr, fs_builder ubld8 = bld.group(8, 0).exec_all(); fs_reg data = ubld8.vgrf(BRW_REGISTER_TYPE_UD, num_regs); + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, urb_handle); + fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, + srcs, ARRAY_SIZE(srcs)); inst->mlen = 1; inst->offset = urb_global_offset; assert(inst->offset < 2048); @@ -1093,17 +1096,14 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, bld8.SHR(off, off, brw_imm_ud(2)); - fs_reg payload_srcs[2]; - payload_srcs[0] = urb_handle; - payload_srcs[1] = off; - - fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld8.LOAD_PAYLOAD(payload, payload_srcs, 2, 2); + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; + srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, - data, payload); + data, srcs, ARRAY_SIZE(srcs)); inst->mlen = 2; inst->offset = 0; inst->size_written = 4 * REG_SIZE;