2011-05-25 00:34:27 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2013-09-18 07:32:10 +01:00
|
|
|
/** @file brw_fs_generator.cpp
|
2011-05-25 00:34:27 +01:00
|
|
|
*
|
2013-09-18 07:32:10 +01:00
|
|
|
* This file supports generating code from the FS LIR to the actual
|
2011-05-25 00:34:27 +01:00
|
|
|
* native instructions.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "brw_eu.h"
|
|
|
|
#include "brw_fs.h"
|
2012-10-03 21:03:12 +01:00
|
|
|
#include "brw_cfg.h"
|
2019-05-23 17:05:23 +01:00
|
|
|
#include "util/mesa-sha1.h"
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2015-10-23 21:11:44 +01:00
|
|
|
static enum brw_reg_file
|
|
|
|
brw_file_from_reg(fs_reg *reg)
|
2014-12-05 17:53:11 +00:00
|
|
|
{
|
|
|
|
switch (reg->file) {
|
2015-10-27 00:52:57 +00:00
|
|
|
case ARF:
|
|
|
|
return BRW_ARCHITECTURE_REGISTER_FILE;
|
|
|
|
case FIXED_GRF:
|
2015-10-27 00:09:25 +00:00
|
|
|
case VGRF:
|
2014-12-05 17:53:11 +00:00
|
|
|
return BRW_GENERAL_REGISTER_FILE;
|
|
|
|
case MRF:
|
|
|
|
return BRW_MESSAGE_REGISTER_FILE;
|
|
|
|
case IMM:
|
|
|
|
return BRW_IMMEDIATE_VALUE;
|
2015-10-26 13:58:56 +00:00
|
|
|
case BAD_FILE:
|
|
|
|
case ATTR:
|
|
|
|
case UNIFORM:
|
2014-12-05 17:53:11 +00:00
|
|
|
unreachable("not reached");
|
|
|
|
}
|
2015-10-23 21:11:44 +01:00
|
|
|
return BRW_ARCHITECTURE_REGISTER_FILE;
|
2014-12-05 17:53:11 +00:00
|
|
|
}
|
|
|
|
|
2014-11-28 20:21:03 +00:00
|
|
|
/**
 * Convert an FS IR register into a fully-specified hardware brw_reg,
 * computing the region (vstride/width/hstride) from the IR stride and the
 * instruction's execution size.
 *
 * \param devinfo    device info, used for gen-specific regioning workarounds
 * \param inst       instruction the register belongs to (for exec_size and
 *                   to detect whether \p reg is the destination)
 * \param reg        IR register to convert
 * \param compressed whether the instruction is compressed (executed as two
 *                   decompressed halves), which halves the physical width
 */
static struct brw_reg
brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
                    fs_reg *reg, bool compressed)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case MRF:
      assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
      /* Fallthrough */
   case VGRF:
      if (reg->stride == 0) {
         /* Stride 0: a scalar <0,1,0> region replicated across channels. */
         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
      } else {
         /* From the Haswell PRM:
          *
          *  "VertStride must be used to cross GRF register boundaries. This
          *   rule implies that elements within a 'Width' cannot cross GRF
          *   boundaries."
          *
          * The maximum width value that could satisfy this restriction is:
          */
         const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type));

         /* Because the hardware can only split source regions at a whole
          * multiple of width during decompression (i.e. vertically), clamp
          * the value obtained above to the physical execution size of a
          * single decompressed chunk of the instruction:
          */
         const unsigned phys_width = compressed ? inst->exec_size / 2 :
                                     inst->exec_size;

         /* Hardware regions are limited to a width of at most 16 channels. */
         const unsigned max_hw_width = 16;

         /* XXX - The equation above is strictly speaking not correct on
          *       hardware that supports unbalanced GRF writes -- On Gen9+
          *       each decompressed chunk of the instruction may have a
          *       different execution size when the number of components
          *       written to each destination GRF is not the same.
          */
         if (reg->stride > 4) {
            /* Strides over 4 can't be encoded as hstride; use a <stride;1,0>
             * region instead.  Only legal for sources.
             */
            assert(reg != &inst->dst);
            assert(reg->stride * type_sz(reg->type) <= REG_SIZE);
            brw_reg = brw_vecn_reg(1, brw_file_from_reg(reg), reg->nr, 0);
            brw_reg = stride(brw_reg, reg->stride, 1, 0);
         } else {
            const unsigned width = MIN3(reg_width, phys_width, max_hw_width);
            brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
            brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
         }

         if (devinfo->gen == 7 && !devinfo->is_haswell) {
            /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13):
             *  "Each DF (Double Float) operand uses an element size of 4 rather
             *   than 8 and all regioning parameters are twice what the values
             *   would be based on the true element size: ExecSize, Width,
             *   HorzStride, and VertStride. Each DF operand uses a pair of
             *   channels and all masking and swizzing should be adjusted
             *   appropriately."
             *
             * From the IvyBridge PRM (Special Requirements for Handling Double
             * Precision Data Types, page 71):
             *  "In Align1 mode, all regioning parameters like stride, execution
             *   size, and width must use the syntax of a pair of packed
             *   floats. The offsets for these data types must be 64-bit
             *   aligned. The execution size and regioning parameters are in terms
             *   of floats."
             *
             * Summarized: when handling DF-typed arguments, ExecSize,
             * VertStride, and Width must be doubled.
             *
             * It applies to BayTrail too.
             */
            if (type_sz(reg->type) == 8) {
               brw_reg.width++;
               if (brw_reg.vstride > 0)
                  brw_reg.vstride++;
               assert(brw_reg.hstride == BRW_HORIZONTAL_STRIDE_1);
            }

            /* When converting from DF->F, we set the destination stride to 2
             * because each d2f conversion implicitly writes 2 floats, being
             * the first one the converted value. IVB/BYT actually writes two
             * F components per SIMD channel, and every other component is
             * filled with garbage.
             */
            if (reg == &inst->dst && get_exec_type_size(inst) == 8 &&
                type_sz(inst->dst.type) < 8) {
               assert(brw_reg.hstride > BRW_HORIZONTAL_STRIDE_1);
               brw_reg.hstride--;
            }
         }
      }

      brw_reg = retype(brw_reg, reg->type);
      brw_reg = byte_offset(brw_reg, reg->offset);
      brw_reg.abs = reg->abs;
      brw_reg.negate = reg->negate;
      break;
   case ARF:
   case FIXED_GRF:
   case IMM:
      /* These already carry a full hardware description; just unwrap it. */
      assert(reg->offset == 0);
      brw_reg = reg->as_brw_reg();
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case ATTR:
   case UNIFORM:
      unreachable("not reached");
   }

   /* On HSW+, scalar DF sources can be accessed using the normal <0,1,0>
    * region, but on IVB and BYT DF regions must be programmed in terms of
    * floats. A <0,2,1> region accomplishes this.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       type_sz(reg->type) == 8 &&
       brw_reg.vstride == BRW_VERTICAL_STRIDE_0 &&
       brw_reg.width == BRW_WIDTH_1 &&
       brw_reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
      brw_reg.width = BRW_WIDTH_2;
      brw_reg.hstride = BRW_HORIZONTAL_STRIDE_1;
   }

   return brw_reg;
}
|
|
|
|
|
2015-04-16 22:34:04 +01:00
|
|
|
/**
 * Construct a fragment-shader code generator.
 *
 * \param compiler                backend compiler context (provides devinfo)
 * \param log_data                opaque pointer handed back to log callbacks
 * \param mem_ctx                 ralloc context owning the brw_codegen state
 * \param prog_data               stage prog_data being filled out
 * \param shader_stats            stats gathered earlier in compilation
 * \param runtime_check_aads_emit whether FB writes must test at runtime if
 *                                AA data needs to be sent (gen < 6 path)
 * \param stage                   shader stage being generated
 */
fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
                           void *mem_ctx,
                           struct brw_stage_prog_data *prog_data,
                           struct shader_stats shader_stats,
                           bool runtime_check_aads_emit,
                           gl_shader_stage stage)
   : compiler(compiler), log_data(log_data),
     devinfo(compiler->devinfo),
     prog_data(prog_data),
     shader_stats(shader_stats),
     runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
     stage(stage), mem_ctx(mem_ctx)
{
   p = rzalloc(mem_ctx, struct brw_codegen);
   brw_init_codegen(devinfo, p, mem_ctx);

   /* In the FS code generator, we are very careful to ensure that we always
    * set the right execution size so we don't need the EU code to "help" us
    * by trying to infer it.  Sometimes, it infers the wrong thing.
    */
   p->automatic_exec_sizes = false;
}
|
|
|
|
|
|
|
|
/* Trivial destructor: all state (p and its instruction store) was
 * rzalloc'ed out of mem_ctx, so the ralloc context owns the memory.
 */
fs_generator::~fs_generator()
{
}
|
|
|
|
|
2014-11-12 19:01:16 +00:00
|
|
|
class ip_record : public exec_node {
|
|
|
|
public:
|
|
|
|
DECLARE_RALLOC_CXX_OPERATORS(ip_record)
|
|
|
|
|
|
|
|
ip_record(int ip)
|
|
|
|
{
|
|
|
|
this->ip = ip;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ip;
|
|
|
|
};
|
|
|
|
|
2014-05-16 21:06:45 +01:00
|
|
|
/**
 * Emit the final HALT of the program and patch every previously-recorded
 * discard HALT so its UIP lands on it.
 *
 * \return true if a final HALT was emitted (there was at least one discard
 *         jump to patch), false if there was nothing to do.
 */
bool
fs_generator::patch_discard_jumps_to_fb_writes()
{
   if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
      return false;

   /* Jump distances are in units that vary by generation. */
   int scale = brw_jump_scale(p->devinfo);

   /* There is a somewhat strange undocumented requirement of using
    * HALT, according to the simulator.  If some channel has HALTed to
    * a particular UIP, then by the end of the program, every channel
    * must have HALTed to that UIP.  Furthermore, the tracking is a
    * stack, so you can't do the final halt of a UIP after starting
    * halting to a new UIP.
    *
    * Symptoms of not emitting this instruction on actual hardware
    * included GPU hangs and sparkly rendering on the piglit discard
    * tests.
    */
   brw_inst *last_halt = gen6_HALT(p);
   brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
   brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);

   int ip = p->nr_insn;

   /* Retarget every recorded discard HALT at the final HALT just emitted. */
   foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
      brw_inst *patch = &p->store[patch_ip->ip];

      assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
      /* HALT takes a half-instruction distance from the pre-incremented IP. */
      brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
   }

   this->discard_halt_patches.make_empty();
   return true;
}
|
|
|
|
|
2018-10-29 20:06:14 +00:00
|
|
|
/**
 * Generate a raw SEND (or split SENDS) message.
 *
 * Builds the immediate part of the message descriptor from the
 * instruction's mlen/rlen/header_size, then chooses between the
 * single-payload SEND and the split-payload SENDS encodings.  When
 * inst->check_tdr is set the opcode is switched to the "with conditional
 * dependency" variant (SENDC/SENDSC).
 */
void
fs_generator::generate_send(fs_inst *inst,
                            struct brw_reg dst,
                            struct brw_reg desc,
                            struct brw_reg ex_desc,
                            struct brw_reg payload,
                            struct brw_reg payload2)
{
   /* A null destination means no response is expected, so rlen is 0. */
   const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                            dst.nr == BRW_ARF_NULL;
   const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;

   uint32_t desc_imm = inst->desc |
      brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);

   uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);

   if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
      /* If we have any sort of extended descriptor, then we need SENDS.  This
       * also covers the dual-payload case because ex_mlen goes in ex_desc.
       */
      brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
                                      desc, desc_imm, ex_desc, ex_desc_imm,
                                      inst->eot);
      if (inst->check_tdr)
         /* Gen12 folds the split encoding into SENDC; older gens have a
          * distinct SENDSC opcode.
          */
         brw_inst_set_opcode(p->devinfo, brw_last_inst,
                             devinfo->gen >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
   } else {
      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm,
                                inst->eot);
      if (inst->check_tdr)
         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
   }
}
|
|
|
|
|
2014-06-05 14:03:08 +01:00
|
|
|
/**
 * Emit a single framebuffer-write message.
 *
 * \param inst           the FB write instruction (target, eot, group, ...)
 * \param payload        message payload start register
 * \param implied_header implied header register (pre-gen6 only)
 * \param nr             message length in registers
 */
void
fs_generator::fire_fb_write(fs_inst *inst,
                            struct brw_reg payload,
                            struct brw_reg implied_header,
                            GLuint nr)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);

   if (devinfo->gen < 6) {
      /* Pre-gen6: copy g1.1 of the implied header into the payload with a
       * raw WE_all MOV before the write.
       */
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, offset(retype(payload, BRW_REGISTER_TYPE_UD), 1),
              offset(retype(implied_header, BRW_REGISTER_TYPE_UD), 1));
      brw_pop_insn_state(p);
   }

   uint32_t msg_control = brw_fb_write_msg_control(inst, prog_data);

   /* We assume render targets start at 0, because headerless FB write
    * messages set "Render Target Index" to 0.  Using a different binding
    * table index would make it impossible to use headerless messages.
    */
   const uint32_t surf_index = inst->target;

   brw_inst *insn = brw_fb_WRITE(p,
                                 payload,
                                 retype(implied_header, BRW_REGISTER_TYPE_UW),
                                 msg_control,
                                 surf_index,
                                 nr,
                                 0,
                                 inst->eot,
                                 inst->last_rt,
                                 inst->header_size != 0);

   /* Select which SIMD16 slot group this (possibly SIMD8) write targets. */
   if (devinfo->gen >= 6)
      brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/**
 * Generate the framebuffer write for an FS_OPCODE_FB_WRITE instruction,
 * including the optional runtime check (pre-gen6) that decides whether
 * antialias data must be included in the payload.
 */
void
fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
{
   if (devinfo->gen < 8 && !devinfo->is_haswell) {
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
   }

   /* Pre-gen6 FB writes carry an implied header taken from the payload. */
   const struct brw_reg implied_header =
      devinfo->gen < 6 ? payload : brw_null_reg();

   if (inst->base_mrf >= 0)
      payload = brw_message_reg(inst->base_mrf);

   if (!runtime_check_aads_emit) {
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   } else {
      /* This can only happen in gen < 6 */
      assert(devinfo->gen < 6);

      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

      /* Check runtime bit to detect if we have to send AA data or not */
      brw_push_insn_state(p);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_AND(p,
              v1_null_ud,
              retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(1<<26));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      /* Predicated jump over the no-AA write when the bit is set. */
      int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
      brw_pop_insn_state(p);
      {
         /* Don't send AA data */
         fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
      }
      brw_land_fwd_jump(p, jmp);
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   }
}
|
|
|
|
|
2016-07-22 00:52:33 +01:00
|
|
|
void
|
|
|
|
fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
|
|
|
|
struct brw_reg payload)
|
|
|
|
{
|
2016-09-07 21:38:20 +01:00
|
|
|
assert(inst->size_written % REG_SIZE == 0);
|
2016-09-09 07:48:51 +01:00
|
|
|
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
2018-01-03 07:59:06 +00:00
|
|
|
/* We assume that render targets start at binding table index 0. */
|
|
|
|
const unsigned surf_index = inst->target;
|
2016-07-22 00:52:33 +01:00
|
|
|
|
|
|
|
gen9_fb_READ(p, dst, payload, surf_index,
|
2016-09-07 21:38:20 +01:00
|
|
|
inst->header_size, inst->size_written / REG_SIZE,
|
2016-07-22 00:52:33 +01:00
|
|
|
prog_data->persample_dispatch);
|
|
|
|
}
|
|
|
|
|
2015-11-08 02:58:34 +00:00
|
|
|
/**
 * Generate a MOV whose source (or destination) GRF address is computed at
 * runtime: reg's base byte offset plus indirect_byte_offset.
 *
 * If the indirect offset is an immediate the whole thing folds into an
 * ordinary MOV; otherwise VxH indirect addressing through a0 is used,
 * with workarounds for IVB/BYT/CHV/9LP 64-bit indirect restrictions and
 * an SNB MRF-hazard errata.
 */
void
fs_generator::generate_mov_indirect(fs_inst *inst,
                                    struct brw_reg dst,
                                    struct brw_reg reg,
                                    struct brw_reg indirect_byte_offset)
{
   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
   assert(!reg.abs && !reg.negate);
   assert(reg.type == dst.type);

   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;

   if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
      /* Constant offset: fold it into the register number/subnumber and
       * emit a plain MOV.
       */
      imm_byte_offset += indirect_byte_offset.ud;

      reg.nr = imm_byte_offset / REG_SIZE;
      reg.subnr = imm_byte_offset % REG_SIZE;
      brw_MOV(p, dst, reg);
   } else {
      /* Prior to Broadwell, there are only 8 address registers. */
      assert(inst->exec_size <= 8 || devinfo->gen >= 8);

      /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
      struct brw_reg addr = vec8(brw_address_reg(0));

      /* The destination stride of an instruction (in bytes) must be greater
       * than or equal to the size of the rest of the instruction.  Since the
       * address register is of type UW, we can't use a D-type instruction.
       * In order to get around this, we retype to UW and use a stride.
       */
      indirect_byte_offset =
         retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);

      /* There are a number of reasons why we don't use the base offset here.
       * One reason is that the field is only 9 bits which means we can only
       * use it to access the first 16 GRFs.  Also, from the Haswell PRM
       * section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Since the indirect may cause us to cross a register boundary, this
       * makes the base offset almost useless.  We could try and do something
       * clever where we use a actual base offset if base_offset % 32 == 0 but
       * that would mean we were generating different code depending on the
       * base offset.  Instead, for the sake of consistency, we'll just do the
       * add ourselves.  This restriction is only listed in the Haswell PRM
       * but empirical testing indicates that it applies on all older
       * generations and is lifted on Broadwell.
       *
       * In the end, while base_offset is nice to look at in the generated
       * code, using it saves us 0 instructions and would require quite a bit
       * of case-by-case work.  It's just not worth it.
       */
      brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      if (type_sz(reg.type) > 4 &&
          ((devinfo->gen == 7 && !devinfo->is_haswell) ||
           devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
           !devinfo->has_64bit_types)) {
         /* IVB has an issue (which we found empirically) where it reads two
          * address register components per channel for indirectly addressed
          * 64-bit sources.
          *
          * From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be used."
          *
          * To work around both of these, we do two integer MOVs insead of one
          * 64-bit MOV.  Because no double value should ever cross a register
          * boundary, it's safe to use the immediate offset in the indirect
          * here to handle adding 4 bytes to the offset and avoid the extra
          * ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                 retype(brw_VxH_indirect(0, 0), BRW_REGISTER_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                 retype(brw_VxH_indirect(0, 4), BRW_REGISTER_TYPE_D));
      } else {
         struct brw_reg ind_src = brw_VxH_indirect(0, 0);

         brw_inst *mov = brw_MOV(p, dst, retype(ind_src, reg.type));

         if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
             !inst->get_next()->is_tail_sentinel() &&
             ((fs_inst *)inst->get_next())->mlen > 0) {
            /* From the Sandybridge PRM:
             *
             *    "[Errata: DevSNB(SNB)] If MRF register is updated by any
             *    instruction that “indexed/indirect” source AND is followed
             *    by a send, the instruction requires a “Switch”. This is to
             *    avoid race condition where send may dispatch before MRF is
             *    updated."
             */
            brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
         }
      }
   }
}
|
|
|
|
|
2017-08-29 17:21:32 +01:00
|
|
|
/**
 * Generate a SHUFFLE: dst[i] = src[idx[i]], i.e. a per-channel gather
 * within a register region using VxH indirect addressing through a0.
 *
 * The instruction is split into lower_width-sized groups to respect
 * address-register limits, and contains workarounds for 64-bit indirect
 * reads on IVB/BYT/CHV/9LP.
 */
void
fs_generator::generate_shuffle(fs_inst *inst,
                               struct brw_reg dst,
                               struct brw_reg src,
                               struct brw_reg idx)
{
   /* Ivy bridge has some strange behavior that makes this a real pain to
    * implement for 64-bit values so we just don't bother.
    */
   assert(devinfo->gen >= 8 || devinfo->is_haswell || type_sz(src.type) <= 4);

   /* Because we're using the address register, we're limited to 8-wide
    * execution on gen7.  On gen8, we're limited to 16-wide by the address
    * register file and 8-wide for 64-bit types.  We could try and make this
    * instruction splittable higher up in the compiler but that gets weird
    * because it reads all of the channels regardless of execution size.  It's
    * easier just to split it here.
    */
   const unsigned lower_width =
      (devinfo->gen <= 7 || type_sz(src.type) > 4) ?
      8 : MIN2(16, inst->exec_size);

   brw_set_default_exec_size(p, cvt(lower_width) - 1);
   for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
      brw_set_default_group(p, group);

      if ((src.vstride == 0 && src.hstride == 0) ||
          idx.file == BRW_IMMEDIATE_VALUE) {
         /* Trivial, the source is already uniform or the index is a constant.
          * We will typically not get here if the optimizer is doing its job,
          * but asserting would be mean.
          */
         const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
         brw_MOV(p, suboffset(dst, group), stride(suboffset(src, i), 0, 1, 0));
      } else {
         /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
         struct brw_reg addr = vec8(brw_address_reg(0));

         struct brw_reg group_idx = suboffset(idx, group);

         if (lower_width == 8 && group_idx.width == BRW_WIDTH_16) {
            /* Things get grumpy if the register is too wide. */
            group_idx.width--;
            group_idx.vstride--;
         }

         assert(type_sz(group_idx.type) <= 4);
         if (type_sz(group_idx.type) == 4) {
            /* The destination stride of an instruction (in bytes) must be
             * greater than or equal to the size of the rest of the
             * instruction.  Since the address register is of type UW, we
             * can't use a D-type instruction.  In order to get around this,
             * re retype to UW and use a stride.
             */
            group_idx = retype(spread(group_idx, 2), BRW_REGISTER_TYPE_W);
         }

         /* Take into account the component size and horizontal stride. */
         assert(src.vstride == src.hstride + src.width);
         brw_SHL(p, addr, group_idx,
                 brw_imm_uw(_mesa_logbase2(type_sz(src.type)) +
                            src.hstride - 1));

         /* Add on the register start offset */
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_uw(src.nr * REG_SIZE + src.subnr));

         if (type_sz(src.type) > 4 &&
             ((devinfo->gen == 7 && !devinfo->is_haswell) ||
              devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
            /* IVB has an issue (which we found empirically) where it reads
             * two address register components per channel for indirectly
             * addressed 64-bit sources.
             *
             * From the Cherryview PRM Vol 7. "Register Region Restrictions":
             *
             *    "When source or destination datatype is 64b or operation is
             *    integer DWord multiply, indirect addressing must not be
             *    used."
             *
             * To work around both of these, we do two integer MOVs insead of
             * one 64-bit MOV.  Because no double value should ever cross a
             * register boundary, it's safe to use the immediate offset in the
             * indirect here to handle adding 4 bytes to the offset and avoid
             * the extra ADD to the register file.
             */
            struct brw_reg gdst = suboffset(dst, group);
            struct brw_reg dst_d = retype(spread(gdst, 2),
                                          BRW_REGISTER_TYPE_D);
            assert(dst.hstride == 1);
            brw_MOV(p, dst_d,
                    retype(brw_VxH_indirect(0, 0), BRW_REGISTER_TYPE_D));
            brw_set_default_swsb(p, tgl_swsb_null());
            brw_MOV(p, byte_offset(dst_d, 4),
                    retype(brw_VxH_indirect(0, 4), BRW_REGISTER_TYPE_D));
         } else {
            brw_MOV(p, suboffset(dst, group * dst.hstride),
                    retype(brw_VxH_indirect(0, 0), src.type));
         }
      }

      brw_set_default_swsb(p, tgl_swsb_null());
   }
}
|
|
|
|
|
2018-12-06 22:11:34 +00:00
|
|
|
/**
 * Generate a QUAD_SWIZZLE: apply the 4-channel swizzle \p swiz within
 * each quad (group of 4 channels) of the source.
 *
 * Uses an Align16 swizzled MOV where the hardware supports it (gen < 11,
 * 32-bit types), otherwise lowers to strided Align1 MOVs.
 */
void
fs_generator::generate_quad_swizzle(const fs_inst *inst,
                                    struct brw_reg dst, struct brw_reg src,
                                    unsigned swiz)
{
   /* Requires a quad. */
   assert(inst->exec_size >= 4);

   if (src.file == BRW_IMMEDIATE_VALUE ||
       has_scalar_region(src)) {
      /* The value is uniform across all channels */
      brw_MOV(p, dst, src);

   } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
      /* This only works on 8-wide 32-bit values */
      assert(inst->exec_size == 8);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.vstride == src.width + 1);
      brw_set_default_access_mode(p, BRW_ALIGN_16);
      struct brw_reg swiz_src = stride(src, 4, 4, 1);
      swiz_src.swizzle = swiz;
      brw_MOV(p, dst, swiz_src);

   } else {
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.vstride == src.width + 1);
      /* First swizzle component, used as base of the strided regions below. */
      const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz, 0));

      switch (swiz) {
      case BRW_SWIZZLE_XXXX:
      case BRW_SWIZZLE_YYYY:
      case BRW_SWIZZLE_ZZZZ:
      case BRW_SWIZZLE_WWWW:
         /* Replicate one component across each quad: <4,4,0>. */
         brw_MOV(p, dst, stride(src_0, 4, 4, 0));
         break;

      case BRW_SWIZZLE_XXZZ:
      case BRW_SWIZZLE_YYWW:
         /* Duplicate every other component: <2,2,0>. */
         brw_MOV(p, dst, stride(src_0, 2, 2, 0));
         break;

      case BRW_SWIZZLE_XYXY:
      case BRW_SWIZZLE_ZWZW:
         assert(inst->exec_size == 4);
         /* Repeat a pair of components: <0,2,1>. */
         brw_MOV(p, dst, stride(src_0, 0, 2, 1));
         break;

      default:
         assert(inst->force_writemask_all);
         brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1);

         /* General case: one strided MOV per swizzle component. */
         for (unsigned c = 0; c < 4; c++) {
            brw_inst *insn = brw_MOV(
               p, stride(suboffset(dst, c),
                         4 * inst->dst.stride, 1, 4 * inst->dst.stride),
               stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1, 0));

            if (devinfo->gen < 12) {
               /* The four MOVs write disjoint channels; suppress dependency
                * tracking between them.
                */
               brw_inst_set_no_dd_clear(devinfo, insn, c < 3);
               brw_inst_set_no_dd_check(devinfo, insn, c > 0);
            }

            brw_set_default_swsb(p, tgl_swsb_null());
         }

         break;
      }
   }
}
|
|
|
|
|
2015-09-29 22:32:02 +01:00
|
|
|
/**
 * Emit a SIMD8 URB read message: \p header holds the URB handles, the
 * response lands in \p dst.  Response length is derived from the number of
 * whole registers the instruction writes.
 */
void
fs_generator::generate_urb_read(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg header)
{
   assert(inst->size_written % REG_SIZE == 0);
   assert(header.file == BRW_GENERAL_REGISTER_FILE);
   assert(header.type == BRW_REGISTER_TYPE_UD);

   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   /* Retype the destination to UD to defeat the HW's 64-bit region stride
    * checks; the destination type of send messages is basically irrelevant.
    */
   brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, send, header);
   if (devinfo->gen < 12)
      brw_set_src1(p, send, brw_imm_ud(0u));

   brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
   brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);

   /* Per-slot variant adds a per-channel offset from the header. */
   if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT)
      brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);

   brw_inst_set_mlen(p->devinfo, send, inst->mlen);
   brw_inst_set_rlen(p->devinfo, send, inst->size_written / REG_SIZE);
   brw_inst_set_header_present(p->devinfo, send, true);
   brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
}
|
|
|
|
|
2014-10-21 07:00:50 +01:00
|
|
|
/**
 * Emit a SIMD8 URB write message with \p payload as header + data.
 * Handles the per-slot-offset and channel-mask message variants, EOT, and
 * the Gen10 TDR-clear workaround.
 */
void
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
{
   brw_inst *insn;

   /* WaClearTDRRegBeforeEOTForNonPS.
    *
    * WA: Clear tdr register before send EOT in all non-PS shader kernels
    *
    * mov(8) tdr0:ud 0x0:ud {NoMask}
    */
   if (inst->eot && p->devinfo->gen == 10) {
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, brw_tdr_reg(), brw_imm_uw(0));
      brw_pop_insn_state(p);
   }

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   /* URB writes return no data. */
   brw_set_dest(p, insn, brw_null_reg());
   brw_set_src0(p, insn, payload);
   if (devinfo->gen < 12)
      brw_set_src1(p, insn, brw_imm_ud(0u));

   brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
   brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);

   if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
       inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
      brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true);

   if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
       inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
      brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true);

   brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
   brw_inst_set_rlen(p->devinfo, insn, 0);
   brw_inst_set_eot(p->devinfo, insn, inst->eot);
   brw_inst_set_header_present(p->devinfo, insn, true);
   brw_inst_set_urb_global_offset(p->devinfo, insn, inst->offset);
}
|
|
|
|
|
2014-08-27 19:33:25 +01:00
|
|
|
/**
 * Emit the EOT message that terminates a compute-shader thread by notifying
 * the thread spawner, releasing the thread's resources.
 */
void
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
{
   struct brw_inst *insn;

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   /* No response expected; payload carries the required header data. */
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
   brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
   if (devinfo->gen < 12)
      brw_set_src1(p, insn, brw_imm_ud(0u));

   /* Terminate a compute shader by sending a message to the thread spawner.
    */
   brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
   brw_inst_set_mlen(devinfo, insn, 1);
   brw_inst_set_rlen(devinfo, insn, 0);
   brw_inst_set_eot(devinfo, insn, inst->eot);
   brw_inst_set_header_present(devinfo, insn, false);

   brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
   brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */

   /* Note that even though the thread has a URB resource associated with it,
    * we set the "do not dereference URB" bit, because the URB resource is
    * managed by the fixed-function unit, so it will free it automatically.
    */
   brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */

   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
|
|
|
|
|
2014-08-27 19:32:08 +01:00
|
|
|
/**
 * Emit a workgroup barrier: send the barrier message with payload \p src,
 * then WAIT on the notification.  The fs_inst parameter is unused.
 */
void
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
{
   brw_barrier(p, src);
   /* Clear the default SWSB state between the send and the WAIT (Gen12+
    * software scoreboard).
    */
   brw_set_default_swsb(p, tgl_swsb_null());
   brw_WAIT(p);
}
|
|
|
|
|
2017-06-14 19:06:45 +01:00
|
|
|
/**
 * Emit linear interpolation of a varying: dst = PLN(interp, delta_x/y),
 * or the equivalent LINE+MAC sequence where PLN is unavailable or its
 * alignment restrictions cannot be met.
 *
 * src[0] holds the barycentric deltas, src[1] the plane coefficients.
 * Returns true when more than one instruction was emitted (LINE+MAC
 * paths), false when a single PLN sufficed.
 */
bool
fs_generator::generate_linterp(fs_inst *inst,
                               struct brw_reg dst, struct brw_reg *src)
{
   /* PLN reads:
    *                      /   in SIMD16   \
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
    *    -----------------------------------
    *
    * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys:
    *
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|        |        | in SIMD8
    *   |-----------------------------------|
    *   |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
    *    -----------------------------------
    *
    * See also: emit_interpolation_setup_gen4().
    */
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = offset(src[0], inst->exec_size / 8);
   struct brw_reg interp = src[1];
   brw_inst *i[2];

   /* nir_lower_interpolation() will do the lowering to MAD instructions for
    * us on gen11+
    */
   assert(devinfo->gen < 11);

   if (devinfo->has_pln) {
      if (devinfo->gen <= 6 && (delta_x.nr & 1) != 0) {
         /* From the Sandy Bridge PRM Vol. 4, Pt. 2, Section 8.3.53, "Plane":
          *
          *    "[DevSNB]:<src1> must be even register aligned.
          *
          * This restriction is lifted on Ivy Bridge.
          *
          * This means that we need to split PLN into LINE+MAC on-the-fly.
          * Unfortunately, the inputs are laid out for PLN and not LINE+MAC so
          * we have to split into SIMD8 pieces.  For gen4 (!has_pln), the
          * coordinate registers are laid out differently so we leave it as a
          * SIMD16 instruction.
          */
         assert(inst->exec_size == 8 || inst->exec_size == 16);
         assert(inst->group % 16 == 0);

         brw_push_insn_state(p);
         brw_set_default_exec_size(p, BRW_EXECUTE_8);

         /* Thanks to two accumulators, we can emit all the LINEs and then all
          * the MACs.  This improves parallelism a bit.
          */
         for (unsigned g = 0; g < inst->exec_size / 8; g++) {
            brw_inst *line = brw_LINE(p, brw_null_reg(), interp,
                                      offset(delta_x, g * 2));
            brw_inst_set_group(devinfo, line, inst->group + g * 8);

            /* LINE writes the accumulator automatically on gen4-5.  On Sandy
             * Bridge and later, we have to explicitly enable it.
             */
            if (devinfo->gen >= 6)
               brw_inst_set_acc_wr_control(p->devinfo, line, true);

            /* brw_set_default_saturate() is called before emitting
             * instructions, so the saturate bit is set in each instruction,
             * so we need to unset it on the LINE instructions.
             */
            brw_inst_set_saturate(p->devinfo, line, false);
         }

         for (unsigned g = 0; g < inst->exec_size / 8; g++) {
            brw_inst *mac = brw_MAC(p, offset(dst, g), suboffset(interp, 1),
                                    offset(delta_x, g * 2 + 1));
            brw_inst_set_group(devinfo, mac, inst->group + g * 8);
            brw_inst_set_cond_modifier(p->devinfo, mac, inst->conditional_mod);
         }

         brw_pop_insn_state(p);

         return true;
      } else {
         brw_PLN(p, dst, interp, delta_x);

         return false;
      }
   } else {
      /* No PLN hardware (gen4): classic LINE+MAC pair. */
      i[0] = brw_LINE(p, brw_null_reg(), interp, delta_x);
      i[1] = brw_MAC(p, dst, suboffset(interp, 1), delta_y);

      brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);

      /* brw_set_default_saturate() is called before emitting instructions, so
       * the saturate bit is set in each instruction, so we need to unset it on
       * the first instruction.
       */
      brw_inst_set_saturate(p->devinfo, i[0], false);

      return true;
   }
}
|
|
|
|
|
2015-04-13 15:55:49 +01:00
|
|
|
/**
 * Emit a sampler resinfo message to query the size of the buffer bound at
 * immediate surface index \p surf_index; the raw SINT32 result lands in
 * \p dst.  Gen7+ only.
 */
void
fs_generator::generate_get_buffer_size(fs_inst *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg surf_index)
{
   assert(devinfo->gen >= 7);
   assert(surf_index.file == BRW_IMMEDIATE_VALUE);

   uint32_t simd_mode;
   int rlen = 4;

   switch (inst->exec_size) {
   case 8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      break;
   case 16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      break;
   default:
      unreachable("Invalid width for texture instruction");
   }

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      /* SIMD16 responses take twice as many registers. */
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surf_index.ud,
              0,
              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
              rlen, /* response length */
              inst->mlen,
              inst->header_size > 0,
              simd_mode,
              BRW_SAMPLER_RETURN_FORMAT_SINT32);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/**
 * Emit a pre-Gen7 sampler message for a texturing instruction: pick the
 * message type, SIMD mode and return format from the opcode and destination
 * type, set up the message header if needed, and emit the SAMPLE send.
 */
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
                           struct brw_reg surface_index,
                           struct brw_reg sampler_index)
{
   assert(devinfo->gen < 7);
   assert(inst->size_written % REG_SIZE == 0);
   int msg_type = -1;
   uint32_t simd_mode;
   uint32_t return_format;

   /* Sampler EOT message of less than the dispatch width would kill the
    * thread prematurely.
    */
   assert(!inst->eot || inst->exec_size == dispatch_width);

   /* Return format follows the destination register type. */
   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
    * is set as part of the message descriptor.  On gen4, the PRM seems to
    * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
    * later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
    * gone from the message descriptor entirely and you just get UINT32 all
    * the time regardless.  Since we can really only do non-UINT32 on gen4,
    * just stomp it to UINT32 all the time.
    */
   if (inst->opcode == SHADER_OPCODE_TXS)
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;

   switch (inst->exec_size) {
   case 8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      break;
   case 16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      break;
   default:
      unreachable("Invalid width for texture instruction");
   }

   if (devinfo->gen >= 5) {
      /* Gen5-6 message types encode the operation directly. */
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case SHADER_OPCODE_TXD:
         assert(!inst->shadow_compare);
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_CMS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_LOD:
         msg_type = GEN5_SAMPLER_MESSAGE_LOD;
         break;
      case SHADER_OPCODE_TG4:
         assert(devinfo->gen == 6);
         assert(!inst->shadow_compare);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         break;
      case SHADER_OPCODE_SAMPLEINFO:
         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
         break;
      default:
         unreachable("not reached");
      }
   } else {
      /* Gen4 message types additionally encode SIMD width, and shadow
       * compare / dispatch width are often implied by message length.
       */
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         if (inst->exec_size == 8) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
            if (inst->shadow_compare) {
               assert(inst->mlen == 6);
            } else {
               assert(inst->mlen <= 4);
            }
         } else {
            if (inst->shadow_compare) {
               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
               assert(inst->mlen == 9);
            } else {
               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
               assert(inst->mlen <= 7 && inst->mlen % 2 == 1);
            }
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->exec_size == 8);
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->exec_size == 8);
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->exec_size == 8);
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case SHADER_OPCODE_TXF:
         assert(inst->mlen <= 9 && inst->mlen % 2 == 1);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case SHADER_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         unreachable("not reached");
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      dst = vec16(dst);
   }

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present. If there's a texture offset,
    * we need to set it up explicitly and load the offset bitfield.
    * Otherwise, we can use an implied move from g0 to the first message reg.
    */
   struct brw_reg src = brw_null_reg();
   if (inst->header_size != 0) {
      if (devinfo->gen < 6 && !inst->offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      } else {
         /* Save the default SWSB so header setup can sequence against the
          * surrounding code (Gen12+ software scoreboard).
          */
         const tgl_swsb swsb = brw_get_default_swsb(p);
         assert(inst->base_mrf != -1);
         struct brw_reg header_reg = brw_message_reg(inst->base_mrf);

         brw_push_insn_state(p);
         brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
         brw_set_default_exec_size(p, BRW_EXECUTE_8);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_MOV(p, header_reg, brw_vec8_grf(0, 0));
         brw_set_default_swsb(p, tgl_swsb_regdist(1));

         brw_set_default_exec_size(p, BRW_EXECUTE_1);
         if (inst->offset) {
            /* Set the offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header_reg, 2),
                       brw_imm_ud(inst->offset));
         }

         brw_pop_insn_state(p);
         brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
      }
   }

   /* Gather messages index a separate binding-table section. */
   uint32_t base_binding_table_index;
   switch (inst->opcode) {
   case SHADER_OPCODE_TG4:
      base_binding_table_index = prog_data->binding_table.gather_texture_start;
      break;
   default:
      base_binding_table_index = prog_data->binding_table.texture_start;
      break;
   }

   assert(surface_index.file == BRW_IMMEDIATE_VALUE);
   assert(sampler_index.file == BRW_IMMEDIATE_VALUE);

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surface_index.ud + base_binding_table_index,
              sampler_index.ud % 16,
              msg_type,
              inst->size_written / REG_SIZE,
              inst->mlen,
              inst->header_size != 0,
              simd_mode,
              return_format);
}
|
|
|
|
|
|
|
|
|
|
|
|
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
|
|
|
|
* looking like:
|
|
|
|
*
|
|
|
|
* arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
|
|
|
|
*
|
2013-09-12 06:00:52 +01:00
|
|
|
* Ideally, we want to produce:
|
2011-05-25 00:34:27 +01:00
|
|
|
*
|
|
|
|
* DDX DDY
|
|
|
|
* dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
|
|
|
|
* (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
|
|
|
|
* (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
|
|
|
|
* (ss0.br - ss0.bl) (ss0.tr - ss0.br)
|
|
|
|
* (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
|
|
|
|
* (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
|
|
|
|
* (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
|
|
|
|
* (ss1.br - ss1.bl) (ss1.tr - ss1.br)
|
|
|
|
*
|
|
|
|
* and add another set of two more subspans if in 16-pixel dispatch mode.
|
|
|
|
*
|
|
|
|
* For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
|
|
|
|
* for each pair, and vertstride = 2 jumps us 2 elements after processing a
|
2013-09-12 06:00:52 +01:00
|
|
|
* pair. But the ideal approximation may impose a huge performance cost on
|
|
|
|
* sample_d. On at least Haswell, sample_d instruction does some
|
|
|
|
* optimizations if the same LOD is used for all pixels in the subspan.
|
|
|
|
*
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
* For DDY, we need to use ALIGN16 mode since it's capable of doing the
|
|
|
|
* appropriate swizzling.
|
2011-05-25 00:34:27 +01:00
|
|
|
*/
|
|
|
|
void
|
2017-06-15 23:41:40 +01:00
|
|
|
fs_generator::generate_ddx(const fs_inst *inst,
|
2014-11-08 09:39:14 +00:00
|
|
|
struct brw_reg dst, struct brw_reg src)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2013-09-12 06:00:52 +01:00
|
|
|
unsigned vstride, width;
|
|
|
|
|
2019-07-26 00:28:44 +01:00
|
|
|
if (devinfo->gen >= 8) {
|
|
|
|
if (inst->opcode == FS_OPCODE_DDX_FINE) {
|
|
|
|
/* produce accurate derivatives */
|
|
|
|
vstride = BRW_VERTICAL_STRIDE_2;
|
|
|
|
width = BRW_WIDTH_2;
|
|
|
|
} else {
|
|
|
|
/* replicate the derivative at the top-left pixel to other pixels */
|
|
|
|
vstride = BRW_VERTICAL_STRIDE_4;
|
|
|
|
width = BRW_WIDTH_4;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct brw_reg src0 = byte_offset(src, type_sz(src.type));;
|
|
|
|
struct brw_reg src1 = src;
|
2013-09-12 06:00:52 +01:00
|
|
|
|
2019-07-26 00:28:44 +01:00
|
|
|
src0.vstride = vstride;
|
|
|
|
src0.width = width;
|
|
|
|
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
|
|
|
|
src1.vstride = vstride;
|
|
|
|
src1.width = width;
|
|
|
|
src1.hstride = BRW_HORIZONTAL_STRIDE_0;
|
2017-06-16 01:20:29 +01:00
|
|
|
|
2019-07-26 00:28:44 +01:00
|
|
|
brw_ADD(p, dst, src0, negate(src1));
|
|
|
|
} else {
|
|
|
|
/* On Haswell and earlier, the region used above appears to not work
|
|
|
|
* correctly for compressed instructions. At least on Haswell and
|
|
|
|
* Iron Lake, compressed ALIGN16 instructions do work. Since we
|
|
|
|
* would have to split to SIMD8 no matter which method we choose, we
|
|
|
|
* may as well use ALIGN16 on all platforms gen7 and earlier.
|
|
|
|
*/
|
|
|
|
struct brw_reg src0 = stride(src, 4, 4, 1);
|
|
|
|
struct brw_reg src1 = stride(src, 4, 4, 1);
|
|
|
|
if (inst->opcode == FS_OPCODE_DDX_FINE) {
|
|
|
|
src0.swizzle = BRW_SWIZZLE_XXZZ;
|
|
|
|
src1.swizzle = BRW_SWIZZLE_YYWW;
|
|
|
|
} else {
|
|
|
|
src0.swizzle = BRW_SWIZZLE_XXXX;
|
|
|
|
src1.swizzle = BRW_SWIZZLE_YYYY;
|
|
|
|
}
|
2017-06-16 01:20:29 +01:00
|
|
|
|
2019-07-26 00:28:44 +01:00
|
|
|
brw_push_insn_state(p);
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
|
|
|
brw_ADD(p, dst, negate(src0), src1);
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2012-06-20 21:40:45 +01:00
|
|
|
/* Generate code for the DDY (screen-space derivative along Y) opcodes.
 *
 * NOTE(review): the previous comment here described a "negate_value"
 * parameter that no longer exists in this signature; the sign of the
 * derivative is now handled by the unconditional negate() on the first
 * source below — confirm against the IR emitter if the FBO-origin flip
 * still matters to callers.
 */
void
fs_generator::generate_ddy(const fs_inst *inst,
                           struct brw_reg dst, struct brw_reg src)
{
   const uint32_t type_size = type_sz(src.type);

   if (inst->opcode == FS_OPCODE_DDY_FINE) {
      /* produce accurate derivatives.
       *
       * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU)
       * "Register Region Restrictions", Section "1. Special Restrictions":
       *
       *    "In Align16 mode, the channel selects and channel enables apply
       *    to a pair of half-floats, because these parameters are defined
       *    for DWord elements ONLY. This is applicable when both source and
       *    destination are half-floats."
       *
       * So for half-float operations we use the Gen11+ Align1 path. CHV
       * inherits its FP16 hardware from SKL, so it is not affected.
       */
      if (devinfo->gen >= 11 ||
          (devinfo->is_broadwell && src.type == BRW_REGISTER_TYPE_HF)) {
         src = stride(src, 0, 2, 1);

         brw_push_insn_state(p);
         brw_set_default_exec_size(p, BRW_EXECUTE_4);
         /* One 4-wide ADD per subspan: bottom row minus top row. */
         for (uint32_t g = 0; g < inst->exec_size; g += 4) {
            brw_set_default_group(p, inst->group + g);
            brw_ADD(p, byte_offset(dst, g * type_size),
                       negate(byte_offset(src,  g * type_size)),
                       byte_offset(src, (g + 2) * type_size));
            /* The per-subspan ADDs are independent, so no SWSB dependency
             * is needed between them.
             */
            brw_set_default_swsb(p, tgl_swsb_null());
         }
         brw_pop_insn_state(p);
      } else {
         /* Align16 path: XYXY / ZWZW swizzles subtract the top row of the
          * 2x2 subspan from the bottom row per channel.
          */
         struct brw_reg src0 = stride(src, 4, 4, 1);
         struct brw_reg src1 = stride(src, 4, 4, 1);
         src0.swizzle = BRW_SWIZZLE_XYXY;
         src1.swizzle = BRW_SWIZZLE_ZWZW;

         brw_push_insn_state(p);
         brw_set_default_access_mode(p, BRW_ALIGN_16);
         brw_ADD(p, dst, negate(src0), src1);
         brw_pop_insn_state(p);
      }
   } else {
      /* replicate the derivative at the top-left pixel to other pixels */
      if (devinfo->gen >= 8) {
         struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size);
         struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size);

         brw_ADD(p, dst, negate(src0), src1);
      } else {
         /* On Haswell and earlier, the region used above appears to not work
          * correctly for compressed instructions.  At least on Haswell and
          * Iron Lake, compressed ALIGN16 instructions do work.  Since we
          * would have to split to SIMD8 no matter which method we choose, we
          * may as well use ALIGN16 on all platforms gen7 and earlier.
          */
         struct brw_reg src0 = stride(src, 4, 4, 1);
         struct brw_reg src1 = stride(src, 4, 4, 1);
         src0.swizzle = BRW_SWIZZLE_XXXX;
         src1.swizzle = BRW_SWIZZLE_ZZZZ;

         brw_push_insn_state(p);
         brw_set_default_access_mode(p, BRW_ALIGN_16);
         brw_ADD(p, dst, negate(src0), src1);
         brw_pop_insn_state(p);
      }
   }
}
|
|
|
|
|
2012-12-06 18:15:08 +00:00
|
|
|
/* Emit the HALT used by discard: records the instruction's position so its
 * jump targets can be patched later.
 */
void
fs_generator::generate_discard_jump(fs_inst *)
{
   assert(devinfo->gen >= 6);

   /* This HALT will be patched up at FB write time to point UIP at the end of
    * the program, and at brw_uip_jip() JIP will be set to the end of the
    * current block (or the program).
    */
   /* Record the position BEFORE emitting, so the patch list points at the
    * HALT itself.
    */
   this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
   gen6_HALT(p);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/* Spill: write the payload in src out to scratch space, splitting into
 * 16-wide (or narrower) chunks as required by the message width limits.
 */
void
fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
{
   /* The 32-wide messages only respect the first 16-wide half of the channel
    * enable signals which are replicated identically for the second group of
    * 16 channels, so we cannot use them unless the write is marked
    * force_writemask_all.
    */
   const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
                               MIN2(16, inst->exec_size);
   const unsigned block_size = 4 * lower_size / REG_SIZE;
   /* Capture the incoming SWSB annotation; it is re-applied piecewise to the
    * first source read and the last destination write below.
    */
   const tgl_swsb swsb = brw_get_default_swsb(p);
   assert(inst->mlen != 0);

   brw_push_insn_state(p);
   brw_set_default_exec_size(p, cvt(lower_size) - 1);
   brw_set_default_compression(p, lower_size > 8);

   for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
      brw_set_default_group(p, inst->group + lower_size * i);

      if (i > 0) {
         /* Wait for the previous chunk's send to read its sources before
          * reusing the MRF payload register.
          */
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_SYNC(p, TGL_SYNC_ALLRD);
      } else {
         brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
      }

      /* Stage this chunk of src into the message payload. */
      brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
              retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));

      if (i + 1 < inst->exec_size / lower_size)
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
      else
         brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));

      brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),
                                    block_size,
                                    inst->offset + block_size * REG_SIZE * i);
   }

   brw_pop_insn_state(p);
}
|
|
|
|
|
|
|
|
void
|
2013-10-16 19:45:06 +01:00
|
|
|
fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2016-05-16 23:47:39 +01:00
|
|
|
assert(inst->exec_size <= 16 || inst->force_writemask_all);
|
2011-05-25 00:34:27 +01:00
|
|
|
assert(inst->mlen != 0);
|
|
|
|
|
2013-10-16 20:16:51 +01:00
|
|
|
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf),
|
2014-10-24 19:35:51 +01:00
|
|
|
inst->exec_size / 8, inst->offset);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2013-10-16 19:51:22 +01:00
|
|
|
/* Unspill (Gen7+ variant): uses the header-less scratch block read, so no
 * MRF payload is needed.
 */
void
fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->exec_size <= 16 || inst->force_writemask_all);

   gen7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/* Load uniform (push-constant-style) data from a constant buffer via an
 * OWord block read.  Both the surface index and the read offset must be
 * immediates here; the gen7 variant below handles indirect surfaces.
 */
void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(type_sz(dst.type) == 4);
   assert(inst->mlen != 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.ud;

   assert(offset.file == BRW_IMMEDIATE_VALUE &&
          offset.type == BRW_REGISTER_TYPE_UD);
   uint32_t read_offset = offset.ud;

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        read_offset, surf_index);
}
|
|
|
|
|
2012-12-05 08:06:30 +00:00
|
|
|
/* Gen7+ uniform pull-constant load: an OWord block read from the constant
 * cache dataport.  Handles both an immediate surface index (direct SEND
 * descriptor) and a dynamic one (descriptor built in a0.0).
 */
void
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg payload)
{
   assert(index.type == BRW_REGISTER_TYPE_UD);
   assert(payload.file == BRW_GENERAL_REGISTER_FILE);
   assert(type_sz(dst.type) == 4);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      /* Surface index known at compile time: bake it into the descriptor. */
      const uint32_t surf_index = index.ud;

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_pop_insn_state(p);

      brw_inst_set_sfid(devinfo, send, GEN6_SFID_DATAPORT_CONSTANT_CACHE);
      brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
      brw_set_desc(p, send,
                   brw_message_desc(devinfo, 1, DIV_ROUND_UP(inst->size_written,
                                                             REG_SIZE), true) |
                   brw_dp_read_desc(devinfo, surf_index,
                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                                    GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                                    BRW_DATAPORT_READ_TARGET_DATA_CACHE));

   } else {
      /* Dynamic surface index: mask it into the address register and send
       * with an indirect descriptor.
       */
      const tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      /* a0.0 = surf_index & 0xff */
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
      brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
      brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
      brw_set_dest(p, insn_and, addr);
      brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
      brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));

      /* dst = send(payload, a0.0 | <descriptor>) */
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
      brw_send_indirect_message(
         p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
         retype(dst, BRW_REGISTER_TYPE_UD),
         retype(payload, BRW_REGISTER_TYPE_UD), addr,
         brw_message_desc(devinfo, 1,
                          DIV_ROUND_UP(inst->size_written, REG_SIZE), true) |
         brw_dp_read_desc(devinfo, 0 /* surface */,
                          BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                          GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                          BRW_DATAPORT_READ_TARGET_DATA_CACHE),
         false /* EOT */);

      brw_pop_insn_state(p);
   }
}
|
|
|
|
|
2012-11-07 19:18:34 +00:00
|
|
|
/* Pre-Gen7 varying pull-constant load: implemented as a sampler LD message,
 * since those platforms have no dedicated constant-buffer dataport path.
 */
void
fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index)
{
   assert(devinfo->gen < 7); /* Should use the gen7 variant. */
   assert(inst->header_size != 0);
   assert(inst->mlen);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.ud;

   uint32_t simd_mode, rlen, msg_type;
   if (inst->exec_size == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      rlen = 8;
   } else {
      assert(inst->exec_size == 8);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      rlen = 4;
   }

   if (devinfo->gen >= 5)
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
   else {
      /* We always use the SIMD16 message so that we only have to load U, and
       * not V or R.
       */
      /* NOTE: this deliberately overrides the rlen/simd_mode chosen above. */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
      assert(inst->mlen == 3);
      assert(inst->size_written == 8 * REG_SIZE);
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   }

   /* Sampler messages need a g0 header. */
   struct brw_reg header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_compression(devinfo, send, false);
   brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER);
   brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
   brw_set_src0(p, send, header);
   if (devinfo->gen < 6)
      brw_inst_set_base_mrf(p->devinfo, send, inst->base_mrf);

   /* Our surface is set up as floats, regardless of what actual data is
    * stored in it.
    */
   uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   brw_set_desc(p, send,
                brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size) |
                brw_sampler_desc(devinfo, surf_index,
                                 0, /* sampler (unused) */
                                 msg_type, simd_mode, return_format));
}
|
|
|
|
|
2013-11-18 08:13:13 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg src,
|
|
|
|
struct brw_reg msg_data,
|
|
|
|
unsigned msg_type)
|
|
|
|
{
|
2016-04-26 02:06:13 +01:00
|
|
|
const bool has_payload = inst->src[0].file != BAD_FILE;
|
2015-07-17 14:40:03 +01:00
|
|
|
assert(msg_data.type == BRW_REGISTER_TYPE_UD);
|
2016-04-26 02:06:13 +01:00
|
|
|
assert(inst->size_written % REG_SIZE == 0);
|
2013-11-18 08:13:13 +00:00
|
|
|
|
|
|
|
brw_pixel_interpolator_query(p,
|
|
|
|
retype(dst, BRW_REGISTER_TYPE_UW),
|
2016-04-26 02:06:13 +01:00
|
|
|
/* If we don't have a payload, what we send doesn't matter */
|
|
|
|
has_payload ? src : brw_vec8_grf(0, 0),
|
2013-11-18 08:13:13 +00:00
|
|
|
inst->pi_noperspective,
|
|
|
|
msg_type,
|
2015-07-17 14:40:03 +01:00
|
|
|
msg_data,
|
2016-04-26 02:06:13 +01:00
|
|
|
has_payload ? 2 * inst->exec_size / 8 : 1,
|
2016-09-07 21:38:20 +01:00
|
|
|
inst->size_written / REG_SIZE);
|
2013-11-18 08:13:13 +00:00
|
|
|
}
|
|
|
|
|
2013-10-25 00:17:08 +01:00
|
|
|
/* Sets vstride=1, width=4, hstride=0 of register src1 during
 * the ADD instruction.
 */
void
fs_generator::generate_set_sample_id(fs_inst *inst,
                                     struct brw_reg dst,
                                     struct brw_reg src0,
                                     struct brw_reg src1)
{
   assert(dst.type == BRW_REGISTER_TYPE_D ||
          dst.type == BRW_REGISTER_TYPE_UD);
   assert(src0.type == BRW_REGISTER_TYPE_D ||
          src0.type == BRW_REGISTER_TYPE_UD);

   /* Broadcast each element of src1 to 4 channels (one 2x2 subspan). */
   const struct brw_reg reg = stride(src1, 1, 4, 0);
   /* Pre-Gen8 hardware cannot do this in a single 16-wide instruction. */
   const unsigned lower_size = MIN2(inst->exec_size,
                                    devinfo->gen >= 8 ? 16 : 8);

   for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
      /* The src0 offset accounts for its region (vstride/width) so each
       * chunk reads the matching slice of the source.
       */
      brw_inst *insn = brw_ADD(p, offset(dst, i * lower_size / 8),
                               offset(src0, (src0.vstride == 0 ? 0 : (1 << (src0.vstride - 1)) *
                                             (i * lower_size / (1 << src0.width))) *
                                            type_sz(src0.type) / REG_SIZE),
                               suboffset(reg, i * lower_size / 4));
      brw_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1);
      brw_inst_set_group(devinfo, insn, inst->group + lower_size * i);
      brw_inst_set_compression(devinfo, insn, lower_size > 8);
      /* Chunks are independent; drop any SWSB annotation for the next one. */
      brw_set_default_swsb(p, tgl_swsb_null());
   }
}
|
|
|
|
|
2013-01-09 19:46:42 +00:00
|
|
|
/* Implement GLSL packHalf2x16 from two float sources: y goes to the high
 * 16 bits of each dst channel, x to the low 16 bits.
 */
void
fs_generator::generate_pack_half_2x16_split(fs_inst *,
                                            struct brw_reg dst,
                                            struct brw_reg x,
                                            struct brw_reg y)
{
   assert(devinfo->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_UD);
   assert(x.type == BRW_REGISTER_TYPE_F);
   assert(y.type == BRW_REGISTER_TYPE_F);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
    *
    *   Because this instruction does not have a 16-bit floating-point type,
    *   the destination data type must be Word (W).
    *
    *   The destination must be DWord-aligned and specify a horizontal stride
    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
    *   each destination channel and the upper word is not modified.
    */
   struct brw_reg dst_w = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);

   /* Give each 32-bit channel of dst the form below, where "." means
    * unchanged.
    *   0x....hhhh
    */
   brw_F32TO16(p, dst_w, y);

   /* Now the form:
    *   0xhhhh0000
    */
   /* regdist(1): the SHL must wait for the F32TO16 above to land. */
   brw_set_default_swsb(p, tgl_swsb_regdist(1));
   brw_SHL(p, dst, dst, brw_imm_ud(16u));

   /* And, finally the form of packHalf2x16's output:
    *   0xhhhhllll
    */
   brw_F32TO16(p, dst_w, x);
}
|
|
|
|
|
2013-03-19 22:28:11 +00:00
|
|
|
void
|
intel/compiler: Silence unused parameter warnings in generate_foo methods
Since all of the fs_generator::generate_foo methods take a fs_inst * as
the first parameter, just remove the name to quiet the compiler.
src/intel/compiler/brw_fs_generator.cpp: In member function ‘void fs_generator::generate_barrier(fs_inst*, brw_reg)’:
src/intel/compiler/brw_fs_generator.cpp:743:41: warning: unused parameter ‘inst’ [-Wunused-parameter]
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
^~~~
src/intel/compiler/brw_fs_generator.cpp: In member function ‘void fs_generator::generate_discard_jump(fs_inst*)’:
src/intel/compiler/brw_fs_generator.cpp:1326:46: warning: unused parameter ‘inst’ [-Wunused-parameter]
fs_generator::generate_discard_jump(fs_inst *inst)
^~~~
src/intel/compiler/brw_fs_generator.cpp: In member function ‘void fs_generator::generate_pack_half_2x16_split(fs_inst*, brw_reg, brw_reg, brw_reg)’:
src/intel/compiler/brw_fs_generator.cpp:1675:54: warning: unused parameter ‘inst’ [-Wunused-parameter]
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
^~~~
src/intel/compiler/brw_fs_generator.cpp: In member function ‘void fs_generator::generate_shader_time_add(fs_inst*, brw_reg, brw_reg, brw_reg)’:
src/intel/compiler/brw_fs_generator.cpp:1743:49: warning: unused parameter ‘inst’ [-Wunused-parameter]
fs_generator::generate_shader_time_add(fs_inst *inst,
^~~~
src/intel/compiler/brw_vec4_generator.cpp: In function ‘void generate_set_simd4x2_header_gen9(brw_codegen*, brw::vec4_instruction*, brw_reg)’:
src/intel/compiler/brw_vec4_generator.cpp:1412:52: warning: unused parameter ‘inst’ [-Wunused-parameter]
vec4_instruction *inst,
^~~~
src/intel/compiler/brw_vec4_generator.cpp: In function ‘void generate_mov_indirect(brw_codegen*, brw::vec4_instruction*, brw_reg, brw_reg, brw_reg, brw_reg)’:
src/intel/compiler/brw_vec4_generator.cpp:1430:41: warning: unused parameter ‘inst’ [-Wunused-parameter]
vec4_instruction *inst,
^~~~
src/intel/compiler/brw_vec4_generator.cpp:1432:63: warning: unused parameter ‘length’ [-Wunused-parameter]
struct brw_reg indirect, struct brw_reg length)
^~~~~~
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-03-29 00:29:45 +01:00
|
|
|
fs_generator::generate_shader_time_add(fs_inst *,
|
2013-03-19 22:28:11 +00:00
|
|
|
struct brw_reg payload,
|
|
|
|
struct brw_reg offset,
|
|
|
|
struct brw_reg value)
|
|
|
|
{
|
2019-09-27 07:38:24 +01:00
|
|
|
const tgl_swsb swsb = brw_get_default_swsb(p);
|
|
|
|
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-03-19 22:28:11 +00:00
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_mask_control(p, true);
|
2019-09-27 07:38:24 +01:00
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2013-03-19 22:28:11 +00:00
|
|
|
|
|
|
|
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
|
|
|
|
struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
|
|
|
|
offset.type);
|
|
|
|
struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
|
|
|
|
value.type);
|
|
|
|
|
|
|
|
assert(offset.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
if (value.file == BRW_GENERAL_REGISTER_FILE) {
|
|
|
|
value.width = BRW_WIDTH_1;
|
|
|
|
value.hstride = BRW_HORIZONTAL_STRIDE_0;
|
|
|
|
value.vstride = BRW_VERTICAL_STRIDE_0;
|
|
|
|
} else {
|
|
|
|
assert(value.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Trying to deal with setup of the params from the IR is crazy in the FS8
|
|
|
|
* case, and we don't really care about squeezing every bit of performance
|
|
|
|
* out of this path, so we just emit the MOVs from here.
|
|
|
|
*/
|
|
|
|
brw_MOV(p, payload_offset, offset);
|
2019-09-27 07:38:24 +01:00
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
2013-03-19 22:28:11 +00:00
|
|
|
brw_MOV(p, payload_value, value);
|
2019-09-27 07:38:24 +01:00
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
2013-10-02 22:07:40 +01:00
|
|
|
brw_shader_time_add(p, payload,
|
2014-08-29 20:50:46 +01:00
|
|
|
prog_data->binding_table.shader_time_start);
|
2013-03-19 22:28:11 +00:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2014-10-28 02:40:47 +00:00
|
|
|
void
|
|
|
|
fs_generator::enable_debug(const char *shader_name)
|
|
|
|
{
|
|
|
|
debug_flag = true;
|
|
|
|
this->shader_name = shader_name;
|
|
|
|
}
|
|
|
|
|
2014-11-14 00:28:08 +00:00
|
|
|
int
|
2019-04-24 05:19:56 +01:00
|
|
|
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|
|
|
struct brw_compile_stats *stats)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2014-11-14 00:28:08 +00:00
|
|
|
/* align to 64 byte boundary. */
|
|
|
|
while (p->next_insn_offset % 64)
|
|
|
|
brw_NOP(p);
|
|
|
|
|
|
|
|
this->dispatch_width = dispatch_width;
|
|
|
|
|
2014-05-25 18:42:32 +01:00
|
|
|
int start_offset = p->next_insn_offset;
|
2015-03-16 19:18:31 +00:00
|
|
|
int spill_count = 0, fill_count = 0;
|
2014-08-06 09:27:58 +01:00
|
|
|
int loop_count = 0;
|
2014-05-25 18:42:32 +01:00
|
|
|
|
2017-11-16 01:08:42 +00:00
|
|
|
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
|
2014-05-25 18:42:32 +01:00
|
|
|
|
2014-07-12 05:16:13 +01:00
|
|
|
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
2019-05-29 23:46:55 +01:00
|
|
|
if (inst->opcode == SHADER_OPCODE_UNDEF)
|
|
|
|
continue;
|
|
|
|
|
2018-10-29 20:06:14 +00:00
|
|
|
struct brw_reg src[4], dst;
|
2014-05-31 00:41:32 +01:00
|
|
|
unsigned int last_insn_offset = p->next_insn_offset;
|
2014-12-30 20:56:13 +00:00
|
|
|
bool multiple_instructions_emitted = false;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2016-05-03 07:32:13 +01:00
|
|
|
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
|
|
|
|
* "Register Region Restrictions" section: for BDW, SKL:
|
|
|
|
*
|
|
|
|
* "A POW/FDIV operation must not be followed by an instruction
|
|
|
|
* that requires two destination registers."
|
|
|
|
*
|
|
|
|
* The documentation is often lacking annotations for Atom parts,
|
|
|
|
* and empirically this affects CHV as well.
|
|
|
|
*/
|
|
|
|
if (devinfo->gen >= 8 &&
|
2017-08-25 23:52:27 +01:00
|
|
|
devinfo->gen <= 9 &&
|
2016-05-03 07:32:13 +01:00
|
|
|
p->nr_insn > 1 &&
|
|
|
|
brw_inst_opcode(devinfo, brw_last_inst) == BRW_OPCODE_MATH &&
|
|
|
|
brw_inst_math_function(devinfo, brw_last_inst) == BRW_MATH_FUNCTION_POW &&
|
|
|
|
inst->dst.component_size(inst->exec_size) > REG_SIZE) {
|
|
|
|
brw_NOP(p);
|
|
|
|
last_insn_offset = p->next_insn_offset;
|
|
|
|
}
|
|
|
|
|
2014-05-19 18:20:37 +01:00
|
|
|
if (unlikely(debug_flag))
|
2017-11-16 01:08:42 +00:00
|
|
|
disasm_annotate(disasm_info, inst, p->next_insn_offset);
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2016-05-19 02:48:04 +01:00
|
|
|
/* If the instruction writes to more than one register, it needs to be
|
|
|
|
* explicitly marked as compressed on Gen <= 5. On Gen >= 6 the
|
|
|
|
* hardware figures out by itself what the right compression mode is,
|
|
|
|
* but we still need to know whether the instruction is compressed to
|
|
|
|
* set up the source register regions appropriately.
|
|
|
|
*
|
|
|
|
* XXX - This is wrong for instructions that write a single register but
|
|
|
|
* read more than one which should strictly speaking be treated as
|
|
|
|
* compressed. For instructions that don't write any registers it
|
|
|
|
* relies on the destination being a null register of the correct
|
|
|
|
* type and regioning so the instruction is considered compressed
|
|
|
|
* or not accordingly.
|
|
|
|
*/
|
2016-05-20 23:25:28 +01:00
|
|
|
const bool compressed =
|
|
|
|
inst->dst.component_size(inst->exec_size) > REG_SIZE;
|
|
|
|
brw_set_default_compression(p, compressed);
|
2016-05-21 00:14:13 +01:00
|
|
|
brw_set_default_group(p, inst->group);
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2016-05-16 23:09:17 +01:00
|
|
|
for (unsigned int i = 0; i < inst->sources; i++) {
|
2016-07-18 08:27:56 +01:00
|
|
|
src[i] = brw_reg_from_fs_reg(devinfo, inst,
|
|
|
|
&inst->src[i], compressed);
|
2016-05-16 23:09:17 +01:00
|
|
|
/* The accumulator result appears to get used for the
|
|
|
|
* conditional modifier generation. When negating a UD
|
|
|
|
* value, there is a 33rd bit generated for the sign in the
|
|
|
|
* accumulator value, so now you can't check, for example,
|
|
|
|
* equality with a 32-bit value. See piglit fs-op-neg-uvec4.
|
|
|
|
*/
|
|
|
|
assert(!inst->conditional_mod ||
|
|
|
|
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
|
|
|
|
!inst->src[i].negate);
|
|
|
|
}
|
2016-07-18 08:27:56 +01:00
|
|
|
dst = brw_reg_from_fs_reg(devinfo, inst,
|
|
|
|
&inst->dst, compressed);
|
2016-05-16 23:09:17 +01:00
|
|
|
|
2016-05-19 02:41:28 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2016-05-16 23:09:17 +01:00
|
|
|
brw_set_default_predicate_control(p, inst->predicate);
|
|
|
|
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
|
2018-05-18 04:51:24 +01:00
|
|
|
/* On gen7 and above, hardware automatically adds the group onto the
|
|
|
|
* flag subregister number. On Sandy Bridge and older, we have to do it
|
|
|
|
* ourselves.
|
|
|
|
*/
|
|
|
|
const unsigned flag_subreg = inst->flag_subreg +
|
|
|
|
(devinfo->gen >= 7 ? 0 : inst->group / 16);
|
|
|
|
brw_set_default_flag_reg(p, flag_subreg / 2, flag_subreg % 2);
|
2016-05-16 23:09:17 +01:00
|
|
|
brw_set_default_saturate(p, inst->saturate);
|
|
|
|
brw_set_default_mask_control(p, inst->force_writemask_all);
|
|
|
|
brw_set_default_acc_write_control(p, inst->writes_accumulator);
|
2016-07-18 08:27:56 +01:00
|
|
|
|
|
|
|
unsigned exec_size = inst->exec_size;
|
|
|
|
if (devinfo->gen == 7 && !devinfo->is_haswell &&
|
|
|
|
(get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) {
|
|
|
|
exec_size *= 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
brw_set_default_exec_size(p, cvt(exec_size) - 1);
|
2016-05-16 23:09:17 +01:00
|
|
|
|
2016-06-20 11:13:14 +01:00
|
|
|
assert(inst->force_writemask_all || inst->exec_size >= 4);
|
2016-05-21 00:14:13 +01:00
|
|
|
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
|
2016-05-16 23:09:17 +01:00
|
|
|
assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
|
|
|
|
assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
switch (inst->opcode) {
|
2019-09-04 01:51:17 +01:00
|
|
|
case BRW_OPCODE_SYNC:
|
|
|
|
assert(src[0].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_SYNC(p, tgl_sync_function(src[0].ud));
|
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
case BRW_OPCODE_MOV:
|
|
|
|
brw_MOV(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ADD:
|
|
|
|
brw_ADD(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_MUL:
|
|
|
|
brw_MUL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-12-17 14:39:16 +00:00
|
|
|
case BRW_OPCODE_AVG:
|
|
|
|
brw_AVG(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2011-08-16 06:36:18 +01:00
|
|
|
case BRW_OPCODE_MACH:
|
|
|
|
brw_MACH(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-04-02 01:25:12 +01:00
|
|
|
case BRW_OPCODE_LINE:
|
|
|
|
brw_LINE(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2012-02-06 23:59:11 +00:00
|
|
|
case BRW_OPCODE_MAD:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 6);
|
2018-01-05 17:46:11 +00:00
|
|
|
if (devinfo->gen < 10)
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2016-05-18 03:51:50 +01:00
|
|
|
brw_MAD(p, dst, src[0], src[1], src[2]);
|
2012-02-06 23:59:11 +00:00
|
|
|
break;
|
|
|
|
|
2012-12-02 08:08:15 +00:00
|
|
|
case BRW_OPCODE_LRP:
|
2017-06-15 00:20:41 +01:00
|
|
|
assert(devinfo->gen >= 6 && devinfo->gen <= 10);
|
2018-01-05 17:46:11 +00:00
|
|
|
if (devinfo->gen < 10)
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2016-05-18 03:51:50 +01:00
|
|
|
brw_LRP(p, dst, src[0], src[1], src[2]);
|
2012-12-02 08:08:15 +00:00
|
|
|
break;
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
case BRW_OPCODE_FRC:
|
|
|
|
brw_FRC(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDD:
|
|
|
|
brw_RNDD(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDE:
|
|
|
|
brw_RNDE(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDZ:
|
|
|
|
brw_RNDZ(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_AND:
|
|
|
|
brw_AND(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_OR:
|
|
|
|
brw_OR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_XOR:
|
|
|
|
brw_XOR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_NOT:
|
|
|
|
brw_NOT(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ASR:
|
|
|
|
brw_ASR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SHR:
|
|
|
|
brw_SHR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SHL:
|
|
|
|
brw_SHL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2019-05-29 19:43:30 +01:00
|
|
|
case BRW_OPCODE_ROL:
|
|
|
|
assert(devinfo->gen >= 11);
|
|
|
|
assert(src[0].type == dst.type);
|
|
|
|
brw_ROL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ROR:
|
|
|
|
assert(devinfo->gen >= 11);
|
|
|
|
assert(src[0].type == dst.type);
|
|
|
|
brw_ROR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-01-09 19:46:42 +00:00
|
|
|
case BRW_OPCODE_F32TO16:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
brw_F32TO16(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_F16TO32:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
brw_F16TO32(p, dst, src[0]);
|
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
case BRW_OPCODE_CMP:
|
2016-05-18 03:59:18 +01:00
|
|
|
if (inst->exec_size >= 16 && devinfo->gen == 7 && !devinfo->is_haswell &&
|
|
|
|
dst.file == BRW_ARCHITECTURE_REGISTER_FILE) {
|
|
|
|
/* For unknown reasons the WaCMPInstFlagDepClearedEarly workaround
|
|
|
|
* implemented in the compiler is not sufficient. Overriding the
|
|
|
|
* type when the destination is the null register is necessary but
|
|
|
|
* not sufficient by itself.
|
|
|
|
*/
|
|
|
|
assert(dst.nr == BRW_ARF_NULL);
|
|
|
|
dst.type = BRW_REGISTER_TYPE_D;
|
2015-02-04 01:38:49 +00:00
|
|
|
}
|
2016-05-18 03:59:18 +01:00
|
|
|
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SEL:
|
|
|
|
brw_SEL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2015-11-23 04:12:17 +00:00
|
|
|
case BRW_OPCODE_CSEL:
|
|
|
|
assert(devinfo->gen >= 8);
|
|
|
|
if (devinfo->gen < 10)
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
|
|
|
brw_CSEL(p, dst, src[0], src[1], src[2]);
|
|
|
|
break;
|
2013-04-10 03:22:34 +01:00
|
|
|
case BRW_OPCODE_BFREV:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-04-10 03:22:34 +01:00
|
|
|
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
|
2017-06-30 23:10:17 +01:00
|
|
|
retype(src[0], BRW_REGISTER_TYPE_UD));
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_FBH:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2017-06-30 23:10:17 +01:00
|
|
|
brw_FBH(p, retype(dst, src[0].type), src[0]);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_FBL:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2017-06-30 23:10:17 +01:00
|
|
|
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD),
|
|
|
|
retype(src[0], BRW_REGISTER_TYPE_UD));
|
2016-06-21 23:14:03 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_LZD:
|
|
|
|
brw_LZD(p, dst, src[0]);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_CBIT:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2017-06-30 23:10:17 +01:00
|
|
|
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD),
|
|
|
|
retype(src[0], BRW_REGISTER_TYPE_UD));
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
2013-09-19 21:01:08 +01:00
|
|
|
case BRW_OPCODE_ADDC:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-09-19 21:01:08 +01:00
|
|
|
brw_ADDC(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SUBB:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-09-19 21:01:08 +01:00
|
|
|
brw_SUBB(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2014-03-28 13:28:32 +00:00
|
|
|
case BRW_OPCODE_MAC:
|
|
|
|
brw_MAC(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-04-10 03:22:34 +01:00
|
|
|
|
|
|
|
case BRW_OPCODE_BFE:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2018-01-05 17:46:11 +00:00
|
|
|
if (devinfo->gen < 10)
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2016-05-18 03:51:50 +01:00
|
|
|
brw_BFE(p, dst, src[0], src[1], src[2]);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_BFI1:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2016-05-18 04:02:29 +01:00
|
|
|
brw_BFI1(p, dst, src[0], src[1]);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_BFI2:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2018-01-05 17:46:11 +00:00
|
|
|
if (devinfo->gen < 10)
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2016-05-18 04:02:29 +01:00
|
|
|
brw_BFI2(p, dst, src[0], src[1], src[2]);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
|
|
|
case BRW_OPCODE_IF:
|
|
|
|
if (inst->src[0].file != BAD_FILE) {
|
|
|
|
/* The instruction has an embedded compare (only allowed on gen6) */
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen == 6);
|
2011-05-25 00:34:27 +01:00
|
|
|
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
|
|
|
} else {
|
2018-05-29 22:50:46 +01:00
|
|
|
brw_IF(p, brw_get_default_exec_size(p));
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_ELSE:
|
|
|
|
brw_ELSE(p);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ENDIF:
|
|
|
|
brw_ENDIF(p);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_DO:
|
2018-05-29 22:50:46 +01:00
|
|
|
brw_DO(p, brw_get_default_exec_size(p));
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_BREAK:
|
2011-12-06 20:44:41 +00:00
|
|
|
brw_BREAK(p);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_CONTINUE:
|
2014-08-04 22:26:26 +01:00
|
|
|
brw_CONT(p);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2011-12-06 20:30:03 +00:00
|
|
|
case BRW_OPCODE_WHILE:
|
|
|
|
brw_WHILE(p);
|
2014-08-06 09:27:58 +01:00
|
|
|
loop_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2011-08-05 20:38:58 +01:00
|
|
|
case SHADER_OPCODE_RCP:
|
|
|
|
case SHADER_OPCODE_RSQ:
|
|
|
|
case SHADER_OPCODE_SQRT:
|
|
|
|
case SHADER_OPCODE_EXP2:
|
|
|
|
case SHADER_OPCODE_LOG2:
|
|
|
|
case SHADER_OPCODE_SIN:
|
|
|
|
case SHADER_OPCODE_COS:
|
2014-11-21 20:34:22 +00:00
|
|
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
2016-05-18 03:10:48 +01:00
|
|
|
if (devinfo->gen >= 6) {
|
|
|
|
assert(inst->mlen == 0);
|
|
|
|
assert(devinfo->gen >= 7 || inst->exec_size == 8);
|
|
|
|
gen6_math(p, dst, brw_math_function(inst->opcode),
|
|
|
|
src[0], brw_null_reg());
|
2011-08-18 19:55:42 +01:00
|
|
|
} else {
|
2016-05-18 03:10:48 +01:00
|
|
|
assert(inst->mlen >= 1);
|
|
|
|
assert(devinfo->gen == 5 || devinfo->is_g4x || inst->exec_size == 8);
|
|
|
|
gen4_math(p, dst,
|
|
|
|
brw_math_function(inst->opcode),
|
|
|
|
inst->base_mrf, src[0],
|
|
|
|
BRW_MATH_PRECISION_FULL);
|
2011-08-18 19:55:42 +01:00
|
|
|
}
|
|
|
|
break;
|
2011-09-29 01:37:54 +01:00
|
|
|
case SHADER_OPCODE_INT_QUOTIENT:
|
|
|
|
case SHADER_OPCODE_INT_REMAINDER:
|
2011-08-18 19:55:42 +01:00
|
|
|
case SHADER_OPCODE_POW:
|
2014-11-21 20:34:22 +00:00
|
|
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
2016-05-18 03:10:48 +01:00
|
|
|
if (devinfo->gen >= 6) {
|
|
|
|
assert(inst->mlen == 0);
|
|
|
|
assert((devinfo->gen >= 7 && inst->opcode == SHADER_OPCODE_POW) ||
|
|
|
|
inst->exec_size == 8);
|
2014-06-07 10:27:43 +01:00
|
|
|
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
2016-05-18 03:10:48 +01:00
|
|
|
} else {
|
|
|
|
assert(inst->mlen >= 1);
|
|
|
|
assert(inst->exec_size == 8);
|
|
|
|
gen4_math(p, dst, brw_math_function(inst->opcode),
|
|
|
|
inst->base_mrf, src[0],
|
|
|
|
BRW_MATH_PRECISION_FULL);
|
2011-08-18 19:55:42 +01:00
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case FS_OPCODE_LINTERP:
|
2017-06-14 19:06:45 +01:00
|
|
|
multiple_instructions_emitted = generate_linterp(inst, dst, src);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2015-04-14 21:17:38 +01:00
|
|
|
case FS_OPCODE_PIXEL_X:
|
|
|
|
assert(src[0].type == BRW_REGISTER_TYPE_UW);
|
|
|
|
src[0].subnr = 0 * type_sz(src[0].type);
|
|
|
|
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
|
|
|
break;
|
|
|
|
case FS_OPCODE_PIXEL_Y:
|
|
|
|
assert(src[0].type == BRW_REGISTER_TYPE_UW);
|
|
|
|
src[0].subnr = 4 * type_sz(src[0].type);
|
|
|
|
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
|
|
|
break;
|
2018-10-29 20:06:14 +00:00
|
|
|
|
|
|
|
case SHADER_OPCODE_SEND:
|
|
|
|
generate_send(inst, dst, src[0], src[1], src[2],
|
|
|
|
inst->ex_mlen > 0 ? src[3] : brw_null_reg());
|
|
|
|
break;
|
|
|
|
|
2017-12-11 01:03:32 +00:00
|
|
|
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
2015-04-13 15:55:49 +01:00
|
|
|
generate_get_buffer_size(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TEX:
|
2011-05-25 00:34:27 +01:00
|
|
|
case FS_OPCODE_TXB:
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXD:
|
|
|
|
case SHADER_OPCODE_TXF:
|
2013-12-10 14:36:31 +00:00
|
|
|
case SHADER_OPCODE_TXF_CMS:
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXL:
|
|
|
|
case SHADER_OPCODE_TXS:
|
2013-03-06 22:47:01 +00:00
|
|
|
case SHADER_OPCODE_LOD:
|
2013-03-31 09:31:12 +01:00
|
|
|
case SHADER_OPCODE_TG4:
|
2015-08-12 01:37:32 +01:00
|
|
|
case SHADER_OPCODE_SAMPLEINFO:
|
2018-10-30 20:47:39 +00:00
|
|
|
assert(inst->src[0].file == BAD_FILE);
|
|
|
|
generate_tex(inst, dst, src[1], src[2]);
|
2018-08-16 17:01:24 +01:00
|
|
|
break;
|
|
|
|
|
2014-11-08 09:39:14 +00:00
|
|
|
case FS_OPCODE_DDX_COARSE:
|
|
|
|
case FS_OPCODE_DDX_FINE:
|
2017-06-15 23:41:40 +01:00
|
|
|
generate_ddx(inst, dst, src[0]);
|
2014-11-08 09:39:14 +00:00
|
|
|
break;
|
|
|
|
case FS_OPCODE_DDY_COARSE:
|
|
|
|
case FS_OPCODE_DDY_FINE:
|
2017-06-15 23:41:40 +01:00
|
|
|
generate_ddy(inst, dst, src[0]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:45:06 +01:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
|
|
|
generate_scratch_write(inst, src[0]);
|
2015-03-16 19:18:31 +00:00
|
|
|
spill_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:45:06 +01:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
|
|
|
generate_scratch_read(inst, dst);
|
2015-03-16 19:18:31 +00:00
|
|
|
fill_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:51:22 +01:00
|
|
|
case SHADER_OPCODE_GEN7_SCRATCH_READ:
|
|
|
|
generate_scratch_read_gen7(inst, dst);
|
2015-03-16 19:18:31 +00:00
|
|
|
fill_count++;
|
2013-10-16 19:51:22 +01:00
|
|
|
break;
|
|
|
|
|
2015-11-08 02:58:34 +00:00
|
|
|
case SHADER_OPCODE_MOV_INDIRECT:
|
|
|
|
generate_mov_indirect(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2015-09-29 22:32:02 +01:00
|
|
|
case SHADER_OPCODE_URB_READ_SIMD8:
|
2015-11-07 09:37:33 +00:00
|
|
|
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
2015-09-29 22:32:02 +01:00
|
|
|
generate_urb_read(inst, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2014-10-21 07:00:50 +01:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
2015-05-06 08:04:10 +01:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
2014-10-21 07:00:50 +01:00
|
|
|
generate_urb_write(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2012-11-07 18:42:34 +00:00
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
2016-05-23 22:07:23 +01:00
|
|
|
assert(inst->force_writemask_all);
|
2012-11-07 18:42:34 +00:00
|
|
|
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2012-12-05 08:06:30 +00:00
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
|
2016-05-23 22:07:23 +01:00
|
|
|
assert(inst->force_writemask_all);
|
2012-12-05 08:06:30 +00:00
|
|
|
generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2016-05-20 21:03:31 +01:00
|
|
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
|
|
|
|
generate_varying_pull_constant_load_gen4(inst, dst, src[0]);
|
2012-11-07 19:18:34 +00:00
|
|
|
break;
|
|
|
|
|
2014-07-07 23:27:17 +01:00
|
|
|
case FS_OPCODE_REP_FB_WRITE:
|
2011-05-25 00:34:27 +01:00
|
|
|
case FS_OPCODE_FB_WRITE:
|
2014-09-16 23:16:20 +01:00
|
|
|
generate_fb_write(inst, src[0]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2012-06-18 22:50:04 +01:00
|
|
|
|
2016-07-22 00:52:33 +01:00
|
|
|
case FS_OPCODE_FB_READ:
|
|
|
|
generate_fb_read(inst, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2012-12-06 18:15:08 +00:00
|
|
|
case FS_OPCODE_DISCARD_JUMP:
|
|
|
|
generate_discard_jump(inst);
|
|
|
|
break;
|
|
|
|
|
2012-11-27 22:10:52 +00:00
|
|
|
case SHADER_OPCODE_SHADER_TIME_ADD:
|
2013-03-19 22:28:11 +00:00
|
|
|
generate_shader_time_add(inst, src[0], src[1], src[2]);
|
2012-11-27 22:10:52 +00:00
|
|
|
break;
|
|
|
|
|
2015-04-23 12:30:28 +01:00
|
|
|
case SHADER_OPCODE_MEMORY_FENCE:
|
2019-05-22 18:36:17 +01:00
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
2019-07-10 20:02:23 +01:00
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud, src[2].ud);
|
2018-04-27 15:06:56 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case SHADER_OPCODE_INTERLOCK:
|
2019-03-12 19:25:36 +00:00
|
|
|
assert(devinfo->gen >= 9);
|
2018-04-27 15:06:56 +01:00
|
|
|
/* The interlock is basically a memory fence issued via sendc */
|
2019-07-10 20:02:23 +01:00
|
|
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false, /* bti */ 0);
|
2015-04-23 12:30:28 +01:00
|
|
|
break;
|
|
|
|
|
2016-09-14 23:09:33 +01:00
|
|
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
|
|
|
const struct brw_reg mask =
|
2016-09-16 01:24:10 +01:00
|
|
|
brw_stage_has_packed_dispatch(devinfo, stage,
|
|
|
|
prog_data) ? brw_imm_ud(~0u) :
|
|
|
|
stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
|
|
|
|
brw_dmask_reg();
|
2016-09-14 23:09:33 +01:00
|
|
|
brw_find_live_channel(p, dst, mask);
|
2015-04-23 12:42:53 +01:00
|
|
|
break;
|
2016-09-14 23:09:33 +01:00
|
|
|
}
|
2015-04-23 12:42:53 +01:00
|
|
|
|
2015-02-20 18:14:24 +00:00
|
|
|
case SHADER_OPCODE_BROADCAST:
|
2016-05-19 08:10:03 +01:00
|
|
|
assert(inst->force_writemask_all);
|
2015-02-20 18:14:24 +00:00
|
|
|
brw_broadcast(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2017-08-29 17:21:32 +01:00
|
|
|
case SHADER_OPCODE_SHUFFLE:
|
|
|
|
generate_shuffle(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2017-09-01 05:45:30 +01:00
|
|
|
case SHADER_OPCODE_SEL_EXEC:
|
|
|
|
assert(inst->force_writemask_all);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
brw_MOV(p, dst, src[1]);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_ENABLE);
|
2019-09-27 07:38:24 +01:00
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
2017-09-01 05:45:30 +01:00
|
|
|
brw_MOV(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2017-09-01 23:18:02 +01:00
|
|
|
case SHADER_OPCODE_QUAD_SWIZZLE:
|
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
assert(src[1].type == BRW_REGISTER_TYPE_UD);
|
2018-12-06 22:11:34 +00:00
|
|
|
generate_quad_swizzle(inst, dst, src[0], src[1].ud);
|
2017-09-01 23:18:02 +01:00
|
|
|
break;
|
|
|
|
|
2017-09-01 05:45:30 +01:00
|
|
|
case SHADER_OPCODE_CLUSTER_BROADCAST: {
|
|
|
|
assert(!src[0].negate && !src[0].abs);
|
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
assert(src[1].type == BRW_REGISTER_TYPE_UD);
|
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
assert(src[2].type == BRW_REGISTER_TYPE_UD);
|
|
|
|
const unsigned component = src[1].ud;
|
|
|
|
const unsigned cluster_size = src[2].ud;
|
2019-09-04 23:07:20 +01:00
|
|
|
unsigned vstride = cluster_size;
|
|
|
|
unsigned width = cluster_size;
|
|
|
|
|
|
|
|
/* The maximum exec_size is 32, but the maximum width is only 16. */
|
|
|
|
if (inst->exec_size == width) {
|
|
|
|
vstride = 0;
|
|
|
|
width = 1;
|
|
|
|
}
|
|
|
|
|
2017-09-01 05:45:30 +01:00
|
|
|
struct brw_reg strided = stride(suboffset(src[0], component),
|
2019-09-04 23:07:20 +01:00
|
|
|
vstride, width, 0);
|
2017-09-01 05:45:30 +01:00
|
|
|
if (type_sz(src[0].type) > 4 &&
|
|
|
|
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
|
|
|
|
/* IVB has an issue (which we found empirically) where it reads
|
|
|
|
* two address register components per channel for indirectly
|
|
|
|
* addressed 64-bit sources.
|
|
|
|
*
|
|
|
|
* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
|
|
|
*
|
|
|
|
* "When source or destination datatype is 64b or operation is
|
|
|
|
* integer DWord multiply, indirect addressing must not be
|
|
|
|
* used."
|
|
|
|
*
|
|
|
|
* To work around both of these, we do two integer MOVs insead of
|
|
|
|
* one 64-bit MOV. Because no double value should ever cross a
|
|
|
|
* register boundary, it's safe to use the immediate offset in the
|
|
|
|
* indirect here to handle adding 4 bytes to the offset and avoid
|
|
|
|
* the extra ADD to the register file.
|
|
|
|
*/
|
2019-06-04 17:45:50 +01:00
|
|
|
assert(src[0].type == dst.type);
|
2017-09-01 05:45:30 +01:00
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
|
|
|
subscript(strided, BRW_REGISTER_TYPE_D, 0));
|
2019-09-27 07:38:24 +01:00
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
2017-09-01 05:45:30 +01:00
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
|
|
|
subscript(strided, BRW_REGISTER_TYPE_D, 1));
|
|
|
|
} else {
|
|
|
|
brw_MOV(p, dst, strided);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-10-25 00:17:08 +01:00
|
|
|
case FS_OPCODE_SET_SAMPLE_ID:
|
|
|
|
generate_set_sample_id(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2013-01-09 19:46:42 +00:00
|
|
|
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
|
|
|
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2013-03-28 06:19:39 +00:00
|
|
|
case FS_OPCODE_PLACEHOLDER_HALT:
|
|
|
|
/* This is the place where the final HALT needs to be inserted if
|
|
|
|
* we've emitted any discards. If not, this will emit no code.
|
|
|
|
*/
|
2014-05-19 18:20:37 +01:00
|
|
|
if (!patch_discard_jumps_to_fb_writes()) {
|
2014-05-25 18:30:13 +01:00
|
|
|
if (unlikely(debug_flag)) {
|
2017-11-16 01:08:42 +00:00
|
|
|
disasm_info->use_tail = true;
|
2014-05-25 18:30:13 +01:00
|
|
|
}
|
2014-05-19 18:20:37 +01:00
|
|
|
}
|
2013-03-28 06:19:39 +00:00
|
|
|
break;
|
|
|
|
|
2013-11-18 08:13:13 +00:00
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
|
|
|
|
break;
|
|
|
|
|
2014-08-27 19:33:25 +01:00
|
|
|
case CS_OPCODE_CS_TERMINATE:
|
|
|
|
generate_cs_terminate(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2014-08-27 19:32:08 +01:00
|
|
|
case SHADER_OPCODE_BARRIER:
|
|
|
|
generate_barrier(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2016-07-07 07:38:22 +01:00
|
|
|
case BRW_OPCODE_DIM:
|
|
|
|
assert(devinfo->is_haswell);
|
|
|
|
assert(src[0].type == BRW_REGISTER_TYPE_DF);
|
|
|
|
assert(dst.type == BRW_REGISTER_TYPE_DF);
|
|
|
|
brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
|
|
|
|
break;
|
|
|
|
|
2019-09-12 23:34:35 +01:00
|
|
|
case SHADER_OPCODE_RND_MODE: {
|
2017-07-01 07:12:59 +01:00
|
|
|
assert(src[0].file == BRW_IMMEDIATE_VALUE);
|
2019-09-12 23:34:35 +01:00
|
|
|
/*
|
|
|
|
* Changes the floating point rounding mode updating the control
|
|
|
|
* register field defined at cr0.0[5-6] bits.
|
|
|
|
*/
|
|
|
|
enum brw_rnd_mode mode =
|
|
|
|
(enum brw_rnd_mode) (src[0].d << BRW_CR0_RND_MODE_SHIFT);
|
|
|
|
brw_float_controls_mode(p, mode, BRW_CR0_RND_MODE_MASK);
|
|
|
|
}
|
2017-07-01 07:12:59 +01:00
|
|
|
break;
|
|
|
|
|
2019-09-12 23:38:06 +01:00
|
|
|
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
|
|
|
assert(src[0].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_float_controls_mode(p, src[0].d, src[1].d);
|
|
|
|
break;
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
default:
|
2015-04-15 22:51:18 +01:00
|
|
|
unreachable("Unsupported opcode");
|
2014-05-28 02:47:40 +01:00
|
|
|
|
|
|
|
case SHADER_OPCODE_LOAD_PAYLOAD:
|
2014-06-29 22:54:01 +01:00
|
|
|
unreachable("Should be lowered by lower_load_payload()");
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
2014-05-31 00:41:32 +01:00
|
|
|
|
2014-12-30 20:56:13 +00:00
|
|
|
if (multiple_instructions_emitted)
|
|
|
|
continue;
|
|
|
|
|
2014-06-29 07:31:04 +01:00
|
|
|
if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
|
|
|
|
assert(p->next_insn_offset == last_insn_offset + 16 ||
|
|
|
|
!"conditional_mod, no_dd_check, or no_dd_clear set for IR "
|
|
|
|
"emitting more than 1 instruction");
|
|
|
|
|
2014-06-13 22:29:25 +01:00
|
|
|
brw_inst *last = &p->store[last_insn_offset / 16];
|
2014-06-29 07:31:04 +01:00
|
|
|
|
2014-11-21 20:20:53 +00:00
|
|
|
if (inst->conditional_mod)
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
|
2018-11-09 22:13:36 +00:00
|
|
|
if (devinfo->gen < 12) {
|
|
|
|
brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
|
|
|
|
brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
|
|
|
|
}
|
2014-05-31 00:41:32 +01:00
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2016-08-29 23:57:41 +01:00
|
|
|
brw_set_uip_jip(p, start_offset);
|
2017-11-16 01:08:42 +00:00
|
|
|
|
|
|
|
/* end of program sentinel */
|
|
|
|
disasm_new_inst_group(disasm_info, p->next_insn_offset);
|
2014-05-25 18:42:32 +01:00
|
|
|
|
2015-06-29 22:08:51 +01:00
|
|
|
#ifndef NDEBUG
|
2017-11-16 21:35:01 +00:00
|
|
|
bool validated =
|
2015-06-29 22:08:51 +01:00
|
|
|
#else
|
|
|
|
if (unlikely(debug_flag))
|
2017-11-16 21:35:01 +00:00
|
|
|
#endif
|
2017-04-29 01:05:44 +01:00
|
|
|
brw_validate_instructions(devinfo, p->store,
|
|
|
|
start_offset,
|
|
|
|
p->next_insn_offset,
|
2017-11-16 01:08:42 +00:00
|
|
|
disasm_info);
|
2015-06-29 22:08:51 +01:00
|
|
|
|
2014-05-25 22:56:41 +01:00
|
|
|
int before_size = p->next_insn_offset - start_offset;
|
2017-11-16 01:08:42 +00:00
|
|
|
brw_compact_instructions(p, start_offset, disasm_info);
|
2014-05-25 22:56:41 +01:00
|
|
|
int after_size = p->next_insn_offset - start_offset;
|
2014-05-25 18:42:32 +01:00
|
|
|
|
|
|
|
if (unlikely(debug_flag)) {
|
2019-05-23 17:05:23 +01:00
|
|
|
unsigned char sha1[21];
|
|
|
|
char sha1buf[41];
|
|
|
|
|
|
|
|
_mesa_sha1_compute(p->store + start_offset / sizeof(brw_inst),
|
|
|
|
after_size, sha1);
|
|
|
|
_mesa_sha1_format(sha1buf, sha1);
|
|
|
|
|
|
|
|
fprintf(stderr, "Native code for %s (sha1 %s)\n"
|
2016-10-17 22:12:28 +01:00
|
|
|
"SIMD%d shader: %d instructions. %d loops. %u cycles. "
|
|
|
|
"%d:%d spills:fills. "
|
|
|
|
"scheduled with mode %s. "
|
|
|
|
"Promoted %u constants. "
|
|
|
|
"Compacted %d to %d bytes (%.0f%%)\n",
|
2019-05-23 17:05:23 +01:00
|
|
|
shader_name, sha1buf,
|
|
|
|
dispatch_width, before_size / 16,
|
2016-10-17 22:12:28 +01:00
|
|
|
loop_count, cfg->cycle_count,
|
|
|
|
spill_count, fill_count,
|
|
|
|
shader_stats.scheduler_mode,
|
|
|
|
shader_stats.promoted_constants,
|
|
|
|
before_size, after_size,
|
2014-05-25 22:56:41 +01:00
|
|
|
100.0f * (before_size - after_size) / before_size);
|
2014-05-25 18:46:55 +01:00
|
|
|
|
2019-05-23 17:05:23 +01:00
|
|
|
/* overriding the shader makes disasm_info invalid */
|
|
|
|
if (!brw_try_override_assembly(p, start_offset, sha1buf)) {
|
|
|
|
dump_assembly(p->store, disasm_info);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf);
|
|
|
|
}
|
2014-05-25 18:42:32 +01:00
|
|
|
}
|
2017-11-20 08:57:17 +00:00
|
|
|
ralloc_free(disasm_info);
|
2015-06-29 22:08:51 +01:00
|
|
|
assert(validated);
|
2014-11-14 00:28:08 +00:00
|
|
|
|
2015-04-16 22:13:52 +01:00
|
|
|
compiler->shader_debug_log(log_data,
|
2015-06-06 15:55:21 +01:00
|
|
|
"%s SIMD%d shader: %d inst, %d loops, %u cycles, "
|
2016-10-17 22:12:28 +01:00
|
|
|
"%d:%d spills:fills, "
|
|
|
|
"scheduled with mode %s, "
|
|
|
|
"Promoted %u constants, "
|
2016-01-14 00:17:26 +00:00
|
|
|
"compacted %d to %d bytes.",
|
2016-01-15 04:27:51 +00:00
|
|
|
_mesa_shader_stage_to_abbrev(stage),
|
|
|
|
dispatch_width, before_size / 16,
|
2016-10-17 22:12:28 +01:00
|
|
|
loop_count, cfg->cycle_count,
|
|
|
|
spill_count, fill_count,
|
|
|
|
shader_stats.scheduler_mode,
|
|
|
|
shader_stats.promoted_constants,
|
|
|
|
before_size, after_size);
|
2019-04-24 05:19:56 +01:00
|
|
|
if (stats) {
|
|
|
|
stats->dispatch_width = dispatch_width;
|
|
|
|
stats->instructions = before_size / 16;
|
|
|
|
stats->loops = loop_count;
|
|
|
|
stats->cycles = cfg->cycle_count;
|
|
|
|
stats->spills = spill_count;
|
|
|
|
stats->fills = fill_count;
|
|
|
|
}
|
2014-11-14 20:46:44 +00:00
|
|
|
|
2014-11-14 00:28:08 +00:00
|
|
|
return start_offset;
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
2012-11-09 09:05:47 +00:00
|
|
|
|
|
|
|
const unsigned *
|
2018-02-27 00:34:55 +00:00
|
|
|
fs_generator::get_assembly()
|
2012-11-09 09:05:47 +00:00
|
|
|
{
|
2018-02-27 00:34:55 +00:00
|
|
|
return brw_get_program(p, &prog_data->program_size);
|
2012-11-09 09:05:47 +00:00
|
|
|
}
|