2011-05-25 00:34:27 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2013-09-18 07:32:10 +01:00
|
|
|
/** @file brw_fs_generator.cpp
|
2011-05-25 00:34:27 +01:00
|
|
|
*
|
2013-09-18 07:32:10 +01:00
|
|
|
* This file supports generating code from the FS LIR to the actual
|
2011-05-25 00:34:27 +01:00
|
|
|
* native instructions.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "main/macros.h"
|
|
|
|
#include "brw_context.h"
|
|
|
|
#include "brw_eu.h"
|
|
|
|
#include "brw_fs.h"
|
2012-10-03 21:03:12 +01:00
|
|
|
#include "brw_cfg.h"
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-12-05 17:53:11 +00:00
|
|
|
/**
 * Map an FS IR register file enum onto the hardware register-file
 * encoding used in instruction words.
 *
 * Only GRF, MRF and IMM are valid here; any other file is a bug in the
 * caller.
 */
static uint32_t brw_file_from_reg(fs_reg *reg)
{
   if (reg->file == GRF)
      return BRW_GENERAL_REGISTER_FILE;

   if (reg->file == MRF)
      return BRW_MESSAGE_REGISTER_FILE;

   if (reg->file == IMM)
      return BRW_IMMEDIATE_VALUE;

   unreachable("not reached");
}
|
|
|
|
|
2014-11-28 20:21:03 +00:00
|
|
|
/**
 * Convert an FS IR fs_reg into the brw_reg form consumed by the EU
 * emitter.
 *
 * GRF/MRF registers get a region description derived from the IR's
 * stride/width; IMM registers get the immediate packed per type; HW_REG
 * passes the fixed hardware register straight through.  The abs/negate
 * source modifiers are applied at the end regardless of file.
 */
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case MRF:
      if (reg->stride == 0) {
         /* Stride 0 is a scalar: a <0,1,0> region replicated to all
          * channels.
          */
         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
      } else if (reg->width < 8) {
         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
         brw_reg = stride(brw_reg, reg->width * reg->stride,
                          reg->width, reg->stride);
      } else {
         /* From the Haswell PRM:
          *
          * VertStride must be used to cross GRF register boundaries. This
          * rule implies that elements within a 'Width' cannot cross GRF
          * boundaries.
          *
          * So, for registers with width > 8, we have to use a width of 8
          * and trust the compression state to sort out the exec size.
          */
         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
         brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
      }

      brw_reg = retype(brw_reg, reg->type);
      brw_reg = byte_offset(brw_reg, reg->subreg_offset);
      break;
   case IMM:
      /* Pack the immediate payload according to the register type. */
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->fixed_hw_reg.dw1.d);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->fixed_hw_reg.dw1.ud);
         break;
      case BRW_REGISTER_TYPE_W:
         brw_reg = brw_imm_w(reg->fixed_hw_reg.dw1.d);
         break;
      case BRW_REGISTER_TYPE_UW:
         brw_reg = brw_imm_uw(reg->fixed_hw_reg.dw1.ud);
         break;
      case BRW_REGISTER_TYPE_VF:
         brw_reg = brw_imm_vf(reg->fixed_hw_reg.dw1.ud);
         break;
      default:
         unreachable("not reached");
      }
      break;
   case HW_REG:
      assert(reg->type == reg->fixed_hw_reg.type);
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   default:
      unreachable("not reached");
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
|
|
|
|
|
2015-04-16 22:34:04 +01:00
|
|
|
/**
 * Construct a code generator for one shader stage.
 *
 * @param compiler                 compiler context (provides devinfo)
 * @param log_data                 opaque pointer passed back to the logger
 * @param mem_ctx                  ralloc context owning the codegen state
 * @param key                      stage-specific program key
 * @param prog_data                stage-specific program data to fill out
 * @param prog                     the GL program being compiled (may be
 *                                 unused by some paths — TODO confirm)
 * @param promoted_constants       count of constants promoted to uniforms
 * @param runtime_check_aads_emit  emit the pre-gen6 runtime AA-data check
 *                                 in FB writes (see generate_fb_write)
 * @param stage_abbrev             short stage name for debug output
 */
fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
                           void *mem_ctx,
                           const void *key,
                           struct brw_stage_prog_data *prog_data,
                           struct gl_program *prog,
                           unsigned promoted_constants,
                           bool runtime_check_aads_emit,
                           const char *stage_abbrev)

   : compiler(compiler), log_data(log_data),
     devinfo(compiler->devinfo), key(key),
     prog_data(prog_data),
     prog(prog), promoted_constants(promoted_constants),
     runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
     stage_abbrev(stage_abbrev), mem_ctx(mem_ctx)
{
   /* The EU assembler state lives on mem_ctx, so it is freed with the
    * rest of the compile.
    */
   p = rzalloc(mem_ctx, struct brw_codegen);
   brw_init_codegen(devinfo, p, mem_ctx);
}
|
|
|
|
|
|
|
|
/* Nothing to release explicitly: the codegen state was rzalloc'ed off
 * mem_ctx in the constructor and is freed when that context is freed.
 */
fs_generator::~fs_generator()
{
}
|
|
|
|
|
2014-11-12 19:01:16 +00:00
|
|
|
class ip_record : public exec_node {
|
|
|
|
public:
|
|
|
|
DECLARE_RALLOC_CXX_OPERATORS(ip_record)
|
|
|
|
|
|
|
|
ip_record(int ip)
|
|
|
|
{
|
|
|
|
this->ip = ip;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ip;
|
|
|
|
};
|
|
|
|
|
2014-05-16 21:06:45 +01:00
|
|
|
/**
 * Fix up the UIPs of every discard HALT recorded in
 * discard_halt_patches so they jump to a final HALT emitted here, just
 * before the FB writes.
 *
 * Returns true if a final HALT was emitted (i.e. there was anything to
 * patch); false on gen < 6 or when no discards were recorded.
 */
bool
fs_generator::patch_discard_jumps_to_fb_writes()
{
   if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
      return false;

   /* Jump distances are scaled differently per generation. */
   int scale = brw_jump_scale(p->devinfo);

   /* There is a somewhat strange undocumented requirement of using
    * HALT, according to the simulator. If some channel has HALTed to
    * a particular UIP, then by the end of the program, every channel
    * must have HALTed to that UIP. Furthermore, the tracking is a
    * stack, so you can't do the final halt of a UIP after starting
    * halting to a new UIP.
    *
    * Symptoms of not emitting this instruction on actual hardware
    * included GPU hangs and sparkly rendering on the piglit discard
    * tests.
    */
   brw_inst *last_halt = gen6_HALT(p);
   brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
   brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);

   /* IP of the final HALT; every earlier HALT is retargeted to it. */
   int ip = p->nr_insn;

   foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
      brw_inst *patch = &p->store[patch_ip->ip];

      assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
      /* HALT takes a half-instruction distance from the pre-incremented IP. */
      brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
   }

   this->discard_halt_patches.make_empty();
   return true;
}
|
|
|
|
|
2014-06-05 14:03:08 +01:00
|
|
|
/**
 * Emit the actual render-target write SEND for an FB write instruction.
 *
 * @param inst            the FB write IR instruction
 * @param payload         first register of the message payload
 * @param implied_header  header register implied by the send (or null)
 * @param nr              message length in registers
 */
void
fs_generator::fire_fb_write(fs_inst *inst,
                            struct brw_reg payload,
                            struct brw_reg implied_header,
                            GLuint nr)
{
   uint32_t msg_control;

   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;

   if (devinfo->gen < 6) {
      /* Pre-gen6: g0 comes in via the implied move, but g1 must be
       * copied into the second payload register by hand, with all
       * per-channel state disabled.
       */
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, offset(payload, 1), brw_vec8_grf(1, 0));
      brw_pop_insn_state(p);
   }

   /* Pick the dataport message control from the write kind (replicated,
    * dual-source blend, or plain SIMD8/SIMD16 single source).
    */
   if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
   else if (prog_data->dual_src_blend) {
      if (dispatch_width == 8 || !inst->eot)
         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
      else
         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
   } else if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   uint32_t surf_index =
      prog_data->binding_table.render_target_start + inst->target;

   /* SIMD16 dual-source writes are issued as two SIMD8 subspan messages;
    * the second one is the last write even when it is not EOT.
    */
   bool last_render_target = inst->eot ||
                             (prog_data->dual_src_blend && dispatch_width == 16);


   brw_fb_WRITE(p,
                dispatch_width,
                payload,
                implied_header,
                msg_control,
                surf_index,
                nr,
                0,
                inst->eot,
                last_render_target,
                inst->header_size != 0);

   brw_mark_surface_used(&prog_data->base, surf_index);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/**
 * Generate code for an FB write: set up the (optional) message header,
 * then fire one or more render-target write sends via fire_fb_write().
 *
 * When runtime_check_aads_emit is set (pre-gen6 only), emits a runtime
 * branch that skips the AA-data payload register when the hardware says
 * it is not needed.
 */
void
fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
{
   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
   const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
   struct brw_reg implied_header;

   if (devinfo->gen < 8 && !devinfo->is_haswell) {
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   }

   if (inst->base_mrf >= 0)
      payload = brw_message_reg(inst->base_mrf);

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   if (inst->header_size != 0) {
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* On HSW, the GPU will use the predicate on SENDC, unless the header is
       * present.
       */
      if (prog_data->uses_kill) {
         struct brw_reg pixel_mask;

         if (devinfo->gen >= 6)
            pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
         else
            pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

         /* Store the live-channel mask (post-discard) into the header. */
         brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
      }

      if (devinfo->gen >= 6) {
         /* Copy g0 into the first payload register as the header. */
         brw_push_insn_state(p);
         brw_set_default_exec_size(p, BRW_EXECUTE_16);
         brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(payload, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_pop_insn_state(p);

         if (inst->target > 0 && key->replicate_alpha) {
            /* Set "Source0 Alpha Present to RenderTarget" bit in message
             * header.
             */
            brw_OR(p,
                   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   brw_imm_ud(0x1 << 11));
         }

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(vec1(suboffset(payload, 2)),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_pop_insn_state(p);
   } else {
      implied_header = brw_null_reg();
   }

   if (!runtime_check_aads_emit) {
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   } else {
      /* This can only happen in gen < 6 */
      assert(devinfo->gen < 6);

      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

      /* Check runtime bit to detect if we have to send AA data or not */
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p,
              v1_null_ud,
              retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(1<<26));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
      brw_inst_set_exec_size(p->devinfo, brw_last_inst, BRW_EXECUTE_1);
      {
         /* Don't send AA data */
         fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
      }
      brw_land_fwd_jump(p, jmp);
      fire_fb_write(inst, payload, implied_header, inst->mlen);
   }
}
|
|
|
|
|
2014-10-21 07:00:50 +01:00
|
|
|
/**
 * Emit a SIMD8 URB write SEND (GEN8_URB_OPCODE_SIMD8_WRITE) with a
 * header, no response, and the instruction's global offset / EOT bit.
 */
void
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
{
   brw_inst *insn;

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, brw_null_reg());
   brw_set_src0(p, insn, payload);
   brw_set_src1(p, insn, brw_imm_d(0));

   brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
   brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);

   brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
   /* No response expected from a URB write. */
   brw_inst_set_rlen(p->devinfo, insn, 0);
   brw_inst_set_eot(p->devinfo, insn, inst->eot);
   brw_inst_set_header_present(p->devinfo, insn, true);
   brw_inst_set_urb_global_offset(p->devinfo, insn, inst->offset);
}
|
|
|
|
|
2014-08-27 19:33:25 +01:00
|
|
|
/**
 * Emit the message to the thread spawner that terminates a compute
 * shader thread.  The payload carries the data the spawner needs; no
 * header and no response are used.
 */
void
fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
{
   struct brw_inst *insn;

   insn = brw_next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, brw_null_reg());
   brw_set_src0(p, insn, payload);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Terminate a compute shader by sending a message to the thread spawner.
    */
   brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
   brw_inst_set_mlen(devinfo, insn, 1);
   brw_inst_set_rlen(devinfo, insn, 0);
   brw_inst_set_eot(devinfo, insn, inst->eot);
   brw_inst_set_header_present(devinfo, insn, false);

   brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
   brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */

   /* Note that even though the thread has a URB resource associated with it,
    * we set the "do not dereference URB" bit, because the URB resource is
    * managed by the fixed-function unit, so it will free it automatically.
    */
   brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference URB */

   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
|
|
|
|
|
2014-08-27 19:32:08 +01:00
|
|
|
/**
 * Emit a workgroup barrier: send the barrier message, then WAIT for the
 * notification that releases this thread.
 */
void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
   brw_barrier(p, src);
   brw_WAIT(p);
}
|
|
|
|
|
2013-12-17 12:00:50 +00:00
|
|
|
/**
 * Emit the SIMD16 render-target write used by BLORP programs.  Unlike
 * generate_fb_write(), this is always SIMD16, always the last render
 * target, and always EOT.
 */
void
fs_generator::generate_blorp_fb_write(fs_inst *inst)
{
   brw_fb_WRITE(p,
                16 /* dispatch_width */,
                brw_message_reg(inst->base_mrf),
                brw_reg_from_fs_reg(&inst->src[0]),
                BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
                inst->target,
                inst->mlen,
                0,
                true,
                true,
                inst->header_size != 0);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
/**
 * Generate a linear interpolation of a varying: PLN where available,
 * otherwise the LINE+MAC fallback (pre-gen7 requires an even-aligned
 * delta register for PLN).
 *
 * src[0] holds the barycentric deltas, src[1] the interpolation
 * coefficients.
 */
void
fs_generator::generate_linterp(fs_inst *inst,
                               struct brw_reg dst, struct brw_reg *src)
{
   /* PLN reads:
    *                      /   in SIMD16   \
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
    *    -----------------------------------
    *
    * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys:
    *
    *    -----------------------------------
    *   | src1+0 | src1+1 | src1+2 | src1+3 |
    *   |-----------------------------------|
    *   |(x0, x1)|(y0, y1)|        |        | in SIMD8
    *   |-----------------------------------|
    *   |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
    *    -----------------------------------
    *
    * See also: emit_interpolation_setup_gen4().
    */
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
   struct brw_reg interp = src[1];

   if (devinfo->has_pln &&
       (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
|
|
|
|
|
|
|
|
/**
 * Generate a gen6+ math instruction.  SIMD16 is emitted as two SIMD8
 * halves with the compression state selecting each half; instruction
 * state is saved/restored around the split.
 */
void
fs_generator::generate_math_gen6(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);
   /* A null (architecture-file) src1 marks a unary math function. */
   bool binop = src1.file != BRW_ARCHITECTURE_REGISTER_FILE;

   if (dispatch_width == 8) {
      gen6_math(p, dst, op, src0, src1);
   } else if (dispatch_width == 16) {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      gen6_math(p, firsthalf(dst), op, firsthalf(src0), firsthalf(src1));
      brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
      gen6_math(p, sechalf(dst), op, sechalf(src0),
                binop ? sechalf(src1) : brw_null_reg());
      brw_pop_insn_state(p);
   }
}
|
|
|
|
|
|
|
|
/**
 * Generate a pre-gen6 math instruction (message-based math unit).
 * SIMD16 is split into two SIMD8 messages using consecutive MRFs.
 *
 * NOTE(review): unlike generate_math_gen6(), the SIMD16 path here
 * restores compression by setting COMPRESSED directly instead of
 * push/pop — presumably relying on COMPRESSED being the caller's
 * default; confirm against the generate_code() dispatch loop.
 */
void
fs_generator::generate_math_gen4(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   if (dispatch_width == 8) {
      gen4_math(p, dst,
                op,
                inst->base_mrf, src,
                BRW_MATH_PRECISION_FULL);
   } else if (dispatch_width == 16) {
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      gen4_math(p, firsthalf(dst),
                op,
                inst->base_mrf, firsthalf(src),
                BRW_MATH_PRECISION_FULL);
      brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
      gen4_math(p, sechalf(dst),
                op,
                inst->base_mrf + 1, sechalf(src),
                BRW_MATH_PRECISION_FULL);

      brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
|
|
|
|
|
2013-03-30 07:15:54 +00:00
|
|
|
/**
 * Generate a math instruction on G45/GM45.  POW and the integer
 * division opcodes use the gen4 two-message path; everything else is a
 * single full-precision math message.
 */
void
fs_generator::generate_math_g45(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src)
{
   if (inst->opcode == SHADER_OPCODE_POW ||
       inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
       inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
      generate_math_gen4(inst, dst, src);
      return;
   }

   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   gen4_math(p, dst,
             op,
             inst->base_mrf, src,
             BRW_MATH_PRECISION_FULL);
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
void
|
2014-08-03 10:23:31 +01:00
|
|
|
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
|
|
|
struct brw_reg sampler_index)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
|
|
|
int msg_type = -1;
|
|
|
|
int rlen = 4;
|
2014-08-30 01:22:57 +01:00
|
|
|
uint32_t simd_mode;
|
2011-11-10 00:07:57 +00:00
|
|
|
uint32_t return_format;
|
2015-02-08 21:59:57 +00:00
|
|
|
bool is_combined_send = inst->eot;
|
2011-11-10 00:07:57 +00:00
|
|
|
|
|
|
|
switch (dst.type) {
|
|
|
|
case BRW_REGISTER_TYPE_D:
|
|
|
|
return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
|
|
|
|
break;
|
|
|
|
case BRW_REGISTER_TYPE_UD:
|
|
|
|
return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
|
|
|
|
break;
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-08-30 01:22:57 +01:00
|
|
|
switch (inst->exec_size) {
|
|
|
|
case 8:
|
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
|
|
|
|
break;
|
|
|
|
case 16:
|
2011-05-25 00:34:27 +01:00
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
2014-08-30 01:22:57 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
unreachable("Invalid width for texture instruction");
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 5) {
|
2011-05-25 00:34:27 +01:00
|
|
|
switch (inst->opcode) {
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TEX:
|
2011-05-25 00:34:27 +01:00
|
|
|
if (inst->shadow_compare) {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
|
|
|
|
} else {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case FS_OPCODE_TXB:
|
|
|
|
if (inst->shadow_compare) {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
|
|
|
|
} else {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
|
|
|
|
}
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXL:
|
2011-05-25 00:34:27 +01:00
|
|
|
if (inst->shadow_compare) {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
|
|
|
|
} else {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
|
|
|
}
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXS:
|
2011-06-19 09:47:50 +01:00
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXD:
|
2013-01-04 15:53:09 +00:00
|
|
|
if (inst->shadow_compare) {
|
|
|
|
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 8 || devinfo->is_haswell);
|
2013-01-04 15:53:09 +00:00
|
|
|
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
|
|
|
|
} else {
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXF:
|
2011-08-26 01:13:37 +01:00
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
|
|
|
break;
|
2013-12-10 14:36:31 +00:00
|
|
|
case SHADER_OPCODE_TXF_CMS:
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 7)
|
2013-01-24 08:35:15 +00:00
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
|
|
|
|
else
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
|
|
|
|
break;
|
2013-12-10 14:38:15 +00:00
|
|
|
case SHADER_OPCODE_TXF_UMS:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-12-10 14:38:15 +00:00
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
|
|
|
|
break;
|
2013-11-29 21:32:16 +00:00
|
|
|
case SHADER_OPCODE_TXF_MCS:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-11-29 21:32:16 +00:00
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
|
|
|
break;
|
2013-03-06 22:47:01 +00:00
|
|
|
case SHADER_OPCODE_LOD:
|
|
|
|
msg_type = GEN5_SAMPLER_MESSAGE_LOD;
|
|
|
|
break;
|
2013-03-31 09:31:12 +01:00
|
|
|
case SHADER_OPCODE_TG4:
|
2013-10-10 07:57:29 +01:00
|
|
|
if (inst->shadow_compare) {
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-10-10 07:57:29 +01:00
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
|
|
|
|
} else {
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 6);
|
2013-10-10 07:57:29 +01:00
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
|
|
|
}
|
2013-03-31 09:31:12 +01:00
|
|
|
break;
|
2013-10-08 09:42:10 +01:00
|
|
|
case SHADER_OPCODE_TG4_OFFSET:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-10-10 07:57:29 +01:00
|
|
|
if (inst->shadow_compare) {
|
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
|
|
|
|
} else {
|
|
|
|
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
|
|
|
}
|
2013-10-08 09:42:10 +01:00
|
|
|
break;
|
2011-05-03 18:55:50 +01:00
|
|
|
default:
|
2014-06-29 22:54:01 +01:00
|
|
|
unreachable("not reached");
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (inst->opcode) {
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TEX:
|
2011-05-25 00:34:27 +01:00
|
|
|
/* Note that G45 and older determines shadow compare and dispatch width
|
|
|
|
* from message length for most messages.
|
|
|
|
*/
|
2015-02-20 23:11:49 +00:00
|
|
|
if (dispatch_width == 8) {
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
|
|
|
|
if (inst->shadow_compare) {
|
|
|
|
assert(inst->mlen == 6);
|
|
|
|
} else {
|
|
|
|
assert(inst->mlen <= 4);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (inst->shadow_compare) {
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
|
|
|
|
assert(inst->mlen == 9);
|
|
|
|
} else {
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
|
|
|
|
assert(inst->mlen <= 7 && inst->mlen % 2 == 1);
|
|
|
|
}
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case FS_OPCODE_TXB:
|
|
|
|
if (inst->shadow_compare) {
|
2015-02-20 23:11:49 +00:00
|
|
|
assert(dispatch_width == 8);
|
2011-05-25 00:34:27 +01:00
|
|
|
assert(inst->mlen == 6);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
|
|
|
|
} else {
|
|
|
|
assert(inst->mlen == 9);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
|
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
|
|
|
}
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXL:
|
2011-05-25 00:34:27 +01:00
|
|
|
if (inst->shadow_compare) {
|
2015-02-20 23:11:49 +00:00
|
|
|
assert(dispatch_width == 8);
|
2011-05-25 00:34:27 +01:00
|
|
|
assert(inst->mlen == 6);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
|
|
|
|
} else {
|
|
|
|
assert(inst->mlen == 9);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
|
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
|
|
|
}
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXD:
|
2011-06-10 22:48:46 +01:00
|
|
|
/* There is no sample_d_c message; comparisons are done manually */
|
2015-02-20 23:11:49 +00:00
|
|
|
assert(dispatch_width == 8);
|
2011-06-09 00:05:34 +01:00
|
|
|
assert(inst->mlen == 7 || inst->mlen == 10);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXF:
|
2015-02-20 23:11:49 +00:00
|
|
|
assert(inst->mlen <= 9 && inst->mlen % 2 == 1);
|
2011-09-07 00:39:01 +01:00
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
|
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXS:
|
2011-08-17 18:45:47 +01:00
|
|
|
assert(inst->mlen == 3);
|
|
|
|
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
|
|
|
|
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
|
|
|
break;
|
2011-05-03 18:55:50 +01:00
|
|
|
default:
|
2014-06-29 22:54:01 +01:00
|
|
|
unreachable("not reached");
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(msg_type != -1);
|
|
|
|
|
|
|
|
if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
|
|
|
|
rlen = 8;
|
|
|
|
dst = vec16(dst);
|
|
|
|
}
|
|
|
|
|
2015-02-08 21:59:57 +00:00
|
|
|
if (is_combined_send) {
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 9 || devinfo->is_cherryview);
|
2015-02-08 21:59:57 +00:00
|
|
|
rlen = 0;
|
|
|
|
}
|
|
|
|
|
2015-03-24 17:17:32 +00:00
|
|
|
assert(devinfo->gen < 7 || inst->header_size == 0 ||
|
2014-08-18 22:27:55 +01:00
|
|
|
src.file == BRW_GENERAL_REGISTER_FILE);
|
i965/fs: Convert gen7 to using GRFs for texture messages.
Looking at Lightsmark's shaders, the way we used MRFs (or in gen7's
case, GRFs) was bad in a couple of ways. One was that it prevented
compute-to-MRF for the common case of a texcoord that gets used
exactly once, but where the texcoord setup all gets emitted before the
texture calls (such as when it's a bare fragment shader input, which
gets interpolated before processing main()). Another was that it
introduced a bunch of dependencies that constrained scheduling, and
forced waits for texture operations to be done before they are
required. For example, we can now move the compute-to-MRF
interpolation for the second texture send down after the first send.
The downside is that this generally prevents
remove_duplicate_mrf_writes() from doing anything, whereas previously
it avoided work for the case of sampling from the same texcoord twice.
However, I suspect that most of the win that originally justified that
code was in avoiding the WAR stall on the first send, which this patch
also avoids, rather than the small cost of the extra instruction. We
see instruction count regressions in shaders in unigine, yofrankie,
savage2, hon, and gstreamer.
Improves GLB2.7 performance by 0.633628% +/- 0.491809% (n=121/125, avg of
~66fps, outliers below 61 dropped).
Improves openarena performance by 1.01092% +/- 0.66897% (n=425).
No significant difference on Lightsmark (n=44).
v2: Squash in the fix for register unspilling for send-from-GRF, fixing a
segfault in lightsmark.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Matt Turner <mattst88@gmail.com>
2013-10-10 01:17:59 +01:00
|
|
|
|
2014-08-03 10:23:31 +01:00
|
|
|
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
2012-08-05 04:33:13 +01:00
|
|
|
/* Load the message header if present. If there's a texture offset,
|
|
|
|
* we need to set it up explicitly and load the offset bitfield.
|
|
|
|
* Otherwise, we can use an implied move from g0 to the first message reg.
|
|
|
|
*/
|
2015-03-24 17:17:32 +00:00
|
|
|
if (inst->header_size != 0) {
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen < 6 && !inst->offset) {
|
2014-01-18 20:48:18 +00:00
|
|
|
/* Set up an implied move from g0 to the MRF. */
|
|
|
|
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
i965/fs: Convert gen7 to using GRFs for texture messages.
Looking at Lightsmark's shaders, the way we used MRFs (or in gen7's
case, GRFs) was bad in a couple of ways. One was that it prevented
compute-to-MRF for the common case of a texcoord that gets used
exactly once, but where the texcoord setup all gets emitted before the
texture calls (such as when it's a bare fragment shader input, which
gets interpolated before processing main()). Another was that it
introduced a bunch of dependencies that constrained scheduling, and
forced waits for texture operations to be done before they are
required. For example, we can now move the compute-to-MRF
interpolation for the second texture send down after the first send.
The downside is that this generally prevents
remove_duplicate_mrf_writes() from doing anything, whereas previously
it avoided work for the case of sampling from the same texcoord twice.
However, I suspect that most of the win that originally justified that
code was in avoiding the WAR stall on the first send, which this patch
also avoids, rather than the small cost of the extra instruction. We
see instruction count regressions in shaders in unigine, yofrankie,
savage2, hon, and gstreamer.
Improves GLB2.7 performance by 0.633628% +/- 0.491809% (n=121/125, avg of
~66fps, outliers below 61 dropped).
Improves openarena performance by 1.01092% +/- 0.66897% (n=425).
No significant difference on Lightsmark (n=44).
v2: Squash in the fix for register unspilling for send-from-GRF, fixing a
segfault in lightsmark.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Matt Turner <mattst88@gmail.com>
2013-10-10 01:17:59 +01:00
|
|
|
} else {
|
2014-01-18 20:48:18 +00:00
|
|
|
struct brw_reg header_reg;
|
|
|
|
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 7) {
|
2014-01-18 20:48:18 +00:00
|
|
|
header_reg = src;
|
|
|
|
} else {
|
|
|
|
assert(inst->base_mrf != -1);
|
|
|
|
header_reg = brw_message_reg(inst->base_mrf);
|
|
|
|
}
|
|
|
|
|
2013-10-13 00:20:03 +01:00
|
|
|
brw_push_insn_state(p);
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-01-18 20:48:18 +00:00
|
|
|
/* Explicitly set up the message header by copying g0 to the MRF. */
|
|
|
|
brw_MOV(p, header_reg, brw_vec8_grf(0, 0));
|
|
|
|
|
2014-11-12 19:28:02 +00:00
|
|
|
if (inst->offset) {
|
2014-01-18 20:48:18 +00:00
|
|
|
/* Set the offset bits in DWord 2. */
|
|
|
|
brw_MOV(p, get_element_ud(header_reg, 2),
|
2014-11-12 19:28:02 +00:00
|
|
|
brw_imm_ud(inst->offset));
|
2014-01-18 20:48:18 +00:00
|
|
|
}
|
2014-01-18 21:29:39 +00:00
|
|
|
|
2015-01-22 21:46:44 +00:00
|
|
|
brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
|
2013-10-13 00:20:03 +01:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
2012-08-05 04:33:13 +01:00
|
|
|
}
|
|
|
|
|
2014-08-10 00:58:06 +01:00
|
|
|
uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
|
|
|
|
inst->opcode == SHADER_OPCODE_TG4_OFFSET)
|
2014-08-29 20:50:46 +01:00
|
|
|
? prog_data->binding_table.gather_texture_start
|
|
|
|
: prog_data->binding_table.texture_start;
|
2014-08-10 00:58:06 +01:00
|
|
|
|
|
|
|
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
uint32_t sampler = sampler_index.dw1.ud;
|
|
|
|
|
|
|
|
brw_SAMPLE(p,
|
|
|
|
retype(dst, BRW_REGISTER_TYPE_UW),
|
|
|
|
inst->base_mrf,
|
|
|
|
src,
|
|
|
|
sampler + base_binding_table_index,
|
|
|
|
sampler % 16,
|
|
|
|
msg_type,
|
|
|
|
rlen,
|
|
|
|
inst->mlen,
|
2015-03-24 17:17:32 +00:00
|
|
|
inst->header_size != 0,
|
2014-08-10 00:58:06 +01:00
|
|
|
simd_mode,
|
|
|
|
return_format);
|
|
|
|
|
2014-08-29 20:50:46 +01:00
|
|
|
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
|
2014-08-10 00:58:06 +01:00
|
|
|
} else {
|
2014-08-10 01:02:22 +01:00
|
|
|
/* Non-const sampler index */
|
|
|
|
|
|
|
|
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
|
|
|
|
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
2015-05-28 15:27:31 +01:00
|
|
|
/* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
|
2015-06-03 01:46:38 +01:00
|
|
|
brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
|
2015-05-29 13:41:48 +01:00
|
|
|
if (base_binding_table_index)
|
|
|
|
brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
|
2015-05-28 15:27:31 +01:00
|
|
|
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
|
2014-08-10 01:02:22 +01:00
|
|
|
|
2015-02-26 15:24:03 +00:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
i965: Factor out logic to build a send message instruction with indirect descriptor.
This is going to be useful because the Gen7+ uniform and varying pull
constant, texturing, typed and untyped surface read, write, and atomic
generation code on the vec4 and fs back-end all require the same logic
to handle conditionally indirect surface indices. In pseudocode:
| if (surface.file == BRW_IMMEDIATE_VALUE) {
| inst = brw_SEND(p, dst, payload);
| set_descriptor_control_bits(inst, surface, ...);
| } else {
| inst = brw_OR(p, addr, surface, 0);
| set_descriptor_control_bits(inst, ...);
| inst = brw_SEND(p, dst, payload);
| set_indirect_send_descriptor(inst, addr);
| }
This patch abstracts out this frequently recurring pattern so we can
now write:
| inst = brw_send_indirect_message(p, sfid, dst, payload, surface)
| set_descriptor_control_bits(inst, ...);
without worrying about handling the immediate and indirect surface
index cases explicitly.
v2: Rebase. Improve documentation and commit message. (Topi)
Preserve UW destination type cargo-cult. (Topi, Ken, Matt)
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2015-03-19 13:44:24 +00:00
|
|
|
/* dst = send(offset, a0.0 | <descriptor>) */
|
|
|
|
brw_inst *insn = brw_send_indirect_message(
|
|
|
|
p, BRW_SFID_SAMPLER, dst, src, addr);
|
|
|
|
brw_set_sampler_message(p, insn,
|
2014-08-10 01:02:22 +01:00
|
|
|
0 /* surface */,
|
|
|
|
0 /* sampler */,
|
|
|
|
msg_type,
|
|
|
|
rlen,
|
|
|
|
inst->mlen /* mlen */,
|
2015-03-24 17:17:32 +00:00
|
|
|
inst->header_size != 0 /* header */,
|
2014-08-10 01:02:22 +01:00
|
|
|
simd_mode,
|
|
|
|
return_format);
|
|
|
|
|
|
|
|
/* visitor knows more than we do about the surface limit required,
|
|
|
|
* so has already done marking.
|
|
|
|
*/
|
2014-08-10 00:58:06 +01:00
|
|
|
}
|
2015-02-08 21:59:57 +00:00
|
|
|
|
|
|
|
if (is_combined_send) {
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_eot(p->devinfo, brw_last_inst, true);
|
|
|
|
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
|
2015-02-08 21:59:57 +00:00
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
|
|
|
|
* looking like:
|
|
|
|
*
|
|
|
|
* arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
|
|
|
|
*
|
2013-09-12 06:00:52 +01:00
|
|
|
* Ideally, we want to produce:
|
2011-05-25 00:34:27 +01:00
|
|
|
*
|
|
|
|
* DDX DDY
|
|
|
|
* dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
|
|
|
|
* (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
|
|
|
|
* (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
|
|
|
|
* (ss0.br - ss0.bl) (ss0.tr - ss0.br)
|
|
|
|
* (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
|
|
|
|
* (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
|
|
|
|
* (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
|
|
|
|
* (ss1.br - ss1.bl) (ss1.tr - ss1.br)
|
|
|
|
*
|
|
|
|
* and add another set of two more subspans if in 16-pixel dispatch mode.
|
|
|
|
*
|
|
|
|
* For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
|
|
|
|
* for each pair, and vertstride = 2 jumps us 2 elements after processing a
|
2013-09-12 06:00:52 +01:00
|
|
|
* pair. But the ideal approximation may impose a huge performance cost on
|
|
|
|
* sample_d. On at least Haswell, sample_d instruction does some
|
|
|
|
* optimizations if the same LOD is used for all pixels in the subspan.
|
|
|
|
*
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0>F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
* For DDY, we need to use ALIGN16 mode since it's capable of doing the
|
|
|
|
* appropriate swizzling.
|
2011-05-25 00:34:27 +01:00
|
|
|
*/
|
|
|
|
void
|
2014-11-08 09:39:14 +00:00
|
|
|
fs_generator::generate_ddx(enum opcode opcode,
|
|
|
|
struct brw_reg dst, struct brw_reg src)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2013-09-12 06:00:52 +01:00
|
|
|
unsigned vstride, width;
|
|
|
|
|
2014-11-08 09:39:14 +00:00
|
|
|
if (opcode == FS_OPCODE_DDX_FINE) {
|
2013-09-12 06:00:52 +01:00
|
|
|
/* produce accurate derivatives */
|
|
|
|
vstride = BRW_VERTICAL_STRIDE_2;
|
|
|
|
width = BRW_WIDTH_2;
|
2014-11-08 09:39:14 +00:00
|
|
|
} else {
|
2013-09-12 06:00:52 +01:00
|
|
|
/* replicate the derivative at the top-left pixel to other pixels */
|
|
|
|
vstride = BRW_VERTICAL_STRIDE_4;
|
|
|
|
width = BRW_WIDTH_4;
|
|
|
|
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
2011-05-25 00:34:27 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
2013-09-12 06:00:52 +01:00
|
|
|
vstride,
|
|
|
|
width,
|
2011-05-25 00:34:27 +01:00
|
|
|
BRW_HORIZONTAL_STRIDE_0,
|
|
|
|
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
|
|
|
struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
2011-05-25 00:34:27 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
2013-09-12 06:00:52 +01:00
|
|
|
vstride,
|
|
|
|
width,
|
2011-05-25 00:34:27 +01:00
|
|
|
BRW_HORIZONTAL_STRIDE_0,
|
|
|
|
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
|
|
|
brw_ADD(p, dst, src0, negate(src1));
|
|
|
|
}
|
|
|
|
|
2012-06-20 21:40:45 +01:00
|
|
|
/* The negate_value boolean is used to negate the derivative computation for
|
|
|
|
* FBOs, since they place the origin at the upper left instead of the lower
|
|
|
|
* left.
|
|
|
|
*/
|
2011-05-25 00:34:27 +01:00
|
|
|
void
|
2014-11-08 09:39:14 +00:00
|
|
|
fs_generator::generate_ddy(enum opcode opcode,
|
|
|
|
struct brw_reg dst, struct brw_reg src,
|
|
|
|
bool negate_value)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2014-11-08 09:39:14 +00:00
|
|
|
if (opcode == FS_OPCODE_DDY_FINE) {
|
i965/fs: Only unroll high-accuracy dFdy() from SIMD16 to SIMD8 on gen4 and IVB.
In commit 800610f (i965/fs: Improve accuracy of dFdy() to match
dFdx()) I unrolled the high-accuracy dFdy() computation from a single
SIMD16 instruction to two SIMD8 instructions because of text I found
in the i965 (gen4) PRM saying that instruction compression could not
be used in align16 mode. I couldn't find similar text in later
hardware docs, and I observed problems trying to use instruction
compression on align16 mode on Ivy Bridge, so I assumed that the
restriction still applied and the associated documentation had simply
been lost.
After consultation with the hardware engineers, it turns out this is
not the case. In point of fact, the restriction was dropped in gen5,
re-introduced in Ivy Bridge, and dropped again in Haswell. The reason
I didn't notice this is that in the Ivy Bridge documentation, the
restriction was in a different section, and described using different
language.
Now that we know that the restriction only applies to Gen4 and Ivy
Bridge, we can limit the unrolling to those platforms.
Tested on gen5, gen6, and gen7 (both Ivy Bridge and Haswell).
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-10-22 13:56:37 +01:00
|
|
|
/* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
|
|
|
|
* Region Restrictions):
|
|
|
|
*
|
|
|
|
* In Align16 access mode, SIMD16 is not allowed for DW operations
|
|
|
|
* and SIMD8 is not allowed for DF operations.
|
|
|
|
*
|
|
|
|
* In this context, "DW operations" means "operations acting on 32-bit
|
|
|
|
* values", so it includes operations on floats.
|
|
|
|
*
|
|
|
|
* Gen4 has a similar restriction. From the i965 PRM, section 11.5.3
|
|
|
|
* (Instruction Compression -> Rules and Restrictions):
|
|
|
|
*
|
|
|
|
* A compressed instruction must be in Align1 access mode. Align16
|
|
|
|
* mode instructions cannot be compressed.
|
|
|
|
*
|
|
|
|
* Similar text exists in the g45 PRM.
|
|
|
|
*
|
|
|
|
* On these platforms, if we're building a SIMD16 shader, we need to
|
|
|
|
* manually unroll to a pair of SIMD8 instructions.
|
|
|
|
*/
|
|
|
|
bool unroll_to_simd8 =
|
|
|
|
(dispatch_width == 16 &&
|
2015-04-15 01:45:40 +01:00
|
|
|
(devinfo->gen == 4 || (devinfo->gen == 7 && !devinfo->is_haswell)));
|
i965/fs: Only unroll high-accuracy dFdy() from SIMD16 to SIMD8 on gen4 and IVB.
In commit 800610f (i965/fs: Improve accuracy of dFdy() to match
dFdx()) I unrolled the high-accuracy dFdy() computation from a single
SIMD16 instruction to two SIMD8 instructions because of text I found
in the i965 (gen4) PRM saying that instruction compression could not
be used in align16 mode. I couldn't find similar text in later
hardware docs, and I observed problems trying to use instruction
compression on align16 mode on Ivy Bridge, so I assumed that the
restriction still applied and the associated documentation had simply
been lost.
After consultation with the hardware engineers, it turns out this is
not the case. In point of fact, the restriction was dropped in gen5,
re-introduced in Ivy Bridge, and dropped again in Haswell. The reason
I didn't notice this is that in the Ivy Bridge documentation, the
restriction was in a different section, and described using different
language.
Now that we know that the restriction only applies to Gen4 and Ivy
Bridge, we can limit the unrolling to those platforms.
Tested on gen5, gen6, and gen7 (both Ivy Bridge and Haswell).
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-10-22 13:56:37 +01:00
|
|
|
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
/* produce accurate derivatives */
|
|
|
|
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
|
|
|
BRW_VERTICAL_STRIDE_4,
|
|
|
|
BRW_WIDTH_4,
|
|
|
|
BRW_HORIZONTAL_STRIDE_1,
|
|
|
|
BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
|
|
|
|
struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
|
|
|
BRW_VERTICAL_STRIDE_4,
|
|
|
|
BRW_WIDTH_4,
|
|
|
|
BRW_HORIZONTAL_STRIDE_1,
|
|
|
|
BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
|
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
i965/fs: Only unroll high-accuracy dFdy() from SIMD16 to SIMD8 on gen4 and IVB.
In commit 800610f (i965/fs: Improve accuracy of dFdy() to match
dFdx()) I unrolled the high-accuracy dFdy() computation from a single
SIMD16 instruction to two SIMD8 instructions because of text I found
in the i965 (gen4) PRM saying that instruction compression could not
be used in align16 mode. I couldn't find similar text in later
hardware docs, and I observed problems trying to use instruction
compression on align16 mode on Ivy Bridge, so I assumed that the
restriction still applied and the associated documentation had simply
been lost.
After consultation with the hardware engineers, it turns out this is
not the case. In point of fact, the restriction was dropped in gen5,
re-introduced in Ivy Bridge, and dropped again in Haswell. The reason
I didn't notice this is that in the Ivy Bridge documentation, the
restriction was in a different section, and described using different
language.
Now that we know that the restriction only applies to Gen4 and Ivy
Bridge, we can limit the unrolling to those platforms.
Tested on gen5, gen6, and gen7 (both Ivy Bridge and Haswell).
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-10-22 13:56:37 +01:00
|
|
|
if (unroll_to_simd8) {
|
2015-05-12 12:24:08 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-08-13 20:23:47 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
if (negate_value) {
|
|
|
|
brw_ADD(p, firsthalf(dst), firsthalf(src1), negate(firsthalf(src0)));
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
|
|
|
brw_ADD(p, sechalf(dst), sechalf(src1), negate(sechalf(src0)));
|
|
|
|
} else {
|
|
|
|
brw_ADD(p, firsthalf(dst), firsthalf(src0), negate(firsthalf(src1)));
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
|
|
|
brw_ADD(p, sechalf(dst), sechalf(src0), negate(sechalf(src1)));
|
|
|
|
}
|
|
|
|
} else {
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
if (negate_value)
|
|
|
|
brw_ADD(p, dst, src1, negate(src0));
|
|
|
|
else
|
|
|
|
brw_ADD(p, dst, src0, negate(src1));
|
|
|
|
}
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
} else {
|
|
|
|
/* replicate the derivative at the top-left pixel to other pixels */
|
|
|
|
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
|
|
|
BRW_VERTICAL_STRIDE_4,
|
|
|
|
BRW_WIDTH_4,
|
|
|
|
BRW_HORIZONTAL_STRIDE_0,
|
|
|
|
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
|
|
|
struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
|
2014-12-12 16:19:07 +00:00
|
|
|
src.negate, src.abs,
|
i965/fs: Improve accuracy of dFdy() to match dFdx().
Previously, we computed dFdy() using the following instruction:
add(8) dst<1>F src<4,4,0)F -src.2<4,4,0>F { align1 1Q }
That had the disadvantage that it computed the same value for all 4
pixels of a 2x2 subspan, which meant that it was less accurate than
dFdx(). This patch changes it to the following instruction when
c->key.high_quality_derivatives is set:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
This gives it comparable accuracy to dFdx().
Unfortunately, align16 instructions can't be compressed, so in SIMD16
shaders, instead of emitting this instruction:
add(16) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1H }
We need to unroll to two instructions:
add(8) dst<1>F src<4,4,1>.xyxyF -src<4,4,1>.zwzwF { align16 1Q }
add(8) (dst+1)<1>F (src+1)<4,4,1>.xyxyF -(src+1)<4,4,1>.zwzwF { align16 2Q }
Fixes piglit test spec/glsl-1.10/execution/fs-dfdy-accuracy.
Acked-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Eric Anholt <eric@anholt.net>
2013-09-20 17:04:31 +01:00
|
|
|
BRW_REGISTER_TYPE_F,
|
|
|
|
BRW_VERTICAL_STRIDE_4,
|
|
|
|
BRW_WIDTH_4,
|
|
|
|
BRW_HORIZONTAL_STRIDE_0,
|
|
|
|
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
|
|
|
if (negate_value)
|
|
|
|
brw_ADD(p, dst, src1, negate(src0));
|
|
|
|
else
|
|
|
|
brw_ADD(p, dst, src0, negate(src1));
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2012-12-06 18:15:08 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_discard_jump(fs_inst *inst)
|
|
|
|
{
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 6);
|
2012-12-06 18:15:08 +00:00
|
|
|
|
|
|
|
/* This HALT will be patched up at FB write time to point UIP at the end of
|
|
|
|
* the program, and at brw_uip_jip() JIP will be set to the end of the
|
|
|
|
* current block (or the program).
|
|
|
|
*/
|
|
|
|
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2012-12-06 18:15:08 +00:00
|
|
|
gen6_HALT(p);
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
void
|
2013-10-16 19:45:06 +01:00
|
|
|
fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
|
|
|
assert(inst->mlen != 0);
|
|
|
|
|
|
|
|
brw_MOV(p,
|
2014-10-24 19:35:51 +01:00
|
|
|
brw_uvec_mrf(inst->exec_size, (inst->base_mrf + 1), 0),
|
2011-05-25 00:34:27 +01:00
|
|
|
retype(src, BRW_REGISTER_TYPE_UD));
|
2013-10-16 20:16:51 +01:00
|
|
|
brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),
|
2014-10-24 19:35:51 +01:00
|
|
|
inst->exec_size / 8, inst->offset);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2013-10-16 19:45:06 +01:00
|
|
|
fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
|
|
|
assert(inst->mlen != 0);
|
|
|
|
|
2013-10-16 20:16:51 +01:00
|
|
|
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf),
|
2014-10-24 19:35:51 +01:00
|
|
|
inst->exec_size / 8, inst->offset);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2013-10-16 19:51:22 +01:00
|
|
|
void
|
|
|
|
fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
|
|
|
|
{
|
2014-10-24 19:35:51 +01:00
|
|
|
gen7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
|
2013-10-16 19:51:22 +01:00
|
|
|
}
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
void
|
2012-11-07 18:42:34 +00:00
|
|
|
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg index,
|
|
|
|
struct brw_reg offset)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
|
|
|
assert(inst->mlen != 0);
|
|
|
|
|
2012-06-20 23:41:14 +01:00
|
|
|
assert(index.file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
index.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
uint32_t surf_index = index.dw1.ud;
|
|
|
|
|
|
|
|
assert(offset.file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
offset.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
uint32_t read_offset = offset.dw1.ud;
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
|
2012-06-20 23:41:14 +01:00
|
|
|
read_offset, surf_index);
|
2013-08-15 03:49:33 +01:00
|
|
|
|
2014-08-29 20:50:46 +01:00
|
|
|
brw_mark_surface_used(prog_data, surf_index);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
2012-12-05 08:06:30 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg index,
|
|
|
|
struct brw_reg offset)
|
|
|
|
{
|
2014-08-02 03:27:21 +01:00
|
|
|
assert(index.type == BRW_REGISTER_TYPE_UD);
|
2012-12-05 08:06:30 +00:00
|
|
|
|
|
|
|
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
|
2013-03-06 22:47:22 +00:00
|
|
|
/* Reference just the dword we need, to avoid angering validate_reg(). */
|
|
|
|
offset = brw_vec1_grf(offset.nr, 0);
|
2012-12-05 08:06:30 +00:00
|
|
|
|
2013-03-06 22:47:22 +00:00
|
|
|
/* We use the SIMD4x2 mode because we want to end up with 4 components in
|
|
|
|
* the destination loaded consecutively from the same offset (which appears
|
|
|
|
* in the first component, and the rest are ignored).
|
|
|
|
*/
|
|
|
|
dst.width = BRW_WIDTH_4;
|
2013-08-15 03:49:33 +01:00
|
|
|
|
2014-12-10 22:59:26 +00:00
|
|
|
struct brw_reg src = offset;
|
|
|
|
bool header_present = false;
|
|
|
|
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 9) {
|
2014-12-10 22:59:26 +00:00
|
|
|
/* Skylake requires a message header in order to use SIMD4x2 mode. */
|
2015-06-19 20:58:37 +01:00
|
|
|
src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD);
|
2014-12-10 22:59:26 +00:00
|
|
|
header_present = true;
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2015-04-23 18:09:52 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2015-04-10 17:20:21 +01:00
|
|
|
brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
2014-12-10 22:59:26 +00:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
|
|
brw_MOV(p, get_element_ud(src, 2),
|
|
|
|
brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2014-08-02 03:27:21 +01:00
|
|
|
if (index.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
|
|
uint32_t surf_index = index.dw1.ud;
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
|
|
brw_set_dest(p, send, dst);
|
2014-12-10 22:59:26 +00:00
|
|
|
brw_set_src0(p, send, src);
|
2014-08-02 03:27:21 +01:00
|
|
|
brw_set_sampler_message(p, send,
|
|
|
|
surf_index,
|
|
|
|
0, /* LD message ignores sampler unit */
|
|
|
|
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
|
|
|
|
1, /* rlen */
|
2015-06-19 20:58:37 +01:00
|
|
|
inst->mlen,
|
2014-12-10 22:59:26 +00:00
|
|
|
header_present,
|
2014-08-02 03:27:21 +01:00
|
|
|
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
|
|
|
|
0);
|
|
|
|
|
2014-08-29 20:50:46 +01:00
|
|
|
brw_mark_surface_used(prog_data, surf_index);
|
2014-08-02 03:27:21 +01:00
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
|
|
/* a0.0 = surf_index & 0xff */
|
|
|
|
brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
|
2014-08-02 03:27:21 +01:00
|
|
|
brw_set_dest(p, insn_and, addr);
|
|
|
|
brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
|
|
|
|
brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
|
|
|
|
|
i965: Factor out logic to build a send message instruction with indirect descriptor.
This is going to be useful because the Gen7+ uniform and varying pull
constant, texturing, typed and untyped surface read, write, and atomic
generation code on the vec4 and fs back-end all require the same logic
to handle conditionally indirect surface indices. In pseudocode:
| if (surface.file == BRW_IMMEDIATE_VALUE) {
| inst = brw_SEND(p, dst, payload);
| set_descriptor_control_bits(inst, surface, ...);
| } else {
| inst = brw_OR(p, addr, surface, 0);
| set_descriptor_control_bits(inst, ...);
| inst = brw_SEND(p, dst, payload);
| set_indirect_send_descriptor(inst, addr);
| }
This patch abstracts out this frequently recurring pattern so we can
now write:
| inst = brw_send_indirect_message(p, sfid, dst, payload, surface)
| set_descriptor_control_bits(inst, ...);
without worrying about handling the immediate and indirect surface
index cases explicitly.
v2: Rebase. Improve documentatation and commit message. (Topi)
Preserve UW destination type cargo-cult. (Topi, Ken, Matt)
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2015-03-19 13:44:24 +00:00
|
|
|
/* dst = send(payload, a0.0 | <descriptor>) */
|
|
|
|
brw_inst *insn = brw_send_indirect_message(
|
|
|
|
p, BRW_SFID_SAMPLER, dst, src, addr);
|
|
|
|
brw_set_sampler_message(p, insn,
|
|
|
|
0,
|
|
|
|
0, /* LD message ignores sampler unit */
|
2014-08-02 03:27:21 +01:00
|
|
|
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
|
i965: Factor out logic to build a send message instruction with indirect descriptor.
This is going to be useful because the Gen7+ uniform and varying pull
constant, texturing, typed and untyped surface read, write, and atomic
generation code on the vec4 and fs back-end all require the same logic
to handle conditionally indirect surface indices. In pseudocode:
| if (surface.file == BRW_IMMEDIATE_VALUE) {
| inst = brw_SEND(p, dst, payload);
| set_descriptor_control_bits(inst, surface, ...);
| } else {
| inst = brw_OR(p, addr, surface, 0);
| set_descriptor_control_bits(inst, ...);
| inst = brw_SEND(p, dst, payload);
| set_indirect_send_descriptor(inst, addr);
| }
This patch abstracts out this frequently recurring pattern so we can
now write:
| inst = brw_send_indirect_message(p, sfid, dst, payload, surface)
| set_descriptor_control_bits(inst, ...);
without worrying about handling the immediate and indirect surface
index cases explicitly.
v2: Rebase. Improve documentatation and commit message. (Topi)
Preserve UW destination type cargo-cult. (Topi, Ken, Matt)
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2015-03-19 13:44:24 +00:00
|
|
|
1, /* rlen */
|
2015-06-19 20:58:37 +01:00
|
|
|
inst->mlen,
|
2014-12-10 22:59:26 +00:00
|
|
|
header_present,
|
2014-08-02 03:27:21 +01:00
|
|
|
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
|
|
|
|
0);
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
|
|
/* visitor knows more than we do about the surface limit required,
|
|
|
|
* so has already done marking.
|
|
|
|
*/
|
|
|
|
|
|
|
|
}
|
2012-12-05 08:06:30 +00:00
|
|
|
}
|
|
|
|
|
2012-11-07 19:18:34 +00:00
|
|
|
/**
 * Generate a pre-Gen7 varying-offset pull constant load.
 *
 * The load is implemented as a sampler LD message: the per-channel offset is
 * written into an MRF and the sampler reads the constant surface at that
 * offset.  The destination receives the sampled data.
 *
 * \param inst    IR instruction; supplies base_mrf, mlen, header_size.
 * \param dst     destination register for the loaded constants.
 * \param index   surface index; must be an immediate UD here.
 * \param offset  per-channel byte offset source moved into the message.
 */
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(devinfo->gen < 7); /* Should use the gen7 variant. */
   assert(inst->header_size != 0);
   assert(inst->mlen);

   /* Pre-Gen7 this path only handles immediate surface indices. */
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   uint32_t simd_mode, rlen, msg_type;
   if (dispatch_width == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      rlen = 8;
   } else {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      rlen = 4;
   }

   if (devinfo->gen >= 5)
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
   else {
      /* We always use the SIMD16 message so that we only have to load U, and
       * not V or R.
       */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
      assert(inst->mlen == 3);
      assert(inst->regs_written == 8);
      /* Gen4 overrides the SIMD8 settings chosen above: the SIMD16 LD
       * message always returns 8 registers.
       */
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   }

   /* Write the offsets into the second message register (the first is the
    * header resolved below).
    */
   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
                                      BRW_REGISTER_TYPE_D);
   brw_MOV(p, offset_mrf, offset);

   /* Copy g0 into the message header (implied move on Gen6+). */
   struct brw_reg header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_qtr_control(p->devinfo, send, BRW_COMPRESSION_NONE);
   brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
   brw_set_src0(p, send, header);
   if (devinfo->gen < 6)
      brw_inst_set_base_mrf(p->devinfo, send, inst->base_mrf);

   /* Our surface is set up as floats, regardless of what actual data is
    * stored in it.
    */
   uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* sampler (unused) */
                           msg_type,
                           rlen,
                           inst->mlen,
                           inst->header_size != 0,
                           simd_mode,
                           return_format);

   brw_mark_surface_used(prog_data, surf_index);
}
|
|
|
|
|
|
|
|
/**
 * Generate a Gen7+ varying-offset pull constant load.
 *
 * On Gen7 the offsets are taken directly from \p offset as the message
 * payload (no header, no MRF staging).  The surface index may be either an
 * immediate — emitted as a plain SEND — or a dynamically computed register,
 * in which case the message descriptor is built in address register a0.0 and
 * an indirect send is emitted.
 *
 * \param inst    IR instruction; must have no header and no MRF payload.
 * \param dst     destination register for the loaded constants.
 * \param index   surface index, immediate or GRF, type UD.
 * \param offset  per-channel offsets used directly as the message payload.
 */
void
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg offset)
{
   assert(devinfo->gen >= 7);
   /* Varying-offset pull constant loads are treated as a normal expression on
    * gen7, so the fact that it's a send message is hidden at the IR level.
    */
   assert(inst->header_size == 0);
   assert(!inst->mlen);
   assert(index.type == BRW_REGISTER_TYPE_UD);

   uint32_t simd_mode, rlen, mlen;
   if (dispatch_width == 16) {
      mlen = 2;
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mlen = 1;
      rlen = 4;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   if (index.file == BRW_IMMEDIATE_VALUE) {
      /* Constant surface index: the descriptor fits in the instruction. */

      uint32_t surf_index = index.dw1.ud;

      brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
      brw_set_src0(p, send, offset);
      brw_set_sampler_message(p, send,
                              surf_index,
                              0, /* LD message ignores sampler unit */
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              rlen,
                              mlen,
                              false, /* no header */
                              simd_mode,
                              0);

      brw_mark_surface_used(prog_data, surf_index);

   } else {
      /* Dynamic surface index: build the descriptor in a0.0 and emit an
       * indirect send.
       */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* a0.0 = surf_index & 0xff */
      brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
      brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
      brw_set_dest(p, insn_and, addr);
      brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
      brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));

      brw_pop_insn_state(p);

      /* dst = send(offset, a0.0 | <descriptor>) */
      brw_inst *insn = brw_send_indirect_message(
         p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW),
         offset, addr);
      brw_set_sampler_message(p, insn,
                              0 /* surface */,
                              0 /* sampler */,
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              rlen /* rlen */,
                              mlen /* mlen */,
                              false /* header */,
                              simd_mode,
                              0);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
|
2012-06-18 22:50:04 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
|
|
|
|
* into the flags register (f0.0).
|
|
|
|
*
|
|
|
|
* Used only on Gen6 and above.
|
|
|
|
*/
|
|
|
|
void
|
2012-12-06 18:36:11 +00:00
|
|
|
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
|
2012-06-18 22:50:04 +01:00
|
|
|
{
|
2012-12-06 18:36:11 +00:00
|
|
|
struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
|
2012-12-06 20:15:13 +00:00
|
|
|
struct brw_reg dispatch_mask;
|
|
|
|
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 6)
|
2012-12-06 20:15:13 +00:00
|
|
|
dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
|
|
|
else
|
|
|
|
dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
2012-06-18 22:50:04 +01:00
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2012-12-06 20:15:13 +00:00
|
|
|
brw_MOV(p, flags, dispatch_mask);
|
2012-06-18 22:50:04 +01:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2013-11-18 08:13:13 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg src,
|
|
|
|
struct brw_reg msg_data,
|
|
|
|
unsigned msg_type)
|
|
|
|
{
|
|
|
|
assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
msg_data.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
brw_pixel_interpolator_query(p,
|
|
|
|
retype(dst, BRW_REGISTER_TYPE_UW),
|
|
|
|
src,
|
|
|
|
inst->pi_noperspective,
|
|
|
|
msg_type,
|
|
|
|
msg_data.dw1.ud,
|
|
|
|
inst->mlen,
|
|
|
|
inst->regs_written);
|
|
|
|
}
|
|
|
|
|
2012-06-18 22:50:04 +01:00
|
|
|
|
2012-12-05 08:06:30 +00:00
|
|
|
/**
|
2013-03-06 22:47:22 +00:00
|
|
|
* Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant
|
|
|
|
* sampler LD messages.
|
2012-12-05 08:06:30 +00:00
|
|
|
*
|
2013-03-06 22:47:22 +00:00
|
|
|
* We don't want to bake it into the send message's code generation because
|
|
|
|
* that means we don't get a chance to schedule the instructions.
|
2012-12-05 08:06:30 +00:00
|
|
|
*/
|
|
|
|
void
|
2013-03-06 22:47:22 +00:00
|
|
|
fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg value)
|
2012-12-05 08:06:30 +00:00
|
|
|
{
|
|
|
|
assert(value.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2013-03-06 22:47:22 +00:00
|
|
|
brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
|
2012-12-05 08:06:30 +00:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2013-10-25 00:21:13 +01:00
|
|
|
/* Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0
|
|
|
|
* (when mask is passed as a uniform) of register mask before moving it
|
|
|
|
* to register dst.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
fs_generator::generate_set_omask(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg mask)
|
|
|
|
{
|
|
|
|
bool stride_8_8_1 =
|
|
|
|
(mask.vstride == BRW_VERTICAL_STRIDE_8 &&
|
|
|
|
mask.width == BRW_WIDTH_8 &&
|
|
|
|
mask.hstride == BRW_HORIZONTAL_STRIDE_1);
|
|
|
|
|
2014-12-23 03:29:22 +00:00
|
|
|
bool stride_0_1_0 = has_scalar_region(mask);
|
2013-10-25 00:21:13 +01:00
|
|
|
|
|
|
|
assert(stride_8_8_1 || stride_0_1_0);
|
|
|
|
assert(dst.type == BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2013-10-25 00:21:13 +01:00
|
|
|
|
|
|
|
if (stride_8_8_1) {
|
2014-02-10 23:37:09 +00:00
|
|
|
brw_MOV(p, dst, retype(stride(mask, 16, 8, 2), dst.type));
|
2013-10-25 00:21:13 +01:00
|
|
|
} else if (stride_0_1_0) {
|
2014-02-10 23:37:09 +00:00
|
|
|
brw_MOV(p, dst, retype(mask, dst.type));
|
2013-10-25 00:21:13 +01:00
|
|
|
}
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2013-10-25 00:17:08 +01:00
|
|
|
/* Sets vstride=1, width=4, hstride=0 of register src1 during
|
|
|
|
* the ADD instruction.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
fs_generator::generate_set_sample_id(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg src0,
|
|
|
|
struct brw_reg src1)
|
|
|
|
{
|
|
|
|
assert(dst.type == BRW_REGISTER_TYPE_D ||
|
|
|
|
dst.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
assert(src0.type == BRW_REGISTER_TYPE_D ||
|
|
|
|
src0.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2014-02-10 22:46:49 +00:00
|
|
|
struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
|
2014-08-13 20:23:47 +01:00
|
|
|
if (dispatch_width == 8) {
|
|
|
|
brw_ADD(p, dst, src0, reg);
|
|
|
|
} else if (dispatch_width == 16) {
|
|
|
|
brw_ADD(p, firsthalf(dst), firsthalf(src0), reg);
|
|
|
|
brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2));
|
|
|
|
}
|
2013-10-25 00:17:08 +01:00
|
|
|
brw_pop_insn_state(p);
|
|
|
|
}
|
|
|
|
|
2013-01-09 19:46:42 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg x,
|
|
|
|
struct brw_reg y)
|
|
|
|
{
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
assert(dst.type == BRW_REGISTER_TYPE_UD);
|
2013-01-26 07:27:50 +00:00
|
|
|
assert(x.type == BRW_REGISTER_TYPE_F);
|
|
|
|
assert(y.type == BRW_REGISTER_TYPE_F);
|
2013-01-09 19:46:42 +00:00
|
|
|
|
|
|
|
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
|
|
|
|
*
|
|
|
|
* Because this instruction does not have a 16-bit floating-point type,
|
|
|
|
* the destination data type must be Word (W).
|
|
|
|
*
|
|
|
|
* The destination must be DWord-aligned and specify a horizontal stride
|
|
|
|
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
|
|
|
|
* each destination channel and the upper word is not modified.
|
|
|
|
*/
|
2015-02-04 15:58:49 +00:00
|
|
|
struct brw_reg dst_w = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
|
2013-01-09 19:46:42 +00:00
|
|
|
|
2015-02-04 15:58:49 +00:00
|
|
|
/* Give each 32-bit channel of dst the form below, where "." means
|
2013-01-09 19:46:42 +00:00
|
|
|
* unchanged.
|
|
|
|
* 0x....hhhh
|
|
|
|
*/
|
|
|
|
brw_F32TO16(p, dst_w, y);
|
|
|
|
|
|
|
|
/* Now the form:
|
|
|
|
* 0xhhhh0000
|
|
|
|
*/
|
|
|
|
brw_SHL(p, dst, dst, brw_imm_ud(16u));
|
|
|
|
|
|
|
|
/* And, finally the form of packHalf2x16's output:
|
|
|
|
* 0xhhhhllll
|
|
|
|
*/
|
|
|
|
brw_F32TO16(p, dst_w, x);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
|
|
|
|
struct brw_reg dst,
|
|
|
|
struct brw_reg src)
|
|
|
|
{
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
assert(dst.type == BRW_REGISTER_TYPE_F);
|
|
|
|
assert(src.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
/* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
|
|
|
|
*
|
|
|
|
* Because this instruction does not have a 16-bit floating-point type,
|
|
|
|
* the source data type must be Word (W). The destination type must be
|
|
|
|
* F (Float).
|
|
|
|
*/
|
2015-02-04 15:58:49 +00:00
|
|
|
struct brw_reg src_w = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
|
2013-01-09 19:46:42 +00:00
|
|
|
|
|
|
|
/* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
|
|
|
|
* For the Y case, we wish to access only the upper word; therefore
|
|
|
|
* a 16-bit subregister offset is needed.
|
|
|
|
*/
|
|
|
|
assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
|
|
|
|
inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
|
|
|
|
if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
|
2013-01-25 05:48:40 +00:00
|
|
|
src_w.subnr += 2;
|
2013-01-09 19:46:42 +00:00
|
|
|
|
|
|
|
brw_F16TO32(p, dst, src_w);
|
|
|
|
}
|
|
|
|
|
2013-03-19 22:28:11 +00:00
|
|
|
void
|
|
|
|
fs_generator::generate_shader_time_add(fs_inst *inst,
|
|
|
|
struct brw_reg payload,
|
|
|
|
struct brw_reg offset,
|
|
|
|
struct brw_reg value)
|
|
|
|
{
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-03-19 22:28:11 +00:00
|
|
|
brw_push_insn_state(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_mask_control(p, true);
|
2013-03-19 22:28:11 +00:00
|
|
|
|
|
|
|
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
|
|
|
|
struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
|
|
|
|
offset.type);
|
|
|
|
struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
|
|
|
|
value.type);
|
|
|
|
|
|
|
|
assert(offset.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
if (value.file == BRW_GENERAL_REGISTER_FILE) {
|
|
|
|
value.width = BRW_WIDTH_1;
|
|
|
|
value.hstride = BRW_HORIZONTAL_STRIDE_0;
|
|
|
|
value.vstride = BRW_VERTICAL_STRIDE_0;
|
|
|
|
} else {
|
|
|
|
assert(value.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Trying to deal with setup of the params from the IR is crazy in the FS8
|
|
|
|
* case, and we don't really care about squeezing every bit of performance
|
|
|
|
* out of this path, so we just emit the MOVs from here.
|
|
|
|
*/
|
|
|
|
brw_MOV(p, payload_offset, offset);
|
|
|
|
brw_MOV(p, payload_value, value);
|
2013-10-02 22:07:40 +01:00
|
|
|
brw_shader_time_add(p, payload,
|
2014-08-29 20:50:46 +01:00
|
|
|
prog_data->binding_table.shader_time_start);
|
2013-03-19 22:28:11 +00:00
|
|
|
brw_pop_insn_state(p);
|
2013-08-15 03:49:33 +01:00
|
|
|
|
2014-08-29 20:50:46 +01:00
|
|
|
brw_mark_surface_used(prog_data,
|
|
|
|
prog_data->binding_table.shader_time_start);
|
2013-03-19 22:28:11 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 02:40:47 +00:00
|
|
|
void
|
|
|
|
fs_generator::enable_debug(const char *shader_name)
|
|
|
|
{
|
|
|
|
debug_flag = true;
|
|
|
|
this->shader_name = shader_name;
|
|
|
|
}
|
|
|
|
|
2014-11-14 00:28:08 +00:00
|
|
|
int
|
|
|
|
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
2011-05-25 00:34:27 +01:00
|
|
|
{
|
2014-11-14 00:28:08 +00:00
|
|
|
/* align to 64 byte boundary. */
|
|
|
|
while (p->next_insn_offset % 64)
|
|
|
|
brw_NOP(p);
|
|
|
|
|
|
|
|
this->dispatch_width = dispatch_width;
|
|
|
|
if (dispatch_width == 16)
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
|
|
|
|
2014-05-25 18:42:32 +01:00
|
|
|
int start_offset = p->next_insn_offset;
|
2015-03-16 19:18:31 +00:00
|
|
|
int spill_count = 0, fill_count = 0;
|
2014-08-06 09:27:58 +01:00
|
|
|
int loop_count = 0;
|
2014-05-25 18:42:32 +01:00
|
|
|
|
|
|
|
struct annotation_info annotation;
|
|
|
|
memset(&annotation, 0, sizeof(annotation));
|
|
|
|
|
2014-07-12 05:16:13 +01:00
|
|
|
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
2011-05-25 00:34:27 +01:00
|
|
|
struct brw_reg src[3], dst;
|
2014-05-31 00:41:32 +01:00
|
|
|
unsigned int last_insn_offset = p->next_insn_offset;
|
2014-12-30 20:56:13 +00:00
|
|
|
bool multiple_instructions_emitted = false;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-05-19 18:20:37 +01:00
|
|
|
if (unlikely(debug_flag))
|
2015-04-15 23:01:25 +01:00
|
|
|
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-03-17 17:39:43 +00:00
|
|
|
for (unsigned int i = 0; i < inst->sources; i++) {
|
2011-05-25 00:34:27 +01:00
|
|
|
src[i] = brw_reg_from_fs_reg(&inst->src[i]);
|
2011-10-03 23:12:10 +01:00
|
|
|
|
|
|
|
/* The accumulator result appears to get used for the
|
|
|
|
* conditional modifier generation. When negating a UD
|
|
|
|
* value, there is a 33rd bit generated for the sign in the
|
|
|
|
* accumulator value, so now you can't check, for example,
|
|
|
|
* equality with a 32-bit value. See piglit fs-op-neg-uvec4.
|
|
|
|
*/
|
|
|
|
assert(!inst->conditional_mod ||
|
|
|
|
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
|
|
|
|
!inst->src[i].negate);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
dst = brw_reg_from_fs_reg(&inst->dst);
|
|
|
|
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_predicate_control(p, inst->predicate);
|
|
|
|
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
|
|
|
|
brw_set_default_flag_reg(p, 0, inst->flag_subreg);
|
|
|
|
brw_set_default_saturate(p, inst->saturate);
|
|
|
|
brw_set_default_mask_control(p, inst->force_writemask_all);
|
|
|
|
brw_set_default_acc_write_control(p, inst->writes_accumulator);
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-08-16 04:58:50 +01:00
|
|
|
switch (inst->exec_size) {
|
|
|
|
case 1:
|
|
|
|
case 2:
|
|
|
|
case 4:
|
2015-02-24 20:11:21 +00:00
|
|
|
assert(inst->force_writemask_all);
|
2014-08-16 04:58:50 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
break;
|
|
|
|
case 8:
|
|
|
|
if (inst->force_sechalf) {
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
|
|
|
} else {
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 16:
|
2015-04-04 01:44:06 +01:00
|
|
|
case 32:
|
2015-05-14 23:58:20 +01:00
|
|
|
/* If the instruction writes to more than one register, it needs to
|
|
|
|
* be a "compressed" instruction on Gen <= 5.
|
|
|
|
*/
|
|
|
|
if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
|
2015-04-11 22:51:13 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2015-05-14 23:58:20 +01:00
|
|
|
else
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-08-16 04:58:50 +01:00
|
|
|
break;
|
|
|
|
default:
|
2015-04-02 00:18:31 +01:00
|
|
|
unreachable("Invalid instruction width");
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
switch (inst->opcode) {
|
|
|
|
case BRW_OPCODE_MOV:
|
|
|
|
brw_MOV(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ADD:
|
|
|
|
brw_ADD(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_MUL:
|
|
|
|
brw_MUL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-12-17 14:39:16 +00:00
|
|
|
case BRW_OPCODE_AVG:
|
|
|
|
brw_AVG(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2011-08-16 06:36:18 +01:00
|
|
|
case BRW_OPCODE_MACH:
|
|
|
|
brw_MACH(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
2014-04-02 01:25:12 +01:00
|
|
|
case BRW_OPCODE_LINE:
|
|
|
|
brw_LINE(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2012-02-06 23:59:11 +00:00
|
|
|
case BRW_OPCODE_MAD:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 6);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2015-04-17 01:52:03 +01:00
|
|
|
if (dispatch_width == 16 && !devinfo->supports_simd16_3src) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-12-30 20:56:13 +00:00
|
|
|
brw_inst *f = brw_MAD(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
2014-12-30 20:56:13 +00:00
|
|
|
brw_inst *s = brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2014-12-30 20:56:13 +00:00
|
|
|
|
|
|
|
if (inst->conditional_mod) {
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_cond_modifier(p->devinfo, f, inst->conditional_mod);
|
|
|
|
brw_inst_set_cond_modifier(p->devinfo, s, inst->conditional_mod);
|
2014-12-30 20:56:13 +00:00
|
|
|
multiple_instructions_emitted = true;
|
|
|
|
}
|
2012-02-06 23:59:11 +00:00
|
|
|
} else {
|
|
|
|
brw_MAD(p, dst, src[0], src[1], src[2]);
|
|
|
|
}
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2012-02-06 23:59:11 +00:00
|
|
|
break;
|
|
|
|
|
2012-12-02 08:08:15 +00:00
|
|
|
case BRW_OPCODE_LRP:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 6);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2015-04-17 01:52:03 +01:00
|
|
|
if (dispatch_width == 16 && !devinfo->supports_simd16_3src) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-12-30 20:56:13 +00:00
|
|
|
brw_inst *f = brw_LRP(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
2014-12-30 20:56:13 +00:00
|
|
|
brw_inst *s = brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2014-12-30 20:56:13 +00:00
|
|
|
|
|
|
|
if (inst->conditional_mod) {
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_cond_modifier(p->devinfo, f, inst->conditional_mod);
|
|
|
|
brw_inst_set_cond_modifier(p->devinfo, s, inst->conditional_mod);
|
2014-12-30 20:56:13 +00:00
|
|
|
multiple_instructions_emitted = true;
|
|
|
|
}
|
2012-12-02 08:08:15 +00:00
|
|
|
} else {
|
|
|
|
brw_LRP(p, dst, src[0], src[1], src[2]);
|
|
|
|
}
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2012-12-02 08:08:15 +00:00
|
|
|
break;
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
case BRW_OPCODE_FRC:
|
|
|
|
brw_FRC(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDD:
|
|
|
|
brw_RNDD(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDE:
|
|
|
|
brw_RNDE(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_RNDZ:
|
|
|
|
brw_RNDZ(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_AND:
|
|
|
|
brw_AND(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_OR:
|
|
|
|
brw_OR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_XOR:
|
|
|
|
brw_XOR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_NOT:
|
|
|
|
brw_NOT(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ASR:
|
|
|
|
brw_ASR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SHR:
|
|
|
|
brw_SHR(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SHL:
|
|
|
|
brw_SHL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-01-09 19:46:42 +00:00
|
|
|
case BRW_OPCODE_F32TO16:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
brw_F32TO16(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_F16TO32:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-01-09 19:46:42 +00:00
|
|
|
brw_F16TO32(p, dst, src[0]);
|
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
case BRW_OPCODE_CMP:
|
2015-02-04 01:38:49 +00:00
|
|
|
/* The Ivybridge/BayTrail WaCMPInstFlagDepClearedEarly workaround says
|
|
|
|
* that when the destination is a GRF that the dependency-clear bit on
|
|
|
|
* the flag register is cleared early.
|
|
|
|
*
|
|
|
|
* Suggested workarounds are to disable coissuing CMP instructions
|
|
|
|
* or to split CMP(16) instructions into two CMP(8) instructions.
|
|
|
|
*
|
|
|
|
* We choose to split into CMP(8) instructions since disabling
|
|
|
|
* coissuing would affect CMP instructions not otherwise affected by
|
|
|
|
* the errata.
|
|
|
|
*/
|
2015-04-15 01:45:40 +01:00
|
|
|
if (dispatch_width == 16 && devinfo->gen == 7 && !devinfo->is_haswell) {
|
2015-02-04 01:38:49 +00:00
|
|
|
if (dst.file == BRW_GENERAL_REGISTER_FILE) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2015-02-04 01:38:49 +00:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
brw_CMP(p, firsthalf(dst), inst->conditional_mod,
|
|
|
|
firsthalf(src[0]), firsthalf(src[1]));
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
|
|
|
brw_CMP(p, sechalf(dst), inst->conditional_mod,
|
|
|
|
sechalf(src[0]), sechalf(src[1]));
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
|
|
|
|
|
|
|
multiple_instructions_emitted = true;
|
|
|
|
} else if (dst.file == BRW_ARCHITECTURE_REGISTER_FILE) {
|
|
|
|
/* For unknown reasons, the aforementioned workaround is not
|
|
|
|
* sufficient. Overriding the type when the destination is the
|
|
|
|
* null register is necessary but not sufficient by itself.
|
|
|
|
*/
|
|
|
|
assert(dst.nr == BRW_ARF_NULL);
|
|
|
|
dst.type = BRW_REGISTER_TYPE_D;
|
|
|
|
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
|
|
|
|
} else {
|
|
|
|
unreachable("not reached");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SEL:
|
|
|
|
brw_SEL(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-04-10 03:22:34 +01:00
|
|
|
case BRW_OPCODE_BFREV:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-04-10 03:22:34 +01:00
|
|
|
/* BFREV only supports UD type for src and dst. */
|
|
|
|
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
|
|
|
|
retype(src[0], BRW_REGISTER_TYPE_UD));
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_FBH:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-04-10 03:22:34 +01:00
|
|
|
/* FBH only supports UD type for dst. */
|
|
|
|
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_FBL:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-04-10 03:22:34 +01:00
|
|
|
/* FBL only supports UD type for dst. */
|
|
|
|
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_CBIT:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-04-10 03:22:34 +01:00
|
|
|
/* CBIT only supports UD type for dst. */
|
|
|
|
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
|
|
|
break;
|
2013-09-19 21:01:08 +01:00
|
|
|
case BRW_OPCODE_ADDC:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-09-19 21:01:08 +01:00
|
|
|
brw_ADDC(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_SUBB:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-09-19 21:01:08 +01:00
|
|
|
brw_SUBB(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2014-03-28 13:28:32 +00:00
|
|
|
case BRW_OPCODE_MAC:
|
|
|
|
brw_MAC(p, dst, src[0], src[1]);
|
|
|
|
break;
|
2013-04-10 03:22:34 +01:00
|
|
|
|
|
|
|
case BRW_OPCODE_BFE:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2015-04-17 01:52:03 +01:00
|
|
|
if (dispatch_width == 16 && !devinfo->supports_simd16_3src) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-08-13 20:23:47 +01:00
|
|
|
brw_BFE(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
2013-04-10 03:22:34 +01:00
|
|
|
brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2013-04-10 03:22:34 +01:00
|
|
|
} else {
|
|
|
|
brw_BFE(p, dst, src[0], src[1], src[2]);
|
|
|
|
}
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_BFI1:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2013-11-16 21:16:50 +00:00
|
|
|
/* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
|
|
|
|
* should
|
|
|
|
*
|
|
|
|
* "Force BFI instructions to be executed always in SIMD8."
|
|
|
|
*/
|
2015-04-15 01:45:40 +01:00
|
|
|
if (dispatch_width == 16 && devinfo->is_haswell) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-08-13 20:23:47 +01:00
|
|
|
brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
2013-11-16 21:16:50 +00:00
|
|
|
brw_BFI1(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2013-11-16 21:16:50 +00:00
|
|
|
} else {
|
|
|
|
brw_BFI1(p, dst, src[0], src[1]);
|
|
|
|
}
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_BFI2:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen >= 7);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
2013-11-16 21:16:50 +00:00
|
|
|
/* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
|
|
|
|
* should
|
|
|
|
*
|
|
|
|
* "Force BFI instructions to be executed always in SIMD8."
|
|
|
|
*
|
|
|
|
* Otherwise we would be able to emit compressed instructions like we
|
|
|
|
* do for the other three-source instructions.
|
|
|
|
*/
|
2015-03-19 18:18:49 +00:00
|
|
|
if (dispatch_width == 16 &&
|
2015-04-15 01:45:40 +01:00
|
|
|
(devinfo->is_haswell || !devinfo->supports_simd16_3src)) {
|
2015-04-14 20:40:34 +01:00
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-08-13 20:23:47 +01:00
|
|
|
brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
2013-04-10 03:22:34 +01:00
|
|
|
brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
2013-04-10 03:22:34 +01:00
|
|
|
} else {
|
|
|
|
brw_BFI2(p, dst, src[0], src[1], src[2]);
|
|
|
|
}
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2013-04-10 03:22:34 +01:00
|
|
|
break;
|
2011-05-25 00:34:27 +01:00
|
|
|
|
|
|
|
case BRW_OPCODE_IF:
|
|
|
|
if (inst->src[0].file != BAD_FILE) {
|
|
|
|
/* The instruction has an embedded compare (only allowed on gen6) */
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen == 6);
|
2011-05-25 00:34:27 +01:00
|
|
|
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
|
|
|
} else {
|
2012-11-20 21:50:52 +00:00
|
|
|
brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_ELSE:
|
|
|
|
brw_ELSE(p);
|
|
|
|
break;
|
|
|
|
case BRW_OPCODE_ENDIF:
|
|
|
|
brw_ENDIF(p);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_DO:
|
2011-12-06 20:30:03 +00:00
|
|
|
brw_DO(p, BRW_EXECUTE_8);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BRW_OPCODE_BREAK:
|
2011-12-06 20:44:41 +00:00
|
|
|
brw_BREAK(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case BRW_OPCODE_CONTINUE:
|
2014-08-04 22:26:26 +01:00
|
|
|
brw_CONT(p);
|
2014-06-01 00:57:02 +01:00
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2011-12-06 20:30:03 +00:00
|
|
|
case BRW_OPCODE_WHILE:
|
|
|
|
brw_WHILE(p);
|
2014-08-06 09:27:58 +01:00
|
|
|
loop_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2011-08-05 20:38:58 +01:00
|
|
|
case SHADER_OPCODE_RCP:
|
|
|
|
case SHADER_OPCODE_RSQ:
|
|
|
|
case SHADER_OPCODE_SQRT:
|
|
|
|
case SHADER_OPCODE_EXP2:
|
|
|
|
case SHADER_OPCODE_LOG2:
|
|
|
|
case SHADER_OPCODE_SIN:
|
|
|
|
case SHADER_OPCODE_COS:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen < 6 || inst->mlen == 0);
|
2014-11-21 20:34:22 +00:00
|
|
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 7) {
|
2014-06-07 10:27:43 +01:00
|
|
|
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
|
|
|
|
brw_null_reg());
|
2015-04-15 01:45:40 +01:00
|
|
|
} else if (devinfo->gen == 6) {
|
2014-06-07 10:21:47 +01:00
|
|
|
generate_math_gen6(inst, dst, src[0], brw_null_reg());
|
2015-04-15 01:45:40 +01:00
|
|
|
} else if (devinfo->gen == 5 || devinfo->is_g4x) {
|
2013-03-30 07:15:54 +00:00
|
|
|
generate_math_g45(inst, dst, src[0]);
|
2011-08-18 19:55:42 +01:00
|
|
|
} else {
|
|
|
|
generate_math_gen4(inst, dst, src[0]);
|
|
|
|
}
|
|
|
|
break;
|
2011-09-29 01:37:54 +01:00
|
|
|
case SHADER_OPCODE_INT_QUOTIENT:
|
|
|
|
case SHADER_OPCODE_INT_REMAINDER:
|
2011-08-18 19:55:42 +01:00
|
|
|
case SHADER_OPCODE_POW:
|
2015-04-15 01:45:40 +01:00
|
|
|
assert(devinfo->gen < 6 || inst->mlen == 0);
|
2014-11-21 20:34:22 +00:00
|
|
|
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
2015-04-15 01:45:40 +01:00
|
|
|
if (devinfo->gen >= 7 && inst->opcode == SHADER_OPCODE_POW) {
|
2014-06-07 10:27:43 +01:00
|
|
|
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
2015-04-15 01:45:40 +01:00
|
|
|
} else if (devinfo->gen >= 6) {
|
2014-06-07 10:21:47 +01:00
|
|
|
generate_math_gen6(inst, dst, src[0], src[1]);
|
2011-08-18 19:55:42 +01:00
|
|
|
} else {
|
|
|
|
generate_math_gen4(inst, dst, src[0]);
|
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
case FS_OPCODE_CINTERP:
|
|
|
|
brw_MOV(p, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case FS_OPCODE_LINTERP:
|
|
|
|
generate_linterp(inst, dst, src);
|
|
|
|
break;
|
2015-04-14 21:17:38 +01:00
|
|
|
case FS_OPCODE_PIXEL_X:
|
|
|
|
assert(src[0].type == BRW_REGISTER_TYPE_UW);
|
|
|
|
src[0].subnr = 0 * type_sz(src[0].type);
|
|
|
|
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
|
|
|
break;
|
|
|
|
case FS_OPCODE_PIXEL_Y:
|
|
|
|
assert(src[0].type == BRW_REGISTER_TYPE_UW);
|
|
|
|
src[0].subnr = 4 * type_sz(src[0].type);
|
|
|
|
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
|
|
|
break;
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TEX:
|
2011-05-25 00:34:27 +01:00
|
|
|
case FS_OPCODE_TXB:
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXD:
|
|
|
|
case SHADER_OPCODE_TXF:
|
2013-12-10 14:36:31 +00:00
|
|
|
case SHADER_OPCODE_TXF_CMS:
|
2013-12-10 14:38:15 +00:00
|
|
|
case SHADER_OPCODE_TXF_UMS:
|
2013-11-29 21:32:16 +00:00
|
|
|
case SHADER_OPCODE_TXF_MCS:
|
2011-10-26 20:58:37 +01:00
|
|
|
case SHADER_OPCODE_TXL:
|
|
|
|
case SHADER_OPCODE_TXS:
|
2013-03-06 22:47:01 +00:00
|
|
|
case SHADER_OPCODE_LOD:
|
2013-03-31 09:31:12 +01:00
|
|
|
case SHADER_OPCODE_TG4:
|
2013-10-08 09:42:10 +01:00
|
|
|
case SHADER_OPCODE_TG4_OFFSET:
|
2014-08-03 10:23:31 +01:00
|
|
|
generate_tex(inst, dst, src[0], src[1]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2014-11-08 09:39:14 +00:00
|
|
|
case FS_OPCODE_DDX_COARSE:
|
|
|
|
case FS_OPCODE_DDX_FINE:
|
|
|
|
generate_ddx(inst->opcode, dst, src[0]);
|
|
|
|
break;
|
|
|
|
case FS_OPCODE_DDY_COARSE:
|
|
|
|
case FS_OPCODE_DDY_FINE:
|
2014-11-08 10:01:32 +00:00
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
generate_ddy(inst->opcode, dst, src[0], src[1].dw1.ud);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:45:06 +01:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
|
|
|
generate_scratch_write(inst, src[0]);
|
2015-03-16 19:18:31 +00:00
|
|
|
spill_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:45:06 +01:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
|
|
|
generate_scratch_read(inst, dst);
|
2015-03-16 19:18:31 +00:00
|
|
|
fill_count++;
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2013-10-16 19:51:22 +01:00
|
|
|
case SHADER_OPCODE_GEN7_SCRATCH_READ:
|
|
|
|
generate_scratch_read_gen7(inst, dst);
|
2015-03-16 19:18:31 +00:00
|
|
|
fill_count++;
|
2013-10-16 19:51:22 +01:00
|
|
|
break;
|
|
|
|
|
2014-10-21 07:00:50 +01:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
|
|
|
generate_urb_write(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2012-11-07 18:42:34 +00:00
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
|
|
|
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
|
|
|
|
2012-12-05 08:06:30 +00:00
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
|
|
|
|
generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2012-11-07 19:18:34 +00:00
|
|
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
|
2013-03-18 17:16:42 +00:00
|
|
|
generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
|
2012-11-07 19:18:34 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
|
|
|
|
generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2014-07-07 23:27:17 +01:00
|
|
|
case FS_OPCODE_REP_FB_WRITE:
|
2011-05-25 00:34:27 +01:00
|
|
|
case FS_OPCODE_FB_WRITE:
|
2014-09-16 23:16:20 +01:00
|
|
|
generate_fb_write(inst, src[0]);
|
2011-05-25 00:34:27 +01:00
|
|
|
break;
|
2012-06-18 22:50:04 +01:00
|
|
|
|
2013-12-17 12:00:50 +00:00
|
|
|
case FS_OPCODE_BLORP_FB_WRITE:
|
|
|
|
generate_blorp_fb_write(inst);
|
|
|
|
break;
|
|
|
|
|
2012-06-18 22:50:04 +01:00
|
|
|
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
|
2012-12-06 18:36:11 +00:00
|
|
|
generate_mov_dispatch_to_flags(inst);
|
2012-06-18 22:50:04 +01:00
|
|
|
break;
|
|
|
|
|
2012-12-06 18:15:08 +00:00
|
|
|
case FS_OPCODE_DISCARD_JUMP:
|
|
|
|
generate_discard_jump(inst);
|
|
|
|
break;
|
|
|
|
|
2012-11-27 22:10:52 +00:00
|
|
|
case SHADER_OPCODE_SHADER_TIME_ADD:
|
2013-03-19 22:28:11 +00:00
|
|
|
generate_shader_time_add(inst, src[0], src[1], src[2]);
|
2012-11-27 22:10:52 +00:00
|
|
|
break;
|
|
|
|
|
2013-09-11 22:01:50 +01:00
|
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
2015-04-22 19:10:43 +01:00
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
src[2].file == BRW_IMMEDIATE_VALUE);
|
2015-03-19 13:12:01 +00:00
|
|
|
brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud,
|
2015-02-26 15:41:46 +00:00
|
|
|
inst->mlen, !inst->dst.is_null());
|
2015-03-19 13:12:01 +00:00
|
|
|
brw_mark_surface_used(prog_data, src[1].dw1.ud);
|
2013-09-11 22:01:50 +01:00
|
|
|
break;
|
|
|
|
|
2013-09-11 22:03:13 +01:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
2015-03-19 13:11:28 +00:00
|
|
|
assert(src[1].file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
src[2].file == BRW_IMMEDIATE_VALUE);
|
2015-04-22 19:10:43 +01:00
|
|
|
brw_untyped_surface_read(p, dst, src[0], src[1],
|
2015-03-19 13:11:28 +00:00
|
|
|
inst->mlen, src[2].dw1.ud);
|
2015-04-22 19:10:43 +01:00
|
|
|
brw_mark_surface_used(prog_data, src[1].dw1.ud);
|
2013-09-11 22:03:13 +01:00
|
|
|
break;
|
|
|
|
|
2015-04-23 12:24:14 +01:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_untyped_surface_write(p, src[0], src[1],
|
|
|
|
inst->mlen, src[2].dw1.ud);
|
|
|
|
break;
|
|
|
|
|
2015-04-23 12:28:25 +01:00
|
|
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_typed_atomic(p, dst, src[0], src[1],
|
|
|
|
src[2].dw1.ud, inst->mlen, !inst->dst.is_null());
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_typed_surface_read(p, dst, src[0], src[1],
|
|
|
|
inst->mlen, src[2].dw1.ud);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
|
|
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
|
|
|
brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud);
|
|
|
|
break;
|
|
|
|
|
2015-04-23 12:30:28 +01:00
|
|
|
case SHADER_OPCODE_MEMORY_FENCE:
|
|
|
|
brw_memory_fence(p, dst);
|
|
|
|
break;
|
|
|
|
|
2013-03-06 22:47:22 +00:00
|
|
|
case FS_OPCODE_SET_SIMD4X2_OFFSET:
|
|
|
|
generate_set_simd4x2_offset(inst, dst, src[0]);
|
2012-12-05 08:06:30 +00:00
|
|
|
break;
|
|
|
|
|
2015-04-23 12:42:53 +01:00
|
|
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
|
|
|
brw_find_live_channel(p, dst);
|
|
|
|
break;
|
|
|
|
|
2015-02-20 18:14:24 +00:00
|
|
|
case SHADER_OPCODE_BROADCAST:
|
|
|
|
brw_broadcast(p, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2013-10-25 00:21:13 +01:00
|
|
|
case FS_OPCODE_SET_OMASK:
|
|
|
|
generate_set_omask(inst, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2013-10-25 00:17:08 +01:00
|
|
|
case FS_OPCODE_SET_SAMPLE_ID:
|
|
|
|
generate_set_sample_id(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
2013-01-09 19:46:42 +00:00
|
|
|
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
|
|
|
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
|
|
|
|
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
|
|
|
|
generate_unpack_half_2x16_split(inst, dst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2013-03-28 06:19:39 +00:00
|
|
|
case FS_OPCODE_PLACEHOLDER_HALT:
|
|
|
|
/* This is the place where the final HALT needs to be inserted if
|
|
|
|
* we've emitted any discards. If not, this will emit no code.
|
|
|
|
*/
|
2014-05-19 18:20:37 +01:00
|
|
|
if (!patch_discard_jumps_to_fb_writes()) {
|
2014-05-25 18:30:13 +01:00
|
|
|
if (unlikely(debug_flag)) {
|
2014-05-25 18:42:32 +01:00
|
|
|
annotation.ann_count--;
|
2014-05-25 18:30:13 +01:00
|
|
|
}
|
2014-05-19 18:20:37 +01:00
|
|
|
}
|
2013-03-28 06:19:39 +00:00
|
|
|
break;
|
|
|
|
|
2013-11-18 08:13:13 +00:00
|
|
|
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
|
|
|
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
|
|
|
|
GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
|
|
|
|
break;
|
|
|
|
|
2014-08-27 19:33:25 +01:00
|
|
|
case CS_OPCODE_CS_TERMINATE:
|
|
|
|
generate_cs_terminate(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2014-08-27 19:32:08 +01:00
|
|
|
case SHADER_OPCODE_BARRIER:
|
|
|
|
generate_barrier(inst, src[0]);
|
|
|
|
break;
|
|
|
|
|
2011-05-25 00:34:27 +01:00
|
|
|
default:
|
2015-04-15 22:51:18 +01:00
|
|
|
unreachable("Unsupported opcode");
|
2014-05-28 02:47:40 +01:00
|
|
|
|
|
|
|
case SHADER_OPCODE_LOAD_PAYLOAD:
|
2014-06-29 22:54:01 +01:00
|
|
|
unreachable("Should be lowered by lower_load_payload()");
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
2014-05-31 00:41:32 +01:00
|
|
|
|
2014-12-30 20:56:13 +00:00
|
|
|
if (multiple_instructions_emitted)
|
|
|
|
continue;
|
|
|
|
|
2014-06-29 07:31:04 +01:00
|
|
|
if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
|
|
|
|
assert(p->next_insn_offset == last_insn_offset + 16 ||
|
|
|
|
!"conditional_mod, no_dd_check, or no_dd_clear set for IR "
|
|
|
|
"emitting more than 1 instruction");
|
|
|
|
|
2014-06-13 22:29:25 +01:00
|
|
|
brw_inst *last = &p->store[last_insn_offset / 16];
|
2014-06-29 07:31:04 +01:00
|
|
|
|
2014-11-21 20:20:53 +00:00
|
|
|
if (inst->conditional_mod)
|
2015-04-15 02:00:06 +01:00
|
|
|
brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
|
|
|
|
brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
|
|
|
|
brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
|
2014-05-31 00:41:32 +01:00
|
|
|
}
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
brw_set_uip_jip(p);
|
2014-05-25 18:42:32 +01:00
|
|
|
annotation_finalize(&annotation, p->next_insn_offset);
|
|
|
|
|
2014-05-25 22:56:41 +01:00
|
|
|
int before_size = p->next_insn_offset - start_offset;
|
2014-05-25 18:42:32 +01:00
|
|
|
brw_compact_instructions(p, start_offset, annotation.ann_count,
|
|
|
|
annotation.ann);
|
2014-05-25 22:56:41 +01:00
|
|
|
int after_size = p->next_insn_offset - start_offset;
|
2014-05-25 18:42:32 +01:00
|
|
|
|
|
|
|
if (unlikely(debug_flag)) {
|
2014-10-28 02:40:47 +00:00
|
|
|
fprintf(stderr, "Native code for %s\n"
|
2015-03-16 19:18:31 +00:00
|
|
|
"SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
|
2014-10-28 02:40:47 +00:00
|
|
|
" bytes (%.0f%%)\n",
|
2015-03-16 19:18:31 +00:00
|
|
|
shader_name, dispatch_width, before_size / 16, loop_count,
|
|
|
|
spill_count, fill_count, promoted_constants, before_size, after_size,
|
2014-05-25 22:56:41 +01:00
|
|
|
100.0f * (before_size - after_size) / before_size);
|
2014-05-25 18:46:55 +01:00
|
|
|
|
2015-04-15 23:01:25 +01:00
|
|
|
dump_assembly(p->store, annotation.ann_count, annotation.ann,
|
|
|
|
p->devinfo, prog);
|
2014-05-25 18:42:32 +01:00
|
|
|
ralloc_free(annotation.ann);
|
|
|
|
}
|
2014-11-14 00:28:08 +00:00
|
|
|
|
2015-04-16 22:13:52 +01:00
|
|
|
compiler->shader_debug_log(log_data,
|
|
|
|
"%s SIMD%d shader: %d inst, %d loops, "
|
|
|
|
"%d:%d spills:fills, Promoted %u constants, "
|
|
|
|
"compacted %d to %d bytes.\n",
|
|
|
|
stage_abbrev, dispatch_width, before_size / 16,
|
|
|
|
loop_count, spill_count, fill_count,
|
|
|
|
promoted_constants, before_size, after_size);
|
2014-11-14 20:46:44 +00:00
|
|
|
|
2014-11-14 00:28:08 +00:00
|
|
|
return start_offset;
|
2011-05-25 00:34:27 +01:00
|
|
|
}
|
2012-11-09 09:05:47 +00:00
|
|
|
|
|
|
|
const unsigned *
|
2014-11-14 00:28:08 +00:00
|
|
|
fs_generator::get_assembly(unsigned int *assembly_size)
|
2012-11-09 09:05:47 +00:00
|
|
|
{
|
|
|
|
return brw_get_program(p, assembly_size);
|
|
|
|
}
|