Merge remote-tracking branch 'mattst88/nir-lower-pack-unpack' into vulkan

Jason Ekstrand 2016-01-25 15:50:31 -08:00
commit e462d4d815
145 changed files with 2306 additions and 1271 deletions

View File

@ -271,6 +271,7 @@ C_SOURCES := \
util/u_prim_restart.h \
util/u_pstipple.c \
util/u_pstipple.h \
util/u_pwr8.h \
util/u_range.h \
util/u_rect.h \
util/u_resource.c \

View File

@ -1618,6 +1618,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
context_ptr = LLVMGetParam(variant_func, 0);
io_ptr = LLVMGetParam(variant_func, 1);
vbuffers_ptr = LLVMGetParam(variant_func, 2);
/*
* XXX: stride is actually unused. The stride we use is strictly calculated
* from the number of outputs (including the draw_extra outputs).
* Should probably fix some day (we need a new vs just because of extra
* outputs which the generated vs won't touch).
*/
stride = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
vb_ptr = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));

View File

@ -461,50 +461,49 @@ lp_build_pack2(struct gallivm_state *gallivm,
assert(src_type.length * 2 == dst_type.length);
/* Check for special cases first */
if((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
src_type.width * src_type.length >= 128) {
if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
src_type.width * src_type.length >= 128) {
const char *intrinsic = NULL;
boolean swap_intrinsic_operands = FALSE;
switch(src_type.width) {
case 32:
if (util_cpu_caps.has_sse2) {
if(dst_type.sign) {
if (dst_type.sign) {
intrinsic = "llvm.x86.sse2.packssdw.128";
}
else {
} else {
if (util_cpu_caps.has_sse4_1) {
intrinsic = "llvm.x86.sse41.packusdw";
}
}
} else if (util_cpu_caps.has_altivec) {
if (dst_type.sign) {
intrinsic = "llvm.ppc.altivec.vpkswus";
} else {
intrinsic = "llvm.ppc.altivec.vpkuwus";
}
intrinsic = "llvm.ppc.altivec.vpkswss";
} else {
intrinsic = "llvm.ppc.altivec.vpkuwus";
}
#ifdef PIPE_ARCH_LITTLE_ENDIAN
swap_intrinsic_operands = TRUE;
swap_intrinsic_operands = TRUE;
#endif
}
break;
case 16:
if (dst_type.sign) {
if (util_cpu_caps.has_sse2) {
intrinsic = "llvm.x86.sse2.packsswb.128";
intrinsic = "llvm.x86.sse2.packsswb.128";
} else if (util_cpu_caps.has_altivec) {
intrinsic = "llvm.ppc.altivec.vpkshss";
intrinsic = "llvm.ppc.altivec.vpkshss";
#ifdef PIPE_ARCH_LITTLE_ENDIAN
swap_intrinsic_operands = TRUE;
swap_intrinsic_operands = TRUE;
#endif
}
} else {
if (util_cpu_caps.has_sse2) {
intrinsic = "llvm.x86.sse2.packuswb.128";
intrinsic = "llvm.x86.sse2.packuswb.128";
} else if (util_cpu_caps.has_altivec) {
intrinsic = "llvm.ppc.altivec.vpkshus";
intrinsic = "llvm.ppc.altivec.vpkshus";
#ifdef PIPE_ARCH_LITTLE_ENDIAN
swap_intrinsic_operands = TRUE;
swap_intrinsic_operands = TRUE;
#endif
}
}
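
The fix above replaces vpkswus with vpkswss in the signed-destination case. As a standalone illustration (plain C, not the driver code), the difference is the saturation range each intrinsic applies per lane, which matters as soon as a lane is negative:

#include <assert.h>
#include <stdint.h>

/* vpkswss: signed 32-bit -> signed 16-bit, saturating */
static int16_t pack_ss(int32_t v)
{
   return v < INT16_MIN ? INT16_MIN : v > INT16_MAX ? INT16_MAX : (int16_t)v;
}

/* vpkswus: signed 32-bit -> unsigned 16-bit, saturating */
static uint16_t pack_us(int32_t v)
{
   return v < 0 ? 0 : v > (int32_t)UINT16_MAX ? UINT16_MAX : (uint16_t)v;
}

int main(void)
{
   assert(pack_ss(-1000) == -1000);   /* signed pack keeps negative lanes */
   assert(pack_us(-1000) == 0);       /* unsigned pack clamps them to 0 */
   return 0;
}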

View File

@ -1536,8 +1536,22 @@ mod_emit_cpu(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
emit_data->output[emit_data->chan] = lp_build_mod(&bld_base->int_bld,
emit_data->args[0], emit_data->args[1]);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
PIPE_FUNC_EQUAL, emit_data->args[1],
bld_base->uint_bld.zero);
/* We want to make sure that we never divide/mod by zero, so we
* don't generate SIGFPE. We don't want to crash just because the
* shader is doing something weird. */
LLVMValueRef divisor = LLVMBuildOr(builder,
div_mask,
emit_data->args[1], "");
LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
emit_data->args[0], divisor);
/* umod by zero doesn't have a guaranteed return value; we chose -1 for now. */
emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
div_mask,
result, "");
}
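
In isolation, the masking trick above works like this (scalar sketch; assumes lp_build_cmp yields an all-ones mask where the comparison holds):

#include <assert.h>
#include <stdint.h>

static uint32_t safe_umod(uint32_t a, uint32_t b)
{
   uint32_t div_mask = (b == 0) ? ~0u : 0;   /* lp_build_cmp analogue */
   uint32_t divisor = b | div_mask;          /* never zero, so no SIGFPE */
   uint32_t result = a % divisor;
   return result | div_mask;                 /* mod-by-zero lanes become ~0 (-1) */
}

int main(void)
{
   assert(safe_umod(7, 3) == 1);             /* normal case is unchanged */
   assert(safe_umod(7, 0) == 0xffffffffu);   /* defined result, no crash */
   return 0;
}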
/* TGSI_OPCODE_NOT */

View File

@ -673,10 +673,6 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
if (c->temp_regs[index].var) {
nir_builder *b = &c->build;
nir_intrinsic_instr *load;
struct tgsi_ind_register *indirect =
tgsi_dst->Indirect ? &tgsi_fdst->Indirect : NULL;
nir_register *reg;
/* this works, because TGSI will give us a base offset
@ -690,26 +686,6 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
reg->num_components = 4;
dest.dest.reg.reg = reg;
dest.dest.reg.base_offset = 0;
/* since the alu op might not write to all components
* of the temporary, we must first do a load_var to
* get the previous array elements into the register.
* This is one area that NIR could use a bit of
* improvement (or opt pass to clean up the mess
* once things are scalarized)
*/
load = nir_intrinsic_instr_create(c->build.shader,
nir_intrinsic_load_var);
load->num_components = 4;
load->variables[0] =
ttn_array_deref(c, load, c->temp_regs[index].var,
c->temp_regs[index].offset,
indirect);
load->dest = nir_dest_for_reg(reg);
nir_builder_instr_insert(b, &load->instr);
} else {
assert(!tgsi_dst->Indirect);
dest.dest.reg.reg = c->temp_regs[index].reg;
@ -1886,7 +1862,7 @@ ttn_emit_instruction(struct ttn_compile *c)
ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
}
/* if the dst has a matching var, append store_global to move
/* if the dst has a matching var, append store_var to move
* output from reg to var
*/
nir_variable *var = ttn_get_var(c, tgsi_dst);
@ -1899,7 +1875,7 @@ ttn_emit_instruction(struct ttn_compile *c)
&tgsi_dst->Indirect : NULL;
store->num_components = 4;
store->const_index[0] = 0xf;
store->const_index[0] = dest.write_mask;
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
@ -1932,6 +1908,7 @@ ttn_add_output_stores(struct ttn_compile *c)
store->src[0].reg.reg = c->output_regs[loc].reg;
store->src[0].reg.base_offset = c->output_regs[loc].offset;
store->const_index[0] = loc;
store->const_index[1] = 0xf; /* writemask */
store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &store->instr);
}

View File

@ -110,6 +110,7 @@ tgsi_default_declaration( void )
declaration.Invariant = 0;
declaration.Local = 0;
declaration.Array = 0;
declaration.Atomic = 0;
declaration.Padding = 0;
return declaration;

View File

@ -230,6 +230,7 @@ pstip_transform_immed(struct tgsi_transform_context *ctx,
struct pstip_transform_context *pctx =
(struct pstip_transform_context *) ctx;
pctx->numImmed++;
ctx->emit_immediate(ctx, immed);
}

View File

@ -153,6 +153,12 @@ vec_mullo_epi32 (__m128i a, __m128i b)
return v;
}
static inline __m128i
vec_andnot_si128 (__m128i a, __m128i b)
{
return vec_andc (b, a);
}
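
The operand swap is the whole point of the wrapper: SSE's _mm_andnot_si128(a, b) computes ~a & b per lane, while AltiVec's vec_andc(x, y) computes x & ~y, so vec_andc(b, a) produces the same value. A plain-C spot check of the identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t a = 0xf0f0f0f0u, b = 0xff00ff00u;
   assert((~a & b) == (b & ~a));   /* andnot(a, b) == andc(b, a) */
   return 0;
}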
static inline void
transpose4_epi32(const __m128i * restrict a,
const __m128i * restrict b,

View File

@ -305,6 +305,7 @@ The integer capabilities:
for buffers is supported.
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
is supported.
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
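
A hypothetical caller (not part of this commit; the helper name is made up) would check the cap before using the hook:

#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"

static void
emit_marker_if_supported(struct pipe_context *pipe, const char *msg)
{
   if (pipe->screen->get_param(pipe->screen, PIPE_CAP_STRING_MARKER))
      pipe->emit_string_marker(pipe, msg, (int)strlen(msg));
}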
.. _pipe_capf:

View File

@ -109,6 +109,7 @@ fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd2_gmem_init(pctx);
fd2_texture_init(pctx);
fd2_prog_init(pctx);
fd2_emit_init(pctx);
pctx = fd_context_init(&fd2_ctx->base, pscreen,
(screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes,

View File

@ -446,3 +446,17 @@ fd2_emit_setup(struct fd_context *ctx)
fd_ringbuffer_flush(ring);
fd_ringmarker_mark(ctx->draw_start);
}
static void
fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
__OUT_IB(ring, false, start, end);
}
void
fd2_emit_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->emit_ib = fd2_emit_ib;
}
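
The new emit_ib hook follows the same per-generation function-pointer pattern as emit_const: each generation installs its callback at context init, so the generation-neutral tiling code (the OUT_IB call sites converted below) never has to branch on the GPU id. A minimal standalone sketch of the pattern, with illustrative names and an assumed numeric split:

#include <stdio.h>

struct ring;

struct context {
   void (*emit_ib)(struct ring *r);   /* installed once at init */
};

static void a2xx_emit_ib(struct ring *r) { (void)r; puts("IB, no prefetch (PFD)"); }
static void a3xx_emit_ib(struct ring *r) { (void)r; puts("IB, prefetch (PFE)"); }

static void
context_init(struct context *ctx, int gpu_id)
{
   ctx->emit_ib = (gpu_id >= 300) ? a3xx_emit_ib : a2xx_emit_ib;
}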

View File

@ -45,4 +45,6 @@ void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
void fd2_emit_state(struct fd_context *ctx, uint32_t dirty);
void fd2_emit_setup(struct fd_context *ctx);
void fd2_emit_init(struct pipe_context *pctx);
#endif /* FD2_EMIT_H */

View File

@ -891,10 +891,18 @@ fd3_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
static void
fd3_emit_ib(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
__OUT_IB(ring, true, start, end);
}
void
fd3_emit_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->emit_const = fd3_emit_const;
ctx->emit_const_bo = fd3_emit_const_bo;
ctx->emit_ib = fd3_emit_ib;
}

View File

@ -853,7 +853,7 @@ emit_binning_pass(struct fd_context *ctx)
A3XX_PC_VSTREAM_CONTROL_N(0));
/* emit IB to binning drawcmds: */
OUT_IB(ring, ctx->binning_start, ctx->binning_end);
ctx->emit_ib(ring, ctx->binning_start, ctx->binning_end);
fd_reset_wfi(ctx);
fd_wfi(ctx, ring);

View File

@ -885,10 +885,18 @@ fd4_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
static void
fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
__OUT_IB(ring, true, start, end);
}
void
fd4_emit_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->emit_const = fd4_emit_const;
ctx->emit_const_bo = fd4_emit_const_bo;
ctx->emit_ib = fd4_emit_ib;
}

View File

@ -217,6 +217,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
struct stage s[MAX_STAGES];
uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
uint32_t face_regid, coord_regid, zwcoord_regid;
enum a3xx_threadsize fssz;
int constmode;
int i, j, k;
@ -224,6 +225,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
setup_stages(emit, s);
fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;
/* blob seems to always use constmode currently: */
constmode = 1;
@ -258,7 +261,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
OUT_RING(ring, 0x00000003);
OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
@ -385,7 +388,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |

View File

@ -141,6 +141,32 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
}
}
/**
* emit marker string as payload of a no-op packet, which can be
* decoded by cffdump.
*/
static void
fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_ringbuffer *ring = ctx->ring;
const uint32_t *buf = (const void *)string;
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
while (len >= 4) {
OUT_RING(ring, *buf);
buf++;
len -= 4;
}
/* copy remainder bytes without reading past end of input string: */
if (len > 0) {
uint32_t w = 0;
memcpy(&w, buf, len);
OUT_RING(ring, w);
}
}
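
Reduced to a standalone sketch, the padding scheme above copies whole 32-bit words directly and routes the tail through a zero-initialized temporary, so the ring never sees bytes past the end of the caller's string:

#include <stdint.h>
#include <string.h>

/* returns the number of words written; out must hold align(len, 4) / 4 words */
static unsigned
pack_string_words(const char *s, int len, uint32_t *out)
{
   unsigned n = 0;
   while (len >= 4) {
      memcpy(&out[n++], s, 4);
      s += 4;
      len -= 4;
   }
   if (len > 0) {
      uint32_t w = 0;
      memcpy(&w, s, len);   /* zero-padded final word */
      out[n++] = w;
   }
   return n;
}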
void
fd_context_destroy(struct pipe_context *pctx)
{
@ -207,6 +233,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
pctx->screen = pscreen;
pctx->priv = priv;
pctx->flush = fd_context_flush;
pctx->emit_string_marker = fd_emit_string_marker;
for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x100000);

View File

@ -386,6 +386,10 @@ struct fd_context {
const uint32_t *dwords, struct pipe_resource *prsc);
void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
/* indirect-branch emit: */
void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end);
};
static inline struct fd_context *

View File

@ -331,7 +331,7 @@ render_tiles(struct fd_context *ctx)
fd_hw_query_prepare_tile(ctx, i, ctx->ring);
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
ctx->emit_ib(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
/* emit gmem2mem to transfer tile back to system memory: */
@ -349,7 +349,7 @@ render_sysmem(struct fd_context *ctx)
fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
ctx->emit_ib(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
}

View File

@ -155,6 +155,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_STRING_MARKER:
return 1;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
@ -400,9 +401,16 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
/* Technically this should be the same as for TEMP/CONST, since
* everything is just normal registers. This is just a temporary
* hack until load_input/store_output handle arrays in a similar
* way to load_var/store_var..
*/
return 0;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
/* a2xx compiler doesn't handle indirect: */
return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
@ -566,6 +574,7 @@ fd_screen_create(struct fd_device *dev)
fd3_screen_init(pscreen);
break;
case 420:
case 430:
fd4_screen_init(pscreen);
break;
default:

View File

@ -265,8 +265,8 @@ OUT_WFI(struct fd_ringbuffer *ring)
}
static inline void
OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
__OUT_IB(struct fd_ringbuffer *ring, bool prefetch,
struct fd_ringmarker *start, struct fd_ringmarker *end)
{
uint32_t dwords = fd_ringmarker_dwords(start, end);
@ -280,7 +280,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
*/
emit_marker(ring, 6);
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
fd_ringbuffer_emit_reloc_ring(ring, start, end);
OUT_RING(ring, dwords);

View File

@ -220,7 +220,7 @@ static void print_instr_cat1(instr_t *instr)
else if (cat1->off > 0)
printf("%c<a0.x + %d>", type, cat1->off);
else
printf("c<a0.x>");
printf("%c<a0.x>", type);
} else {
print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32,
cat1->src_r, cat1->src_c, cat1->src_im, false, false, false);
@ -650,7 +650,7 @@ static void print_instr_cat6(instr_t *instr)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6
struct opc_info {
static const struct opc_info {
uint16_t cat;
uint16_t opc;
const char *name;

View File

@ -261,6 +261,7 @@ typedef union PACKED {
/* to make compiler happy: */
uint32_t dummy32;
uint32_t dummy10 : 10;
int32_t idummy10 : 10;
uint32_t dummy11 : 11;
uint32_t dummy12 : 12;
uint32_t dummy13 : 13;

View File

@ -81,6 +81,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
list_inithead(&shader->block_list);
list_inithead(&shader->array_list);
return shader;
}
@ -121,18 +122,19 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
val.iim_val = reg->iim_val;
} else {
unsigned components;
int16_t max;
if (reg->flags & IR3_REG_RELATIV) {
components = reg->size;
val.dummy10 = reg->offset;
val.idummy10 = reg->array.offset;
max = (reg->array.offset + repeat + components - 1) >> 2;
} else {
components = util_last_bit(reg->wrmask);
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
max = (reg->num + repeat + components - 1) >> 2;
}
int16_t max = (reg->num + repeat + components - 1) >> 2;
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
} else if (val.num == 63) {
@ -233,7 +235,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
iassert(src1->array.offset < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -260,7 +262,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
!((src1->flags ^ src2->flags) & IR3_REG_HALF));
if (src2->flags & IR3_REG_RELATIV) {
iassert(src2->num < (1 << 10));
iassert(src2->array.offset < (1 << 10));
cat2->rel2.src2 = reg(src2, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -333,7 +335,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
iassert(src1->array.offset < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -361,7 +363,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
if (src3->flags & IR3_REG_RELATIV) {
iassert(src3->num < (1 << 10));
iassert(src3->array.offset < (1 << 10));
cat3->rel2.src3 = reg(src3, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@ -404,7 +406,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
iassert(instr->regs_count == 2);
if (src->flags & IR3_REG_RELATIV) {
iassert(src->num < (1 << 10));
iassert(src->array.offset < (1 << 10));
cat4->rel.src = reg(src, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
@ -737,6 +739,14 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
return reg;
}
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
struct ir3_register *reg)
{
struct ir3_register *new_reg = reg_create(shader, 0, 0);
*new_reg = *reg;
return new_reg;
}
void
ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr)
@ -777,3 +787,12 @@ ir3_count_instructions(struct ir3 *ir)
}
return cnt;
}
struct ir3_array *
ir3_lookup_array(struct ir3 *ir, unsigned id)
{
list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
if (arr->id == id)
return arr;
return NULL;
}

View File

@ -83,7 +83,8 @@ struct ir3_register {
* before register assignment is done:
*/
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */
IR3_REG_ARRAY = 0x4000,
IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */
} flags;
union {
@ -97,11 +98,18 @@ struct ir3_register {
uint32_t uim_val;
float fim_val;
/* relative: */
int offset;
struct {
uint16_t id;
int16_t offset;
} array;
};
/* for IR3_REG_SSA, src registers contain ptr back to
* assigning instruction.
/* For IR3_REG_SSA, src registers contain ptr back to assigning
* instruction.
*
* For IR3_REG_ARRAY, the pointer is back to the last dependent
* array access (although the net effect is the same, it points
* back to a previous instruction that we depend on).
*/
struct ir3_instruction *instr;
@ -221,9 +229,6 @@ struct ir3_instruction {
struct {
int off; /* component/offset */
} fo;
struct {
int aid;
} fi;
struct {
/* used to temporarily hold reference to nir_phi_instr
* until we resolve the phi srcs
@ -293,19 +298,6 @@ struct ir3_instruction {
*/
struct ir3_instruction *address;
/* in case of an instruction with a relative dst, we need to
* capture the dependency on the fanin for the previous values of
* the array elements. Since we don't know at compile time actually
* which array elements are written, this serves to preserve the
* unconditional write to array elements prior to the conditional
* write.
*
* TODO only cat1 can do indirect write.. we could maybe move this
* into instr->cat1.fanin (but would require the frontend to insert
* the extra mov)
*/
struct ir3_instruction *fanin;
/* Entry in ir3_block's instruction list: */
struct list_head node;
@ -379,10 +371,41 @@ struct ir3 {
/* List of blocks: */
struct list_head block_list;
/* List of ir3_array's: */
struct list_head array_list;
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
typedef struct nir_variable nir_variable;
struct ir3_array {
struct list_head node;
unsigned length;
unsigned id;
nir_variable *var;
/* We track the last write and last access (read or write) to
* set up dependencies on instructions that read or write the
* array. Reads can be re-ordered wrt. other reads, but should
* not be re-ordered wrt. writes. Writes cannot be reordered
* wrt. any other access to the array.
*
* So array reads depend on last write, and array writes depend
* on the last access.
*/
struct ir3_instruction *last_write, *last_access;
/* extra stuff used in RA pass: */
unsigned base; /* base vreg name */
unsigned reg; /* base physical reg */
uint16_t start_ip, end_ip;
};
struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
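
The ordering rule from the comment above, as a standalone sketch. The types and the explicit add_dep() are stand-ins: the real compiler expresses the dependency through the register's instr back-pointer rather than a call like this.

struct instr;

struct array_state {
   struct instr *last_write, *last_access;
};

static void add_dep(struct instr *i, struct instr *on) { (void)i; (void)on; }

static void
array_read(struct array_state *a, struct instr *i)
{
   add_dep(i, a->last_write);    /* reads only order against the last write */
   a->last_access = i;
}

static void
array_write(struct array_state *a, struct instr *i)
{
   add_dep(i, a->last_access);   /* writes order against any prior access */
   a->last_write = a->last_access = i;
}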
typedef struct nir_block nir_block;
struct ir3_block {
@ -430,6 +453,8 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
struct ir3_register * ir3_reg_clone(struct ir3 *shader,
struct ir3_register *reg);
void ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr);
@ -510,6 +535,9 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
if (dst->num == regid(REG_A0, 0))
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type))
return true;
@ -623,8 +651,10 @@ static inline bool writes_pred(struct ir3_instruction *instr)
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SSA)
if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
return reg->instr;
}
return NULL;
}
@ -813,8 +843,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
if (instr->fanin)
return instr->regs_count + 2;
if (instr->address)
return instr->regs_count + 1;
return instr->regs_count;
@ -822,8 +850,6 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
if (n == (instr->regs_count + 1))
return instr->fanin;
if (n == (instr->regs_count + 0))
return instr->address;
return ssa(instr->regs[n]);
@ -834,8 +860,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
if ((__instr)->regs_count) \
for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
if ((__srcinst = __ssa_src_n(__instr, __n)))
/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
@ -878,7 +904,15 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
struct ir3_instruction *instr =
ir3_instr_create(block, 1, 0);
ir3_reg_create(instr, 0, 0); /* dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
if (src->regs[0]->flags & IR3_REG_ARRAY) {
struct ir3_register *src_reg =
ir3_reg_create(instr, 0, IR3_REG_ARRAY);
src_reg->array = src->regs[0]->array;
src_reg->instr = src;
} else {
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
}
debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;
instr->cat1.dst_type = type;
return instr;
@ -894,6 +928,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = src_type;
instr->cat1.dst_type = dst_type;
debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
return instr;
}
@ -1083,7 +1118,7 @@ typedef uint8_t regmask_t[2 * MAX_REG / 8];
static inline unsigned regmask_idx(struct ir3_register *reg)
{
unsigned num = reg->num;
unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
debug_assert(num < MAX_REG);
if (reg->flags & IR3_REG_HALF)
num += MAX_REG;

View File

@ -46,7 +46,6 @@
struct ir3_compile {
struct ir3_compiler *compiler;
const struct tgsi_token *tokens;
struct nir_shader *s;
struct ir3 *ir;
@ -75,8 +74,6 @@ struct ir3_compile {
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
/* mapping from nir_variable to ir3_array: */
struct hash_table *var_ht;
unsigned num_arrays;
/* a common pattern for indirect addressing is to request the
@ -143,8 +140,6 @@ compile_init(struct ir3_compiler *compiler,
ctx->so = so;
ctx->def_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->var_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
@ -221,206 +216,26 @@ compile_free(struct ir3_compile *ctx)
ralloc_free(ctx);
}
/* global per-array information: */
struct ir3_array {
unsigned length, aid;
};
/* per-block array state: */
struct ir3_array_value {
/* TODO drop length/aid, and just have ptr back to ir3_array */
unsigned length, aid;
/* initial array element values are phi's, other than for the
* entry block. The phi src's get added later in a resolve step
* after we have visited all the blocks, to account for back
* edges in the cfg.
*/
struct ir3_instruction **phis;
/* current array element values (as block is processed). When
* the array phi's are resolved, it will contain the array state
* at exit of block, so successor blocks can use it to add their
* phi srcs.
*/
struct ir3_instruction *arr[];
};
/* track array assignments per basic block. When an array is read
* outside of the same basic block, we can use NIR's dominance-frontier
* information to figure out where phi nodes are needed.
*/
struct ir3_nir_block_data {
unsigned foo;
/* indexed by array-id (aid): */
struct ir3_array_value *arrs[];
};
static struct ir3_nir_block_data *
get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
{
if (!block->data) {
struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
block->data = bd;
}
return block->data;
}
static void
declare_var(struct ir3_compile *ctx, nir_variable *var)
{
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
struct ir3_array *arr = ralloc(ctx, struct ir3_array);
arr->id = ++ctx->num_arrays;
arr->length = length;
arr->aid = ++ctx->num_arrays;
_mesa_hash_table_insert(ctx->var_ht, var, arr);
arr->var = var;
list_addtail(&arr->node, &ctx->ir->array_list);
}
static nir_block *
nir_block_pred(nir_block *block)
{
assert(block->predecessors->entries < 2);
if (block->predecessors->entries == 0)
return NULL;
return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
}
static struct ir3_array_value *
static struct ir3_array *
get_var(struct ir3_compile *ctx, nir_variable *var)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
struct ir3_block *block = ctx->block;
struct ir3_nir_block_data *bd = get_block_data(ctx, block);
struct ir3_array *arr = entry->data;
if (!bd->arrs[arr->aid]) {
struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
(arr->length * sizeof(av->arr[0])));
struct ir3_array_value *defn = NULL;
nir_block *pred_block;
av->length = arr->length;
av->aid = arr->aid;
/* For loops, we have to consider that we have not visited some
* of the blocks who should feed into the phi (ie. back-edges in
* the cfg).. for example:
*
* loop {
* block { load_var; ... }
* if then block {} else block {}
* block { store_var; ... }
* if then block {} else block {}
* block {...}
* }
*
* We can skip the phi if we can chase the block predecessors
* until finding the block previously defining the array without
* crossing a block that has more than one predecessor.
*
* Otherwise create phi's and resolve them as a post-pass after
* all the blocks have been visited (to handle back-edges).
*/
for (pred_block = block->nblock;
pred_block && (pred_block->predecessors->entries < 2) && !defn;
pred_block = nir_block_pred(pred_block)) {
struct ir3_block *pblock = get_block(ctx, pred_block);
struct ir3_nir_block_data *pbd = pblock->data;
if (!pbd)
continue;
defn = pbd->arrs[arr->aid];
}
if (defn) {
/* only one possible definer: */
for (unsigned i = 0; i < arr->length; i++)
av->arr[i] = defn->arr[i];
} else if (pred_block) {
/* not the first block, and multiple potential definers: */
av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
for (unsigned i = 0; i < arr->length; i++) {
struct ir3_instruction *phi;
phi = ir3_instr_create2(block, -1, OPC_META_PHI,
1 + ctx->impl->num_blocks);
ir3_reg_create(phi, 0, 0); /* dst */
/* phi's should go at head of block: */
list_delinit(&phi->node);
list_add(&phi->node, &block->instr_list);
av->phis[i] = av->arr[i] = phi;
}
} else {
/* Some shaders end up reading array elements without
* first writing.. so initialize things to prevent null
* instr ptrs later:
*/
for (unsigned i = 0; i < arr->length; i++)
av->arr[i] = create_immed(block, 0);
}
bd->arrs[arr->aid] = av;
}
return bd->arrs[arr->aid];
}
static void
add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
struct ir3_array_value *av, BITSET_WORD *visited)
{
struct ir3_block *block;
struct ir3_nir_block_data *bd;
if (BITSET_TEST(visited, nblock->index))
return;
BITSET_SET(visited, nblock->index);
block = get_block(ctx, nblock);
bd = block->data;
if (bd && bd->arrs[av->aid]) {
struct ir3_array_value *dav = bd->arrs[av->aid];
for (unsigned i = 0; i < av->length; i++) {
ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
dav->arr[i];
}
} else {
/* didn't find defn, recurse predecessors: */
struct set_entry *entry;
set_foreach(nblock->predecessors, entry) {
add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
}
}
}
static void
resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
{
struct ir3_nir_block_data *bd = block->data;
unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
if (!bd)
return;
/* TODO use nir dom_frontier to help us with this? */
for (unsigned i = 1; i <= ctx->num_arrays; i++) {
struct ir3_array_value *av = bd->arrs[i];
BITSET_WORD visited[bitset_words];
struct set_entry *entry;
if (!(av && av->phis))
continue;
memset(visited, 0, sizeof(visited));
set_foreach(block->nblock->predecessors, entry) {
add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
}
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
if (arr->var == var)
return arr;
}
compile_error(ctx, "bogus var: %s\n", var->name);
return NULL;
}
/* allocate an n-element value array (to be populated by caller) and
@ -438,6 +253,7 @@ __get_dst(struct ir3_compile *ctx, void *key, unsigned n)
static struct ir3_instruction **
get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
{
compile_assert(ctx, dst->is_ssa);
if (dst->is_ssa) {
return __get_dst(ctx, &dst->ssa, n);
} else {
@ -455,6 +271,7 @@ static struct ir3_instruction **
get_src(struct ir3_compile *ctx, nir_src *src)
{
struct hash_entry *entry;
compile_assert(ctx, src->is_ssa);
if (src->is_ssa) {
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
} else {
@ -560,7 +377,7 @@ create_uniform(struct ir3_compile *ctx, unsigned n)
}
static struct ir3_instruction *
create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
create_uniform_indirect(struct ir3_compile *ctx, int n,
struct ir3_instruction *address)
{
struct ir3_instruction *mov;
@ -569,7 +386,7 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
ir3_instr_set_address(mov, address);
@ -594,7 +411,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr,
}
static struct ir3_instruction *
create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n,
struct ir3_instruction *address, struct ir3_instruction *collect)
{
struct ir3_block *block = ctx->block;
@ -608,17 +425,45 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
src->instr = collect;
src->size = arrsz;
src->offset = n;
src->array.offset = n;
ir3_instr_set_address(mov, address);
return mov;
}
/* relative (indirect) if address!=NULL */
static struct ir3_instruction *
create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
struct ir3_instruction *src, struct ir3_instruction *address,
struct ir3_instruction *collect)
create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n,
struct ir3_instruction *address)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
struct ir3_register *src;
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
COND(address, IR3_REG_RELATIV));
src->instr = arr->last_write;
src->size = arr->length;
src->array.id = arr->id;
src->array.offset = n;
if (address)
ir3_instr_set_address(mov, address);
arr->last_access = mov;
return mov;
}
/* relative (indirect) if address!=NULL */
static struct ir3_instruction *
create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n,
struct ir3_instruction *src, struct ir3_instruction *address)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
@ -627,14 +472,18 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
dst->size = arrsz;
dst->offset = n;
dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
COND(address, IR3_REG_RELATIV));
dst->instr = arr->last_access;
dst->size = arr->length;
dst->array.id = arr->id;
dst->array.offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
mov->fanin = collect;
ir3_instr_set_address(mov, address);
arr->last_write = arr->last_access = mov;
return mov;
}
@ -1151,7 +1000,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
nir_const_value *const_offset;
/* UBO addresses are the first driver params: */
unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
unsigned off = intr->const_index[0];
int off = intr->const_index[0];
/* First src is ubo index, which could either be an immed or not: */
src0 = get_src(ctx, &intr->src[0])[0];
@ -1199,7 +1048,7 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
struct ir3_array_value *arr = get_var(ctx, dvar->var);
struct ir3_array *arr = get_var(ctx, dvar->var);
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@ -1210,19 +1059,17 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
dst[i] = arr->arr[n];
dst[i] = create_var_load(ctx, arr, n, NULL);
}
break;
case nir_deref_array_type_indirect: {
/* for indirect, we need to collect all the array elements: */
struct ir3_instruction *collect =
create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
dst[i] = create_var_load(ctx, arr, n, addr);
}
break;
}
@ -1239,8 +1086,9 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
struct ir3_array_value *arr = get_var(ctx, dvar->var);
struct ir3_instruction **src;
struct ir3_array *arr = get_var(ctx, dvar->var);
struct ir3_instruction *addr, **src;
unsigned wrmask = intr->const_index[0];
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@ -1249,66 +1097,24 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
switch (darr->deref_array_type) {
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
/* ttn doesn't generate partial writemasks */
assert(intr->const_index[0] ==
(1 << intr->num_components) - 1);
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
arr->arr[n] = src[i];
}
addr = NULL;
break;
case nir_deref_array_type_indirect: {
/* for indirect, create indirect-store and fan that out: */
struct ir3_instruction *collect =
create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
/* ttn doesn't generate partial writemasks */
assert(intr->const_index[0] ==
(1 << intr->num_components) - 1);
struct ir3_instruction *store;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
store = create_indirect_store(ctx, arr->length,
n, src[i], addr, collect);
store->fanin->fi.aid = arr->aid;
/* TODO: probably split this out to be used for
* store_output_indirect? or move this into
* create_indirect_store()?
*/
for (int j = i; j < arr->length; j += intr->num_components) {
struct ir3_instruction *split;
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
split->fo.off = j;
ir3_reg_create(split, 0, 0);
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
arr->arr[j] = split;
}
}
/* fixup fanout/split neighbors: */
for (int i = 0; i < arr->length; i++) {
arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
arr->arr[i+1] : NULL;
arr->arr[i]->cp.left = (i > 0) ?
arr->arr[i-1] : NULL;
}
case nir_deref_array_type_indirect:
addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
break;
}
default:
compile_error(ctx, "Unhandled store deref type: %u\n",
darr->deref_array_type);
break;
}
for (int i = 0; i < intr->num_components; i++) {
if (!(wrmask & (1 << i)))
continue;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
create_var_store(ctx, arr, n, src[i], addr);
}
}
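
The writemask loop only emits a store for components whose bit is set in wrmask, e.g. wrmask 0x5 on a vec4 stores components 0 and 2. In miniature:

#include <stdio.h>

int main(void)
{
   unsigned wrmask = 0x5;   /* components x and z */
   for (int i = 0; i < 4; i++)
      if (wrmask & (1u << i))
         printf("store component %d\n", i);   /* prints 0, then 2 */
   return 0;
}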
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
@ -1335,7 +1141,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
struct ir3_instruction **dst, **src;
struct ir3_block *b = ctx->block;
unsigned idx = intr->const_index[0];
int idx = intr->const_index[0];
nir_const_value *const_offset;
if (info->has_dest) {
@ -1356,7 +1162,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
} else {
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
int n = idx * 4 + i;
dst[i] = create_uniform_indirect(ctx, n,
get_addr(ctx, src[0]));
}
@ -1836,8 +1642,6 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
}
}
resolve_array_phis(ctx, block);
}
static void

View File

@ -41,16 +41,22 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
/* only if mov src is SSA (not const/immed): */
if (!src_instr)
return false;
/* no indirect: */
if (dst->flags & IR3_REG_RELATIV)
return false;
if (src->flags & IR3_REG_RELATIV)
return false;
if (!allow_flags)
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
if (!src_instr)
return false;
/* TODO: remove this hack: */
if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
return false;
@ -82,10 +88,17 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned valid_flags;
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
* I don't think so..
*/
if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
(flags & IR3_REG_RELATIV))
return false;
/* clear flags that are 'ok' */
switch (instr->category) {
case 1:
valid_flags = IR3_REG_IMMED | IR3_REG_RELATIV;
valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
break;
@ -183,9 +196,14 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
*dstflags ^= IR3_REG_SNEG;
if (srcflags & IR3_REG_BNOT)
*dstflags ^= IR3_REG_BNOT;
}
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
*dstflags &= ~IR3_REG_SSA;
*dstflags |= srcflags & IR3_REG_SSA;
*dstflags |= srcflags & IR3_REG_CONST;
*dstflags |= srcflags & IR3_REG_IMMED;
*dstflags |= srcflags & IR3_REG_RELATIV;
*dstflags |= srcflags & IR3_REG_ARRAY;
}
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
* multiply) can swap their first two srcs if src[0] is !CONST and
@ -206,52 +224,35 @@ static bool is_valid_mad(struct ir3_instruction *instr)
static void
reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
{
unsigned src_flags = 0, new_flags;
struct ir3_instruction *src_instr;
struct ir3_instruction *src = ssa(reg);
if (is_meta(instr)) {
/* meta instructions cannot fold up register
* flags.. they are usually src for texture
* fetch, etc, where we cannot specify abs/neg
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
if (is_eligible_mov(src, true)) {
/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
struct ir3_register *src_reg = src->regs[1];
unsigned new_flags = reg->flags;
src_instr = instr_cp(reg->instr, &src_flags);
combine_flags(&new_flags, src_reg->flags);
new_flags = reg->flags;
combine_flags(&new_flags, src_flags);
reg->flags = new_flags;
reg->instr = src_instr;
if (!valid_flags(instr, n, reg->flags)) {
/* insert an absneg.f */
if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) {
debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
reg->instr = ir3_ABSNEG_S(instr->block,
reg->instr, cp_flags(src_flags));
} else {
debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)));
reg->instr = ir3_ABSNEG_F(instr->block,
reg->instr, cp_flags(src_flags));
if (valid_flags(instr, n, new_flags)) {
if (new_flags & IR3_REG_ARRAY) {
debug_assert(!(reg->flags & IR3_REG_ARRAY));
reg->array = src_reg->array;
}
reg->flags = new_flags;
reg->instr = ssa(src_reg);
}
reg->flags &= ~cp_flags(src_flags);
debug_assert(valid_flags(instr, n, reg->flags));
/* send it through instr_cp() again since
* the absneg src might be a mov from const
* that could be cleaned up:
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
if (is_same_type_mov(reg->instr)) {
struct ir3_register *src_reg = reg->instr->regs[1];
unsigned new_flags = src_reg->flags;
src = ssa(reg); /* could be null for IR3_REG_ARRAY case */
if (!src)
return;
} else if (is_same_type_mov(src) &&
/* cannot collapse const/immed/etc into meta instrs: */
!is_meta(instr)) {
/* immed/const/etc cases, which require some special handling: */
struct ir3_register *src_reg = src->regs[1];
unsigned new_flags = reg->flags;
combine_flags(&new_flags, reg->flags);
combine_flags(&new_flags, src_reg->flags);
if (!valid_flags(instr, n, new_flags)) {
/* special case for "normal" mad instructions, we can
@ -287,6 +288,16 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
conflicts(instr->address, reg->instr->address))
return;
/* This seems to be a hw bug, or something where the timings
* just somehow don't work out. This restriction may only
* apply if the first src is also CONST.
*/
if ((instr->category == 3) && (n == 2) &&
(src_reg->flags & IR3_REG_RELATIV) &&
(src_reg->array.offset == 0))
return;
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
@ -298,6 +309,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
if ((src_reg->flags & IR3_REG_RELATIV) &&
!conflicts(instr->address, reg->instr->address)) {
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
ir3_instr_set_address(instr, reg->instr->address);
@ -330,8 +342,10 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
if (new_flags & IR3_REG_BNOT)
iim_val = ~iim_val;
if (!(iim_val & ~0x3ff)) {
/* other than category 1 (mov) we can only encode up to 10 bits: */
if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
@ -342,56 +356,68 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
}
}
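
The 10-bit immediate test in isolation: iim_val & ~0x3ff is nonzero exactly when some bit above bit 9 is set, i.e. when the value doesn't fit the encoding:

#include <assert.h>
#include <stdint.h>

static int fits_10_bits(uint32_t v)
{
   return (v & ~0x3ffu) == 0;
}

int main(void)
{
   assert(fits_10_bits(1023));    /* 0x3ff, largest encodable value */
   assert(!fits_10_bits(1024));   /* 0x400 needs an 11th bit */
   return 0;
}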
/**
* Given an SSA src (instruction), return the one with extraneous
* mov's removed, ie, for (to copy NIR syntax):
*
* vec1 ssa1 = fadd <something>, <somethingelse>
* vec1 ssa2 = fabs ssa1
* vec1 ssa3 = fneg ssa1
*
* then calling instr_cp(ssa3, &flags) would return ssa1 with
* (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
* then disallow eliminating copies which would require flag
* propagation (for example, we cannot propagate abs/neg into
* an output).
/* Handle special case of eliminating output mov, and similar cases where
* there isn't a normal "consuming" instruction. In this case we cannot
* collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
* be eliminated)
*/
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, unsigned *flags)
eliminate_output_mov(struct ir3_instruction *instr)
{
if (is_eligible_mov(instr, false)) {
struct ir3_register *reg = instr->regs[1];
if (!(reg->flags & IR3_REG_ARRAY)) {
struct ir3_instruction *src_instr = ssa(reg);
debug_assert(src_instr);
return src_instr;
}
}
return instr;
}
/**
* Find instruction src's which are mov's that can be collapsed, replacing
* the mov dst with the mov src
*/
static void
instr_cp(struct ir3_instruction *instr)
{
struct ir3_register *reg;
if (is_eligible_mov(instr, !!flags)) {
struct ir3_register *reg = instr->regs[1];
struct ir3_instruction *src_instr = ssa(reg);
if (flags)
combine_flags(flags, reg->flags);
return instr_cp(src_instr, flags);
}
if (instr->regs_count == 0)
return;
/* Check termination condition before walking children (rather
* than before checking eligible-mov). A mov instruction may
* appear as ssa-src for multiple other instructions, and we
* want to consider it for removal for each, rather than just
* the first one. (But regardless of how many places it shows
* up as a src, we only need to recursively walk the children
* once.)
*/
if (ir3_instr_check_mark(instr))
return instr;
return;
/* walk down the graph from each src: */
foreach_src_n(reg, n, instr) {
if (!(reg->flags & IR3_REG_SSA))
struct ir3_instruction *src = ssa(reg);
if (!src)
continue;
instr_cp(src);
/* TODO non-indirect access we could figure out which register
* we actually want and allow cp..
*/
if (reg->flags & IR3_REG_ARRAY)
continue;
reg_cp(instr, reg, n);
}
if (instr->address)
ir3_instr_set_address(instr, instr_cp(instr->address, NULL));
if (instr->regs[0]->flags & IR3_REG_ARRAY) {
struct ir3_instruction *src = ssa(instr->regs[0]);
if (src)
instr_cp(src);
}
return instr;
if (instr->address) {
instr_cp(instr->address);
ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
}
}
void
@ -401,19 +427,20 @@ ir3_cp(struct ir3 *ir)
for (unsigned i = 0; i < ir->noutputs; i++) {
if (ir->outputs[i]) {
struct ir3_instruction *out =
instr_cp(ir->outputs[i], NULL);
ir->outputs[i] = out;
instr_cp(ir->outputs[i]);
ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
}
}
for (unsigned i = 0; i < ir->keeps_count; i++) {
ir->keeps[i] = instr_cp(ir->keeps[i], NULL);
instr_cp(ir->keeps[i]);
ir->keeps[i] = eliminate_output_mov(ir->keeps[i]);
}
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
if (block->condition)
block->condition = instr_cp(block->condition, NULL);
if (block->condition) {
instr_cp(block->condition);
block->condition = eliminate_output_mov(block->condition);
}
}
}

View File

@ -76,7 +76,7 @@ int ir3_delayslots(struct ir3_instruction *assigner,
return 6;
} else if ((consumer->category == 3) &&
(is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
(n == 2)) {
(n == 3)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1;
} else {
@ -118,6 +118,10 @@ ir3_instr_depth(struct ir3_instruction *instr)
/* visit child to compute it's depth: */
ir3_instr_depth(src);
/* for array writes, no need to delay on previous write: */
if (i == 0)
continue;
sd = ir3_delayslots(src, instr, i) + src->depth;
instr->depth = MAX2(instr->depth, sd);

View File

@ -94,7 +94,7 @@ static void print_instr_name(struct ir3_instruction *instr)
}
}
static void print_reg_name(struct ir3_register *reg, bool followssa)
static void print_reg_name(struct ir3_register *reg)
{
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
@ -106,20 +106,29 @@ static void print_reg_name(struct ir3_register *reg, bool followssa)
if (reg->flags & IR3_REG_IMMED) {
printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
} else if (reg->flags & IR3_REG_SSA) {
printf("_");
if (followssa) {
printf("[");
} else if (reg->flags & IR3_REG_ARRAY) {
printf("arr[id=%u, offset=%d, size=%u", reg->array.id,
reg->array.offset, reg->size);
/* for ARRAY we could have null src, for example first write
* instruction..
*/
if (reg->instr) {
printf(", _[");
print_instr_name(reg->instr);
printf("]");
}
printf("]");
} else if (reg->flags & IR3_REG_SSA) {
printf("_[");
print_instr_name(reg->instr);
printf("]");
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_HALF)
printf("h");
if (reg->flags & IR3_REG_CONST)
printf("c<a0.x + %u>", reg->num);
printf("c<a0.x + %d>", reg->array.offset);
else
printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
printf("\x1b[0;31mr<a0.x + %d>\x1b[0m (%u)", reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_HALF)
printf("h");
@ -158,7 +167,7 @@ print_instr(struct ir3_instruction *instr, int lvl)
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
printf(i ? ", " : " ");
print_reg_name(reg, !!i);
print_reg_name(reg);
}
if (instr->address) {
@ -168,13 +177,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
printf("]");
}
if (instr->fanin) {
printf(", fanin=_");
printf("[");
print_instr_name(instr->fanin);
printf("]");
}
if (instr->cp.left) {
printf(", left=_");
printf("[");
@ -192,8 +194,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
printf(", aid=%d", instr->fi.aid);
}
}

View File

@ -68,25 +68,24 @@
* LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
* register assignment. But for us that is horrible from a scheduling
* standpoint. Instead what we do is use the idea of a 'definer' instruction.
* Ie. the first instruction (lowest ip) to write to the array is the
* Ie. the first instruction (lowest ip) to write to the variable is the
* one we consider from use/def perspective when building interference
* graph. (Other instructions which write other array elements just
* define the variable some more.)
* graph. (Other instructions which write other variable components
* just define the variable some more.)
*
* Arrays of arbitrary size are handled via pre-coloring a consecutive
* sequence of registers. Additional scalar (single component) reg
* names are allocated starting at ctx->class_base[total_class_count]
* (see arr->base), which are pre-colored. In the use/def graph direct
* access is treated as a single element use/def, and indirect access
* is treated as use or def of all array elements. (Only the first
* def is tracked, in case of multiple indirect writes, etc.)
*/
static const unsigned class_sizes[] = {
1, 2, 3, 4,
4 + 4, /* txd + 1d/2d */
4 + 6, /* txd + 3d */
/* temporary: until we can assign arrays, create classes so we
* can round up array to fit. NOTE with tgsi arrays should
* really all be multiples of four:
*/
4 * 4,
4 * 8,
4 * 16,
4 * 32,
};
#define class_count ARRAY_SIZE(class_sizes)
@ -265,13 +264,21 @@ struct ir3_ra_ctx {
struct ir3_ra_reg_set *set;
struct ra_graph *g;
unsigned alloc_count;
unsigned class_alloc_count[total_class_count];
unsigned class_base[total_class_count];
/* one per class, plus one slot for arrays: */
unsigned class_alloc_count[total_class_count + 1];
unsigned class_base[total_class_count + 1];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
struct ir3_ra_instr_data *instrd;
};
/* does it conflict? */
static inline bool
intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end)
{
return !((a_start >= b_end) || (b_start >= a_end));
}
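
intersects() effectively treats the live ranges as half-open, so ranges that merely touch at an endpoint do not conflict. Spot checks, as a standalone sketch:

#include <assert.h>
#include <stdbool.h>

static bool
overlaps(unsigned a0, unsigned a1, unsigned b0, unsigned b1)
{
   return !((a0 >= b1) || (b0 >= a1));
}

int main(void)
{
   assert(overlaps(0, 4, 3, 6));    /* [0,4) and [3,6) share 3 */
   assert(!overlaps(0, 4, 4, 6));   /* touching at 4 is not a conflict */
   return 0;
}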
static bool
is_half(struct ir3_instruction *instr)
{
@ -329,9 +336,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
if (instr->fanin)
return get_definer(ctx, instr->fanin, sz, off);
if (id->defn) {
*sz = id->sz;
*off = id->off;
@ -485,10 +489,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* couple special cases: */
if (writes_addr(instr) || writes_pred(instr)) {
id->cls = -1;
continue;
} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
id->cls = total_class_count;
id->defn = instr;
} else {
id->defn = get_definer(ctx, instr, &id->sz, &id->off);
id->cls = size_to_class(id->sz, is_half(id->defn));
}
id->defn = get_definer(ctx, instr, &id->sz, &id->off);
id->cls = size_to_class(id->sz, is_half(id->defn));
}
}
@ -518,8 +525,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*
* TODO but we still need to allocate names for them, don't we??
*/
if (id->cls >= 0) {
instr->name = ctx->class_alloc_count[id->cls]++;
@ -531,7 +536,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
unsigned n;
unsigned n, base;
ir3_clear_mark(ctx->ir);
n = ir3_count_instructions(ctx->ir);
@ -550,11 +555,20 @@ ra_init(struct ir3_ra_ctx *ctx)
* actual ra name is class_base[cls] + instr->name;
*/
ctx->class_base[0] = 0;
for (unsigned i = 1; i < total_class_count; i++) {
for (unsigned i = 1; i <= total_class_count; i++) {
ctx->class_base[i] = ctx->class_base[i-1] +
ctx->class_alloc_count[i-1];
}
/* and vreg names for array elements: */
base = ctx->class_base[total_class_count];
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
arr->base = base;
ctx->class_alloc_count[total_class_count] += arr->length;
base += arr->length;
}
ctx->alloc_count += ctx->class_alloc_count[total_class_count];
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@ -562,15 +576,23 @@ ra_init(struct ir3_ra_ctx *ctx)
}
static unsigned
ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
{
unsigned name;
debug_assert(cls >= 0);
debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. */
name = ctx->class_base[cls] + defn->name;
debug_assert(name < ctx->alloc_count);
return name;
}
static int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
/* TODO handle name mapping for arrays */
return __ra_name(ctx, id->cls, id->defn);
}
static void
ra_destroy(struct ir3_ra_ctx *ctx)
{
@ -583,6 +605,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_ra_block_data *bd;
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
void def(unsigned name, struct ir3_instruction *instr)
{
/* defined on first write: */
if (!ctx->def[name])
ctx->def[name] = instr->ip;
ctx->use[name] = instr->ip;
BITSET_SET(bd->def, name);
}
void use(unsigned name, struct ir3_instruction *instr)
{
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
}
bd = rzalloc(ctx->g, struct ir3_ra_block_data);
bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words);
@ -594,6 +632,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_instruction *src;
struct ir3_register *reg;
if (instr->regs_count == 0)
continue;
@ -625,61 +664,101 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (writes_gpr(instr)) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_register *dst = instr->regs[0];
if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
if (dst->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, dst->array.id);
unsigned i;
debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
/* set the node class now.. in case we don't encounter
* this array dst again. From register_alloc algo's
* perspective, these are all single/scalar regs:
*/
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
for (i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
}
ctx->def[name] = id->defn->ip;
ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
ctx->set->classes[id->cls]);
/* indirect write is treated like a write to all array
* elements, since we don't know which one is actually
* written:
*/
if (dst->flags & IR3_REG_RELATIV) {
for (i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
def(name, instr);
}
} else {
unsigned name = arr->base + dst->array.offset;
def(name, instr);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
*/
if (src->ip > phi->ip) {
struct ir3_instruction *last =
} else if (id->defn == instr) {
unsigned name = ra_name(ctx, id);
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
def(name, id->defn);
if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
*/
if (src->ip > phi->ip) {
struct ir3_instruction *last =
list_last_entry(&src->block->instr_list,
struct ir3_instruction, node);
ctx->use[name] = MAX2(ctx->use[name], last->ip);
}
struct ir3_instruction, node);
ctx->use[name] = MAX2(ctx->use[name], last->ip);
}
}
}
}
}
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
foreach_src(reg, instr) {
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, reg->array.id);
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
/* indirect read is treated like a read from all array
* elements, since we don't know which one is actually
* read:
*/
if (reg->flags & IR3_REG_RELATIV) {
unsigned i;
for (i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
use(name, instr);
}
} else {
unsigned name = arr->base + reg->array.offset;
use(name, instr);
debug_assert(reg->array.offset < arr->length);
}
} else if ((src = ssa(reg)) && writes_gpr(src)) {
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
use(name, instr);
}
}
}
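/* Sketch of the conservative rule used above (assumed semantics, not part
 * of the patch): a relative access defs/uses every element of the array,
 * a direct access only the addressed element.
 */
static void
mark_array_access(struct ir3_array *arr, bool relative, unsigned offset,
      void (*mark)(unsigned name))
{
   if (relative) {
      /* index unknown at compile time: touch all elements */
      for (unsigned i = 0; i < arr->length; i++)
         mark(arr->base + i);
   } else {
      /* index known: touch exactly one element */
      mark(arr->base + offset);
   }
}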
@ -735,6 +814,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
{
struct ir3 *ir = ctx->ir;
/* initialize array live ranges: */
list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) {
arr->start_ip = ~0;
arr->end_ip = 0;
}
/* compute live ranges (use/def) on a block level, also updating
* block's def/use bitmasks (used below to calculate per-block
* livein/liveout):
@ -767,18 +852,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]);
ctx->use[name] = ctx->instr_cnt;
}
for (unsigned i = 0; i < ctx->alloc_count; i++) {
for (unsigned j = 0; j < ctx->alloc_count; j++) {
if (!((ctx->def[i] >= ctx->use[j]) ||
(ctx->def[j] >= ctx->use[i]))) {
if (intersects(ctx->def[i], ctx->use[i],
ctx->def[j], ctx->use[j])) {
ra_add_node_interference(ctx->g, i, j);
}
}
@ -836,19 +917,36 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
}
}
/* NOTE: instr could be NULL for the IR3_REG_ARRAY case, for the first
* array access(es) which do not have any previous access to depend
* on, from a scheduling point of view
*/
static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_ra_instr_data *id;
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, reg->array.id);
unsigned name = arr->base + reg->array.offset;
unsigned r = ra_get_node_reg(ctx->g, name);
unsigned num = ctx->set->ra_reg_to_gpr[r];
if (reg->flags & IR3_REG_RELATIV) {
reg->array.offset = num;
} else {
reg->num = num;
}
reg->flags &= ~IR3_REG_ARRAY;
} else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
unsigned name = ra_name(ctx, id);
unsigned r = ra_get_node_reg(ctx->g, name);
unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
debug_assert(!(reg->flags & IR3_REG_RELATIV));
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
@ -875,9 +973,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_src_n(reg, n, instr) {
struct ir3_instruction *src = reg->instr;
if (!src)
/* Note: reg->instr could be null for IR3_REG_ARRAY */
if (!(src || (reg->flags & IR3_REG_ARRAY)))
continue;
reg_assign(ctx, instr->regs[n+1], src);
if (instr->regs[n+1]->flags & IR3_REG_HALF)
fixup_half_instr_src(instr);
@ -888,6 +986,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static int
ra_alloc(struct ir3_ra_ctx *ctx)
{
unsigned n = 0;
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
@ -897,7 +997,7 @@ ra_alloc(struct ir3_ra_ctx *ctx)
if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
struct ir3_instruction *instr = ir->inputs[i];
int cls = size_to_class(1, true);
unsigned name = ra_name(ctx, cls, instr);
unsigned name = __ra_name(ctx, cls, instr);
unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
/* if we have frag_face, it gets hr0.x */
@ -905,7 +1005,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
i += 4;
}
for (j = 0; i < ir->ninputs; i++) {
j = 0;
for (; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
@ -913,7 +1014,7 @@ ra_alloc(struct ir3_ra_ctx *ctx)
if (id->defn == instr) {
unsigned name, reg;
name = ra_name(ctx, id->cls, id->defn);
name = ra_name(ctx, id);
reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
@ -921,6 +1022,46 @@ ra_alloc(struct ir3_ra_ctx *ctx)
}
}
}
n = j;
}
/* pre-assign array elements:
*/
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
unsigned base = n;
if (arr->end_ip == 0)
continue;
/* figure out what else we conflict with which has already
* been assigned:
*/
retry:
list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, node) {
if (arr2 == arr)
break;
if (arr2->end_ip == 0)
continue;
/* if it intersects with liverange AND register range.. */
if (intersects(arr->start_ip, arr->end_ip,
arr2->start_ip, arr2->end_ip) &&
intersects(base, base + arr->length,
arr2->reg, arr2->reg + arr2->length)) {
base = MAX2(base, arr2->reg + arr2->length);
goto retry;
}
}
arr->reg = base;
for (unsigned i = 0; i < arr->length; i++) {
unsigned name, reg;
name = arr->base + i;
reg = ctx->set->gpr_to_ra_reg[0][base++];
ra_set_node_reg(ctx->g, name, reg);
}
}
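/* Hypothetical walk-through of the pre-coloring loop above: say arr A
 * (length 4, live ip 10..20) was already placed at regs 0..3. An arr B
 * with live range 15..30 conflicts with A in both live range and reg
 * range, so its base bumps to 4 and the scan retries; an arr C live only
 * at ip 40..50 never intersects A's live range and may reuse base 0.
 */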
if (!ra_allocate(ctx->g))

View File

@ -187,6 +187,9 @@ delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
foreach_ssa_src_n(src, i, instr) {
unsigned d;
/* for array writes, no need to delay on previous write: */
if (i == 0)
continue;
if (src->block != instr->block)
continue;
d = delay_calc_srcn(ctx, src, instr, i);

View File

@ -261,6 +261,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:

View File

@ -184,7 +184,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
struct i915_tracked_state i915_update_vertex_layout = {
"vertex_layout",
calculate_vertex_layout,
I915_NEW_FS | I915_NEW_VS
I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS
};

View File

@ -485,6 +485,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -82,8 +82,6 @@ struct llvmpipe_context {
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
struct pipe_resource *mapped_vs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
struct pipe_resource *mapped_gs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned num_samplers[PIPE_SHADER_TYPES];
unsigned num_sampler_views[PIPE_SHADER_TYPES];

View File

@ -149,9 +149,6 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_vs_reset_so(lp->vs);
}
}
llvmpipe_cleanup_vertex_sampling(lp);
llvmpipe_cleanup_geometry_sampling(lp);
/*
* TODO: Flush only when a user vertex/index buffer is present

View File

@ -310,6 +310,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
}
/* should only get here on unhandled cases */

View File

@ -476,27 +476,30 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
uint64_t zsvalue = 0;
uint32_t zmask32;
uint8_t smask8;
enum pipe_format format = setup->fb.zsbuf->format;
LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
depth,
stencil);
zsvalue = util_pack64_z_stencil(format, depth, stencil);
/*
* XXX: should make a full mask here for things like D24X8,
* otherwise we'll do a read-modify-write clear later which
* should be unnecessary.
*/
zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
zmask32,
smask8);
zsmask = util_pack64_mask_z_stencil(format, zmask32, smask8);
zsvalue &= zsmask;
if (format == PIPE_FORMAT_Z24X8_UNORM ||
format == PIPE_FORMAT_X8Z24_UNORM) {
/*
* Make full mask if there's "X" bits so we can do full
* clear (without rmw).
*/
uint32_t zsmask_full = 0;
zsmask_full = util_pack_mask_z_stencil(format, ~0, ~0);
zsmask |= ~zsmask_full;
}
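/* Worked example (assuming Z occupies the low 24 bits of
 * PIPE_FORMAT_Z24X8_UNORM): util_pack_mask_z_stencil(format, ~0, ~0)
 * yields zsmask_full == 0x00ffffff, so OR-ing in ~zsmask_full marks the
 * don't-care X bits writable and the clear becomes a plain store rather
 * than a read-modify-write.
 */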
if (setup->state == SETUP_ACTIVE) {
struct lp_scene *scene = setup->scene;
@ -796,13 +799,15 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
unsigned num,
struct pipe_sampler_view **views)
{
unsigned i;
unsigned i, max_tex_num;
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
max_tex_num = MAX2(num, setup->fs.current_tex_num);
for (i = 0; i < max_tex_num; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
if (view) {
@ -922,7 +927,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
assert(jit_tex->base);
}
}
else {
pipe_resource_reference(&setup->fs.current_tex[i], NULL);
}
}
setup->fs.current_tex_num = num;
setup->dirty |= LP_SETUP_NEW_FS;
}

View File

@ -133,6 +133,7 @@ struct lp_setup_context
const struct lp_rast_state *stored; /**< what's in the scene */
struct lp_rast_state current; /**< currently set state */
struct pipe_resource *current_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned current_tex_num;
} fs;
/** fragment shader constants */

View File

@ -556,7 +556,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Calculate trivial reject values:
*/
eo = vec_sub_epi32(vec_andc(dcdy_neg_mask, dcdy),
eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy),
vec_and(dcdx_neg_mask, dcdx));
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */

View File

@ -130,16 +130,10 @@ void
llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx,
unsigned num,
struct pipe_sampler_view **views);
void
llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx);
void
llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *ctx,
unsigned num,
struct pipe_sampler_view **views);
void
llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx);
#endif

View File

@ -190,8 +190,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->tex_timestamp = lp_screen->timestamp;
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
if (llvmpipe->dirty & (LP_NEW_FS |
/* This needs LP_NEW_RASTERIZER because of draw_prepare_shader_outputs(). */
if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
LP_NEW_FS |
LP_NEW_VS))
compute_vertex_info(llvmpipe);

View File

@ -98,8 +98,9 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
llvmpipe->samplers[shader],
llvmpipe->num_samplers[shader]);
}
llvmpipe->dirty |= LP_NEW_SAMPLER;
else {
llvmpipe->dirty |= LP_NEW_SAMPLER;
}
}
@ -128,6 +129,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
*/
pipe_sampler_view_release(pipe,
&llvmpipe->sampler_views[shader][start + i]);
/*
* Warn if someone tries to set a view created in a different context
* (which is why we need the hack above in the first place).
* An assert would be better but st/mesa relies on it...
*/
if (views[i] && views[i]->context != pipe) {
debug_printf("Illegal setting of sampler_view %d created in another "
"context\n", i);
}
pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
views[i]);
}
@ -146,8 +156,9 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
llvmpipe->sampler_views[shader],
llvmpipe->num_sampler_views[shader]);
}
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
else {
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
}
@ -228,8 +239,7 @@ prepare_shader_sampling(
struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views,
unsigned shader_type,
struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS])
unsigned shader_type)
{
unsigned i;
@ -242,7 +252,7 @@ prepare_shader_sampling(
if (!num)
return;
for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
for (i = 0; i < num; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
if (view) {
@ -253,11 +263,6 @@ prepare_shader_sampling(
unsigned first_level = 0;
unsigned last_level = 0;
/* We're referencing the texture's internal data, so save a
* reference to it.
*/
pipe_resource_reference(&mapped_tex[i], tex);
if (!lp_tex->dt) {
/* regular texture - setup array of mipmap level offsets */
struct pipe_resource *res = view->texture;
@ -335,47 +340,28 @@ prepare_shader_sampling(
/**
* Called during state validation when LP_NEW_SAMPLER_VIEW is set.
* Called whenever we're about to draw (no dirty flag, FIXME?).
*/
void
llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views)
{
prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX,
lp->mapped_vs_tex);
}
void
llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
{
unsigned i;
for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
}
prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX);
}
/**
* Called during state validation when LP_NEW_SAMPLER_VIEW is set.
* Called whenever we're about to draw (no dirty flag, FIXME?).
*/
void
llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views)
{
prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY,
lp->mapped_gs_tex);
prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY);
}
void
llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx)
{
unsigned i;
for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) {
pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL);
}
}
void
llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)

View File

@ -70,6 +70,15 @@ llvmpipe_set_so_targets(struct pipe_context *pipe,
int i;
for (i = 0; i < num_targets; i++) {
const boolean append = (offsets[i] == (unsigned)-1);
/*
* Warn if the so target was created in another context.
* XXX Not entirely sure if mesa/st may rely on this?
* Otherwise should just assert.
*/
if (targets[i] && targets[i]->context != pipe) {
debug_printf("Illegal setting of so target with target %d created in "
"another context\n", i);
}
pipe_so_target_reference((struct pipe_stream_output_target **)&llvmpipe->so_targets[i], targets[i]);
/* If we're not appending then lets set the internal
offset to what was requested */

View File

@ -52,6 +52,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
struct llvmpipe_context *lp = llvmpipe_context(pipe);
boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb);
unsigned i;
assert(fb->width <= LP_MAX_WIDTH);
assert(fb->height <= LP_MAX_HEIGHT);
@ -66,10 +67,22 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
const struct util_format_description *depth_desc =
util_format_description(depth_format);
if (lp->framebuffer.zsbuf && lp->framebuffer.zsbuf->context != pipe) {
debug_printf("Illegal setting of fb state with zsbuf created in "
"another context\n");
}
for (i = 0; i < fb->nr_cbufs; i++) {
if (lp->framebuffer.cbufs[i] &&
lp->framebuffer.cbufs[i]->context != pipe) {
debug_printf("Illegal setting of fb state with cbuf %d created in "
"another context\n", i);
}
}
util_copy_framebuffer_state(&lp->framebuffer, fb);
if (LP_PERF & PERF_NO_DEPTH) {
pipe_surface_reference(&lp->framebuffer.zsbuf, NULL);
}
/*

View File

@ -615,6 +615,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
case FILE_MEMORY_CONST:
case FILE_MEMORY_SHARED:
case FILE_SHADER_INPUT:
case FILE_SHADER_OUTPUT:
hi->getSrc(s)->reg.data.offset += 4;
break;
default:
@ -625,7 +626,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
}
}
if (srcNr == 2) {
lo->setDef(1, carry);
lo->setFlagsDef(1, carry);
hi->setFlagsSrc(hi->srcCount(), carry);
}
return hi;

View File

@ -75,7 +75,8 @@ private:
void emitLOAD(const Instruction *);
void emitSTORE(const Instruction *);
void emitMOV(const Instruction *);
void emitMEMBAR(const Instruction *);
void emitATOM(const Instruction *);
void emitCCTL(const Instruction *);
void emitINTERP(const Instruction *);
void emitAFETCH(const Instruction *);
@ -123,6 +124,7 @@ private:
void emitPIXLD(const Instruction *);
void emitBAR(const Instruction *);
void emitMEMBAR(const Instruction *);
void emitFlow(const Instruction *);
@ -698,6 +700,10 @@ CodeEmitterGK110::emitIMAD(const Instruction *i)
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[1] |= 1 << 25;
if (i->flagsDef >= 0) code[1] |= 1 << 18;
if (i->flagsSrc >= 0) code[1] |= 1 << 20;
SAT_(35);
}
@ -1252,8 +1258,32 @@ CodeEmitterGK110::emitPIXLD(const Instruction *i)
void
CodeEmitterGK110::emitBAR(const Instruction *i)
{
/* TODO */
emitNOP(i);
code[0] = 0x00000002;
code[1] = 0x85400000;
switch (i->subOp) {
case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break;
case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break;
case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break;
case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
default:
code[1] |= 0x20;
assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
break;
}
emitPredicate(i);
srcId(i->src(0), 10);
srcId(i->src(1), 23);
}
void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
{
code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
code[1] = 0x7cc00000;
emitPredicate(i);
}
void
@ -1587,6 +1617,10 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
srcId(i->src(1), 2);
srcId(i->src(0).getIndirect(0), 10);
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
i->src(0).isIndirect(0) &&
i->getIndirect(0, 0)->reg.size == 8)
code[1] |= 1 << 23;
}
void
@ -1597,7 +1631,7 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
case FILE_MEMORY_SHARED: code[1] = 0x7a400000; code[0] = 0x00000002; break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i);
@ -1628,7 +1662,13 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
emitPredicate(i);
defId(i->def(0), 2);
srcId(i->src(0).getIndirect(0), 10);
if (i->getIndirect(0, 0)) {
srcId(i->src(0).getIndirect(0), 10);
if (i->getIndirect(0, 0)->reg.size == 8)
code[1] |= 1 << 23;
} else {
code[0] |= 255 << 10;
}
}
uint8_t
@ -1683,10 +1723,83 @@ CodeEmitterGK110::emitMOV(const Instruction *i)
}
}
void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
static inline bool
uses64bitAddress(const Instruction *ldst)
{
code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
code[1] = 0x7cc00000;
return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
ldst->src(0).isIndirect(0) &&
ldst->getIndirect(0, 0)->reg.size == 8;
}
void
CodeEmitterGK110::emitATOM(const Instruction *i)
{
code[0] = 0x00000002;
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
code[1] = 0x77800000;
else
code[1] = 0x68000000;
switch (i->subOp) {
case NV50_IR_SUBOP_ATOM_CAS: break;
case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
default: code[1] |= i->subOp << 23; break;
}
switch (i->dType) {
case TYPE_U32: break;
case TYPE_S32: code[1] |= 0x00100000; break;
case TYPE_U64: code[1] |= 0x00200000; break;
case TYPE_F32: code[1] |= 0x00300000; break;
case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
case TYPE_S64: code[1] |= 0x00500000; break;
default: assert(!"unsupported type"); break;
}
emitPredicate(i);
/* TODO: cas: check that src regs line up */
/* TODO: cas: flip bits if $r255 is used */
srcId(i->src(1), 23);
if (i->defExists(0))
defId(i->def(0), 2);
else
code[0] |= 255 << 2;
const int32_t offset = SDATA(i->src(0)).offset;
assert(offset < 0x80000 && offset >= -0x80000);
code[0] |= (offset & 1) << 31;
code[1] |= (offset & 0xffffe) >> 1;
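/* Worked example (hypothetical value): for offset == 0x1234, bit 0 is 0,
 * so code[0] bit 31 stays clear, and bits 19:1 == 0x91a land in the low
 * bits of code[1].
 */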
if (i->getIndirect(0, 0)) {
srcId(i->getIndirect(0, 0), 10);
if (i->getIndirect(0, 0)->reg.size == 8)
code[1] |= 1 << 19;
} else {
code[0] |= 255 << 10;
}
}
void
CodeEmitterGK110::emitCCTL(const Instruction *i)
{
int32_t offset = SDATA(i->src(0)).offset;
code[0] = 0x00000002 | (i->subOp << 2);
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
code[1] = 0x7b000000;
} else {
code[1] = 0x7c000000;
offset &= 0xffffff;
}
code[0] |= offset << 23;
code[1] |= offset >> 9;
if (uses64bitAddress(i))
code[1] |= 1 << 23;
srcId(i->src(0).getIndirect(0), 10);
emitPredicate(i);
}
@ -1925,6 +2038,12 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_MEMBAR:
emitMEMBAR(insn);
break;
case OP_ATOM:
emitATOM(insn);
break;
case OP_CCTL:
emitCCTL(insn);
break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:

View File

@ -176,6 +176,8 @@ private:
void emitISBERD();
void emitAL2P();
void emitIPA();
void emitATOM();
void emitCCTL();
void emitPIXLD();
@ -1552,11 +1554,13 @@ CodeEmitterGM107::emitLOP()
break;
}
emitPRED (0x30);
emitX (0x2b);
emitField(0x29, 2, lop);
emitINV (0x28, insn->src(1));
emitINV (0x27, insn->src(0));
} else {
emitInsn (0x04000000);
emitX (0x39);
emitINV (0x38, insn->src(1));
emitINV (0x37, insn->src(0));
emitField(0x35, 2, lop);
@ -1624,9 +1628,11 @@ CodeEmitterGM107::emitIADD()
emitNEG(0x31, insn->src(0));
emitNEG(0x30, insn->src(1));
emitCC (0x2f);
emitX (0x2b);
} else {
emitInsn(0x1c000000);
emitSAT (0x36);
emitX (0x35);
emitCC (0x34);
emitIMMD(0x14, 32, insn->src(1));
}
@ -2146,6 +2152,7 @@ CodeEmitterGM107::emitLD()
emitPRED (0x3a);
emitLDSTc(0x38);
emitLDSTs(0x35, insn->dType);
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@ -2176,6 +2183,7 @@ CodeEmitterGM107::emitST()
emitPRED (0x3a);
emitLDSTc(0x38);
emitLDSTs(0x35, insn->dType);
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
emitGPR (0x00, insn->src(1));
}
@ -2296,6 +2304,50 @@ CodeEmitterGM107::emitIPA()
emitGPR(0x27);
}
void
CodeEmitterGM107::emitATOM()
{
unsigned dType, subOp;
switch (insn->dType) {
case TYPE_U32: dType = 0; break;
case TYPE_S32: dType = 1; break;
case TYPE_U64: dType = 2; break;
case TYPE_F32: dType = 3; break;
case TYPE_B128: dType = 4; break;
case TYPE_S64: dType = 5; break;
default: assert(!"unexpected dType"); dType = 0; break;
}
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
subOp = 8;
else
subOp = insn->subOp;
assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */
emitInsn (0xed000000);
emitField(0x34, 4, subOp);
emitField(0x31, 3, dType);
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
emitGPR (0x14, insn->src(1));
emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
void
CodeEmitterGM107::emitCCTL()
{
unsigned width;
if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
emitInsn(0xef600000);
width = 30;
} else {
emitInsn(0xef800000);
width = 22;
}
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
emitADDR (0x08, 0x16, width, 2, insn->src(0));
emitField(0x00, 4, insn->subOp);
}
/*******************************************************************************
* surface
******************************************************************************/
@ -2795,6 +2847,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
break;
}
break;
case OP_ATOM:
emitATOM();
break;
case OP_CCTL:
emitCCTL();
break;
case OP_VFETCH:
emitALD();
break;

View File

@ -1463,6 +1463,7 @@ CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
if (i->encSize == 4) {
assert(i->op == OP_RCP);
assert(!i->saturate);
code[0] |= i->src(0).mod.abs() << 15;
code[0] |= i->src(0).mod.neg() << 22;
emitForm_MUL(i);
@ -1470,6 +1471,10 @@ CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
code[1] = subOp << 29;
code[1] |= i->src(0).mod.abs() << 20;
code[1] |= i->src(0).mod.neg() << 26;
if (i->saturate) {
assert(subOp == 6 && i->op == OP_EX2);
code[1] |= 1 << 27;
}
emitForm_MAD(i);
}
}

View File

@ -95,6 +95,13 @@ public:
return tgsi_util_get_src_register_swizzle(&reg, chan);
}
int getArrayId() const
{
if (isIndirect(0))
return fsr->Indirect.ArrayID;
return 0;
}
nv50_ir::Modifier getMod(int chan) const;
SrcRegister getIndirect(int dim) const
@ -154,6 +161,13 @@ public:
return SrcRegister(fdr->Indirect);
}
int getArrayId() const
{
if (isIndirect(0))
return fdr->Indirect.ArrayID;
return 0;
}
private:
const struct tgsi_dst_register reg;
const struct tgsi_full_dst_register *fdr;
@ -809,7 +823,10 @@ public:
// these registers are per-subroutine, cannot be used for parameter passing
std::set<Location> locals;
bool mainTempsInLMem;
std::set<int> indirectTempArrays;
std::map<int, int> indirectTempOffsets;
std::map<int, std::pair<int, int> > tempArrayInfo;
std::vector<int> tempArrayId;
int clipVertexOutput;
@ -841,8 +858,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
mainTempsInLMem = false;
}
Source::~Source()
@ -872,6 +887,7 @@ bool Source::scanSource()
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
info->immd.bufSize = 0;
@ -917,8 +933,16 @@ bool Source::scanSource()
}
tgsi_parse_free(&parse);
if (mainTempsInLMem)
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
if (indirectTempArrays.size()) {
int tempBase = 0;
for (std::set<int>::const_iterator it = indirectTempArrays.begin();
it != indirectTempArrays.end(); ++it) {
std::pair<int, int>& info = tempArrayInfo[*it];
indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
tempBase += info.second;
}
info->bin.tlsSpace += tempBase * 16;
}
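// Hypothetical walk-through of the packing above: with indirect arrays
// TEMP[2..5] (ArrayID 1) and TEMP[8..9] (ArrayID 2), the loop yields
// indirectTempOffsets == {1: -2, 2: -4} and tempBase == 6, i.e. the two
// arrays sit back to back in local memory and tlsSpace grows by 6 * 16.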
if (info->io.genUserClip > 0) {
info->io.clipDistances = info->io.genUserClip;
@ -1028,6 +1052,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
unsigned sn = TGSI_SEMANTIC_GENERIC;
unsigned si = 0;
const unsigned first = decl->Range.First, last = decl->Range.Last;
const int arrayId = decl->Array.ArrayID;
if (decl->Declaration.Semantic) {
sn = decl->Semantic.Name;
@ -1172,8 +1197,14 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
if (arrayId)
tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
first, last - first + 1)));
break;
case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
@ -1223,7 +1254,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
} else
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
mainTempsInLMem = true;
indirectTempArrays.insert(insn.getDst(0).getArrayId());
}
}
@ -1231,7 +1262,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
mainTempsInLMem = true;
indirectTempArrays.insert(src.getArrayId());
} else
/*
if (src.getFile() == TGSI_FILE_RESOURCE) {
@ -1337,6 +1368,7 @@ private:
void storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr);
void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
Value *applySrcMod(Value *, int s, int c);
Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
@ -1416,6 +1448,7 @@ private:
DataType srcTy;
DataArray tData; // TGSI_FILE_TEMPORARY
DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
DataArray aData; // TGSI_FILE_ADDRESS
DataArray pData; // TGSI_FILE_PREDICATE
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
@ -1619,7 +1652,7 @@ Converter::getArrayForFile(unsigned file, int idx)
{
switch (file) {
case TGSI_FILE_TEMPORARY:
return &tData;
return idx == 0 ? &tData : &lData;
case TGSI_FILE_PREDICATE:
return &pData;
case TGSI_FILE_ADDRESS:
@ -1641,11 +1674,23 @@ Converter::shiftAddress(Value *index)
return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
}
void
Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
{
std::map<int, int>::const_iterator it =
code->indirectTempOffsets.find(arrayId);
if (it == code->indirectTempOffsets.end())
return;
idx2d = 1;
idx += it->second;
}
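// Continuing the hypothetical numbers above: adjustTempIndex(1, idx, idx2d)
// turns TEMP[2] (idx == 2) into idx == 0 with idx2d == 1, so the access is
// routed to lData (local memory) rather than tData (GPRs).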
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
const int idx = src.getIndex(0);
int idx2d = src.is2D() ? src.getIndex(1) : 0;
int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
Instruction *ld;
@ -1686,6 +1731,13 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
ld->perPatch = info->sv[idx].patch;
return ld->getDef(0);
case TGSI_FILE_TEMPORARY: {
int arrayid = src.getArrayId();
if (!arrayid)
arrayid = code->tempArrayId[idx];
adjustTempIndex(arrayid, idx, idx2d);
}
/* fallthrough */
default:
return getArrayForFile(src.getFile(), idx2d)->load(
sub.cur->values, idx, swz, shiftAddress(ptr));
@ -1697,8 +1749,8 @@ Converter::acquireDst(int d, int c)
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
const unsigned f = dst.getFile();
const int idx = dst.getIndex(0);
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
return NULL;
@ -1708,6 +1760,13 @@ Converter::acquireDst(int d, int c)
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
return getScratch();
if (f == TGSI_FILE_TEMPORARY) {
int arrayid = dst.getArrayId();
if (!arrayid)
arrayid = code->tempArrayId[idx];
adjustTempIndex(arrayid, idx, idx2d);
}
return getArrayForFile(f, idx2d)->acquire(sub.cur->values, idx, c);
}
@ -1739,8 +1798,8 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr)
{
const unsigned f = dst.getFile();
const int idx = dst.getIndex(0);
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (f == TGSI_FILE_SYSTEM_VALUE) {
assert(!ptr);
@ -1763,6 +1822,13 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
f == TGSI_FILE_PREDICATE ||
f == TGSI_FILE_ADDRESS ||
f == TGSI_FILE_OUTPUT) {
if (f == TGSI_FILE_TEMPORARY) {
int arrayid = dst.getArrayId();
if (!arrayid)
arrayid = code->tempArrayId[idx];
adjustTempIndex(arrayid, idx, idx2d);
}
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
} else {
assert(!"invalid dst file");
@ -3326,18 +3392,17 @@ Converter::exportOutputs()
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
code(code),
tgsi(NULL),
tData(this), aData(this), pData(this), oData(this)
tData(this), lData(this), aData(this), pData(this), oData(this)
{
info = code->info;
const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);

View File

@ -540,6 +540,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
// It seems like barriers are never required for tessellation since
// the warp size is 32, and there are always at most 32 tcs threads.
bb->remove(i);
} else
if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LDC_IS) {
int offset = i->src(0).get()->reg.data.offset;
if (abs(offset) > 0x10000)
i->src(0).get()->reg.fileIndex += offset >> 16;
i->src(0).get()->reg.data.offset = (int)(short)offset;
} else {
// TODO: Move this to before register allocation for operations that
// need the $c register !

View File

@ -171,7 +171,10 @@ LoadPropagation::isImmdLoad(Instruction *ld)
if (!ld || (ld->op != OP_MOV) ||
((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
return false;
return ld->src(0).getFile() == FILE_IMMEDIATE;
// A 0 can be replaced with a register, so it doesn't count as an immediate.
ImmediateValue val;
return ld->src(0).getImmediate(val) && !val.isInteger(0);
}
bool
@ -187,7 +190,8 @@ LoadPropagation::isAttribOrSharedLoad(Instruction *ld)
void
LoadPropagation::checkSwapSrc01(Instruction *insn)
{
if (!prog->getTarget()->getOpInfo(insn).commutative)
const Target *targ = prog->getTarget();
if (!targ->getOpInfo(insn).commutative)
if (insn->op != OP_SET && insn->op != OP_SLCT)
return;
if (insn->src(1).getFile() != FILE_GPR)
@ -196,14 +200,15 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
Instruction *i0 = insn->getSrc(0)->getInsn();
Instruction *i1 = insn->getSrc(1)->getInsn();
if (isCSpaceLoad(i0)) {
if (!isCSpaceLoad(i1))
insn->swapSources(0, 1);
else
return;
} else
if (isImmdLoad(i0)) {
if (!isCSpaceLoad(i1) && !isImmdLoad(i1))
// Swap sources to inline the less frequently used source. That way,
// optimistically, the remaining load eventually becomes dead and can be
// removed.
int i0refs = insn->getSrc(0)->refCount();
int i1refs = insn->getSrc(1)->refCount();
if ((isCSpaceLoad(i0) || isImmdLoad(i0)) && targ->insnCanLoad(insn, 1, i0)) {
if ((!isImmdLoad(i1) && !isCSpaceLoad(i1)) ||
!targ->insnCanLoad(insn, 1, i1) ||
i0refs < i1refs)
insn->swapSources(0, 1);
else
return;
@ -1224,6 +1229,8 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
adds = 1;
else
return;
if (si->src(!adds).mod != Modifier(0))
return;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then

View File

@ -192,7 +192,7 @@ public:
virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const = 0;
virtual bool insnCanLoadOffset(const Instruction *insn, int s,
int offset) const { return true; }
int offset) const = 0;
virtual bool isOpSupported(operation, DataType) const = 0;
virtual bool isAccessSupported(DataFile, DataType) const = 0;
virtual bool isModSupported(const Instruction *,

View File

@ -99,6 +99,7 @@ static const struct opProperties _initProps[] =
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_EX2, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0 },
{ OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },

View File

@ -383,6 +383,16 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
return true;
}
bool
TargetNVC0::insnCanLoadOffset(const Instruction *insn, int s, int offset) const
{
const ValueRef& ref = insn->src(s);
if (ref.getFile() == FILE_MEMORY_CONST &&
(insn->op != OP_LOAD || insn->subOp != NV50_IR_SUBOP_LDC_IS))
return offset >= -0x8000 && offset < 0x8000;
return true;
}
bool
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{

View File

@ -48,6 +48,8 @@ public:
virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const;
virtual bool insnCanLoadOffset(const Instruction *insn, int s,
int offset) const;
virtual bool isOpSupported(operation, DataType) const;
virtual bool isAccessSupported(DataFile, DataType) const;
virtual bool isModSupported(const Instruction *, int s, Modifier) const;

View File

@ -183,6 +183,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -226,6 +226,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -215,6 +215,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -295,9 +296,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
return NVC0_MAX_PIPE_CONSTBUFS;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return shader != PIPE_SHADER_FRAGMENT;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
return shader != PIPE_SHADER_FRAGMENT || class_3d < GM107_3D_CLASS;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;

View File

@ -64,7 +64,7 @@ nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,
bsp_size += num_bytes[i];
bsp_size += 256; /* the 4 end markers */
if (!bsp_bo || bsp_size > bsp_bo->size) {
if (bsp_size > bsp_bo->size) {
union nouveau_bo_config cfg;
struct nouveau_bo *tmp_bo = NULL;

View File

@ -209,6 +209,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
/* SWTCL-only features. */

View File

@ -68,6 +68,7 @@ static const struct debug_named_value r600_debug_options[] = {
static void r600_destroy_context(struct pipe_context *context)
{
struct r600_context *rctx = (struct r600_context *)context;
unsigned sh;
r600_isa_destroy(rctx->isa);
@ -76,6 +77,11 @@ static void r600_destroy_context(struct pipe_context *context)
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, NULL);
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, NULL);
free(rctx->driver_consts[sh].constants);
}
if (rctx->fixed_func_tcs_shader)
rctx->b.b.delete_tcs_state(&rctx->b.b, rctx->fixed_func_tcs_shader);
@ -357,6 +363,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:

View File

@ -210,8 +210,8 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
}
static bool
r600_do_invalidate_resource(struct r600_common_context *rctx,
struct r600_resource *rbuffer)
r600_invalidate_buffer(struct r600_common_context *rctx,
struct r600_resource *rbuffer)
{
/* In AMD_pinned_memory, the user pointer association only gets
* broken when the buffer is explicitly re-allocated.
@ -236,7 +236,9 @@ void r600_invalidate_resource(struct pipe_context *ctx,
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_resource *rbuffer = r600_resource(resource);
(void)r600_do_invalidate_resource(rctx, rbuffer);
/* We currently only do anything here for buffers */
if (resource->target == PIPE_BUFFER)
(void)r600_invalidate_buffer(rctx, rbuffer);
}
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
@ -306,7 +308,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
assert(usage & PIPE_TRANSFER_WRITE);
if (r600_do_invalidate_resource(rctx, rbuffer)) {
if (r600_invalidate_buffer(rctx, rbuffer)) {
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}

View File

@ -349,6 +349,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:

View File

@ -3728,6 +3728,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
case R_0286CC_SPI_PS_INPUT_ENA:
conf->spi_ps_input_ena = value;
break;
case R_0286D0_SPI_PS_INPUT_ADDR:
/* Not used yet, but will be in the future */
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
@ -3735,8 +3738,15 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
G_00B860_WAVESIZE(value) * 256 * 4 * 1;
break;
default:
fprintf(stderr, "Warning: Compiler emitted unknown "
"config register: 0x%x\n", reg);
{
static bool printed;
if (!printed) {
fprintf(stderr, "Warning: LLVM emitted unknown "
"config register: 0x%x\n", reg);
printed = true;
}
}
break;
}
}

View File

@ -260,6 +260,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
}
/* should only get here on unhandled cases */

View File

@ -357,6 +357,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_STRING_MARKER:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;

View File

@ -198,6 +198,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
return 0;
/* Stream output. */

View File

@ -678,6 +678,13 @@ struct pipe_context {
void (*dump_debug_state)(struct pipe_context *ctx, FILE *stream,
unsigned flags);
/**
* Emit string marker in cmdstream
*/
void (*emit_string_marker)(struct pipe_context *ctx,
const char *string,
int len);
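/* Hypothetical caller sketch (not part of the patch): guard on the hook,
 * since drivers that report 0 for PIPE_CAP_STRING_MARKER are expected to
 * leave it unimplemented:
 *
 *    if (ctx->emit_string_marker)
 *       ctx->emit_string_marker(ctx, label, strlen(label));
 */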
/**
* Generate mipmap.
* \return TRUE if mipmap generation succeeds, FALSE otherwise

View File

@ -644,6 +644,7 @@ enum pipe_cap
PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT,
PIPE_CAP_INVALIDATE_BUFFER,
PIPE_CAP_GENERATE_MIPMAP,
PIPE_CAP_STRING_MARKER,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)

View File

@ -492,8 +492,10 @@ dri_flush(__DRIcontext *cPriv,
if (pipe->invalidate_resource &&
(flags & __DRI2_FLUSH_INVALIDATE_ANCILLARY)) {
pipe->invalidate_resource(pipe, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
pipe->invalidate_resource(pipe, drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]);
if (drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL])
pipe->invalidate_resource(pipe, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
if (drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL])
pipe->invalidate_resource(pipe, drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]);
}
}

View File

@ -31,6 +31,7 @@
#include "util/u_memory.h"
#include "util/u_handle_table.h"
#include "util/u_video.h"
#include "vl/vl_deint_filter.h"
#include "vl/vl_winsys.h"
#include "va_private.h"
@ -296,6 +297,10 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id)
}
context->decoder->destroy(context->decoder);
}
if (context->deint) {
vl_deint_filter_cleanup(context->deint);
FREE(context->deint);
}
FREE(context);
handle_table_remove(drv->htab, context_id);
pipe_mutex_unlock(drv->mutex);

View File

@ -29,6 +29,7 @@
#include "vl/vl_defines.h"
#include "vl/vl_video_buffer.h"
#include "vl/vl_deint_filter.h"
#include "va_private.h"
@ -174,6 +175,51 @@ static VAStatus vlVaPostProcBlit(vlVaDriver *drv, vlVaContext *context,
return VA_STATUS_SUCCESS;
}
static struct pipe_video_buffer *
vlVaApplyDeint(vlVaDriver *drv, vlVaContext *context,
VAProcPipelineParameterBuffer *param,
struct pipe_video_buffer *current,
unsigned field)
{
vlVaSurface *prevprev, *prev, *next;
if (param->num_forward_references < 1 ||
param->num_backward_references < 2)
return current;
prevprev = handle_table_get(drv->htab, param->backward_references[1]);
prev = handle_table_get(drv->htab, param->backward_references[0]);
next = handle_table_get(drv->htab, param->forward_references[0]);
if (!prevprev || !prev || !next)
return current;
if (context->deint && (context->deint->video_width != current->width ||
context->deint->video_height != current->height)) {
vl_deint_filter_cleanup(context->deint);
FREE(context->deint);
context->deint = NULL;
}
if (!context->deint) {
context->deint = MALLOC(sizeof(struct vl_deint_filter));
if (!vl_deint_filter_init(context->deint, drv->pipe, current->width,
current->height, false, false)) {
FREE(context->deint);
context->deint = NULL;
return current;
}
}
if (!vl_deint_filter_check_buffers(context->deint, prevprev->buffer,
prev->buffer, current, next->buffer))
return current;
vl_deint_filter_render(context->deint, prevprev->buffer, prev->buffer,
current, next->buffer, field);
return context->deint->video_buffer;
}
VAStatus
vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
{
@ -181,6 +227,7 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
VARectangle def_src_region, def_dst_region;
const VARectangle *src_region, *dst_region;
VAProcPipelineParameterBuffer *param;
struct pipe_video_buffer *src;
vlVaSurface *src_surface;
unsigned i;
@ -199,6 +246,8 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
if (!src_surface || !src_surface->buffer)
return VA_STATUS_ERROR_INVALID_SURFACE;
src = src_surface->buffer;
for (i = 0; i < param->num_filters; i++) {
vlVaBuffer *buf = handle_table_get(drv->htab, param->filters[i]);
VAProcFilterParameterBufferBase *filter;
@ -222,6 +271,11 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
deinterlace = VL_COMPOSITOR_WEAVE;
break;
case VAProcDeinterlacingMotionAdaptive:
src = vlVaApplyDeint(drv, context, param, src,
!!(deint->flags & VA_DEINTERLACING_BOTTOM_FIELD));
break;
default:
return VA_STATUS_ERROR_UNIMPLEMENTED;
}
@ -239,10 +293,8 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
if (context->target->buffer_format != PIPE_FORMAT_NV12)
return vlVaPostProcCompositor(drv, context, src_region, dst_region,
src_surface->buffer, context->target,
deinterlace);
src, context->target, deinterlace);
else
return vlVaPostProcBlit(drv, context, src_region, dst_region,
src_surface->buffer, context->target,
deinterlace);
src, context->target, deinterlace);
}

View File

@ -691,13 +691,14 @@ vlVaQueryVideoProcFilterCaps(VADriverContextP ctx, VAContextID context,
case VAProcFilterDeinterlacing: {
VAProcFilterCapDeinterlacing *deint = filter_caps;
if (*num_filter_caps < 2) {
*num_filter_caps = 2;
if (*num_filter_caps < 3) {
*num_filter_caps = 3;
return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
}
deint[i++].type = VAProcDeinterlacingBob;
deint[i++].type = VAProcDeinterlacingWeave;
deint[i++].type = VAProcDeinterlacingMotionAdaptive;
break;
}
@ -750,9 +751,24 @@ vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context,
for (i = 0; i < num_filters; i++) {
vlVaBuffer *buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, filters[i]);
VAProcFilterParameterBufferBase *filter;
if (!buf || buf->type >= VABufferTypeMax)
if (!buf || buf->type != VAProcFilterParameterBufferType)
return VA_STATUS_ERROR_INVALID_BUFFER;
filter = buf->data;
switch (filter->type) {
case VAProcFilterDeinterlacing: {
VAProcFilterParameterBufferDeinterlacing *deint = buf->data;
if (deint->algorithm == VAProcDeinterlacingMotionAdaptive) {
pipeline_cap->num_forward_references = 1;
pipeline_cap->num_backward_references = 2;
}
break;
}
default:
return VA_STATUS_ERROR_UNIMPLEMENTED;
}
}
return VA_STATUS_SUCCESS;

View File

@ -236,6 +236,8 @@ typedef struct {
VAPictureParameterBufferMPEG4 pps;
uint8_t start_code[32];
} mpeg4;
struct vl_deint_filter *deint;
} vlVaContext;
typedef struct {

View File

@ -699,17 +699,18 @@ struct ast_type_qualifier {
bool merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q);
const ast_type_qualifier &q,
bool is_single_layout_merge);
bool merge_out_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,
ast_node* &node);
ast_node* &node, bool create_node);
bool merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,
ast_node* &node);
ast_node* &node, bool create_node);
ast_subroutine_list *subroutine_list;
};

View File

@ -487,15 +487,17 @@ unary_arithmetic_result_type(const struct glsl_type *type,
* If the given types to the bit-logic operator are invalid, return
* glsl_type::error_type.
*
* \param type_a Type of LHS of bit-logic op
* \param type_b Type of RHS of bit-logic op
* \param value_a LHS of bit-logic op
* \param value_b RHS of bit-logic op
*/
static const struct glsl_type *
bit_logic_result_type(const struct glsl_type *type_a,
const struct glsl_type *type_b,
bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
ast_operators op,
struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
{
const glsl_type *type_a = value_a->type;
const glsl_type *type_b = value_b->type;
if (!state->check_bitwise_operations_allowed(loc)) {
return glsl_type::error_type;
}
@ -517,6 +519,36 @@ bit_logic_result_type(const struct glsl_type *type_a,
return glsl_type::error_type;
}
/* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't
* make sense for bitwise operations, as they don't operate on floats.
*
* GLSL 4.0 added implicit int -> uint conversions, which are relevant
* here. It wasn't clear whether or not we should apply them to bitwise
* operations. However, Khronos has decided that they should in future
* language revisions. Applications also rely on this behavior. We opt
* to apply them in general, but issue a portability warning.
*
* See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405
*/
if (type_a->base_type != type_b->base_type) {
if (!apply_implicit_conversion(type_a, value_b, state)
&& !apply_implicit_conversion(type_b, value_a, state)) {
_mesa_glsl_error(loc, state,
"could not implicitly convert operands to "
"`%s` operator",
ast_expression::operator_string(op));
return glsl_type::error_type;
} else {
_mesa_glsl_warning(loc, state,
"some implementations may not support implicit "
"int -> uint conversions for `%s' operators; "
"consider casting explicitly for portability",
ast_expression::operator_string(op));
}
type_a = value_a->type;
type_b = value_b->type;
}
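/* Hypothetical GLSL illustration of the conversion applied above: with
 * `int i; uint u;`, the operands of (u & i) now get i implicitly
 * converted to uint (mirroring C's usual arithmetic conversions, where
 * 0x0fu & -1 evaluates to 0x0fu), and only a portability warning is
 * issued instead of a hard error.
 */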
/* "The fundamental types of the operands (signed or unsigned) must
* match,"
*/
@ -1435,8 +1467,7 @@ ast_expression::do_hir(exec_list *instructions,
case ast_bit_or:
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
type = bit_logic_result_type(op[0]->type, op[1]->type, this->oper,
state, &loc);
type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
result = new(ctx) ir_expression(operations[this->oper], type,
op[0], op[1]);
error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
@ -1626,8 +1657,7 @@ ast_expression::do_hir(exec_list *instructions,
case ast_or_assign: {
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
type = bit_logic_result_type(op[0]->type, op[1]->type, this->oper,
state, &loc);
type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
type, op[0], op[1]);
error_emitted =
@ -6329,7 +6359,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
qual_stream != block_stream) {
_mesa_glsl_error(&loc, state, "stream layout qualifier on "
"interface block member does not match "
"the interface block (%d vs %d)", qual->stream,
"the interface block (%u vs %u)", qual_stream,
block_stream);
}
}

View File

@ -74,9 +74,11 @@ ast_type_qualifier::has_layout() const
|| this->flags.q.row_major
|| this->flags.q.packed
|| this->flags.q.explicit_location
|| this->flags.q.explicit_image_format
|| this->flags.q.explicit_index
|| this->flags.q.explicit_binding
|| this->flags.q.explicit_offset;
|| this->flags.q.explicit_offset
|| this->flags.q.explicit_stream;
}
bool
@ -113,10 +115,16 @@ ast_type_qualifier::interpolation_string() const
return NULL;
}
/**
* This function merges both duplicate identifiers within a single layout and
* multiple layout qualifiers on a single variable declaration. The
* is_single_layout_merge param is used to differentiate between the two.
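*
* For example (illustrative): layout(points, invocations = 2) in; is a
* single-layout merge of two qualifier ids, whereas
* layout(points) layout(invocations = 2) in; merges two separate
* layout(...) lists.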
*/
bool
ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q)
const ast_type_qualifier &q,
bool is_single_layout_merge)
{
ast_type_qualifier ubo_mat_mask;
ubo_mat_mask.flags.i = 0;
@ -156,7 +164,8 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
allowed_duplicates_mask.flags.i |=
stream_layout_mask.flags.i;
if ((this->flags.i & q.flags.i & ~allowed_duplicates_mask.flags.i) != 0) {
if (is_single_layout_merge && !state->has_enhanced_layouts() &&
(this->flags.i & q.flags.i & ~allowed_duplicates_mask.flags.i) != 0) {
_mesa_glsl_error(loc, state,
"duplicate layout qualifiers used");
return false;
@ -207,11 +216,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
this->flags.q.stream = 1;
this->stream = state->out_qualifier->stream;
}
} else {
if (q.flags.q.explicit_stream) {
_mesa_glsl_error(loc, state,
"duplicate layout `stream' qualifier");
}
}
}
@ -294,13 +298,35 @@ bool
ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,
ast_node* &node)
ast_node* &node, bool create_node)
{
void *mem_ctx = state;
const bool r = this->merge_qualifier(loc, state, q);
const bool r = this->merge_qualifier(loc, state, q, false);
if (state->stage == MESA_SHADER_TESS_CTRL) {
node = new(mem_ctx) ast_tcs_output_layout(*loc);
if (state->stage == MESA_SHADER_GEOMETRY) {
if (q.flags.q.prim_type) {
/* Make sure this is a valid output primitive type. */
switch (q.prim_type) {
case GL_POINTS:
case GL_LINE_STRIP:
case GL_TRIANGLE_STRIP:
break;
default:
_mesa_glsl_error(loc, state, "invalid geometry shader output "
"primitive type");
break;
}
}
/* Allow future assignments of global out's stream id value */
this->flags.q.explicit_stream = 0;
} else if (state->stage == MESA_SHADER_TESS_CTRL) {
if (create_node) {
node = new(mem_ctx) ast_tcs_output_layout(*loc);
}
} else {
_mesa_glsl_error(loc, state, "out layout qualifiers only valid in "
"tessellation control or geometry shaders");
}
return r;
@ -310,7 +336,7 @@ bool
ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,
ast_node* &node)
ast_node* &node, bool create_node)
{
void *mem_ctx = state;
bool create_gs_ast = false;
@ -450,10 +476,12 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
this->point_mode = q.point_mode;
}
if (create_gs_ast) {
node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
} else if (create_cs_ast) {
node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size);
if (create_node) {
if (create_gs_ast) {
node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
} else if (create_cs_ast) {
node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size);
}
}
return true;

View File

@ -299,6 +299,10 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%type <node> for_init_statement
%type <for_rest_statement> for_rest_statement
%type <node> layout_defaults
%type <node> layout_uniform_defaults
%type <node> layout_buffer_defaults
%type <node> layout_in_defaults
%type <node> layout_out_defaults
%right THEN ELSE
%%
@ -953,7 +957,7 @@ parameter_qualifier:
"or precise");
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| precision_qualifier parameter_qualifier
{
@ -970,7 +974,7 @@ parameter_qualifier:
| memory_qualifier parameter_qualifier
{
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
parameter_direction_qualifier:
@ -1149,7 +1153,7 @@ layout_qualifier_id_list:
| layout_qualifier_id_list ',' layout_qualifier_id
{
$$ = $1;
if (!$$.merge_qualifier(& @3, state, $3)) {
if (!$$.merge_qualifier(& @3, state, $3, true)) {
YYERROR;
}
}
@ -1758,7 +1762,7 @@ type_qualifier:
}
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| layout_qualifier type_qualifier
{
@ -1775,12 +1779,12 @@ type_qualifier:
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| subroutine_qualifier type_qualifier
{
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| auxiliary_storage_qualifier type_qualifier
{
@ -1796,7 +1800,7 @@ type_qualifier:
"just before storage qualifiers");
}
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| storage_qualifier type_qualifier
{
@ -1816,7 +1820,7 @@ type_qualifier:
}
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
| precision_qualifier type_qualifier
{
@ -1833,7 +1837,7 @@ type_qualifier:
| memory_qualifier type_qualifier
{
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
$$.merge_qualifier(&@1, state, $2, false);
}
;
@ -2585,7 +2589,7 @@ interface_block:
YYERROR;
}
if (!block->layout.merge_qualifier(& @1, state, $1)) {
if (!block->layout.merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
@ -2602,7 +2606,7 @@ interface_block:
"memory qualifiers can only be used in the "
"declaration of shader storage blocks");
}
if (!block->layout.merge_qualifier(& @1, state, $1)) {
if (!block->layout.merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
$$ = block;
@ -2737,18 +2741,48 @@ member_declaration:
}
;
layout_defaults:
layout_qualifier UNIFORM ';'
layout_uniform_defaults:
layout_qualifier layout_uniform_defaults
{
if (!state->default_uniform_qualifier->merge_qualifier(& @1, state, $1)) {
$$ = NULL;
if (!state->has_420pack_or_es31()) {
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
YYERROR;
} else {
if (!state->default_uniform_qualifier->
merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
}
}
| layout_qualifier UNIFORM ';'
{
if (!state->default_uniform_qualifier->
merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
$$ = NULL;
}
;
layout_buffer_defaults:
layout_qualifier layout_buffer_defaults
{
$$ = NULL;
if (!state->has_420pack_or_es31()) {
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
YYERROR;
} else {
if (!state->default_shader_storage_qualifier->
merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
}
}
| layout_qualifier BUFFER ';'
{
if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
if (!state->default_shader_storage_qualifier->
merge_qualifier(& @1, state, $1, false)) {
YYERROR;
}
@ -2764,43 +2798,58 @@ layout_defaults:
$$ = NULL;
}
;
layout_in_defaults:
layout_qualifier layout_in_defaults
{
$$ = NULL;
if (!state->has_420pack_or_es31()) {
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
YYERROR;
} else {
if (!state->in_qualifier->
merge_in_qualifier(& @1, state, $1, $$, false)) {
YYERROR;
}
}
}
| layout_qualifier IN_TOK ';'
{
$$ = NULL;
if (!state->in_qualifier->merge_in_qualifier(& @1, state, $1, $$)) {
if (!state->in_qualifier->
merge_in_qualifier(& @1, state, $1, $$, true)) {
YYERROR;
}
}
;
layout_out_defaults:
layout_qualifier layout_out_defaults
{
$$ = NULL;
if (!state->has_420pack_or_es31()) {
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
YYERROR;
} else {
if (!state->out_qualifier->
merge_out_qualifier(& @1, state, $1, $$, false)) {
YYERROR;
}
}
}
| layout_qualifier OUT_TOK ';'
{
$$ = NULL;
if (state->stage == MESA_SHADER_GEOMETRY) {
if ($1.flags.q.prim_type) {
/* Make sure this is a valid output primitive type. */
switch ($1.prim_type) {
case GL_POINTS:
case GL_LINE_STRIP:
case GL_TRIANGLE_STRIP:
break;
default:
_mesa_glsl_error(&@1, state, "invalid geometry shader output "
"primitive type");
break;
}
}
if (!state->out_qualifier->merge_qualifier(& @1, state, $1))
YYERROR;
/* Allow future assignments of global out's stream id value */
state->out_qualifier->flags.q.explicit_stream = 0;
} else if (state->stage == MESA_SHADER_TESS_CTRL) {
if (!state->out_qualifier->merge_out_qualifier(& @1, state, $1, $$))
YYERROR;
} else {
_mesa_glsl_error(& @1, state,
"out layout qualifiers only valid in "
"tessellation control or geometry shaders");
}
if (!state->out_qualifier->
merge_out_qualifier(& @1, state, $1, $$, true))
YYERROR;
}
;
layout_defaults:
layout_uniform_defaults
| layout_buffer_defaults
| layout_in_defaults
| layout_out_defaults
;
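/* With ARB_shading_language_420pack / ESSL 3.10, these recursive rules
 * accept chained layout qualifiers on a default declaration, merging one
 * layout(...) list at a time, e.g. (illustrative):
 *
 *    layout(max_vertices = 3) layout(triangle_strip) out;
 */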

View File

@ -298,8 +298,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
break;
case ir_unop_noise:
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
this->type = glsl_type::float_type;
break;
@ -422,10 +420,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
this->type = op0->type->get_base_type();
break;
case ir_binop_pack_half_2x16_split:
this->type = glsl_type::uint_type;
break;
case ir_binop_imul_high:
case ir_binop_carry:
case ir_binop_borrow:
@ -555,8 +549,6 @@ static const char *const operator_strs[] = {
"unpackUnorm2x16",
"unpackUnorm4x8",
"unpackHalf2x16",
"unpackHalf2x16_split_x",
"unpackHalf2x16_split_y",
"bitfield_reverse",
"bit_count",
"find_msb",
@ -599,7 +591,6 @@ static const char *const operator_strs[] = {
"min",
"max",
"pow",
"packHalf2x16_split",
"ubo_load",
"ldexp",
"vector_extract",

View File

@ -1401,16 +1401,6 @@ enum ir_expression_operation {
ir_unop_unpack_half_2x16,
/*@}*/
/**
* \name Lowered floating point unpacking operations.
*
* \see lower_packing_builtins_visitor::split_unpack_half_2x16
*/
/*@{*/
ir_unop_unpack_half_2x16_split_x,
ir_unop_unpack_half_2x16_split_y,
/*@}*/
/**
* \name Bit operations, part of ARB_gpu_shader5.
*/
@ -1541,15 +1531,6 @@ enum ir_expression_operation {
ir_binop_pow,
/**
* \name Lowered floating point packing operations.
*
* \see lower_packing_builtins_visitor::split_pack_half_2x16
*/
/*@{*/
ir_binop_pack_half_2x16_split,
/*@}*/
/**
* Load a value the size of a given GLSL type from a uniform block.
*

View File

@ -58,17 +58,14 @@ enum lower_packing_builtins_op {
LOWER_PACK_HALF_2x16 = 0x0010,
LOWER_UNPACK_HALF_2x16 = 0x0020,
LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,
LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,
LOWER_PACK_SNORM_4x8 = 0x0040,
LOWER_UNPACK_SNORM_4x8 = 0x0080,
LOWER_PACK_SNORM_4x8 = 0x0100,
LOWER_UNPACK_SNORM_4x8 = 0x0200,
LOWER_PACK_UNORM_4x8 = 0x0100,
LOWER_UNPACK_UNORM_4x8 = 0x0200,
LOWER_PACK_UNORM_4x8 = 0x0400,
LOWER_UNPACK_UNORM_4x8 = 0x0800,
LOWER_PACK_USE_BFI = 0x1000,
LOWER_PACK_USE_BFE = 0x2000,
LOWER_PACK_USE_BFI = 0x0400,
LOWER_PACK_USE_BFE = 0x0800,
};
bool do_common_optimization(exec_list *ir, bool linked,

View File

@ -372,12 +372,6 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == glsl_type::uint_type);
break;
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
assert(ir->type == glsl_type::float_type);
assert(ir->operands[0]->type == glsl_type::uint_type);
break;
case ir_unop_unpack_double_2x32:
assert(ir->type == glsl_type::uvec2_type);
assert(ir->operands[0]->type == glsl_type::double_type);
@ -567,12 +561,6 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == ir->operands[1]->type);
break;
case ir_binop_pack_half_2x16_split:
assert(ir->type == glsl_type::uint_type);
assert(ir->operands[0]->type == glsl_type::float_type);
assert(ir->operands[1]->type == glsl_type::float_type);
break;
case ir_binop_ubo_load:
assert(ir->operands[0]->type == glsl_type::uint_type);

View File

@ -968,10 +968,12 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
}
if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
consumer_stage != MESA_SHADER_FRAGMENT) {
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type varying cannot possibly affect rendering. Also,
* this variable is non-flat and is (or contains) an integer.
* interpolation type cannot possibly affect rendering.
* Also, this variable is non-flat and is (or contains) an integer.
* If the consumer stage is unknown, don't modify the interpolation
* type as it could affect rendering later with separate shaders.
*
* lower_packed_varyings requires all integer varyings to be flat,
* regardless of where they appear. We can trivially satisfy that

View File

@ -992,7 +992,17 @@ cross_validate_globals(struct gl_shader_program *prog,
existing->data.location = var->data.location;
existing->data.explicit_location = true;
}
} else {
/* Check if a uniform with an implicit location was marked explicit
* by an earlier shader stage. If so, mark it explicit in this stage
* too, to make sure later processing does not treat it as an
* implicit one.
*/
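/* Illustrative example (hypothetical uniform name): if the vertex
* shader declares
*
*    layout(location = 2) uniform float scale;
*
* and the fragment shader declares a plain
*
*    uniform float scale;
*
* the fragment shader's copy inherits location 2 here.
*/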
if (existing->data.explicit_location) {
var->data.location = existing->data.location;
var->data.explicit_location = true;
}
}
/* From the GLSL 4.20 specification:
* "A link error will result if two compilation units in a program
@ -3152,7 +3162,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (var->data.explicit_location) {
bool ret;
if (var->type->is_subroutine())
if (var->type->without_array()->is_subroutine())
ret = reserve_subroutine_explicit_locations(prog, sh, var);
else
ret = reserve_explicit_locations(prog, uniform_map, var);

View File

@ -43,13 +43,6 @@ public:
: op_mask(op_mask),
progress(false)
{
/* Mutually exclusive options. */
assert(!((op_mask & LOWER_PACK_HALF_2x16) &&
(op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT)));
assert(!((op_mask & LOWER_UNPACK_HALF_2x16) &&
(op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT)));
factory.instructions = &factory_instructions;
}
@ -96,9 +89,6 @@ public:
case LOWER_PACK_HALF_2x16:
*rvalue = lower_pack_half_2x16(op0);
break;
case LOWER_PACK_HALF_2x16_TO_SPLIT:
*rvalue = split_pack_half_2x16(op0);
break;
case LOWER_UNPACK_SNORM_2x16:
*rvalue = lower_unpack_snorm_2x16(op0);
break;
@ -114,9 +104,6 @@ public:
case LOWER_UNPACK_HALF_2x16:
*rvalue = lower_unpack_half_2x16(op0);
break;
case LOWER_UNPACK_HALF_2x16_TO_SPLIT:
*rvalue = split_unpack_half_2x16(op0);
break;
case LOWER_PACK_UNPACK_NONE:
case LOWER_PACK_USE_BFI:
case LOWER_PACK_USE_BFE:
@ -161,7 +148,7 @@ private:
result = op_mask & LOWER_PACK_UNORM_4x8;
break;
case ir_unop_pack_half_2x16:
result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
result = op_mask & LOWER_PACK_HALF_2x16;
break;
case ir_unop_unpack_snorm_2x16:
result = op_mask & LOWER_UNPACK_SNORM_2x16;
@ -176,7 +163,7 @@ private:
result = op_mask & LOWER_UNPACK_UNORM_4x8;
break;
case ir_unop_unpack_half_2x16:
result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
result = op_mask & LOWER_UNPACK_HALF_2x16;
break;
default:
result = LOWER_PACK_UNPACK_NONE;
@ -1092,41 +1079,6 @@ private:
return result;
}
/**
* \brief Split packHalf2x16's vec2 operand into two floats.
*
* \param vec2_rval is packHalf2x16's input
* \return a uint rvalue
*
* Some code generators, such as the i965 fragment shader, require that all
* vector expressions be lowered to a sequence of scalar expressions.
* However, packHalf2x16 cannot be scalarized by the same mechanism as
* a true vector operation because its input and output have a differing
* number of vector components.
*
* This method scalarizes packHalf2x16 by transforming it from a unary
* operation having vector input to a binary operation having scalar input.
* That is, it transforms
*
* packHalf2x16(VEC2_RVAL);
*
* into
*
* vec2 v = VEC2_RVAL;
* return packHalf2x16_split(v.x, v.y);
*/
ir_rvalue*
split_pack_half_2x16(ir_rvalue *vec2_rval)
{
assert(vec2_rval->type == glsl_type::vec2_type);
ir_variable *v = factory.make_temp(glsl_type::vec2_type,
"tmp_split_pack_half_2x16_v");
factory.emit(assign(v, vec2_rval));
return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v));
}
/**
* \brief Lower the component-wise calculation of unpackHalf2x16.
*
@ -1341,59 +1293,6 @@ private:
assert(result->type == glsl_type::vec2_type);
return result;
}
/**
* \brief Split unpackHalf2x16 into two operations.
*
* \param uint_rval is unpackHalf2x16's input
* \return a vec2 rvalue
*
* Some code generators, such as the i965 fragment shader, require that all
* vector expressions be lowered to a sequence of scalar expressions.
* However, unpackHalf2x16 cannot be scalarized by the same method as
* a true vector operation because the number of components of its input
* and output differ.
*
* This method scalarizes unpackHalf2x16 by transforming it from a single
* operation having vec2 output to a pair of operations each having float
* output. That is, it transforms
*
* unpackHalf2x16(UINT_RVAL)
*
* into
*
* uint u = UINT_RVAL;
* vec2 v;
*
* v.x = unpackHalf2x16_split_x(u);
* v.y = unpackHalf2x16_split_y(u);
*
* return v;
*/
ir_rvalue*
split_unpack_half_2x16(ir_rvalue *uint_rval)
{
assert(uint_rval->type == glsl_type::uint_type);
/* uint u = uint_rval; */
ir_variable *u = factory.make_temp(glsl_type::uint_type,
"tmp_split_unpack_half_2x16_u");
factory.emit(assign(u, uint_rval));
/* vec2 v; */
ir_variable *v = factory.make_temp(glsl_type::vec2_type,
"tmp_split_unpack_half_2x16_v");
/* v.x = unpack_half_2x16_split_x(u); */
factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u),
WRITEMASK_X));
/* v.y = unpack_half_2x16_split_y(u); */
factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u),
WRITEMASK_Y));
return deref(v).val;
}
};
} // namespace anonymous

View File

@ -44,6 +44,7 @@ public:
}
ir_visitor_status visit_leave(ir_call *);
ir_call *call_clone(ir_call *call, ir_function_signature *callee);
bool progress;
struct _mesa_glsl_parse_state *state;
};
@ -58,6 +59,23 @@ lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state)
return v.progress;
}
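/* Clone an ir_call, duplicating its return deref and actual parameters,
* so each branch of the lowered if-ladder below owns distinct IR nodes. */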
ir_call *
lower_subroutine_visitor::call_clone(ir_call *call, ir_function_signature *callee)
{
void *mem_ctx = ralloc_parent(call);
ir_dereference_variable *new_return_ref = NULL;
if (call->return_deref != NULL)
new_return_ref = call->return_deref->clone(mem_ctx, NULL);
exec_list new_parameters;
foreach_in_list(ir_instruction, ir, &call->actual_parameters) {
new_parameters.push_tail(ir->clone(mem_ctx, NULL));
}
return new(mem_ctx) ir_call(callee, new_return_ref, &new_parameters);
}
ir_visitor_status
lower_subroutine_visitor::visit_leave(ir_call *ir)
{
@ -66,7 +84,6 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
void *mem_ctx = ralloc_parent(ir);
ir_if *last_branch = NULL;
ir_dereference_variable *return_deref = ir->return_deref;
for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
ir_rvalue *var;
@ -92,14 +109,11 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
fn->exact_matching_signature(this->state,
&ir->actual_parameters);
ir_call *new_call = new(mem_ctx) ir_call(sub_sig, return_deref, &ir->actual_parameters);
ir_call *new_call = call_clone(ir, sub_sig);
if (!last_branch)
last_branch = if_tree(equal(subr_to_int(var), lc), new_call);
else
last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch);
if (return_deref && s > 0)
return_deref = return_deref->clone(mem_ctx, NULL);
}
if (last_branch)
ir->insert_before(last_branch);

View File

@ -1442,12 +1442,6 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_unpack_half_2x16:
result = nir_unpack_half_2x16(&b, srcs[0]);
break;
case ir_unop_unpack_half_2x16_split_x:
result = nir_unpack_half_2x16_split_x(&b, srcs[0]);
break;
case ir_unop_unpack_half_2x16_split_y:
result = nir_unpack_half_2x16_split_y(&b, srcs[0]);
break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
break;
@ -1731,9 +1725,6 @@ nir_visitor::visit(ir_expression *ir)
}
break;
case ir_binop_pack_half_2x16_split:
result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
break;
case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
case ir_triop_fma:
result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);

View File

@ -140,7 +140,7 @@ typedef enum {
* ir_variable - it should be easy to translate between the two.
*/
typedef struct {
typedef struct nir_variable {
struct exec_node node;
/**
@ -383,7 +383,7 @@ nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage)
return ((1ull << slots) - 1) << var->data.location;
}
typedef struct {
typedef struct nir_register {
struct exec_node node;
unsigned num_components; /** < number of vector components */
@ -477,7 +477,7 @@ nir_instr_is_last(nir_instr *instr)
return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));
}
typedef struct {
typedef struct nir_ssa_def {
/** for debugging only, can be NULL */
const char* name;
@ -1530,6 +1530,20 @@ typedef struct nir_shader_compiler_options {
/** lowers ffract to fsub+ffloor: */
bool lower_ffract;
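/* Lower the following pack/unpack opcodes to sequences of
* pack_*_split, pack_uvec*_to_uint and extract_* operations
* (see nir_lower_alu_to_scalar and nir_opt_algebraic). */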
bool lower_pack_half_2x16;
bool lower_pack_unorm_2x16;
bool lower_pack_snorm_2x16;
bool lower_pack_unorm_4x8;
bool lower_pack_snorm_4x8;
bool lower_unpack_half_2x16;
bool lower_unpack_unorm_2x16;
bool lower_unpack_snorm_2x16;
bool lower_unpack_unorm_4x8;
bool lower_unpack_snorm_4x8;
bool lower_extract_byte;
bool lower_extract_word;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
* are simulated by floats.)

View File

@ -134,6 +134,20 @@ nir_imm_int(nir_builder *build, int x)
return nir_build_imm(build, 1, v);
}
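/* Builds a constant ivec4 from four integer components, analogous to
* nir_imm_int() above. */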
static inline nir_ssa_def *
nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
{
nir_const_value v;
memset(&v, 0, sizeof(v));
v.i[0] = x;
v.i[1] = y;
v.i[2] = z;
v.i[3] = w;
return nir_build_imm(build, 4, v);
}
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)

View File

@ -97,6 +97,20 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
*/
return;
case nir_op_pack_half_2x16:
if (!b->shader->options->lower_pack_half_2x16)
return;
nir_ssa_def *val =
nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
instr->src[0].swizzle[0]),
nir_channel(b, instr->src[0].src.ssa,
instr->src[0].swizzle[1]));
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
nir_instr_remove(&instr->instr);
return;
case nir_op_unpack_unorm_4x8:
case nir_op_unpack_snorm_4x8:
case nir_op_unpack_unorm_2x16:
@ -106,11 +120,51 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
*/
return;
case nir_op_unpack_half_2x16:
/* We could split this into unpack_half_2x16_split_[xy], but should
* we?
*/
case nir_op_unpack_half_2x16: {
if (!b->shader->options->lower_unpack_half_2x16)
return;
nir_ssa_def *comps[2];
comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
nir_ssa_def *vec = nir_vec(b, comps, 2);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
nir_instr_remove(&instr->instr);
return;
}
case nir_op_pack_uvec2_to_uint: {
assert(b->shader->options->lower_pack_snorm_2x16 ||
b->shader->options->lower_pack_unorm_2x16);
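/* word.x/word.y hold the low 16 bits of each input channel; combine
* them as (word.y << 16) | word.x. */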
nir_ssa_def *word =
nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
nir_instr_remove(&instr->instr);
break;
}
case nir_op_pack_uvec4_to_uint: {
assert(b->shader->options->lower_pack_snorm_4x8 ||
b->shader->options->lower_pack_unorm_4x8);
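/* byte.{x,y,z,w} hold the low 8 bits of each input channel; pack them
* as (w << 24) | (z << 16) | (y << 8) | x. */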
nir_ssa_def *byte =
nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)),
nir_channel(b, byte, 0)));
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
nir_instr_remove(&instr->instr);
break;
}
case nir_op_fdph: {
nir_ssa_def *sum[4];

View File

@ -105,7 +105,7 @@ def opcode(name, output_size, output_type, input_sizes, input_types,
opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
input_types, algebraic_properties, const_expr)
def unop_convert(name, in_type, out_type, const_expr):
def unop_convert(name, out_type, in_type, const_expr):
opcode(name, 0, out_type, [0], [in_type], "", const_expr)
def unop(name, ty, const_expr):
@ -155,17 +155,17 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tint, tbool, "src0 != 0")
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
unop_convert("i2b", tbool, tint, "src0 != 0")
unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
# Unary floating-point rounding operations.
@ -238,6 +238,16 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
dst = (src0.x & 0xffff) | (src0.y >> 16);
""")
unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
dst = (src0.x << 0) |
(src0.y << 8) |
(src0.z << 16) |
(src0.w << 24);
""")
# Lowered floating point unpacking operations.
@ -265,7 +275,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
}
""")
unop_convert("ufind_msb", tuint, tint, """
unop_convert("ufind_msb", tint, tuint, """
dst = -1;
for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) {
@ -551,6 +561,15 @@ dst.x = src0.x;
dst.y = src1.x;
""")
# Byte extraction
binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
# Word extraction
binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
def triop(name, ty, const_expr):
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):

View File

@ -245,6 +245,70 @@ optimizations = [
('bcsel', ('ult', 31, 'bits'), 'value',
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
(('extract_ibyte', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
(('extract_ubyte', a, b),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
(('extract_iword', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
(('extract_uword', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
(('pack_unorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
'options->lower_pack_unorm_2x16'),
(('pack_unorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
'options->lower_pack_unorm_4x8'),
(('pack_snorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
'options->lower_pack_snorm_2x16'),
(('pack_snorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
('extract_uword', 'v', 1), 0, 0)),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
('extract_ubyte', 'v', 1),
('extract_ubyte', 'v', 2),
('extract_ubyte', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
('extract_iword', 'v', 1), 0, 0)),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
('extract_ibyte', 'v', 1),
('extract_ibyte', 'v', 2),
('extract_ibyte', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]
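# The pack/unpack lowerings above follow the GLSL packSnorm2x16 /
# packUnorm2x16 definitions, e.g. each snorm16 word is
# round(clamp(c, -1.0, +1.0) * 32767.0), assembled by pack_uvec2_to_uint.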
# Add optimizations to handle the case where the result of a ternary is

View File

@ -487,7 +487,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
if (i != 0)
fprintf(fp, ", ");
fprintf(fp, "%u", instr->const_index[i]);
fprintf(fp, "%d", instr->const_index[i]);
}
fprintf(fp, ")");

View File

@ -33,7 +33,8 @@
#define ENUM(x) [x] = #x
#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
const char * gl_shader_stage_name(gl_shader_stage stage)
const char *
gl_shader_stage_name(gl_shader_stage stage)
{
static const char *names[] = {
ENUM(MESA_SHADER_VERTEX),
@ -51,15 +52,16 @@ const char * gl_shader_stage_name(gl_shader_stage stage)
* Translate a gl_shader_stage to a short shader stage name for debug
* printouts and error messages.
*/
const char * _mesa_shader_stage_to_string(unsigned stage)
const char *
_mesa_shader_stage_to_string(unsigned stage)
{
switch (stage) {
case MESA_SHADER_VERTEX: return "vertex";
case MESA_SHADER_FRAGMENT: return "fragment";
case MESA_SHADER_GEOMETRY: return "geometry";
case MESA_SHADER_COMPUTE: return "compute";
case MESA_SHADER_TESS_CTRL: return "tess ctrl";
case MESA_SHADER_TESS_EVAL: return "tess eval";
case MESA_SHADER_TESS_CTRL: return "tessellation control";
case MESA_SHADER_TESS_EVAL: return "tessellation evaluation";
}
unreachable("Unknown shader stage.");
@ -69,7 +71,8 @@ const char * _mesa_shader_stage_to_string(unsigned stage)
* Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
* for debug printouts and error messages.
*/
const char * _mesa_shader_stage_to_abbrev(unsigned stage)
const char *
_mesa_shader_stage_to_abbrev(unsigned stage)
{
switch (stage) {
case MESA_SHADER_VERTEX: return "VS";
@ -83,7 +86,8 @@ const char * _mesa_shader_stage_to_abbrev(unsigned stage)
unreachable("Unknown shader stage.");
}
const char * gl_vert_attrib_name(gl_vert_attrib attrib)
const char *
gl_vert_attrib_name(gl_vert_attrib attrib)
{
static const char *names[] = {
ENUM(VERT_ATTRIB_POS),
@ -124,7 +128,8 @@ const char * gl_vert_attrib_name(gl_vert_attrib attrib)
return NAME(attrib);
}
const char * gl_varying_slot_name(gl_varying_slot slot)
const char *
gl_varying_slot_name(gl_varying_slot slot)
{
static const char *names[] = {
ENUM(VARYING_SLOT_POS),
@ -190,7 +195,8 @@ const char * gl_varying_slot_name(gl_varying_slot slot)
return NAME(slot);
}
const char * gl_system_value_name(gl_system_value sysval)
const char *
gl_system_value_name(gl_system_value sysval)
{
static const char *names[] = {
ENUM(SYSTEM_VALUE_VERTEX_ID),
@ -218,7 +224,8 @@ const char * gl_system_value_name(gl_system_value sysval)
return NAME(sysval);
}
const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
const char *
glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
{
static const char *names[] = {
ENUM(INTERP_QUALIFIER_NONE),
@ -230,7 +237,8 @@ const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
return NAME(qual);
}
const char * gl_frag_result_name(gl_frag_result result)
const char *
gl_frag_result_name(gl_frag_result result)
{
static const char *names[] = {
ENUM(FRAG_RESULT_DEPTH),

View File

@ -47,19 +47,19 @@ typedef enum
MESA_SHADER_COMPUTE = 5,
} gl_shader_stage;
const char * gl_shader_stage_name(gl_shader_stage stage);
const char *gl_shader_stage_name(gl_shader_stage stage);
/**
* Translate a gl_shader_stage to a short shader stage name for debug
* printouts and error messages.
*/
const char * _mesa_shader_stage_to_string(unsigned stage);
const char *_mesa_shader_stage_to_string(unsigned stage);
/**
* Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
* for debug printouts and error messages.
*/
const char * _mesa_shader_stage_to_abbrev(unsigned stage);
const char *_mesa_shader_stage_to_abbrev(unsigned stage);
#define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
@ -109,7 +109,7 @@ typedef enum
VERT_ATTRIB_MAX = 33
} gl_vert_attrib;
const char * gl_vert_attrib_name(gl_vert_attrib attrib);
const char *gl_vert_attrib_name(gl_vert_attrib attrib);
/**
* Symbolic constants to help iterate over
@ -254,7 +254,7 @@ typedef enum
#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
const char * gl_varying_slot_name(gl_varying_slot slot);
const char *gl_varying_slot_name(gl_varying_slot slot);
/**
* Bitflags for varying slots.
@ -467,7 +467,7 @@ typedef enum
SYSTEM_VALUE_MAX /**< Number of values */
} gl_system_value;
const char * gl_system_value_name(gl_system_value sysval);
const char *gl_system_value_name(gl_system_value sysval);
/**
* The possible interpolation qualifiers that can be applied to a fragment
@ -485,7 +485,7 @@ enum glsl_interp_qualifier
INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
};
const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
const char *glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
/**
* Fragment program results
@ -516,7 +516,7 @@ typedef enum
FRAG_RESULT_DATA7,
} gl_frag_result;
const char * gl_frag_result_name(gl_frag_result result);
const char *gl_frag_result_name(gl_frag_result result);
#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)

View File

@ -35,6 +35,7 @@ EXTRA_DIST = \
es2api/ABI-check \
mapi_abi.py \
glapi/SConscript \
glapi/registry/gl.xml \
shared-glapi/SConscript
AM_CFLAGS = \
@ -106,12 +107,16 @@ if HAVE_SPARC_ASM
GLAPI_ASM_SOURCES = glapi/glapi_sparc.S
endif
glapi_libglapi_la_SOURCES = glapi/glapi_gentable.c
glapi_libglapi_la_SOURCES =
glapi_libglapi_la_CPPFLAGS = \
$(AM_CPPFLAGS) \
-I$(top_srcdir)/src/mapi/glapi \
-I$(top_srcdir)/src/mesa
if HAVE_APPLEDRI
glapi_libglapi_la_SOURCES += glapi/glapi_gentable.c
endif
if HAVE_SHARED_GLAPI
glapi_libglapi_la_SOURCES += $(MAPI_BRIDGE_FILES) glapi/glapi_mapi_tmp.h
glapi_libglapi_la_CPPFLAGS += \

View File

@ -0,0 +1,18 @@
<?xml version="1.0"?>
<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
<!-- Note: no GLX protocol info yet. -->
<OpenGLAPI>
<category name="GL_GREMEDY_string_marker" number="311">
<function name="StringMarkerGREMEDY">
<param name="len" type="GLsizei"/>
<param name="string" type="const GLvoid *"/>
</function>
</category>
</OpenGLAPI>

View File

@ -27,8 +27,11 @@ MESA_GLAPI_OUTPUTS = \
$(MESA_GLAPI_DIR)/glapi_mapi_tmp.h \
$(MESA_GLAPI_DIR)/glprocs.h \
$(MESA_GLAPI_DIR)/glapitemp.h \
$(MESA_GLAPI_DIR)/glapitable.h \
$(MESA_GLAPI_DIR)/glapi_gentable.c
$(MESA_GLAPI_DIR)/glapitable.h
if HAVE_APPLEDRI
MESA_GLAPI_OUTPUTS += $(MESA_GLAPI_DIR)/glapi_gentable.c
endif
MESA_GLAPI_ASM_OUTPUTS =
if HAVE_X86_ASM
@ -57,6 +60,7 @@ BUILT_SOURCES = \
$(MESA_GLX_DIR)/indirect_size.c
EXTRA_DIST= \
$(BUILT_SOURCES) \
$(MESA_GLAPI_DIR)/glapi_gentable.c \
$(MESA_GLAPI_DIR)/glapi_x86.S \
$(MESA_GLAPI_DIR)/glapi_x86-64.S \
$(MESA_GLAPI_DIR)/glapi_sparc.S \
@ -88,8 +92,12 @@ XORG_GLAPI_DIR = $(XORG_BASE)/glx
XORG_GLAPI_OUTPUTS = \
$(XORG_GLAPI_DIR)/glprocs.h \
$(XORG_GLAPI_DIR)/glapitable.h \
$(XORG_GLAPI_DIR)/dispatch.h \
$(XORG_GLAPI_DIR)/dispatch.h
if HAVE_APPLEDRI
XORG_GLAPI_OUTPUTS += \
$(XORG_GLAPI_DIR)/glapi_gentable.c
endif
XORG_OUTPUTS = \
$(XORG_GLAPI_OUTPUTS) \
@ -188,6 +196,7 @@ API_XML = \
EXT_texture_array.xml \
EXT_texture_integer.xml \
EXT_transform_feedback.xml \
GREMEDY_string_marker.xml \
INTEL_performance_query.xml \
KHR_debug.xml \
KHR_context_flush_control.xml \

View File

@ -12620,6 +12620,8 @@
<xi:include href="EXT_framebuffer_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<xi:include href="GREMEDY_string_marker.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<xi:include href="EXT_packed_depth_stencil.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<xi:include href="EXT_provoking_vertex.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>

Some files were not shown because too many files have changed in this diff