i965: Support compacted instructions with immediate sources.

Note the weirdness with src1 subregs. The compacted immediate fields are
uncompacted to bits [127:96], and the high five bits of the subreg
mapping are mapped to bits [100:96].

Number of compacted instructions: 790085 -> 817752 (3.50%)

Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
Matt Turner 2014-04-30 16:28:59 -07:00
parent 8942f44c8d
commit 1acb3a290e
1 changed files with 62 additions and 19 deletions

View File

@ -373,13 +373,16 @@ set_datatype_index(struct brw_compact_instruction *dst,
static bool
set_subreg_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
struct brw_instruction *src,
bool is_immediate)
{
uint16_t uncompacted = 0;
uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
if (!is_immediate)
uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
for (int i = 0; i < 32; i++) {
if (subreg_table[i] == uncompacted) {
@ -424,20 +427,40 @@ set_src0_index(struct brw_compact_instruction *dst,
static bool
set_src1_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
struct brw_instruction *src, bool is_immediate)
{
uint16_t compacted, uncompacted = 0;
if (is_immediate) {
dst->dw1.src1_index = (src->bits3.ud >> 8) & 0x1f;
} else {
uint16_t compacted, uncompacted;
uncompacted |= (src->bits3.ud >> 13) & 0xfff;
uncompacted = (src->bits3.ud >> 13) & 0xfff;
if (!get_src_index(uncompacted, &compacted))
return false;
if (!get_src_index(uncompacted, &compacted))
return false;
dst->dw1.src1_index = compacted;
dst->dw1.src1_index = compacted;
}
return true;
}
/* Reports whether a 32-bit immediate fits the compacted encoding.
 *
 * The compacted form keeps the low 12 bits of the immediate verbatim, plus a
 * 13th bit that is replicated through the upper 20 bits.  An immediate is
 * therefore representable only when bits [31:12] are all clear or all set.
 *
 * Effectively that gives us 12-bit integers, 0.0f, and some limited uses of
 * packed vectors.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* Bits [11:0] are stored as-is, so only the remaining high bits decide. */
   const unsigned high_bits = imm & ~0xfffu;

   /* All clear: the replicated bit is 0. */
   if (high_bits == 0)
      return true;

   /* All set: the replicated bit is 1. */
   return high_bits == 0xfffff000;
}
/**
* Tries to compact instruction src into dst.
*
@ -464,10 +487,11 @@ brw_try_compact_instruction(struct brw_compile *p,
return false;
}
/* FINISHME: immediates */
if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
bool is_immediate = src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE;
if (is_immediate && !is_compactable_immediate(src->bits3.ud)) {
return false;
}
memset(&temp, 0, sizeof(temp));
@ -477,7 +501,7 @@ brw_try_compact_instruction(struct brw_compile *p,
return false;
if (!set_datatype_index(&temp, src))
return false;
if (!set_subreg_index(&temp, src))
if (!set_subreg_index(&temp, src, is_immediate))
return false;
temp.dw0.acc_wr_control = src->header.acc_wr_control;
temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
@ -486,11 +510,15 @@ brw_try_compact_instruction(struct brw_compile *p,
temp.dw0.cmpt_ctrl = 1;
if (!set_src0_index(&temp, src))
return false;
if (!set_src1_index(&temp, src))
if (!set_src1_index(&temp, src, is_immediate))
return false;
temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
if (is_immediate) {
temp.dw1.src1_reg_nr = src->bits3.ud & 0xff;
} else {
temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
}
*dst = temp;
@ -547,11 +575,17 @@ set_uncompacted_src0(struct brw_instruction *dst,
static void
set_uncompacted_src1(struct brw_instruction *dst,
struct brw_compact_instruction *src)
struct brw_compact_instruction *src, bool is_immediate)
{
uint16_t uncompacted = src_index_table[src->dw1.src1_index];
if (is_immediate) {
signed high5 = src->dw1.src1_index;
/* Replicate top bit of src1_index into high 20 bits of the immediate. */
dst->bits3.ud = (high5 << 27) >> 19;
} else {
uint16_t uncompacted = src_index_table[src->dw1.src1_index];
dst->bits3.ud |= uncompacted << 13;
dst->bits3.ud |= uncompacted << 13;
}
}
void
@ -566,16 +600,25 @@ brw_uncompact_instruction(struct brw_context *brw,
set_uncompacted_control(brw, dst, src);
set_uncompacted_datatype(dst, src);
/* src0/1 register file fields are in the datatype table. */
bool is_immediate = dst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
dst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE;
set_uncompacted_subreg(dst, src);
dst->header.acc_wr_control = src->dw0.acc_wr_control;
dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
if (brw->gen <= 6)
dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
set_uncompacted_src0(dst, src);
set_uncompacted_src1(dst, src);
set_uncompacted_src1(dst, src, is_immediate);
dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
if (is_immediate) {
dst->bits3.ud |= src->dw1.src1_reg_nr;
} else {
dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
}
}
void brw_debug_compact_uncompact(struct brw_context *brw,