i965: Use the PLN instruction when possible in interpolation.

Saves an instruction in PINTERP, LINTERP, and PIXEL_W from
brw_wm_glsl.c.  For non-GLSL it isn't used yet because the deltas have
to be laid out differently.
This commit is contained in:
Eric Anholt 2010-03-10 14:46:27 -08:00
parent dc8c035944
commit 56ff30a9f9
8 changed files with 82 additions and 7 deletions

View File

@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
brw->has_surface_tile_offset = GL_TRUE;
brw->has_compr4 = GL_TRUE;
brw->has_aa_line_parameters = GL_TRUE;
brw->has_pln = GL_TRUE;
} else {
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;

View File

@ -446,6 +446,7 @@ struct brw_context
GLboolean has_compr4;
GLboolean has_negative_rhw_bug;
GLboolean has_aa_line_parameters;
GLboolean has_pln;
;
struct {
struct brw_state_flags dirty;

View File

@ -550,6 +550,7 @@
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
#define BRW_OPCODE_PLN 90
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0

View File

@ -50,6 +50,7 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },

View File

@ -795,6 +795,7 @@ ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
#undef ALU1
#undef ALU2

View File

@ -573,7 +573,7 @@ ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)

View File

@ -34,6 +34,23 @@
#include "brw_context.h"
#include "brw_wm.h"
/**
 * Decide whether a PLN instruction may be emitted for the given delta
 * registers instead of the LINE + MAC pair.
 *
 * \param intel   context supplying the hardware generation (intel->gen)
 *                and, via brw_context(), the has_pln capability flag.
 * \param deltas  the two delta registers; only their register numbers
 *                (deltas[0].nr and deltas[1].nr) are inspected.
 *
 * \return GL_TRUE when all of the following hold, GL_FALSE otherwise:
 *         - the context reports has_pln,
 *         - deltas[1] lives in the register immediately after deltas[0],
 *         - on generations before 6, deltas[0] is an even-numbered register.
 */
static GLboolean can_do_pln(struct intel_context *intel,
                            const struct brw_reg *deltas)
{
   struct brw_context *brw = brw_context(&intel->ctx);
   GLboolean usable = GL_FALSE;

   /* Only look at the register layout once the hardware is known to
    * have the instruction at all.
    */
   if (brw->has_pln) {
      const GLboolean adjacent = (deltas[1].nr == deltas[0].nr + 1);
      /* The even-base-register requirement is only enforced before gen6. */
      const GLboolean aligned = (intel->gen >= 6 ||
                                 (deltas[0].nr & 1) == 0);

      if (adjacent && aligned)
         usable = GL_TRUE;
   }

   return usable;
}
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@ -186,6 +203,7 @@ void emit_pixel_w(struct brw_wm_compile *c,
const struct brw_reg *deltas)
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
/* Don't need this if all you are doing is interpolating color, for
* instance.
@ -196,8 +214,12 @@ void emit_pixel_w(struct brw_wm_compile *c,
/* Calc 1/w - just linterp wpos[3] optimized by putting the
* result straight into a message reg.
*/
brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
if (can_do_pln(intel, deltas)) {
brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
}
/* Calc w */
if (c->dispatch_width == 16) {
@ -224,6 +246,7 @@ void emit_linterp(struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *deltas)
{
struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@ -235,8 +258,12 @@ void emit_linterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
if (can_do_pln(intel, deltas)) {
brw_PLN(p, dst[i], interp[i], deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
}
}
}
}
@ -249,6 +276,7 @@ void emit_pinterp(struct brw_compile *p,
const struct brw_reg *deltas,
const struct brw_reg *w)
{
struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@ -260,8 +288,12 @@ void emit_pinterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
if (can_do_pln(intel, deltas)) {
brw_PLN(p, dst[i], interp[i], deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
}
}
}
for (i = 0; i < 4; i++) {

View File

@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c)
*/
static void prealloc_reg(struct brw_wm_compile *c)
{
struct intel_context *intel = &c->func.brw->intel;
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
}
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
switch (inst->Opcode) {
case WM_DELTAXY:
/* Allocate WM_DELTAXY destination on G45/GM45 to an
* even-numbered GRF if possible so that we can use the PLN
* instruction.
*/
if (inst->DstReg.WriteMask == WRITEMASK_XY &&
!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
(IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
int grf;
for (grf = c->first_free_grf & ~1;
grf < BRW_WM_MAX_GRF;
grf += 2)
{
if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
c->used_grf[grf] = GL_TRUE;
c->used_grf[grf + 1] = GL_TRUE;
c->first_free_grf = grf + 2; /* a guess */
set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
brw_vec8_grf(grf, 0));
set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
brw_vec8_grf(grf + 1, 0));
break;
}
}
}
default:
break;
}
}
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!