i965g: re-starting from the dri driver

Keith Whitwell 2009-10-23 16:55:02 +01:00
parent da253319f9
commit 2f5f7c0773
68 changed files with 29208 additions and 0 deletions


@@ -0,0 +1,104 @@
TOP = ../../../../..
include $(TOP)/configs/current
LIBNAME = i965_dri.so
DRIVER_SOURCES = \
intel_batchbuffer.c \
intel_blit.c \
intel_buffer_objects.c \
intel_buffers.c \
intel_clear.c \
intel_context.c \
intel_decode.c \
intel_extensions.c \
intel_fbo.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_screen.c \
intel_span.c \
intel_pixel.c \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
intel_pixel_read.c \
intel_state.c \
intel_swapbuffers.c \
intel_syncobj.c \
intel_tex.c \
intel_tex_copy.c \
intel_tex_format.c \
intel_tex_image.c \
intel_tex_layout.c \
intel_tex_subimage.c \
intel_tex_validate.c \
brw_cc.c \
brw_clip.c \
brw_clip_line.c \
brw_clip_point.c \
brw_clip_state.c \
brw_clip_tri.c \
brw_clip_unfilled.c \
brw_clip_util.c \
brw_context.c \
brw_curbe.c \
brw_disasm.c \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
brw_eu_debug.c \
brw_eu_emit.c \
brw_eu_util.c \
brw_fallback.c \
brw_gs.c \
brw_gs_emit.c \
brw_gs_state.c \
brw_misc_state.c \
brw_program.c \
brw_queryobj.c \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
brw_state_batch.c \
brw_state_cache.c \
brw_state_dump.c \
brw_state_upload.c \
brw_tex.c \
brw_tex_layout.c \
brw_urb.c \
brw_util.c \
brw_vs.c \
brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_debug.c \
brw_wm_emit.c \
brw_wm_fp.c \
brw_wm_iz.c \
brw_wm_glsl.c \
brw_wm_pass0.c \
brw_wm_pass1.c \
brw_wm_pass2.c \
brw_wm_sampler_state.c \
brw_wm_state.c \
brw_wm_surface_state.c
C_SOURCES = \
$(COMMON_SOURCES) \
$(MINIGLX_SOURCES) \
$(DRIVER_SOURCES)
ASM_SOURCES =
DRIVER_DEFINES = -I../intel -I../intel/server
DRI_LIB_DEPS += -ldrm_intel
include ../Makefile.template
intel_decode.o: ../intel/intel_decode.c
intel_tex_layout.o: ../intel/intel_tex_layout.c


@@ -0,0 +1,297 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "main/macros.h"
#include "main/enums.h"
static void prepare_cc_vp( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_viewport ccv;
memset(&ccv, 0, sizeof(ccv));
/* _NEW_VIEWPORT */
ccv.min_depth = ctx->Viewport.Near;
ccv.max_depth = ctx->Viewport.Far;
dri_bo_unreference(brw->cc.vp_bo);
brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
}
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
.mesa = _NEW_VIEWPORT,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.prepare = prepare_cc_vp
};
struct brw_cc_unit_key {
GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
GLenum stencil_func[2], stencil_fail_op[2];
GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
GLenum logic_op;
GLenum blend_eq_rgb, blend_eq_a;
GLenum blend_src_rgb, blend_src_a;
GLenum blend_dst_rgb, blend_dst_a;
GLenum alpha_func;
GLclampf alpha_ref;
GLboolean dither;
GLboolean depth_test, depth_write;
GLenum depth_func;
};
static void
cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
const unsigned back = ctx->Stencil._BackFace;
memset(key, 0, sizeof(*key));
key->stencil = ctx->Stencil._Enabled;
key->stencil_two_side = ctx->Stencil._TestTwoSide;
if (key->stencil) {
key->stencil_func[0] = ctx->Stencil.Function[0];
key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
key->stencil_ref[0] = ctx->Stencil.Ref[0];
key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
}
if (key->stencil_two_side) {
key->stencil_func[1] = ctx->Stencil.Function[back];
key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
key->stencil_ref[1] = ctx->Stencil.Ref[back];
key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
}
if (ctx->Color._LogicOpEnabled)
key->logic_op = ctx->Color.LogicOp;
else
key->logic_op = GL_COPY;
key->color_blend = ctx->Color.BlendEnabled;
if (key->color_blend) {
key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
key->blend_eq_a = ctx->Color.BlendEquationA;
key->blend_src_rgb = ctx->Color.BlendSrcRGB;
key->blend_dst_rgb = ctx->Color.BlendDstRGB;
key->blend_src_a = ctx->Color.BlendSrcA;
key->blend_dst_a = ctx->Color.BlendDstA;
}
key->alpha_enabled = ctx->Color.AlphaEnabled;
if (key->alpha_enabled) {
key->alpha_func = ctx->Color.AlphaFunc;
key->alpha_ref = ctx->Color.AlphaRef;
}
key->dither = ctx->Color.DitherFlag;
key->depth_test = ctx->Depth.Test;
if (key->depth_test) {
key->depth_func = ctx->Depth.Func;
key->depth_write = ctx->Depth.Mask;
}
}
/**
* Creates the state cache entry for the given CC unit key.
*/
static dri_bo *
cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
{
struct brw_cc_unit_state cc;
dri_bo *bo;
memset(&cc, 0, sizeof(cc));
/* _NEW_STENCIL */
if (key->stencil) {
cc.cc0.stencil_enable = 1;
cc.cc0.stencil_func =
intel_translate_compare_func(key->stencil_func[0]);
cc.cc0.stencil_fail_op =
intel_translate_stencil_op(key->stencil_fail_op[0]);
cc.cc0.stencil_pass_depth_fail_op =
intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
cc.cc0.stencil_pass_depth_pass_op =
intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
cc.cc1.stencil_ref = key->stencil_ref[0];
cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
if (key->stencil_two_side) {
cc.cc0.bf_stencil_enable = 1;
cc.cc0.bf_stencil_func =
intel_translate_compare_func(key->stencil_func[1]);
cc.cc0.bf_stencil_fail_op =
intel_translate_stencil_op(key->stencil_fail_op[1]);
cc.cc0.bf_stencil_pass_depth_fail_op =
intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
cc.cc0.bf_stencil_pass_depth_pass_op =
intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
cc.cc1.bf_stencil_ref = key->stencil_ref[1];
cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
}
/* Not really sure about this:
*/
if (key->stencil_write_mask[0] ||
(key->stencil_two_side && key->stencil_write_mask[1]))
cc.cc0.stencil_write_enable = 1;
}
/* _NEW_COLOR */
if (key->logic_op != GL_COPY) {
cc.cc2.logicop_enable = 1;
cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
} else if (key->color_blend) {
GLenum eqRGB = key->blend_eq_rgb;
GLenum eqA = key->blend_eq_a;
GLenum srcRGB = key->blend_src_rgb;
GLenum dstRGB = key->blend_dst_rgb;
GLenum srcA = key->blend_src_a;
GLenum dstA = key->blend_dst_a;
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
}
if (eqA == GL_MIN || eqA == GL_MAX) {
srcA = dstA = GL_ONE;
}
cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
cc.cc3.blend_enable = 1;
cc.cc3.ia_blend_enable = (srcA != srcRGB ||
dstA != dstRGB ||
eqA != eqRGB);
}
if (key->alpha_enabled) {
cc.cc3.alpha_test = 1;
cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
}
if (key->dither) {
cc.cc5.dither_enable = 1;
cc.cc6.y_dither_offset = 0;
cc.cc6.x_dither_offset = 0;
}
/* _NEW_DEPTH */
if (key->depth_test) {
cc.cc2.depth_test = 1;
cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
cc.cc2.depth_write_enable = key->depth_write;
}
/* CACHE_NEW_CC_VP */
cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
if (INTEL_DEBUG & DEBUG_STATS)
cc.cc5.statistics_enable = 1;
bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
key, sizeof(*key),
&brw->cc.vp_bo, 1,
&cc, sizeof(cc),
NULL, NULL);
/* Emit CC viewport relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION,
0,
0,
offsetof(struct brw_cc_unit_state, cc4),
brw->cc.vp_bo);
return bo;
}
static void prepare_cc_unit( struct brw_context *brw )
{
struct brw_cc_unit_key key;
cc_unit_populate_key(brw, &key);
dri_bo_unreference(brw->cc.state_bo);
brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
&key, sizeof(key),
&brw->cc.vp_bo, 1,
NULL);
if (brw->cc.state_bo == NULL)
brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
}
const struct brw_tracked_state brw_cc_unit = {
.dirty = {
.mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
.brw = 0,
.cache = CACHE_NEW_CC_VP
},
.prepare = prepare_cc_unit,
};
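
Both CC atoms above follow the keyed state-cache pattern used throughout this commit: fill a plain-old-data key from the current GL state (memset first, so padding bytes compare equal), look the key up in the cache, and only build and upload a fresh state buffer on a miss. A minimal self-contained sketch of that idea, using hypothetical names and a byte-wise memcmp cache in place of the real brw_cache machinery:

#include <stdio.h>
#include <string.h>

/* Hypothetical key: mirrors the idea of brw_cc_unit_key, not its layout. */
struct demo_key {
    unsigned depth_test;
    unsigned depth_func;
};

/* A tiny fixed-size cache keyed by memcmp over the key bytes. */
struct demo_cache_entry {
    struct demo_key key;
    int valid;
    int payload;                        /* stands in for the uploaded state BO */
};

static struct demo_cache_entry cache[8];

static int *demo_search(const struct demo_key *key)
{
    for (int i = 0; i < 8; i++)
        if (cache[i].valid && memcmp(&cache[i].key, key, sizeof(*key)) == 0)
            return &cache[i].payload;
    return NULL;
}

static int *demo_upload(const struct demo_key *key, int payload)
{
    for (int i = 0; i < 8; i++)
        if (!cache[i].valid) {
            cache[i].valid = 1;
            cache[i].key = *key;
            cache[i].payload = payload;
            return &cache[i].payload;
        }
    return NULL;                        /* a real cache would evict; omitted */
}

int main(void)
{
    struct demo_key key;
    memset(&key, 0, sizeof(key));       /* zero first, like *_populate_key() */
    key.depth_test = 1;
    key.depth_func = 0x0201;            /* e.g. GL_LESS */

    int *bo = demo_search(&key);
    if (!bo)
        bo = demo_upload(&key, 42);     /* "create_from_key" on a cache miss */
    printf("state payload: %d\n", *bo);
    return 0;
}

The memset before filling in the fields is what makes the raw byte comparison safe, which is why cc_unit_populate_key() and clip_unit_populate_key() both start the same way.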


@@ -0,0 +1,273 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_clip.h"
#define FRONT_UNFILLED_BIT 0x1
#define BACK_UNFILLED_BIT 0x2
static void compile_clip_prog( struct brw_context *brw,
struct brw_clip_prog_key *key )
{
struct brw_clip_compile c;
const GLuint *program;
GLuint program_size;
GLuint delta;
GLuint i;
memset(&c, 0, sizeof(c));
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func);
c.func.single_program_flow = 1;
c.key = *key;
c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
if (BRW_IS_IGDNG(brw))
delta = 3 * REG_SIZE;
else
delta = REG_SIZE;
for (i = 0; i < VERT_RESULT_MAX; i++)
if (c.key.attrs & (1<<i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
}
c.nr_attrs = brw_count_bits(c.key.attrs);
if (BRW_IS_IGDNG(brw))
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
c.nr_bytes = c.nr_regs * REG_SIZE;
c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
/* For some reason the thread is spawned with only 4 channels
* unmasked.
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
/* Would ideally have the option of producing a program which could
* do all three:
*/
switch (key->primitive) {
case GL_TRIANGLES:
if (key->do_unfilled)
brw_emit_unfilled_clip( &c );
else
brw_emit_tri_clip( &c );
break;
case GL_LINES:
brw_emit_line_clip( &c );
break;
case GL_POINTS:
brw_emit_point_clip( &c );
break;
default:
assert(0);
return;
}
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
/* Upload
*/
dri_bo_unreference(brw->clip.prog_bo);
brw->clip.prog_bo = brw_upload_cache( &brw->cache,
BRW_CLIP_PROG,
&c.key, sizeof(c.key),
NULL, 0,
program, program_size,
&c.prog_data,
&brw->clip.prog_data );
}
/* Calculate interpolants for triangle and line rasterization.
*/
static void upload_clip_prog(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_clip_prog_key key;
memset(&key, 0, sizeof(key));
/* Populate the key:
*/
/* BRW_NEW_REDUCED_PRIMITIVE */
key.primitive = brw->intel.reduced_primitive;
/* CACHE_NEW_VS_PROG */
key.attrs = brw->vs.prog_data->outputs_written;
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
if (BRW_IS_IGDNG(brw))
key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key.clip_mode = BRW_CLIPMODE_NORMAL;
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
if (ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
GLuint fill_back = CLIP_CULL;
GLuint offset_front = 0;
GLuint offset_back = 0;
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_FRONT) {
switch (ctx->Polygon.FrontMode) {
case GL_FILL:
fill_front = CLIP_FILL;
offset_front = 0;
break;
case GL_LINE:
fill_front = CLIP_LINE;
offset_front = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_front = CLIP_POINT;
offset_front = ctx->Polygon.OffsetPoint;
break;
}
}
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_BACK) {
switch (ctx->Polygon.BackMode) {
case GL_FILL:
fill_back = CLIP_FILL;
offset_back = 0;
break;
case GL_LINE:
fill_back = CLIP_LINE;
offset_back = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_back = CLIP_POINT;
offset_back = ctx->Polygon.OffsetPoint;
break;
}
}
if (ctx->Polygon.BackMode != GL_FILL ||
ctx->Polygon.FrontMode != GL_FILL) {
key.do_unfilled = 1;
/* Most cases the fixed function units will handle. Cases where
* one or more polygon faces are unfilled will require help:
*/
key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
if (offset_back || offset_front) {
/* _NEW_POLYGON, _NEW_BUFFERS */
key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
}
switch (ctx->Polygon.FrontFace) {
case GL_CCW:
key.fill_ccw = fill_front;
key.fill_cw = fill_back;
key.offset_ccw = offset_front;
key.offset_cw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_cw != CLIP_CULL)
key.copy_bfc_cw = 1;
break;
case GL_CW:
key.fill_cw = fill_front;
key.fill_ccw = fill_back;
key.offset_cw = offset_front;
key.offset_ccw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_ccw != CLIP_CULL)
key.copy_bfc_ccw = 1;
break;
}
}
}
}
dri_bo_unreference(brw->clip.prog_bo);
brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
&key, sizeof(key),
NULL, 0,
&brw->clip.prog_data);
if (brw->clip.prog_bo == NULL)
compile_clip_prog( brw, &key );
}
const struct brw_tracked_state brw_clip_prog = {
.dirty = {
.mesa = (_NEW_LIGHT |
_NEW_TRANSFORM |
_NEW_POLYGON |
_NEW_BUFFERS),
.brw = (BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
},
.prepare = upload_clip_prog
};
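
The _NEW_POLYGON block in upload_clip_prog() folds face culling, glPolygonMode and glFrontFace into the per-winding fill_ccw/fill_cw fields of the key. A standalone sketch of just that mapping, with plain enums standing in for the GL and CLIP_* constants and the offset, reject-all and two-side handling left out:

#include <stdio.h>

enum mode { FILL, LINE, POINT, CULL };            /* stand-ins for CLIP_* */
enum face { CULL_NONE, CULL_FRONT, CULL_BACK, CULL_FRONT_AND_BACK };
enum wind { WIND_CCW, WIND_CW };

struct winding_fill { enum mode ccw, cw; };

/* Map GL-style polygon state onto per-winding fill modes, mirroring
 * upload_clip_prog(): a culled face stays CULL, any other face takes its
 * glPolygonMode, and glFrontFace decides which winding counts as "front".
 */
static struct winding_fill
map_fill(enum face cull, enum mode front_mode, enum mode back_mode,
         enum wind front_face)
{
    enum mode fill_front = (cull == CULL_FRONT || cull == CULL_FRONT_AND_BACK)
                         ? CULL : front_mode;
    enum mode fill_back  = (cull == CULL_BACK || cull == CULL_FRONT_AND_BACK)
                         ? CULL : back_mode;
    struct winding_fill out;

    if (front_face == WIND_CCW) {
        out.ccw = fill_front;
        out.cw  = fill_back;
    } else {
        out.cw  = fill_front;
        out.ccw = fill_back;
    }
    return out;
}

int main(void)
{
    /* glCullFace(GL_BACK) + glPolygonMode(GL_FRONT, GL_LINE), CCW front face */
    struct winding_fill w = map_fill(CULL_BACK, LINE, FILL, WIND_CCW);
    printf("fill_ccw=%d fill_cw=%d\n", w.ccw, w.cw);   /* LINE (1), CULL (3) */
    return 0;
}

With that state the key ends up as fill_ccw = LINE and fill_cw = CULL, the sort of combination the unfilled clip kernel is compiled for once do_unfilled is set.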


@@ -0,0 +1,179 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_CLIP_H
#define BRW_CLIP_H
#include "brw_context.h"
#include "brw_eu.h"
#define MAX_VERTS (3+6+6)
/* Note that if unfilled primitives are being emitted, we have to fix
* up polygon offset and flatshading at this point:
*/
struct brw_clip_prog_key {
GLuint attrs:32;
GLuint primitive:4;
GLuint nr_userclip:3;
GLuint do_flat_shading:1;
GLuint do_unfilled:1;
GLuint fill_cw:2; /* includes cull information */
GLuint fill_ccw:2; /* includes cull information */
GLuint offset_cw:1;
GLuint offset_ccw:1;
GLuint pad0:17;
GLuint copy_bfc_cw:1;
GLuint copy_bfc_ccw:1;
GLuint clip_mode:3;
GLuint pad1:27;
GLfloat offset_factor;
GLfloat offset_units;
};
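
This key is handed to brw_search_cache()/brw_upload_cache() by sizeof() and compared as raw bytes, so the explicit pad0/pad1 members are what keep each bit-field group at exactly one dword. A quick standalone check of that packing, assuming 32-bit unsigned and float (as GLuint/GLfloat are on the targeted platforms) and conventional GCC bit-field layout:

#include <assert.h>
#include <stdio.h>

/* Plain-C copy of brw_clip_prog_key, with unsigned/float standing in for
 * GLuint/GLfloat.
 */
struct clip_key_copy {
    unsigned attrs:32;
    unsigned primitive:4;
    unsigned nr_userclip:3;
    unsigned do_flat_shading:1;
    unsigned do_unfilled:1;
    unsigned fill_cw:2;
    unsigned fill_ccw:2;
    unsigned offset_cw:1;
    unsigned offset_ccw:1;
    unsigned pad0:17;
    unsigned copy_bfc_cw:1;
    unsigned copy_bfc_ccw:1;
    unsigned clip_mode:3;
    unsigned pad1:27;
    float offset_factor;
    float offset_units;
};

int main(void)
{
    /* 4+3+1+1+2+2+1+1+17 = 32 bits and 1+1+3+27 = 32 bits, so the pads round
     * each bit-field group up to a full dword: 3 dwords + 2 floats = 20 bytes.
     */
    printf("sizeof(key) = %zu\n", sizeof(struct clip_key_copy));
    assert(sizeof(struct clip_key_copy) == 20);
    return 0;
}

The memset(&key, 0, sizeof(key)) in upload_clip_prog() guarantees the pad bits themselves are zero, so two logically equal keys also compare equal byte-for-byte.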
#define CLIP_LINE 0
#define CLIP_POINT 1
#define CLIP_FILL 2
#define CLIP_CULL 3
#define PRIM_MASK (0x1f)
struct brw_clip_compile {
struct brw_compile func;
struct brw_clip_prog_key key;
struct brw_clip_prog_data prog_data;
struct {
struct brw_reg R0;
struct brw_reg vertex[MAX_VERTS];
struct brw_reg t;
struct brw_reg t0, t1;
struct brw_reg dp0, dp1;
struct brw_reg dpPrev;
struct brw_reg dp;
struct brw_reg loopcount;
struct brw_reg nr_verts;
struct brw_reg planemask;
struct brw_reg inlist;
struct brw_reg outlist;
struct brw_reg freelist;
struct brw_reg dir;
struct brw_reg tmp0, tmp1;
struct brw_reg offset;
struct brw_reg fixed_planes;
struct brw_reg plane_equation;
struct brw_reg ff_sync;
} reg;
/* 3 different ways of expressing vertex size:
*/
GLuint nr_attrs;
GLuint nr_regs;
GLuint nr_bytes;
GLuint first_tmp;
GLuint last_tmp;
GLboolean need_direction;
GLuint last_mrf;
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
/* Points are only culled, so no need for a clip routine, however it
* works out easier to have a dummy one.
*/
void brw_emit_unfilled_clip( struct brw_clip_compile *c );
void brw_emit_tri_clip( struct brw_clip_compile *c );
void brw_emit_line_clip( struct brw_clip_compile *c );
void brw_emit_point_clip( struct brw_clip_compile *c );
/* brw_clip_tri.c, for use by the unfilled clip routine:
*/
void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
void brw_clip_tri( struct brw_clip_compile *c );
void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts );
/* Utils:
*/
void brw_clip_interp_vertex( struct brw_clip_compile *c,
struct brw_indirect dest_ptr,
struct brw_indirect v0_ptr, /* from */
struct brw_indirect v1_ptr, /* to */
struct brw_reg t0,
GLboolean force_edgeflag );
void brw_clip_init_planes( struct brw_clip_compile *c );
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
GLboolean eot,
GLuint header);
void brw_clip_kill_thread(struct brw_clip_compile *c);
struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
void brw_clip_copy_colors( struct brw_clip_compile *c,
GLuint to, GLuint from );
void brw_clip_init_clipmask( struct brw_clip_compile *c );
struct brw_reg get_tmp( struct brw_clip_compile *c );
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
void brw_clip_ff_sync(struct brw_clip_compile *c);
void brw_clip_init_ff_sync(struct brw_clip_compile *c);
#endif


@@ -0,0 +1,276 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_clip.h"
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
GLuint i = 0,j;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
if (c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec4_grf(i, 0);
i += (6 + c->key.nr_userclip + 1) / 2;
c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
}
else
c->prog_data.curb_read_length = 0;
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < 4; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
c->reg.t = brw_vec1_grf(i, 0);
c->reg.t0 = brw_vec1_grf(i, 1);
c->reg.t1 = brw_vec1_grf(i, 2);
c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
c->reg.plane_equation = brw_vec4_grf(i, 4);
i++;
c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
c->reg.dp1 = brw_vec1_grf(i, 4);
i++;
if (!c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec8_grf(i, 0);
i++;
}
if (c->need_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
c->first_tmp = i;
c->last_tmp = i;
c->prog_data.urb_read_length = c->nr_regs; /* ? */
c->prog_data.total_grf = i;
}
/* Line clipping, more or less following the following algorithm:
*
* for (p=0;p<MAX_PLANES;p++) {
* if (clipmask & (1 << p)) {
* GLfloat dp0 = DOTPROD( vtx0, plane[p] );
* GLfloat dp1 = DOTPROD( vtx1, plane[p] );
*
* if (IS_NEGATIVE(dp1)) {
* GLfloat t = dp1 / (dp1 - dp0);
* if (t > t1) t1 = t;
* } else {
* GLfloat t = dp0 / (dp0 - dp1);
* if (t > t0) t0 = t;
* }
*
* if (t0 + t1 >= 1.0)
* return;
* }
* }
*
* interp( ctx, newvtx0, vtx0, vtx1, t0 );
* interp( ctx, newvtx1, vtx1, vtx0, t1 );
*
*/
static void clip_and_emit_line( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
struct brw_indirect newvtx1 = brw_indirect(3, 0);
struct brw_indirect plane_ptr = brw_indirect(4, 0);
struct brw_instruction *plane_loop;
struct brw_instruction *plane_active;
struct brw_instruction *is_negative;
struct brw_instruction *is_neg2 = NULL;
struct brw_instruction *not_culled;
struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
/* Note: init t0, t1 together:
*/
brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
brw_clip_init_planes(c);
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
}
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
plane_loop = brw_DO(p, BRW_EXECUTE_1);
{
/* if (planemask & 1)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
plane_active = brw_IF(p, BRW_EXECUTE_1);
{
if (c->key.nr_userclip)
brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
else
brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
/* dp = DP4(vtx->position, plane)
*/
brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
/* if (IS_NEGATIVE(dp1))
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
is_negative = brw_IF(p, BRW_EXECUTE_1);
{
/*
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p, is_neg2);
}
brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
brw_MOV(p, c->reg.t1, c->reg.t);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
is_negative = brw_ELSE(p, is_negative);
{
/* Coming back in. We know that both cannot be negative
* because the line would have been culled in that case.
*/
/* If both are positive, do nothing */
/* Only on GM965/G965 */
if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
{
brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
brw_MOV(p, c->reg.t0, c->reg.t);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
if (BRW_IS_965(p->brw)) {
brw_ENDIF(p, is_neg2);
}
}
brw_ENDIF(p, is_negative);
}
brw_ENDIF(p, plane_active);
/* plane_ptr++;
*/
brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
/* while (planemask>>=1) != 0
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
}
brw_WHILE(p, plane_loop);
brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
not_culled = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
}
brw_ENDIF(p, not_culled);
brw_clip_kill_thread(c);
}
void brw_emit_line_clip( struct brw_clip_compile *c )
{
brw_clip_line_alloc_regs(c);
brw_clip_init_ff_sync(c);
if (c->key.do_flat_shading)
brw_clip_copy_colors(c, 0, 1);
clip_and_emit_line(c);
}
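
clip_and_emit_line() above is the GEN4 EU rendition of the parametric clip spelled out in the block comment earlier in this file. A plain scalar C sketch of the same t0/t1 accumulation (hypothetical helper names, only the six fixed frustum planes, no RHW workaround) is easier to step through:

#include <stdio.h>

struct vec4 { float x, y, z, w; };

static float dot4(struct vec4 a, struct vec4 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}

static struct vec4 lerp4(struct vec4 a, struct vec4 b, float t)
{
    struct vec4 r = {
        a.x + t * (b.x - a.x), a.y + t * (b.y - a.y),
        a.z + t * (b.z - a.z), a.w + t * (b.w - a.w)
    };
    return r;
}

/* Clip the segment v0-v1 against the six fixed frustum planes, writing the
 * surviving endpoints to out0/out1.  Returns 0 if the line is culled.
 * Mirrors the t0/t1 bookkeeping in clip_and_emit_line().
 */
static int clip_line(struct vec4 v0, struct vec4 v1,
                     struct vec4 *out0, struct vec4 *out1)
{
    static const struct vec4 planes[6] = {
        {  1, 0, 0, 1 }, { -1, 0, 0, 1 },   /* x >= -w,  x <= w */
        {  0, 1, 0, 1 }, {  0, -1, 0, 1 },  /* y >= -w,  y <= w */
        {  0, 0, 1, 1 }, {  0, 0, -1, 1 },  /* z >= -w,  z <= w */
    };
    float t0 = 0.0f, t1 = 0.0f;

    for (int p = 0; p < 6; p++) {
        float dp0 = dot4(v0, planes[p]);
        float dp1 = dot4(v1, planes[p]);

        if (dp1 < 0.0f) {                   /* v1 outside: advance t1 */
            float t = dp1 / (dp1 - dp0);
            if (t > t1) t1 = t;
        } else if (dp0 < 0.0f) {            /* v0 outside: advance t0 */
            float t = dp0 / (dp0 - dp1);
            if (t > t0) t0 = t;
        }
        if (t0 + t1 >= 1.0f)
            return 0;                       /* nothing left of the segment */
    }
    *out0 = lerp4(v0, v1, t0);              /* interp towards the other end */
    *out1 = lerp4(v1, v0, t1);
    return 1;
}

int main(void)
{
    struct vec4 a = { -2.0f, 0.0f, 0.0f, 1.0f };
    struct vec4 b = {  0.5f, 0.0f, 0.0f, 1.0f };
    struct vec4 c0, c1;

    if (clip_line(a, b, &c0, &c1))
        printf("clipped x range: %.2f .. %.2f\n", c0.x, c1.x);  /* -1.00 .. 0.50 */
    return 0;
}

As in the pseudocode, t0 moves the start point, t1 moves the end point, and the segment is dropped as soon as t0 + t1 reaches 1.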


@@ -0,0 +1,56 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_clip.h"
/* Point clipping, nothing to do?
*/
void brw_emit_point_clip( struct brw_clip_compile *c )
{
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
brw_clip_init_ff_sync(c);
brw_clip_kill_thread(c);
}


@@ -0,0 +1,184 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
struct brw_clip_unit_key {
unsigned int total_grf;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int clip_mode;
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
GLboolean depth_clamp;
};
static void
clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_CLIP_PROG */
key->total_grf = brw->clip.prog_data->total_grf;
key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
key->clip_mode = brw->clip.prog_data->clip_mode;
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.clip_start;
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_clip_entries;
key->urb_size = brw->urb.vsize;
/* _NEW_TRANSFORM */
key->depth_clamp = ctx->Transform.DepthClamp;
}
static dri_bo *
clip_unit_create_from_key(struct brw_context *brw,
struct brw_clip_unit_key *key)
{
struct brw_clip_unit_state clip;
dri_bo *bo;
memset(&clip, 0, sizeof(clip));
clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
/* reloc */
clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
clip.thread1.single_program_flow = 1;
clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
clip.thread3.dispatch_grf_start_reg = 1;
clip.thread3.urb_entry_read_offset = 0;
clip.thread4.nr_urb_entries = key->nr_urb_entries;
clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
/* If we have enough clip URB entries to run two threads, do so.
*/
if (key->nr_urb_entries >= 10) {
/* Half of the URB entries go to each thread, and it has to be an
* even number.
*/
assert(key->nr_urb_entries % 2 == 0);
/* Although up to 16 concurrent Clip threads are allowed on IGDNG,
* only 2 threads can output VUEs at a time.
*/
if (BRW_IS_IGDNG(brw))
clip.thread4.max_threads = 16 - 1;
else
clip.thread4.max_threads = 2 - 1;
} else {
assert(key->nr_urb_entries >= 5);
clip.thread4.max_threads = 1 - 1;
}
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
clip.thread4.max_threads = 0;
if (INTEL_DEBUG & DEBUG_STATS)
clip.thread4.stats_enable = 1;
clip.clip5.userclip_enable_flags = 0x7f;
clip.clip5.userclip_must_clip = 1;
clip.clip5.guard_band_enable = 0;
if (!key->depth_clamp)
clip.clip5.viewport_z_clip_enable = 1;
clip.clip5.viewport_xy_clip_enable = 1;
clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
if (BRW_IS_G4X(brw))
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
clip.viewport_xmin = -1;
clip.viewport_xmax = 1;
clip.viewport_ymin = -1;
clip.viewport_ymax = 1;
bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
key, sizeof(*key),
&brw->clip.prog_bo, 1,
&clip, sizeof(clip),
NULL, NULL);
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION,
0,
clip.thread0.grf_reg_count << 1,
offsetof(struct brw_clip_unit_state, thread0),
brw->clip.prog_bo);
return bo;
}
static void upload_clip_unit( struct brw_context *brw )
{
struct brw_clip_unit_key key;
clip_unit_populate_key(brw, &key);
dri_bo_unreference(brw->clip.state_bo);
brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
&key, sizeof(key),
&brw->clip.prog_bo, 1,
NULL);
if (brw->clip.state_bo == NULL) {
brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
}
}
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
},
.prepare = upload_clip_unit,
};
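
Two encodings above are easy to gloss over: grf_reg_count stores the kernel's register footprint rounded up to 16 GRFs and kept as that count minus one, and kernel_start_pointer stores a 64-byte-aligned buffer offset shifted right by 6 (the CC viewport pointer earlier in this commit is shifted by 5 for its 32-byte alignment). A toy illustration with made-up numbers:

#include <stdio.h>

#define ALIGN(value, alignment)  (((value) + (alignment) - 1) & ~((alignment) - 1))

int main(void)
{
    unsigned total_grf = 23;                /* made-up kernel register count */
    unsigned long kernel_offset = 0x12340;  /* made-up, 64-byte aligned */

    /* grf_reg_count: rounded up to a multiple of 16 registers and stored as
     * (count / 16 - 1), exactly as clip_unit_create_from_key() does.
     */
    unsigned grf_reg_count = ALIGN(total_grf, 16) / 16 - 1;

    /* kernel_start_pointer: the hardware field only holds the address bits
     * above the 64-byte alignment, hence the >> 6.
     */
    unsigned long kernel_start_pointer = kernel_offset >> 6;

    printf("grf_reg_count = %u (encodes %u GRFs)\n",
           grf_reg_count, (grf_reg_count + 1) * 16);
    printf("kernel_start_pointer = 0x%lx\n", kernel_start_pointer);
    return 0;
}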


@@ -0,0 +1,603 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_clip.h"
static void release_tmps( struct brw_clip_compile *c )
{
c->last_tmp = c->first_tmp;
}
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
GLuint i = 0,j;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
if (c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec4_grf(i, 0);
i += (6 + c->key.nr_userclip + 1) / 2;
c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
}
else
c->prog_data.curb_read_length = 0;
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < nr_verts; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
if (c->nr_attrs & 1) {
for (j = 0; j < 3; j++) {
GLuint delta = c->nr_attrs*16 + 32;
if (BRW_IS_IGDNG(c->func.brw))
delta = c->nr_attrs * 16 + 32 * 3;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
}
c->reg.t = brw_vec1_grf(i, 0);
c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
c->reg.plane_equation = brw_vec4_grf(i, 4);
i++;
c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
c->reg.dp = brw_vec1_grf(i, 4);
i++;
c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
if (!c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec8_grf(i, 0);
i++;
}
if (c->key.do_unfilled) {
c->reg.dir = brw_vec4_grf(i, 0);
c->reg.offset = brw_vec4_grf(i, 4);
i++;
c->reg.tmp0 = brw_vec4_grf(i, 0);
c->reg.tmp1 = brw_vec4_grf(i, 4);
i++;
}
if (c->need_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
c->first_tmp = i;
c->last_tmp = i;
c->prog_data.urb_read_length = c->nr_regs; /* ? */
c->prog_data.total_grf = i;
}
void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
struct brw_instruction *is_rev;
/* Initial list of indices for incoming vertexes:
*/
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
/* XXX: Is there an easier way to do this? Need to reverse every
* second tristrip element: Can ignore sometimes?
*/
is_rev = brw_IF(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
if (c->need_direction)
brw_MOV(p, c->reg.dir, brw_imm_f(-1));
}
is_rev = brw_ELSE(p, is_rev);
{
brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
if (c->need_direction)
brw_MOV(p, c->reg.dir, brw_imm_f(1));
}
brw_ENDIF(p, is_rev);
brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
}
void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *is_poly;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_POLYGON));
is_poly = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_copy_colors(c, 1, 0);
brw_clip_copy_colors(c, 2, 0);
}
is_poly = brw_ELSE(p, is_poly);
{
brw_clip_copy_colors(c, 0, 2);
brw_clip_copy_colors(c, 1, 2);
}
brw_ENDIF(p, is_poly);
}
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
*/
void brw_clip_tri( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_indirect vtx = brw_indirect(0, 0);
struct brw_indirect vtxPrev = brw_indirect(1, 0);
struct brw_indirect vtxOut = brw_indirect(2, 0);
struct brw_indirect plane_ptr = brw_indirect(3, 0);
struct brw_indirect inlist_ptr = brw_indirect(4, 0);
struct brw_indirect outlist_ptr = brw_indirect(5, 0);
struct brw_indirect freelist_ptr = brw_indirect(6, 0);
struct brw_instruction *plane_loop;
struct brw_instruction *plane_active;
struct brw_instruction *vertex_loop;
struct brw_instruction *next_test;
struct brw_instruction *prev_test;
brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
plane_loop = brw_DO(p, BRW_EXECUTE_1);
{
/* if (planemask & 1)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
plane_active = brw_IF(p, BRW_EXECUTE_1);
{
/* vtxOut = freelist_ptr++
*/
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
if (c->key.nr_userclip)
brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
else
brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
vertex_loop = brw_DO(p, BRW_EXECUTE_1);
{
/* vtx = *input_ptr;
*/
brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
/* IS_NEGATIVE(prev) */
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
prev_test = brw_IF(p, BRW_EXECUTE_1);
{
/* IS_POSITIVE(next)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
next_test = brw_IF(p, BRW_EXECUTE_1);
{
/* Coming back in.
*/
brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
/* If (vtxOut == 0) vtxOut = vtxPrev
*/
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
/* *outlist_ptr++ = vtxOut;
* nr_verts++;
* vtxOut = 0;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
}
brw_ENDIF(p, next_test);
}
prev_test = brw_ELSE(p, prev_test);
{
/* *outlist_ptr++ = vtxPrev;
* nr_verts++;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
/* IS_NEGATIVE(next)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
next_test = brw_IF(p, BRW_EXECUTE_1);
{
/* Going out of bounds. Avoid division by zero as we
* know dp != dpPrev from DIFFERENT_SIGNS, above.
*/
brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
/* If (vtxOut == 0) vtxOut = vtx
*/
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
/* *outlist_ptr++ = vtxOut;
* nr_verts++;
* vtxOut = 0;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
}
brw_ENDIF(p, next_test);
}
brw_ENDIF(p, prev_test);
/* vtxPrev = vtx;
* inlist_ptr++;
*/
brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
/* while (--loopcount != 0)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p, vertex_loop);
/* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
* inlist = outlist
* inlist_ptr = &inlist[0]
* outlist_ptr = &outlist[0]
*/
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
}
brw_ENDIF(p, plane_active);
/* plane_ptr++;
*/
brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
/* nr_verts >= 3
*/
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
c->reg.nr_verts,
brw_imm_ud(3));
/* && (planemask>>=1) != 0
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
}
brw_WHILE(p, plane_loop);
}
void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
struct brw_instruction *loop, *if_insn;
/* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
brw_ADD(p,
c->reg.loopcount,
c->reg.nr_verts,
brw_imm_d(-2));
if_insn = brw_IF(p, BRW_EXECUTE_1);
{
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect vptr = brw_indirect(1, 0);
brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
loop = brw_DO(p, BRW_EXECUTE_1);
{
brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p, loop);
brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
}
brw_ENDIF(p, if_insn);
}
static void do_clip_tri( struct brw_clip_compile *c )
{
brw_clip_init_planes(c);
brw_clip_tri(c);
}
static void maybe_do_clip_tri( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *do_clip;
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
do_clip = brw_IF(p, BRW_EXECUTE_1);
{
do_clip_tri(c);
}
brw_ENDIF(p, do_clip);
}
static void brw_clip_test( struct brw_clip_compile *c )
{
struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg v0 = get_tmp(c);
struct brw_reg v1 = get_tmp(c);
struct brw_reg v2 = get_tmp(c);
struct brw_indirect vt0 = brw_indirect(0, 0);
struct brw_indirect vt1 = brw_indirect(1, 0);
struct brw_indirect vt2 = brw_indirect(2, 0);
struct brw_compile *p = &c->func;
struct brw_instruction *is_outside;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
/* test nearz, xmin, ymin plane */
/* clip.xyz < -clip.w */
brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* All vertices are outside of a plane, rejected */
brw_AND(p, t, t1, t2);
brw_AND(p, t, t, t3);
brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
brw_OR(p, tmp0, tmp0, get_element(t, 2));
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
is_outside = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p, is_outside);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* some vertices are inside a plane, some are outside,need to clip */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_AND(p, t, t, brw_imm_ud(0x1));
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* test farz, xmax, ymax plane */
/* clip.xyz > clip.w */
brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* All vertices are outside of a plane, rejected */
brw_AND(p, t, t1, t2);
brw_AND(p, t, t, t3);
brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
brw_OR(p, tmp0, tmp0, get_element(t, 2));
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
is_outside = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p, is_outside);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* some vertices are inside a plane, some are outside,need to clip */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_AND(p, t, t, brw_imm_ud(0x1));
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
release_tmps(c);
}
void brw_emit_tri_clip( struct brw_clip_compile *c )
{
struct brw_instruction *neg_rhw;
struct brw_compile *p = &c->func;
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
brw_clip_init_ff_sync(c);
/* if -ve rhw workaround bit is set,
do cliptest */
if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
neg_rhw = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_test(c);
}
brw_ENDIF(p, neg_rhw);
}
/* Can't push into do_clip_tri because with polygon (or quad)
* flatshading, need to apply the flatshade here because we don't
* respect the PV when converting to trifan for emit:
*/
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
(c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
do_clip_tri(c);
else
maybe_do_clip_tri(c);
brw_clip_tri_emit_polygon(c);
/* Send an empty message to kill the thread:
*/
brw_clip_kill_thread(c);
}
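
brw_clip_tri() above is a GEN4-assembly rendition of the classic Sutherland-Hodgman sweep: walk the polygon edges, keep vertices on the inside of the current plane, insert an interpolated vertex whenever an edge crosses the plane, then swap the in/out lists and move on to the next plane. A compact scalar sketch of a single plane pass, with plain float4 positions and none of the attribute interpolation or freelist management:

#include <stdio.h>

#define MAX_POLY_VERTS 16

struct vec4f { float x, y, z, w; };

static float dot4f(struct vec4f a, struct vec4f b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}

static struct vec4f mix4f(struct vec4f a, struct vec4f b, float t)
{
    struct vec4f r = {
        a.x + t * (b.x - a.x), a.y + t * (b.y - a.y),
        a.z + t * (b.z - a.z), a.w + t * (b.w - a.w)
    };
    return r;
}

/* Clip a convex polygon against one plane: walk each edge (prev -> cur),
 * keep the previous vertex when it is inside, and emit an intersection
 * point whenever the edge crosses the plane.  Returns the new vertex count.
 */
static int clip_poly_plane(const struct vec4f *in, int n,
                           struct vec4f plane, struct vec4f *out)
{
    int out_n = 0;
    struct vec4f prev = in[n - 1];
    float dp_prev = dot4f(prev, plane);

    for (int i = 0; i < n; i++) {
        float dp = dot4f(in[i], plane);

        if (dp_prev >= 0.0f)
            out[out_n++] = prev;                  /* prev inside: keep it */

        if ((dp_prev < 0.0f) != (dp < 0.0f)) {    /* edge crosses the plane */
            float t = dp_prev / (dp_prev - dp);
            out[out_n++] = mix4f(prev, in[i], t);
        }
        prev = in[i];
        dp_prev = dp;
    }
    return out_n;
}

int main(void)
{
    /* Triangle poking out of the x <= w plane (plane vector: -x + w >= 0). */
    struct vec4f tri[MAX_POLY_VERTS] = {
        { -0.5f, -0.5f, 0.0f, 1.0f },
        {  2.0f,  0.0f, 0.0f, 1.0f },
        { -0.5f,  0.5f, 0.0f, 1.0f },
    };
    struct vec4f clipped[MAX_POLY_VERTS];
    struct vec4f plane = { -1.0f, 0.0f, 0.0f, 1.0f };
    int n = clip_poly_plane(tri, 3, plane, clipped);

    printf("vertices after clipping: %d\n", n);   /* 3 in, 4 out */
    for (int i = 0; i < n; i++)
        printf("  (%.2f, %.2f)\n", clipped[i].x, clipped[i].y);
    return 0;
}

Each plane can add at most one vertex to a convex polygon, which is why MAX_VERTS in brw_clip.h budgets 3 + 6 + 6 slots: the three inputs plus one per fixed and per user clip plane.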


@@ -0,0 +1,505 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_clip.h"
/* This is performed against the original triangles, so no indirection
* required:
BZZZT!
*/
static void compute_tri_direction( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg e = c->reg.tmp0;
struct brw_reg f = c->reg.tmp1;
struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);
struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);
struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);
struct brw_reg v0n = get_tmp(c);
struct brw_reg v1n = get_tmp(c);
struct brw_reg v2n = get_tmp(c);
/* Convert to NDC.
* NOTE: We can't modify the original vertex coordinates,
* as modifying them could affect later operations,
* so we keep the normalized coordinates in temporary registers.
*
* TBD-KC
* Try to optimize unnecessary MOV's.
*/
brw_MOV(p, v0n, v0);
brw_MOV(p, v1n, v1);
brw_MOV(p, v2n, v2);
brw_clip_project_position(c, v0n);
brw_clip_project_position(c, v1n);
brw_clip_project_position(c, v2n);
/* Calculate the vectors of two edges of the triangle:
*/
brw_ADD(p, e, v0n, negate(v2n));
brw_ADD(p, f, v1n, negate(v2n));
/* Take their crossproduct:
*/
brw_set_access_mode(p, BRW_ALIGN_16);
brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
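/* Editor's sketch (not part of the driver): the scalar math that the
* swizzled MUL/MAC sequence above implements, assuming v0n/v1n/v2n hold the
* projected (NDC) positions; the variable names here are illustrative only:
*
*    GLfloat e[3], f[3], cross[3];
*    GLuint i;
*    for (i = 0; i < 3; i++) {
*       e[i] = v0n[i] - v2n[i];
*       f[i] = v1n[i] - v2n[i];
*    }
*    cross[0] = e[1] * f[2] - e[2] * f[1];
*    cross[1] = e[2] * f[0] - e[0] * f[2];
*    cross[2] = e[0] * f[1] - e[1] * f[0];
*
* Only the sign of the z component matters to the users below:
* cull_direction() and copy_bfc() compare dir.z against zero to
* distinguish CW from CCW triangles in screen space.
*/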
static void cull_direction( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *ccw;
GLuint conditional;
assert (!(c->key.fill_ccw == CLIP_CULL &&
c->key.fill_cw == CLIP_CULL));
if (c->key.fill_ccw == CLIP_CULL)
conditional = BRW_CONDITIONAL_GE;
else
conditional = BRW_CONDITIONAL_L;
brw_CMP(p,
vec1(brw_null_reg()),
conditional,
get_element(c->reg.dir, 2),
brw_imm_f(0));
ccw = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p, ccw);
}
static void copy_bfc( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *ccw;
GLuint conditional;
/* Do we have any colors to copy?
*/
if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
!(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
return;
/* In some weird degenerate cases we can end up testing the
* direction twice, once for culling and once for bfc copying. Oh
* well, that's what you get for setting weird GL state.
*/
if (c->key.copy_bfc_ccw)
conditional = BRW_CONDITIONAL_GE;
else
conditional = BRW_CONDITIONAL_L;
brw_CMP(p,
vec1(brw_null_reg()),
conditional,
get_element(c->reg.dir, 2),
brw_imm_f(0));
ccw = brw_IF(p, BRW_EXECUTE_1);
{
GLuint i;
for (i = 0; i < 3; i++) {
if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
brw_MOV(p,
byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
brw_MOV(p,
byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
}
}
brw_ENDIF(p, ccw);
}
/*
GLfloat iz = 1.0 / dir.z;
GLfloat ac = dir.x * iz;
GLfloat bc = dir.y * iz;
offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
offset *= MRD;
*/
static void compute_offset( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg off = c->reg.offset;
struct brw_reg dir = c->reg.dir;
brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
brw_MUL(p, vec2(off), dir, get_element(off, 2));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
brw_abs(get_element(off, 0)),
brw_abs(get_element(off, 1)));
brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
}
static void merge_edgeflags( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *is_poly;
struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_POLYGON));
/* Get away with using reg.vertex because we know that this is not
* a _3DPRIM_TRISTRIP_REVERSE:
*/
is_poly = brw_IF(p, BRW_EXECUTE_1);
{
brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
brw_ENDIF(p, is_poly);
}
static void apply_one_offset( struct brw_clip_compile *c,
struct brw_indirect vert )
{
struct brw_compile *p = &c->func;
struct brw_reg z = deref_1f(vert, c->header_position_offset +
2 * type_sz(BRW_REGISTER_TYPE_F));
brw_ADD(p, z, z, vec1(c->reg.offset));
}
/***********************************************************************
* Output clipped polygon as an unfilled primitive:
*/
static void emit_lines(struct brw_clip_compile *c,
GLboolean do_offset)
{
struct brw_compile *p = &c->func;
struct brw_instruction *loop;
struct brw_instruction *draw_edge;
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect v1 = brw_indirect(1, 0);
struct brw_indirect v0ptr = brw_indirect(2, 0);
struct brw_indirect v1ptr = brw_indirect(3, 0);
/* Need a separate loop for offset:
*/
if (do_offset) {
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
loop = brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
apply_one_offset(c, v0);
brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p, loop);
}
/* v1ptr = &inlist[nr_verts]
* *v1ptr = v0
*/
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
loop = brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
/* draw edge if edgeflag != 0 */
brw_CMP(p,
vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
brw_imm_f(0));
draw_edge = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
}
brw_ENDIF(p, draw_edge);
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p, loop);
}
static void emit_points(struct brw_clip_compile *c,
GLboolean do_offset )
{
struct brw_compile *p = &c->func;
struct brw_instruction *loop;
struct brw_instruction *draw_point;
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect v0ptr = brw_indirect(2, 0);
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
loop = brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
/* draw if edgeflag != 0
*/
brw_CMP(p,
vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
brw_imm_f(0));
draw_point = brw_IF(p, BRW_EXECUTE_1);
{
if (do_offset)
apply_one_offset(c, v0);
brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
}
brw_ENDIF(p, draw_point);
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p, loop);
}
static void emit_primitives( struct brw_clip_compile *c,
GLuint mode,
GLboolean do_offset )
{
switch (mode) {
case CLIP_FILL:
brw_clip_tri_emit_polygon(c);
break;
case CLIP_LINE:
emit_lines(c, do_offset);
break;
case CLIP_POINT:
emit_points(c, do_offset);
break;
case CLIP_CULL:
assert(0);
break;
}
}
static void emit_unfilled_primitives( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *ccw;
/* Direction culling has already been done.
*/
if (c->key.fill_ccw != c->key.fill_cw &&
c->key.fill_ccw != CLIP_CULL &&
c->key.fill_cw != CLIP_CULL)
{
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
get_element(c->reg.dir, 2),
brw_imm_f(0));
ccw = brw_IF(p, BRW_EXECUTE_1);
{
emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
}
ccw = brw_ELSE(p, ccw);
{
emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
}
brw_ENDIF(p, ccw);
}
else if (c->key.fill_cw != CLIP_CULL) {
emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
}
else if (c->key.fill_ccw != CLIP_CULL) {
emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
}
}
static void check_nr_verts( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *if_insn;
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
if_insn = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p, if_insn);
}
void brw_emit_unfilled_clip( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *do_clip;
c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
(c->key.fill_ccw != c->key.fill_cw) ||
c->key.fill_ccw == CLIP_CULL ||
c->key.fill_cw == CLIP_CULL ||
c->key.copy_bfc_cw ||
c->key.copy_bfc_ccw);
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_ff_sync(c);
assert(c->offset[VERT_RESULT_EDGE]);
if (c->key.fill_ccw == CLIP_CULL &&
c->key.fill_cw == CLIP_CULL) {
brw_clip_kill_thread(c);
return;
}
merge_edgeflags(c);
/* Need to use the inlist indirection here:
*/
if (c->need_direction)
compute_tri_direction(c);
if (c->key.fill_ccw == CLIP_CULL ||
c->key.fill_cw == CLIP_CULL)
cull_direction(c);
if (c->key.offset_ccw ||
c->key.offset_cw)
compute_offset(c);
if (c->key.copy_bfc_ccw ||
c->key.copy_bfc_cw)
copy_bfc(c);
/* Need to do this whether we clip or not:
*/
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
brw_clip_init_clipmask(c);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
do_clip = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_init_planes(c);
brw_clip_tri(c);
check_nr_verts(c);
}
brw_ENDIF(p, do_clip);
emit_unfilled_primitives(c);
brw_clip_kill_thread(c);
}

View File

@ -0,0 +1,396 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_clip.h"
struct brw_reg get_tmp( struct brw_clip_compile *c )
{
struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
if (++c->last_tmp > c->prog_data.total_grf)
c->prog_data.total_grf = c->last_tmp;
return tmp;
}
static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
{
if (tmp.nr == c->last_tmp-1)
c->last_tmp--;
}
static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
{
return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
}
void brw_clip_init_planes( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
if (!c->key.nr_userclip) {
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
}
}
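/* Editor's note (illustrative, not part of the driver): the packed dwords
* above encode the six fixed clip planes one byte per component, with 0xff
* standing in for -1 when the bytes are later read back as signed values.
* Decoded as (x, y, z, w) they match the fixed_plane[] float array set up
* for the CURBE path in brw_curbe.c:
*
*    { 0, 0, -1, 1 }, { 0, 0, 1, 1 },
*    { 0, -1, 0, 1 }, { 0, 1, 0, 1 },
*    {-1, 0, 0, 1 }, { 1, 0, 0, 1 }
*/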
#define W 3
/* Project 'pos' to screen space (or back again), overwrite with results:
*/
void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
{
struct brw_compile *p = &c->func;
/* calc rhw
*/
brw_math_invert(p, get_element(pos, W), get_element(pos, W));
/* value.xyz *= value.rhw
*/
brw_set_access_mode(p, BRW_ALIGN_16);
brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
brw_set_access_mode(p, BRW_ALIGN_1);
}
static void brw_clip_project_vertex( struct brw_clip_compile *c,
struct brw_indirect vert_addr )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = get_tmp(c);
/* Fixup position. Extract from the original vertex and re-project
* to screen space:
*/
brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
brw_clip_project_position(c, tmp);
brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
release_tmp(c, tmp);
}
/* Interpolate between two vertices and put the result into a0.0.
* Increment a0.0 accordingly.
*/
void brw_clip_interp_vertex( struct brw_clip_compile *c,
struct brw_indirect dest_ptr,
struct brw_indirect v0_ptr, /* from */
struct brw_indirect v1_ptr, /* to */
struct brw_reg t0,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = get_tmp(c);
GLuint i;
/* Just copy the vertex header:
*/
/*
* After the CLIP stage, only the first 256 bits of the VUE are read
* back on IGDNG, so there is no need to change them.
*/
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
/* Iterate over each attribute (could be done in pairs?)
*/
for (i = 0; i < c->nr_attrs; i++) {
GLuint delta = i*16 + 32;
if (BRW_IS_IGDNG(p->brw))
delta = i * 16 + 32 * 3;
if (delta == c->offset[VERT_RESULT_EDGE]) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
else
brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
}
else {
/* Interpolate:
*
* New = attr0 + t*attr1 - t*attr0
*/
brw_MUL(p,
vec4(brw_null_reg()),
deref_4f(v1_ptr, delta),
t0);
brw_MAC(p,
tmp,
negate(deref_4f(v0_ptr, delta)),
t0);
brw_ADD(p,
deref_4f(dest_ptr, delta),
deref_4f(v0_ptr, delta),
tmp);
}
}
if (i & 1) {
GLuint delta = i*16 + 32;
if (BRW_IS_IGDNG(p->brw))
delta = i * 16 + 32 * 3;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
release_tmp(c, tmp);
/* Recreate the projected (NDC) coordinate in the new vertex
* header:
*/
brw_clip_project_vertex(c, dest_ptr );
}
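/* Editor's sketch (illustrative only): per component, the MUL/MAC/ADD
* sequence in the loop above is a plain linear interpolation. Assuming a0
* and a1 are one component of the same attribute in the two source
* vertices and t is the intersection parameter:
*
*    static GLfloat lerp_attr(GLfloat a0, GLfloat a1, GLfloat t)
*    {
*       GLfloat acc = a1 * t - a0 * t;   // MUL into the accumulator, then MAC
*       return a0 + acc;                 // == a0 + t * (a1 - a0)
*    }
*/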
#define MAX_MRF 16
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
GLboolean eot,
GLuint header)
{
struct brw_compile *p = &c->func;
GLuint start = c->last_mrf;
brw_clip_ff_sync(c);
assert(!(allocate && eot));
/* Cycle through mrf regs - probably futile as we have to wait for
* the allocation response anyway. Also, the order this function
* is invoked doesn't correspond to the order the instructions will
* be executed, so it won't have any effect in many cases.
*/
#if 0
if (start + c->nr_regs + 1 >= MAX_MRF)
start = 0;
c->last_mrf = start + c->nr_regs + 1;
#endif
/* Copy the vertex from vertn into m1..mN+1:
*/
brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
*/
brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
/* Send each vertex as a separate write to the urb. This
* is different from the approach in brw_sf_emit.c, where
* subsequent writes are used to build up a single urb
* entry. Each of these writes instantiates a separate
* urb entry - (I think... what about 'allocate'?)
*/
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
start,
c->reg.R0,
allocate,
1, /* used */
c->nr_regs + 1, /* msg length */
allocate ? 1 : 0, /* response_length */
eot, /* eot */
1, /* writes_complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
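/* Editor's note (descriptive sketch, not authoritative): the message built
* above ends up laid out roughly as
*
*    m(start)                 header - a copy of R0 with PrimType/PrimStart
*                             patched into dword 2
*    m(start+1)..m(start+N)   the N = c->nr_regs registers of the vertex
*
* giving a message length of N + 1; the response length is 1 only when a
* new URB handle is being allocated, matching the arguments passed to
* brw_urb_WRITE().
*/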
void brw_clip_kill_thread(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
brw_clip_ff_sync(c);
/* Send an empty message to kill the thread and release any
* allocated urb entry:
*/
brw_urb_WRITE(p,
retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.R0,
0, /* allocate */
0, /* used */
1, /* msg len */
0, /* response len */
1, /* eot */
1, /* writes complete */
0,
BRW_URB_SWIZZLE_NONE);
}
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
{
return brw_address(c->reg.fixed_planes);
}
struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
{
if (c->key.nr_userclip) {
return brw_imm_uw(16);
}
else {
return brw_imm_uw(4);
}
}
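/* Editor's note (assumption, for illustration): the two strides correspond
* to the two plane layouts used by this file - user clip planes arrive as
* four floats each (16 bytes), while the fallback fixed planes are packed
* one per dword (4 bytes) by make_plane_ud() above:
*
*    GLuint stride = c->key.nr_userclip ? 4 * sizeof(GLfloat)   // 16
*                                       : sizeof(GLuint);       //  4
*/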
/* If flatshading, distribute color from provoking vertex prior to
* clipping.
*/
void brw_clip_copy_colors( struct brw_clip_compile *c,
GLuint to, GLuint from )
{
struct brw_compile *p = &c->func;
if (c->offset[VERT_RESULT_COL0])
brw_MOV(p,
byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
if (c->offset[VERT_RESULT_COL1])
brw_MOV(p,
byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
if (c->offset[VERT_RESULT_BFC0])
brw_MOV(p,
byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
if (c->offset[VERT_RESULT_BFC1])
brw_MOV(p,
byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
}
void brw_clip_init_clipmask( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
/* Shift so that lowest outcode bit is rightmost:
*/
brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
if (c->key.nr_userclip) {
struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
/* Rearrange userclip outcodes so that they come directly after
* the fixed plane bits.
*/
brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
brw_SHR(p, tmp, tmp, brw_imm_ud(8));
brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
release_tmp(c, tmp);
}
}
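/* Editor's sketch of the bit shuffling above, assuming the outcode layout
* implied by the shifts (fixed-plane outcodes in bits 26..31 of R0.2, user
* clip outcodes in bits 14..19); illustrative only:
*
*    GLuint incoming  = r0_dword2;
*    GLuint planemask = incoming >> 26;                  // fixed -> bits 0..5
*    GLuint user      = (incoming & (0x3f << 14)) >> 8;  // user  -> bits 6..11
*    planemask |= user;
*/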
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
if (c->need_ff_sync) {
struct brw_compile *p = &c->func;
struct brw_instruction *need_ff_sync;
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
{
brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
brw_ff_sync(p,
c->reg.R0,
0,
c->reg.R0,
1,
1, /* used */
1, /* msg length */
1, /* response length */
0, /* eot */
1, /* write complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
brw_ENDIF(p, need_ff_sync);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
}
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
if (c->need_ff_sync) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
}
}

View File

@ -0,0 +1,173 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/imports.h"
#include "main/api_noop.h"
#include "main/macros.h"
#include "main/vtxfmt.h"
#include "main/simple_list.h"
#include "shader/shader_api.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"
#include "brw_vs.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_batchbuffer.h"
#include "intel_pixel.h"
#include "intel_span.h"
#include "tnl/t_pipeline.h"
#include "utils.h"
/***************************************
* Mesa's Driver Functions
***************************************/
static void brwUseProgram(GLcontext *ctx, GLuint program)
{
_mesa_use_program(ctx, program);
}
static void brwInitProgFuncs( struct dd_function_table *functions )
{
functions->UseProgram = brwUseProgram;
}
static void brwInitDriverFunctions( struct dd_function_table *functions )
{
intelInitDriverFunctions( functions );
brwInitFragProgFuncs( functions );
brwInitProgFuncs( functions );
brw_init_queryobj_functions(functions);
functions->Viewport = intel_viewport;
}
GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate)
{
struct dd_function_table functions;
struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
struct intel_context *intel = &brw->intel;
GLcontext *ctx = &intel->ctx;
if (!brw) {
_mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
return GL_FALSE;
}
brwInitVtbl( brw );
brwInitDriverFunctions( &functions );
if (!intelInitContext( intel, mesaVis, driContextPriv,
sharedContextPrivate, &functions )) {
_mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
FREE(brw);
return GL_FALSE;
}
/* Initialize swrast, tnl driver tables: */
intelInitSpanFuncs(ctx);
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.MaxTextureImageUnits);
ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
/* Mesa limits textures to 4kx4k; it would be nice to fix that someday
*/
ctx->Const.MaxTextureLevels = 13;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = (1<<12);
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
/* if conformance mode is set, swrast can handle any size AA point */
ctx->Const.MaxPointSizeAA = 255.0;
/* We want the GLSL compiler to emit code that uses condition codes */
ctx->Shader.EmitCondCodes = GL_TRUE;
ctx->Shader.EmitNVTempInitialization = GL_TRUE;
ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
ctx->Const.VertexProgram.MaxAluInstructions = 0;
ctx->Const.VertexProgram.MaxTexInstructions = 0;
ctx->Const.VertexProgram.MaxTexIndirections = 0;
ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
ctx->Const.VertexProgram.MaxNativeAttribs = 16;
ctx->Const.VertexProgram.MaxNativeTemps = 256;
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
ctx->Const.VertexProgram.MaxNativeParameters = 1024;
ctx->Const.VertexProgram.MaxEnvParams =
MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
ctx->Const.VertexProgram.MaxEnvParams);
ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
ctx->Const.FragmentProgram.MaxNativeTemps = 256;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
ctx->Const.FragmentProgram.MaxEnvParams =
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
brw_init_state( brw );
brw->state.dirty.mesa = ~0;
brw->state.dirty.brw = ~0;
brw->emit_state_always = 0;
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
make_empty_list(&brw->query.active_head);
brw_draw_init( brw );
return GL_TRUE;
}

View File

@ -0,0 +1,767 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRWCONTEXT_INC
#define BRWCONTEXT_INC
#include "intel_context.h"
#include "brw_structs.h"
#include "main/imports.h"
/* Glossary:
*
* URB - uniform resource buffer. A mid-sized buffer which is
* partitioned between the fixed function units and used for passing
* values (vertices, primitives, constants) between them.
*
* CURBE - constant URB entry. An urb region (entry) used to hold
* constant values which the fixed function units can be instructed to
* preload into the GRF when spawning a thread.
*
* VUE - vertex URB entry. An urb entry holding a vertex and usually
* a vertex header. The header contains control information and
* things like primitive type, Begin/end flags and clip codes.
*
* PUE - primitive URB entry. An urb entry produced by the setup (SF)
* unit holding rasterization and interpolation parameters.
*
* GRF - general register file. One of several register files
* addressable by programmed threads. The inputs (r0, payload, curbe,
* urb) of the thread are preloaded to this area before the thread is
* spawned. The registers are individually 8 dwords wide and suitable
* for general usage. Registers holding thread input values are not
* special and may be overwritten.
*
* MRF - message register file. Threads communicate (and terminate)
* by sending messages. Message parameters are placed in contiguous
* MRF registers. All program output is via these messages. URB
* entries are populated by sending a message to the shared URB
* function containing the new data, together with a control word,
* often an unmodified copy of R0.
*
* R0 - GRF register 0. Typically holds control information used when
* sending messages to other threads.
*
* EU or GEN4 EU: The name of the programmable subsystem of the
* i965 hardware. Threads are executed by the EU, the registers
* described above are part of the EU architecture.
*
* Fixed function units:
*
* CS - Command streamer. Notional first unit, little software
* interaction. Holds the URB entries used for constant data, ie the
* CURBEs.
*
* VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
* this unit is responsible for pulling vertices out of vertex buffers
* in vram and injecting them into the processing pipe as VUEs. If
* enabled, it first passes them to a VS thread which is a good place
* for the driver to implement any active vertex shader.
*
* GS - Geometry Shader. This corresponds to a new DX10 concept. If
* enabled, incoming strips etc are passed to GS threads in individual
* line/triangle/point units. The GS thread may perform arbitrary
* computation and emit whatever primitives with whatever vertices it
* chooses. This makes GS an excellent place to implement GL's
* unfilled polygon modes, though of course it is capable of much
* more. Additionally, GS is used to translate away primitives not
* handled by later units, including Quads and Lineloops.
*
* CLIP - Clipper. Mesa's clipping algorithms are imported to run on
* this unit. The fixed function part performs cliptesting against
* the 6 fixed clipplanes and makes decisions on whether or not the
* incoming primitive needs to be passed to a thread for clipping.
* User clip planes are handled via cooperation with the VS thread.
*
* SF - Strips/Fans or Setup: Triangles are prepared for
* rasterization. Interpolation coefficients are calculated.
* Flatshading and two-sided lighting are usually performed here.
*
* WM - Windower. Interpolation of vertex attributes performed here.
* Fragment shader implemented here. SIMD aspects of EU taken full
* advantage of, as pixels are processed in blocks of 16.
*
* CC - Color Calculator. No EU threads associated with this unit.
* Handles blending and (presumably) depth and stencil testing.
*/
#define BRW_FALLBACK_TEXTURE 0x1
#define BRW_MAX_CURBE (32*16)
struct brw_context;
#define BRW_NEW_URB_FENCE 0x1
#define BRW_NEW_FRAGMENT_PROGRAM 0x2
#define BRW_NEW_VERTEX_PROGRAM 0x4
#define BRW_NEW_INPUT_DIMENSIONS 0x8
#define BRW_NEW_CURBE_OFFSETS 0x10
#define BRW_NEW_REDUCED_PRIMITIVE 0x20
#define BRW_NEW_PRIMITIVE 0x40
#define BRW_NEW_CONTEXT 0x80
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_PSP 0x800
#define BRW_NEW_WM_SURFACES 0x1000
#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
*/
#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
#define BRW_NEW_DEPTH_BUFFER 0x20000
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
#define BRW_NEW_INDEX_BUFFER 0x100000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint mesa;
/**
* State update flags signalled as the result of brw_tracked_state updates
*/
GLuint brw;
/** State update flags signalled by brw_state_cache.c searches */
GLuint cache;
};
/** Subclass of Mesa vertex program */
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
dri_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
};
/** Subclass of Mesa fragment program */
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
dri_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
};
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs:
*/
struct brw_wm_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint first_curbe_grf;
GLuint total_grf;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
GLboolean error;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
*/
const GLfloat *param[BRW_MAX_CURBE];
};
struct brw_sf_prog_data {
GLuint urb_read_length;
GLuint total_grf;
/* Each vertex may have up to 12 attributes, 4 components each,
* except WPOS which requires only 2. (11*4 + 2) == 46 ==> 12
* rows when rounded up to whole rows.
*
* Actually we use 4 components for each, so call it 12 rows.
*/
GLuint urb_entry_size;
};
struct brw_clip_prog_data {
GLuint curb_read_length; /* user planes? */
GLuint clip_mode;
GLuint urb_read_length;
GLuint total_grf;
};
struct brw_gs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
};
struct brw_vs_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
GLuint outputs_written;
GLuint nr_params; /**< number of float params/constants */
GLuint inputs_read;
/* Used for calculating urb partitions:
*/
GLuint urb_entry_size;
};
/* Size == 0 if the output is either not written, or is always [0,0,0,1]
*/
struct brw_vs_ouput_sizes {
GLubyte output_size[VERT_RESULT_MAX];
};
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
/**
* Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
* to surface binding table indexes, for WM.
*/
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
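/* Editor's note: a worked example of the WM binding table layout implied by
* the macros above, assuming MAX_DRAW_BUFFERS == 4 (a core Mesa limit, not
* defined in this file):
*
*    0..3    SURF_INDEX_DRAW(0..3)          draw/render buffers
*    4       SURF_INDEX_FRAG_CONST_BUFFER   fragment constant buffer
*    5..20   SURF_INDEX_TEXTURE(0..15)      texture units
*
* which is why BRW_WM_MAX_SURF is MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1.
*/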
/**
* Size of surface binding table for the VS.
* Only one constant buffer for now.
*/
#define BRW_VS_MAX_SURF 1
/**
* Only a VS constant buffer
*/
#define SURF_INDEX_VERT_CONST_BUFFER 0
enum brw_cache_id {
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
BRW_SAMPLER_DEFAULT_COLOR,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
BRW_SF_VP,
BRW_SF_UNIT,
BRW_VS_UNIT,
BRW_VS_PROG,
BRW_GS_UNIT,
BRW_GS_PROG,
BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG,
BRW_SS_SURFACE,
BRW_SS_SURF_BIND,
BRW_MAX_CACHE
};
struct brw_cache_item {
/**
* Effectively part of the key, cache_id identifies what kind of state
* buffer is involved, and also which brw->state.dirty.cache flag should
* be set when this cache item is chosen.
*/
enum brw_cache_id cache_id;
/** 32-bit hash of the key data */
GLuint hash;
GLuint key_size; /* for variable-sized keys */
const void *key;
dri_bo **reloc_bufs;
GLuint nr_reloc_bufs;
dri_bo *bo;
GLuint data_size;
struct brw_cache_item *next;
};
struct brw_cache {
struct brw_context *brw;
struct brw_cache_item **items;
GLuint size, n_items;
GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
GLuint aux_size[BRW_MAX_CACHE];
char *name[BRW_MAX_CACHE];
/* Record of the last BOs chosen for each cache_id. Used to set
* brw->state.dirty.cache when a new cache item is chosen.
*/
dri_bo *last_bo[BRW_MAX_CACHE];
};
/* Considered adding a member to this struct to document which flags
* an update might raise so that ordering of the state atoms can be
* checked or derived at runtime. Dropped the idea in favor of having
* a debug mode where the state is monitored for flags which are
* raised that have already been tested against.
*/
struct brw_tracked_state {
struct brw_state_flags dirty;
void (*prepare)( struct brw_context *brw );
void (*emit)( struct brw_context *brw );
};
/* Flags for brw->state.cache.
*/
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
struct brw_cached_batch_item {
struct header *header;
GLuint sz;
struct brw_cached_batch_item *next;
};
/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
* be easier if C allowed arrays of packed elements?
*/
#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32)
struct brw_vertex_element {
const struct gl_client_array *glarray;
/** The corresponding Mesa vertex attribute */
gl_vert_attrib attrib;
/** Size of a complete element */
GLuint element_size;
/** Number of uploaded elements for this input. */
GLuint count;
/** Byte stride between elements in the uploaded array */
GLuint stride;
/** Offset of the first element within the buffer object */
unsigned int offset;
/** Buffer object containing the uploaded vertex data */
dri_bo *bo;
};
struct brw_vertex_info {
GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
};
/* Cache for TNL programs.
*/
struct brw_tnl_cache_item {
GLuint hash;
void *key;
void *data;
struct brw_tnl_cache_item *next;
};
struct brw_tnl_cache {
struct brw_tnl_cache_item **items;
GLuint size, n_items;
};
struct brw_query_object {
struct gl_query_object Base;
/** Doubly linked list of active query objects in the context. */
struct brw_query_object *prev, *next;
/** Last query BO associated with this query. */
dri_bo *bo;
/** First index in bo with query data for this object. */
int first_index;
/** Last index in bo with query data for this object. */
int last_index;
/* Total count of pixels from previous BOs */
unsigned int count;
};
/**
* brw_context is derived from intel_context.
*/
struct brw_context
{
struct intel_context intel; /**< base class, must be first field */
GLuint primitive;
GLboolean emit_state_always;
GLboolean tmp_fallback;
GLboolean no_batch_wrap;
struct {
struct brw_state_flags dirty;
GLuint nr_color_regions;
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
/**
* List of buffers accumulated in brw_validate_state to receive
* dri_bo_check_aperture treatment before exec, so we can know if we
* should flush the batch and try again before emitting primitives.
*
* This can be a fixed number as we only have a limited number of
* objects referenced from the batchbuffer in a primitive emit,
* consisting of the vertex buffers, pipelined state pointers,
* the CURBE, the depth buffer, and a query BO.
*/
dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
int validated_bo_count;
} state;
struct brw_cache cache; /** non-surface items */
struct brw_cache surface_cache; /* surface items */
struct brw_cached_batch_item *cached_batch_items;
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint nr_enabled;
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
struct {
dri_bo *bo;
GLuint offset;
} upload;
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
*/
struct brw_vertex_info info;
unsigned int min_index, max_index;
} vb;
struct {
/**
* Index buffer for this draw_prims call.
*
* Updates are signaled by BRW_NEW_INDICES.
*/
const struct _mesa_index_buffer *ib;
/* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
dri_bo *bo;
unsigned int offset;
unsigned int size;
/* Offset to index buffer index to use in CMD_3D_PRIM so that we can
* avoid re-uploading the IB packet over and over if we're actually
* referencing the same index buffer.
*/
unsigned int start_vertex_offset;
} ib;
/* Active vertex program:
*/
const struct gl_vertex_program *vertex_program;
const struct gl_fragment_program *fragment_program;
/* For populating the gtt:
*/
GLuint next_free_page;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
GLuint vsize; /* vertex size plus header in urb registers */
GLuint csize; /* constant buffer size in urb registers */
GLuint sfsize; /* setup data size in urb registers */
GLboolean constrained;
GLuint nr_vs_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
GLuint nr_sf_entries;
GLuint nr_cs_entries;
/* GLuint vs_size; */
/* GLuint gs_size; */
/* GLuint clip_size; */
/* GLuint sf_size; */
/* GLuint cs_size; */
GLuint vs_start;
GLuint gs_start;
GLuint clip_start;
GLuint sf_start;
GLuint cs_start;
} urb;
/* BRW_NEW_CURBE_OFFSETS:
*/
struct {
GLuint wm_start; /**< pos of first wm const in CURBE buffer */
GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
GLuint clip_start;
GLuint clip_size;
GLuint vs_start;
GLuint vs_size;
GLuint total_size;
dri_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;
/** Offset within curbe_bo of space for next curbe entry */
GLuint curbe_next_offset;
GLfloat *last_buf;
GLuint last_bufsz;
/**
* Whether we should create a new bo instead of reusing the old one
* (if we just dispatched the batch pointing at the old one).
*/
GLboolean need_new_bo;
} curbe;
struct {
struct brw_vs_prog_data *prog_data;
dri_bo *prog_bo;
dri_bo *state_bo;
/** Binding table of pointers to surf_bo entries */
dri_bo *bind_bo;
dri_bo *surf_bo[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
} vs;
struct {
struct brw_gs_prog_data *prog_data;
GLboolean prog_active;
dri_bo *prog_bo;
dri_bo *state_bo;
} gs;
struct {
struct brw_clip_prog_data *prog_data;
dri_bo *prog_bo;
dri_bo *state_bo;
dri_bo *vp_bo;
} clip;
struct {
struct brw_sf_prog_data *prog_data;
dri_bo *prog_bo;
dri_bo *state_bo;
dri_bo *vp_bo;
} sf;
struct {
struct brw_wm_prog_data *prog_data;
struct brw_wm_compile *compile_data;
/** Input sizes, calculated from active vertex program.
* One bit per fragment program input attribute.
*/
GLbitfield input_size_masks[4];
/** Array of surface default colors (texture border color) */
dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
GLuint render_surf;
GLuint nr_surfaces;
GLuint max_threads;
dri_bo *scratch_bo;
GLuint sampler_count;
dri_bo *sampler_bo;
/** Binding table of pointers to surf_bo entries */
dri_bo *bind_bo;
dri_bo *surf_bo[BRW_WM_MAX_SURF];
dri_bo *prog_bo;
dri_bo *state_bo;
} wm;
struct {
dri_bo *prog_bo;
dri_bo *state_bo;
dri_bo *vp_bo;
} cc;
struct {
struct brw_query_object active_head;
dri_bo *bo;
int index;
GLboolean active;
} query;
/* Used to give every program string a unique id
*/
GLuint program_id;
};
#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
/*======================================================================
* brw_vtbl.c
*/
void brwInitVtbl( struct brw_context *brw );
/*======================================================================
* brw_context.c
*/
GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate);
/*======================================================================
* brw_queryobj.c
*/
void brw_init_queryobj_functions(struct dd_function_table *functions);
void brw_prepare_query_begin(struct brw_context *brw);
void brw_emit_query_begin(struct brw_context *brw);
void brw_emit_query_end(struct brw_context *brw);
/*======================================================================
* brw_state_dump.c
*/
void brw_debug_batch(struct intel_context *intel);
/*======================================================================
* brw_tex.c
*/
void brw_validate_textures( struct brw_context *brw );
/*======================================================================
* brw_program.c
*/
void brwInitFragProgFuncs( struct dd_function_table *functions );
/* brw_urb.c
*/
void brw_upload_urb_fence(struct brw_context *brw);
/* brw_curbe.c
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst);
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
static INLINE struct brw_context *
brw_context( GLcontext *ctx )
{
return (struct brw_context *)ctx;
}
static INLINE struct brw_vertex_program *
brw_vertex_program(struct gl_vertex_program *p)
{
return (struct brw_vertex_program *) p;
}
static INLINE const struct brw_vertex_program *
brw_vertex_program_const(const struct gl_vertex_program *p)
{
return (const struct brw_vertex_program *) p;
}
static INLINE struct brw_fragment_program *
brw_fragment_program(struct gl_fragment_program *p)
{
return (struct brw_fragment_program *) p;
}
static INLINE const struct brw_fragment_program *
brw_fragment_program_const(const struct gl_fragment_program *p)
{
return (const struct brw_fragment_program *) p;
}
#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
#endif

View File

@ -0,0 +1,376 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
/**
* Partition the CURBE between the various users of constant values:
* Note that vertex and fragment shaders can now fetch constants out
* of constant buffers. We no longer allocate a block of the GRF for
* constants. That greatly reduces the demand for space in the CURBE.
* Some of the comments within are dated...
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
/* CACHE_NEW_WM_PROG */
const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
nr_clip_regs = (nr_planes * 4 + 15) / 16;
}
total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
/* This can happen - what to do? Probably rather than falling
* back, the best thing to do is emit programs which code the
* constants as immediate values. Could do this either as a static
* cap on WM and VS, or adaptively.
*
* Unfortunately, this is currently dependent on the results of the
* program generation process (in the case of wm), so this would
* introduce the need to re-generate programs in the event of a
* curbe allocation failure.
*/
/* Max size is 32 - just large enough to
* hold the 128 parameters allowed by
* the fragment and vertex program
* APIs. It's not clear what happens
* when both VP and FP want to use 128
* parameters, though.
*/
assert(total_regs <= 32);
/* Lazy resize:
*/
if (nr_fp_regs > brw->curbe.wm_size ||
nr_vp_regs > brw->curbe.vs_size ||
nr_clip_regs != brw->curbe.clip_size ||
(total_regs < brw->curbe.total_size / 4 &&
brw->curbe.total_size > 16)) {
GLuint reg = 0;
/* Calculate a new layout:
*/
reg = 0;
brw->curbe.wm_start = reg;
brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
brw->curbe.clip_start = reg;
brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
brw->curbe.vs_start = reg;
brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
brw->curbe.total_size = reg;
if (0)
_mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
brw->curbe.wm_start,
brw->curbe.wm_size,
brw->curbe.clip_start,
brw->curbe.clip_size,
brw->curbe.vs_start,
brw->curbe.vs_size );
brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
}
}
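/* Editor's note: a worked example of the layout computed above; the program
* sizes are made up for illustration. With a fragment program using 20
* float params, a vertex program using 40, and two user clip planes
* enabled:
*
*    nr_fp_regs   = (20 + 15) / 16          = 2
*    nr_vp_regs   = (40 + 15) / 16          = 3
*    nr_clip_regs = ((6 + 2) * 4 + 15) / 16 = 2
*
*    wm_start = 0, wm_size = 2; clip_start = 2, clip_size = 2;
*    vs_start = 4, vs_size = 3; total_size = 7 (well under the limit of 32).
*/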
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = CACHE_NEW_WM_PROG
},
.prepare = calculate_curbe_offsets
};
/* Define the number of curbes within CS's urb allocation. Multiple
* urb entries -> multiple curbes. These will be used by
* fixed-function hardware in a double-buffering scheme to avoid a
* pipeline stall each time the contents of the curbe is changed.
*/
void brw_upload_cs_urb_state(struct brw_context *brw)
{
struct brw_cs_urb_state cs_urb;
memset(&cs_urb, 0, sizeof(cs_urb));
/* It appears that this is the state packet for the CS unit, ie. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
cs_urb.header.opcode = CMD_CS_URB_STATE;
cs_urb.header.length = sizeof(cs_urb)/4 - 2;
/* BRW_NEW_URB_FENCE */
cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
assert(brw->urb.nr_cs_entries);
BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
}
static GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
{ 0, 0, 1, 1 },
{ 0, -1, 0, 1 },
{ 0, 1, 0, 1 },
{-1, 0, 0, 1 },
{ 1, 0, 0, 1 }
};
/* Upload a new set of constants. Too much variability to go into the
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
static void prepare_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
GLuint i;
if (sz == 0) {
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
return;
}
buf = (GLfloat *) _mesa_calloc(bufsz);
/* fragment shader constants */
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
buf[offset + i] = *brw->wm.prog_data->param[i];
}
/* The clipplanes are actually delivered to both CLIP and VS units.
* VS uses them to calculate the outcode bitmasks.
*/
if (brw->curbe.clip_size) {
GLuint offset = brw->curbe.clip_start * 16;
GLuint j;
/* If any planes are going this way, send them all this way:
*/
for (i = 0; i < 6; i++) {
buf[offset + i * 4 + 0] = fixed_plane[i][0];
buf[offset + i * 4 + 1] = fixed_plane[i][1];
buf[offset + i * 4 + 2] = fixed_plane[i][2];
buf[offset + i * 4 + 3] = fixed_plane[i][3];
}
/* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
* clip-space:
*/
assert(MAX_CLIP_PLANES == 6);
for (j = 0; j < MAX_CLIP_PLANES; j++) {
if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
i++;
}
}
}
/* vertex shader constants */
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
if (brw->vertex_program->IsNVProgram)
_mesa_load_tracked_matrices(ctx);
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
/* XXX just use a memcpy here */
for (i = 0; i < nr; i++) {
const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
buf[offset + i * 4 + 0] = value[0];
buf[offset + i * 4 + 1] = value[1];
buf[offset + i * 4 + 2] = value[2];
buf[offset + i * 4 + 3] = value[3];
}
}
if (0) {
for (i = 0; i < sz*16; i+=4)
_mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
_mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
brw->curbe.last_buf, buf,
bufsz, brw->curbe.last_bufsz,
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
}
if (brw->curbe.curbe_bo != NULL &&
brw->curbe.last_buf &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
/* constants have not changed */
_mesa_free(buf);
}
else {
/* constants have changed */
if (brw->curbe.last_buf)
_mesa_free(brw->curbe.last_buf);
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
if (brw->curbe.curbe_bo != NULL &&
(brw->curbe.need_new_bo ||
brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
{
dri_bo_unreference(brw->curbe.curbe_bo);
brw->curbe.curbe_bo = NULL;
}
if (brw->curbe.curbe_bo == NULL) {
/* Allocate a single page for CURBE entries for this batchbuffer.
* They're generally around 64b.
*/
brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
4096, 1 << 6);
brw->curbe.curbe_next_offset = 0;
}
brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
brw->curbe.curbe_next_offset += bufsz;
brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
/* Copy data to the buffer:
*/
dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
brw_add_validated_bo(brw, brw->curbe.curbe_bo);
/* Because this provokes an action (ie copy the constants into the
* URB), it shouldn't be shortcircuited if identical to the
* previous time - because e.g. the urb destination may have
* changed, or the urb contents may differ from last time.
*
* Note that the data referred to is actually copied internally,
* not just used in place according to passed pointer.
*
* It appears that the CS unit takes care of using each available
* URB entry (Const URB Entry == CURBE) in turn, and issuing
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
}
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
OUT_BATCH(0);
} else {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
OUT_RELOC(brw->curbe.curbe_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
(sz - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
}
/* This tracked state is unique in that the state it monitors varies
* dynamically depending on the parameters tracked by the fragment and
* vertex programs. This is the template used as a starting point,
* each context will maintain a copy of this internally and update as
* required.
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_WM_PROG)
},
.prepare = prepare_constant_buffer,
.emit = emit_constant_buffer,
};

View File

@ -0,0 +1,851 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
#define PIPE_CONTROL_WRITEDEPTH 0x02
#define PIPE_CONTROL_WRITETIMESTAMP 0x03
#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
#define BRW_ANISORATIO_2 0
#define BRW_ANISORATIO_4 1
#define BRW_ANISORATIO_6 2
#define BRW_ANISORATIO_8 3
#define BRW_ANISORATIO_10 4
#define BRW_ANISORATIO_12 5
#define BRW_ANISORATIO_14 6
#define BRW_ANISORATIO_16 7
#define BRW_BLENDFACTOR_ONE 0x1
#define BRW_BLENDFACTOR_SRC_COLOR 0x2
#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
#define BRW_BLENDFACTOR_DST_ALPHA 0x4
#define BRW_BLENDFACTOR_DST_COLOR 0x5
#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define BRW_BLENDFACTOR_CONST_COLOR 0x7
#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
#define BRW_BLENDFACTOR_ZERO 0x11
#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
#define BRW_BLENDFUNCTION_ADD 0
#define BRW_BLENDFUNCTION_SUBTRACT 1
#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define BRW_BLENDFUNCTION_MIN 3
#define BRW_BLENDFUNCTION_MAX 4
#define BRW_ALPHATEST_FORMAT_UNORM8 0
#define BRW_ALPHATEST_FORMAT_FLOAT32 1
#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define BRW_CHROMAKEY_REPLACE_BLACK 1
#define BRW_CLIP_API_OGL 0
#define BRW_CLIP_API_DX 1
#define BRW_CLIPMODE_NORMAL 0
#define BRW_CLIPMODE_CLIP_ALL 1
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
#define BRW_CLIPMODE_REJECT_ALL 3
#define BRW_CLIPMODE_ACCEPT_ALL 4
#define BRW_CLIPMODE_KERNEL_CLIP 5
#define BRW_CLIP_NDCSPACE 0
#define BRW_CLIP_SCREENSPACE 1
#define BRW_COMPAREFUNCTION_ALWAYS 0
#define BRW_COMPAREFUNCTION_NEVER 1
#define BRW_COMPAREFUNCTION_LESS 2
#define BRW_COMPAREFUNCTION_EQUAL 3
#define BRW_COMPAREFUNCTION_LEQUAL 4
#define BRW_COMPAREFUNCTION_GREATER 5
#define BRW_COMPAREFUNCTION_NOTEQUAL 6
#define BRW_COMPAREFUNCTION_GEQUAL 7
#define BRW_COVERAGE_PIXELS_HALF 0
#define BRW_COVERAGE_PIXELS_1 1
#define BRW_COVERAGE_PIXELS_2 2
#define BRW_COVERAGE_PIXELS_4 3
#define BRW_CULLMODE_BOTH 0
#define BRW_CULLMODE_NONE 1
#define BRW_CULLMODE_FRONT 2
#define BRW_CULLMODE_BACK 3
#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define BRW_DEPTHFORMAT_D32_FLOAT 1
#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define BRW_DEPTHFORMAT_D16_UNORM 5
#define BRW_FLOATING_POINT_IEEE_754 0
#define BRW_FLOATING_POINT_NON_IEEE_754 1
#define BRW_FRONTWINDING_CW 0
#define BRW_FRONTWINDING_CCW 1
#define BRW_SPRITE_POINT_ENABLE 16
#define BRW_INDEX_BYTE 0
#define BRW_INDEX_WORD 1
#define BRW_INDEX_DWORD 2
#define BRW_LOGICOPFUNCTION_CLEAR 0
#define BRW_LOGICOPFUNCTION_NOR 1
#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
#define BRW_LOGICOPFUNCTION_INVERT 5
#define BRW_LOGICOPFUNCTION_XOR 6
#define BRW_LOGICOPFUNCTION_NAND 7
#define BRW_LOGICOPFUNCTION_AND 8
#define BRW_LOGICOPFUNCTION_EQUIV 9
#define BRW_LOGICOPFUNCTION_NOOP 10
#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
#define BRW_LOGICOPFUNCTION_COPY 12
#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
#define BRW_LOGICOPFUNCTION_OR 14
#define BRW_LOGICOPFUNCTION_SET 15
#define BRW_MAPFILTER_NEAREST 0x0
#define BRW_MAPFILTER_LINEAR 0x1
#define BRW_MAPFILTER_ANISOTROPIC 0x2
#define BRW_MIPFILTER_NONE 0
#define BRW_MIPFILTER_NEAREST 1
#define BRW_MIPFILTER_LINEAR 3
#define BRW_POLYGON_FRONT_FACING 0
#define BRW_POLYGON_BACK_FACING 1
#define BRW_PREFILTER_ALWAYS 0x0
#define BRW_PREFILTER_NEVER 0x1
#define BRW_PREFILTER_LESS 0x2
#define BRW_PREFILTER_EQUAL 0x3
#define BRW_PREFILTER_LEQUAL 0x4
#define BRW_PREFILTER_GREATER 0x5
#define BRW_PREFILTER_NOTEQUAL 0x6
#define BRW_PREFILTER_GEQUAL 0x7
#define BRW_PROVOKING_VERTEX_0 0
#define BRW_PROVOKING_VERTEX_1 1
#define BRW_PROVOKING_VERTEX_2 2
#define BRW_RASTRULE_UPPER_LEFT 0
#define BRW_RASTRULE_UPPER_RIGHT 1
/* These are listed as "Reserved, but not seen as useful"
* in Intel documentation (page 212, "Point Rasterization Rule",
* section 7.4 "SF Pipeline State Summary", of document
* "Intel® 965 Express Chipset Family and Intel® G35 Express
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
* http://intellinuxgraphics.org/documentation.html
* at the time of this writing).
*
* These appear to be supported on at least some
* i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
* is useful when using OpenGL to render to an FBO
* (which has the pixel coordinate Y orientation inverted
* with respect to the normal OpenGL pixel coordinate system).
*/
#define BRW_RASTRULE_LOWER_LEFT 2
#define BRW_RASTRULE_LOWER_RIGHT 3
#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
#define BRW_STENCILOP_KEEP 0
#define BRW_STENCILOP_ZERO 1
#define BRW_STENCILOP_REPLACE 2
#define BRW_STENCILOP_INCRSAT 3
#define BRW_STENCILOP_DECRSAT 4
#define BRW_STENCILOP_INCR 5
#define BRW_STENCILOP_DECR 6
#define BRW_STENCILOP_INVERT 7
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
#define BRW_SURFACEFORMAT_R16_SINT 0x10C
#define BRW_SURFACEFORMAT_R16_UINT 0x10D
#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
#define BRW_SURFACEFORMAT_I16_UNORM 0x111
#define BRW_SURFACEFORMAT_L16_UNORM 0x112
#define BRW_SURFACEFORMAT_A16_UNORM 0x113
#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
#define BRW_SURFACEFORMAT_R8_UNORM 0x140
#define BRW_SURFACEFORMAT_R8_SNORM 0x141
#define BRW_SURFACEFORMAT_R8_SINT 0x142
#define BRW_SURFACEFORMAT_R8_UINT 0x143
#define BRW_SURFACEFORMAT_A8_UNORM 0x144
#define BRW_SURFACEFORMAT_I8_UNORM 0x145
#define BRW_SURFACEFORMAT_L8_UNORM 0x146
#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define BRW_SURFACEFORMAT_MONO8 0x18E
#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
#define BRW_SURFACEFORMAT_FXT1 0x192
#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
#define BRW_SURFACERETURNFORMAT_FLOAT32 0
#define BRW_SURFACERETURNFORMAT_S1 1
#define BRW_SURFACE_1D 0
#define BRW_SURFACE_2D 1
#define BRW_SURFACE_3D 2
#define BRW_SURFACE_CUBE 3
#define BRW_SURFACE_BUFFER 4
#define BRW_SURFACE_NULL 7
#define BRW_TEXCOORDMODE_WRAP 0
#define BRW_TEXCOORDMODE_MIRROR 1
#define BRW_TEXCOORDMODE_CLAMP 2
#define BRW_TEXCOORDMODE_CUBE 3
#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
#define BRW_THREAD_PRIORITY_NORMAL 0
#define BRW_THREAD_PRIORITY_HIGH 1
#define BRW_TILEWALK_XMAJOR 0
#define BRW_TILEWALK_YMAJOR 1
#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
/* Execution Unit (EU) defines
*/
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
#define BRW_COMPRESSION_NONE 0
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
#define BRW_OPCODE_AND 5
#define BRW_OPCODE_OR 6
#define BRW_OPCODE_XOR 7
#define BRW_OPCODE_SHR 8
#define BRW_OPCODE_SHL 9
#define BRW_OPCODE_RSR 10
#define BRW_OPCODE_RSL 11
#define BRW_OPCODE_ASR 12
#define BRW_OPCODE_CMP 16
#define BRW_OPCODE_CMPN 17
#define BRW_OPCODE_JMPI 32
#define BRW_OPCODE_IF 34
#define BRW_OPCODE_IFF 35
#define BRW_OPCODE_ELSE 36
#define BRW_OPCODE_ENDIF 37
#define BRW_OPCODE_DO 38
#define BRW_OPCODE_WHILE 39
#define BRW_OPCODE_BREAK 40
#define BRW_OPCODE_CONTINUE 41
#define BRW_OPCODE_HALT 42
#define BRW_OPCODE_MSAVE 44
#define BRW_OPCODE_MRESTORE 45
#define BRW_OPCODE_PUSH 46
#define BRW_OPCODE_POP 47
#define BRW_OPCODE_WAIT 48
#define BRW_OPCODE_SEND 49
#define BRW_OPCODE_ADD 64
#define BRW_OPCODE_MUL 65
#define BRW_OPCODE_AVG 66
#define BRW_OPCODE_FRC 67
#define BRW_OPCODE_RNDU 68
#define BRW_OPCODE_RNDD 69
#define BRW_OPCODE_RNDE 70
#define BRW_OPCODE_RNDZ 71
#define BRW_OPCODE_MAC 72
#define BRW_OPCODE_MACH 73
#define BRW_OPCODE_LZD 74
#define BRW_OPCODE_SAD2 80
#define BRW_OPCODE_SADA2 81
#define BRW_OPCODE_DP4 84
#define BRW_OPCODE_DPH 85
#define BRW_OPCODE_DP3 86
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
#define BRW_MESSAGE_TARGET_MATH 1
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
/* for IGDNG only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN 9
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
#define BRW_URB_OPCODE_WRITE 0
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
#define CMD_URB_FENCE 0x6000
#define CMD_CS_URB_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_INSN_POINTER 0x6102
#define CMD_PIPELINE_SELECT_965 0x6104
#define CMD_PIPELINE_SELECT_GM45 0x6904
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
#define CMD_VERTEX_BUFFER 0x7808
# define BRW_VB0_INDEX_SHIFT 27
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
# define BRW_VB0_PITCH_SHIFT 0
#define CMD_VERTEX_ELEMENT 0x7809
# define BRW_VE0_INDEX_SHIFT 27
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
# define BRW_VE1_COMPONENT_STORE_0 2
# define BRW_VE1_COMPONENT_STORE_1_FLT 3
# define BRW_VE1_COMPONENT_STORE_1_INT 4
# define BRW_VE1_COMPONENT_STORE_VID 5
# define BRW_VE1_COMPONENT_STORE_IID 6
# define BRW_VE1_COMPONENT_STORE_PID 7
# define BRW_VE1_COMPONENT_0_SHIFT 28
# define BRW_VE1_COMPONENT_1_SHIFT 24
# define BRW_VE1_COMPONENT_2_SHIFT 20
# define BRW_VE1_COMPONENT_3_SHIFT 16
# define BRW_VE1_DST_OFFSET_SHIFT 0
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
#define CMD_CHROMA_KEY 0x7904
#define CMD_DEPTH_BUFFER 0x7905
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CMD_POLY_STIPPLE_PATTERN 0x7907
#define CMD_LINE_STIPPLE_PATTERN 0x7908
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define CMD_AA_LINE_PARAMETERS 0x790a
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_3D_PRIM 0x7b00
#define CMD_MI_FLUSH 0x0200
/* Various values from the R0 vertex header:
*/
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
#include "intel_chipset.h"
#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
(BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
#endif


@ -0,0 +1,903 @@
/*
* Copyright © 2008 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting documentation, and
* that the name of the copyright holders not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. The copyright holders make no representations
* about the suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <unistd.h>
#include <stdarg.h>
#include "main/mtypes.h"
#include "brw_context.h"
#include "brw_defines.h"
struct {
char *name;
int nsrc;
int ndst;
} opcode[128] = {
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
char *conditional_modifier[16] = {
[BRW_CONDITIONAL_NONE] = "",
[BRW_CONDITIONAL_Z] = ".e",
[BRW_CONDITIONAL_NZ] = ".ne",
[BRW_CONDITIONAL_G] = ".g",
[BRW_CONDITIONAL_GE] = ".ge",
[BRW_CONDITIONAL_L] = ".l",
[BRW_CONDITIONAL_LE] = ".le",
[BRW_CONDITIONAL_R] = ".r",
[BRW_CONDITIONAL_O] = ".o",
[BRW_CONDITIONAL_U] = ".u",
};
char *negate[2] = {
[0] = "",
[1] = "-",
};
char *_abs[2] = {
[0] = "",
[1] = "(abs)",
};
char *vert_stride[16] = {
[0] = "0",
[1] = "1",
[2] = "2",
[3] = "4",
[4] = "8",
[5] = "16",
[6] = "32",
[15] = "VxH",
};
char *width[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
[3] = "8",
[4] = "16",
};
char *horiz_stride[4] = {
[0] = "0",
[1] = "1",
[2] = "2",
[3] = "4"
};
char *chan_sel[4] = {
[0] = "x",
[1] = "y",
[2] = "z",
[3] = "w",
};
char *dest_condmod[16] = {
};
char *debug_ctrl[2] = {
[0] = "",
[1] = ".breakpoint"
};
char *saturate[2] = {
[0] = "",
[1] = ".sat"
};
char *exec_size[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
[3] = "8",
[4] = "16",
[5] = "32"
};
char *pred_inv[2] = {
[0] = "+",
[1] = "-"
};
char *pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
[4] = ".z",
[5] = ".w",
[6] = ".any4h",
[7] = ".all4h",
};
char *pred_ctrl_align1[16] = {
[1] = "",
[2] = ".anyv",
[3] = ".allv",
[4] = ".any2h",
[5] = ".all2h",
[6] = ".any4h",
[7] = ".all4h",
[8] = ".any8h",
[9] = ".all8h",
[10] = ".any16h",
[11] = ".all16h",
};
char *thread_ctrl[4] = {
[0] = "",
[2] = "switch"
};
char *compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
};
char *dep_ctrl[4] = {
[0] = "",
[1] = "NoDDClr",
[2] = "NoDDChk",
[3] = "NoDDClr,NoDDChk",
};
char *mask_ctrl[4] = {
[0] = "",
[1] = "nomask",
};
char *access_mode[2] = {
[0] = "align1",
[1] = "align16",
};
char *reg_encoding[8] = {
[0] = "UD",
[1] = "D",
[2] = "UW",
[3] = "W",
[4] = "UB",
[5] = "B",
[7] = "F"
};
char *imm_encoding[8] = {
[0] = "UD",
[1] = "D",
[2] = "UW",
[3] = "W",
[5] = "VF",
[5] = "V",
[7] = "F"
};
char *reg_file[4] = {
[0] = "A",
[1] = "g",
[2] = "m",
[3] = "imm",
};
char *writemask[16] = {
[0x0] = ".",
[0x1] = ".x",
[0x2] = ".y",
[0x3] = ".xy",
[0x4] = ".z",
[0x5] = ".xz",
[0x6] = ".yz",
[0x7] = ".xyz",
[0x8] = ".w",
[0x9] = ".xw",
[0xa] = ".yw",
[0xb] = ".xyw",
[0xc] = ".zw",
[0xd] = ".xzw",
[0xe] = ".yzw",
[0xf] = "",
};
char *end_of_thread[2] = {
[0] = "",
[1] = "EOT"
};
char *target_function[16] = {
[BRW_MESSAGE_TARGET_NULL] = "null",
[BRW_MESSAGE_TARGET_MATH] = "math",
[BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
[BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
[BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
[BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
[BRW_MESSAGE_TARGET_URB] = "urb",
[BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
};
char *math_function[16] = {
[BRW_MATH_FUNCTION_INV] = "inv",
[BRW_MATH_FUNCTION_LOG] = "log",
[BRW_MATH_FUNCTION_EXP] = "exp",
[BRW_MATH_FUNCTION_SQRT] = "sqrt",
[BRW_MATH_FUNCTION_RSQ] = "rsq",
[BRW_MATH_FUNCTION_SIN] = "sin",
[BRW_MATH_FUNCTION_COS] = "cos",
[BRW_MATH_FUNCTION_SINCOS] = "sincos",
[BRW_MATH_FUNCTION_TAN] = "tan",
[BRW_MATH_FUNCTION_POW] = "pow",
[BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
[BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
[BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
};
char *math_saturate[2] = {
[0] = "",
[1] = "sat"
};
char *math_signed[2] = {
[0] = "",
[1] = "signed"
};
char *math_scalar[2] = {
[0] = "",
[1] = "scalar"
};
char *math_precision[2] = {
[0] = "",
[1] = "partial_precision"
};
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
[BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
};
char *urb_allocate[2] = {
[0] = "",
[1] = "allocate"
};
char *urb_used[2] = {
[0] = "",
[1] = "used"
};
char *urb_complete[2] = {
[0] = "",
[1] = "complete"
};
char *sampler_target_format[4] = {
[0] = "F",
[2] = "UD",
[3] = "D"
};
static int column;
static int string (FILE *file, char *string)
{
fputs (string, file);
column += strlen (string);
return 0;
}
static int format (FILE *f, char *format, ...)
{
char buf[1024];
va_list args;
va_start (args, format);
vsnprintf (buf, sizeof (buf) - 1, format, args);
va_end (args);
string (f, buf);
return 0;
}
static int newline (FILE *f)
{
putc ('\n', f);
column = 0;
return 0;
}
static int pad (FILE *f, int c)
{
do
string (f, " ");
while (column < c);
return 0;
}
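/* Look up field value 'id' in the string table 'ctrl' and print its name,
 * prefixed with a space when *space is set; flags and reports any value
 * the table doesn't name.
 */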
static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
{
if (!ctrl[id]) {
fprintf (file, "*** invalid %s value %d ",
name, id);
return 1;
}
if (ctrl[id][0])
{
if (space && *space)
string (file, " ");
string (file, ctrl[id]);
if (space)
*space = 1;
}
return 0;
}
static int print_opcode (FILE *file, int id)
{
if (!opcode[id].name) {
format (file, "*** invalid opcode value %d ", id);
return 1;
}
string (file, opcode[id].name);
return 0;
}
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
string (file, "null");
return -1;
case BRW_ARF_ADDRESS:
format (file, "a%d", _reg_nr & 0x0f);
break;
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
case BRW_ARF_MASK_STACK:
format (file, "msd%d", _reg_nr & 0x0f);
break;
case BRW_ARF_STATE:
format (file, "sr%d", _reg_nr & 0x0f);
break;
case BRW_ARF_CONTROL:
format (file, "cr%d", _reg_nr & 0x0f);
break;
case BRW_ARF_NOTIFICATION_COUNT:
format (file, "n%d", _reg_nr & 0x0f);
break;
case BRW_ARF_IP:
string (file, "ip");
return -1;
break;
default:
format (file, "ARF%d", _reg_nr);
break;
}
} else {
err |= control (file, "src reg file", reg_file, _reg_file, NULL);
format (file, "%d", _reg_nr);
}
return err;
}
static int dest (FILE *file, struct brw_instruction *inst)
{
int err = 0;
if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
{
err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
else
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
}
}
else
{
if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
{
err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
}
else
{
err = 1;
string (file, "Indirect align16 address mode not supported");
}
}
return 0;
}
static int src_align1_region (FILE *file,
GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
{
int err = 0;
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",");
err |= control (file, "width", width, _width, NULL);
string (file, ",");
err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
string (file, ">");
return err;
}
static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
err |= reg (file, _reg_file, reg_num);
if (err == -1)
return 0;
if (sub_reg_num)
format (file, ".%d", sub_reg_num);
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
}
static int src_ia1 (FILE *file,
GLuint type,
GLuint _reg_file,
GLint _addr_imm,
GLuint _addr_subreg_nr,
GLuint _negate,
GLuint __abs,
GLuint _addr_mode,
GLuint _horiz_stride,
GLuint _width,
GLuint _vert_stride)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
string (file, "g[a0");
if (_addr_subreg_nr)
format (file, ".%d", _addr_subreg_nr);
if (_addr_imm)
format (file, " %d", _addr_imm);
string (file, "]");
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
}
static int src_da16 (FILE *file,
GLuint _reg_type,
GLuint _reg_file,
GLuint _vert_stride,
GLuint _reg_nr,
GLuint _subreg_nr,
GLuint __abs,
GLuint _negate,
GLuint swz_x,
GLuint swz_y,
GLuint swz_z,
GLuint swz_w)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
err |= reg (file, _reg_file, _reg_nr);
if (err == -1)
return 0;
if (_subreg_nr)
format (file, ".%d", _subreg_nr);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",1,1>");
err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
* 1->all - print the single channel
* 1->1 - print the mapping
*/
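/* For example (operands purely illustrative): an identity swizzle prints
 * nothing, a broadcast of channel Y prints "g4<4,4,1>F.y", and a full
 * remap prints "g4<4,4,1>F.wzyx".
 */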
if (swz_x == BRW_CHANNEL_X &&
swz_y == BRW_CHANNEL_Y &&
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
;
}
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
else
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
err |= control (file, "channel select", chan_sel, swz_y, NULL);
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
return err;
}
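/* Print an immediate operand according to its register type; vector-float
 * immediates are only labelled here rather than decoded.
 */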
static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
format (file, "0x%08xUD", inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_D:
format (file, "%dD", inst->bits3.d);
break;
case BRW_REGISTER_TYPE_UW:
format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_W:
format (file, "%dW", (int16_t) inst->bits3.d);
break;
case BRW_REGISTER_TYPE_UB:
format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_VF:
format (file, "Vector Float");
break;
case BRW_REGISTER_TYPE_V:
format (file, "0x%08xV", inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_F:
format (file, "%-gF", inst->bits3.f);
}
return 0;
}
static int src0 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
inst);
else if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da1 (file,
inst->bits1.da1.src0_reg_type,
inst->bits1.da1.src0_reg_file,
inst->bits2.da1.src0_vert_stride,
inst->bits2.da1.src0_width,
inst->bits2.da1.src0_horiz_stride,
inst->bits2.da1.src0_reg_nr,
inst->bits2.da1.src0_subreg_nr,
inst->bits2.da1.src0_abs,
inst->bits2.da1.src0_negate);
}
else
{
return src_ia1 (file,
inst->bits1.ia1.src0_reg_type,
inst->bits1.ia1.src0_reg_file,
inst->bits2.ia1.src0_indirect_offset,
inst->bits2.ia1.src0_subreg_nr,
inst->bits2.ia1.src0_negate,
inst->bits2.ia1.src0_abs,
inst->bits2.ia1.src0_address_mode,
inst->bits2.ia1.src0_horiz_stride,
inst->bits2.ia1.src0_width,
inst->bits2.ia1.src0_vert_stride);
}
}
else
{
if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da16 (file,
inst->bits1.da16.src0_reg_type,
inst->bits1.da16.src0_reg_file,
inst->bits2.da16.src0_vert_stride,
inst->bits2.da16.src0_reg_nr,
inst->bits2.da16.src0_subreg_nr,
inst->bits2.da16.src0_abs,
inst->bits2.da16.src0_negate,
inst->bits2.da16.src0_swz_x,
inst->bits2.da16.src0_swz_y,
inst->bits2.da16.src0_swz_z,
inst->bits2.da16.src0_swz_w);
}
else
{
string (file, "Indirect align16 address mode not supported");
return 1;
}
}
}
static int src1 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
inst);
else if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da1 (file,
inst->bits1.da1.src1_reg_type,
inst->bits1.da1.src1_reg_file,
inst->bits3.da1.src1_vert_stride,
inst->bits3.da1.src1_width,
inst->bits3.da1.src1_horiz_stride,
inst->bits3.da1.src1_reg_nr,
inst->bits3.da1.src1_subreg_nr,
inst->bits3.da1.src1_abs,
inst->bits3.da1.src1_negate);
}
else
{
return src_ia1 (file,
inst->bits1.ia1.src1_reg_type,
inst->bits1.ia1.src1_reg_file,
inst->bits3.ia1.src1_indirect_offset,
inst->bits3.ia1.src1_subreg_nr,
inst->bits3.ia1.src1_negate,
inst->bits3.ia1.src1_abs,
inst->bits3.ia1.src1_address_mode,
inst->bits3.ia1.src1_horiz_stride,
inst->bits3.ia1.src1_width,
inst->bits3.ia1.src1_vert_stride);
}
}
else
{
if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da16 (file,
inst->bits1.da16.src1_reg_type,
inst->bits1.da16.src1_reg_file,
inst->bits3.da16.src1_vert_stride,
inst->bits3.da16.src1_reg_nr,
inst->bits3.da16.src1_subreg_nr,
inst->bits3.da16.src1_abs,
inst->bits3.da16.src1_negate,
inst->bits3.da16.src1_swz_x,
inst->bits3.da16.src1_swz_y,
inst->bits3.da16.src1_swz_z,
inst->bits3.da16.src1_swz_w);
}
else
{
string (file, "Indirect align16 address mode not supported");
return 1;
}
}
}
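/* Disassemble a single 965 EU instruction to 'file'.  Returns nonzero if
 * any field used an encoding the tables above don't recognize.
 */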
int brw_disasm (FILE *file, struct brw_instruction *inst)
{
int err = 0;
int space = 0;
if (inst->header.predicate_control) {
string (file, "(");
err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
string (file, "f0");
if (inst->bits2.da1.flag_reg_nr)
format (file, ".%d", inst->bits2.da1.flag_reg_nr);
if (inst->header.access_mode == BRW_ALIGN_1)
err |= control (file, "predicate control align1", pred_ctrl_align1,
inst->header.predicate_control, NULL);
else
err |= control (file, "predicate control align16", pred_ctrl_align16,
inst->header.predicate_control, NULL);
string (file, ") ");
}
err |= print_opcode (file, inst->header.opcode);
err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
if (inst->header.opcode != BRW_OPCODE_SEND)
err |= control (file, "conditional modifier", conditional_modifier,
inst->header.destreg__conditionalmod, NULL);
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "(");
err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
string (file, ")");
}
if (inst->header.opcode == BRW_OPCODE_SEND)
format (file, " %d", inst->header.destreg__conditionalmod);
if (opcode[inst->header.opcode].ndst > 0) {
pad (file, 16);
err |= dest (file, inst);
}
if (opcode[inst->header.opcode].nsrc > 0) {
pad (file, 32);
err |= src0 (file, inst);
}
if (opcode[inst->header.opcode].nsrc > 1) {
pad (file, 48);
err |= src1 (file, inst);
}
if (inst->header.opcode == BRW_OPCODE_SEND) {
newline (file);
pad (file, 16);
space = 0;
err |= control (file, "target function", target_function,
inst->bits3.generic.msg_target, &space);
switch (inst->bits3.generic.msg_target) {
case BRW_MESSAGE_TARGET_MATH:
err |= control (file, "math function", math_function,
inst->bits3.math.function, &space);
err |= control (file, "math saturate", math_saturate,
inst->bits3.math.saturate, &space);
err |= control (file, "math signed", math_signed,
inst->bits3.math.int_type, &space);
err |= control (file, "math scalar", math_scalar,
inst->bits3.math.data_type, &space);
err |= control (file, "math precision", math_precision,
inst->bits3.math.precision, &space);
break;
case BRW_MESSAGE_TARGET_SAMPLER:
format (file, " (%d, %d, ",
inst->bits3.sampler.binding_table_index,
inst->bits3.sampler.sampler);
err |= control (file, "sampler target format", sampler_target_format,
inst->bits3.sampler.return_format, NULL);
string (file, ")");
break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
format (file, " (%d, %d, %d, %d)",
inst->bits3.dp_write.binding_table_index,
(inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
inst->bits3.dp_write.msg_control,
inst->bits3.dp_write.msg_type,
inst->bits3.dp_write.send_commit_msg);
break;
case BRW_MESSAGE_TARGET_URB:
format (file, " %d", inst->bits3.urb.offset);
space = 1;
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
inst->bits3.urb.allocate, &space);
err |= control (file, "urb used", urb_used,
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
default:
format (file, "unsupported target %d", inst->bits3.generic.msg_target);
break;
}
if (space)
string (file, " ");
format (file, "mlen %d",
inst->bits3.generic.msg_length);
format (file, " rlen %d",
inst->bits3.generic.response_length);
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "{");
space = 1;
err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
if (inst->header.opcode == BRW_OPCODE_SEND)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
string (file, " ");
string (file, "}");
}
string (file, ";");
newline (file);
return err;
}


@ -0,0 +1,493 @@
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "main/glheader.h"
#include "main/context.h"
#include "main/state.h"
#include "main/enums.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_fallback.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_BATCH
static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
_3DPRIM_POINTLIST,
_3DPRIM_LINELIST,
_3DPRIM_LINELOOP,
_3DPRIM_LINESTRIP,
_3DPRIM_TRILIST,
_3DPRIM_TRISTRIP,
_3DPRIM_TRIFAN,
_3DPRIM_QUADLIST,
_3DPRIM_QUADSTRIP,
_3DPRIM_POLYGON
};
static const GLenum reduced_prim[GL_POLYGON+1] = {
GL_POINTS,
GL_LINES,
GL_LINES,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES
};
/* When the primitive changes, set a state bit and re-validate. This isn't
 * the nicest approach; it would be better to make all the programs immune
 * to the active primitive (i.e. cope with all possibilities). That may not
 * be realistic, however.
 */
static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
{
GLcontext *ctx = &brw->intel.ctx;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
/* Slight optimization to avoid the GS program when not needed:
*/
if (prim == GL_QUAD_STRIP &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL)
prim = GL_TRIANGLE_STRIP;
if (prim != brw->primitive) {
brw->primitive = prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
if (reduced_prim[prim] != brw->intel.reduced_primitive) {
brw->intel.reduced_primitive = reduced_prim[prim];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
return prim_to_hw_prim[prim];
}
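/* Drop trailing vertices that can't form a complete primitive: a quad strip
 * needs at least four vertices and an even count, and independent quads need
 * a multiple of four; everything else passes through unchanged.
 */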
static GLuint trim(GLenum prim, GLuint length)
{
if (prim == GL_QUAD_STRIP)
return length > 3 ? (length - length % 2) : 0;
else if (prim == GL_QUADS)
return length - length % 4;
else
return length;
}
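/* Build a 3DPRIMITIVE packet for one _mesa_prim and write it into the
 * batchbuffer, optionally bracketed by flushes when always_flush_cache
 * debugging is enabled.
 */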
static void brw_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
uint32_t hw_prim)
{
struct brw_3d_primitive prim_packet;
struct intel_context *intel = &brw->intel;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
prim_packet.header.opcode = CMD_3D_PRIM;
prim_packet.header.length = sizeof(prim_packet)/4 - 2;
prim_packet.header.pad = 0;
prim_packet.header.topology = hw_prim;
prim_packet.header.indexed = prim->indexed;
prim_packet.verts_per_instance = trim(prim->mode, prim->count);
prim_packet.start_vert_location = prim->start;
if (prim->indexed)
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
* and missed flushes of the render cache as it heads to other parts of
* the GPU besides the draw code.
*/
if (intel->always_flush_cache) {
BEGIN_BATCH(1, IGNORE_CLIPRECTS);
OUT_BATCH(intel->vtbl.flush_cmd());
ADVANCE_BATCH();
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
if (intel->always_flush_cache) {
BEGIN_BATCH(1, IGNORE_CLIPRECTS);
OUT_BATCH(intel->vtbl.flush_cmd());
ADVANCE_BATCH();
}
brw->no_batch_wrap = GL_FALSE;
}
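/* Snapshot the current vertex arrays into brw->vb and flag
 * BRW_NEW_INPUT_DIMENSIONS when any attribute's size has changed.
 */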
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
struct brw_vertex_info old = brw->vb.info;
GLuint i;
for (i = 0; i < VERT_ATTRIB_MAX; i++)
dri_bo_unreference(brw->vb.inputs[i].bo);
memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
memset(&brw->vb.info, 0, sizeof(brw->vb.info));
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->vb.inputs[i].glarray = arrays[i];
brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
if (arrays[i]->StrideB != 0)
brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
((i%16) * 2);
}
/* Raise statechanges if input sizes have changed. */
if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}
/* XXX: could split the primitive list to fall back only on the
* non-conformant primitives.
*/
static GLboolean check_fallbacks( struct brw_context *brw,
const struct _mesa_prim *prim,
GLuint nr_prims )
{
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
/* If we don't require strict OpenGL conformance, never
* use fallbacks. If we're forcing fallbacks, always
* use fallbacks.
*/
if (brw->intel.conformance_mode == 0)
return GL_FALSE;
if (brw->intel.conformance_mode == 2)
return GL_TRUE;
if (ctx->Polygon.SmoothFlag) {
for (i = 0; i < nr_prims; i++)
if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
return GL_TRUE;
}
/* BRW hardware will do AA lines, but they appear to be non-conformant.
* TBD whether we keep this fallback:
*/
if (ctx->Line.SmoothFlag) {
for (i = 0; i < nr_prims; i++)
if (reduced_prim[prim[i].mode] == GL_LINES)
return GL_TRUE;
}
/* Stipple -- these fallbacks could be resolved with a little
* bit of work?
*/
if (ctx->Line.StippleFlag) {
for (i = 0; i < nr_prims; i++) {
/* GS doesn't get enough information to know when to reset
* the stipple counter?!?
*/
if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
return GL_TRUE;
if (prim[i].mode == GL_POLYGON &&
(ctx->Polygon.FrontMode == GL_LINE ||
ctx->Polygon.BackMode == GL_LINE))
return GL_TRUE;
}
}
if (ctx->Point.SmoothFlag) {
for (i = 0; i < nr_prims; i++)
if (prim[i].mode == GL_POINTS)
return GL_TRUE;
}
/* BRW hardware doesn't handle GL_CLAMP texturing correctly;
* brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
* as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
* we want strict conformance, force the fallback.
* Right now, we only do this for 2D textures.
*/
{
int u;
for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
if (texUnit->Enabled) {
if (texUnit->Enabled & TEXTURE_1D_BIT) {
if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_2D_BIT) {
if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_3D_BIT) {
if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
return GL_TRUE;
}
}
}
}
}
/* Nothing stopping us from the fast path now */
return GL_FALSE;
}
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
static GLboolean brw_try_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint min_index,
GLuint max_index )
{
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
GLboolean warn = GL_FALSE;
GLboolean first_time = GL_TRUE;
GLuint i;
if (ctx->NewState)
_mesa_update_state( ctx );
/* We have to validate the textures *before* checking for fallbacks;
* otherwise, the software fallback won't be able to rely on the
* texture state, the firstLevel and lastLevel fields won't be
* set in the intel texture object (they'll both be 0), and the
* software fallback will segfault if it attempts to access any
* texture level other than level 0.
*/
brw_validate_textures( brw );
if (check_fallbacks(brw, prim, nr_prims))
return GL_FALSE;
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
brw->ib.ib = ib;
brw->state.dirty.brw |= BRW_NEW_INDICES;
brw->vb.min_index = min_index;
brw->vb.max_index = max_index;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
/* Have to validate state quite late. Will rebuild tnl_program,
* which depends on varying information.
*
* Note this is where brw->vs->prog_data.inputs_read is calculated,
* so can't access it earlier.
*/
LOCK_HARDWARE(intel);
if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
UNLOCK_HARDWARE(intel);
return GL_TRUE;
}
for (i = 0; i < nr_prims; i++) {
uint32_t hw_prim;
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
* primitives. This fraction is just a guess (minimal full state plus
* a primitive is around 512 bytes), and would be better if we had
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
LOOP_CLIPRECTS);
hw_prim = brw_set_prim(brw, prim[i].mode);
if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
first_time = GL_FALSE;
brw_validate_state(brw);
/* Various fallback checks: */
if (brw->intel.Fallback)
goto out;
/* Check that we can fit our state in with our existing batchbuffer, or
* flush otherwise.
*/
if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
brw->state.validated_bo_count)) {
static GLboolean warned;
intel_batchbuffer_flush(intel->batch);
/* Validate the state after we flushed the batch (which would have
* changed the set of dirty state). If we still fail to
* check_aperture, warn of what's happening, but attempt to continue
* on since it may succeed anyway, and the user would probably rather
* see a failure and a warning than a fallback.
*/
brw_validate_state(brw);
if (!warned &&
dri_bufmgr_check_aperture_space(brw->state.validated_bos,
brw->state.validated_bo_count)) {
warn = GL_TRUE;
warned = GL_TRUE;
}
}
brw_upload_state(brw);
}
brw_emit_prim(brw, &prim[i], hw_prim);
retval = GL_TRUE;
}
if (intel->always_flush_batch)
intel_batchbuffer_flush(intel->batch);
out:
UNLOCK_HARDWARE(intel);
brw_state_cache_check_size(brw);
if (warn)
fprintf(stderr, "i965: Single primitive emit potentially exceeded "
"available aperture space\n");
if (!retval)
DBG("%s failed\n", __FUNCTION__);
return retval;
}
void brw_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index )
{
GLboolean retval;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
/* Decide if we want to rebase. If so we end up recursing once
* only into this function.
*/
if (min_index != 0) {
vbo_rebase_prims(ctx, arrays,
prim, nr_prims,
ib, min_index, max_index,
brw_draw_prims );
return;
}
}
/* Make a first attempt at drawing:
*/
retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
/* Otherwise, we really are out of memory. Pass the drawing
* command to the software tnl module, which will in turn call
* swrast to do the drawing.
*/
if (!retval) {
_swsetup_Wakeup(ctx);
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
}
void brw_draw_init( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct vbo_context *vbo = vbo_context(ctx);
/* Register our drawing function:
*/
vbo->draw_prims = brw_draw_prims;
}
void brw_draw_destroy( struct brw_context *brw )
{
int i;
if (brw->vb.upload.bo != NULL) {
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = NULL;
}
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
dri_bo_unreference(brw->vb.inputs[i].bo);
brw->vb.inputs[i].bo = NULL;
}
dri_bo_unreference(brw->ib.bo);
brw->ib.bo = NULL;
}

@ -0,0 +1,54 @@
/**************************************************************************
*
* Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef BRW_DRAW_H
#define BRW_DRAW_H
#include "main/mtypes.h" /* for GLcontext... */
#include "vbo/vbo.h"
struct brw_context;
void brw_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index );
void brw_draw_init( struct brw_context *brw );
void brw_draw_destroy( struct brw_context *brw );
/* brw_draw_current.c
*/
void brw_init_current_values(GLcontext *ctx,
struct gl_client_array *arrays);
#endif

@ -0,0 +1,742 @@
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "main/glheader.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/state.h"
#include "main/api_validate.h"
#include "main/enums.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_fallback.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_tex.h"
static GLuint double_types[5] = {
0,
BRW_SURFACEFORMAT_R64_FLOAT,
BRW_SURFACEFORMAT_R64G64_FLOAT,
BRW_SURFACEFORMAT_R64G64B64_FLOAT,
BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};
static GLuint float_types[5] = {
0,
BRW_SURFACEFORMAT_R32_FLOAT,
BRW_SURFACEFORMAT_R32G32_FLOAT,
BRW_SURFACEFORMAT_R32G32B32_FLOAT,
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};
static GLuint uint_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_UNORM,
BRW_SURFACEFORMAT_R32G32_UNORM,
BRW_SURFACEFORMAT_R32G32B32_UNORM,
BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};
static GLuint uint_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_USCALED,
BRW_SURFACEFORMAT_R32G32_USCALED,
BRW_SURFACEFORMAT_R32G32B32_USCALED,
BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};
static GLuint int_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_SNORM,
BRW_SURFACEFORMAT_R32G32_SNORM,
BRW_SURFACEFORMAT_R32G32B32_SNORM,
BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};
static GLuint int_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_SSCALED,
BRW_SURFACEFORMAT_R32G32_SSCALED,
BRW_SURFACEFORMAT_R32G32B32_SSCALED,
BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};
static GLuint ushort_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_UNORM,
BRW_SURFACEFORMAT_R16G16_UNORM,
BRW_SURFACEFORMAT_R16G16B16_UNORM,
BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};
static GLuint ushort_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_USCALED,
BRW_SURFACEFORMAT_R16G16_USCALED,
BRW_SURFACEFORMAT_R16G16B16_USCALED,
BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};
static GLuint short_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_SNORM,
BRW_SURFACEFORMAT_R16G16_SNORM,
BRW_SURFACEFORMAT_R16G16B16_SNORM,
BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};
static GLuint short_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_SSCALED,
BRW_SURFACEFORMAT_R16G16_SSCALED,
BRW_SURFACEFORMAT_R16G16B16_SSCALED,
BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};
static GLuint ubyte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_UNORM,
BRW_SURFACEFORMAT_R8G8_UNORM,
BRW_SURFACEFORMAT_R8G8B8_UNORM,
BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};
static GLuint ubyte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_USCALED,
BRW_SURFACEFORMAT_R8G8_USCALED,
BRW_SURFACEFORMAT_R8G8B8_USCALED,
BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};
static GLuint byte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_SNORM,
BRW_SURFACEFORMAT_R8G8_SNORM,
BRW_SURFACEFORMAT_R8G8B8_SNORM,
BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};
static GLuint byte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_SSCALED,
BRW_SURFACEFORMAT_R8G8_SSCALED,
BRW_SURFACEFORMAT_R8G8B8_SSCALED,
BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
};
/**
* Given vertex array type/size/format/normalized info, return
* the appropriate hardware surface type.
* Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
*/
static GLuint get_surface_type( GLenum type, GLuint size,
GLenum format, GLboolean normalized )
{
if (INTEL_DEBUG & DEBUG_VERTS)
_mesa_printf("type %s size %d normalized %d\n",
_mesa_lookup_enum_by_nr(type), size, normalized);
if (normalized) {
switch (type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
case GL_INT: return int_types_norm[size];
case GL_SHORT: return short_types_norm[size];
case GL_BYTE: return byte_types_norm[size];
case GL_UNSIGNED_INT: return uint_types_norm[size];
case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
case GL_UNSIGNED_BYTE:
if (format == GL_BGRA) {
/* See GL_EXT_vertex_array_bgra */
assert(size == 4);
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
}
else {
return ubyte_types_norm[size];
}
default: assert(0); return 0;
}
}
else {
assert(format == GL_RGBA); /* sanity check */
switch (type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
case GL_INT: return int_types_scale[size];
case GL_SHORT: return short_types_scale[size];
case GL_BYTE: return byte_types_scale[size];
case GL_UNSIGNED_INT: return uint_types_scale[size];
case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
default: assert(0); return 0;
}
}
}
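/* For example, a 3-component GL_FLOAT array maps to
* BRW_SURFACEFORMAT_R32G32B32_FLOAT, while a normalized 4-component
* GL_UNSIGNED_BYTE color array with GL_BGRA ordering maps to
* BRW_SURFACEFORMAT_B8G8R8A8_UNORM.
*/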
static GLuint get_size( GLenum type )
{
switch (type) {
case GL_DOUBLE: return sizeof(GLdouble);
case GL_FLOAT: return sizeof(GLfloat);
case GL_INT: return sizeof(GLint);
case GL_SHORT: return sizeof(GLshort);
case GL_BYTE: return sizeof(GLbyte);
case GL_UNSIGNED_INT: return sizeof(GLuint);
case GL_UNSIGNED_SHORT: return sizeof(GLushort);
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
default: return 0;
}
}
static GLuint get_index_type(GLenum type)
{
switch (type) {
case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
default: assert(0); return 0;
}
}
static void wrap_buffers( struct brw_context *brw,
GLuint size )
{
if (size < BRW_UPLOAD_INIT_SIZE)
size = BRW_UPLOAD_INIT_SIZE;
brw->vb.upload.offset = 0;
if (brw->vb.upload.bo != NULL)
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
size, 1);
/* Set the internal VBO to no-backing-store.  We only use it as a
* temporary within brw_try_draw_prims() while the lock is held.
*/
/* Don't do this: if we have to re-organize memory we need somewhere
* (under the fake bufmgr) to push this data.
*/
// if (!brw->intel.ttm)
// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
}
static void get_space( struct brw_context *brw,
GLuint size,
dri_bo **bo_return,
GLuint *offset_return )
{
size = ALIGN(size, 64);
if (brw->vb.upload.bo == NULL ||
brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
wrap_buffers(brw, size);
}
assert(*bo_return == NULL);
dri_bo_reference(brw->vb.upload.bo);
*bo_return = brw->vb.upload.bo;
*offset_return = brw->vb.upload.offset;
brw->vb.upload.offset += size;
}
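/* get_space() suballocates from the shared upload buffer: requests are
* rounded up to 64 bytes, a fresh upload BO is allocated once the
* current one would overflow, and the caller gets back a new reference
* plus the offset of its slice.
*/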
static void
copy_array_to_vbo_array( struct brw_context *brw,
struct brw_vertex_element *element,
GLuint dst_stride)
{
struct intel_context *intel = &brw->intel;
GLuint size = element->count * dst_stride;
get_space(brw, size, &element->bo, &element->offset);
if (element->glarray->StrideB == 0) {
assert(element->count == 1);
element->stride = 0;
} else {
element->stride = dst_stride;
}
if (dst_stride == element->glarray->StrideB) {
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(element->bo);
memcpy((char *)element->bo->virtual + element->offset,
element->glarray->Ptr, size);
drm_intel_gem_bo_unmap_gtt(element->bo);
} else {
dri_bo_subdata(element->bo,
element->offset,
size,
element->glarray->Ptr);
}
} else {
char *dest;
const unsigned char *src = element->glarray->Ptr;
int i;
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(element->bo);
dest = element->bo->virtual;
dest += element->offset;
for (i = 0; i < element->count; i++) {
memcpy(dest, src, dst_stride);
src += element->glarray->StrideB;
dest += dst_stride;
}
drm_intel_gem_bo_unmap_gtt(element->bo);
} else {
void *data;
data = _mesa_malloc(dst_stride * element->count);
dest = data;
for (i = 0; i < element->count; i++) {
memcpy(dest, src, dst_stride);
src += element->glarray->StrideB;
dest += dst_stride;
}
dri_bo_subdata(element->bo,
element->offset,
size,
data);
_mesa_free(data);
}
}
}
static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
/* First build an array of pointers to ve's in vb.inputs_read
*/
if (0)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
while (vs_inputs) {
GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
vs_inputs &= ~(1 << i);
brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
/* XXX: In the rare cases where this happens we fall back all
* the way to software rasterization, although a tnl fallback
* would be sufficient. I don't know of *any* real world
* cases with > 17 vertex attributes enabled, so it probably
* isn't an issue at this point.
*/
if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
intel->Fallback = 1;
return;
}
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(input->glarray->BufferObj);
/* Named buffer object: Just reference its contents directly. */
dri_bo_unreference(input->bo);
input->bo = intel_bufferobj_buffer(intel, intel_buffer,
INTEL_READ);
dri_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
input->count = input->glarray->_MaxElement;
/* This is a common place to reach if the user mistakenly supplies
* a pointer in place of a VBO offset. If we just let it go through,
* we may end up dereferencing a pointer beyond the bounds of the
* GTT. We would hope that the VBO's max_index would save us, but
* Mesa appears to hand us min/max values not clipped to the
* array object's _MaxElement, and _MaxElement frequently appears
* to be wrong anyway.
*
* The VBO spec allows application termination in this case, and it's
* probably a service to the poor programmer to do so rather than
* trying to just not render.
*/
assert(input->offset < input->bo->size);
} else {
input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
if (input->bo != NULL) {
/* Already-uploaded vertex data is present from a previous
* prepare_vertices, but we had to re-validate state due to
* check_aperture failing and a new batch being produced.
*/
continue;
}
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
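/* Interleave detection: the position array's stride seeds 'interleave',
* and any other array whose stride differs, or whose pointer doesn't
* fall within one stride of the position pointer, forces the arrays to
* be uploaded separately.
*/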
if (input->attrib == VERT_ATTRIB_POS) {
/* Position array not properly enabled:
*/
if (input->glarray->StrideB == 0) {
intel->Fallback = 1;
return;
}
interleave = input->glarray->StrideB;
ptr = input->glarray->Ptr;
}
else if (interleave != input->glarray->StrideB ||
(const unsigned char *)input->glarray->Ptr - ptr < 0 ||
(const unsigned char *)input->glarray->Ptr - ptr > interleave)
{
interleave = 0;
}
upload[nr_uploads++] = input;
/* We rebase drawing to start at element zero only when
* varyings are not in vbos, which means we can end up
* uploading non-varying arrays (stride != 0) when min_index
* is zero. This doesn't matter as the amount to upload is
* the same for these arrays whether the draw call is rebased
* or not - we just have to upload the one element.
*/
assert(min_index == 0 || input->glarray->StrideB == 0);
}
}
/* Handle any arrays to be uploaded. */
if (nr_uploads > 1 && interleave && interleave <= 256) {
/* All uploads are interleaved, so upload the arrays together as
* interleaved. First, upload the contents and set up upload[0].
*/
copy_array_to_vbo_array(brw, upload[0], interleave);
for (i = 1; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
upload[i]->stride = interleave;
upload[i]->offset = upload[0]->offset +
((const unsigned char *)upload[i]->glarray->Ptr - ptr);
upload[i]->bo = upload[0]->bo;
dri_bo_reference(upload[i]->bo);
}
}
else {
/* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) {
copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
}
}
brw_prepare_query_begin(brw);
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
brw_add_validated_bo(brw, input->bo);
}
}
static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
GLuint i;
brw_emit_query_begin(brw);
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), emit a single pad
* VERTEX_ELEMENT struct and bail.
*
* The stale vertex buffer state stays in place, but those buffers do
* nothing unless a VE loads from them.
*/
if (brw->vb.nr_enabled == 0) {
BEGIN_BATCH(3, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
ADVANCE_BATCH();
return;
}
/* Now emit VB and VEP state packets.
*
* This still defines a hardware VB for each input, even if they
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
((1 + brw->vb.nr_enabled * 4) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
(input->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
if (BRW_IS_IGDNG(brw)) {
if (input->stride) {
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset + input->stride * input->count - 1);
} else {
assert(input->count == 1);
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset + input->element_size - 1);
}
} else
OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
input->glarray->Format,
input->glarray->Normalized);
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
switch (input->glarray->Size) {
case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
break;
}
OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
if (BRW_IS_IGDNG(brw))
OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
(comp3 << BRW_VE1_COMPONENT_3_SHIFT));
else
OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
(comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_vertices = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
.cache = 0,
},
.prepare = brw_prepare_vertices,
.emit = brw_emit_vertices,
};
static void brw_prepare_indices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
dri_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
GLuint ib_type_size;
if (index_buffer == NULL)
return;
ib_type_size = get_size(index_buffer->type);
ib_size = ib_type_size * index_buffer->count;
bufferobj = index_buffer->obj;
/* Turn into a proper VBO:
*/
if (!_mesa_is_bufferobj(bufferobj)) {
brw->ib.start_vertex_offset = 0;
/* Get new bufferobj, offset:
*/
get_space(brw, ib_size, &bo, &offset);
/* Straight upload
*/
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(bo);
memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
drm_intel_gem_bo_unmap_gtt(bo);
} else {
dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
}
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
brw->ib.start_vertex_offset = 0;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
*/
if ((get_size(index_buffer->type) - 1) & offset) {
GLubyte *map = ctx->Driver.MapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
GL_DYNAMIC_DRAW_ARB,
bufferobj);
map += offset;
get_space(brw, ib_size, &bo, &offset);
dri_bo_subdata(bo, offset, ib_size, map);
ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
} else {
bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
INTEL_READ);
dri_bo_reference(bo);
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
offset = 0;
ib_size = bo->size;
}
}
if (brw->ib.bo != bo ||
brw->ib.offset != offset ||
brw->ib.size != ib_size)
{
drm_intel_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
brw->ib.offset = offset;
brw->ib.size = ib_size;
brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
} else {
drm_intel_bo_unreference(bo);
}
brw_add_validated_bo(brw, brw->ib.bo);
}
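/* For example, GL_UNSIGNED_SHORT indices at byte offset 6 within a VBO
* are already element-aligned, so the buffer is referenced in place and
* start_vertex_offset becomes 3; the same indices at byte offset 5
* would instead be copied into an aligned temporary upload buffer.
*/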
const struct brw_tracked_state brw_indices = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_INDICES,
.cache = 0,
},
.prepare = brw_prepare_indices,
};
static void brw_emit_index_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
if (index_buffer == NULL)
return;
/* Emit the indexbuffer packet:
*/
{
struct brw_indexbuffer ib;
memset(&ib, 0, sizeof(ib));
ib.header.bits.opcode = CMD_INDEX_BUFFER;
ib.header.bits.length = sizeof(ib)/4 - 2;
ib.header.bits.index_format = get_index_type(index_buffer->type);
ib.header.bits.cut_index_enable = 0;
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH( ib.header.dword );
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
brw->ib.offset);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
brw->ib.offset + brw->ib.size - 1);
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
}
const struct brw_tracked_state brw_index_buffer = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
.cache = 0,
},
.emit = brw_emit_index_buffer,
};

@ -0,0 +1,254 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
/* How does predicate control work when execution_size != 8? Do I
* need to test/set for 0xffff when execution_size is 16?
*/
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
{
p->current->header.predicate_control = BRW_PREDICATE_NONE;
if (value != 0xff) {
if (value != p->flag_value) {
brw_push_insn_state(p);
brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
p->flag_value = value;
brw_pop_insn_state(p);
}
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
}
void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
{
p->current->header.predicate_control = pc;
}
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
p->current->header.destreg__conditionalmod = conditional;
}
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
{
p->current->header.access_mode = access_mode;
}
void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
{
p->current->header.compression_control = compression_control;
}
void brw_set_mask_control( struct brw_compile *p, GLuint value )
{
p->current->header.mask_control = value;
}
void brw_set_saturate( struct brw_compile *p, GLuint value )
{
p->current->header.saturate = value;
}
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
p->current++;
}
void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
}
/***********************************************************************
*/
void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
{
p->brw = brw;
p->nr_insn = 0;
p->current = p->stack;
memset(p->current, 0, sizeof(p->current[0]));
/* Some defaults?
*/
brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
brw_set_saturate(p, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_predicate_control_flag_value(p, 0xff);
}
const GLuint *brw_get_program( struct brw_compile *p,
GLuint *sz )
{
GLuint i;
for (i = 0; i < 8; i++)
brw_NOP(p);
*sz = p->nr_insn * sizeof(struct brw_instruction);
return (const GLuint *)p->store;
}
/**
* Subroutine calls require special attention.
* Mesa instructions may be expanded into multiple hardware instructions
* so the prog_instruction::BranchTarget field can't be used as an index
* into the hardware instructions.
*
* The BranchTarget field isn't needed, however. Mesa's GLSL compiler
* emits CAL and BGNSUB instructions with labels that can be used to map
* subroutine calls to actual subroutine code blocks.
*
* The structures and function here implement patching of CAL instructions
* so they jump to the right subroutine code...
*/
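/* Roughly, the expected flow is: the code generator calls
* brw_save_label() for each OPCODE_BGNSUB and brw_save_call() for each
* OPCODE_CAL as it emits instructions, then calls brw_resolve_cals()
* at the end so every recorded CAL is patched to jump to its matching
* label.
*/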
/**
* For each OPCODE_BGNSUB we create one of these.
*/
struct brw_glsl_label
{
const char *name; /**< the label string */
GLuint position; /**< the position of the brw instruction for this label */
struct brw_glsl_label *next; /**< next in linked list */
};
/**
* For each OPCODE_CAL we create one of these.
*/
struct brw_glsl_call
{
GLuint call_inst_pos; /**< location of the CAL instruction */
const char *sub_name; /**< name of subroutine to call */
struct brw_glsl_call *next; /**< next in linked list */
};
/**
* Called for each OPCODE_BGNSUB.
*/
void
brw_save_label(struct brw_compile *c, const char *name, GLuint position)
{
struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
label->name = name;
label->position = position;
label->next = c->first_label;
c->first_label = label;
}
/**
* Called for each OPCODE_CAL.
*/
void
brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
{
struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
call->call_inst_pos = call_pos;
call->sub_name = name;
call->next = c->first_call;
c->first_call = call;
}
/**
* Lookup a label, return label's position/offset.
*/
static GLuint
brw_lookup_label(struct brw_compile *c, const char *name)
{
const struct brw_glsl_label *label;
for (label = c->first_label; label; label = label->next) {
if (strcmp(name, label->name) == 0) {
return label->position;
}
}
abort(); /* should never happen */
return ~0;
}
/**
* When we're done generating code, this function is called to resolve
* subroutine calls.
*/
void
brw_resolve_cals(struct brw_compile *c)
{
const struct brw_glsl_call *call;
for (call = c->first_call; call; call = call->next) {
const GLuint sub_loc = brw_lookup_label(c, call->sub_name);
struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
GLint offset = brw_sub_inst - brw_call_inst;
/* Patch the CAL instruction to jump to the subroutine; the offset is
* converted from instructions to bytes (16 bytes per brw_instruction).
*/
brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
}
/* free linked list of calls */
{
struct brw_glsl_call *call, *next;
for (call = c->first_call; call; call = next) {
next = call->next;
_mesa_free(call);
}
c->first_call = NULL;
}
/* free linked list of labels */
{
struct brw_glsl_label *label, *next;
for (label = c->first_label; label; label = next) {
next = label->next;
_mesa_free(label);
}
c->first_label = NULL;
}
}

@ -0,0 +1,968 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_EU_H
#define BRW_EU_H
#include "brw_structs.h"
#include "brw_defines.h"
#include "shader/prog_instruction.h"
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
#define REG_SIZE (8*4)
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
* WM programs to implement shaders decomposed into "channel serial"
* or "structure of array" form:
*/
struct brw_reg
{
GLuint type:4;
GLuint file:2;
GLuint nr:8;
GLuint subnr:5; /* :1 in align16 */
GLuint negate:1; /* source only */
GLuint abs:1; /* source only */
GLuint vstride:4; /* source only */
GLuint width:3; /* src only, align1 only */
GLuint hstride:2; /* align1 only */
GLuint address_mode:1; /* relative addressing, hopefully! */
GLuint pad0:1;
union {
struct {
GLuint swizzle:8; /* src only, align16 only */
GLuint writemask:4; /* dest only, align16 only */
GLint indirect_offset:10; /* relative addressing offset */
GLuint pad1:10; /* two dwords total */
} bits;
GLfloat f;
GLint d;
GLuint ud;
} dw1;
};
struct brw_indirect {
GLuint addr_subnr:4;
GLint addr_offset:10;
GLuint pad:18;
};
struct brw_glsl_label;
struct brw_glsl_call;
#define BRW_EU_MAX_INSN_STACK 5
#define BRW_EU_MAX_INSN 10000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
GLuint nr_insn;
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
GLuint flag_value;
GLboolean single_program_flow;
struct brw_context *brw;
struct brw_glsl_label *first_label; /**< linked list of labels */
struct brw_glsl_call *first_call; /**< linked list of CALs */
};
void
brw_save_label(struct brw_compile *c, const char *name, GLuint position);
void
brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
void
brw_resolve_cals(struct brw_compile *c);
static INLINE int type_sz( GLuint type )
{
switch( type ) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
return 4;
case BRW_REGISTER_TYPE_HF:
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_W:
return 2;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
return 1;
default:
return 0;
}
}
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static INLINE struct brw_reg brw_reg( GLuint file,
GLuint nr,
GLuint subnr,
GLuint type,
GLuint vstride,
GLuint width,
GLuint hstride,
GLuint swizzle,
GLuint writemask )
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
else if (file == BRW_MESSAGE_REGISTER_FILE)
assert(nr < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
reg.type = type;
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
reg.negate = 0;
reg.abs = 0;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
* be lost when converted to align16. This is probably too much to
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
reg.dw1.bits.swizzle = swizzle;
reg.dw1.bits.writemask = writemask;
reg.dw1.bits.indirect_offset = 0;
reg.dw1.bits.pad1 = 0;
return reg;
}
/** Construct float[16] register */
static INLINE struct brw_reg brw_vec16_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
/** Construct float[8] register */
static INLINE struct brw_reg brw_vec8_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
/** Construct float[4] register */
static INLINE struct brw_reg brw_vec4_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
/** Construct float[2] register */
static INLINE struct brw_reg brw_vec2_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYXY,
WRITEMASK_XY);
}
/** Construct float[1] register */
static INLINE struct brw_reg brw_vec1_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
static INLINE struct brw_reg retype( struct brw_reg reg,
GLuint type )
{
reg.type = type;
return reg;
}
static INLINE struct brw_reg suboffset( struct brw_reg reg,
GLuint delta )
{
reg.subnr += delta * type_sz(reg.type);
return reg;
}
static INLINE struct brw_reg offset( struct brw_reg reg,
GLuint delta )
{
reg.nr += delta;
return reg;
}
static INLINE struct brw_reg byte_offset( struct brw_reg reg,
GLuint bytes )
{
GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
reg.nr = newoffset / REG_SIZE;
reg.subnr = newoffset % REG_SIZE;
return reg;
}
/** Construct unsigned word[16] register */
static INLINE struct brw_reg brw_uw16_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[8] register */
static INLINE struct brw_reg brw_uw8_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[1] register */
static INLINE struct brw_reg brw_uw1_reg( GLuint file,
GLuint nr,
GLuint subnr )
{
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
static INLINE struct brw_reg brw_imm_reg( GLuint type )
{
return brw_reg( BRW_IMMEDIATE_VALUE,
0,
0,
type,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
0,
0);
}
/** Construct float immediate register */
static INLINE struct brw_reg brw_imm_f( GLfloat f )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
return imm;
}
/** Construct integer immediate register */
static INLINE struct brw_reg brw_imm_d( GLint d )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
return imm;
}
/** Construct uint immediate register */
static INLINE struct brw_reg brw_imm_ud( GLuint ud )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
return imm;
}
/** Construct ushort immediate register */
static INLINE struct brw_reg brw_imm_uw( GLushort uw )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
imm.dw1.ud = uw | (uw << 16);
return imm;
}
/** Construct short immediate register */
static INLINE struct brw_reg brw_imm_w( GLshort w )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
imm.dw1.d = w | (w << 16);
return imm;
}
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
* numbers alias with _V and _VF below:
*/
/** Construct vector of eight signed half-byte values */
static INLINE struct brw_reg brw_imm_v( GLuint v )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
/** Construct vector of four 8-bit float values */
static INLINE struct brw_reg brw_imm_vf( GLuint v )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
GLuint v1,
GLuint v2,
GLuint v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) |
(v1 << 8) |
(v2 << 16) |
(v3 << 24));
return imm;
}
static INLINE struct brw_reg brw_address( struct brw_reg reg )
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
/** Construct float[1] general-purpose register */
static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[2] general-purpose register */
static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[4] general-purpose register */
static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[8] general-purpose register */
static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct null register (usually used for setting condition codes) */
static INLINE struct brw_reg brw_null_reg( void )
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL,
0);
}
static INLINE struct brw_reg brw_address_reg( GLuint subnr )
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ADDRESS,
subnr);
}
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
static INLINE struct brw_reg brw_ip_reg( void )
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, /* NOTE! */
WRITEMASK_XYZW); /* NOTE! */
}
static INLINE struct brw_reg brw_acc_reg( void )
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ACCUMULATOR,
0);
}
static INLINE struct brw_reg brw_flag_reg( void )
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_FLAG,
0);
}
static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_MASK,
subnr);
}
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
assert(nr < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
}
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/
static INLINE GLuint cvt( GLuint val )
{
switch (val) {
case 0: return 0;
case 1: return 1;
case 2: return 2;
case 4: return 3;
case 8: return 4;
case 16: return 5;
case 32: return 6;
}
return 0;
}
static INLINE struct brw_reg stride( struct brw_reg reg,
GLuint vstride,
GLuint width,
GLuint hstride )
{
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
reg.hstride = cvt(hstride);
return reg;
}
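/* Example: vec8() below is stride(reg, 8, 8, 1), i.e. the usual
* <8;8,1> region.  cvt() maps the element counts 0,1,2,4,8,16,32 onto
* the 0..6 hardware encodings, and the width field is stored with a
* bias of one (cvt(width) - 1).
*/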
static INLINE struct brw_reg vec16( struct brw_reg reg )
{
return stride(reg, 16,16,1);
}
static INLINE struct brw_reg vec8( struct brw_reg reg )
{
return stride(reg, 8,8,1);
}
static INLINE struct brw_reg vec4( struct brw_reg reg )
{
return stride(reg, 4,4,1);
}
static INLINE struct brw_reg vec2( struct brw_reg reg )
{
return stride(reg, 2,2,1);
}
static INLINE struct brw_reg vec1( struct brw_reg reg )
{
return stride(reg, 0,1,0);
}
static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
{
return vec1(suboffset(reg, elt));
}
static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
{
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
GLuint x,
GLuint y,
GLuint z,
GLuint w)
{
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
return reg;
}
static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
GLuint x )
{
return brw_swizzle(reg, x, x, x, x);
}
static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
GLuint mask )
{
reg.dw1.bits.writemask &= mask;
return reg;
}
static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
GLuint mask )
{
reg.dw1.bits.writemask = mask;
return reg;
}
static INLINE struct brw_reg negate( struct brw_reg reg )
{
reg.negate ^= 1;
return reg;
}
static INLINE struct brw_reg brw_abs( struct brw_reg reg )
{
reg.abs = 1;
return reg;
}
/***********************************************************************
*/
static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
GLint offset )
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
GLint offset )
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
{
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
{
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
{
return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
{
ptr.addr_offset += offset;
return ptr;
}
static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
ptr.addr_offset = offset;
ptr.pad = 0;
return ptr;
}
/** Do two brw_regs refer to the same register? */
static INLINE GLboolean
brw_same_reg(struct brw_reg r1, struct brw_reg r2)
{
return r1.file == r2.file && r1.nr == r2.nr;
}
static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
{
return &p->store[p->nr_insn];
}
void brw_pop_insn_state( struct brw_compile *p );
void brw_push_insn_state( struct brw_compile *p );
void brw_set_mask_control( struct brw_compile *p, GLuint value );
void brw_set_saturate( struct brw_compile *p, GLuint value );
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
void brw_set_compression_control( struct brw_compile *p, GLboolean control );
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
void brw_init_compile( struct brw_context *, struct brw_compile *p );
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
/* Helpers for regular instructions:
*/
#define ALU1(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0);
#define ALU2(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1);
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU2(JMPI)
ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
#undef ALU1
#undef ALU2
/* Helpers for SEND instruction:
*/
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
GLboolean used,
GLuint msg_length,
GLuint response_length,
GLboolean eot,
GLboolean writes_complete,
GLuint offset,
GLuint swizzle);
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
GLboolean used,
GLuint msg_length,
GLuint response_length,
GLboolean eot,
GLboolean writes_complete,
GLuint offset,
GLuint swizzle);
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
GLboolean eot);
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
GLuint sampler,
GLuint writemask,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
GLboolean eot,
GLuint header_present,
GLuint simd_mode);
void brw_math_16( struct brw_compile *p,
struct brw_reg dest,
GLuint function,
GLuint saturate,
GLuint msg_reg_nr,
struct brw_reg src,
GLuint precision );
void brw_math( struct brw_compile *p,
struct brw_reg dest,
GLuint function,
GLuint saturate,
GLuint msg_reg_nr,
struct brw_reg src,
GLuint data_type,
GLuint precision );
void brw_dp_READ_16( struct brw_compile *p,
struct brw_reg dest,
GLuint scratch_offset );
void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLboolean relAddr,
GLuint location,
GLuint bind_table_index );
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
GLuint oword,
GLboolean relAddr,
struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index );
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint scratch_offset );
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
struct brw_instruction *brw_IF(struct brw_compile *p,
GLuint execute_size);
struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *if_insn);
void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *if_or_else_insn);
/* DO/WHILE loops:
*/
struct brw_instruction *brw_DO(struct brw_compile *p,
GLuint execute_size);
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
struct brw_instruction *brw_BREAK(struct brw_compile *p);
struct brw_instruction *brw_CONT(struct brw_compile *p);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn);
void brw_NOP(struct brw_compile *p);
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
GLuint conditional,
struct brw_reg src0,
struct brw_reg src1);
void brw_print_reg( struct brw_reg reg );
/***********************************************************************
* brw_eu_util.c:
*/
void brw_copy_indirect_to_indirect(struct brw_compile *p,
struct brw_indirect dst_ptr,
struct brw_indirect src_ptr,
GLuint count);
void brw_copy_from_indirect(struct brw_compile *p,
struct brw_reg dst,
struct brw_indirect ptr,
GLuint count);
void brw_copy4(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count);
void brw_copy8(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count);
void brw_math_invert( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src);
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
#endif


@ -0,0 +1,95 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/mtypes.h"
#include "main/imports.h"
#include "brw_eu.h"
void brw_print_reg( struct brw_reg hwreg )
{
static const char *file[] = {
"arf",
"grf",
"msg",
"imm"
};
static const char *type[] = {
"ud",
"d",
"uw",
"w",
"ub",
"vf",
"hf",
"f"
};
_mesa_printf("%s%s",
hwreg.abs ? "abs/" : "",
hwreg.negate ? "-" : "");
if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.nr % 2 == 0 &&
hwreg.subnr == 0 &&
hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
hwreg.width == BRW_WIDTH_8 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* vector register */
_mesa_printf("vec%d", hwreg.nr);
}
else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
hwreg.width == BRW_WIDTH_1 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* "scalar" register */
_mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
}
else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
_mesa_printf("imm %f", hwreg.dw1.f);
}
else {
_mesa_printf("%s%d.%d<%d;%d,%d>:%s",
file[hwreg.file],
hwreg.nr,
hwreg.subnr / type_sz(hwreg.type),
hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
1<<hwreg.width,
hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
type[hwreg.type]);
}
}
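A hedged example of the fall-through format above (example_print is a hypothetical helper; the region fields assumed here are the ones brw_vec8_grf() normally builds):

static void example_print(void)
{
   /* An odd-numbered 8-wide float GRF misses the "vec"/"scl" shortcuts
    * above and is printed in full region notation, i.e. roughly:
    *
    *     grf3.0<8;8,1>:f
    *
    * meaning file + reg.subreg, then <vstride;width,hstride> and type.
    */
   brw_print_reg(brw_vec8_grf(3, 0));
}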

File diff suppressed because it is too large


@ -0,0 +1,126 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
void brw_math_invert( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
{
brw_math( p,
dst,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
src,
BRW_MATH_PRECISION_FULL,
BRW_MATH_DATA_VECTOR );
}
void brw_copy4(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count)
{
GLuint i;
dst = vec4(dst);
src = vec4(src);
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
}
}
void brw_copy8(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count)
{
GLuint i;
dst = vec8(dst);
src = vec8(src);
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
}
}
void brw_copy_indirect_to_indirect(struct brw_compile *p,
struct brw_indirect dst_ptr,
struct brw_indirect src_ptr,
GLuint count)
{
GLuint i;
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
}
}
void brw_copy_from_indirect(struct brw_compile *p,
struct brw_reg dst,
struct brw_indirect ptr,
GLuint count)
{
GLuint i;
dst = vec4(dst);
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
}
}


@ -0,0 +1,201 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_gs.h"
static void compile_gs_prog( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
struct brw_gs_compile c;
const GLuint *program;
GLuint program_size;
memset(&c, 0, sizeof(c));
c.key = *key;
c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = brw_count_bits(c.key.attrs);
if (BRW_IS_IGDNG(brw))
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
c.nr_bytes = c.nr_regs * REG_SIZE;
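/* Worked example (illustrative): with 7 attributes, nr_regs is
* (7 + 1) / 2 + 1 = 5 on pre-IGDNG parts (or + 3 = 7 on IGDNG), and
* nr_bytes = nr_regs * REG_SIZE.
*/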
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func);
c.func.single_program_flow = 1;
/* For some reason the thread is spawned with only 4 channels
* unmasked.
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
/* Note that primitives which don't require a GS program have
* already been weeded out by this stage:
*/
switch (key->primitive) {
case GL_QUADS:
brw_gs_quads( &c );
break;
case GL_QUAD_STRIP:
brw_gs_quad_strip( &c );
break;
case GL_LINE_LOOP:
brw_gs_lines( &c );
break;
case GL_LINES:
if (key->hint_gs_always)
brw_gs_lines( &c );
else {
return;
}
break;
case GL_TRIANGLES:
if (key->hint_gs_always)
brw_gs_tris( &c );
else {
return;
}
break;
case GL_POINTS:
if (key->hint_gs_always)
brw_gs_points( &c );
else {
return;
}
break;
default:
return;
}
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
/* Upload
*/
dri_bo_unreference(brw->gs.prog_bo);
brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
&c.key, sizeof(c.key),
NULL, 0,
program, program_size,
&c.prog_data,
&brw->gs.prog_data );
}
static const GLenum gs_prim[GL_POLYGON+1] = {
GL_POINTS,
GL_LINES,
GL_LINE_LOOP,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_QUADS,
GL_QUAD_STRIP,
GL_TRIANGLES
};
static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
key->attrs = brw->vs.prog_data->outputs_written;
/* BRW_NEW_PRIMITIVE */
key->primitive = gs_prim[brw->primitive];
key->hint_gs_always = 0; /* debug code? */
key->need_gs_prog = (key->hint_gs_always ||
brw->primitive == GL_QUADS ||
brw->primitive == GL_QUAD_STRIP ||
brw->primitive == GL_LINE_LOOP);
}
/* Select, and if necessary compile, the GS program for the current primitive.
*/
static void prepare_gs_prog(struct brw_context *brw)
{
struct brw_gs_prog_key key;
/* Populate the key:
*/
populate_key(brw, &key);
if (brw->gs.prog_active != key.need_gs_prog) {
brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
brw->gs.prog_active = key.need_gs_prog;
}
if (brw->gs.prog_active) {
dri_bo_unreference(brw->gs.prog_bo);
brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
NULL, 0,
&brw->gs.prog_data);
if (brw->gs.prog_bo == NULL)
compile_gs_prog( brw, &key );
}
}
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_PRIMITIVE,
.cache = CACHE_NEW_VS_PROG
},
.prepare = prepare_gs_prog
};


@ -0,0 +1,76 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_GS_H
#define BRW_GS_H
#include "brw_context.h"
#include "brw_eu.h"
#define MAX_GS_VERTS (4)
struct brw_gs_prog_key {
GLuint attrs:32;
GLuint primitive:4;
GLuint hint_gs_always:1;
GLuint need_gs_prog:1;
GLuint pad:26;
};
struct brw_gs_compile {
struct brw_compile func;
struct brw_gs_prog_key key;
struct brw_gs_prog_data prog_data;
struct {
struct brw_reg R0;
struct brw_reg vertex[MAX_GS_VERTS];
} reg;
/* 3 different ways of expressing vertex size:
*/
GLuint nr_attrs;
GLuint nr_regs;
GLuint nr_bytes;
GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
void brw_gs_quads( struct brw_gs_compile *c );
void brw_gs_quad_strip( struct brw_gs_compile *c );
void brw_gs_tris( struct brw_gs_compile *c );
void brw_gs_lines( struct brw_gs_compile *c );
void brw_gs_points( struct brw_gs_compile *c );
#endif


@ -0,0 +1,186 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_gs.h"
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
GLuint nr_verts )
{
GLuint i = 0,j;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < nr_verts; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
c->prog_data.urb_read_length = c->nr_regs;
c->prog_data.total_grf = i;
}
static void brw_gs_emit_vue(struct brw_gs_compile *c,
struct brw_reg vert,
GLboolean last,
GLuint header)
{
struct brw_compile *p = &c->func;
GLboolean allocate = !last;
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
*/
brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
/* Copy the vertex from vertn into m1..mN+1:
*/
brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
/* Send each vertex as a separate write to the urb. This is
* different from the approach in brw_sf_emit.c, where subsequent
* writes are used to build up a single urb entry. Each of these
* writes instantiates a separate urb entry, and a new one must be
* allocated each time.
*/
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.R0,
allocate,
1, /* used */
c->nr_regs + 1, /* msg length */
allocate ? 1 : 0, /* response length */
allocate ? 0 : 1, /* eot */
1, /* writes_complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
struct brw_compile *p = &c->func;
brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
brw_ff_sync(p,
c->reg.R0,
0,
c->reg.R0,
1,
1, /* used */
1, /* msg length */
1, /* response length */
0, /* eot */
1, /* write complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
void brw_gs_quads( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 4);
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
*/
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
}
void brw_gs_quad_strip( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 4);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
}
void brw_gs_tris( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 3);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
}
void brw_gs_lines( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 2);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
}
void brw_gs_points( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 1);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
}


@ -0,0 +1,149 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
struct brw_gs_unit_key {
unsigned int total_grf;
unsigned int urb_entry_read_length;
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
GLboolean prog_active;
};
static void
gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
{
memset(key, 0, sizeof(*key));
/* CACHE_NEW_GS_PROG */
key->prog_active = brw->gs.prog_active;
if (key->prog_active) {
key->total_grf = brw->gs.prog_data->total_grf;
key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
} else {
key->total_grf = 1;
key->urb_entry_read_length = 1;
}
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.clip_start;
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_gs_entries;
key->urb_size = brw->urb.vsize;
}
static dri_bo *
gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
{
struct brw_gs_unit_state gs;
dri_bo *bo;
memset(&gs, 0, sizeof(gs));
gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
if (key->prog_active) /* reloc */
gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
gs.thread1.single_program_flow = 1;
gs.thread3.dispatch_grf_start_reg = 1;
gs.thread3.const_urb_entry_read_offset = 0;
gs.thread3.const_urb_entry_read_length = 0;
gs.thread3.urb_entry_read_offset = 0;
gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
gs.thread4.nr_urb_entries = key->nr_urb_entries;
gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
if (key->nr_urb_entries >= 8)
gs.thread4.max_threads = 1;
else
gs.thread4.max_threads = 0;
if (BRW_IS_IGDNG(brw))
gs.thread4.rendering_enable = 1;
if (INTEL_DEBUG & DEBUG_STATS)
gs.thread4.stats_enable = 1;
bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
key, sizeof(*key),
&brw->gs.prog_bo, 1,
&gs, sizeof(gs),
NULL, NULL);
if (key->prog_active) {
/* Emit GS program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
gs.thread0.grf_reg_count << 1,
offsetof(struct brw_gs_unit_state, thread0),
brw->gs.prog_bo);
}
return bo;
}
static void prepare_gs_unit(struct brw_context *brw)
{
struct brw_gs_unit_key key;
gs_unit_populate_key(brw, &key);
dri_bo_unreference(brw->gs.state_bo);
brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
&key, sizeof(key),
&brw->gs.prog_bo, 1,
NULL);
if (brw->gs.state_bo == NULL) {
brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
}
}
const struct brw_tracked_state brw_gs_unit = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_GS_PROG
},
.prepare = prepare_gs_unit,
};


@ -0,0 +1,545 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
/***********************************************************************
* Blend color
*/
static void upload_blend_constant_color(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_blend_constant_color bcc;
memset(&bcc, 0, sizeof(bcc));
bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
bcc.header.length = sizeof(bcc)/4-2;
bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
BRW_CACHED_BATCH_STRUCT(brw, &bcc);
}
const struct brw_tracked_state brw_blend_constant_color = {
.dirty = {
.mesa = _NEW_COLOR,
.brw = 0,
.cache = 0
},
.emit = upload_blend_constant_color
};
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLcontext *ctx = &intel->ctx;
if (!intel->constant_cliprect)
return;
BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
((ctx->DrawBuffer->Height - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_drawing_rect = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = 0,
.cache = 0
},
.emit = upload_drawing_rect
};
static void prepare_binding_table_pointers(struct brw_context *brw)
{
brw_add_validated_bo(brw, brw->vs.bind_bo);
brw_add_validated_bo(brw, brw->wm.bind_bo);
}
/**
* Upload the binding table pointers, which point to each stage's array of surface
* state pointers.
*
* The binding table pointers are relative to the surface state base address,
* which is 0.
*/
static void upload_binding_table_pointers(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
if (brw->vs.bind_bo != NULL)
OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
else
OUT_BATCH(0);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_SURF_BIND,
},
.prepare = prepare_binding_table_pointers,
.emit = upload_binding_table_pointers,
};
/**
* Upload pointers to the per-stage state.
*
* The state pointers in this packet are all relative to the general state
* base address set by CMD_STATE_BASE_ADDRESS, which is 0.
*/
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(7, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
if (brw->gs.prog_active)
OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
else
OUT_BATCH(0);
OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
ADVANCE_BATCH();
brw->state.dirty.brw |= BRW_NEW_PSP;
}
static void prepare_psp_urb_cbs(struct brw_context *brw)
{
brw_add_validated_bo(brw, brw->vs.state_bo);
brw_add_validated_bo(brw, brw->gs.state_bo);
brw_add_validated_bo(brw, brw->clip.state_bo);
brw_add_validated_bo(brw, brw->sf.state_bo);
brw_add_validated_bo(brw, brw->wm.state_bo);
brw_add_validated_bo(brw, brw->cc.state_bo);
}
static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
brw_upload_cs_urb_state(brw);
}
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
.cache = (CACHE_NEW_VS_UNIT |
CACHE_NEW_GS_UNIT |
CACHE_NEW_GS_PROG |
CACHE_NEW_CLIP_UNIT |
CACHE_NEW_SF_UNIT |
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
.prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
static void prepare_depthbuffer(struct brw_context *brw)
{
struct intel_region *region = brw->state.depth_region;
if (region != NULL)
brw_add_validated_bo(brw, region->buffer);
}
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
if (region == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
unsigned int format;
switch (region->cpp) {
case 2:
format = BRW_DEPTHFORMAT_D16_UNORM;
break;
case 4:
if (intel->depth_buffer_is_float)
format = BRW_DEPTHFORMAT_D32_FLOAT;
else
format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
break;
default:
assert(0);
return;
}
assert(region->tiling != I915_TILING_X);
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
((region->tiling != I915_TILING_NONE) << 27) |
(BRW_SURFACE_2D << 29));
OUT_RELOC(region->buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
((region->pitch - 1) << 6) |
((region->height - 1) << 19));
OUT_BATCH(0);
if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
.cache = 0,
},
.prepare = prepare_depthbuffer,
.emit = emit_depthbuffer,
};
/***********************************************************************
* Polygon stipple packet
*/
static void upload_polygon_stipple(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_polygon_stipple bps;
GLuint i;
memset(&bps, 0, sizeof(bps));
bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
bps.header.length = sizeof(bps)/4-2;
/* Polygon stipple is provided in OpenGL order, i.e. bottom
* row first. If we're rendering to a window (i.e. the
* default frame buffer object, 0), then we need to invert
* it to match our pixel layout. But if we're rendering
* to a FBO (i.e. any named frame buffer object), we *don't*
* need to invert - we already match the layout.
*/
if (ctx->DrawBuffer->Name == 0) {
for (i = 0; i < 32; i++)
bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
}
else {
for (i = 0; i < 32; i++)
bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
}
BRW_CACHED_BATCH_STRUCT(brw, &bps);
}
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
.mesa = _NEW_POLYGONSTIPPLE,
.brw = 0,
.cache = 0
},
.emit = upload_polygon_stipple
};
/***********************************************************************
* Polygon stipple offset packet
*/
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
__DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
struct brw_polygon_stipple_offset bpso;
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
/* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
* we have to invert the Y axis in order to match the OpenGL
* pixel coordinate system, and our offset must be matched
* to the window position. If we're drawing to a FBO
* (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
* system works just fine, and there's no window system to
* worry about.
*/
if (brw->intel.ctx.DrawBuffer->Name == 0) {
bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
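/* Worked example (illustrative): for a window at dPriv->x = 100,
* x_offset = (32 - (100 & 31)) & 31 = 28, matching the 32x32 pattern
* to the window position as described above.
*/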
bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
}
else {
bpso.bits0.y_offset = 0;
bpso.bits0.x_offset = 0;
}
BRW_CACHED_BATCH_STRUCT(brw, &bpso);
}
#define _NEW_WINDOW_POS 0x40000000
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
.mesa = _NEW_WINDOW_POS,
.brw = 0,
.cache = 0
},
.emit = upload_polygon_stipple_offset
};
/**********************************************************************
* AA Line parameters
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
if (BRW_IS_965(brw))
return;
/* use legacy aa line coverage computation */
memset(&balp, 0, sizeof(balp));
balp.header.opcode = CMD_AA_LINE_PARAMETERS;
balp.header.length = sizeof(balp) / 4 - 2;
BRW_CACHED_BATCH_STRUCT(brw, &balp);
}
const struct brw_tracked_state brw_aa_line_parameters = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_aa_line_parameters
};
/***********************************************************************
* Line stipple packet
*/
static void upload_line_stipple(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_line_stipple bls;
GLfloat tmp;
GLint tmpi;
memset(&bls, 0, sizeof(bls));
bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
bls.header.length = sizeof(bls)/4 - 2;
bls.bits0.pattern = ctx->Line.StipplePattern;
bls.bits1.repeat_count = ctx->Line.StippleFactor;
tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
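/* The scale by 1<<13 stores the reciprocal as a fixed-point value with
* 13 fractional bits; e.g. a stipple factor of 4 gives
* tmpi = (1.0 / 4) * 8192 = 2048.
*/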
bls.bits1.inverse_repeat_count = tmpi;
BRW_CACHED_BATCH_STRUCT(brw, &bls);
}
const struct brw_tracked_state brw_line_stipple = {
.dirty = {
.mesa = _NEW_LINE,
.brw = 0,
.cache = 0
},
.emit = upload_line_stipple
};
/***********************************************************************
* Misc invariant state packets
*/
static void upload_invarient_state( struct brw_context *brw )
{
{
/* 0x61040000 Pipeline Select */
/* PipelineSelect : 0 */
struct brw_pipeline_select ps;
memset(&ps, 0, sizeof(ps));
ps.header.opcode = CMD_PIPELINE_SELECT(brw);
ps.header.pipeline_select = 0;
BRW_BATCH_STRUCT(brw, &ps);
}
{
struct brw_global_depth_offset_clamp gdo;
memset(&gdo, 0, sizeof(gdo));
/* Disable depth offset clamping.
*/
gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
gdo.header.length = sizeof(gdo)/4 - 2;
gdo.depth_offset_clamp = 0.0;
BRW_BATCH_STRUCT(brw, &gdo);
}
/* 0x61020000 State Instruction Pointer */
{
struct brw_system_instruction_pointer sip;
memset(&sip, 0, sizeof(sip));
sip.header.opcode = CMD_STATE_INSN_POINTER;
sip.header.length = 0;
sip.bits0.pad = 0;
sip.bits0.system_instruction_pointer = 0;
BRW_BATCH_STRUCT(brw, &sip);
}
{
struct brw_vf_statistics vfs;
memset(&vfs, 0, sizeof(vfs));
vfs.opcode = CMD_VF_STATISTICS(brw);
if (INTEL_DEBUG & DEBUG_STATS)
vfs.statistics_enable = 1;
BRW_BATCH_STRUCT(brw, &vfs);
}
}
const struct brw_tracked_state brw_invarient_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_invarient_state
};
/**
* Define the base addresses which some state is referenced from.
*
* This allows us to avoid having to emit relocations in many places for
* cached state, and instead emit pointers inside of large, mostly-static
* state pools. This comes at the expense of memory, and more expensive cache
* misses.
*/
static void upload_state_base_address( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
if (BRW_IS_IGDNG(brw)) {
BEGIN_BATCH(8, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
OUT_BATCH(1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* Instruction base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else {
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
OUT_BATCH(1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
ADVANCE_BATCH();
}
}
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0,
},
.emit = upload_state_base_address
};


@ -0,0 +1,166 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/imports.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/program.h"
#include "shader/programopt.h"
#include "tnl/tnl.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
static void brwBindProgram( GLcontext *ctx,
GLenum target,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
break;
case GL_FRAGMENT_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
break;
}
}
static struct gl_program *brwNewProgram( GLcontext *ctx,
GLenum target,
GLuint id )
{
struct brw_context *brw = brw_context(ctx);
switch (target) {
case GL_VERTEX_PROGRAM_ARB: {
struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
if (prog) {
prog->id = brw->program_id++;
return _mesa_init_vertex_program( ctx, &prog->program,
target, id );
}
else
return NULL;
}
case GL_FRAGMENT_PROGRAM_ARB: {
struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
if (prog) {
prog->id = brw->program_id++;
return _mesa_init_fragment_program( ctx, &prog->program,
target, id );
}
else
return NULL;
}
default:
return _mesa_new_program(ctx, target, id);
}
}
static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
dri_bo_unreference(brw_fprog->const_buffer);
}
_mesa_delete_program( ctx, prog );
}
static GLboolean brwIsProgramNative( GLcontext *ctx,
GLenum target,
struct gl_program *prog )
{
return GL_TRUE;
}
static void brwProgramStringNotify( GLcontext *ctx,
GLenum target,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_fragment_program *newFP = brw_fragment_program(fprog);
const struct brw_fragment_program *curFP =
brw_fragment_program_const(brw->fragment_program);
if (fprog->FogOption) {
_mesa_append_fog_code(ctx, fprog);
fprog->FogOption = GL_NONE;
}
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
newFP->isGLSL = brw_wm_is_glsl(fprog);
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
struct brw_vertex_program *newVP = brw_vertex_program(vprog);
const struct brw_vertex_program *curVP =
brw_vertex_program_const(brw->vertex_program);
if (newVP == curVP)
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
if (newVP->program.IsPositionInvariant) {
_mesa_insert_mvp_code(ctx, &newVP->program);
}
newVP->id = brw->program_id++;
/* Also tell tnl about it:
*/
_tnl_program_string(ctx, target, prog);
}
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
assert(functions->ProgramStringNotify == _tnl_program_string);
functions->BindProgram = brwBindProgram;
functions->NewProgram = brwNewProgram;
functions->DeleteProgram = brwDeleteProgram;
functions->IsProgramNative = brwIsProgramNative;
functions->ProgramStringNotify = brwProgramStringNotify;
}


@ -0,0 +1,254 @@
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
/** @file support for ARB_query_object
*
* ARB_query_object is implemented by using the PIPE_CONTROL command to stall
* execution on the completion of previous depth tests, and write the
* current PS_DEPTH_COUNT to a buffer object.
*
* We use before and after counts when drawing during a query so that
* we don't pick up other clients' query data in ours. To reduce overhead,
* a single BO is used to record the query data for all active queries at
* once. This also gives us a simple bound on how much batchbuffer space is
* required for handling queries, so that we can be sure that we won't
* have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
*/
#include "main/simple_list.h"
#include "main/imports.h"
#include "brw_context.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
/** Waits on the query object's BO and totals the results for this query */
static void
brw_queryobj_get_results(struct brw_query_object *query)
{
int i;
uint64_t *results;
if (query->bo == NULL)
return;
/* Map and count the pixels from the current query BO */
dri_bo_map(query->bo, GL_FALSE);
results = query->bo->virtual;
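/* The BO holds (begin, end) PS_DEPTH_COUNT pairs at results[i*2] and
* results[i*2 + 1] for each interval this query was active; summing the
* differences gives the query result.
*/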
for (i = query->first_index; i <= query->last_index; i++) {
query->Base.Result += results[i * 2 + 1] - results[i * 2];
}
dri_bo_unmap(query->bo);
dri_bo_unreference(query->bo);
query->bo = NULL;
}
static struct gl_query_object *
brw_new_query_object(GLcontext *ctx, GLuint id)
{
struct brw_query_object *query;
query = _mesa_calloc(sizeof(struct brw_query_object));
query->Base.Id = id;
query->Base.Result = 0;
query->Base.Active = GL_FALSE;
query->Base.Ready = GL_TRUE;
return &query->Base;
}
static void
brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
dri_bo_unreference(query->bo);
_mesa_free(query);
}
static void
brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = intel_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
/* Reset our driver's tracking of query state. */
dri_bo_unreference(query->bo);
query->bo = NULL;
query->first_index = -1;
query->last_index = -1;
insert_at_head(&brw->query.active_head, query);
intel->stats_wm++;
}
/**
* End the ARB_occlusion_query query on a query object.
*/
static void
brw_end_query(GLcontext *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = intel_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
/* Flush the batchbuffer in case it has writes to our query BO.
* Have later queries write to a new query BO so that further rendering
* doesn't delay the collection of our results.
*/
if (query->bo) {
brw_emit_query_end(brw);
intel_batchbuffer_flush(intel->batch);
dri_bo_unreference(brw->query.bo);
brw->query.bo = NULL;
}
remove_from_list(query);
intel->stats_wm--;
}
static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
brw_queryobj_get_results(query);
query->Base.Ready = GL_TRUE;
}
static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
brw_queryobj_get_results(query);
query->Base.Ready = GL_TRUE;
}
}
/** Called to set up the query BO and account for its aperture space */
void
brw_prepare_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
/* Skip if we're not doing any queries. */
if (is_empty_list(&brw->query.active_head))
return;
/* Get a new query BO if we're going to need it. */
if (brw->query.bo == NULL ||
brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
dri_bo_unreference(brw->query.bo);
brw->query.bo = NULL;
brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
brw->query.index = 0;
}
brw_add_validated_bo(brw, brw->query.bo);
}
/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
void
brw_emit_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct brw_query_object *query;
/* Skip if we're not doing any queries, or we've emitted the start. */
if (brw->query.active || is_empty_list(&brw->query.active_head))
return;
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_PIPE_CONTROL |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_WRITE_DEPTH_COUNT);
/* This object could be mapped cacheable, but we don't have an exposed
* mechanism to support that. Since it's going uncached, tell GEM that
* we're writing to it. The usual clflush should be all that's required
* to pick up the results.
*/
OUT_RELOC(brw->query.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
((brw->query.index * 2) * sizeof(uint64_t)));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
foreach(query, &brw->query.active_head) {
if (query->bo != brw->query.bo) {
if (query->bo != NULL)
brw_queryobj_get_results(query);
dri_bo_reference(brw->query.bo);
query->bo = brw->query.bo;
query->first_index = brw->query.index;
}
query->last_index = brw->query.index;
}
brw->query.active = GL_TRUE;
}
/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
void
brw_emit_query_end(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
if (!brw->query.active)
return;
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_PIPE_CONTROL |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_WRITE_DEPTH_COUNT);
OUT_RELOC(brw->query.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
((brw->query.index * 2 + 1) * sizeof(uint64_t)));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
brw->query.active = GL_FALSE;
brw->query.index++;
}
void brw_init_queryobj_functions(struct dd_function_table *functions)
{
functions->NewQueryObject = brw_new_query_object;
functions->DeleteQuery = brw_delete_query;
functions->BeginQuery = brw_begin_query;
functions->EndQuery = brw_end_query;
functions->CheckQuery = brw_check_query;
functions->WaitQuery = brw_wait_query;
}


@ -0,0 +1,200 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_sf.h"
#include "brw_state.h"
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_sf_compile c;
const GLuint *program;
GLuint program_size;
GLuint i, idx;
memset(&c, 0, sizeof(c));
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func);
c.key = *key;
c.nr_attrs = brw_count_bits(c.key.attrs);
c.nr_attr_regs = (c.nr_attrs+1)/2;
c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
/* Construct map from attribute number to position in the vertex.
*/
for (i = idx = 0; i < VERT_RESULT_MAX; i++)
if (c.key.attrs & (1<<i)) {
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
c.point_attrs[i].CoordReplace =
ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
}
else {
c.point_attrs[i].CoordReplace = GL_FALSE;
}
idx++;
}
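/* Attributes present in the key get consecutive slots in ascending
* VERT_RESULT order; e.g. a vertex with position, one color and one
* texcoord ends up with those attributes at idx 0, 1 and 2.
*/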
/* Which primitive? Or all three?
*/
switch (key->primitive) {
case SF_TRIANGLES:
c.nr_verts = 3;
brw_emit_tri_setup( &c, GL_TRUE );
break;
case SF_LINES:
c.nr_verts = 2;
brw_emit_line_setup( &c, GL_TRUE );
break;
case SF_POINTS:
c.nr_verts = 1;
if (key->do_point_sprite)
brw_emit_point_sprite_setup( &c, GL_TRUE );
else
brw_emit_point_setup( &c, GL_TRUE );
break;
case SF_UNFILLED_TRIS:
c.nr_verts = 3;
brw_emit_anyprim_setup( &c );
break;
default:
assert(0);
return;
}
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
/* Upload
*/
dri_bo_unreference(brw->sf.prog_bo);
brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
&c.key, sizeof(c.key),
NULL, 0,
program, program_size,
&c.prog_data,
&brw->sf.prog_data );
}
/* Calculate interpolants for triangle and line rasterization.
*/
static void upload_sf_prog(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_sf_prog_key key;
memset(&key, 0, sizeof(key));
/* Populate the key, noting state dependencies:
*/
/* CACHE_NEW_VS_PROG */
key.attrs = brw->vs.prog_data->outputs_written;
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->intel.reduced_primitive) {
case GL_TRIANGLES:
/* NOTE: We just use the edgeflag attribute as an indicator that
* unfilled triangles are active. We don't actually do the
* edgeflag testing here, it is already done in the clip
* program.
*/
if (key.attrs & (1<<VERT_RESULT_EDGE))
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
break;
case GL_LINES:
key.primitive = SF_LINES;
break;
case GL_POINTS:
key.primitive = SF_POINTS;
break;
}
key.do_point_sprite = ctx->Point.PointSprite;
key.SpriteOrigin = ctx->Point.SpriteOrigin;
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_HINT */
key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
* face orientation, just as we invert the viewport in
* sf_unit_create_from_key(). ctx->DrawBuffer->Name will be
* nonzero if we're rendering to such an FBO.
*/
key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
}
dri_bo_unreference(brw->sf.prog_bo);
brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
&key, sizeof(key),
NULL, 0,
&brw->sf.prog_data);
if (brw->sf.prog_bo == NULL)
compile_sf_prog( brw, &key );
}
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
.mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
.brw = (BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
},
.prepare = upload_sf_prog
};


@ -0,0 +1,113 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_SF_H
#define BRW_SF_H
#include "shader/program.h"
#include "brw_context.h"
#include "brw_eu.h"
#define SF_POINTS 0
#define SF_LINES 1
#define SF_TRIANGLES 2
#define SF_UNFILLED_TRIS 3
struct brw_sf_prog_key {
GLuint attrs:32;
GLuint primitive:2;
GLuint do_twoside_color:1;
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
GLuint linear_color:1; /**< linear interp vs. perspective interp */
GLuint pad:25;
GLenum SpriteOrigin;
};
struct brw_sf_point_tex {
GLboolean CoordReplace;
};
struct brw_sf_compile {
struct brw_compile func;
struct brw_sf_prog_key key;
struct brw_sf_prog_data prog_data;
struct brw_reg pv;
struct brw_reg det;
struct brw_reg dx0;
struct brw_reg dx2;
struct brw_reg dy0;
struct brw_reg dy2;
/* z and 1/w passed in separately:
*/
struct brw_reg z[3];
struct brw_reg inv_w[3];
/* The vertices:
*/
struct brw_reg vert[3];
/* Temporaries, allocated after last vertex reg.
*/
struct brw_reg inv_det;
struct brw_reg a1_sub_a0;
struct brw_reg a2_sub_a0;
struct brw_reg tmp;
struct brw_reg m1Cx;
struct brw_reg m2Cy;
struct brw_reg m3C0;
GLuint nr_verts;
GLuint nr_attrs;
GLuint nr_attr_regs;
GLuint nr_setup_attrs;
GLuint nr_setup_regs;
GLubyte attr_to_idx[VERT_RESULT_MAX];
GLubyte idx_to_attr[VERT_RESULT_MAX];
struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
};
void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
void brw_emit_anyprim_setup( struct brw_sf_compile *c );
#endif


@ -0,0 +1,739 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_sf.h"
static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
struct brw_reg vert,
GLuint attr)
{
GLuint off = c->attr_to_idx[attr] / 2;
GLuint sub = c->attr_to_idx[attr] % 2;
return brw_vec4_grf(vert.nr + off, sub * 4);
}
static GLboolean have_attr(struct brw_sf_compile *c,
GLuint attr)
{
return (c->key.attrs & (1<<attr)) ? 1 : 0;
}
/***********************************************************************
* Twoside lighting
*/
static void copy_bfc( struct brw_sf_compile *c,
struct brw_reg vert )
{
struct brw_compile *p = &c->func;
GLuint i;
for (i = 0; i < 2; i++) {
if (have_attr(c, VERT_RESULT_COL0+i) &&
have_attr(c, VERT_RESULT_BFC0+i))
brw_MOV(p,
get_vert_attr(c, vert, VERT_RESULT_COL0+i),
get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
}
}
static void do_twoside_color( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_instruction *if_insn;
GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
/* XXX: What happens if BFC isn't present? This could only happen
* for user-supplied vertex programs, as t_vp_build.c always does
* the right thing.
*/
if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
!(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
return;
/* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
* to get all channels active inside the IF. In the clipping code
* we run with NoMask, so it's not an option and we can use
* BRW_EXECUTE_1 for all comparisons.
*/
brw_push_insn_state(p);
brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
if_insn = brw_IF(p, BRW_EXECUTE_4);
{
switch (c->nr_verts) {
case 3: copy_bfc(c, c->vert[2]);
case 2: copy_bfc(c, c->vert[1]);
case 1: copy_bfc(c, c->vert[0]);
}
}
brw_ENDIF(p, if_insn);
brw_pop_insn_state(p);
}
/***********************************************************************
* Flat shading
*/
#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
(1<<VERT_RESULT_COL1))
static void copy_colors( struct brw_sf_compile *c,
struct brw_reg dst,
struct brw_reg src)
{
struct brw_compile *p = &c->func;
GLuint i;
for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
if (have_attr(c,i))
brw_MOV(p,
get_vert_attr(c, dst, i),
get_vert_attr(c, src, i));
}
}
/* Need to use a computed jump to copy flatshaded attributes as the
* vertices are ordered according to y-coordinate before reaching this
* point, so the PV could be anywhere.
*/
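/* Layout of the jump table below: the first two cases each emit 2*nr
* colour MOVs plus a trailing JMPI, so the provoking-vertex index is
* scaled by jmpi*(nr*2+1) to select its case, and the forward jumps of
* jmpi*(nr*4+1) and jmpi*nr*2 skip over the remaining cases (jmpi
* doubles the offsets on IGDNG).
*/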
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
if (!nr)
return;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
if (BRW_IS_IGDNG(p->brw))
jmpi = 2;
brw_push_insn_state(p);
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
copy_colors(c, c->vert[2], c->vert[0]);
brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
copy_colors(c, c->vert[0], c->vert[1]);
copy_colors(c, c->vert[2], c->vert[1]);
brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
copy_colors(c, c->vert[0], c->vert[2]);
copy_colors(c, c->vert[1], c->vert[2]);
brw_pop_insn_state(p);
}
static void do_flatshade_line( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
if (!nr)
return;
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
if (BRW_IS_IGDNG(p->brw))
jmpi = 2;
brw_push_insn_state(p);
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
copy_colors(c, c->vert[0], c->vert[1]);
brw_pop_insn_state(p);
}
/***********************************************************************
* Triangle setup.
*/
static void alloc_regs( struct brw_sf_compile *c )
{
GLuint reg, i;
/* Values computed by fixed function unit:
*/
c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
c->det = brw_vec1_grf(1, 2);
c->dx0 = brw_vec1_grf(1, 3);
c->dx2 = brw_vec1_grf(1, 4);
c->dy0 = brw_vec1_grf(1, 5);
c->dy2 = brw_vec1_grf(1, 6);
/* z and 1/w passed in separately:
*/
c->z[0] = brw_vec1_grf(2, 0);
c->inv_w[0] = brw_vec1_grf(2, 1);
c->z[1] = brw_vec1_grf(2, 2);
c->inv_w[1] = brw_vec1_grf(2, 3);
c->z[2] = brw_vec1_grf(2, 4);
c->inv_w[2] = brw_vec1_grf(2, 5);
/* The vertices:
*/
reg = 3;
for (i = 0; i < c->nr_verts; i++) {
c->vert[i] = brw_vec8_grf(reg, 0);
reg += c->nr_attr_regs;
}
/* Temporaries, allocated after last vertex reg.
*/
c->inv_det = brw_vec1_grf(reg, 0); reg++;
c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
c->tmp = brw_vec8_grf(reg, 0); reg++;
/* Note grf allocation:
*/
c->prog_data.total_grf = reg;
/* Outputs of this program - interpolation coefficients for
* rasterization:
*/
c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
}
static void copy_z_inv_w( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
GLuint i;
brw_push_insn_state(p);
/* Copy both scalars with a single MOV:
*/
for (i = 0; i < c->nr_verts; i++)
brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
brw_pop_insn_state(p);
}
static void invert_det( struct brw_sf_compile *c)
{
/* Looks like we invert all 8 elements just to get 1/det in
* position 2 !?!
*/
brw_math(&c->func,
c->inv_det,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
c->det,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
}
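/* Compute predicate flag values for one register pair: the low nibble of
* each mask covers the four channels of the first attribute in the pair,
* the high nibble the second when present. pc selects the channels
* written at all, pc_persp those premultiplied by 1/w, and pc_linear
* those for which interpolation coefficients are emitted.
*/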
static GLboolean calculate_masks( struct brw_sf_compile *c,
GLuint reg,
GLushort *pc,
GLushort *pc_persp,
GLushort *pc_linear)
{
GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
GLuint persp_mask;
GLuint linear_mask;
if (c->key.do_flat_shading || c->key.linear_color)
persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
FRAG_BIT_COL0 |
FRAG_BIT_COL1);
else
persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
if (c->key.do_flat_shading)
linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
else
linear_mask = c->key.attrs;
*pc_persp = 0;
*pc_linear = 0;
*pc = 0xf;
if (persp_mask & (1 << c->idx_to_attr[reg*2]))
*pc_persp = 0xf;
if (linear_mask & (1 << c->idx_to_attr[reg*2]))
*pc_linear = 0xf;
/* Maybe only process one attribute on the final round:
*/
if (reg*2+1 < c->nr_setup_attrs) {
*pc |= 0xf0;
if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
*pc_persp |= 0xf0;
if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
*pc_linear |= 0xf0;
}
return is_last_attr;
}
void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
c->nr_verts = 3;
if (allocate)
alloc_regs(c);
invert_det(c);
copy_z_inv_w(c);
if (c->key.do_twoside_color)
do_twoside_color(c);
if (c->key.do_flat_shading)
do_flatshade_triangle(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct brw_reg a0 = offset(c->vert[0], i);
struct brw_reg a1 = offset(c->vert[1], i);
struct brw_reg a2 = offset(c->vert[2], i);
GLushort pc, pc_persp, pc_linear;
GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
brw_MUL(p, a1, a1, c->inv_w[1]);
brw_MUL(p, a2, a2, c->inv_w[2]);
}
/* Calculate coefficients for interpolated values:
*/
if (pc_linear)
{
brw_set_predicate_control_flag_value(p, pc_linear);
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
/* calculate dA/dx
*/
brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
/* calculate dA/dy
*/
brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
{
brw_set_predicate_control_flag_value(p, pc);
/* start point for interpolation
*/
brw_MOV(p, c->m3C0, a0);
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
* the send instruction:
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* offset */
BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
}
}
}
void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
c->nr_verts = 2;
if (allocate)
alloc_regs(c);
invert_det(c);
copy_z_inv_w(c);
if (c->key.do_flat_shading)
do_flatshade_line(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct brw_reg a0 = offset(c->vert[0], i);
struct brw_reg a1 = offset(c->vert[1], i);
GLushort pc, pc_persp, pc_linear;
GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
brw_MUL(p, a1, a1, c->inv_w[1]);
}
/* Calculate coefficients for position, color:
*/
if (pc_linear) {
brw_set_predicate_control_flag_value(p, pc_linear);
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
{
brw_set_predicate_control_flag_value(p, pc);
/* start point for interpolation
*/
brw_MOV(p, c->m3C0, a0);
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
c->nr_verts = 1;
if (allocate)
alloc_regs(c);
copy_z_inv_w(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
struct brw_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear;
GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
if (!tex->CoordReplace) {
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
}
}
if (tex->CoordReplace) {
/* Calculate 1.0/PointWidth */
brw_math(&c->func,
c->tmp,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
c->dx0,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
} else {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
}
} else {
brw_MOV(p, c->m1Cx, brw_imm_ud(0));
brw_MOV(p, c->m2Cy, brw_imm_ud(0));
}
{
brw_set_predicate_control_flag_value(p, pc);
if (tex->CoordReplace) {
if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
}
else
brw_MOV(p, c->m3C0, brw_imm_f(0.0));
} else {
brw_MOV(p, c->m3C0, a0); /* constant value */
}
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
/* Points setup - several simplifications as all attributes are
* constant across the face of the point (point sprites excluded!)
*/
void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
c->nr_verts = 1;
if (allocate)
alloc_regs(c);
copy_z_inv_w(c);
brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
for (i = 0; i < c->nr_setup_regs; i++)
{
struct brw_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear;
GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
/* This seems odd as the values are all constant, but the
* fragment shader will be expecting it:
*/
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
}
/* The delta values are always zero, just send the starting
* coordinate. Again, this is to fit in with the interpolation
* code in the fragment shader.
*/
{
brw_set_predicate_control_flag_value(p, pc);
brw_MOV(p, c->m3C0, a0); /* constant value */
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
struct brw_reg primmask;
struct brw_instruction *jmp;
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
GLuint saveflag;
c->nr_verts = 3;
alloc_regs(c);
primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
brw_MOV(p, primmask, brw_imm_ud(1));
brw_SHL(p, primmask, primmask, payload_prim);
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
(1<<_3DPRIM_TRISTRIP) |
(1<<_3DPRIM_TRIFAN) |
(1<<_3DPRIM_TRISTRIP_REVERSE) |
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_tri_setup( c, GL_FALSE );
brw_pop_insn_state(p);
p->flag_value = saveflag;
/* note - thread killed in subroutine, so must
* restore the flag which is changed when building
* the subroutine. fix #13240
*/
}
brw_land_fwd_jump(p, jmp);
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
(1<<_3DPRIM_LINESTRIP) |
(1<<_3DPRIM_LINELOOP) |
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_line_setup( c, GL_FALSE );
brw_pop_insn_state(p);
p->flag_value = saveflag;
/* note - thread killed in subroutine */
}
brw_land_fwd_jump(p, jmp);
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_point_sprite_setup( c, GL_FALSE );
brw_pop_insn_state(p);
p->flag_value = saveflag;
}
brw_land_fwd_jump(p, jmp);
brw_emit_point_setup( c, GL_FALSE );
}

View File

@ -0,0 +1,365 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
#include "intel_fbo.h"
static void upload_sf_vp(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
const GLfloat *v = ctx->Viewport._WindowMap.m;
memset(&sfv, 0, sizeof(sfv));
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
}
else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
/* _NEW_VIEWPORT */
sfv.viewport.m00 = v[MAT_SX];
sfv.viewport.m11 = v[MAT_SY] * y_scale;
sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
sfv.viewport.m30 = v[MAT_TX];
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
/* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
* for DrawBuffer->_[XY]{min,max}
*/
/* The scissor only needs to handle the intersection of drawable and
* scissor rect. Clipping to the boundaries of static shared buffers
* for front/back/depth is covered by looping over cliprects in brw_draw.c.
*
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
*/
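/* For example, a scissor-disabled 640x480 window drawable (_Xmin=0,
* _Xmax=640, _Ymin=0, _Ymax=480) becomes the inclusive hardware rect
* xmin=0, xmax=639, ymin=480-480=0, ymax=480-0-1=479 after the Y flip.
*/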
if (render_to_fbo) {
/* texmemory: Y=0=bottom */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
}
else {
/* memory: Y=0=top */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
dri_bo_unreference(brw->sf.vp_bo);
brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
}
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
.mesa = (_NEW_VIEWPORT |
_NEW_SCISSOR |
_NEW_BUFFERS),
.brw = 0,
.cache = 0
},
.prepare = upload_sf_vp
};
struct brw_sf_unit_key {
unsigned int total_grf;
unsigned int urb_entry_read_length;
unsigned int nr_urb_entries, urb_size, sfsize;
GLenum front_face, cull_face, provoking_vertex;
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
unsigned point_attenuated:1;
unsigned render_to_fbo:1;
float line_width;
float point_size;
};
static void
sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_SF_PROG */
key->total_grf = brw->sf.prog_data->total_grf;
key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_sf_entries;
key->urb_size = brw->urb.vsize;
key->sfsize = brw->urb.sfsize;
key->scissor = ctx->Scissor.Enabled;
key->front_face = ctx->Polygon.FrontFace;
if (ctx->Polygon.CullFlag)
key->cull_face = ctx->Polygon.CullFaceMode;
else
key->cull_face = GL_NONE;
key->line_width = ctx->Line.Width;
key->line_smooth = ctx->Line.SmoothFlag;
key->point_sprite = ctx->Point.PointSprite;
key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
key->point_attenuated = ctx->Point._Attenuated;
/* _NEW_LIGHT */
key->provoking_vertex = ctx->Light.ProvokingVertex;
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
static dri_bo *
sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
dri_bo **reloc_bufs)
{
struct brw_sf_unit_state sf;
dri_bo *bo;
int chipset_max_threads;
memset(&sf, 0, sizeof(sf));
sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
sf.thread3.dispatch_grf_start_reg = 3;
if (BRW_IS_IGDNG(brw))
sf.thread3.urb_entry_read_offset = 3;
else
sf.thread3.urb_entry_read_offset = 1;
sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
/* Each SF thread produces 1 PUE, and there can be up to 24 (pre-IGDNG) or
* 48 (IGDNG) threads.
*/
if (BRW_IS_IGDNG(brw))
chipset_max_threads = 48;
else
chipset_max_threads = 24;
sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
sf.thread4.max_threads = 0;
if (INTEL_DEBUG & DEBUG_STATS)
sf.thread4.stats_enable = 1;
/* CACHE_NEW_SF_VP */
sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
sf.sf5.viewport_transform = 1;
/* _NEW_SCISSOR */
if (key->scissor)
sf.sf6.scissor = 1;
/* _NEW_POLYGON */
if (key->front_face == GL_CCW)
sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
/* The viewport is inverted for rendering to a FBO, and that inverts
* polygon front/back orientation.
*/
sf.sf5.front_winding ^= key->render_to_fbo;
switch (key->cull_face) {
case GL_FRONT:
sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
break;
case GL_BACK:
sf.sf6.cull_mode = BRW_CULLMODE_BACK;
break;
case GL_FRONT_AND_BACK:
sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
break;
case GL_NONE:
sf.sf6.cull_mode = BRW_CULLMODE_NONE;
break;
default:
assert(0);
break;
}
/* _NEW_LINE */
/* XXX use ctx->Const.Min/MaxLineWidth here */
sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
sf.sf6.line_endcap_aa_region_width = 1;
if (key->line_smooth)
sf.sf6.aa_enable = 1;
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
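/* The multiply by (1<<1) above suggests the width is taken in fixed point
* with one fractional bit; non-smoothed lines of width <= 1.0 (encoded
* value <= 0x2) are reset to 0, presumably requesting the hardware's
* nominal one-pixel line.
*/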
/* _NEW_BUFFERS */
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
if (!key->render_to_fbo) {
/* Rendering to an OpenGL window */
sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
}
else {
/* If rendering to an FBO, the pixel coordinate system is
* inverted with respect to the normal OpenGL coordinate
* system, so BRW_RASTRULE_LOWER_RIGHT is correct.
* But this value is listed as "Reserved, but not seen as useful"
* in Intel documentation (page 212, "Point Rasterization Rule",
* section 7.4 "SF Pipeline State Summary", of document
* "Intel® 965 Express Chipset Family and Intel® G35 Express
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
* http://intellinuxgraphics.org/documentation.html
* at the time of this writing).
*
* It does work on at least some devices, if not all;
* if devices that don't support it can be identified,
* the likely failure case is that points are rasterized
* incorrectly, which is no worse than occurs without
* the value, so we're using it here.
*/
sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
}
/* XXX clamp max depends on AA vs. non-AA */
/* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
sf.sf7.use_point_size_state = !key->point_attenuated;
sf.sf7.aa_line_distance_mode = 0;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
sf.sf7.trifan_pv = 2;
sf.sf7.linestrip_pv = 1;
sf.sf7.tristrip_pv = 2;
} else {
sf.sf7.trifan_pv = 1;
sf.sf7.linestrip_pv = 0;
sf.sf7.tristrip_pv = 0;
}
sf.sf7.line_last_pixel_enable = 0;
/* Set bias for OpenGL rasterization rules:
*/
sf.sf6.dest_org_vbias = 0x8;
sf.sf6.dest_org_hbias = 0x8;
bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
key, sizeof(*key),
reloc_bufs, 2,
&sf, sizeof(sf),
NULL, NULL);
/* STATE_PREFETCH command description describes this state as being
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
*/
/* Emit SF program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
sf.thread0.grf_reg_count << 1,
offsetof(struct brw_sf_unit_state, thread0),
brw->sf.prog_bo);
/* Emit SF viewport relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
offsetof(struct brw_sf_unit_state, sf5),
brw->sf.vp_bo);
return bo;
}
static void upload_sf_unit( struct brw_context *brw )
{
struct brw_sf_unit_key key;
dri_bo *reloc_bufs[2];
sf_unit_populate_key(brw, &key);
reloc_bufs[0] = brw->sf.prog_bo;
reloc_bufs[1] = brw->sf.vp_bo;
dri_bo_unreference(brw->sf.state_bo);
brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
&key, sizeof(key),
reloc_bufs, 2,
NULL);
if (brw->sf.state_bo == NULL) {
brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
}
}
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_LIGHT |
_NEW_LINE |
_NEW_POINT |
_NEW_SCISSOR |
_NEW_BUFFERS),
.brw = BRW_NEW_URB_FENCE,
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
},
.prepare = upload_sf_unit,
};

View File

@ -0,0 +1,173 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_STATE_H
#define BRW_STATE_H
#include "brw_context.h"
static inline void
brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
{
assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
if (bo != NULL) {
dri_bo_reference(bo);
brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
}
}
const struct brw_tracked_state brw_blend_constant_color;
const struct brw_tracked_state brw_cc_unit;
const struct brw_tracked_state brw_cc_vp;
const struct brw_tracked_state brw_check_fallback;
const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
const struct brw_tracked_state brw_constant_buffer;
const struct brw_tracked_state brw_curbe_offsets;
const struct brw_tracked_state brw_invarient_state;
const struct brw_tracked_state brw_gs_prog;
const struct brw_tracked_state brw_gs_unit;
const struct brw_tracked_state brw_line_stipple;
const struct brw_tracked_state brw_aa_line_parameters;
const struct brw_tracked_state brw_pipelined_state_pointers;
const struct brw_tracked_state brw_binding_table_pointers;
const struct brw_tracked_state brw_depthbuffer;
const struct brw_tracked_state brw_polygon_stipple_offset;
const struct brw_tracked_state brw_polygon_stipple;
const struct brw_tracked_state brw_program_parameters;
const struct brw_tracked_state brw_recalculate_urb_fence;
const struct brw_tracked_state brw_sf_prog;
const struct brw_tracked_state brw_sf_unit;
const struct brw_tracked_state brw_sf_vp;
const struct brw_tracked_state brw_state_base_address;
const struct brw_tracked_state brw_urb_fence;
const struct brw_tracked_state brw_vertex_state;
const struct brw_tracked_state brw_vs_surfaces;
const struct brw_tracked_state brw_vs_prog;
const struct brw_tracked_state brw_vs_unit;
const struct brw_tracked_state brw_wm_input_sizes;
const struct brw_tracked_state brw_wm_prog;
const struct brw_tracked_state brw_wm_samplers;
const struct brw_tracked_state brw_wm_constant_surface;
const struct brw_tracked_state brw_wm_surfaces;
const struct brw_tracked_state brw_wm_unit;
const struct brw_tracked_state brw_psp_urb_cbs;
const struct brw_tracked_state brw_pipe_control;
const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
const struct brw_tracked_state brw_index_buffer;
/**
* Use same key for WM and VS surfaces.
*/
struct brw_surface_key {
GLenum target, depthmode;
dri_bo *bo;
GLint format, internal_format;
GLint first_level, last_level;
GLint width, height, depth;
GLint pitch, cpp;
uint32_t tiling;
GLuint offset;
};
/***********************************************************************
* brw_state.c
*/
void brw_validate_state(struct brw_context *brw);
void brw_upload_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
/***********************************************************************
* brw_state_cache.c
*/
dri_bo *brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
dri_bo *brw_cache_data_sz(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
GLuint data_size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
dri_bo *brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_sz,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs,
const void *data,
GLuint data_sz,
const void *aux,
void *aux_return );
dri_bo *brw_search_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
/***********************************************************************
* brw_state_batch.c
*/
#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
GLboolean brw_cached_batch_struct( struct brw_context *brw,
const void *data,
GLuint sz );
void brw_destroy_batch_cache( struct brw_context *brw );
void brw_clear_batch_cache( struct brw_context *brw );
/* brw_wm_surface_state.c */
dri_bo *
brw_create_constant_surface( struct brw_context *brw,
struct brw_surface_key *key );
#endif

View File

@ -0,0 +1,99 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "main/imports.h"
/* A facility similar to the data caching code in brw_state_cache.c,
* which aims to prevent identical commands being issued repeatedly.
*/
GLboolean brw_cached_batch_struct( struct brw_context *brw,
const void *data,
GLuint sz )
{
struct brw_cached_batch_item *item = brw->cached_batch_items;
struct header *newheader = (struct header *)data;
if (brw->emit_state_always) {
intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
return GL_TRUE;
}
while (item) {
if (item->header->opcode == newheader->opcode) {
if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
return GL_FALSE;
if (item->sz != sz) {
_mesa_free(item->header);
item->header = _mesa_malloc(sz);
item->sz = sz;
}
goto emit;
}
item = item->next;
}
assert(!item);
item = CALLOC_STRUCT(brw_cached_batch_item);
item->header = _mesa_malloc(sz);
item->sz = sz;
item->next = brw->cached_batch_items;
brw->cached_batch_items = item;
emit:
memcpy(item->header, newheader, sz);
intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
return GL_TRUE;
}
void brw_clear_batch_cache( struct brw_context *brw )
{
struct brw_cached_batch_item *item = brw->cached_batch_items;
while (item) {
struct brw_cached_batch_item *next = item->next;
free((void *)item->header);
free(item);
item = next;
}
brw->cached_batch_items = NULL;
}
void brw_destroy_batch_cache( struct brw_context *brw )
{
brw_clear_batch_cache(brw);
}

View File

@ -0,0 +1,597 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
/** @file brw_state_cache.c
*
* This file implements a simple static state cache for 965. Consumers
* query the hash table of state using a cache_id, opaque key data, and a
* list of buffers that will be used in relocations, and receive the
* corresponding state buffer object (plus associated auxiliary data) in
* return.
*
* The inner workings are a simple hash table based on a rotate-and-XOR hash
* of the key data.
* The cache_id and relocation target buffers associated with the state
* buffer are included as auxiliary key data, but are not part of the hash
* value (this should be fixed, but will likely be fixed instead by making
* consumers use structured keys).
*
* Replacement is not implemented. Instead, when the cache gets too big, at
* a safe point (unlock) we throw out all of the cache data and let it
* regenerate for the next rendering operation.
*
* The reloc_buf pointers need to be included as key data, otherwise the
* non-unique values stuffed in the offset in key data through
* brw_cache_data() may result in a successful probe for state buffers
* even when the buffer being referenced doesn't match. The result would be
* that the same state cache entry is used twice for different buffers,
* only one of the two buffers referenced gets put into the offset, and the
* incorrect program is run for the other instance.
*/
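/* Typical consumer pattern (compare upload_sf_unit() in brw_sf_state.c):
* probe the cache first and only build/upload new state on a miss, e.g.
*
*    bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
*                          &key, sizeof(key), reloc_bufs, 2, NULL);
*    if (bo == NULL)
*       bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
*/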
#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
*/
#include "brw_wm.h"
#include "brw_vs.h"
#include "brw_clip.h"
#include "brw_sf.h"
#include "brw_gs.h"
static GLuint
hash_key(const void *key, GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
GLuint *ikey = (GLuint *)key;
GLuint hash = 0, i;
assert(key_size % 4 == 0);
/* I'm sure this can be improved on:
*/
for (i = 0; i < key_size/4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
/* Include the BO pointers as key data as well */
ikey = (GLuint *)reloc_bufs;
key_size = nr_reloc_bufs * sizeof(dri_bo *);
for (i = 0; i < key_size/4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
return hash;
}
/**
* Marks a new buffer as being chosen for the given cache id.
*/
static void
update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
dri_bo *bo)
{
if (bo == cache->last_bo[cache_id])
return; /* no change */
dri_bo_unreference(cache->last_bo[cache_id]);
cache->last_bo[cache_id] = bo;
dri_bo_reference(cache->last_bo[cache_id]);
cache->brw->state.dirty.cache |= 1 << cache_id;
}
static struct brw_cache_item *
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
GLuint hash, const void *key, GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
struct brw_cache_item *c;
#if 0
int bucketcount = 0;
for (c = cache->items[hash % cache->size]; c; c = c->next)
bucketcount++;
fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
cache->size, bucketcount, cache->n_items);
#endif
for (c = cache->items[hash % cache->size]; c; c = c->next) {
if (c->cache_id == cache_id &&
c->hash == hash &&
c->key_size == key_size &&
memcmp(c->key, key, key_size) == 0 &&
c->nr_reloc_bufs == nr_reloc_bufs &&
memcmp(c->reloc_bufs, reloc_bufs,
nr_reloc_bufs * sizeof(dri_bo *)) == 0)
return c;
}
return NULL;
}
static void
rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
GLuint size, i;
size = cache->size * 3;
items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
for (i = 0; i < cache->size; i++)
for (c = cache->items[i]; c; c = next) {
next = c->next;
c->next = items[c->hash % size];
items[c->hash % size] = c;
}
FREE(cache->items);
cache->items = items;
cache->size = size;
}
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
dri_bo *
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
void *aux_return)
{
struct brw_cache_item *item;
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
item = search_cache(cache, cache_id, hash, key, key_size,
reloc_bufs, nr_reloc_bufs);
if (item == NULL)
return NULL;
if (aux_return)
*(void **)aux_return = (void *)((char *)item->key + item->key_size);
update_cache_last(cache, cache_id, item->bo);
dri_bo_reference(item->bo);
return item->bo;
}
dri_bo *
brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs,
const void *data,
GLuint data_size,
const void *aux,
void *aux_return )
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
GLuint aux_size = cache->aux_size[cache_id];
void *tmp;
dri_bo *bo;
int i;
/* Create the buffer object to contain the data */
bo = dri_bo_alloc(cache->brw->intel.bufmgr,
cache->name[cache_id], data_size, 1 << 6);
/* Set up the memory containing the key, aux_data, and reloc_bufs */
tmp = _mesa_malloc(key_size + aux_size + relocs_size);
memcpy(tmp, key, key_size);
memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
for (i = 0; i < nr_reloc_bufs; i++) {
if (reloc_bufs[i] != NULL)
dri_bo_reference(reloc_bufs[i]);
}
item->cache_id = cache_id;
item->key = tmp;
item->hash = hash;
item->key_size = key_size;
item->reloc_bufs = tmp + key_size + aux_size;
item->nr_reloc_bufs = nr_reloc_bufs;
item->bo = bo;
dri_bo_reference(bo);
item->data_size = data_size;
if (cache->n_items > cache->size * 1.5)
rehash(cache);
hash %= cache->size;
item->next = cache->items[hash];
cache->items[hash] = item;
cache->n_items++;
if (aux_return) {
assert(cache->aux_size[cache_id]);
*(void **)aux_return = (void *)((char *)item->key + item->key_size);
}
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("upload %s: %d bytes to cache id %d\n",
cache->name[cache_id],
data_size, cache_id);
/* Copy data to the buffer */
dri_bo_subdata(bo, 0, data_size, data);
update_cache_last(cache, cache_id, bo);
return bo;
}
/**
* This doesn't really work with aux data. Use search/upload instead.
*/
dri_bo *
brw_cache_data_sz(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
GLuint data_size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs)
{
dri_bo *bo;
struct brw_cache_item *item;
GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
item = search_cache(cache, cache_id, hash, data, data_size,
reloc_bufs, nr_reloc_bufs);
if (item) {
update_cache_last(cache, cache_id, item->bo);
dri_bo_reference(item->bo);
return item->bo;
}
bo = brw_upload_cache(cache, cache_id,
data, data_size,
reloc_bufs, nr_reloc_bufs,
data, data_size,
NULL, NULL);
return bo;
}
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
*
* If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
* better to use, as the potentially changing offsets in the data-used-as-key
* will result in excessive cache misses.
*/
dri_bo *
brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs)
{
return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
reloc_bufs, nr_reloc_bufs);
}
enum pool_type {
DW_SURFACE_STATE,
DW_GENERAL_STATE
};
static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
enum brw_cache_id id,
GLuint key_size,
GLuint aux_size)
{
cache->name[id] = strdup(name);
cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
static void
brw_init_non_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
cache->brw = brw;
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
sizeof(struct brw_cc_viewport),
0);
brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
sizeof(struct brw_cc_unit_state),
0);
brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
sizeof(struct brw_sampler_default_color),
0);
brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
0, /* variable key/data size */
0);
brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
sizeof(struct brw_wm_unit_state),
0);
brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
sizeof(struct brw_sf_viewport),
0);
brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
sizeof(struct brw_sf_unit_state),
0);
brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
sizeof(struct brw_vs_unit_state),
0);
brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
sizeof(struct brw_clip_unit_state),
0);
brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
sizeof(struct brw_gs_unit_state),
0);
brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
}
static void
brw_init_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->surface_cache;
cache->brw = brw;
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
sizeof(struct brw_surface_state),
0);
brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
0,
0);
}
void
brw_init_caches(struct brw_context *brw)
{
brw_init_non_surface_cache(brw);
brw_init_surface_cache(brw);
}
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
int j;
next = c->next;
for (j = 0; j < c->nr_reloc_bufs; j++)
dri_bo_unreference(c->reloc_bufs[j]);
dri_bo_unreference(c->bo);
free((void *)c->key);
free(c);
}
cache->items[i] = NULL;
}
cache->n_items = 0;
if (brw->curbe.last_buf) {
_mesa_free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
}
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
}
/* Clear all entries from the cache that point to the given bo.
*
* This lets us release memory for reuse earlier for known-dead buffers,
* at the cost of walking the entire hash table.
*/
void
brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
{
struct brw_cache_item **prev;
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
for (i = 0; i < cache->size; i++) {
for (prev = &cache->items[i]; *prev;) {
struct brw_cache_item *c = *prev;
if (drm_intel_bo_references(c->bo, bo)) {
int j;
*prev = c->next;
for (j = 0; j < c->nr_reloc_bufs; j++)
dri_bo_unreference(c->reloc_bufs[j]);
dri_bo_unreference(c->bo);
free((void *)c->key);
free(c);
cache->n_items--;
} else {
prev = &c->next;
}
}
}
}
void
brw_state_cache_check_size(struct brw_context *brw)
{
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
*/
if (brw->cache.n_items > 1000)
brw_clear_cache(brw, &brw->cache);
if (brw->surface_cache.n_items > 1000)
brw_clear_cache(brw, &brw->surface_cache);
}
static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
dri_bo_unreference(cache->last_bo[i]);
free(cache->name[i]);
}
free(cache->items);
cache->items = NULL;
cache->size = 0;
}
void
brw_destroy_caches(struct brw_context *brw)
{
brw_destroy_cache(brw, &brw->cache);
brw_destroy_cache(brw, &brw->surface_cache);
}

View File

@ -0,0 +1,224 @@
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#include "main/mtypes.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
/**
* Prints out a header, the contents, and the message associated with
* the hardware state data given.
*
* \param name Name of the state object
* \param data Pointer to the base of the state object
* \param hw_offset Hardware offset of the base of the state data.
* \param index Index of the DWORD being output.
*/
static void
state_out(const char *name, void *data, uint32_t hw_offset, int index,
char *fmt, ...)
{
va_list va;
fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
name, hw_offset + index * 4, ((uint32_t *)data)[index]);
va_start(va, fmt);
vfprintf(stderr, fmt, va);
va_end(va);
}
/** Generic, undecoded state buffer debug printout */
static void
state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
{
int i;
if (buffer == NULL)
return;
dri_bo_map(buffer, GL_FALSE);
for (i = 0; i < state_size / 4; i++) {
state_out(name, buffer->virtual, buffer->offset, i,
"dword %d\n", i);
}
dri_bo_unmap(buffer);
}
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
switch (surfacetype) {
case 0: return "1D";
case 1: return "2D";
case 2: return "3D";
case 3: return "CUBE";
case 4: return "BUFFER";
case 7: return "NULL";
default: return "unknown";
}
}
static const char *
get_965_surface_format(unsigned int surface_format)
{
switch (surface_format) {
case 0x000: return "r32g32b32a32_float";
case 0x0c1: return "b8g8r8a8_unorm";
case 0x100: return "b5g6r5_unorm";
case 0x102: return "b5g5r5a1_unorm";
case 0x104: return "b4g4r4a4_unorm";
default: return "unknown";
}
}
static void dump_wm_surface_state(struct brw_context *brw)
{
int i;
for (i = 0; i < brw->wm.nr_surfaces; i++) {
dri_bo *surf_bo = brw->wm.surf_bo[i];
unsigned int surfoff;
struct brw_surface_state *surf;
char name[20];
if (surf_bo == NULL) {
fprintf(stderr, " WM SS%d: NULL\n", i);
continue;
}
dri_bo_map(surf_bo, GL_FALSE);
surfoff = surf_bo->offset;
surf = (struct brw_surface_state *)(surf_bo->virtual);
sprintf(name, "WM SS%d", i);
state_out(name, surf, surfoff, 0, "%s %s\n",
get_965_surfacetype(surf->ss0.surface_type),
get_965_surface_format(surf->ss0.surface_format));
state_out(name, surf, surfoff, 1, "offset\n");
state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
state_out(name, surf, surfoff, 4, "mip base %d\n",
surf->ss4.min_lod);
state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
surf->ss5.x_offset, surf->ss5.y_offset);
dri_bo_unmap(surf_bo);
}
}
static void dump_sf_viewport_state(struct brw_context *brw)
{
const char *name = "SF VP";
struct brw_sf_viewport *vp;
uint32_t vp_off;
if (brw->sf.vp_bo == NULL)
return;
dri_bo_map(brw->sf.vp_bo, GL_FALSE);
vp = brw->sf.vp_bo->virtual;
vp_off = brw->sf.vp_bo->offset;
state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
vp->scissor.xmin, vp->scissor.ymin);
state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
vp->scissor.xmax, vp->scissor.ymax);
dri_bo_unmap(brw->sf.vp_bo);
}
static void brw_debug_prog(const char *name, dri_bo *prog)
{
unsigned int i;
uint32_t *data;
if (prog == NULL)
return;
dri_bo_map(prog, GL_FALSE);
data = prog->virtual;
for (i = 0; i < prog->size / 4 / 4; i++) {
fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
name, (unsigned int)prog->offset + i * 4 * 4,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
/* Stop at the end of the program. It'd be nice to keep track of the actual
* intended program size instead of guessing like this.
*/
if (data[i * 4 + 0] == 0 &&
data[i * 4 + 1] == 0 &&
data[i * 4 + 2] == 0 &&
data[i * 4 + 3] == 0)
break;
}
dri_bo_unmap(prog);
}
/**
* Print additional debug information associated with the batchbuffer
* when DEBUG_BATCH is set.
*
* For 965, this means mapping the state buffers that would have been referenced
* by the batchbuffer and dumping them.
*
* The buffer offsets printed rely on the buffer containing the last offset
* it was validated at.
*/
void brw_debug_batch(struct intel_context *intel)
{
struct brw_context *brw = brw_context(&intel->ctx);
state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
dump_wm_surface_state(brw);
state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
brw_debug_prog("VS prog", brw->vs.prog_bo);
state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
brw_debug_prog("GS prog", brw->gs.prog_bo);
state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
dump_sf_viewport_state(brw);
brw_debug_prog("SF prog", brw->sf.prog_bo);
state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
brw_debug_prog("WM prog", brw->wm.prog_bo);
}

View File

@ -0,0 +1,416 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
/* This is used to initialize brw->state.atoms[]. We could use this
* list directly except for a single atom, brw_constant_buffer, whose
* .dirty value changes according to the parameters of the current
* fragment and vertex programs, and so cannot be a static value.
*/
const struct brw_tracked_state *atoms[] =
{
&brw_check_fallback,
&brw_wm_input_sizes,
&brw_vs_prog,
&brw_gs_prog,
&brw_clip_prog,
&brw_sf_prog,
&brw_wm_prog,
/* Once all the programs are done, we know how large urb entry
* sizes need to be and can decide if we need to change the urb
* layout.
*/
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
&brw_cc_vp,
&brw_cc_unit,
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
&brw_wm_surfaces, /* must do before samplers and unit */
&brw_wm_samplers,
&brw_wm_unit,
&brw_sf_vp,
&brw_sf_unit,
&brw_vs_unit, /* always required, enabled or not */
&brw_clip_unit,
&brw_gs_unit,
/* Command packets:
*/
&brw_invarient_state,
&brw_state_base_address,
&brw_binding_table_pointers,
&brw_blend_constant_color,
&brw_depthbuffer,
&brw_polygon_stipple,
&brw_polygon_stipple_offset,
&brw_line_stipple,
&brw_aa_line_parameters,
&brw_psp_urb_cbs,
&brw_drawing_rect,
&brw_indices,
&brw_index_buffer,
&brw_vertices,
&brw_constant_buffer
};
void brw_init_state( struct brw_context *brw )
{
brw_init_caches(brw);
}
void brw_destroy_state( struct brw_context *brw )
{
brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
}
/***********************************************************************
*/
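/* Return GL_TRUE if the two sets of dirty flags intersect in any of the
* mesa, brw or cache groups.
*/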
static GLboolean check_state( const struct brw_state_flags *a,
const struct brw_state_flags *b )
{
return ((a->mesa & b->mesa) ||
(a->brw & b->brw) ||
(a->cache & b->cache));
}
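/* OR the dirty flags of 'b' into 'a'; used by the INTEL_DEBUG sanity
* checks in brw_upload_state() below.
*/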
static void accumulate_state( struct brw_state_flags *a,
const struct brw_state_flags *b )
{
a->mesa |= b->mesa;
a->brw |= b->brw;
a->cache |= b->cache;
}
static void xor_states( struct brw_state_flags *result,
const struct brw_state_flags *a,
const struct brw_state_flags *b )
{
result->mesa = a->mesa ^ b->mesa;
result->brw = a->brw ^ b->brw;
result->cache = a->cache ^ b->cache;
}
static void
brw_clear_validated_bos(struct brw_context *brw)
{
int i;
/* Clear the last round of validated bos */
for (i = 0; i < brw->state.validated_bo_count; i++) {
dri_bo_unreference(brw->state.validated_bos[i]);
brw->state.validated_bos[i] = NULL;
}
brw->state.validated_bo_count = 0;
}
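/* Bookkeeping used when DEBUG_STATE is set: one entry per dirty bit,
* counting how often that bit was set when state was uploaded.
*/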
struct dirty_bit_map {
uint32_t bit;
char *name;
uint32_t count;
};
#define DEFINE_BIT(name) {name, #name, 0}
static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MODELVIEW),
DEFINE_BIT(_NEW_PROJECTION),
DEFINE_BIT(_NEW_TEXTURE_MATRIX),
DEFINE_BIT(_NEW_COLOR_MATRIX),
DEFINE_BIT(_NEW_ACCUM),
DEFINE_BIT(_NEW_COLOR),
DEFINE_BIT(_NEW_DEPTH),
DEFINE_BIT(_NEW_EVAL),
DEFINE_BIT(_NEW_FOG),
DEFINE_BIT(_NEW_HINT),
DEFINE_BIT(_NEW_LIGHT),
DEFINE_BIT(_NEW_LINE),
DEFINE_BIT(_NEW_PIXEL),
DEFINE_BIT(_NEW_POINT),
DEFINE_BIT(_NEW_POLYGON),
DEFINE_BIT(_NEW_POLYGONSTIPPLE),
DEFINE_BIT(_NEW_SCISSOR),
DEFINE_BIT(_NEW_STENCIL),
DEFINE_BIT(_NEW_TEXTURE),
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
DEFINE_BIT(_NEW_PACKUNPACK),
DEFINE_BIT(_NEW_ARRAY),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
{0, 0, 0}
};
static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_URB_FENCE),
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
{0, 0, 0}
};
static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG),
DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
DEFINE_BIT(CACHE_NEW_SAMPLER),
DEFINE_BIT(CACHE_NEW_WM_UNIT),
DEFINE_BIT(CACHE_NEW_SF_PROG),
DEFINE_BIT(CACHE_NEW_SF_VP),
DEFINE_BIT(CACHE_NEW_SF_UNIT),
DEFINE_BIT(CACHE_NEW_VS_UNIT),
DEFINE_BIT(CACHE_NEW_VS_PROG),
DEFINE_BIT(CACHE_NEW_GS_UNIT),
DEFINE_BIT(CACHE_NEW_GS_PROG),
DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG),
DEFINE_BIT(CACHE_NEW_SURFACE),
DEFINE_BIT(CACHE_NEW_SURF_BIND),
{0, 0, 0}
};
static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
{
int i;
for (i = 0; i < 32; i++) {
if (bit_map[i].bit == 0)
return;
if (bit_map[i].bit & bits)
bit_map[i].count++;
}
}
static void
brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
{
int i;
for (i = 0; i < 32; i++) {
if (bit_map[i].bit == 0)
return;
fprintf(stderr, "0x%08x: %12d (%s)\n",
bit_map[i].bit, bit_map[i].count, bit_map[i].name);
}
}
/***********************************************************************
* Emit all state:
*/
void brw_validate_state( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
GLuint i;
brw_clear_validated_bos(brw);
state->mesa |= brw->intel.NewGLState;
brw->intel.NewGLState = 0;
brw_add_validated_bo(brw, intel->batch->buf);
if (brw->emit_state_always) {
state->mesa |= ~0;
state->brw |= ~0;
state->cache |= ~0;
}
if (brw->fragment_program != ctx->FragmentProgram._Current) {
brw->fragment_program = ctx->FragmentProgram._Current;
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
}
if (brw->vertex_program != ctx->VertexProgram._Current) {
brw->vertex_program = ctx->VertexProgram._Current;
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
}
if (state->mesa == 0 &&
state->cache == 0 &&
state->brw == 0)
return;
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
brw_clear_batch_cache(brw);
brw->intel.Fallback = 0;
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
if (check_state(state, &atom->dirty)) {
if (atom->prepare) {
atom->prepare(brw);
}
}
}
/* Make sure that the textures which are referenced by the current
* brw fragment program are actually present/valid.
* If this fails, we can experience GPU lock-ups.
*/
{
const struct brw_fragment_program *fp;
fp = brw_fragment_program_const(brw->fragment_program);
if (fp) {
assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
== fp->tex_units_used);
}
}
}
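/* Emit every state atom whose dirty flags intersect the accumulated dirty
* state, then clear the dirty flags unless we have fallen back to software
* rendering.
*/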
void brw_upload_state(struct brw_context *brw)
{
struct brw_state_flags *state = &brw->state.dirty;
int i;
static int dirty_count = 0;
brw_clear_validated_bos(brw);
if (INTEL_DEBUG) {
/* Debug version which enforces various sanity checks on the
* state flags which are generated and checked to help ensure
* state atoms are ordered correctly in the list.
*/
struct brw_state_flags examined, prev;
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.mesa ||
atom->dirty.brw ||
atom->dirty.cache);
if (brw->intel.Fallback)
break;
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
}
}
accumulate_state(&examined, &atom->dirty);
/* generated = (prev ^ state)
* if (examined & generated)
* fail;
*/
xor_states(&generated, &prev, state);
assert(!check_state(&examined, &generated));
prev = *state;
}
}
else {
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
}
}
}
}
if (INTEL_DEBUG & DEBUG_STATE) {
brw_update_dirty_count(mesa_bits, state->mesa);
brw_update_dirty_count(brw_bits, state->brw);
brw_update_dirty_count(cache_bits, state->cache);
if (dirty_count++ % 1000 == 0) {
brw_print_dirty_count(mesa_bits, state->mesa);
brw_print_dirty_count(brw_bits, state->brw);
brw_print_dirty_count(cache_bits, state->cache);
fprintf(stderr, "\n");
}
}
if (!brw->intel.Fallback)
memset(state, 0, sizeof(*state));
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,59 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/teximage.h"
#include "intel_context.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
/**
* Finalizes all textures, completing any rendering that needs to be done
* to prepare them.
*/
void brw_validate_textures( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
int i;
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
if (texUnit->_ReallyEnabled) {
intel_finalize_mipmap_tree(intel, i);
}
}
}

View File

@@ -0,0 +1,222 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
/* Code to layout images in a mipmap tree for i965.
*/
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "intel_context.h"
#include "main/macros.h"
#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
GLboolean brw_miptree_layout(struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t tiling)
{
/* XXX: these vary depending on image format: */
/* GLint align_w = 4; */
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
if (IS_IGDNG(intel->intelScreen->deviceID)) {
GLuint align_h = 2, align_w = 4;
GLuint level;
GLuint x = 0;
GLuint y = 0;
GLuint width = mt->width0;
GLuint height = mt->height0;
GLuint qpitch = 0;
GLuint y_pitch = 0;
mt->pitch = mt->width0;
intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
y_pitch = ALIGN(height, align_h);
if (mt->compressed) {
mt->pitch = ALIGN(mt->width0, align_w);
}
if (mt->first_level != mt->last_level) {
GLuint mip1_width;
if (mt->compressed) {
mip1_width = ALIGN(minify(mt->width0), align_w)
+ ALIGN(minify(minify(mt->width0)), align_w);
} else {
mip1_width = ALIGN(minify(mt->width0), align_w)
+ minify(minify(mt->width0));
}
if (mip1_width > mt->pitch) {
mt->pitch = mip1_width;
}
}
mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
if (mt->compressed) {
qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
} else {
qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
}
for (level = mt->first_level; level <= mt->last_level; level++) {
GLuint img_height;
GLuint nr_images = 6;
GLuint q = 0;
intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
height, 1);
for (q = 0; q < nr_images; q++)
intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
if (mt->compressed)
img_height = MAX2(1, height/4);
else
img_height = ALIGN(height, align_h);
if (level == mt->first_level + 1) {
x += ALIGN(width, align_w);
}
else {
y += img_height;
}
width = minify(width);
height = minify(height);
}
break;
}
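/* Non-IGDNG cube maps fall through and are laid out like 3D textures
* with six images per mipmap level.
*/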
case GL_TEXTURE_3D: {
GLuint width = mt->width0;
GLuint height = mt->height0;
GLuint depth = mt->depth0;
GLuint pack_x_pitch, pack_x_nr;
GLuint pack_y_pitch;
GLuint level;
GLuint align_h = 2;
GLuint align_w = 4;
mt->total_height = 0;
intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
if (mt->compressed) {
mt->pitch = ALIGN(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else {
mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
pack_y_pitch = ALIGN(mt->height0, align_h);
}
pack_x_pitch = width;
pack_x_nr = 1;
for (level = mt->first_level ; level <= mt->last_level ; level++) {
GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
GLint x = 0;
GLint y = 0;
GLint q, j;
intel_miptree_set_level_info(mt, level, nr_images,
0, mt->total_height,
width, height, depth);
for (q = 0; q < nr_images;) {
for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
intel_miptree_set_image_offset(mt, level, q, x, y);
x += pack_x_pitch;
}
x = 0;
y += pack_y_pitch;
}
mt->total_height += y;
width = minify(width);
height = minify(height);
depth = minify(depth);
if (mt->compressed) {
pack_y_pitch = (height + 3) / 4;
if (pack_x_pitch > ALIGN(width, align_w)) {
pack_x_pitch = ALIGN(width, align_w);
pack_x_nr <<= 1;
}
} else {
if (pack_x_pitch > 4) {
pack_x_pitch >>= 1;
pack_x_nr <<= 1;
assert(pack_x_pitch * pack_x_nr <= mt->pitch);
}
if (pack_y_pitch > 2) {
pack_y_pitch >>= 1;
pack_y_pitch = ALIGN(pack_y_pitch, align_h);
}
}
}
/* The 965's sampler lays cachelines out according to how accesses
* in the texture surfaces run, so they may be "vertical" through
* memory. As a result, the docs (Surface Padding Requirements:
* Sampling Engine Surfaces) say that two extra rows of padding are required.
* We don't know of similar requirements for pre-965, but given that
* those docs are silent on padding requirements in general, let's play
* it safe.
*/
if (mt->target == GL_TEXTURE_CUBE_MAP)
mt->total_height += 2;
break;
}
default:
i945_miptree_layout_2d(intel, mt, tiling);
break;
}
DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
mt->pitch,
mt->total_height,
mt->cpp,
mt->pitch * mt->total_height * mt->cpp );
return GL_TRUE;
}

View File

@@ -0,0 +1,250 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#define VS 0
#define GS 1
#define CLP 2
#define SF 3
#define CS 4
/** @file brw_urb.c
*
* Manages the division of the URB space between the various fixed-function
* units.
*
* See the Thread Initiation Management section of the GEN4 B-Spec, and
* the individual *_STATE structures for restrictions on numbers of
* entries and threads.
*/
/*
* Generally, a unit requires a min_nr_entries based on how many entries
* it produces before the downstream unit gets unblocked and can use and
* dereference some of its handles.
*
* The SF unit preallocates a PUE at the start of thread dispatch, and only
* uses that one. So it requires one entry per thread.
*
* For CLIP, the SF unit will hold the previous primitive while the
* next is getting assembled, meaning that linestrips require 3 CLIP VUEs
* (vertices) to ensure continued processing, trifans require 4, and tristrips
* require 5. There can be 1 or 2 threads, and each has the same requirement.
*
* GS has the same requirement as CLIP, but it never handles tristrips,
* so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
* We only run it single-threaded.
*
* For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
* Each thread processes 2 preallocated VUEs (vertices) at a time, and they
* get streamed down as soon as threads processing earlier vertices get
* theirs accepted.
*
* Each unit will take the number of URB entries we give it (based on the
* entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
* and brw_curbe.c for the CURBEs) and decide the maximum number of
* threads it can support based on that, in brw_*_state.c.
*
* XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
static const struct {
GLuint min_nr_entries;
GLuint preferred_nr_entries;
GLuint min_entry_size;
GLuint max_entry_size;
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
{ 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
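/* Lay the units out in the URB in the fixed order VS, GS, CLIP, SF, CS
* and return whether the resulting allocation fits in the total URB size.
*/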
static GLboolean check_urb_layout( struct brw_context *brw )
{
brw->urb.vs_start = 0;
brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
}
/* Most minimal update, forces re-emit of URB fence packet after GS
* unit turned on/off.
*/
static void recalculate_urb_fence( struct brw_context *brw )
{
GLuint csize = brw->curbe.total_size;
GLuint vsize = brw->vs.prog_data->urb_entry_size;
GLuint sfsize = brw->sf.prog_data->urb_entry_size;
if (csize < limits[CS].min_entry_size)
csize = limits[CS].min_entry_size;
if (vsize < limits[VS].min_entry_size)
vsize = limits[VS].min_entry_size;
if (sfsize < limits[SF].min_entry_size)
sfsize = limits[SF].min_entry_size;
if (brw->urb.vsize < vsize ||
brw->urb.sfsize < sfsize ||
brw->urb.csize < csize ||
(brw->urb.constrained && (brw->urb.vsize > vsize ||
brw->urb.sfsize > sfsize ||
brw->urb.csize > csize))) {
brw->urb.csize = csize;
brw->urb.sfsize = sfsize;
brw->urb.vsize = vsize;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
brw->urb.constrained = 0;
if (BRW_IS_IGDNG(brw)) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
} else if (BRW_IS_G4X(brw)) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
}
}
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
/* Mark us as operating with constrained nr_entries, so that next
* time we recalculate we'll resize the fences in the hope of
* escaping constrained mode and getting back to normal performance.
*/
brw->urb.constrained = 1;
if (!check_urb_layout(brw)) {
/* This is impossible, given the maximal sizes of urb
* entries and the values for minimum nr of entries
* provided above.
*/
_mesa_printf("couldn't calculate URB layout!\n");
exit(1);
}
if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
_mesa_printf("URB CONSTRAINED\n");
}
done:
if (INTEL_DEBUG & DEBUG_URB)
_mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
brw->urb.sf_start,
brw->urb.cs_start,
URB_SIZES(brw));
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
}
const struct brw_tracked_state brw_recalculate_urb_fence = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CURBE_OFFSETS,
.cache = (CACHE_NEW_VS_PROG |
CACHE_NEW_SF_PROG)
},
.prepare = recalculate_urb_fence
};
void brw_upload_urb_fence(struct brw_context *brw)
{
struct brw_urb_fence uf;
memset(&uf, 0, sizeof(uf));
uf.header.opcode = CMD_URB_FENCE;
uf.header.length = sizeof(uf)/4-2;
uf.header.vs_realloc = 1;
uf.header.gs_realloc = 1;
uf.header.clp_realloc = 1;
uf.header.sf_realloc = 1;
uf.header.vfe_realloc = 1;
uf.header.cs_realloc = 1;
/* The ordering below is correct, not the layout in the
* instruction.
*
* There are 256/384 urb reg pairs in total.
*/
uf.bits0.vs_fence = brw->urb.gs_start;
uf.bits0.gs_fence = brw->urb.clip_start;
uf.bits0.clp_fence = brw->urb.sf_start;
uf.bits1.sf_fence = brw->urb.cs_start;
uf.bits1.cs_fence = URB_SIZES(brw);
BRW_BATCH_STRUCT(brw, &uf);
}

View File

@@ -0,0 +1,104 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/mtypes.h"
#include "shader/prog_parameter.h"
#include "brw_util.h"
#include "brw_defines.h"
GLuint brw_count_bits( GLuint val )
{
GLuint i;
for (i = 0; val ; val >>= 1)
if (val & 1)
i++;
return i;
}
GLuint brw_translate_blend_equation( GLenum mode )
{
switch (mode) {
case GL_FUNC_ADD:
return BRW_BLENDFUNCTION_ADD;
case GL_MIN:
return BRW_BLENDFUNCTION_MIN;
case GL_MAX:
return BRW_BLENDFUNCTION_MAX;
case GL_FUNC_SUBTRACT:
return BRW_BLENDFUNCTION_SUBTRACT;
case GL_FUNC_REVERSE_SUBTRACT:
return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
default:
assert(0);
return BRW_BLENDFUNCTION_ADD;
}
}
GLuint brw_translate_blend_factor( GLenum factor )
{
switch(factor) {
case GL_ZERO:
return BRW_BLENDFACTOR_ZERO;
case GL_SRC_ALPHA:
return BRW_BLENDFACTOR_SRC_ALPHA;
case GL_ONE:
return BRW_BLENDFACTOR_ONE;
case GL_SRC_COLOR:
return BRW_BLENDFACTOR_SRC_COLOR;
case GL_ONE_MINUS_SRC_COLOR:
return BRW_BLENDFACTOR_INV_SRC_COLOR;
case GL_DST_COLOR:
return BRW_BLENDFACTOR_DST_COLOR;
case GL_ONE_MINUS_DST_COLOR:
return BRW_BLENDFACTOR_INV_DST_COLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return BRW_BLENDFACTOR_INV_SRC_ALPHA;
case GL_DST_ALPHA:
return BRW_BLENDFACTOR_DST_ALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return BRW_BLENDFACTOR_INV_DST_ALPHA;
case GL_SRC_ALPHA_SATURATE:
return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
case GL_CONSTANT_COLOR:
return BRW_BLENDFACTOR_CONST_COLOR;
case GL_ONE_MINUS_CONSTANT_COLOR:
return BRW_BLENDFACTOR_INV_CONST_COLOR;
case GL_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_CONST_ALPHA;
case GL_ONE_MINUS_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_INV_CONST_ALPHA;
default:
assert(0);
return BRW_BLENDFACTOR_ZERO;
}
}

View File

@@ -0,0 +1,45 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_UTIL_H
#define BRW_UTIL_H
#include "main/mtypes.h"
extern GLuint brw_count_bits( GLuint val );
extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
#endif

View File

@@ -0,0 +1,124 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
#include "shader/prog_print.h"
static void do_vs_prog( struct brw_context *brw,
struct brw_vertex_program *vp,
struct brw_vs_prog_key *key )
{
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
brw_init_compile(brw, &c.func);
c.vp = vp;
c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
c.prog_data.inputs_read = vp->program.Base.InputsRead;
if (c.key.copy_edgeflag) {
c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
}
if (0)
_mesa_print_program(&c.vp->program.Base);
/* Emit GEN4 code.
*/
brw_vs_emit(&c);
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
dri_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
&c.key, sizeof(c.key),
NULL, 0,
program, program_size,
&c.prog_data,
&brw->vs.prog_data );
}
static void brw_upload_vs_prog(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_vs_prog_key key;
struct brw_vertex_program *vp =
(struct brw_vertex_program *)brw->vertex_program;
memset(&key, 0, sizeof(key));
/* Just upload the program verbatim for now. Always send it all
* the inputs it asks for, whether they are varying or not.
*/
key.program_string_id = vp->id;
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
/* Make an early check for the key.
*/
dri_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
NULL, 0,
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
}
/* See brw_vs.c:
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_POLYGON,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
.prepare = brw_upload_vs_prog
};

View File

@@ -0,0 +1,88 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_VS_H
#define BRW_VS_H
#include "brw_context.h"
#include "brw_eu.h"
#include "shader/program.h"
struct brw_vs_prog_key {
GLuint program_string_id;
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint pad:26;
};
struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
struct brw_vertex_program *vp;
GLuint nr_inputs;
GLuint first_output;
GLuint nr_outputs;
GLuint first_overflow_output; /**< VERT_ATTRIB_x */
GLuint first_tmp;
GLuint last_tmp;
struct brw_reg r0;
struct brw_reg r1;
struct brw_reg regs[PROGRAM_ADDRESS+1][128];
struct brw_reg tmp;
struct brw_reg stack;
struct {
GLboolean used_in_src;
struct brw_reg reg;
} output_regs[128];
struct brw_reg userplane[6];
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
GLint index;
struct brw_reg reg;
} current_const[3];
};
void brw_vs_emit( struct brw_vs_compile *c );
#endif

File diff suppressed because it is too large

View File

@@ -0,0 +1,185 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
struct brw_vs_unit_key {
unsigned int total_grf;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
unsigned int nr_surfaces;
};
static void
vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
key->total_grf = brw->vs.prog_data->total_grf;
key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_vs_entries;
key->urb_size = brw->urb.vsize;
/* BRW_NEW_NR_VS_SURFACES */
key->nr_surfaces = brw->vs.nr_surfaces;
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
/* Note that we read in the userclip planes as well, hence
* clip_start:
*/
key->curbe_offset = brw->curbe.clip_start;
}
else {
key->curbe_offset = brw->curbe.vs_start;
}
}
static dri_bo *
vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
struct brw_vs_unit_state vs;
dri_bo *bo;
int chipset_max_threads;
memset(&vs, 0, sizeof(vs));
vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* Choosing multiple program flow means that we may get 2-vertex threads,
* which will have the channel mask for dwords 4-7 enabled in the thread,
* and those dwords will be written to the second URB handle when we
* brw_urb_WRITE() the results.
*/
vs.thread1.single_program_flow = 0;
if (BRW_IS_IGDNG(brw))
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs.thread1.binding_table_entry_count = key->nr_surfaces;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
if (BRW_IS_IGDNG(brw))
vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
else
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
if (BRW_IS_IGDNG(brw))
chipset_max_threads = 72;
else if (BRW_IS_G4X(brw))
chipset_max_threads = 32;
else
chipset_max_threads = 16;
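/* Each VS thread works on two URB entries (vertices) at a time, so the
* useful thread count is bounded by half the URB entry allocation.
*/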
vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
1, chipset_max_threads) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
vs.thread4.max_threads = 0;
/* No samplers for ARB_vp programs:
*/
/* It has to be set to 0 for IGDNG
*/
vs.vs5.sampler_count = 0;
if (INTEL_DEBUG & DEBUG_STATS)
vs.thread4.stats_enable = 1;
/* Vertex program always enabled:
*/
vs.vs6.vs_enable = 1;
bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
key, sizeof(*key),
&brw->vs.prog_bo, 1,
&vs, sizeof(vs),
NULL, NULL);
/* Emit VS program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
vs.thread0.grf_reg_count << 1,
offsetof(struct brw_vs_unit_state, thread0),
brw->vs.prog_bo);
return bo;
}
static void prepare_vs_unit(struct brw_context *brw)
{
struct brw_vs_unit_key key;
vs_unit_populate_key(brw, &key);
dri_bo_unreference(brw->vs.state_bo);
brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
&key, sizeof(key),
&brw->vs.prog_bo, 1,
NULL);
if (brw->vs.state_bo == NULL) {
brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
}
}
const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
.prepare = prepare_vs_unit,
};

View File

@@ -0,0 +1,226 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/mtypes.h"
#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
/* Creates a new VS constant buffer reflecting the current VS program's
* constants, if needed by the VS program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static drm_intel_bo *
brw_vs_update_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
drm_intel_bo *const_buffer;
/* BRW_NEW_VERTEX_PROGRAM */
if (!vp->use_const_buffer)
return NULL;
const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
size, 64);
/* _NEW_PROGRAM_CONSTANTS */
dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
return const_buffer;
}
/**
* Update the surface state for a VS constant buffer.
*
* Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
*/
static void
brw_update_vs_constant_surface( GLcontext *ctx,
GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
struct brw_surface_key key;
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
assert(surf == 0);
/* If we're in this state update atom, we need to update VS constants, so
* free the old buffer and create a new one for the new contents.
*/
dri_bo_unreference(vp->const_buffer);
vp->const_buffer = brw_vs_update_constant_buffer(brw);
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
if (vp->const_buffer == 0) {
drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
brw->vs.surf_bo[surf] = NULL;
return;
}
memset(&key, 0, sizeof(key));
key.format = MESA_FORMAT_RGBA_FLOAT32;
key.internal_format = GL_RGBA;
key.bo = vp->const_buffer;
key.depthmode = GL_NONE;
key.pitch = params->NumParameters;
key.width = params->NumParameters;
key.height = 1;
key.depth = 1;
key.cpp = 16;
/*
printf("%s:\n", __FUNCTION__);
printf(" width %d height %d depth %d cpp %d pitch %d\n",
key.width, key.height, key.depth, key.cpp, key.pitch);
*/
drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->vs.surf_bo[surf] == NULL) {
brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
}
}
/**
* Constructs the binding table for the VS surface state.
*/
static dri_bo *
brw_vs_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, BRW_VS_MAX_SURF,
NULL);
if (bind_bo == NULL) {
GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
uint32_t *data = malloc(data_size);
int i;
for (i = 0; i < BRW_VS_MAX_SURF; i++)
if (brw->vs.surf_bo[i])
data[i] = brw->vs.surf_bo[i]->offset;
else
data[i] = 0;
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, BRW_VS_MAX_SURF,
data, data_size,
NULL, NULL);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
if (brw->vs.surf_bo[i] != NULL) {
/* The presumed offsets were set in the data values for
* brw_upload_cache.
*/
drm_intel_bo_emit_reloc(bind_bo, i * 4,
brw->vs.surf_bo[i], 0,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
}
free(data);
}
return bind_bo;
}
/**
* Vertex shader surfaces (constant buffer).
*
* This consumes the state updates for the constant buffer needing
* to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
* CACHE_NEW_SURF_BIND for the binding table upload.
*/
static void prepare_vs_surfaces(struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
int i;
int nr_surfaces = 0;
brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
if (brw->vs.surf_bo[i] != NULL) {
nr_surfaces = i + 1;
}
}
if (brw->vs.nr_surfaces != nr_surfaces) {
brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
brw->vs.nr_surfaces = nr_surfaces;
}
/* Note that we don't end up updating the bind_bo if we don't have a
* surface to be pointing at. This should be relatively harmless, as it
* just slightly increases our working set size.
*/
if (brw->vs.nr_surfaces != 0) {
dri_bo_unreference(brw->vs.bind_bo);
brw->vs.bind_bo = brw_vs_get_binding_table(brw);
}
}
const struct brw_tracked_state brw_vs_surfaces = {
.dirty = {
.mesa = (_NEW_PROGRAM_CONSTANTS),
.brw = (BRW_NEW_VERTEX_PROGRAM),
.cache = 0
},
.prepare = prepare_vs_surfaces,
};

View File

@@ -0,0 +1,375 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/texformat.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
#include "brw_state.h"
/** Return number of src args for given instruction */
GLuint brw_wm_nr_args( GLuint opcode )
{
switch (opcode) {
case WM_FRONTFACING:
case WM_PIXELXY:
return 0;
case WM_CINTERP:
case WM_WPOSXY:
case WM_DELTAXY:
return 1;
case WM_LINTERP:
case WM_PIXELW:
return 2;
case WM_FB_WRITE:
case WM_PINTERP:
return 3;
default:
assert(opcode < MAX_OPCODE);
return _mesa_num_inst_src_regs(opcode);
}
}
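/* Return 1 if the opcode produces a result the WM backend treats as
* scalar.
*/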
GLuint brw_wm_is_scalar_result( GLuint opcode )
{
switch (opcode) {
case OPCODE_COS:
case OPCODE_EX2:
case OPCODE_LG2:
case OPCODE_POW:
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_DPH:
case OPCODE_DST:
return 1;
default:
return 0;
}
}
/**
* Do GPU code generation for a non-GLSL shader. Non-GLSL shaders have
* no flow control instructions, so we can more readily do SSA-style
* optimizations.
*/
static void
brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
/* Augment fragment program. Add instructions for pre- and
* post-fragment-program tasks such as interpolation and fogging.
*/
brw_wm_pass_fp(c);
/* Translate to intermediate representation. Build register usage
* chains.
*/
brw_wm_pass0(c);
/* Dead code removal.
*/
brw_wm_pass1(c);
/* Register allocation.
* Divide by two because we operate on 16 pixels at a time and require
* two GRF entries for each logical shader register.
*/
c->grf_limit = BRW_WM_MAX_GRF / 2;
brw_wm_pass2(c);
/* how many general-purpose registers are used */
c->prog_data.total_grf = c->max_wm_grf;
/* Scratch space is used for register spilling */
if (c->last_scratch) {
c->prog_data.total_scratch = c->last_scratch + 0x40;
}
else {
c->prog_data.total_scratch = 0;
}
/* Emit GEN4 code.
*/
brw_wm_emit(c);
}
/**
* All Mesa program -> GPU code generation goes through this function.
* Depending on the instructions used (i.e. flow control instructions)
* we'll use one of two code generators.
*/
static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
struct brw_wm_compile *c;
const GLuint *program;
GLuint program_size;
c = brw->wm.compile_data;
if (c == NULL) {
brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
c = brw->wm.compile_data;
if (c == NULL) {
/* Ouch - big out of memory problem. Can't continue
* without triggering a segfault, no way to signal,
* so just return.
*/
return;
}
} else {
memset(c, 0, sizeof(*brw->wm.compile_data));
}
memcpy(&c->key, key, sizeof(*key));
c->fp = fp;
c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
brw_init_compile(brw, &c->func);
/* temporary sanity check assertion */
ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
/*
* Shaders which use GLSL features such as flow control are handled
* differently from "simple" shaders.
*/
if (fp->isGLSL) {
c->dispatch_width = 8;
brw_wm_glsl_emit(brw, c);
}
else {
c->dispatch_width = 16;
brw_wm_non_glsl_emit(brw, c);
}
if (INTEL_DEBUG & DEBUG_WM)
fprintf(stderr, "\n");
/* get the program
*/
program = brw_get_program(&c->func, &program_size);
dri_bo_unreference(brw->wm.prog_bo);
brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
&c->key, sizeof(c->key),
NULL, 0,
program, program_size,
&c->prog_data,
&brw->wm.prog_data );
}
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
GLcontext *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
memset(key, 0, sizeof(*key));
/* Build the index for table lookup
*/
/* _NEW_COLOR */
if (fp->program.UsesKill ||
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
/* _NEW_DEPTH */
if (ctx->Depth.Test)
lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
if (ctx->Depth.Test &&
ctx->Depth.Mask) /* ?? */
lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
/* _NEW_STENCIL */
if (ctx->Stencil._Enabled) {
lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
if (ctx->Stencil.WriteMask[0] ||
ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
}
line_aa = AA_NEVER;
/* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
if (ctx->Line.SmoothFlag) {
if (brw->intel.reduced_primitive == GL_LINES) {
line_aa = AA_ALWAYS;
}
else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
if (ctx->Polygon.FrontMode == GL_LINE) {
line_aa = AA_SOMETIMES;
if (ctx->Polygon.BackMode == GL_LINE ||
(ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_BACK))
line_aa = AA_ALWAYS;
}
else if (ctx->Polygon.BackMode == GL_LINE) {
line_aa = AA_SOMETIMES;
if ((ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT))
line_aa = AA_ALWAYS;
}
}
}
brw_wm_lookup_iz(line_aa,
lookup,
uses_depth,
key);
/* BRW_NEW_WM_INPUT_DIMENSIONS */
key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
/* _NEW_HINT */
key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
/* _NEW_TEXTURE */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
if (unit->_ReallyEnabled) {
const struct gl_texture_object *t = unit->_Current;
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
if (img->InternalFormat == GL_YCBCR_MESA) {
key->yuvtex_mask |= 1 << i;
if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
key->yuvtex_swap_mask |= 1 << i;
}
key->tex_swizzles[i] = t->_Swizzle;
}
else {
key->tex_swizzles[i] = SWIZZLE_NOOP;
}
}
/* Shadow */
key->shadowtex_mask = fp->program.Base.ShadowSamplers;
/* _NEW_BUFFERS */
/*
* Include the draw buffer origin and height so that we can calculate
* fragment position values relative to the bottom left of the drawable,
* from the incoming screen origin relative position we get as part of our
* payload.
*
* We could avoid recompiling by including this as a constant referenced by
* our program, but if we were to do that it would also be nice to handle
* getting that constant updated at batchbuffer submit time (when we
* hold the lock and know where the buffer really is) rather than at emit
* time when we don't hold the lock and are just guessing. We could also
* just avoid using this as key data if the program doesn't use
* fragment.position.
*
* This pretty much becomes moot with DRI2 and redirected buffers anyway,
* as our origins will always be zero then.
*/
if (brw->intel.driDrawable != NULL) {
key->origin_x = brw->intel.driDrawable->x;
key->origin_y = brw->intel.driDrawable->y;
key->drawable_height = brw->intel.driDrawable->h;
}
/* CACHE_NEW_VS_PROG */
key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
/* The unique fragment program ID */
key->program_string_id = fp->id;
}
static void brw_prepare_wm_prog(struct brw_context *brw)
{
struct brw_wm_prog_key key;
struct brw_fragment_program *fp = (struct brw_fragment_program *)
brw->fragment_program;
brw_wm_populate_key(brw, &key);
/* Make an early check for the key.
*/
dri_bo_unreference(brw->wm.prog_bo);
brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
&key, sizeof(key),
NULL, 0,
&brw->wm.prog_data);
if (brw->wm.prog_bo == NULL)
do_wm_prog(brw, fp, &key);
}
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_DEPTH |
_NEW_HINT |
_NEW_STENCIL |
_NEW_POLYGON |
_NEW_LINE |
_NEW_LIGHT |
_NEW_BUFFERS |
_NEW_TEXTURE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_WM_INPUT_DIMENSIONS |
BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG,
},
.prepare = brw_prepare_wm_prog
};

View File

@@ -0,0 +1,309 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef BRW_WM_H
#define BRW_WM_H
#include "shader/prog_instruction.h"
#include "brw_context.h"
#include "brw_eu.h"
#define SATURATE (1<<5)
/* A big lookup table is used to figure out which and how many
* additional regs will inserted before the main payload in the WM
* program execution. These mainly relate to depth and stencil
* processing and the early-depth-test optimization.
*/
#define IZ_PS_KILL_ALPHATEST_BIT 0x1
#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
#define IZ_BIT_MAX 0x40
#define AA_NEVER 0
#define AA_SOMETIMES 1
#define AA_ALWAYS 2
struct brw_wm_prog_key {
GLuint source_depth_reg:3;
GLuint aa_dest_stencil_reg:3;
GLuint dest_depth_reg:3;
GLuint nr_depth_regs:3;
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swapped */
GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
GLuint program_string_id:32;
GLuint origin_x, origin_y;
GLuint drawable_height;
GLuint vp_outputs_written;
};
/* A bit of a glossary:
*
* brw_wm_value: A computed value or program input. Values are
* constant, they are created once and are never modified. When a
* fragment program register is written or overwritten, new values are
* created fresh, preserving the rule that values are constant.
*
* brw_wm_ref: A reference to a value. Wherever a value is used by an
* instruction or as a program output, that is tracked with an
* instance of this struct. All references to a value occur after it
* is created. After the last reference, a value is dead and can be
* discarded.
*
* brw_wm_grf: Represents a physical hardware register. May be either
* empty or hold a value. Register allocation is the process of
* assigning values to grf registers. This occurs in pass2 and the
* brw_wm_grf struct is not used before that.
*
* Fragment program registers: These are time-varying constructs that
* are hard to reason about and which we translate away in pass0. A
* single fragment program register element (e.g. temp[0].x) will be
* translated to one or more brw_wm_value structs, one for each time
* that temp[0].x is written to during the program.
*/
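/* Illustrative sketch of the value/ref relationship (not driver code):
 *
 *    MOV temp[0].x, a       ->  creates value v1 for temp[0].x
 *    ADD r.x, temp[0].x, b  ->  adds a brw_wm_ref to v1 (chained on lastuse)
 *    MOV temp[0].x, c       ->  creates a fresh value v2; v1 is dead once
 *                               its last ref has been passed
 */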
/* Used in pass2 to track register allocation.
*/
struct brw_wm_grf {
struct brw_wm_value *value;
GLuint nextuse;
};
struct brw_wm_value {
struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
struct brw_wm_ref *lastuse;
struct brw_wm_grf *resident;
GLuint contributes_to_output:1;
GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */
};
struct brw_wm_ref {
struct brw_reg hw_reg; /* nr filled in in pass2; everything else in pass0 */
struct brw_wm_value *value;
struct brw_wm_ref *prevuse;
GLuint unspill_reg:7; /* unspill to reg */
GLuint emitted:1;
GLuint insn:24;
};
struct brw_wm_constref {
const struct brw_wm_ref *ref;
GLfloat constval;
};
struct brw_wm_instruction {
struct brw_wm_value *dst[4];
struct brw_wm_ref *src[3][4];
GLuint opcode:8;
GLuint saturate:1;
GLuint writemask:4;
GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
GLuint tex_shadow:1; /* do shadow comparison? */
GLuint eot:1; /* End of thread indicator for FB_WRITE*/
GLuint target:10; /* target binding table index for FB_WRITE*/
};
#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
#define BRW_WM_MAX_GRF 128 /* hardware limit */
#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
#define BRW_WM_MAX_SUBROUTINE 16
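/* The MAX_VREG and MAX_REF limits above presumably follow from the worst
 * case per instruction: up to 4 destination values, and 3 source args x 4
 * components = 12 refs, for each of BRW_WM_MAX_INSN instructions.
 */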
/* New opcodes to track internal operations required for WM unit.
* These are added early so that the registers used can be tracked,
* freed and reused like those of other instructions.
*/
#define WM_PIXELXY (MAX_OPCODE)
#define WM_DELTAXY (MAX_OPCODE + 1)
#define WM_PIXELW (MAX_OPCODE + 2)
#define WM_LINTERP (MAX_OPCODE + 3)
#define WM_PINTERP (MAX_OPCODE + 4)
#define WM_CINTERP (MAX_OPCODE + 5)
#define WM_WPOSXY (MAX_OPCODE + 6)
#define WM_FB_WRITE (MAX_OPCODE + 7)
#define WM_FRONTFACING (MAX_OPCODE + 8)
#define MAX_WM_OPCODE (MAX_OPCODE + 9)
#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX)
#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX)
struct brw_wm_compile {
struct brw_compile func;
struct brw_wm_prog_key key;
struct brw_wm_prog_data prog_data;
struct brw_fragment_program *fp;
GLfloat (*env_param)[4];
enum {
START,
PASS2_DONE
} state;
/* Initial pass - translate the fragment program's instructions into our own list,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
*/
struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
GLuint nr_fp_insns;
GLuint fp_temp;
GLuint fp_interp_emitted;
GLuint fp_fragcolor_emitted;
struct prog_src_register pixel_xy;
struct prog_src_register delta_xy;
struct prog_src_register pixel_w;
struct brw_wm_value vreg[BRW_WM_MAX_VREG];
GLuint nr_vreg;
struct brw_wm_value creg[BRW_WM_MAX_PARAM];
GLuint nr_creg;
struct {
struct brw_wm_value depth[4]; /* includes r0/r1 */
struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
} payload;
const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
struct brw_wm_ref undef_ref;
struct brw_wm_value undef_value;
struct brw_wm_ref refs[BRW_WM_MAX_REF];
GLuint nr_refs;
struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
GLuint nr_insns;
struct brw_wm_constref constref[BRW_WM_MAX_CONST];
GLuint nr_constrefs;
struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
GLuint grf_limit;
GLuint max_wm_grf;
GLuint last_scratch;
GLuint cur_inst; /**< index of current instruction */
GLboolean out_of_regs; /**< ran out of GRF registers? */
/** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
GLboolean used_grf[BRW_WM_MAX_GRF];
GLuint first_free_grf;
struct brw_reg stack;
struct brw_reg emit_mask_reg;
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
GLuint dispatch_width;
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
GLint index;
struct brw_reg reg;
} current_const[3];
};
GLuint brw_wm_nr_args( GLuint opcode );
GLuint brw_wm_is_scalar_result( GLuint opcode );
void brw_wm_pass_fp( struct brw_wm_compile *c );
void brw_wm_pass0( struct brw_wm_compile *c );
void brw_wm_pass1( struct brw_wm_compile *c );
void brw_wm_pass2( struct brw_wm_compile *c );
void brw_wm_emit( struct brw_wm_compile *c );
void brw_wm_print_value( struct brw_wm_compile *c,
struct brw_wm_value *value );
void brw_wm_print_ref( struct brw_wm_compile *c,
struct brw_wm_ref *ref );
void brw_wm_print_insn( struct brw_wm_compile *c,
struct brw_wm_instruction *inst );
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
GLboolean ps_uses_depth,
struct brw_wm_prog_key *key );
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
GLboolean is_ddx,
const struct brw_reg *arg0);
#endif

View File

@ -0,0 +1,174 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_wm.h"
void brw_wm_print_value( struct brw_wm_compile *c,
struct brw_wm_value *value )
{
assert(value);
if (c->state >= PASS2_DONE)
brw_print_reg(value->hw_reg);
else if( value == &c->undef_value )
_mesa_printf("undef");
else if( value - c->vreg >= 0 &&
value - c->vreg < BRW_WM_MAX_VREG)
_mesa_printf("r%d", value - c->vreg);
else if (value - c->creg >= 0 &&
value - c->creg < BRW_WM_MAX_PARAM)
_mesa_printf("c%d", value - c->creg);
else if (value - c->payload.input_interp >= 0 &&
value - c->payload.input_interp < FRAG_ATTRIB_MAX)
_mesa_printf("i%d", value - c->payload.input_interp);
else if (value - c->payload.depth >= 0 &&
value - c->payload.depth < FRAG_ATTRIB_MAX)
_mesa_printf("d%d", value - c->payload.depth);
else
_mesa_printf("?");
}
void brw_wm_print_ref( struct brw_wm_compile *c,
struct brw_wm_ref *ref )
{
struct brw_reg hw_reg = ref->hw_reg;
if (ref->unspill_reg)
_mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
if (c->state >= PASS2_DONE)
brw_print_reg(ref->hw_reg);
else {
_mesa_printf("%s", hw_reg.negate ? "-" : "");
_mesa_printf("%s", hw_reg.abs ? "abs/" : "");
brw_wm_print_value(c, ref->value);
if ((hw_reg.nr&1) || hw_reg.subnr) {
_mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
}
}
}
void brw_wm_print_insn( struct brw_wm_compile *c,
struct brw_wm_instruction *inst )
{
GLuint i, arg;
GLuint nr_args = brw_wm_nr_args(inst->opcode);
_mesa_printf("[");
for (i = 0; i < 4; i++) {
if (inst->dst[i]) {
brw_wm_print_value(c, inst->dst[i]);
if (inst->dst[i]->spill_slot)
_mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
}
else
_mesa_printf("#");
if (i < 3)
_mesa_printf(",");
}
_mesa_printf("]");
if (inst->writemask != WRITEMASK_XYZW)
_mesa_printf(".%s%s%s%s",
GET_BIT(inst->writemask, 0) ? "x" : "",
GET_BIT(inst->writemask, 1) ? "y" : "",
GET_BIT(inst->writemask, 2) ? "z" : "",
GET_BIT(inst->writemask, 3) ? "w" : "");
switch (inst->opcode) {
case WM_PIXELXY:
_mesa_printf(" = PIXELXY");
break;
case WM_DELTAXY:
_mesa_printf(" = DELTAXY");
break;
case WM_PIXELW:
_mesa_printf(" = PIXELW");
break;
case WM_WPOSXY:
_mesa_printf(" = WPOSXY");
break;
case WM_PINTERP:
_mesa_printf(" = PINTERP");
break;
case WM_LINTERP:
_mesa_printf(" = LINTERP");
break;
case WM_CINTERP:
_mesa_printf(" = CINTERP");
break;
case WM_FB_WRITE:
_mesa_printf(" = FB_WRITE");
break;
case WM_FRONTFACING:
_mesa_printf(" = FRONTFACING");
break;
default:
_mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
break;
}
if (inst->saturate)
_mesa_printf("_SAT");
for (arg = 0; arg < nr_args; arg++) {
_mesa_printf(" [");
for (i = 0; i < 4; i++) {
if (inst->src[arg][i]) {
brw_wm_print_ref(c, inst->src[arg][i]);
}
else
_mesa_printf("%%");
if (i < 3)
_mesa_printf(",");
else
_mesa_printf("]");
}
}
_mesa_printf("\n");
}
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage )
{
GLuint insn;
_mesa_printf("%s:\n", stage);
for (insn = 0; insn < c->nr_insns; insn++)
brw_wm_print_insn(c, &c->instruction[insn]);
_mesa_printf("\n");
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,157 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/mtypes.h"
#include "brw_wm.h"
#undef P /* promoted depth */
#undef C /* computed */
#undef N /* non-promoted? */
#define P 0
#define C 1
#define N 2
const struct {
GLuint mode:2;
GLuint sd_present:1;
GLuint sd_to_rt:1;
GLuint dd_present:1;
GLuint ds_present:1;
} wm_iz_table[IZ_BIT_MAX] =
{
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 0, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 }
};
/**
* \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
* \param lookup bitmask of IZ_* flags
*/
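/* Worked example (illustrative only): with
 * lookup = IZ_DEPTH_TEST_ENABLE_BIT | IZ_DEPTH_WRITE_ENABLE_BIT (entry 0xc,
 * a "P" row) and ps_uses_depth set, source depth takes regs 2-3, nothing
 * else is added, so reg ends at 4 and key->nr_depth_regs = (4+1)/2 = 2.
 */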
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
GLboolean ps_uses_depth,
struct brw_wm_prog_key *key )
{
GLuint reg = 2;
assert (lookup < IZ_BIT_MAX);
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
key->computes_depth = 1;
if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
key->source_depth_reg = reg;
reg += 2;
}
if (wm_iz_table[lookup].sd_to_rt)
key->source_depth_to_render_target = 1;
if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
key->aa_dest_stencil_reg = reg;
key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
line_aa == AA_SOMETIMES);
reg++;
}
if (wm_iz_table[lookup].dd_present) {
key->dest_depth_reg = reg;
reg+=2;
}
key->nr_depth_regs = (reg+1)/2;
}

View File

@ -0,0 +1,442 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_wm.h"
#include "shader/prog_parameter.h"
/***********************************************************************
*/
static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
{
assert(c->nr_refs < BRW_WM_MAX_REF);
return &c->refs[c->nr_refs++];
}
static struct brw_wm_value *get_value( struct brw_wm_compile *c)
{
assert(c->nr_vreg < BRW_WM_MAX_VREG);
return &c->vreg[c->nr_vreg++];
}
/** return pointer to a newly allocated instruction */
static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
{
assert(c->nr_insns < BRW_WM_MAX_INSN);
return &c->instruction[c->nr_insns++];
}
/***********************************************************************
*/
/** Init the "undef" register */
static void pass0_init_undef( struct brw_wm_compile *c)
{
struct brw_wm_ref *ref = &c->undef_ref;
ref->value = &c->undef_value;
ref->hw_reg = brw_vec8_grf(0, 0);
ref->insn = 0;
ref->prevuse = NULL;
}
/** Set a FP register to a value */
static void pass0_set_fpreg_value( struct brw_wm_compile *c,
GLuint file,
GLuint idx,
GLuint component,
struct brw_wm_value *value )
{
struct brw_wm_ref *ref = get_ref(c);
ref->value = value;
ref->hw_reg = brw_vec8_grf(0, 0);
ref->insn = 0;
ref->prevuse = NULL;
c->pass0_fp_reg[file][idx][component] = ref;
}
/** Set a FP register to a ref */
static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
GLuint file,
GLuint idx,
GLuint component,
const struct brw_wm_ref *src_ref )
{
c->pass0_fp_reg[file][idx][component] = src_ref;
}
static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
const GLfloat *param_ptr )
{
GLuint i = c->prog_data.nr_params++;
if (i >= BRW_WM_MAX_PARAM) {
_mesa_printf("%s: out of params\n", __FUNCTION__);
c->prog_data.error = 1;
return NULL;
}
else {
struct brw_wm_ref *ref = get_ref(c);
c->prog_data.param[i] = param_ptr;
c->nr_creg = (i+16)/16;
/* Push the offsets into hw_reg. These will be added to the
* real register numbers once one is allocated in pass2.
*/
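/* Illustrative example: param i = 17 lands in creg[17/16] = creg[1]; its
 * offset within that pair of GRFs is grf 0, subreg 1 (17 & 8 == 0,
 * 17 % 8 == 1), which pass2 later adds to the allocated base register.
 */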
ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
ref->value = &c->creg[i/16];
ref->insn = 0;
ref->prevuse = NULL;
return ref;
}
}
/** Return a ref to a constant/literal value */
static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
const GLfloat *constval )
{
GLuint i;
/* Search for an existing const value matching the request:
*/
for (i = 0; i < c->nr_constrefs; i++) {
if (c->constref[i].constval == *constval)
return c->constref[i].ref;
}
/* Else try to add a new one:
*/
if (c->nr_constrefs < BRW_WM_MAX_CONST) {
GLuint i = c->nr_constrefs++;
/* A constant is a special type of parameter:
*/
c->constref[i].constval = *constval;
c->constref[i].ref = get_param_ref(c, constval);
return c->constref[i].ref;
}
else {
_mesa_printf("%s: out of constrefs\n", __FUNCTION__);
c->prog_data.error = 1;
return NULL;
}
}
/* Lookup our internal registers
*/
static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
GLuint file,
GLuint idx,
GLuint component )
{
const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
if (!ref) {
switch (file) {
case PROGRAM_INPUT:
case PROGRAM_PAYLOAD:
case PROGRAM_TEMPORARY:
case PROGRAM_OUTPUT:
case PROGRAM_VARYING:
break;
case PROGRAM_LOCAL_PARAM:
ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
break;
case PROGRAM_ENV_PARAM:
ref = get_param_ref(c, &c->env_param[idx][component]);
break;
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_NAMED_PARAM: {
struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
/* There's something really hokey about parameters parsed in
* arb programs - they all end up in here, whether they be
* state values, parameters or constants. This duplicates the
* structure above & also seems to subvert the limits set for
* each type of constant/param.
*/
switch (plist->Parameters[idx].Type) {
case PROGRAM_NAMED_PARAM:
case PROGRAM_CONSTANT:
/* These are invariant:
*/
ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
break;
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
/* These may change from run to run:
*/
ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
break;
default:
assert(0);
break;
}
break;
}
default:
assert(0);
break;
}
c->pass0_fp_reg[file][idx][component] = ref;
}
if (!ref)
ref = &c->undef_ref;
return ref;
}
/***********************************************************************
* Straight translation to internal instruction format
*/
static void pass0_set_dst( struct brw_wm_compile *c,
struct brw_wm_instruction *out,
const struct prog_instruction *inst,
GLuint writemask )
{
const struct prog_dst_register *dst = &inst->DstReg;
GLuint i;
for (i = 0; i < 4; i++) {
if (writemask & (1<<i)) {
out->dst[i] = get_value(c);
pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
}
}
out->writemask = writemask;
}
static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
struct prog_src_register src,
GLuint i )
{
GLuint component = GET_SWZ(src.Swizzle,i);
const struct brw_wm_ref *src_ref;
static const GLfloat const_zero = 0.0;
static const GLfloat const_one = 1.0;
if (component == SWIZZLE_ZERO)
src_ref = get_const_ref(c, &const_zero);
else if (component == SWIZZLE_ONE)
src_ref = get_const_ref(c, &const_one);
else
src_ref = pass0_get_reg(c, src.File, src.Index, component);
return src_ref;
}
static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
struct prog_src_register src,
GLuint i,
struct brw_wm_instruction *insn)
{
const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
struct brw_wm_ref *newref = get_ref(c);
newref->value = ref->value;
newref->hw_reg = ref->hw_reg;
if (insn) {
newref->insn = insn - c->instruction;
newref->prevuse = newref->value->lastuse;
newref->value->lastuse = newref;
}
if (src.Negate & (1 << i))
newref->hw_reg.negate ^= 1;
if (src.Abs) {
newref->hw_reg.negate = 0;
newref->hw_reg.abs = 1;
}
return newref;
}
static void
translate_insn(struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
struct brw_wm_instruction *out = get_instruction(c);
GLuint writemask = inst->DstReg.WriteMask;
GLuint nr_args = brw_wm_nr_args(inst->Opcode);
GLuint i, j;
/* Copy some data out of the instruction
*/
out->opcode = inst->Opcode;
out->saturate = (inst->SaturateMode != SATURATE_OFF);
out->tex_unit = inst->TexSrcUnit;
out->tex_idx = inst->TexSrcTarget;
out->tex_shadow = inst->TexShadow;
out->eot = inst->Aux & 1;
out->target = inst->Aux >> 1;
/* Args:
*/
for (i = 0; i < nr_args; i++) {
for (j = 0; j < 4; j++) {
out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
}
}
/* Dst:
*/
pass0_set_dst(c, out, inst, writemask);
}
/***********************************************************************
* Optimize moves and swizzles away:
*/
static void pass0_precalc_mov( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
const struct prog_dst_register *dst = &inst->DstReg;
GLuint writemask = inst->DstReg.WriteMask;
struct brw_wm_ref *refs[4];
GLuint i;
/* Get the effect of a MOV by manipulating our register table:
* First get all refs, then assign refs. This ensures that "in-place"
* swizzles such as:
* MOV t, t.xxyx
* are handled correctly. Previously, these two steps were done in
* one loop and the above case was incorrectly handled.
*/
for (i = 0; i < 4; i++) {
refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
}
for (i = 0; i < 4; i++) {
if (writemask & (1 << i)) {
pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
}
}
}
/* Initialize payload "registers".
*/
static void pass0_init_payload( struct brw_wm_compile *c )
{
GLuint i;
for (i = 0; i < 4; i++) {
GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}
#if 0
/* This seems to be an alternative to the INTERP_WPOS stuff I do
* elsewhere:
*/
if (c->key.source_depth_reg)
pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
&c->payload.depth[c->key.source_depth_reg/2]);
#endif
for (i = 0; i < FRAG_ATTRIB_MAX; i++)
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0,
&c->payload.input_interp[i] );
}
/***********************************************************************
* PASS 0
*
* Work forwards to give each calculated value a unique number. Where
* an instruction produces duplicate values (eg DP3), all are given
* the same number.
*
* Translate away swizzling and eliminate non-saturating moves.
*/
void brw_wm_pass0( struct brw_wm_compile *c )
{
GLuint insn;
c->nr_vreg = 0;
c->nr_insns = 0;
pass0_init_undef(c);
pass0_init_payload(c);
for (insn = 0; insn < c->nr_fp_insns; insn++) {
const struct prog_instruction *inst = &c->prog_instructions[insn];
/* Optimize away moves, otherwise emit translated instruction:
*/
switch (inst->Opcode) {
case OPCODE_MOV:
case OPCODE_SWZ:
if (!inst->SaturateMode) {
pass0_precalc_mov(c, inst);
}
else {
translate_insn(c, inst);
}
break;
default:
translate_insn(c, inst);
break;
}
}
if (INTEL_DEBUG & DEBUG_WM) {
brw_wm_print_program(c, "pass0");
}
}

View File

@ -0,0 +1,291 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_wm.h"
static GLuint get_tracked_mask(struct brw_wm_compile *c,
struct brw_wm_instruction *inst)
{
GLuint i;
for (i = 0; i < 4; i++) {
if (inst->writemask & (1<<i)) {
if (!inst->dst[i]->contributes_to_output) {
inst->writemask &= ~(1<<i);
inst->dst[i] = 0;
}
}
}
return inst->writemask;
}
/* Remove a reference from a value's usage chain.
*/
static void unlink_ref(struct brw_wm_ref *ref)
{
struct brw_wm_value *value = ref->value;
if (ref == value->lastuse) {
value->lastuse = ref->prevuse;
}
else {
struct brw_wm_ref *i = value->lastuse;
while (i->prevuse != ref) i = i->prevuse;
i->prevuse = ref->prevuse;
}
}
static void track_arg(struct brw_wm_compile *c,
struct brw_wm_instruction *inst,
GLuint arg,
GLuint readmask)
{
GLuint i;
for (i = 0; i < 4; i++) {
struct brw_wm_ref *ref = inst->src[arg][i];
if (ref) {
if (readmask & (1<<i)) {
ref->value->contributes_to_output = 1;
}
else {
unlink_ref(ref);
inst->src[arg][i] = NULL;
}
}
}
}
static GLuint get_texcoord_mask( GLuint tex_idx )
{
switch (tex_idx) {
case TEXTURE_1D_INDEX:
return WRITEMASK_X;
case TEXTURE_2D_INDEX:
return WRITEMASK_XY;
case TEXTURE_3D_INDEX:
return WRITEMASK_XYZ;
case TEXTURE_CUBE_INDEX:
return WRITEMASK_XYZ;
case TEXTURE_RECT_INDEX:
return WRITEMASK_XY;
default: return 0;
}
}
/* Step two: Basically this is dead code elimination.
*
* Iterate backwards over instructions, noting which values
* contribute to the final result. Adjust writemasks to only
* calculate these values.
*/
void brw_wm_pass1( struct brw_wm_compile *c )
{
GLint insn;
for (insn = c->nr_insns-1; insn >= 0; insn--) {
struct brw_wm_instruction *inst = &c->instruction[insn];
GLuint writemask;
GLuint read0, read1, read2;
if (inst->opcode == OPCODE_KIL) {
track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
continue;
}
if (inst->opcode == WM_FB_WRITE) {
track_arg(c, inst, 0, WRITEMASK_XYZW);
track_arg(c, inst, 1, WRITEMASK_XYZW);
if (c->key.source_depth_to_render_target &&
c->key.computes_depth)
track_arg(c, inst, 2, WRITEMASK_Z);
else
track_arg(c, inst, 2, 0);
continue;
}
/* Lookup all the registers which were written by this
* instruction and get a mask of those that contribute to the output:
*/
writemask = get_tracked_mask(c, inst);
if (!writemask) {
GLuint arg;
for (arg = 0; arg < 3; arg++)
track_arg(c, inst, arg, 0);
continue;
}
read0 = 0;
read1 = 0;
read2 = 0;
/* Mark all inputs which contribute to the marked outputs:
*/
switch (inst->opcode) {
case OPCODE_ABS:
case OPCODE_FLR:
case OPCODE_FRC:
case OPCODE_MOV:
case OPCODE_SWZ:
case OPCODE_TRUNC:
read0 = writemask;
break;
case OPCODE_SUB:
case OPCODE_SLT:
case OPCODE_SLE:
case OPCODE_SGE:
case OPCODE_SGT:
case OPCODE_SEQ:
case OPCODE_SNE:
case OPCODE_ADD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
read0 = writemask;
read1 = writemask;
break;
case OPCODE_DDX:
case OPCODE_DDY:
read0 = writemask;
break;
case OPCODE_MAD:
case OPCODE_CMP:
case OPCODE_LRP:
read0 = writemask;
read1 = writemask;
read2 = writemask;
break;
case OPCODE_XPD:
if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;
if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;
if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
read1 = read0;
break;
case OPCODE_COS:
case OPCODE_EX2:
case OPCODE_LG2:
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
case OPCODE_SCS:
case WM_CINTERP:
case WM_PIXELXY:
read0 = WRITEMASK_X;
break;
case OPCODE_POW:
read0 = WRITEMASK_X;
read1 = WRITEMASK_X;
break;
case OPCODE_TEX:
case OPCODE_TXP:
read0 = get_texcoord_mask(inst->tex_idx);
if (inst->tex_shadow)
read0 |= WRITEMASK_Z;
break;
case OPCODE_TXB:
/* Shadow ignored for txb.
*/
read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
break;
case WM_WPOSXY:
read0 = writemask & WRITEMASK_XY;
break;
case WM_DELTAXY:
read0 = writemask & WRITEMASK_XY;
read1 = WRITEMASK_X;
break;
case WM_PIXELW:
read0 = WRITEMASK_X;
read1 = WRITEMASK_XY;
break;
case WM_LINTERP:
read0 = WRITEMASK_X;
read1 = WRITEMASK_XY;
break;
case WM_PINTERP:
read0 = WRITEMASK_X; /* interpolant */
read1 = WRITEMASK_XY; /* deltas */
read2 = WRITEMASK_W; /* pixel w */
break;
case OPCODE_DP3:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZ;
break;
case OPCODE_DPH:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZW;
break;
case OPCODE_DP4:
read0 = WRITEMASK_XYZW;
read1 = WRITEMASK_XYZW;
break;
case OPCODE_LIT:
read0 = WRITEMASK_XYW;
break;
case OPCODE_DST:
case WM_FRONTFACING:
case OPCODE_KIL_NV:
default:
break;
}
track_arg(c, inst, 0, read0);
track_arg(c, inst, 1, read1);
track_arg(c, inst, 2, read2);
}
if (INTEL_DEBUG & DEBUG_WM) {
brw_wm_print_program(c, "pass1");
}
}

View File

@ -0,0 +1,343 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_wm.h"
/* Use these to force spilling so that this functionality can be
* tested with known-good examples rather than having to construct new
* tests.
*/
#define TEST_PAYLOAD_SPILLS 0
#define TEST_DST_SPILLS 0
static void spill_value(struct brw_wm_compile *c,
struct brw_wm_value *value);
static void prealloc_reg(struct brw_wm_compile *c,
struct brw_wm_value *value,
GLuint reg)
{
if (value->lastuse) {
/* Set nextuse to zero, it will be corrected by
* update_register_usage().
*/
c->pass2_grf[reg].value = value;
c->pass2_grf[reg].nextuse = 0;
value->resident = &c->pass2_grf[reg];
value->hw_reg = brw_vec8_grf(reg*2, 0);
if (TEST_PAYLOAD_SPILLS)
spill_value(c, value);
}
}
/* Initialize all the register values. Do the initial setup
* calculations for interpolants.
*/
static void init_registers( struct brw_wm_compile *c )
{
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
for (j = 0; j < c->key.nr_depth_regs; j++)
prealloc_reg(c, &c->payload.depth[j], i++);
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
if (c->key.vp_outputs_written & (1<<j)) {
int fp_index;
if (j >= VERT_RESULT_VAR0)
fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
else if (j <= VERT_RESULT_TEX7)
fp_index = j;
else
fp_index = -1;
nr_interp_regs++;
if (fp_index >= 0)
prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
}
}
assert(nr_interp_regs >= 1);
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;
c->max_wm_grf = i * 2;
}
/* Update the nextuse value for each register in our file.
*/
static void update_register_usage(struct brw_wm_compile *c,
GLuint thisinsn)
{
GLuint i;
for (i = 1; i < c->grf_limit; i++) {
struct brw_wm_grf *grf = &c->pass2_grf[i];
/* Only search those which can change:
*/
if (grf->nextuse < thisinsn) {
const struct brw_wm_ref *ref = grf->value->lastuse;
/* Has last use of value been passed?
*/
if (ref->insn < thisinsn) {
grf->value->resident = 0;
grf->value = 0;
grf->nextuse = BRW_WM_MAX_INSN;
}
else {
/* Else loop through chain to update:
*/
while (ref->prevuse && ref->prevuse->insn >= thisinsn)
ref = ref->prevuse;
grf->nextuse = ref->insn;
}
}
}
}
static void spill_value(struct brw_wm_compile *c,
struct brw_wm_value *value)
{
/* Allocate a spill slot. Note that allocations start from 0x40 -
* the first slot is reserved to mean "undef" in brw_wm_emit.c
*/
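/* Illustrative: the first value spilled gets spill_slot 0x40, the next
 * 0x80, and so on; slot 0 is never handed out, so spill_slot == 0 always
 * means "not spilled".
 */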
if (!value->spill_slot) {
c->last_scratch += 0x40;
value->spill_slot = c->last_scratch;
}
/* The spill will be done in brw_wm_emit.c immediately after the
* value is calculated, so we can just take this reg without any
* further work.
*/
value->resident->value = NULL;
value->resident->nextuse = BRW_WM_MAX_INSN;
value->resident = NULL;
}
/* Search for contiguous region with the most distant nearest
* member. Free regs count as very distant.
*
* TODO: implement spill-to-reg so that we can rearrange discontiguous
* free regs and then spill the oldest non-free regs in sequence.
* This would mean inserting instructions in this pass.
*/
static GLuint search_contiguous_regs(struct brw_wm_compile *c,
GLuint nr,
GLuint thisinsn)
{
struct brw_wm_grf *grf = c->pass2_grf;
GLuint furthest = 0;
GLuint reg = 0;
GLuint i, j;
/* Start search at 1: r0 is special and can't be used or spilled.
*/
for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
GLuint group_nextuse = BRW_WM_MAX_INSN;
for (j = 0; j < nr; j++) {
if (grf[i+j].nextuse < group_nextuse)
group_nextuse = grf[i+j].nextuse;
}
if (group_nextuse > furthest) {
furthest = group_nextuse;
reg = i;
}
}
assert(furthest != thisinsn);
/* Any non-empty regs will need to be spilled:
*/
for (j = 0; j < nr; j++)
if (grf[reg+j].value)
spill_value(c, grf[reg+j].value);
return reg;
}
static void alloc_contiguous_dest(struct brw_wm_compile *c,
struct brw_wm_value *dst[],
GLuint nr,
GLuint thisinsn)
{
GLuint reg = search_contiguous_regs(c, nr, thisinsn);
GLuint i;
for (i = 0; i < nr; i++) {
if (!dst[i]) {
/* Need to grab a dummy value in TEX case. Don't introduce
* it into the tracking scheme.
*/
dst[i] = &c->vreg[c->nr_vreg++];
}
else {
assert(!dst[i]->resident);
assert(c->pass2_grf[reg+i].nextuse != thisinsn);
c->pass2_grf[reg+i].value = dst[i];
c->pass2_grf[reg+i].nextuse = thisinsn;
dst[i]->resident = &c->pass2_grf[reg+i];
}
dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
}
if ((reg+nr)*2 > c->max_wm_grf)
c->max_wm_grf = (reg+nr) * 2;
}
static void load_args(struct brw_wm_compile *c,
struct brw_wm_instruction *inst)
{
GLuint thisinsn = inst - c->instruction;
GLuint i,j;
for (i = 0; i < 3; i++) {
for (j = 0; j < 4; j++) {
struct brw_wm_ref *ref = inst->src[i][j];
if (ref) {
if (!ref->value->resident) {
/* Need to bring the value in from scratch space. The code for
* this will be done in brw_wm_emit.c, here we just do the
* register allocation and mark the ref as requiring a fill.
*/
GLuint reg = search_contiguous_regs(c, 1, thisinsn);
c->pass2_grf[reg].value = ref->value;
c->pass2_grf[reg].nextuse = thisinsn;
ref->value->resident = &c->pass2_grf[reg];
/* Note that a fill is required:
*/
ref->unspill_reg = reg*2;
}
/* Adjust the hw_reg to point at the value's current location:
*/
assert(ref->value == ref->value->resident->value);
ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
}
}
}
}
/* Step 3: Work forwards once again. Perform register allocations,
* taking into account instructions like TEX which require contiguous
* result registers. Where necessary spill registers to scratch space
* and reload later.
*/
void brw_wm_pass2( struct brw_wm_compile *c )
{
GLuint insn;
GLuint i;
init_registers(c);
for (insn = 0; insn < c->nr_insns; insn++) {
struct brw_wm_instruction *inst = &c->instruction[insn];
/* Update registers' nextuse values:
*/
update_register_usage(c, insn);
/* May need to unspill some args.
*/
load_args(c, inst);
/* Allocate registers to hold results:
*/
switch (inst->opcode) {
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
alloc_contiguous_dest(c, inst->dst, 4, insn);
break;
default:
for (i = 0; i < 4; i++) {
if (inst->writemask & (1<<i)) {
assert(inst->dst[i]);
alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
}
}
break;
}
if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
for (i = 0; i < 4; i++)
if (inst->dst[i])
spill_value(c, inst->dst[i]);
}
}
if (INTEL_DEBUG & DEBUG_WM) {
brw_wm_print_program(c, "pass2");
}
c->state = PASS2_DONE;
if (INTEL_DEBUG & DEBUG_WM) {
brw_wm_print_program(c, "pass2/done");
}
}

View File

@ -0,0 +1,369 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
/* Samplers aren't strictly wm state from the hardware's perspective,
* but that is the only situation in which we use them in this driver.
*/
/* The brw (and related graphics cores) do not support GL_CLAMP. The
* Intel drivers for "other operating systems" implement GL_CLAMP as
* GL_CLAMP_TO_EDGE, so the same is done here.
*/
static GLuint translate_wrap_mode( GLenum wrap )
{
switch( wrap ) {
case GL_REPEAT:
return BRW_TEXCOORDMODE_WRAP;
case GL_CLAMP:
return BRW_TEXCOORDMODE_CLAMP;
case GL_CLAMP_TO_EDGE:
return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
case GL_CLAMP_TO_BORDER:
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
return BRW_TEXCOORDMODE_MIRROR;
default:
return BRW_TEXCOORDMODE_WRAP;
}
}
static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
{
value *= (1<<frac_bits);
return value < 0 ? 0 : value;
}
static GLint S_FIXED(GLfloat value, GLuint frac_bits)
{
return value * (1<<frac_bits);
}
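/* Worked examples (illustrative): S_FIXED(2.5f, 6) == 160, i.e. 2.5 in
 * signed .6 fixed point; U_FIXED(13.0f, 6) == 832, and negative inputs to
 * U_FIXED clamp to 0. These are used below for LOD bias and min/max LOD.
 */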
static dri_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
struct brw_sampler_default_color sdc;
COPY_4V(sdc.color, color);
return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
NULL, 0 );
}
struct wm_sampler_key {
int sampler_count;
struct wm_sampler_entry {
GLenum tex_target;
GLenum wrap_r, wrap_s, wrap_t;
float maxlod, minlod;
float lod_bias;
float max_aniso;
GLenum minfilter, magfilter;
GLenum comparemode, comparefunc;
dri_bo *sdc_bo;
/** If the target is a cubemap, take the context's seamless setting.
*/
GLboolean seamless_cube_map;
} sampler[BRW_MAX_TEX_UNIT];
};
/**
* Sets the sampler state for a single unit based off of the sampler key
* entry.
*/
static void brw_update_sampler_state(struct wm_sampler_entry *key,
dri_bo *sdc_bo,
struct brw_sampler_state *sampler)
{
_mesa_memset(sampler, 0, sizeof(*sampler));
switch (key->minfilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
break;
case GL_NEAREST_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
default:
break;
}
/* Set Anisotropy:
*/
if (key->max_aniso > 1.0) {
sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
if (key->max_aniso > 2.0) {
sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
BRW_ANISORATIO_16);
}
}
else {
switch (key->magfilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
break;
default:
break;
}
}
sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Further, only CUBE and CLAMP are valid.
*/
if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
if (key->seamless_cube_map &&
(key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
} else {
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
}
} else if (key->tex_target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
* any nonexistent border pixels from floating in.
*/
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
/* Set shadow function:
*/
if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
/* Shadowing is "enabled" by emitting a particular sampler
* message (sample_c). So we need to recompile the WM program whenever
* shadow comparison is enabled on any texture unit.
*/
sampler->ss0.shadow_function =
intel_translate_shadow_compare_func(key->comparefunc);
}
/* Set LOD bias:
*/
sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
/* Set BaseMipLevel, MaxLOD, MinLOD:
*
* XXX: I don't think that using firstLevel, lastLevel works,
* because we always set up the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
sampler->ss0.base_level = U_FIXED(0, 1);
sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
}
/** Sets up the cache key for sampler state for all texture units */
static void
brw_wm_sampler_populate_key(struct brw_context *brw,
struct wm_sampler_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
int unit;
memset(key, 0, sizeof(*key));
for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
struct wm_sampler_entry *entry = &key->sampler[unit];
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct intel_texture_object *intelObj = intel_texture_object(texObj);
struct gl_texture_image *firstImage =
texObj->Image[0][intelObj->firstLevel];
entry->tex_target = texObj->Target;
entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
? ctx->Texture.CubeMapSeamless : GL_FALSE;
entry->wrap_r = texObj->WrapR;
entry->wrap_s = texObj->WrapS;
entry->wrap_t = texObj->WrapT;
entry->maxlod = texObj->MaxLod;
entry->minlod = texObj->MinLod;
entry->lod_bias = texUnit->LodBias + texObj->LodBias;
entry->max_aniso = texObj->MaxAnisotropy;
entry->minfilter = texObj->MinFilter;
entry->magfilter = texObj->MagFilter;
entry->comparemode = texObj->CompareMode;
entry->comparefunc = texObj->CompareFunc;
dri_bo_unreference(brw->wm.sdc_bo[unit]);
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
float bordercolor[4] = {
texObj->BorderColor[0],
texObj->BorderColor[0],
texObj->BorderColor[0],
texObj->BorderColor[0]
};
/* GL specs that border color for depth textures is taken from the
* R channel, while the hardware uses A. Spam R into all the
* channels for safety.
*/
brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
} else {
brw->wm.sdc_bo[unit] = upload_default_color(brw,
texObj->BorderColor);
}
key->sampler_count = unit + 1;
}
}
}
/* All samplers must be uploaded in a single contiguous array, which
* complicates various things. However, this is still too confusing -
* FIXME: simplify all the different new texture state flags.
*/
static void upload_wm_samplers( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct wm_sampler_key key;
int i;
brw_wm_sampler_populate_key(brw, &key);
if (brw->wm.sampler_count != key.sampler_count) {
brw->wm.sampler_count = key.sampler_count;
brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
}
dri_bo_unreference(brw->wm.sampler_bo);
brw->wm.sampler_bo = NULL;
if (brw->wm.sampler_count == 0)
return;
brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
&key, sizeof(key),
brw->wm.sdc_bo, key.sampler_count,
NULL);
/* If we didn't find it in the cache, compute the state and put it in the
* cache.
*/
if (brw->wm.sampler_bo == NULL) {
struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
memset(sampler, 0, sizeof(sampler));
for (i = 0; i < key.sampler_count; i++) {
if (brw->wm.sdc_bo[i] == NULL)
continue;
brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
&sampler[i]);
}
brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
&key, sizeof(key),
brw->wm.sdc_bo, key.sampler_count,
&sampler, sizeof(sampler),
NULL, NULL);
/* Emit SDC relocations */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
dri_bo_emit_reloc(brw->wm.sampler_bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0,
i * sizeof(struct brw_sampler_state) +
offsetof(struct brw_sampler_state, ss2),
brw->wm.sdc_bo[i]);
}
}
}
const struct brw_tracked_state brw_wm_samplers = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = 0,
.cache = 0
},
.prepare = upload_wm_samplers,
};

View File

@ -0,0 +1,317 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
/***********************************************************************
* WM unit - fragment programs and rasterization
*/
struct brw_wm_unit_key {
unsigned int total_grf, total_scratch;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int dispatch_grf_start_reg;
unsigned int curbe_offset;
unsigned int urb_size;
unsigned int max_threads;
unsigned int nr_surfaces, sampler_count;
GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
GLfloat offset_units, offset_factor;
};
static void
wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
key->max_threads = 1;
else {
/* WM maximum threads is the number of EUs times the number of threads per EU. */
if (BRW_IS_IGDNG(brw))
key->max_threads = 12 * 6;
else if (BRW_IS_G4X(brw))
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
}
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
/* BRW_NEW_URB_FENCE */
key->urb_size = brw->urb.vsize;
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
/* BRW_NEW_NR_SURFACEs */
key->nr_surfaces = brw->wm.nr_surfaces;
/* CACHE_NEW_SAMPLER */
key->sampler_count = brw->wm.sampler_count;
/* _NEW_POLYGONSTIPPLE */
key->polygon_stipple = ctx->Polygon.StippleFlag;
/* BRW_NEW_FRAGMENT_PROGRAM */
key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
/* as far as we can tell */
key->computes_depth =
(fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
/* BRW_NEW_DEPTH_BUFFER
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
*/
if (brw->state.depth_region == NULL)
key->computes_depth = 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
key->is_glsl = bfp->isGLSL;
/* temporary sanity check assertion */
ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
/* _NEW_DEPTH */
key->stats_wm = intel->stats_wm;
/* _NEW_LINE */
key->line_stipple = ctx->Line.StippleFlag;
/* _NEW_POLYGON */
key->offset_enable = ctx->Polygon.OffsetFill;
key->offset_units = ctx->Polygon.OffsetUnits;
key->offset_factor = ctx->Polygon.OffsetFactor;
}
/**
* Setup wm hardware state. See page 225 of Volume 2
*/
static dri_bo *
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
dri_bo **reloc_bufs)
{
struct brw_wm_unit_state wm;
dri_bo *bo;
memset(&wm, 0, sizeof(wm));
wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
if (BRW_IS_IGDNG(brw))
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
brw->wm.scratch_bo->offset >> 10; /* reloc */
wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
} else {
wm.thread2.scratch_space_base_pointer = 0;
wm.thread2.per_thread_scratch_space = 0;
}
wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
wm.thread3.urb_entry_read_offset = 0;
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
if (BRW_IS_IGDNG(brw))
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
if (brw->wm.sampler_bo != NULL) {
/* reloc */
wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
} else {
wm.wm4.sampler_state_pointer = 0;
}
wm.wm5.program_uses_depth = key->uses_depth;
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
if (key->is_glsl)
wm.wm5.enable_8_pix = 1;
else
wm.wm5.enable_16_pix = 1;
wm.wm5.max_threads = key->max_threads - 1;
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
wm.wm5.legacy_line_rast = 0;
wm.wm5.legacy_global_depth_bias = 0;
wm.wm5.early_depth_test = 1; /* never need to disable */
wm.wm5.line_aa_region_width = 0;
wm.wm5.line_endcap_aa_region_width = 1;
wm.wm5.polygon_stipple = key->polygon_stipple;
if (key->offset_enable) {
wm.wm5.depth_offset = 1;
/* Something weird is going on with legacy_global_depth_bias,
* offset_constant, scaling and MRD. This value passes glean
* but gives some odd results elsewhere (e.g. the
* quad-offset-units test).
*/
wm.global_depth_offset_constant = key->offset_units * 2;
/* This is the only value that passes glean:
*/
wm.global_depth_offset_scale = key->offset_factor;
}
wm.wm5.line_stipple = key->line_stipple;
if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
wm.wm4.stats_enable = 1;
bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
key, sizeof(*key),
reloc_bufs, 3,
&wm, sizeof(wm),
NULL, NULL);
/* Emit WM program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
wm.thread0.grf_reg_count << 1,
offsetof(struct brw_wm_unit_state, thread0),
brw->wm.prog_bo);
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
dri_bo_emit_reloc(bo,
0, 0,
wm.thread2.per_thread_scratch_space,
offsetof(struct brw_wm_unit_state, thread2),
brw->wm.scratch_bo);
}
/* Emit sampler state relocation */
if (key->sampler_count != 0) {
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
offsetof(struct brw_wm_unit_state, wm4),
brw->wm.sampler_bo);
}
return bo;
}
static void upload_wm_unit( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
dri_bo *reloc_bufs[3];
wm_unit_populate_key(brw, &key);
/* Allocate the necessary scratch space if we haven't already. Don't
* bother reducing the allocation later, since we use scratch so
* rarely.
*/
assert(key.total_scratch <= 12 * 1024);
if (key.total_scratch) {
GLuint total = key.total_scratch * key.max_threads;
if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
dri_bo_unreference(brw->wm.scratch_bo);
brw->wm.scratch_bo = NULL;
}
if (brw->wm.scratch_bo == NULL) {
brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
"wm scratch",
total,
4096);
}
}
reloc_bufs[0] = brw->wm.prog_bo;
reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;
dri_bo_unreference(brw->wm.state_bo);
brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
&key, sizeof(key),
reloc_bufs, 3,
NULL);
if (brw->wm.state_bo == NULL) {
brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
}
}
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_POLYGONSTIPPLE |
_NEW_LINE |
_NEW_COLOR |
_NEW_DEPTH),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_DEPTH_BUFFER |
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
},
.prepare = upload_wm_unit,
};

View File

@ -0,0 +1,752 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/mtypes.h"
#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
static GLuint translate_tex_target( GLenum target )
{
switch (target) {
case GL_TEXTURE_1D:
return BRW_SURFACE_1D;
case GL_TEXTURE_RECTANGLE_NV:
return BRW_SURFACE_2D;
case GL_TEXTURE_2D:
return BRW_SURFACE_2D;
case GL_TEXTURE_3D:
return BRW_SURFACE_3D;
case GL_TEXTURE_CUBE_MAP:
return BRW_SURFACE_CUBE;
default:
assert(0);
return 0;
}
}
static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
GLenum depth_mode )
{
switch( mesa_format ) {
case MESA_FORMAT_L8:
return BRW_SURFACEFORMAT_L8_UNORM;
case MESA_FORMAT_I8:
return BRW_SURFACEFORMAT_I8_UNORM;
case MESA_FORMAT_A8:
return BRW_SURFACEFORMAT_A8_UNORM;
case MESA_FORMAT_AL88:
return BRW_SURFACEFORMAT_L8A8_UNORM;
case MESA_FORMAT_RGB888:
assert(0); /* not supported for sampling */
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
case MESA_FORMAT_ARGB8888:
if (internal_format == GL_RGB)
return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
else
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
case MESA_FORMAT_RGBA8888_REV:
if (internal_format == GL_RGB)
return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
else
return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
case MESA_FORMAT_RGB565:
return BRW_SURFACEFORMAT_B5G6R5_UNORM;
case MESA_FORMAT_ARGB1555:
return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
case MESA_FORMAT_ARGB4444:
return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
case MESA_FORMAT_YCBCR_REV:
return BRW_SURFACEFORMAT_YCRCB_NORMAL;
case MESA_FORMAT_YCBCR:
return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
case MESA_FORMAT_RGB_FXT1:
case MESA_FORMAT_RGBA_FXT1:
return BRW_SURFACEFORMAT_FXT1;
case MESA_FORMAT_Z16:
if (depth_mode == GL_INTENSITY)
return BRW_SURFACEFORMAT_I16_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A16_UNORM;
else
return BRW_SURFACEFORMAT_L16_UNORM;
case MESA_FORMAT_RGB_DXT1:
return BRW_SURFACEFORMAT_DXT1_RGB;
case MESA_FORMAT_RGBA_DXT1:
return BRW_SURFACEFORMAT_BC1_UNORM;
case MESA_FORMAT_RGBA_DXT3:
return BRW_SURFACEFORMAT_BC2_UNORM;
case MESA_FORMAT_RGBA_DXT5:
return BRW_SURFACEFORMAT_BC3_UNORM;
case MESA_FORMAT_SARGB8:
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
case MESA_FORMAT_SLA8:
return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
case MESA_FORMAT_SL8:
return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
case MESA_FORMAT_SRGB_DXT1:
return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
case MESA_FORMAT_S8_Z24:
/* XXX: these different surface formats don't seem to
* make any difference for shadow sampler/compares.
*/
if (depth_mode == GL_INTENSITY)
return BRW_SURFACEFORMAT_I24X8_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A24X8_UNORM;
else
return BRW_SURFACEFORMAT_L24X8_UNORM;
case MESA_FORMAT_DUDV8:
return BRW_SURFACEFORMAT_R8G8_SNORM;
case MESA_FORMAT_SIGNED_RGBA8888_REV:
return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
default:
assert(0);
return 0;
}
}
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
switch (tiling) {
case I915_TILING_NONE:
surf->ss3.tiled_surface = 0;
surf->ss3.tile_walk = 0;
break;
case I915_TILING_X:
surf->ss3.tiled_surface = 1;
surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
break;
case I915_TILING_Y:
surf->ss3.tiled_surface = 1;
surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
break;
}
}
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
struct brw_surface_key *key )
{
struct brw_surface_state surf;
dri_bo *bo;
memset(&surf, 0, sizeof(surf));
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf.ss0.surface_type = translate_tex_target(key->target);
if (key->bo) {
surf.ss0.surface_format = translate_tex_format(key->format,
key->internal_format,
key->depthmode);
}
else {
switch (key->depth) {
case 32:
surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
default:
case 24:
surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
break;
case 16:
surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
break;
}
}
/* This is ok for all textures with channel width 8bit or less:
*/
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
if (key->bo)
surf.ss1.base_addr = key->bo->offset; /* reloc */
else
surf.ss1.base_addr = key->offset;
surf.ss2.mip_count = key->last_level - key->first_level;
surf.ss2.width = key->width - 1;
surf.ss2.height = key->height - 1;
brw_set_surface_tiling(&surf, key->tiling);
surf.ss3.pitch = (key->pitch * key->cpp) - 1;
surf.ss3.depth = key->depth - 1;
surf.ss4.min_lod = 0;
if (key->target == GL_TEXTURE_CUBE_MAP) {
surf.ss0.cube_pos_x = 1;
surf.ss0.cube_pos_y = 1;
surf.ss0.cube_pos_z = 1;
surf.ss0.cube_neg_x = 1;
surf.ss0.cube_neg_y = 1;
surf.ss0.cube_neg_z = 1;
}
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
NULL, NULL);
if (key->bo) {
/* Emit relocation to surface contents */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0,
offsetof(struct brw_surface_state, ss1),
key->bo);
}
return bo;
}
static void
brw_update_texture_surface( GLcontext *ctx, GLuint unit )
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
struct brw_surface_key key;
const GLuint surf = SURF_INDEX_TEXTURE(unit);
memset(&key, 0, sizeof(key));
if (intelObj->imageOverride) {
key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
key.depth = intelObj->depthOverride;
key.bo = NULL;
key.offset = intelObj->textureOffset;
} else {
key.format = firstImage->TexFormat->MesaFormat;
key.internal_format = firstImage->InternalFormat;
key.pitch = intelObj->mt->pitch;
key.depth = firstImage->Depth;
key.bo = intelObj->mt->region->buffer;
key.offset = 0;
}
key.target = tObj->Target;
key.depthmode = tObj->DepthMode;
key.first_level = intelObj->firstLevel;
key.last_level = intelObj->lastLevel;
key.width = firstImage->Width;
key.height = firstImage->Height;
key.cpp = intelObj->mt->cpp;
key.tiling = intelObj->mt->region->tiling;
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
}
/**
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
dri_bo *
brw_create_constant_surface( struct brw_context *brw,
struct brw_surface_key *key )
{
const GLint w = key->width - 1;
struct brw_surface_state surf;
dri_bo *bo;
memset(&surf, 0, sizeof(surf));
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
assert(key->bo);
if (key->bo)
surf.ss1.base_addr = key->bo->offset; /* reloc */
else
surf.ss1.base_addr = key->offset;
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
NULL, NULL);
if (key->bo) {
/* Emit relocation to surface contents */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0,
offsetof(struct brw_surface_state, ss1),
key->bo);
}
return bo;
}
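/* Worked example (hypothetical size, for illustration only): a constant
* buffer holding 256 vec4 elements gives key->width == 256, so w == 255.
* The packing above splits that value across the surface state fields as
* ss2.width = 255 & 0x7f = 127 (bits 6:0),
* ss2.height = (255 >> 7) & 0x1fff = 1 (bits 19:7),
* ss3.depth = (255 >> 20) & 0x7f = 0 (bits 26:20),
* i.e. the element count minus one, as BRW_SURFACE_BUFFER expects.
*/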
/* Creates a new WM constant buffer reflecting the current fragment program's
* constants, if needed by the fragment program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static drm_intel_bo *
brw_wm_update_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
drm_intel_bo *const_buffer;
/* BRW_NEW_FRAGMENT_PROGRAM */
if (!fp->use_const_buffer)
return NULL;
const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
size, 64);
/* _NEW_PROGRAM_CONSTANTS */
dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
return const_buffer;
}
/**
* Update the surface state for a WM constant buffer.
* The constant buffer will be (re)allocated here if needed.
*/
static void
brw_update_wm_constant_surface( GLcontext *ctx,
GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
struct brw_surface_key key;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
const struct gl_program_parameter_list *params =
fp->program.Base.Parameters;
/* If we're in this state update atom, we need to update WM constants, so
* free the old buffer and create a new one for the new contents.
*/
dri_bo_unreference(fp->const_buffer);
fp->const_buffer = brw_wm_update_constant_buffer(brw);
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
if (fp->const_buffer == 0) {
drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
return;
}
memset(&key, 0, sizeof(key));
key.format = MESA_FORMAT_RGBA_FLOAT32;
key.internal_format = GL_RGBA;
key.bo = fp->const_buffer;
key.depthmode = GL_NONE;
key.pitch = params->NumParameters;
key.width = params->NumParameters;
key.height = 1;
key.depth = 1;
key.cpp = 16;
/*
printf("%s:\n", __FUNCTION__);
printf(" width %d height %d depth %d cpp %d pitch %d\n",
key.width, key.height, key.depth, key.cpp, key.pitch);
*/
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
}
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
/**
* Updates surface / buffer for fragment shader constant buffer, if
* one is required.
*
* This consumes the state updates for the constant buffer, and produces
* BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
* inclusion in the binding table.
*/
static void prepare_wm_constant_surface(struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
drm_intel_bo_unreference(fp->const_buffer);
fp->const_buffer = brw_wm_update_constant_buffer(brw);
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
if (fp->const_buffer == 0) {
if (brw->wm.surf_bo[surf] != NULL) {
drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
return;
}
brw_update_wm_constant_surface(ctx, surf);
}
const struct brw_tracked_state brw_wm_constant_surface = {
.dirty = {
.mesa = (_NEW_PROGRAM_CONSTANTS),
.brw = (BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0
},
.prepare = prepare_wm_constant_surface,
};
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
* usable for further buffers when doing ARB_draw_buffers support.
*/
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
unsigned int unit)
{
GLcontext *ctx = &brw->intel.ctx;
dri_bo *region_bo = NULL;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb ? irb->region : NULL;
struct {
unsigned int surface_type;
unsigned int surface_format;
unsigned int width, height, pitch, cpp;
GLubyte color_mask[4];
GLboolean color_blend;
uint32_t tiling;
uint32_t draw_offset;
} key;
memset(&key, 0, sizeof(key));
if (region != NULL) {
region_bo = region->buffer;
key.surface_type = BRW_SURFACE_2D;
switch (irb->texformat->MesaFormat) {
case MESA_FORMAT_ARGB8888:
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
case MESA_FORMAT_RGB565:
key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
break;
case MESA_FORMAT_ARGB1555:
key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
break;
case MESA_FORMAT_ARGB4444:
key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
break;
default:
_mesa_problem(ctx, "Bad renderbuffer format: %d\n",
irb->texformat->MesaFormat);
}
key.tiling = region->tiling;
if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
key.width = rb->Width;
key.height = rb->Height;
} else {
key.width = region->width;
key.height = region->height;
}
key.pitch = region->pitch;
key.cpp = region->cpp;
key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
key.tiling = I915_TILING_X;
key.width = 1;
key.height = 1;
key.cpp = 4;
key.draw_offset = 0;
}
memcpy(key.color_mask, ctx->Color.ColorMask,
sizeof(key.color_mask));
key.color_blend = (!ctx->Color._LogicOpEnabled &&
ctx->Color.BlendEnabled);
dri_bo_unreference(brw->wm.surf_bo[unit]);
brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
NULL);
if (brw->wm.surf_bo[unit] == NULL) {
struct brw_surface_state surf;
memset(&surf, 0, sizeof(surf));
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
if (key.tiling == I915_TILING_NONE) {
surf.ss1.base_addr = key.draw_offset;
} else {
uint32_t tile_offset = key.draw_offset % 4096;
surf.ss1.base_addr = key.draw_offset - tile_offset;
assert(BRW_IS_G4X(brw) || tile_offset == 0);
if (BRW_IS_G4X(brw)) {
if (key.tiling == I915_TILING_X) {
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.
*/
surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
surf.ss5.y_offset = tile_offset / 512 / 2;
} else {
surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
surf.ss5.y_offset = tile_offset / 128 / 2;
}
}
}
if (region_bo != NULL)
surf.ss1.base_addr += region_bo->offset; /* reloc */
surf.ss2.width = key.width - 1;
surf.ss2.height = key.height - 1;
brw_set_surface_tiling(&surf, key.tiling);
surf.ss3.pitch = (key.pitch * key.cpp) - 1;
/* _NEW_COLOR */
surf.ss0.color_blend = key.color_blend;
surf.ss0.writedisable_red = !key.color_mask[0];
surf.ss0.writedisable_green = !key.color_mask[1];
surf.ss0.writedisable_blue = !key.color_mask[2];
surf.ss0.writedisable_alpha = !key.color_mask[3];
/* The key size here will never match the key size used for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
NULL, NULL);
if (region_bo != NULL) {
/* We might sample from it, and we might render to it, so flag
* them both. We might be able to figure out from other state
* a more restrictive relocation to emit.
*/
drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
offsetof(struct brw_surface_state, ss1),
region_bo,
surf.ss1.base_addr - region_bo->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
}
}
}
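/* Worked example (hypothetical offsets, for illustration only): with a
* 32bpp X-tiled region (key.cpp == 4) and key.draw_offset == 5152, the
* code above computes tile_offset = 5152 % 4096 = 1056 and bases ss1 at
* draw_offset - tile_offset = 4096. On G4X the remainder is expressed
* through ss5: x_offset = (1056 % 512) / 4 / 4 = 2 and
* y_offset = 1056 / 512 / 2 = 1, i.e. 8 pixels right and 2 rows down
* within the tile, since the fields are in units of 4 pixels and 2 rows.
*/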
/**
* Constructs the binding table for the WM surface state, which maps unit
* numbers to surface state objects.
*/
static dri_bo *
brw_wm_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
NULL);
if (bind_bo == NULL) {
GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
uint32_t data[BRW_WM_MAX_SURF];
int i;
for (i = 0; i < brw->wm.nr_surfaces; i++)
if (brw->wm.surf_bo[i])
data[i] = brw->wm.surf_bo[i]->offset;
else
data[i] = 0;
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
data, data_size,
NULL, NULL);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
if (brw->wm.surf_bo[i] != NULL) {
dri_bo_emit_reloc(bind_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0,
i * sizeof(GLuint),
brw->wm.surf_bo[i]);
}
}
}
return bind_bo;
}
static void prepare_wm_surfaces(struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
int old_nr_surfaces;
/* _NEW_BUFFERS */
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
brw_update_renderbuffer_surface(brw,
ctx->DrawBuffer->_ColorDrawBuffers[i],
i);
}
} else {
brw_update_renderbuffer_surface(brw, NULL, 0);
}
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
/* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
const GLuint surf = SURF_INDEX_TEXTURE(i);
/* _NEW_TEXTURE, BRW_NEW_TEXDATA */
if (texUnit->_ReallyEnabled) {
brw_update_texture_surface(ctx, i);
brw->wm.nr_surfaces = surf + 1;
} else {
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
}
}
dri_bo_unreference(brw->wm.bind_bo);
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
if (brw->wm.nr_surfaces != old_nr_surfaces)
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
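/* Binding table layout implied by the code above (indices are symbolic;
* the concrete values of SURF_INDEX_FRAG_CONST_BUFFER and
* SURF_INDEX_TEXTURE() are defined elsewhere in the driver):
*
* [0 .. MAX_DRAW_BUFFERS-1] color/renderbuffer surfaces
* [SURF_INDEX_FRAG_CONST_BUFFER] fragment constant buffer, if present
* [SURF_INDEX_TEXTURE(0) ..] one surface per enabled texture unit
*
* nr_surfaces ends up one past the highest index in use, and
* BRW_NEW_NR_WM_SURFACES is flagged whenever that count changes so that
* dependent state such as the WM unit gets re-uploaded.
*/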
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_TEXTURE |
_NEW_BUFFERS),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_WM_SURFACES),
.cache = 0
},
.prepare = prepare_wm_surfaces,
};

View File

@ -0,0 +1,184 @@
#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H
#include "main/mtypes.h"
#include "intel_context.h"
#include "intel_bufmgr.h"
#include "intel_reg.h"
#define BATCH_SZ 16384
#define BATCH_RESERVED 16
enum cliprect_mode {
/**
* Batchbuffer contents may be looped over per cliprect, but do not
* require it.
*/
IGNORE_CLIPRECTS,
/**
* Batchbuffer contents require looping over per cliprect at batch submit
* time.
*
* This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
* constant cliprect, as in DRI2 or FBO rendering.
*/
LOOP_CLIPRECTS,
/**
* Batchbuffer contents contain drawing that should not be executed multiple
* times.
*/
NO_LOOP_CLIPRECTS,
/**
* Batchbuffer contents contain drawing that already handles cliprects, such
* as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
*
* Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
* outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when
* there's a constant cliprect, as in DRI2 or FBO rendering.
*/
REFERENCES_CLIPRECTS
};
struct intel_batchbuffer
{
struct intel_context *intel;
dri_bo *buf;
GLubyte *buffer;
GLubyte *map;
GLubyte *ptr;
enum cliprect_mode cliprect_mode;
GLuint size;
/** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
struct {
GLuint total;
GLubyte *start_ptr;
} emit;
GLuint dirty_state;
};
struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
*intel);
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
const char *file, int line);
#define intel_batchbuffer_flush(batch) \
_intel_batchbuffer_flush(batch, __FILE__, __LINE__)
void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
/* Unlike bmBufferData, this currently requires the buffer be mapped.
* Consider it a convenience function wrapping multiple
* intel_batchbuffer_emit_dword() calls.
*/
void intel_batchbuffer_data(struct intel_batchbuffer *batch,
const void *data, GLuint bytes,
enum cliprect_mode cliprect_mode);
void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
GLuint bytes);
GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
dri_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
* be passed as structs rather than dwords, but that's a little bit of
* work...
*/
static INLINE GLint
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
}
static INLINE void
intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
{
assert(batch->map);
assert(intel_batchbuffer_space(batch) >= 4);
*(GLuint *) (batch->ptr) = dword;
batch->ptr += 4;
}
static INLINE void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
GLuint sz,
enum cliprect_mode cliprect_mode)
{
assert(sz < batch->size - 8);
if (intel_batchbuffer_space(batch) < sz)
intel_batchbuffer_flush(batch);
if ((cliprect_mode == LOOP_CLIPRECTS ||
cliprect_mode == REFERENCES_CLIPRECTS) &&
batch->intel->constant_cliprect)
cliprect_mode = NO_LOOP_CLIPRECTS;
if (cliprect_mode != IGNORE_CLIPRECTS) {
if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
batch->cliprect_mode = cliprect_mode;
} else {
if (batch->cliprect_mode != cliprect_mode) {
intel_batchbuffer_flush(batch);
batch->cliprect_mode = cliprect_mode;
}
}
}
}
/* Here are the crusty old macros, to be removed:
*/
#define BATCH_LOCALS
#define BEGIN_BATCH(n, cliprect_mode) do { \
intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
assert(intel->batch->emit.start_ptr == NULL); \
intel->batch->emit.total = (n) * 4; \
intel->batch->emit.start_ptr = intel->batch->ptr; \
} while (0)
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
assert((unsigned) (delta) < buf->size); \
intel_batchbuffer_emit_reloc(intel->batch, buf, \
read_domains, write_domain, delta); \
} while (0)
#define ADVANCE_BATCH() do { \
unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \
assert(intel->batch->emit.start_ptr != NULL); \
if (_n != intel->batch->emit.total) { \
fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \
_n, intel->batch->emit.total); \
abort(); \
} \
intel->batch->emit.start_ptr = NULL; \
} while(0)
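/* Usage sketch (hypothetical packet, for illustration only; assumes a
* "struct intel_context *intel" and a "dri_bo *bo" in scope, as the
* macros require):
*
* BEGIN_BATCH(3, IGNORE_CLIPRECTS);
* OUT_BATCH(MADE_UP_OPCODE_DWORD);
* OUT_BATCH(0);
* OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
* ADVANCE_BATCH();
*
* BEGIN_BATCH reserves space (flushing the batch first if necessary) and
* records the expected dword count; each OUT_* call emits one dword, and
* ADVANCE_BATCH aborts if a different number was emitted, which is how
* malformed packets get caught during development.
*/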
static INLINE void
intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
{
intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
intel_batchbuffer_emit_dword(batch, MI_FLUSH);
}
#endif

View File

@ -0,0 +1,118 @@
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_E7221_G 0x258A
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_IGD_GM 0xA011
#define PCI_CHIP_IGD_G 0xA001
#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_G_1 0x2982
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_IGD_E_G 0x2E02
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_GM45_GM || \
IS_IGD(devid) || \
devid == PCI_CHIP_ILM_G)
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G || \
devid == PCI_CHIP_B43_G)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
#define IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid))
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
devid == PCI_CHIP_I915_GM)
#define IS_945(devid) (devid == PCI_CHIP_I945_G || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_G33_G || \
devid == PCI_CHIP_Q33_G || \
devid == PCI_CHIP_Q35_G || IS_IGD(devid))
#define IS_965(devid) (devid == PCI_CHIP_I965_G || \
devid == PCI_CHIP_I965_Q || \
devid == PCI_CHIP_I965_G_1 || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_I946_GZ || \
IS_G4X(devid) || \
IS_IGDNG(devid))
#define IS_9XX(devid) (IS_915(devid) || \
IS_945(devid) || \
IS_965(devid))

View File

@ -0,0 +1,132 @@
#ifndef INTEL_STRUCTS_H
#define INTEL_STRUCTS_H
struct br0 {
GLuint length:8;
GLuint pad0:3;
GLuint dst_tiled:1;
GLuint pad1:8;
GLuint write_rgb:1;
GLuint write_alpha:1;
GLuint opcode:7;
GLuint client:3;
};
struct br13 {
GLint dest_pitch:16;
GLuint rop:8;
GLuint color_depth:2;
GLuint pad1:3;
GLuint mono_source_transparency:1;
GLuint clipping_enable:1;
GLuint pad0:1;
};
/* This is an attempt to move some of the 2D interaction in this
* driver to using structs for packets rather than a bunch of #defines
* and dwords.
*/
struct xy_color_blit {
struct br0 br0;
struct br13 br13;
struct {
GLuint dest_x1:16;
GLuint dest_y1:16;
} dw2;
struct {
GLuint dest_x2:16;
GLuint dest_y2:16;
} dw3;
GLuint dest_base_addr;
GLuint color;
};
struct xy_src_copy_blit {
struct br0 br0;
struct br13 br13;
struct {
GLuint dest_x1:16;
GLuint dest_y1:16;
} dw2;
struct {
GLuint dest_x2:16;
GLuint dest_y2:16;
} dw3;
GLuint dest_base_addr;
struct {
GLuint src_x1:16;
GLuint src_y1:16;
} dw5;
struct {
GLint src_pitch:16;
GLuint pad:16;
} dw6;
GLuint src_base_addr;
};
struct xy_setup_blit {
struct br0 br0;
struct br13 br13;
struct {
GLuint clip_x1:16;
GLuint clip_y1:16;
} dw2;
struct {
GLuint clip_x2:16;
GLuint clip_y2:16;
} dw3;
GLuint dest_base_addr;
GLuint background_color;
GLuint foreground_color;
GLuint pattern_base_addr;
};
struct xy_text_immediate_blit {
struct {
GLuint length:8;
GLuint pad2:3;
GLuint dst_tiled:1;
GLuint pad1:4;
GLuint byte_packed:1;
GLuint pad0:5;
GLuint opcode:7;
GLuint client:3;
} dw0;
struct {
GLuint dest_x1:16;
GLuint dest_y1:16;
} dw1;
struct {
GLuint dest_x2:16;
GLuint dest_y2:16;
} dw2;
/* Src bitmap data follows as inline dwords.
*/
};
#define CLIENT_2D 0x2
#define OPCODE_XY_SETUP_BLT 0x1
#define OPCODE_XY_COLOR_BLT 0x50
#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
#endif

View File

@ -0,0 +1,225 @@
#include "intel_context.h"
#include "intel_tex.h"
#include "intel_chipset.h"
#include "main/texformat.h"
#include "main/enums.h"
/**
* Choose hardware texture format given the user's glTexImage parameters.
*
* It works out that this function is fine for all the supported
* hardware. However, there is still a need to map the formats onto
* hardware descriptors.
*
* Note that the i915 can actually support many more formats than
* these if we take the step of simply swizzling the colors
* immediately after sampling...
*/
const struct gl_texture_format *
intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
GLenum format, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
#if 0
printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
__FUNCTION__, internalFormat, format, type);
#endif
switch (internalFormat) {
case 4:
case GL_RGBA:
case GL_COMPRESSED_RGBA:
if (format == GL_BGRA) {
if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
return &_mesa_texformat_argb8888;
}
else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
return &_mesa_texformat_argb4444;
}
else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
return &_mesa_texformat_argb1555;
}
}
return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
return &_mesa_texformat_rgb565;
}
return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
case GL_RGBA4:
case GL_RGBA2:
return &_mesa_texformat_argb4444;
case GL_RGB5_A1:
return &_mesa_texformat_argb1555;
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
return &_mesa_texformat_argb8888;
case GL_RGB5:
case GL_RGB4:
case GL_R3_G3_B2:
return &_mesa_texformat_rgb565;
case GL_ALPHA:
case GL_ALPHA4:
case GL_ALPHA8:
case GL_ALPHA12:
case GL_ALPHA16:
case GL_COMPRESSED_ALPHA:
return &_mesa_texformat_a8;
case 1:
case GL_LUMINANCE:
case GL_LUMINANCE4:
case GL_LUMINANCE8:
case GL_LUMINANCE12:
case GL_LUMINANCE16:
case GL_COMPRESSED_LUMINANCE:
return &_mesa_texformat_l8;
case 2:
case GL_LUMINANCE_ALPHA:
case GL_LUMINANCE4_ALPHA4:
case GL_LUMINANCE6_ALPHA2:
case GL_LUMINANCE8_ALPHA8:
case GL_LUMINANCE12_ALPHA4:
case GL_LUMINANCE12_ALPHA12:
case GL_LUMINANCE16_ALPHA16:
case GL_COMPRESSED_LUMINANCE_ALPHA:
return &_mesa_texformat_al88;
case GL_INTENSITY:
case GL_INTENSITY4:
case GL_INTENSITY8:
case GL_INTENSITY12:
case GL_INTENSITY16:
case GL_COMPRESSED_INTENSITY:
return &_mesa_texformat_i8;
case GL_YCBCR_MESA:
if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
return &_mesa_texformat_ycbcr;
else
return &_mesa_texformat_ycbcr_rev;
case GL_COMPRESSED_RGB_FXT1_3DFX:
return &_mesa_texformat_rgb_fxt1;
case GL_COMPRESSED_RGBA_FXT1_3DFX:
return &_mesa_texformat_rgba_fxt1;
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
return &_mesa_texformat_rgb_dxt1;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
return &_mesa_texformat_rgba_dxt1;
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
return &_mesa_texformat_rgba_dxt3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return &_mesa_texformat_rgba_dxt5;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
#if 0
return &_mesa_texformat_z16;
#else
/* fall-through.
* 16bpp depth texture can't be paired with a stencil buffer so
* always use the combined depth/stencil format.
*/
#endif
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
return &_mesa_texformat_s8_z24;
#ifndef I915
case GL_SRGB_EXT:
case GL_SRGB8_EXT:
case GL_SRGB_ALPHA_EXT:
case GL_SRGB8_ALPHA8_EXT:
case GL_COMPRESSED_SRGB_EXT:
case GL_COMPRESSED_SRGB_ALPHA_EXT:
case GL_COMPRESSED_SLUMINANCE_EXT:
case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
return &_mesa_texformat_sargb8;
case GL_SLUMINANCE_EXT:
case GL_SLUMINANCE8_EXT:
if (IS_G4X(intel->intelScreen->deviceID))
return &_mesa_texformat_sl8;
else
return &_mesa_texformat_sargb8;
case GL_SLUMINANCE_ALPHA_EXT:
case GL_SLUMINANCE8_ALPHA8_EXT:
if (IS_G4X(intel->intelScreen->deviceID))
return &_mesa_texformat_sla8;
else
return &_mesa_texformat_sargb8;
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
return &_mesa_texformat_srgb_dxt1;
/* i915 could also do this */
case GL_DUDV_ATI:
case GL_DU8DV8_ATI:
return &_mesa_texformat_dudv8;
case GL_RGBA_SNORM:
case GL_RGBA8_SNORM:
return &_mesa_texformat_signed_rgba8888_rev;
#endif
default:
fprintf(stderr, "unexpected texture format %s in %s\n",
_mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
return NULL;
}
return NULL; /* never get here */
}
int intel_compressed_num_bytes(GLuint mesaFormat)
{
int bytes = 0;
switch(mesaFormat) {
case MESA_FORMAT_RGB_FXT1:
case MESA_FORMAT_RGBA_FXT1:
case MESA_FORMAT_RGB_DXT1:
case MESA_FORMAT_RGBA_DXT1:
bytes = 2;
break;
case MESA_FORMAT_RGBA_DXT3:
case MESA_FORMAT_RGBA_DXT5:
bytes = 4;
break;
default:
break;
}
return bytes;
}

View File

@ -0,0 +1,140 @@
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Michel Dänzer <michel@tungstengraphics.com>
*/
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "intel_context.h"
#include "main/macros.h"
void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
{
switch (internalFormat) {
case GL_COMPRESSED_RGB_FXT1_3DFX:
case GL_COMPRESSED_RGBA_FXT1_3DFX:
*w = 8;
*h = 4;
break;
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
*w = 4;
*h = 4;
break;
default:
*w = 4;
*h = 2;
break;
}
}
void i945_miptree_layout_2d( struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t tiling )
{
GLuint align_h = 2, align_w = 4;
GLuint level;
GLuint x = 0;
GLuint y = 0;
GLuint width = mt->width0;
GLuint height = mt->height0;
mt->pitch = mt->width0;
intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
if (mt->compressed) {
mt->pitch = ALIGN(mt->width0, align_w);
}
/* May need to adjust pitch to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
* constraints of mipmap placement push the right edge of the
* 2nd mipmap out past the width of its parent.
*/
if (mt->first_level != mt->last_level) {
GLuint mip1_width;
if (mt->compressed) {
mip1_width = ALIGN(minify(mt->width0), align_w)
+ ALIGN(minify(minify(mt->width0)), align_w);
} else {
mip1_width = ALIGN(minify(mt->width0), align_w)
+ minify(minify(mt->width0));
}
if (mip1_width > mt->pitch) {
mt->pitch = mip1_width;
}
}
/* Pitch must be a whole number of dwords, even though we
* express it in texels.
*/
mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
mt->total_height = 0;
for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
GLuint img_height;
intel_miptree_set_level_info(mt, level, 1, x, y, width,
height, 1);
if (mt->compressed)
img_height = MAX2(1, height/4);
else
img_height = ALIGN(height, align_h);
/* Because the images are packed better, the final offset
* might not be the maximal one:
*/
mt->total_height = MAX2(mt->total_height, y + img_height);
/* Layout_below: step right after second mipmap.
*/
if (level == mt->first_level + 1) {
x += ALIGN(width, align_w);
}
else {
y += img_height;
}
width = minify(width);
height = minify(height);
}
}
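/* Worked example (hypothetical texture, for illustration only): an
* uncompressed 8x8 map with four levels and the default 4x2 alignment
* unit lays out as follows under the loop above:
*
* level 0 at (0, 0) 8x8, img_height = 8
* level 1 at (0, 8) 4x4, img_height = 4 (x steps right afterwards)
* level 2 at (4, 8) 2x2, img_height = 2
* level 3 at (4, 10) 1x1, img_height = 2
*
* total_height ends up at 12 rows, and the pitch stays at width0 (8
* texels) because mip1_width = ALIGN(4, 4) + 2 = 6 does not exceed it;
* intel_miptree_pitch_align() then rounds the pitch up as needed.
*/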