i965: Port Gen4-5 VS_STATE to genxml.
It's actually not that much code.

Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
This commit is contained in:
parent 4933c3d16e
commit f790d6e0b4
|
@@ -753,7 +753,7 @@
|
|||
<field name="Sampler State Offset" start="165" end="191" type="address"/>
|
||||
<field name="Sampler Count" start="160" end="162" type="uint"/>
|
||||
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
|
||||
<field name="Function Enable" start="192" end="192" type="bool"/>
|
||||
<field name="Enable" start="192" end="192" type="bool"/>
|
||||
</struct>
|
||||
|
||||
<struct name="WM_STATE" length="8">
|
||||
|
|
|
@@ -704,7 +704,7 @@
|
|||
<field name="Sampler State Offset" start="165" end="191" type="address"/>
|
||||
<field name="Sampler Count" start="160" end="162" type="uint"/>
|
||||
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
|
||||
<field name="Function Enable" start="192" end="192" type="bool"/>
|
||||
<field name="Enable" start="192" end="192" type="bool"/>
|
||||
</struct>
|
||||
|
||||
<struct name="WM_STATE" length="8">
|
||||
|
|
|
@@ -860,7 +860,7 @@
|
|||
<field name="Sampler State Offset" start="165" end="191" type="address"/>
|
||||
<field name="Sampler Count" start="160" end="162" type="uint"/>
|
||||
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
|
||||
<field name="Function Enable" start="192" end="192" type="bool"/>
|
||||
<field name="Enable" start="192" end="192" type="bool"/>
|
||||
</struct>
|
||||
|
||||
<struct name="WM_STATE" length="11">
|
||||
|
|
|
@@ -70,7 +70,6 @@ i965_FILES = \
|
|||
brw_util.h \
|
||||
brw_vs.c \
|
||||
brw_vs.h \
|
||||
brw_vs_state.c \
|
||||
brw_vs_surface_state.c \
|
||||
brw_wm.c \
|
||||
brw_wm.h \
|
||||
|
|
|
@@ -79,7 +79,6 @@ extern const struct brw_tracked_state brw_tes_image_surfaces;
|
|||
extern const struct brw_tracked_state brw_gs_ubo_surfaces;
|
||||
extern const struct brw_tracked_state brw_gs_abo_surfaces;
|
||||
extern const struct brw_tracked_state brw_gs_image_surfaces;
|
||||
extern const struct brw_tracked_state brw_vs_unit;
|
||||
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
|
||||
extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
|
||||
extern const struct brw_tracked_state brw_texture_surfaces;
|
||||
|
|
|
@@ -483,41 +483,6 @@ struct brw_gs_unit_state
|
|||
};
|
||||
|
||||
|
||||
struct brw_vs_unit_state
|
||||
{
|
||||
struct thread0 thread0;
|
||||
struct thread1 thread1;
|
||||
struct thread2 thread2;
|
||||
struct thread3 thread3;
|
||||
|
||||
struct
|
||||
{
|
||||
unsigned pad0:10;
|
||||
unsigned stats_enable:1;
|
||||
unsigned nr_urb_entries:7;
|
||||
unsigned pad1:1;
|
||||
unsigned urb_entry_allocation_size:5;
|
||||
unsigned pad2:1;
|
||||
unsigned max_threads:6;
|
||||
unsigned pad3:1;
|
||||
} thread4;
|
||||
|
||||
struct
|
||||
{
|
||||
unsigned sampler_count:3;
|
||||
unsigned pad0:2;
|
||||
unsigned sampler_state_pointer:27;
|
||||
} vs5;
|
||||
|
||||
struct
|
||||
{
|
||||
unsigned vs_enable:1;
|
||||
unsigned vert_cache_disable:1;
|
||||
unsigned pad0:30;
|
||||
} vs6;
|
||||
};
|
||||
|
||||
|
||||
struct brw_wm_unit_state
|
||||
{
|
||||
struct thread0 thread0;
|
||||
|
|
|
@@ -1,192 +0,0 @@
|
|||
/*
|
||||
Copyright (C) Intel Corp. 2006. All Rights Reserved.
|
||||
Intel funded Tungsten Graphics to
|
||||
develop this 3D driver.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the
|
||||
next paragraph) shall be included in all copies or substantial
|
||||
portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
**********************************************************************/
|
||||
/*
|
||||
* Authors:
|
||||
* Keith Whitwell <keithw@vmware.com>
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_defines.h"
|
||||
#include "main/macros.h"
|
||||
|
||||
static void
|
||||
brw_upload_vs_unit(struct brw_context *brw)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
struct brw_stage_state *stage_state = &brw->vs.base;
|
||||
const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
|
||||
const struct brw_vue_prog_data *vue_prog_data =
|
||||
brw_vue_prog_data(stage_state->prog_data);
|
||||
|
||||
struct brw_vs_unit_state *vs;
|
||||
|
||||
vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset);
|
||||
memset(vs, 0, sizeof(*vs));
|
||||
|
||||
/* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */
|
||||
vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1;
|
||||
vs->thread0.kernel_start_pointer =
|
||||
brw_program_reloc(brw,
|
||||
stage_state->state_offset +
|
||||
offsetof(struct brw_vs_unit_state, thread0),
|
||||
stage_state->prog_offset +
|
||||
(vs->thread0.grf_reg_count << 1)) >> 6;
|
||||
|
||||
if (prog_data->use_alt_mode)
|
||||
vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
|
||||
else
|
||||
vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
|
||||
|
||||
/* Choosing multiple program flow means that we may get 2-vertex threads,
|
||||
* which will have the channel mask for dwords 4-7 enabled in the thread,
|
||||
* and those dwords will be written to the second URB handle when we
|
||||
* brw_urb_WRITE() results.
|
||||
*/
|
||||
/* Force single program flow on Ironlake. We cannot reliably get
|
||||
* all applications working without it. See:
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=29172
|
||||
*
|
||||
* The most notable and reliably failing application is the Humus
|
||||
* demo "CelShading"
|
||||
*/
|
||||
vs->thread1.single_program_flow = (brw->gen == 5);
|
||||
|
||||
vs->thread1.binding_table_entry_count =
|
||||
prog_data->binding_table.size_bytes / 4;
|
||||
|
||||
if (prog_data->total_scratch != 0) {
|
||||
vs->thread2.scratch_space_base_pointer =
|
||||
stage_state->scratch_bo->offset64 >> 10; /* reloc */
|
||||
vs->thread2.per_thread_scratch_space =
|
||||
ffs(stage_state->per_thread_scratch) - 11;
|
||||
} else {
|
||||
vs->thread2.scratch_space_base_pointer = 0;
|
||||
vs->thread2.per_thread_scratch_space = 0;
|
||||
}
|
||||
|
||||
vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length;
|
||||
vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length;
|
||||
vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg;
|
||||
vs->thread3.urb_entry_read_offset = 0;
|
||||
|
||||
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
|
||||
vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
|
||||
|
||||
/* BRW_NEW_URB_FENCE */
|
||||
if (brw->gen == 5) {
|
||||
switch (brw->urb.nr_vs_entries) {
|
||||
case 8:
|
||||
case 12:
|
||||
case 16:
|
||||
case 32:
|
||||
case 64:
|
||||
case 96:
|
||||
case 128:
|
||||
case 168:
|
||||
case 192:
|
||||
case 224:
|
||||
case 256:
|
||||
vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
} else {
|
||||
switch (brw->urb.nr_vs_entries) {
|
||||
case 8:
|
||||
case 12:
|
||||
case 16:
|
||||
case 32:
|
||||
break;
|
||||
case 64:
|
||||
assert(brw->is_g4x);
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
|
||||
}
|
||||
|
||||
vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
|
||||
|
||||
vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
|
||||
1, devinfo->max_vs_threads) - 1;
|
||||
|
||||
if (brw->gen == 5)
|
||||
vs->vs5.sampler_count = 0; /* hardware requirement */
|
||||
else {
|
||||
vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
|
||||
}
|
||||
|
||||
/* Vertex program always enabled:
|
||||
*/
|
||||
vs->vs6.vs_enable = 1;
|
||||
|
||||
/* Set the sampler state pointer, and its reloc
|
||||
*/
|
||||
if (stage_state->sampler_count) {
|
||||
/* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
|
||||
vs->vs5.sampler_state_pointer =
|
||||
(brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
|
||||
brw_emit_reloc(&brw->batch,
|
||||
stage_state->state_offset +
|
||||
offsetof(struct brw_vs_unit_state, vs5),
|
||||
brw->batch.bo,
|
||||
(stage_state->sampler_offset | vs->vs5.sampler_count),
|
||||
I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
}
|
||||
|
||||
/* Emit scratch space relocation */
|
||||
if (prog_data->total_scratch != 0) {
|
||||
brw_emit_reloc(&brw->batch,
|
||||
stage_state->state_offset +
|
||||
offsetof(struct brw_vs_unit_state, thread2),
|
||||
stage_state->scratch_bo,
|
||||
vs->thread2.per_thread_scratch_space,
|
||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
|
||||
}
|
||||
|
||||
brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
|
||||
}
|
||||
|
||||
const struct brw_tracked_state brw_vs_unit = {
|
||||
.dirty = {
|
||||
.mesa = 0,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_BLORP |
|
||||
BRW_NEW_PROGRAM_CACHE |
|
||||
BRW_NEW_PUSH_CONSTANT_ALLOCATION |
|
||||
BRW_NEW_SAMPLER_STATE_TABLE |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_VS_PROG_DATA,
|
||||
},
|
||||
.emit = brw_upload_vs_unit,
|
||||
};
|
|
@@ -132,6 +132,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset)
|
|||
};
|
||||
}
|
||||
|
||||
static inline struct brw_address
|
||||
instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
|
||||
{
|
||||
return (struct brw_address) {
|
||||
.bo = bo,
|
||||
.offset = offset,
|
||||
.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
|
||||
.write_domain = 0,
|
||||
};
|
||||
}
|
||||
|
||||
static inline struct brw_address
|
||||
vertex_bo(struct brw_bo *bo, uint32_t offset)
|
||||
{
|
||||
|
@@ -1693,8 +1704,22 @@ static const struct brw_tracked_state genX(wm_state) = {
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#if GEN_GEN == 4
|
||||
static inline struct brw_address
|
||||
KSP(struct brw_context *brw, uint32_t offset)
|
||||
{
|
||||
return instruction_bo(brw->cache.bo, offset);
|
||||
}
|
||||
#else
|
||||
static inline uint32_t
|
||||
KSP(struct brw_context *brw, uint32_t offset)
|
||||
{
|
||||
return offset;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
|
||||
pkt.KernelStartPointer = stage_state->prog_offset; \
|
||||
pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
|
||||
pkt.SamplerCount = \
|
||||
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
|
||||
pkt.BindingTableEntryCount = \
|
||||
|
@@ -1716,12 +1741,12 @@ static const struct brw_tracked_state genX(wm_state) = {
|
|||
pkt.StatisticsEnable = true; \
|
||||
pkt.Enable = true;
|
||||
|
||||
#if GEN_GEN >= 6
|
||||
static void
|
||||
genX(upload_vs_state)(struct brw_context *brw)
|
||||
{
|
||||
UNUSED struct gl_context *ctx = &brw->ctx;
|
||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
const struct brw_stage_state *stage_state = &brw->vs.base;
|
||||
struct brw_stage_state *stage_state = &brw->vs.base;
|
||||
|
||||
/* BRW_NEW_VS_PROG_DATA */
|
||||
const struct brw_vue_prog_data *vue_prog_data =
|
||||
|
@@ -1755,11 +1780,44 @@ genX(upload_vs_state)(struct brw_context *brw)
|
|||
if (GEN_GEN == 7 && devinfo->is_ivybridge)
|
||||
gen7_emit_vs_workaround_flush(brw);
|
||||
|
||||
#if GEN_GEN >= 6
|
||||
brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
|
||||
#else
|
||||
ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
|
||||
brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
|
||||
#endif
|
||||
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
|
||||
|
||||
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
|
||||
|
||||
#if GEN_GEN < 6
|
||||
vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
|
||||
vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
|
||||
vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
|
||||
|
||||
vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
|
||||
vs.URBEntryAllocationSize = brw->urb.vsize - 1;
|
||||
|
||||
vs.MaximumNumberofThreads =
|
||||
CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
|
||||
|
||||
vs.StatisticsEnable = false;
|
||||
vs.SamplerStateOffset =
|
||||
instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
|
||||
#endif
|
||||
|
||||
#if GEN_GEN == 5
|
||||
/* Force single program flow on Ironlake. We cannot reliably get
|
||||
* all applications working without it. See:
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=29172
|
||||
*
|
||||
* The most notable and reliably failing application is the Humus
|
||||
* demo "CelShading"
|
||||
*/
|
||||
vs.SingleProgramFlow = true;
|
||||
vs.SamplerCount = 0; /* hardware requirement */
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
vs.SIMD8DispatchEnable =
|
||||
vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
|
||||
|
@@ -1801,11 +1859,15 @@ static const struct brw_tracked_state genX(vs_state) = {
|
|||
BRW_NEW_BLORP |
|
||||
BRW_NEW_CONTEXT |
|
||||
BRW_NEW_VS_PROG_DATA |
|
||||
(GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0),
|
||||
(GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
|
||||
(GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
|
||||
BRW_NEW_PROGRAM_CACHE |
|
||||
BRW_NEW_SAMPLER_STATE_TABLE |
|
||||
BRW_NEW_URB_FENCE
|
||||
: 0),
|
||||
},
|
||||
.emit = genX(upload_vs_state),
|
||||
};
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@@ -3884,7 +3946,7 @@ genX(init_atoms)(struct brw_context *brw)
|
|||
&brw_wm_unit,
|
||||
&brw_sf_vp,
|
||||
&brw_sf_unit,
|
||||
&brw_vs_unit, /* always required, enabled or not */
|
||||
&genX(vs_state), /* always required, enabled or not */
|
||||
&brw_clip_unit,
|
||||
&brw_gs_unit,
|
||||
|
||||
|
|
Loading…
Reference in New Issue