ilo: add a pass to finalize ilo_ve_state

Add finalize_vertex_elements() to finalize ilo_ve_state.  This fixes a
potential issue with URB entry allocation for VS and moves the complexity of
gen6_3DSTATE_VERTEX_ELEMENTS() to the new function.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
This commit is contained in:
Chia-I Wu 2014-09-30 10:32:53 +08:00
parent 2b4c8ffc30
commit 2d13b5ac81
8 changed files with 190 additions and 158 deletions

View File

@ -28,7 +28,6 @@
#include "util/u_draw.h"
#include "util/u_pack_color.h"
#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */
#include "ilo_draw.h"
#include "ilo_state.h"
#include "ilo_state_gen.h"
@ -41,24 +40,25 @@
static bool
ilo_blitter_set_invariants(struct ilo_blitter *blitter)
{
struct pipe_vertex_element velems[2];
struct pipe_vertex_element velem;
struct pipe_viewport_state vp;
if (blitter->initialized)
return true;
/* only vertex X and Y */
memset(&velems, 0, sizeof(velems));
velems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
ilo_gpe_init_ve(blitter->ilo->dev, 2, velems, &blitter->ve);
memset(&velem, 0, sizeof(velem));
velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve);
/* override first VE to be VUE header */
ve_init_cso_with_components(blitter->ilo->dev,
/* generate VUE header */
ilo_gpe_init_ve_nosrc(blitter->ilo->dev,
GEN6_VFCOMP_STORE_0, /* Reserved */
GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
GEN6_VFCOMP_STORE_0, /* Viewport Index */
GEN6_VFCOMP_STORE_0, /* Point Width */
&blitter->ve.cso[0]);
&blitter->ve.nosrc_cso);
blitter->ve.prepend_nosrc_cso = true;
/* a rectangle has 3 vertices in a RECTLIST */
util_draw_init_info(&blitter->draw);

View File

@ -438,77 +438,9 @@ gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0);
}
/**
 * Initialize \p cso as a vertex element described only by its four component
 * controls, \p comp0 through \p comp3.  The first dword holds nothing but
 * GEN6_VE_STATE_DW0_VALID; the second dword packs the component controls.
 */
static inline void
ve_init_cso_with_components(const struct ilo_dev_info *dev,
                            int comp0, int comp1, int comp2, int comp3,
                            struct ilo_ve_cso *cso)
{
   int dw1;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   STATIC_ASSERT(Elements(cso->payload) >= 2);

   /* pack the component controls one field at a time */
   dw1 = comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT;
   dw1 |= comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT;
   dw1 |= comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT;
   dw1 |= comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;

   cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
   cso->payload[1] = dw1;
}
/**
 * Rewrite \p cso in place so that it describes an edge flag element: the
 * edge flag enable bit is set, an R32_FLOAT source format is replaced by
 * R32_UINT, and only the first component is stored.
 */
static inline void
ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
                    struct ilo_ve_cso *cso)
{
   int format;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
    *        valid VERTEX_ELEMENT structure.
    *
    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
    *
    *      - The Source Element Format must be set to the UINT format.
    *
    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
    *        primitives.  Software may elect to convert QUADLIST primitives
    *        to some set of corresponding edge-flag-supported primitive
    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
    */
   cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;

   /*
    * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
    *
    * Since all the hardware cares about is whether the flags are zero or not,
    * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
    */
   format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
   if (format != GEN6_FORMAT_R32_FLOAT) {
      assert(format == GEN6_FORMAT_R8_UINT);
   }
   else {
      /* R32_UINT is exactly one below R32_FLOAT, so decrement the field */
      STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
      cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
   }

   /* store the source in component 0 only */
   cso->payload[1] =
      GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
}
static inline void
gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
const struct ilo_ve_state *ve,
bool last_velement_edgeflag,
bool prepend_generated_ids)
const struct ilo_ve_state *ve)
{
uint8_t cmd_len;
uint32_t *dw;
@ -517,66 +449,37 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 92:
*
* "At least one VERTEX_ELEMENT_STATE structure must be included."
*
* From the Sandy Bridge PRM, volume 2 part 1, page 93:
*
* "Up to 34 (DevSNB+) vertex elements are supported."
*/
assert(ve->count + prepend_generated_ids <= 34);
assert(ve->count + ve->prepend_nosrc_cso >= 1);
assert(ve->count + ve->prepend_nosrc_cso <= 34);
STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
if (!ve->count && !prepend_generated_ids) {
struct ilo_ve_cso dummy;
ve_init_cso_with_components(builder->dev,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_1_FP,
&dummy);
cmd_len = 3;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
memcpy(&dw[1], dummy.payload, sizeof(dummy.payload));
return;
}
cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
dw++;
if (prepend_generated_ids) {
struct ilo_ve_cso gen_ids;
ve_init_cso_with_components(builder->dev,
GEN6_VFCOMP_STORE_VID,
GEN6_VFCOMP_STORE_IID,
GEN6_VFCOMP_NOSTORE,
GEN6_VFCOMP_NOSTORE,
&gen_ids);
memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload));
if (ve->prepend_nosrc_cso) {
memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
dw += 2;
}
if (last_velement_edgeflag && ve->count) {
struct ilo_ve_cso edgeflag;
for (i = 0; i < ve->count - 1; i++)
memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
edgeflag = ve->cso[i];
ve_set_cso_edgeflag(builder->dev, &edgeflag);
memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload));
} else {
for (i = 0; i < ve->count; i++)
memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
dw += 2;
}
if (ve->last_cso_edgeflag)
memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
}
static inline void

View File

@ -332,8 +332,8 @@ gen6_draw_common_urb(struct ilo_render *r,
* VS-generated output data, output URB availability isn't a
* factor."
*/
if (vs_entry_size < vec->ve->count)
vs_entry_size = vec->ve->count;
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
gs_entry_size = (vec->gs) ?
ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
@ -465,31 +465,8 @@ gen6_draw_vf(struct ilo_render *r,
gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
/* 3DSTATE_VERTEX_ELEMENTS */
if (DIRTY(VE) || DIRTY(VS)) {
const struct ilo_ve_state *ve = vec->ve;
bool last_velement_edgeflag = false;
bool prepend_generate_ids = false;
if (vec->vs) {
if (ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
/* we rely on the state tracker here */
assert(ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_INPUT_COUNT) == ve->count);
last_velement_edgeflag = true;
}
if (ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_VS_INPUT_INSTANCEID) ||
ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_VS_INPUT_VERTEXID))
prepend_generate_ids = true;
}
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, ve,
last_velement_edgeflag, prepend_generate_ids);
}
if (DIRTY(VE))
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
}
void
@ -978,11 +955,12 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
session->vb_start, session->vb_end,
sizeof(blitter->vertices[0]));
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder,
&blitter->ve, false, false);
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
0);
gen6_3DSTATE_URB(r->builder,
r->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0);
/* 3DSTATE_URB workaround */
if (r->state.gs.active) {
ilo_render_emit_flush(r);

View File

@ -245,8 +245,8 @@ gen7_draw_common_urb(struct ilo_render *r,
* Allocation Size must be sized to the maximum of the vertex input
* and output structures."
*/
if (vs_entry_size < vec->ve->count)
vs_entry_size = vec->ve->count;
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
vs_entry_size *= sizeof(float) * 4;
vs_total_size = r->dev->urb_size - offset;
@ -716,7 +716,8 @@ gen7_rectlist_urb(struct ilo_render *r,
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 32768 : 16384;
gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
blitter->ve.count * 4 * sizeof(float));
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
4 * sizeof(float));
gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
@ -839,8 +840,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
session->vb_start, session->vb_end,
sizeof(blitter->vertices[0]));
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder,
&blitter->ve, false, false);
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
gen7_rectlist_pcb_alloc(r, blitter);

View File

@ -186,6 +186,63 @@ finalize_index_buffer(struct ilo_context *ilo)
pipe_resource_reference(&current_hw_res, NULL);
}
static void
finalize_vertex_elements(struct ilo_context *ilo)
{
struct ilo_state_vector *vec = &ilo->state_vector;
if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS)))
return;
vec->dirty |= ILO_DIRTY_VE;
vec->ve->last_cso_edgeflag = false;
if (vec->ve->count && vec->vs &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1];
ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso);
vec->ve->last_cso_edgeflag = true;
}
vec->ve->prepend_nosrc_cso = false;
if (vec->vs &&
(ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_VS_INPUT_INSTANCEID) ||
ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_VS_INPUT_VERTEXID))) {
ilo_gpe_init_ve_nosrc(ilo->dev,
GEN6_VFCOMP_STORE_VID,
GEN6_VFCOMP_STORE_IID,
GEN6_VFCOMP_NOSTORE,
GEN6_VFCOMP_NOSTORE,
&vec->ve->nosrc_cso);
vec->ve->prepend_nosrc_cso = true;
} else if (!vec->vs) {
/* generate VUE header */
ilo_gpe_init_ve_nosrc(ilo->dev,
GEN6_VFCOMP_STORE_0, /* Reserved */
GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
GEN6_VFCOMP_STORE_0, /* Viewport Index */
GEN6_VFCOMP_STORE_0, /* Point Width */
&vec->ve->nosrc_cso);
vec->ve->prepend_nosrc_cso = true;
} else if (!vec->ve->count) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 92:
*
* "SW must ensure that at least one vertex element is defined prior
* to issuing a 3DPRIMTIVE command, or operation is UNDEFINED."
*/
ilo_gpe_init_ve_nosrc(ilo->dev,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_0,
GEN6_VFCOMP_STORE_1_FP,
&vec->ve->nosrc_cso);
vec->ve->prepend_nosrc_cso = true;
}
}
/**
* Finalize states. Some states depend on other states and are
* incomplete/invalid until finalized.
@ -199,6 +256,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
finalize_shader_states(&ilo->state_vector);
finalize_constant_buffers(ilo);
finalize_index_buffer(ilo);
finalize_vertex_elements(ilo);
u_upload_unmap(ilo->uploader);
}

View File

@ -176,6 +176,13 @@ struct ilo_ve_state {
unsigned instance_divisors[PIPE_MAX_ATTRIBS];
unsigned vb_mapping[PIPE_MAX_ATTRIBS];
unsigned vb_count;
/* these are not valid until the state is finalized */
struct ilo_ve_cso edgeflag_cso;
bool last_cso_edgeflag;
struct ilo_ve_cso nosrc_cso;
bool prepend_nosrc_cso;
};
struct ilo_so_state {
@ -385,7 +392,7 @@ struct ilo_state_vector {
uint32_t dirty;
struct ilo_vb_state vb;
const struct ilo_ve_state *ve;
struct ilo_ve_state *ve;
struct ilo_ib_state ib;
struct ilo_shader_state *vs;

View File

@ -86,6 +86,15 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev,
const struct pipe_vertex_element *states,
struct ilo_ve_state *ve);
/**
 * Modify \p cso in place to make it an edge flag vertex element: enable the
 * edge flag bit, replace an R32_FLOAT source format by R32_UINT (the format
 * is otherwise asserted to be R8_UINT), and store only component 0.
 */
void
ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev,
struct ilo_ve_cso *cso);
/**
 * Initialize \p cso as a valid vertex element whose components are given by
 * the component controls \p comp0 through \p comp3.  None of them may be
 * GEN6_VFCOMP_STORE_SRC (this is asserted).
 */
void
ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev,
int comp0, int comp1, int comp2, int comp3,
struct ilo_ve_cso *cso);
void
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
const struct pipe_viewport_state *state,

View File

@ -327,6 +327,83 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev,
}
}
/**
 * Modify \p cso in place to make it an edge flag vertex element.  The edge
 * flag enable bit is set, the source format is forced to a UINT format, and
 * only the first component is stored.
 */
void
ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev,
                        struct ilo_ve_cso *cso)
{
   int format;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   /* store the source in component 0 only, as required below */
   cso->payload[1] =
      GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
    *        valid VERTEX_ELEMENT structure.
    *
    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
    *
    *      - The Source Element Format must be set to the UINT format.
    *
    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
    *        primitives.  Software may elect to convert QUADLIST primitives
    *        to some set of corresponding edge-flag-supported primitive
    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
    */
   cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;

   /*
    * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
    *
    * Since all the hardware cares about is whether the flags are zero or not,
    * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
    */
   format = GEN_EXTRACT(cso->payload[0], GEN6_VE_STATE_DW0_FORMAT);
   if (format == GEN6_FORMAT_R32_FLOAT) {
      format = GEN6_FORMAT_R32_UINT;
   } else {
      assert(format == GEN6_FORMAT_R8_UINT);
   }

   /* replace the format field with the (possibly converted) format */
   cso->payload[0] = (cso->payload[0] & ~GEN6_VE_STATE_DW0_FORMAT__MASK) |
                     GEN_SHIFT32(format, GEN6_VE_STATE_DW0_FORMAT);
}
/**
 * Initialize \p cso as a valid vertex element whose four components are
 * described by the component controls \p comp0 through \p comp3.  None of
 * the controls may be GEN6_VFCOMP_STORE_SRC.
 */
void
ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev,
                      int comp0, int comp1, int comp2, int comp3,
                      struct ilo_ve_cso *cso)
{
   int dw1;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   STATIC_ASSERT(Elements(cso->payload) >= 2);

   assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
          comp1 != GEN6_VFCOMP_STORE_SRC &&
          comp2 != GEN6_VFCOMP_STORE_SRC &&
          comp3 != GEN6_VFCOMP_STORE_SRC);

   /* pack the component controls one field at a time */
   dw1 = comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT;
   dw1 |= comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT;
   dw1 |= comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT;
   dw1 |= comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;

   cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
   cso->payload[1] = dw1;
}
void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs,