ilo: use ilo_shader_cso for GS

Add ilo_gpe_init_gs_cso() to construct 3DSTATE_GS once and early for geometry
shaders.
This commit is contained in:
Chia-I Wu 2013-06-20 17:42:21 +08:00
parent d209da5e33
commit 851202c319
7 changed files with 225 additions and 145 deletions

View File

@ -501,16 +501,9 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
/* 3DSTATE_GS */
if (DIRTY(GS) || DIRTY(VS) ||
session->prim_changed || session->kernel_bo_changed) {
const struct ilo_shader *gs = (ilo->gs)? ilo->gs->shader : NULL;
const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL;
const int num_vertices = u_vertices_per_prim(session->reduced_prim);
const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
if (gs)
assert(!gs->pcb.clip_state_size);
p->gen6_3DSTATE_GS(p->dev, gs, vs,
(vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0,
p->cp);
p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
}
}

View File

@ -438,4 +438,27 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs,
struct ilo_shader_cso *cso);
/*
 * Gen6 variant of the GS CSO initializer.
 *
 * Computes the shader-dependent dwords of 3DSTATE_GS for the shader state
 * \p gs and stores them in cso->payload[0..3] (DW2, DW4, DW5 and DW6 of the
 * command, in that order).  The implementation is guarded by
 * ILO_GPE_VALID_GEN(dev, 6, 6).
 */
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso);
/*
 * Gen7 variant of the GS CSO initializer.
 *
 * Computes the shader-dependent dwords of 3DSTATE_GS for the shader state
 * \p gs and stores them in cso->payload[0..2] (DW2, DW4 and DW5 of the
 * command, in that order).  The implementation is guarded by
 * ILO_GPE_VALID_GEN(dev, 7, 7).
 */
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso);
/**
 * Initialize the 3DSTATE_GS CSO of a shader state, dispatching on the
 * hardware generation.
 *
 * Devices older than ILO_GEN(7) use ilo_gpe_init_gs_cso_gen6(); everything
 * else uses ilo_gpe_init_gs_cso_gen7().
 *
 * \param dev  device info; only dev->gen is inspected here
 * \param gs   shader state forwarded to the per-gen initializer
 * \param cso  CSO to be filled by the per-gen initializer
 */
static inline void
ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   /* pre-gen7 hardware takes the gen6 path */
   if (dev->gen < ILO_GEN(7)) {
      ilo_gpe_init_gs_cso_gen6(dev, gs, cso);
      return;
   }

   ilo_gpe_init_gs_cso_gen7(dev, gs, cso);
}
#endif /* ILO_GPE_H */

View File

@ -1246,136 +1246,168 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
ilo_cp_end(cp);
}
/**
 * Precompute the shader-dependent dwords of the gen6 3DSTATE_GS command.
 *
 * The results are stored in cso->payload[0..3] as DW2, DW4, DW5 and DW6 of
 * the command.  \p gs may be a geometry shader, or a shader of another type
 * (NOTE(review): presumably a vertex shader that performs stream output
 * through the GS stage -- confirm against callers), in which case the
 * ILO_KERNEL_VS_GEN6_SO_START_REG / ILO_KERNEL_OUTPUT_COUNT kernel
 * parameters are used instead of the GS input parameters.
 *
 * \param dev  device info; dev->gt selects the thread limit
 * \param gs   shader state whose kernel parameters are queried
 * \param cso  destination for the four precomputed dwords
 */
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso)
{
   int grf_start, urb_read_len, thread_count;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (ilo_shader_get_type(gs) != PIPE_SHADER_GEOMETRY) {
      /* not a GS kernel: read what the kernel outputs */
      grf_start = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_VS_GEN6_SO_START_REG);
      urb_read_len = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_OUTPUT_COUNT);
   }
   else {
      /* a real GS kernel: read its declared inputs */
      grf_start = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_URB_DATA_START_REG);
      urb_read_len = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_INPUT_COUNT);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
    *
    *     "Specifies the amount of URB data read and passed in the thread
    *      payload for each Vertex URB entry, in 256-bit register increments.
    *
    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *      0 indicating no Vertex URB data to be read and passed to the
    *      thread."
    *
    * Convert the attribute count to register pairs (rounding up) and never
    * program a length of zero.
    */
   urb_read_len = (urb_read_len + 1) / 2;
   if (urb_read_len == 0)
      urb_read_len = 1;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
    *
    *     "Maximum Number of Threads valid range is [0,27] when Rendering
    *      Enabled bit is set."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    *
    * As such, we always enable rendering, and limit the number of threads.
    *
    * GT2: maximum is 60, but limited to 28.
    * otherwise: maximum is 24, but limited to 21 (see brwCreateContext()).
    */
   thread_count = (dev->gt == 2) ? 28 : 21;
   if (thread_count > 28)
      thread_count = 28;

   /* DW2 */
   dw2 = GEN6_GS_SPF_MODE;

   /* DW4: URB read length, read offset (always 0) and dispatch GRF */
   dw4 = urb_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
         0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
         grf_start << GEN6_GS_DISPATCH_START_GRF_SHIFT;

   /* DW5: the field holds the thread count minus one */
   dw5 = (thread_count - 1) << GEN6_GS_MAX_THREADS_SHIFT;
   dw5 |= GEN6_GS_STATISTICS_ENABLE;
   dw5 |= GEN6_GS_SO_STATISTICS_ENABLE;
   dw5 |= GEN6_GS_RENDERING_ENABLE;

   /*
    * DW6
    *
    * we cannot make use of GEN6_GS_REORDER because it will reorder
    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
    * (2N+2, 2N+1, 2N+3)).
    */
   dw6 = GEN6_GS_ENABLE;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
      dw6 |= GEN6_GS_DISCARD_ADJACENCY;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
      const uint32_t post_inc =
         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);

      dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;

      /* post-increment is enabled only for a non-zero increment */
      if (post_inc != 0) {
         dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
                post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
      }
   }

   /* publish the dwords only once all kernel parameters have been read */
   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
static void
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
const struct ilo_shader *gs,
const struct ilo_shader *vs,
uint32_t vs_offset,
const struct ilo_shader_state *gs,
const struct ilo_shader_state *vs,
int verts_per_prim,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
const uint8_t cmd_len = 7;
uint32_t dw1, dw2, dw4, dw5, dw6;
int i;
ILO_GPE_VALID_GEN(dev, 6, 6);
if (!gs && (!vs || !vs->stream_output)) {
if (gs) {
const struct ilo_shader_cso *cso;
dw1 = ilo_shader_get_kernel_offset(gs);
cso = ilo_shader_get_kernel_cso(gs);
dw2 = cso->payload[0];
dw4 = cso->payload[1];
dw5 = cso->payload[2];
dw6 = cso->payload[3];
}
else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
struct ilo_shader_cso cso;
enum ilo_kernel_param param;
switch (verts_per_prim) {
case 1:
param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
break;
case 2:
param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
break;
default:
param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
break;
}
dw1 = ilo_shader_get_kernel_offset(vs) +
ilo_shader_get_kernel_param(vs, param);
/* cannot use VS's CSO */
ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
dw2 = cso.payload[0];
dw4 = cso.payload[1];
dw5 = cso.payload[2];
dw6 = cso.payload[3];
}
else {
dw1 = 0;
dw2 = 0;
dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
dw5 = GEN6_GS_STATISTICS_ENABLE;
dw6 = 0;
}
else {
int max_threads, vue_read_len;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 154:
*
* "Maximum Number of Threads valid range is [0,27] when Rendering
* Enabled bit is set."
*
* From the Sandy Bridge PRM, volume 2 part 1, page 173:
*
* "Programming Note: If the GS stage is enabled, software must
* always allocate at least one GS URB Entry. This is true even if
* the GS thread never needs to output vertices to the pipeline,
* e.g., when only performing stream output. This is an artifact of
* the need to pass the GS thread an initial destination URB
* handle."
*
* As such, we always enable rendering, and limit the number of threads.
*/
if (dev->gt == 2) {
/* maximum is 60, but limited to 28 */
max_threads = 28;
}
else {
/* maximum is 24, but limited to 21 (see brwCreateContext()) */
max_threads = 21;
}
if (max_threads > 28)
max_threads = 28;
dw2 = GEN6_GS_SPF_MODE;
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_SO_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE;
/*
* we cannot make use of GEN6_GS_REORDER because it will reorder
* triangle strips according to D3D rules (triangle 2N+1 uses vertices
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
* (2N+2, 2N+1, 2N+3)).
*/
dw6 = GEN6_GS_ENABLE;
if (gs) {
/* VS outputs must match GS inputs */
assert(gs->in.count == vs->out.count);
for (i = 0; i < gs->in.count; i++) {
assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
}
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
*
* "It is UNDEFINED to set this field (Vertex URB Entry Read
* Length) to 0 indicating no Vertex URB data to be read and
* passed to the thread."
*/
vue_read_len = (gs->in.count + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
dw1 = gs->cache_offset;
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
if (gs->in.discard_adj)
dw6 |= GEN6_GS_DISCARD_ADJACENCY;
if (gs->stream_output) {
dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
if (gs->svbi_post_inc) {
dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
}
}
}
else {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
*
* "It is UNDEFINED to set this field (Vertex URB Entry Read
* Length) to 0 indicating no Vertex URB data to be read and
* passed to the thread."
*/
vue_read_len = (vs->out.count + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
dw1 = vs_offset;
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
if (vs->in.discard_adj)
dw6 |= GEN6_GS_DISCARD_ADJACENCY;
dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
if (vs->svbi_post_inc) {
dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
}
}
}
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));

View File

@ -241,9 +241,9 @@ typedef void
typedef void
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
const struct ilo_shader *gs,
const struct ilo_shader *vs,
uint32_t vs_offset,
const struct ilo_shader_state *gs,
const struct ilo_shader_state *vs,
int verts_per_prim,
struct ilo_cp *cp);
typedef void

View File

@ -84,19 +84,22 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
}
static void
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
const struct ilo_shader *gs,
int num_samplers,
struct ilo_cp *cp)
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
const uint8_t cmd_len = 7;
int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5;
int max_threads;
ILO_GPE_VALID_GEN(dev, 7, 7);
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
/* in pairs */
vue_read_len = (vue_read_len + 1) / 2;
switch (dev->gen) {
case ILO_GEN(7):
max_threads = (dev->gt == 2) ? 128 : 36;
@ -106,6 +109,36 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
break;
}
dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
GEN7_GS_INCLUDE_VERTEX_HANDLES |
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_ENABLE;
STATIC_ASSERT(Elements(cso->payload) >= 3);
cso->payload[0] = dw2;
cso->payload[1] = dw4;
cso->payload[2] = dw5;
}
static void
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
int num_samplers,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
const uint8_t cmd_len = 7;
const struct ilo_shader_cso *cso;
uint32_t dw2, dw4, dw5;
ILO_GPE_VALID_GEN(dev, 7, 7);
if (!gs) {
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
@ -119,20 +152,16 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
return;
}
dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
cso = ilo_shader_get_kernel_cso(gs);
dw2 = cso->payload[0];
dw4 = cso->payload[1];
dw5 = cso->payload[2];
dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
GEN7_GS_INCLUDE_VERTEX_HANDLES |
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_ENABLE;
dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
ilo_cp_write(cp, gs->cache_offset);
ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
ilo_cp_write(cp, dw2);
ilo_cp_write(cp, 0); /* scratch */
ilo_cp_write(cp, dw4);

View File

@ -158,7 +158,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;
typedef void
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
const struct ilo_shader *gs,
const struct ilo_shader_state *gs,
int num_samplers,
struct ilo_cp *cp);

View File

@ -683,6 +683,9 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state,
case PIPE_SHADER_VERTEX:
ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
break;
case PIPE_SHADER_GEOMETRY:
ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
break;
default:
break;
}