ilo: use ilo_shader_cso for GS
Add ilo_gpe_init_gs_cso() to construct 3DSTATE_GS once and early for geometry shaders.
This commit is contained in:
parent
d209da5e33
commit
851202c319
|
@ -501,16 +501,9 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
|
|||
/* 3DSTATE_GS */
|
||||
if (DIRTY(GS) || DIRTY(VS) ||
|
||||
session->prim_changed || session->kernel_bo_changed) {
|
||||
const struct ilo_shader *gs = (ilo->gs)? ilo->gs->shader : NULL;
|
||||
const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL;
|
||||
const int num_vertices = u_vertices_per_prim(session->reduced_prim);
|
||||
const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
|
||||
|
||||
if (gs)
|
||||
assert(!gs->pcb.clip_state_size);
|
||||
|
||||
p->gen6_3DSTATE_GS(p->dev, gs, vs,
|
||||
(vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0,
|
||||
p->cp);
|
||||
p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -438,4 +438,27 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
|
|||
const struct ilo_shader_state *vs,
|
||||
struct ilo_shader_cso *cso);
|
||||
|
||||
/*
 * Gen6 variant of the GS CSO initializer.  The definition stores four
 * 3DSTATE_GS dwords (DW2, DW4, DW5, DW6) in cso->payload[0..3].
 */
void
|
||||
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
struct ilo_shader_cso *cso);
|
||||
|
||||
/*
 * Gen7 variant of the GS CSO initializer.  The definition stores three
 * 3DSTATE_GS dwords (DW2, DW4, DW5) in cso->payload[0..2].
 */
void
|
||||
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
struct ilo_shader_cso *cso);
|
||||
|
||||
/*
 * Initialize the GS CSO for the given device.
 *
 * Dispatches on dev->gen: ILO_GEN(7) and above use the gen7 initializer,
 * everything else uses the gen6 initializer.  All three arguments are
 * forwarded unchanged.
 */
static inline void
|
||||
ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
struct ilo_shader_cso *cso)
|
||||
{
|
||||
if (dev->gen >= ILO_GEN(7)) {
|
||||
ilo_gpe_init_gs_cso_gen7(dev, gs, cso);
|
||||
}
|
||||
else {
|
||||
ilo_gpe_init_gs_cso_gen6(dev, gs, cso);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ILO_GPE_H */
|
||||
|
|
|
@ -1246,136 +1246,168 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
|
|||
ilo_cp_end(cp);
|
||||
}
|
||||
|
||||
/*
 * Precompute the shader-dependent dwords of the gen6 3DSTATE_GS command.
 *
 * The values for DW2, DW4, DW5 and DW6 are derived from the kernel
 * parameters of \p gs and from dev->gt, and are written to
 * cso->payload[0..3] in that order.  Nothing is emitted to a command
 * parser here.
 *
 * \p gs does not have to be a geometry shader: when its type is not
 * PIPE_SHADER_GEOMETRY, the ILO_KERNEL_VS_GEN6_SO_START_REG and
 * ILO_KERNEL_OUTPUT_COUNT parameters are used instead of the GS ones.
 */
void
|
||||
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
struct ilo_shader_cso *cso)
|
||||
{
|
||||
int start_grf, vue_read_len, max_threads;
|
||||
uint32_t dw2, dw4, dw5, dw6;
|
||||

||||
ILO_GPE_VALID_GEN(dev, 6, 6);
|
||||

||||
/* select the dispatch start GRF and the attribute count to read */
if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
|
||||
start_grf = ilo_shader_get_kernel_param(gs,
|
||||
ILO_KERNEL_URB_DATA_START_REG);
|
||||

||||
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
|
||||
}
|
||||
else {
|
||||
start_grf = ilo_shader_get_kernel_param(gs,
|
||||
ILO_KERNEL_VS_GEN6_SO_START_REG);
|
||||

||||
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
|
||||
}
|
||||

||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
|
||||
*
|
||||
* "Specifies the amount of URB data read and passed in the thread
|
||||
* payload for each Vertex URB entry, in 256-bit register increments.
|
||||
*
|
||||
* It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
|
||||
* 0 indicating no Vertex URB data to be read and passed to the
|
||||
* thread."
|
||||
*/
|
||||
/* halve the count, rounding up, and never program a length of 0 */
vue_read_len = (vue_read_len + 1) / 2;
|
||||
if (!vue_read_len)
|
||||
vue_read_len = 1;
|
||||

||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 154:
|
||||
*
|
||||
* "Maximum Number of Threads valid range is [0,27] when Rendering
|
||||
* Enabled bit is set."
|
||||
*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 173:
|
||||
*
|
||||
* "Programming Note: If the GS stage is enabled, software must always
|
||||
* allocate at least one GS URB Entry. This is true even if the GS
|
||||
* thread never needs to output vertices to the pipeline, e.g., when
|
||||
* only performing stream output. This is an artifact of the need to
|
||||
* pass the GS thread an initial destination URB handle."
|
||||
*
|
||||
* As such, we always enable rendering, and limit the number of threads.
|
||||
*/
|
||||
if (dev->gt == 2) {
|
||||
/* maximum is 60, but limited to 28 */
|
||||
max_threads = 28;
|
||||
}
|
||||
else {
|
||||
/* maximum is 24, but limited to 21 (see brwCreateContext()) */
|
||||
max_threads = 21;
|
||||
}
|
||||

||||
/* NOTE(review): cannot trigger with the values assigned above (28 or 21) */
if (max_threads > 28)
|
||||
max_threads = 28;
|
||||

||||
dw2 = GEN6_GS_SPF_MODE;
|
||||

||||
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
|
||||
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
|
||||
start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
|
||||

||||
/* the thread-count field is programmed with max_threads - 1 */
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
|
||||
GEN6_GS_STATISTICS_ENABLE |
|
||||
GEN6_GS_SO_STATISTICS_ENABLE |
|
||||
GEN6_GS_RENDERING_ENABLE;
|
||||

||||
/*
|
||||
* we cannot make use of GEN6_GS_REORDER because it will reorder
|
||||
* triangle strips according to D3D rules (triangle 2N+1 uses vertices
|
||||
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
|
||||
* (2N+2, 2N+1, 2N+3)).
|
||||
*/
|
||||
dw6 = GEN6_GS_ENABLE;
|
||||

||||
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
|
||||
dw6 |= GEN6_GS_DISCARD_ADJACENCY;
|
||||

||||
/* SVBI payload, plus post-increment only when the value is non-zero */
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
|
||||
const uint32_t svbi_post_inc =
|
||||
ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
|
||||

||||
dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
|
||||
if (svbi_post_inc) {
|
||||
dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
|
||||
svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
|
||||
}
|
||||
}
|
||||

||||
/* payload[0..3] hold DW2, DW4, DW5 and DW6, in that order */
STATIC_ASSERT(Elements(cso->payload) >= 4);
|
||||
cso->payload[0] = dw2;
|
||||
cso->payload[1] = dw4;
|
||||
cso->payload[2] = dw5;
|
||||
cso->payload[3] = dw6;
|
||||
}
|
||||
|
||||
static void
|
||||
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
const struct ilo_shader *vs,
|
||||
uint32_t vs_offset,
|
||||
const struct ilo_shader_state *gs,
|
||||
const struct ilo_shader_state *vs,
|
||||
int verts_per_prim,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
|
||||
const uint8_t cmd_len = 7;
|
||||
uint32_t dw1, dw2, dw4, dw5, dw6;
|
||||
int i;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 6, 6);
|
||||
|
||||
if (!gs && (!vs || !vs->stream_output)) {
|
||||
if (gs) {
|
||||
const struct ilo_shader_cso *cso;
|
||||
|
||||
dw1 = ilo_shader_get_kernel_offset(gs);
|
||||
|
||||
cso = ilo_shader_get_kernel_cso(gs);
|
||||
dw2 = cso->payload[0];
|
||||
dw4 = cso->payload[1];
|
||||
dw5 = cso->payload[2];
|
||||
dw6 = cso->payload[3];
|
||||
}
|
||||
else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
|
||||
struct ilo_shader_cso cso;
|
||||
enum ilo_kernel_param param;
|
||||
|
||||
switch (verts_per_prim) {
|
||||
case 1:
|
||||
param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
|
||||
break;
|
||||
case 2:
|
||||
param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
|
||||
break;
|
||||
default:
|
||||
param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
|
||||
break;
|
||||
}
|
||||
|
||||
dw1 = ilo_shader_get_kernel_offset(vs) +
|
||||
ilo_shader_get_kernel_param(vs, param);
|
||||
|
||||
/* cannot use VS's CSO */
|
||||
ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
|
||||
dw2 = cso.payload[0];
|
||||
dw4 = cso.payload[1];
|
||||
dw5 = cso.payload[2];
|
||||
dw6 = cso.payload[3];
|
||||
}
|
||||
else {
|
||||
dw1 = 0;
|
||||
dw2 = 0;
|
||||
dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
|
||||
dw5 = GEN6_GS_STATISTICS_ENABLE;
|
||||
dw6 = 0;
|
||||
}
|
||||
else {
|
||||
int max_threads, vue_read_len;
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 154:
|
||||
*
|
||||
* "Maximum Number of Threads valid range is [0,27] when Rendering
|
||||
* Enabled bit is set."
|
||||
*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 173:
|
||||
*
|
||||
* "Programming Note: If the GS stage is enabled, software must
|
||||
* always allocate at least one GS URB Entry. This is true even if
|
||||
* the GS thread never needs to output vertices to the pipeline,
|
||||
* e.g., when only performing stream output. This is an artifact of
|
||||
* the need to pass the GS thread an initial destination URB
|
||||
* handle."
|
||||
*
|
||||
* As such, we always enable rendering, and limit the number of threads.
|
||||
*/
|
||||
if (dev->gt == 2) {
|
||||
/* maximum is 60, but limited to 28 */
|
||||
max_threads = 28;
|
||||
}
|
||||
else {
|
||||
/* maximum is 24, but limited to 21 (see brwCreateContext()) */
|
||||
max_threads = 21;
|
||||
}
|
||||
|
||||
if (max_threads > 28)
|
||||
max_threads = 28;
|
||||
|
||||
dw2 = GEN6_GS_SPF_MODE;
|
||||
|
||||
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
|
||||
GEN6_GS_STATISTICS_ENABLE |
|
||||
GEN6_GS_SO_STATISTICS_ENABLE |
|
||||
GEN6_GS_RENDERING_ENABLE;
|
||||
|
||||
/*
|
||||
* we cannot make use of GEN6_GS_REORDER because it will reorder
|
||||
* triangle strips according to D3D rules (triangle 2N+1 uses vertices
|
||||
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
|
||||
* (2N+2, 2N+1, 2N+3)).
|
||||
*/
|
||||
dw6 = GEN6_GS_ENABLE;
|
||||
|
||||
if (gs) {
|
||||
/* VS outputs must match GS inputs */
|
||||
assert(gs->in.count == vs->out.count);
|
||||
for (i = 0; i < gs->in.count; i++) {
|
||||
assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
|
||||
assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
|
||||
*
|
||||
* "It is UNDEFINED to set this field (Vertex URB Entry Read
|
||||
* Length) to 0 indicating no Vertex URB data to be read and
|
||||
* passed to the thread."
|
||||
*/
|
||||
vue_read_len = (gs->in.count + 1) / 2;
|
||||
if (!vue_read_len)
|
||||
vue_read_len = 1;
|
||||
|
||||
dw1 = gs->cache_offset;
|
||||
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
|
||||
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
|
||||
gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
|
||||
|
||||
if (gs->in.discard_adj)
|
||||
dw6 |= GEN6_GS_DISCARD_ADJACENCY;
|
||||
|
||||
if (gs->stream_output) {
|
||||
dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
|
||||
if (gs->svbi_post_inc) {
|
||||
dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
|
||||
gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
|
||||
*
|
||||
* "It is UNDEFINED to set this field (Vertex URB Entry Read
|
||||
* Length) to 0 indicating no Vertex URB data to be read and
|
||||
* passed to the thread."
|
||||
*/
|
||||
vue_read_len = (vs->out.count + 1) / 2;
|
||||
if (!vue_read_len)
|
||||
vue_read_len = 1;
|
||||
|
||||
dw1 = vs_offset;
|
||||
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
|
||||
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
|
||||
vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
|
||||
|
||||
if (vs->in.discard_adj)
|
||||
dw6 |= GEN6_GS_DISCARD_ADJACENCY;
|
||||
|
||||
dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
|
||||
if (vs->svbi_post_inc) {
|
||||
dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
|
||||
vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
ilo_cp_write(cp, cmd | (cmd_len - 2));
|
||||
|
|
|
@ -241,9 +241,9 @@ typedef void
|
|||
|
||||
typedef void
|
||||
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
const struct ilo_shader *vs,
|
||||
uint32_t vs_offset,
|
||||
const struct ilo_shader_state *gs,
|
||||
const struct ilo_shader_state *vs,
|
||||
int verts_per_prim,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
typedef void
|
||||
|
|
|
@ -84,19 +84,22 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
|
|||
gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
|
||||
}
|
||||
|
||||
static void
|
||||
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp)
|
||||
void
|
||||
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
struct ilo_shader_cso *cso)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
|
||||
const uint8_t cmd_len = 7;
|
||||
int start_grf, vue_read_len, max_threads;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
int max_threads;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 7, 7);
|
||||
|
||||
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
|
||||
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
|
||||
|
||||
/* in pairs */
|
||||
vue_read_len = (vue_read_len + 1) / 2;
|
||||
|
||||
switch (dev->gen) {
|
||||
case ILO_GEN(7):
|
||||
max_threads = (dev->gt == 2) ? 128 : 36;
|
||||
|
@ -106,6 +109,36 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
|||
break;
|
||||
}
|
||||
|
||||
dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
|
||||
|
||||
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
|
||||
GEN7_GS_INCLUDE_VERTEX_HANDLES |
|
||||
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
|
||||
start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
|
||||
|
||||
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
|
||||
GEN6_GS_STATISTICS_ENABLE |
|
||||
GEN6_GS_ENABLE;
|
||||
|
||||
STATIC_ASSERT(Elements(cso->payload) >= 3);
|
||||
cso->payload[0] = dw2;
|
||||
cso->payload[1] = dw4;
|
||||
cso->payload[2] = dw5;
|
||||
}
|
||||
|
||||
static void
|
||||
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader_state *gs,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
|
||||
const uint8_t cmd_len = 7;
|
||||
const struct ilo_shader_cso *cso;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 7, 7);
|
||||
|
||||
if (!gs) {
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
ilo_cp_write(cp, cmd | (cmd_len - 2));
|
||||
|
@ -119,20 +152,16 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
|||
return;
|
||||
}
|
||||
|
||||
dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
|
||||
cso = ilo_shader_get_kernel_cso(gs);
|
||||
dw2 = cso->payload[0];
|
||||
dw4 = cso->payload[1];
|
||||
dw5 = cso->payload[2];
|
||||
|
||||
dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
|
||||
GEN7_GS_INCLUDE_VERTEX_HANDLES |
|
||||
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
|
||||
gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
|
||||
|
||||
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
|
||||
GEN6_GS_STATISTICS_ENABLE |
|
||||
GEN6_GS_ENABLE;
|
||||
dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
|
||||
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
ilo_cp_write(cp, cmd | (cmd_len - 2));
|
||||
ilo_cp_write(cp, gs->cache_offset);
|
||||
ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
|
||||
ilo_cp_write(cp, dw2);
|
||||
ilo_cp_write(cp, 0); /* scratch */
|
||||
ilo_cp_write(cp, dw4);
|
||||
|
|
|
@ -158,7 +158,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;
|
|||
|
||||
typedef void
|
||||
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
const struct ilo_shader_state *gs,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
|
|
|
@ -683,6 +683,9 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state,
|
|||
case PIPE_SHADER_VERTEX:
|
||||
ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue