anv/pipeline: Unify 3DSTATE_GS emission

Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
Jason Ekstrand 2016-11-12 09:18:52 -08:00
parent f3783f1249
commit 7fe6655aad
3 changed files with 75 additions and 109 deletions

View File

@ -105,53 +105,7 @@ genX(graphics_pipeline_create)(
#endif
emit_3dstate_vs(pipeline);
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
} else {
const struct anv_shader_bin *gs_bin =
pipeline->shaders[MESA_SHADER_GEOMETRY];
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
gs.KernelStartPointer = gs_bin->kernel.offset;
gs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_GEOMETRY,
gs_prog_data->base.base.total_scratch),
.offset = 0,
};
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
gs.OutputTopology = gs_prog_data->output_topology;
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
gs.DispatchGRFStartRegisterForURBData =
gs_prog_data->base.base.dispatch_grf_start_reg;
gs.SamplerCount = get_sampler_count(gs_bin);
gs.BindingTableEntryCount = get_binding_table_entry_count(gs_bin);
gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
/* This is in the next dword on HSW. */
gs.ControlDataFormat = gs_prog_data->control_data_format;
gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
gs.DispatchMode = gs_prog_data->base.dispatch_mode;
gs.StatisticsEnable = true;
gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
# if (GEN_IS_HASWELL)
gs.ReorderMode = TRAILING;
# else
gs.ReorderEnable = true;
# endif
gs.FunctionEnable = true;
}
}
emit_3dstate_gs(pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);

View File

@ -53,9 +53,6 @@ genX(graphics_pipeline_create)(
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
const struct anv_physical_device *physical_device =
&device->instance->physicalDevice;
const struct gen_device_info *devinfo = &physical_device->info;
struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
struct anv_pipeline *pipeline;
VkResult result;
@ -112,65 +109,7 @@ genX(graphics_pipeline_create)(
wm_prog_data ? wm_prog_data->barycentric_interp_modes : 0;
}
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
} else {
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
const struct anv_shader_bin *gs_bin =
pipeline->shaders[MESA_SHADER_GEOMETRY];
uint32_t offset = 1;
uint32_t length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
gs.SingleProgramFlow = false;
gs.KernelStartPointer = gs_bin->kernel.offset;
gs.VectorMaskEnable = false;
gs.SamplerCount = get_sampler_count(gs_bin);
gs.BindingTableEntryCount = get_binding_table_entry_count(gs_bin);
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
gs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_GEOMETRY,
gs_prog_data->base.base.total_scratch),
.offset = 0,
};
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
gs.OutputTopology = gs_prog_data->output_topology;
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
gs.DispatchGRFStartRegisterForURBData =
gs_prog_data->base.base.dispatch_grf_start_reg;
gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
gs.DispatchMode = gs_prog_data->base.dispatch_mode;
gs.StatisticsEnable = true;
gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
gs.ReorderMode = TRAILING;
gs.FunctionEnable = true;
gs.ControlDataFormat = gs_prog_data->control_data_format;
gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
gs.StaticOutputVertexCount =
gs_prog_data->static_vertex_count >= 0 ?
gs_prog_data->static_vertex_count : 0;
/* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
* UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
* UserClipDistanceCullTestEnableBitmask(v)
*/
gs.VertexURBEntryOutputReadOffset = offset;
gs.VertexURBEntryOutputLength = length;
}
}
emit_3dstate_gs(pipeline);
emit_3dstate_vs(pipeline);
const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;

View File

@ -1053,4 +1053,77 @@ emit_3dstate_vs(struct anv_pipeline *pipeline)
}
}
/**
 * Emit the 3DSTATE_GS packet for a graphics pipeline into pipeline->batch.
 *
 * If the pipeline has no geometry shader stage, the packet is emitted
 * without any field being set by this function (so FunctionEnable is never
 * turned on here).  Otherwise the packet is filled in from the geometry
 * shader binary and its brw_gs_prog_data.
 *
 * The set of fields that exist in 3DSTATE_GS differs between hardware
 * generations, which is why parts of the body are guarded by GEN_GEN /
 * GEN_IS_HASWELL preprocessor checks.
 *
 * \param pipeline  Pipeline whose batch receives the packet and whose
 *                  geometry shader (if any) supplies the field values.
 */
static void
emit_3dstate_gs(struct anv_pipeline *pipeline)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
      return;
   }

   /* Everything below is only needed when a geometry shader is present. */
   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];
   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
      gs.FunctionEnable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      gs.SamplerCount = get_sampler_count(gs_bin);
      gs.BindingTableEntryCount = get_binding_table_entry_count(gs_bin);
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      if (GEN_GEN == 8) {
         /* Broadwell is weird.  It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology = gs_prog_data->output_topology;
      gs.ControlDataFormat = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
      /* Programmed as (invocation count - 1), treating 0 as 1. */
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      gs.ReorderMode = TRAILING;
#else
      gs.ReorderEnable = true;
#endif

#if GEN_GEN >= 8
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      /* A negative static_vertex_count means the count is not static. */
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
                                   gs_prog_data->static_vertex_count : 0;
#endif

      /* URB read setup; the read length is programmed exactly once here. */
      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      gs.VertexURBEntryOutputReadOffset = get_urb_output_offset();
      gs.VertexURBEntryOutputLength = get_urb_output_length(gs_bin);

      /* TODO */
      gs.UserClipDistanceClipTestEnableBitmask = 0;
      gs.UserClipDistanceCullTestEnableBitmask = 0;
#endif

      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_GEOMETRY, gs_bin);
   }
}
#endif /* GENX_PIPELINE_UTIL_H */