iris: implement scratch space!
We borrow the approach from anv rather than i965, as it works better with pre-baked state that needs to contain scratch BO addresses. This fixes a bunch of varying packing tests.
This commit is contained in:
parent
9511b89ef9
commit
4d219b0eb3
|
@ -328,7 +328,7 @@ struct iris_vtable {
|
|||
uint64_t imm);
|
||||
|
||||
unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
|
||||
void (*store_derived_program_state)(const struct gen_device_info *devinfo,
|
||||
void (*store_derived_program_state)(struct iris_context *ice,
|
||||
enum iris_program_cache_id cache_id,
|
||||
struct iris_compiled_shader *shader);
|
||||
uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
|
||||
|
@ -394,6 +394,14 @@ struct iris_context {
|
|||
struct hash_table *cache;
|
||||
|
||||
unsigned urb_size;
|
||||
|
||||
/**
|
||||
* Scratch buffers for various sizes and stages.
|
||||
*
|
||||
* Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
|
||||
* and shader stage.
|
||||
*/
|
||||
struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
|
||||
} shaders;
|
||||
|
||||
struct {
|
||||
|
@ -552,7 +560,9 @@ const struct shader_info *iris_get_shader_info(const struct iris_context *ice,
|
|||
gl_shader_stage stage);
|
||||
unsigned iris_get_shader_num_ubos(const struct iris_context *ice,
|
||||
gl_shader_stage stage);
|
||||
|
||||
uint32_t iris_get_scratch_space(struct iris_context *ice,
|
||||
unsigned per_thread_scratch,
|
||||
gl_shader_stage stage);
|
||||
|
||||
/* iris_program_cache.c */
|
||||
|
||||
|
|
|
@ -1072,6 +1072,56 @@ iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
|
|||
dst[8 * t] = t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate scratch BOs as needed for the given per-thread size and stage.
|
||||
*
|
||||
* Returns the 32-bit "Scratch Space Base Pointer" value.
|
||||
*/
|
||||
uint32_t
|
||||
iris_get_scratch_space(struct iris_context *ice,
|
||||
unsigned per_thread_scratch,
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
|
||||
struct iris_bufmgr *bufmgr = screen->bufmgr;
|
||||
const struct gen_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
unsigned encoded_size = ffs(per_thread_scratch) - 11;
|
||||
assert(encoded_size < (1 << 16));
|
||||
|
||||
struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
|
||||
|
||||
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
|
||||
*
|
||||
* "Scratch Space per slice is computed based on 4 sub-slices. SW must
|
||||
* allocate scratch space enough so that each slice has 4 slices
|
||||
* allowed."
|
||||
*
|
||||
* According to the other driver team, this applies to compute shaders
|
||||
* as well. This is not currently documented at all.
|
||||
*/
|
||||
unsigned subslice_total = 4 * devinfo->num_slices;
|
||||
assert(subslice_total >= screen->subslice_total);
|
||||
|
||||
if (!*bop) {
|
||||
unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
|
||||
uint32_t max_threads[] = {
|
||||
[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
|
||||
[MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
|
||||
[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
|
||||
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
|
||||
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
|
||||
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
|
||||
};
|
||||
|
||||
uint32_t size = per_thread_scratch * max_threads[stage];
|
||||
|
||||
*bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
|
||||
}
|
||||
|
||||
return (*bop)->gtt_offset;
|
||||
}
|
||||
|
||||
void
|
||||
iris_init_program_functions(struct pipe_context *ctx)
|
||||
{
|
||||
|
|
|
@ -241,8 +241,6 @@ iris_upload_shader(struct iris_context *ice,
|
|||
struct brw_stage_prog_data *prog_data,
|
||||
uint32_t *streamout)
|
||||
{
|
||||
struct iris_screen *screen = (void *) ice->ctx.screen;
|
||||
struct gen_device_info *devinfo = &screen->devinfo;
|
||||
struct hash_table *cache = ice->shaders.cache;
|
||||
struct iris_compiled_shader *shader =
|
||||
rzalloc_size(cache, sizeof(struct iris_compiled_shader) +
|
||||
|
@ -277,7 +275,7 @@ iris_upload_shader(struct iris_context *ice,
|
|||
ralloc_steal(shader, shader->streamout);
|
||||
|
||||
/* Store the 3DSTATE shader packets and other derived state. */
|
||||
ice->vtbl.store_derived_program_state(devinfo, cache_id, shader);
|
||||
ice->vtbl.store_derived_program_state(ice, cache_id, shader);
|
||||
|
||||
struct keybox *keybox = make_keybox(cache, cache_id, key, key_size);
|
||||
_mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
|
||||
|
|
|
@ -2981,8 +2981,6 @@ iris_populate_cs_key(const struct iris_context *ice,
|
|||
// XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
|
||||
pkt.SamplerCount = \
|
||||
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
|
||||
pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \
|
||||
ffs(stage_state->per_thread_scratch) - 11; \
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -2997,7 +2995,7 @@ KSP(const struct iris_compiled_shader *shader)
|
|||
// prefetching of binding tables in A0 and B0 steppings. XXX: Revisit
|
||||
// this WA on C0 stepping.
|
||||
|
||||
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
|
||||
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage) \
|
||||
pkt.KernelStartPointer = KSP(shader); \
|
||||
pkt.BindingTableEntryCount = GEN_GEN == 11 ? 0 : \
|
||||
prog_data->binding_table.size_bytes / 4; \
|
||||
|
@ -3009,20 +3007,28 @@ KSP(const struct iris_compiled_shader *shader)
|
|||
pkt.prefix##URBEntryReadOffset = 0; \
|
||||
\
|
||||
pkt.StatisticsEnable = true; \
|
||||
pkt.Enable = true;
|
||||
pkt.Enable = true; \
|
||||
\
|
||||
if (prog_data->total_scratch) { \
|
||||
uint32_t scratch_addr = \
|
||||
iris_get_scratch_space(ice, prog_data->total_scratch, stage); \
|
||||
pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
|
||||
pkt.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode most of 3DSTATE_VS based on the compiled shader.
|
||||
*/
|
||||
static void
|
||||
iris_store_vs_state(const struct gen_device_info *devinfo,
|
||||
iris_store_vs_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
|
||||
|
||||
iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
|
||||
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
|
||||
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
|
||||
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
|
||||
vs.SIMD8DispatchEnable = true;
|
||||
vs.UserClipDistanceCullTestEnableBitmask =
|
||||
|
@ -3034,7 +3040,8 @@ iris_store_vs_state(const struct gen_device_info *devinfo,
|
|||
* Encode most of 3DSTATE_HS based on the compiled shader.
|
||||
*/
|
||||
static void
|
||||
iris_store_tcs_state(const struct gen_device_info *devinfo,
|
||||
iris_store_tcs_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
|
@ -3042,7 +3049,7 @@ iris_store_tcs_state(const struct gen_device_info *devinfo,
|
|||
struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
|
||||
|
||||
iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
|
||||
INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
|
||||
INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);
|
||||
|
||||
hs.InstanceCount = tcs_prog_data->instances - 1;
|
||||
hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
|
||||
|
@ -3054,7 +3061,8 @@ iris_store_tcs_state(const struct gen_device_info *devinfo,
|
|||
* Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader.
|
||||
*/
|
||||
static void
|
||||
iris_store_tes_state(const struct gen_device_info *devinfo,
|
||||
iris_store_tes_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
|
@ -3074,7 +3082,7 @@ iris_store_tes_state(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
|
||||
INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
|
||||
INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);
|
||||
|
||||
ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
|
||||
ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
|
||||
|
@ -3091,7 +3099,8 @@ iris_store_tes_state(const struct gen_device_info *devinfo,
|
|||
* Encode most of 3DSTATE_GS based on the compiled shader.
|
||||
*/
|
||||
static void
|
||||
iris_store_gs_state(const struct gen_device_info *devinfo,
|
||||
iris_store_gs_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
|
@ -3099,7 +3108,7 @@ iris_store_gs_state(const struct gen_device_info *devinfo,
|
|||
struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
|
||||
|
||||
iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
|
||||
INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
|
||||
INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);
|
||||
|
||||
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
|
||||
gs.OutputTopology = gs_prog_data->output_topology;
|
||||
|
@ -3138,7 +3147,8 @@ iris_store_gs_state(const struct gen_device_info *devinfo,
|
|||
* Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader.
|
||||
*/
|
||||
static void
|
||||
iris_store_fs_state(const struct gen_device_info *devinfo,
|
||||
iris_store_fs_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
|
@ -3193,6 +3203,14 @@ iris_store_fs_state(const struct gen_device_info *devinfo,
|
|||
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 =
|
||||
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
|
||||
|
||||
if (prog_data->total_scratch) {
|
||||
uint32_t scratch_addr =
|
||||
iris_get_scratch_space(ice, prog_data->total_scratch,
|
||||
MESA_SHADER_FRAGMENT);
|
||||
ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
|
||||
ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr);
|
||||
}
|
||||
}
|
||||
|
||||
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
|
||||
|
@ -3226,7 +3244,8 @@ iris_store_fs_state(const struct gen_device_info *devinfo,
|
|||
* This must match the data written by the iris_store_xs_state() functions.
|
||||
*/
|
||||
static void
|
||||
iris_store_cs_state(const struct gen_device_info *devinfo,
|
||||
iris_store_cs_state(struct iris_context *ice,
|
||||
const struct gen_device_info *devinfo,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
|
@ -3271,28 +3290,31 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id)
|
|||
* get most of the state packet without having to reconstruct it.
|
||||
*/
|
||||
static void
|
||||
iris_store_derived_program_state(const struct gen_device_info *devinfo,
|
||||
iris_store_derived_program_state(struct iris_context *ice,
|
||||
enum iris_program_cache_id cache_id,
|
||||
struct iris_compiled_shader *shader)
|
||||
{
|
||||
struct iris_screen *screen = (void *) ice->ctx.screen;
|
||||
const struct gen_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
switch (cache_id) {
|
||||
case IRIS_CACHE_VS:
|
||||
iris_store_vs_state(devinfo, shader);
|
||||
iris_store_vs_state(ice, devinfo, shader);
|
||||
break;
|
||||
case IRIS_CACHE_TCS:
|
||||
iris_store_tcs_state(devinfo, shader);
|
||||
iris_store_tcs_state(ice, devinfo, shader);
|
||||
break;
|
||||
case IRIS_CACHE_TES:
|
||||
iris_store_tes_state(devinfo, shader);
|
||||
iris_store_tes_state(ice, devinfo, shader);
|
||||
break;
|
||||
case IRIS_CACHE_GS:
|
||||
iris_store_gs_state(devinfo, shader);
|
||||
iris_store_gs_state(ice, devinfo, shader);
|
||||
break;
|
||||
case IRIS_CACHE_FS:
|
||||
iris_store_fs_state(devinfo, shader);
|
||||
iris_store_fs_state(ice, devinfo, shader);
|
||||
break;
|
||||
case IRIS_CACHE_CS:
|
||||
iris_store_cs_state(devinfo, shader);
|
||||
iris_store_cs_state(ice, devinfo, shader);
|
||||
case IRIS_CACHE_BLORP:
|
||||
break;
|
||||
default:
|
||||
|
@ -4401,12 +4423,11 @@ iris_upload_compute_state(struct iris_context *ice,
|
|||
|
||||
iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
if (prog_data->total_scratch) {
|
||||
/* Per Thread Scratch Space is in the range [0, 11] where
|
||||
* 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
|
||||
*/
|
||||
// XXX: vfe.ScratchSpaceBasePointer
|
||||
//vfe.PerThreadScratchSpace =
|
||||
//ffs(stage_state->per_thread_scratch) - 11;
|
||||
uint32_t scratch_addr =
|
||||
iris_get_scratch_space(ice, prog_data->total_scratch,
|
||||
MESA_SHADER_COMPUTE);
|
||||
vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
|
||||
vfe.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr);
|
||||
}
|
||||
|
||||
vfe.MaximumNumberofThreads =
|
||||
|
|
Loading…
Reference in New Issue