intel: Allocate prog_data::[pull_]param deeper inside the compiler
Now that we're always growing the param array as-needed, we can allocate the param array in common code and stop repeating the allocation everywhere. In order to keep things sane, we ralloc the [pull_]param array off of the compile context and then steal it back to a NULL context later. This doesn't get us all the way to where prog_data::[pull_]param is purely an out parameter of the back-end compiler but it gets us a lot closer. Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
c3d54d0375
commit
29737eac98
|
@ -134,12 +134,13 @@ enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo,
|
|||
|
||||
enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
|
||||
|
||||
void brw_nir_setup_glsl_uniforms(nir_shader *shader,
|
||||
void brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
bool is_scalar);
|
||||
|
||||
void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
|
||||
void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
|
||||
struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data);
|
||||
|
||||
void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
|
||||
|
|
|
@ -1762,8 +1762,7 @@ vec4_visitor::setup_uniforms(int reg)
|
|||
* matter what, or the GPU would hang.
|
||||
*/
|
||||
if (devinfo->gen < 6 && this->uniforms == 0) {
|
||||
stage_prog_data->param =
|
||||
reralloc(NULL, stage_prog_data->param, uint32_t, 4);
|
||||
brw_stage_prog_data_add_params(stage_prog_data, 4);
|
||||
for (unsigned int i = 0; i < 4; i++) {
|
||||
unsigned int slot = this->uniforms * 4 + i;
|
||||
stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
|
||||
|
|
|
@ -77,18 +77,8 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
|||
|
||||
assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count = cp->program.nir->num_uniforms / 4;
|
||||
|
||||
prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.nr_params = param_count;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(cp->program.nir, &cp->program,&prog_data.base,
|
||||
true);
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir,
|
||||
&cp->program, &prog_data.base, true);
|
||||
|
||||
if (unlikely(brw->perf_debug)) {
|
||||
start_busy = (brw->batch.last_bo &&
|
||||
|
@ -149,6 +139,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
|||
prog_data.base.total_scratch,
|
||||
scratch_ids_per_subslice * subslices);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
|
|
|
@ -87,23 +87,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*
|
||||
* Note: param_count needs to be num_uniform_components * 4, since we add
|
||||
* padding around uniform values below vec4 size, so the worst case is that
|
||||
* every uniform is a float which gets padded to the size of a vec4.
|
||||
*/
|
||||
int param_count = gp->program.nir->num_uniforms / 4;
|
||||
|
||||
prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.nr_params = param_count;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program,
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program,
|
||||
&prog_data.base.base,
|
||||
compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
|
||||
brw_nir_analyze_ubo_ranges(compiler, gp->program.nir,
|
||||
|
@ -124,7 +112,6 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
start_time = get_time();
|
||||
}
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
unsigned program_size;
|
||||
char *error_str;
|
||||
const unsigned *program =
|
||||
|
@ -155,6 +142,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_gs_threads);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
|
|
|
@ -187,10 +187,16 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
|
|||
}
|
||||
|
||||
void
|
||||
brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog,
|
||||
brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
|
||||
const struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data,
|
||||
bool is_scalar)
|
||||
{
|
||||
unsigned nr_params = shader->num_uniforms / 4;
|
||||
stage_prog_data->nr_params = nr_params;
|
||||
stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
|
||||
stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params);
|
||||
|
||||
nir_foreach_variable(var, &shader->uniforms) {
|
||||
/* UBO's, atomics and samplers don't take up space in the
|
||||
uniform file */
|
||||
|
@ -208,11 +214,17 @@ brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog,
|
|||
}
|
||||
|
||||
void
|
||||
brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
|
||||
brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
|
||||
struct gl_program *prog,
|
||||
struct brw_stage_prog_data *stage_prog_data)
|
||||
{
|
||||
struct gl_program_parameter_list *plist = prog->Parameters;
|
||||
|
||||
unsigned nr_params = plist->NumParameters * 4;
|
||||
stage_prog_data->nr_params = nr_params;
|
||||
stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
|
||||
stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params);
|
||||
|
||||
/* For ARB programs, prog_to_nir generates a single "parameters" variable
|
||||
* for all uniform data. nir_lower_wpos_ytransform may also create an
|
||||
* additional variable.
|
||||
|
|
|
@ -178,25 +178,12 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
|
|||
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*
|
||||
* Note: param_count needs to be num_uniform_components * 4, since we add
|
||||
* padding around uniform values below vec4 size, so the worst case is that
|
||||
* every uniform is a float which gets padded to the size of a vec4.
|
||||
*/
|
||||
int param_count = nir->num_uniforms / 4;
|
||||
|
||||
prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.nr_params = param_count;
|
||||
|
||||
if (tcp) {
|
||||
brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
|
||||
&prog_data.base.base, 0);
|
||||
|
||||
brw_nir_setup_glsl_uniforms(nir, &tcp->program, &prog_data.base.base,
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
|
||||
&prog_data.base.base,
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
|
||||
brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir,
|
||||
prog_data.base.base.ubo_ranges);
|
||||
|
@ -204,6 +191,10 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
|
|||
/* Upload the Patch URB Header as the first two uniforms.
|
||||
* Do the annoying scrambling so the shader doesn't have to.
|
||||
*/
|
||||
assert(nir->num_uniforms == 32);
|
||||
prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
|
||||
prog_data.base.base.nr_params = 8;
|
||||
|
||||
uint32_t *param = prog_data.base.base.param;
|
||||
for (int i = 0; i < 8; i++)
|
||||
param[i] = BRW_PARAM_BUILTIN_ZERO;
|
||||
|
@ -272,6 +263,9 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
|
|||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_tcs_threads);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
|
|
|
@ -77,24 +77,13 @@ brw_codegen_tes_prog(struct brw_context *brw,
|
|||
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
brw_assign_common_binding_table_offsets(devinfo, &tep->program,
|
||||
&prog_data.base.base, 0);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*
|
||||
* Note: param_count needs to be num_uniform_components * 4, since we add
|
||||
* padding around uniform values below vec4 size, so the worst case is that
|
||||
* every uniform is a float which gets padded to the size of a vec4.
|
||||
*/
|
||||
int param_count = nir->num_uniforms / 4;
|
||||
|
||||
prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.base.nr_params = param_count;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(nir, &tep->program, &prog_data.base.base,
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program,
|
||||
&prog_data.base.base,
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
|
||||
brw_nir_analyze_ubo_ranges(compiler, tep->program.nir,
|
||||
prog_data.base.base.ubo_ranges);
|
||||
|
@ -112,7 +101,6 @@ brw_codegen_tes_prog(struct brw_context *brw,
|
|||
brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
|
||||
key->patch_inputs_read);
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
unsigned program_size;
|
||||
char *error_str;
|
||||
const unsigned *program =
|
||||
|
@ -145,6 +133,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
|
|||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_tes_threads);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
|
|
|
@ -178,24 +178,14 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
brw_assign_common_binding_table_offsets(devinfo, &vp->program,
|
||||
&prog_data.base.base, 0);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count = vp->program.nir->num_uniforms / 4;
|
||||
|
||||
stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
stage_prog_data->nr_params = param_count;
|
||||
|
||||
if (!vp->program.is_arb_asm) {
|
||||
brw_nir_setup_glsl_uniforms(vp->program.nir, &vp->program,
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program,
|
||||
&prog_data.base.base,
|
||||
compiler->scalar_stage[MESA_SHADER_VERTEX]);
|
||||
brw_nir_analyze_ubo_ranges(compiler, vp->program.nir,
|
||||
prog_data.base.base.ubo_ranges);
|
||||
} else {
|
||||
brw_nir_setup_arb_uniforms(vp->program.nir, &vp->program,
|
||||
brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program,
|
||||
&prog_data.base.base);
|
||||
}
|
||||
|
||||
|
@ -262,6 +252,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_vs_threads);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
|
||||
key, sizeof(struct brw_vs_prog_key),
|
||||
program, program_size,
|
||||
|
|
|
@ -144,22 +144,13 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||
|
||||
assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*/
|
||||
int param_count = fp->program.nir->num_uniforms / 4;
|
||||
prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
|
||||
prog_data.base.nr_params = param_count;
|
||||
|
||||
if (!fp->program.is_arb_asm) {
|
||||
brw_nir_setup_glsl_uniforms(fp->program.nir, &fp->program,
|
||||
brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program,
|
||||
&prog_data.base, true);
|
||||
brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir,
|
||||
prog_data.base.ubo_ranges);
|
||||
} else {
|
||||
brw_nir_setup_arb_uniforms(fp->program.nir, &fp->program,
|
||||
brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program,
|
||||
&prog_data.base);
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_WM))
|
||||
|
@ -217,6 +208,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||
if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.param);
|
||||
ralloc_steal(NULL, prog_data.base.pull_param);
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
|
||||
key, sizeof(struct brw_wm_prog_key),
|
||||
program, program_size,
|
||||
|
|
Loading…
Reference in New Issue