diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index de58d515ddb..3fc44169cfe 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -775,9 +775,6 @@ brwCreateContext(gl_api api, if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; - brw_fs_alloc_reg_sets(brw); - brw_vec4_alloc_reg_set(brw); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) brw_init_shader_time(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index bc79a00b593..80dba1011fa 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1272,26 +1272,6 @@ struct brw_context */ struct brw_vue_map vue_map_geom_out; - /** - * Data structures used by all vec4 program compiles (not specific to any - * particular program). - */ - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - } vec4; - struct { struct brw_stage_state base; struct brw_vs_prog_data *prog_data; @@ -1356,28 +1336,6 @@ struct brw_context * Gen6. See brw_update_null_renderbuffer_surface(). */ drm_intel_bo *multisampled_null_render_target_bo; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } reg_sets[2]; } wm; @@ -1607,10 +1565,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct brw_context *brw); +void brw_fs_alloc_reg_sets(struct intel_screen *screen); /* brw_vec4_reg_allocate.cpp */ -void brw_vec4_alloc_reg_set(struct brw_context *brw); +void brw_vec4_alloc_reg_set(struct intel_screen *screen); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5b1741acd78..f2f158e6793 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -71,8 +71,9 @@ fs_visitor::assign_regs_trivial() } static void -brw_alloc_reg_set(struct brw_context *brw, int reg_width) +brw_alloc_reg_set(struct intel_screen *screen, int reg_width) { + const struct brw_device_info *devinfo = screen->devinfo; int base_reg_count = BRW_MAX_GRF / reg_width; int index = reg_width - 1; @@ -102,7 +103,7 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) int class_count; int class_sizes[BRW_MAX_MRF]; - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { for (class_count = 0; class_count < MAX_SAMPLER_MESSAGE_SIZE; class_count++) class_sizes[class_count] = class_count + 1; @@ -118,11 +119,11 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - uint8_t *ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(brw, ra_reg_count); - if (brw->gen >= 6) + uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); + struct ra_regs *regs = ra_alloc_reg_set(screen, ra_reg_count); + if (devinfo->gen >= 6) ra_set_allocate_round_robin(regs); - int *classes = ralloc_array(brw, int, class_count); + int *classes = ralloc_array(screen, int, class_count); int aligned_pairs_class = -1; /* Now, add the registers to their classes, and add the conflicts @@ -160,7 +161,7 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) /* Add a special class for aligned pairs, which we'll put delta_x/y * in on gen5 so that we can do PLN. */ - if (brw->has_pln && reg_width == 1 && brw->gen < 6) { + if (devinfo->has_pln && reg_width == 1 && devinfo->gen < 6) { aligned_pairs_class = ra_alloc_reg_class(regs); for (int i = 0; i < pairs_reg_count; i++) { @@ -172,20 +173,20 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) ra_set_finalize(regs, NULL); - brw->wm.reg_sets[index].regs = regs; - for (unsigned i = 0; i < ARRAY_SIZE(brw->wm.reg_sets[index].classes); i++) - brw->wm.reg_sets[index].classes[i] = -1; + screen->wm_reg_sets[index].regs = regs; + for (unsigned i = 0; i < ARRAY_SIZE(screen->wm_reg_sets[index].classes); i++) + screen->wm_reg_sets[index].classes[i] = -1; for (int i = 0; i < class_count; i++) - brw->wm.reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; - brw->wm.reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; - brw->wm.reg_sets[index].aligned_pairs_class = aligned_pairs_class; + screen->wm_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; + screen->wm_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; + screen->wm_reg_sets[index].aligned_pairs_class = aligned_pairs_class; } void -brw_fs_alloc_reg_sets(struct brw_context *brw) +brw_fs_alloc_reg_sets(struct intel_screen *screen) { - brw_alloc_reg_set(brw, 1); - brw_alloc_reg_set(brw, 2); + brw_alloc_reg_set(screen, 1); + brw_alloc_reg_set(screen, 2); } int @@ -420,6 +421,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) bool fs_visitor::assign_regs(bool allow_spilling) { + struct intel_screen *screen = brw->intelScreen; /* Most of this allocation was written for a reg_width of 1 * (dispatch_width == 8). In extending to SIMD16, the code was * left in place and it was converted to have the hardware @@ -430,7 +432,7 @@ fs_visitor::assign_regs(bool allow_spilling) int hw_reg_mapping[this->virtual_grf_count]; int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) / reg_width); - int rsi = reg_width - 1; /* Which brw->wm.reg_sets[] to use */ + int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */ calculate_live_intervals(); int node_count = this->virtual_grf_count; @@ -439,16 +441,16 @@ fs_visitor::assign_regs(bool allow_spilling) int first_mrf_hack_node = node_count; if (brw->gen >= 7) node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START; - struct ra_graph *g = ra_alloc_interference_graph(brw->wm.reg_sets[rsi].regs, + struct ra_graph *g = ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count); for (int i = 0; i < this->virtual_grf_count; i++) { unsigned size = this->virtual_grf_sizes[i]; int c; - assert(size <= ARRAY_SIZE(brw->wm.reg_sets[rsi].classes) && + assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) && "Register allocation relies on split_virtual_grfs()"); - c = brw->wm.reg_sets[rsi].classes[size - 1]; + c = screen->wm_reg_sets[rsi].classes[size - 1]; /* Special case: on pre-GEN6 hardware that supports PLN, the * second operand of a PLN instruction needs to be an @@ -459,9 +461,9 @@ fs_visitor::assign_regs(bool allow_spilling) * any other interpolation modes). So all we need to do is find * that register and set it to the appropriate class. */ - if (brw->wm.reg_sets[rsi].aligned_pairs_class >= 0 && + if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 && this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { - c = brw->wm.reg_sets[rsi].aligned_pairs_class; + c = screen->wm_reg_sets[rsi].aligned_pairs_class; } ra_set_node_class(g, i, c); @@ -514,7 +516,7 @@ fs_visitor::assign_regs(bool allow_spilling) for (int i = 0; i < this->virtual_grf_count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = brw->wm.reg_sets[rsi].ra_reg_to_grf[reg] * reg_width; + hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg] * reg_width; this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index e7aab9d0393..349c031090c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -97,9 +97,10 @@ vec4_visitor::reg_allocate_trivial() } extern "C" void -brw_vec4_alloc_reg_set(struct brw_context *brw) +brw_vec4_alloc_reg_set(struct intel_screen *screen) { - int base_reg_count = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; + int base_reg_count = + screen->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; /* After running split_virtual_grfs(), almost all VGRFs will be of size 1. * SEND-from-GRF sources cannot be split, so we also need classes for each @@ -114,14 +115,14 @@ brw_vec4_alloc_reg_set(struct brw_context *brw) ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - ralloc_free(brw->vec4.ra_reg_to_grf); - brw->vec4.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); - ralloc_free(brw->vec4.regs); - brw->vec4.regs = ra_alloc_reg_set(brw, ra_reg_count); - if (brw->gen >= 6) - ra_set_allocate_round_robin(brw->vec4.regs); - ralloc_free(brw->vec4.classes); - brw->vec4.classes = ralloc_array(brw, int, class_count); + ralloc_free(screen->vec4_reg_set.ra_reg_to_grf); + screen->vec4_reg_set.ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); + ralloc_free(screen->vec4_reg_set.regs); + screen->vec4_reg_set.regs = ra_alloc_reg_set(screen, ra_reg_count); + if (screen->devinfo->gen >= 6) + ra_set_allocate_round_robin(screen->vec4_reg_set.regs); + ralloc_free(screen->vec4_reg_set.classes); + screen->vec4_reg_set.classes = ralloc_array(screen, int, class_count); /* Now, add the registers to their classes, and add the conflicts * between them and the base GRF registers (and also each other). @@ -129,17 +130,17 @@ brw_vec4_alloc_reg_set(struct brw_context *brw) int reg = 0; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); - brw->vec4.classes[i] = ra_alloc_reg_class(brw->vec4.regs); + screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs); for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(brw->vec4.regs, brw->vec4.classes[i], reg); + ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg); - brw->vec4.ra_reg_to_grf[reg] = j; + screen->vec4_reg_set.ra_reg_to_grf[reg] = j; for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(brw->vec4.regs, base_reg, reg); + ra_add_transitive_reg_conflict(screen->vec4_reg_set.regs, base_reg, reg); } reg++; @@ -147,7 +148,7 @@ brw_vec4_alloc_reg_set(struct brw_context *brw) } assert(reg == ra_reg_count); - ra_set_finalize(brw->vec4.regs, NULL); + ra_set_finalize(screen->vec4_reg_set.regs, NULL); } void @@ -177,6 +178,7 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { + struct intel_screen *screen = brw->intelScreen; unsigned int hw_reg_mapping[virtual_grf_count]; int payload_reg_count = this->first_non_payload_grf; @@ -192,13 +194,13 @@ vec4_visitor::reg_allocate() int first_payload_node = node_count; node_count += payload_reg_count; struct ra_graph *g = - ra_alloc_interference_graph(brw->vec4.regs, node_count); + ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count); for (int i = 0; i < virtual_grf_count; i++) { int size = this->virtual_grf_sizes[i]; assert(size >= 1 && size <= 2 && "Register allocation relies on split_virtual_grfs()."); - ra_set_node_class(g, i, brw->vec4.classes[size - 1]); + ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]); for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { @@ -234,7 +236,7 @@ vec4_visitor::reg_allocate() for (int i = 0; i < virtual_grf_count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = brw->vec4.ra_reg_to_grf[reg]; + hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg]; prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + virtual_grf_sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 35369220c8e..4e9a77594b3 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1354,6 +1354,9 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) psp->extensions = !intelScreen->has_context_reset_notification ? intelScreenExtensions : intelRobustScreenExtensions; + brw_fs_alloc_reg_sets(intelScreen); + brw_vec4_alloc_reg_set(intelScreen); + return (const __DRIconfig**) intel_screen_make_configs(psp); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index b08cb68cbcf..09690215ded 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -62,6 +62,44 @@ struct intel_screen */ unsigned program_id; + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + } vec4_reg_set; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used, indexed by register size. + */ + int classes[16]; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + } wm_reg_sets[2]; + /** * Configuration cache with default values for all contexts */