diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
index b9886e802fd..93a7a11709c 100644
--- a/src/util/register_allocate.c
+++ b/src/util/register_allocate.c
@@ -234,6 +234,24 @@ ra_alloc_reg_class(struct ra_regs *regs)
    return class;
 }
 
+/**
+ * Creates a register class for contiguous register groups of a base register
+ * set.
+ *
+ * A reg set using this type of register class must use only this type of
+ * register class.
+ */
+struct ra_class *
+ra_alloc_contig_reg_class(struct ra_regs *regs, int contig_len)
+{
+   struct ra_class *c = ra_alloc_reg_class(regs);
+
+   assert(contig_len != 0);
+   c->contig_len = contig_len;
+
+   return c;
+}
+
 struct ra_class *
 ra_get_class_from_index(struct ra_regs *regs, unsigned int class)
 {
@@ -250,6 +268,7 @@ void
 ra_class_add_reg(struct ra_class *class, unsigned int r)
 {
    assert(r < class->regset->count);
+   assert(r + class->contig_len <= class->regset->count);
 
    BITSET_SET(class->regs, r);
    class->p++;
@@ -291,21 +310,67 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
     */
    for (b = 0; b < regs->class_count; b++) {
       for (c = 0; c < regs->class_count; c++) {
-         unsigned int rc;
-         int max_conflicts = 0;
+         struct ra_class *class_b = regs->classes[b];
+         struct ra_class *class_c = regs->classes[c];
 
-         BITSET_FOREACH_SET(rc, regs->classes[c]->regs, regs->count) {
-            int conflicts = 0;
+         if (class_b->contig_len && class_c->contig_len) {
+            if (class_b->contig_len == 1 && class_c->contig_len == 1) {
+               /* If both classes are single registers, then they only
+                * conflict if there are any regs shared between them.  This
+                * is a cheap test for a common case.
+                */
+               class_b->q[c] = 0;
+               for (int i = 0; i < BITSET_WORDS(regs->count); i++) {
+                  if (class_b->regs[i] & class_c->regs[i]) {
+                     class_b->q[c] = 1;
+                     break;
+                  }
+               }
+            } else {
+               int max_possible_conflicts =
+                  class_b->contig_len + class_c->contig_len - 1;
 
-            util_dynarray_foreach(&regs->regs[rc].conflict_list,
-                                  unsigned int, rbp) {
-               unsigned int rb = *rbp;
-               if (reg_belongs_to_class(rb, regs->classes[b]))
-                  conflicts++;
+               unsigned int max_conflicts = 0;
+               unsigned int rc;
+               BITSET_FOREACH_SET(rc, regs->classes[c]->regs, regs->count) {
+                  int start = MAX2(0, (int)rc - class_b->contig_len + 1);
+                  int end = MIN2(regs->count, rc + class_c->contig_len);
+                  unsigned int conflicts = 0;
+                  for (int i = start; i < end; i++) {
+                     if (BITSET_TEST(class_b->regs, i))
+                        conflicts++;
+                  }
+                  max_conflicts = MAX2(max_conflicts, conflicts);
+                  /* Unless a class has some restriction such as the register
+                   * bases all being aligned, we should quickly hit this
+                   * limit and exit the loop.
+                   */
+                  if (max_conflicts == max_possible_conflicts)
+                     break;
+               }
+               class_b->q[c] = max_conflicts;
             }
-            max_conflicts = MAX2(max_conflicts, conflicts);
+         } else {
+            /* Mixing contiguous and non-contiguous register classes within
+             * one register set is not supported.
+             */
+            assert(!class_b->contig_len && !class_c->contig_len);
+
+            unsigned int rc;
+            int max_conflicts = 0;
+
+            BITSET_FOREACH_SET(rc, regs->classes[c]->regs, regs->count) {
+               int conflicts = 0;
+
+               util_dynarray_foreach(&regs->regs[rc].conflict_list,
+                                     unsigned int, rbp) {
+                  unsigned int rb = *rbp;
+                  if (reg_belongs_to_class(rb, regs->classes[b]))
+                     conflicts++;
+               }
+               max_conflicts = MAX2(max_conflicts, conflicts);
+            }
+            regs->classes[b]->q[c] = max_conflicts;
          }
-         regs->classes[b]->q[c] = max_conflicts;
       }
    }
 }
@@ -313,6 +378,20 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
    for (b = 0; b < regs->count; b++) {
       util_dynarray_fini(&regs->regs[b].conflict_list);
    }
+
+   bool all_contig = true;
+   for (int c = 0; c < regs->class_count; c++)
+      all_contig &= regs->classes[c]->contig_len != 0;
+   if (all_contig) {
+      /* In this case, we never need the conflicts lists (and it would probably
+       * be a mistake to look at conflicts when doing contiguous classes!), so
+       * free them.  TODO: Avoid the allocation in the first place.
+       */
+      for (int i = 0; i < regs->count; i++) {
+         ralloc_free(regs->regs[i].conflicts);
+         regs->regs[i].conflicts = NULL;
+      }
+   }
 }
 
 void
@@ -321,17 +400,23 @@ ra_set_serialize(const struct ra_regs *regs, struct blob *blob)
    blob_write_uint32(blob, regs->count);
    blob_write_uint32(blob, regs->class_count);
 
-   for (unsigned int r = 0; r < regs->count; r++) {
-      struct ra_reg *reg = &regs->regs[r];
-      blob_write_bytes(blob, reg->conflicts, BITSET_WORDS(regs->count) *
-                       sizeof(BITSET_WORD));
-      assert(util_dynarray_num_elements(&reg->conflict_list, unsigned int) == 0);
+   bool is_contig = regs->classes[0]->contig_len != 0;
+   blob_write_uint8(blob, is_contig);
+
+   if (!is_contig) {
+      for (unsigned int r = 0; r < regs->count; r++) {
+         struct ra_reg *reg = &regs->regs[r];
+         blob_write_bytes(blob, reg->conflicts, BITSET_WORDS(regs->count) *
+                          sizeof(BITSET_WORD));
+         assert(util_dynarray_num_elements(&reg->conflict_list, unsigned int) == 0);
+      }
    }
 
    for (unsigned int c = 0; c < regs->class_count; c++) {
       struct ra_class *class = regs->classes[c];
       blob_write_bytes(blob, class->regs, BITSET_WORDS(regs->count) *
                        sizeof(BITSET_WORD));
+      blob_write_uint32(blob, class->contig_len);
       blob_write_uint32(blob, class->p);
       blob_write_bytes(blob, class->q, regs->class_count * sizeof(*class->q));
    }
@@ -344,14 +429,22 @@ ra_set_deserialize(void *mem_ctx, struct blob_reader *blob)
 {
    unsigned int reg_count = blob_read_uint32(blob);
    unsigned int class_count = blob_read_uint32(blob);
+   bool is_contig = blob_read_uint8(blob);
 
    struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, reg_count, false);
    assert(regs->count == reg_count);
 
-   for (unsigned int r = 0; r < reg_count; r++) {
-      struct ra_reg *reg = &regs->regs[r];
-      blob_copy_bytes(blob, reg->conflicts, BITSET_WORDS(reg_count) *
-                      sizeof(BITSET_WORD));
+   if (is_contig) {
+      for (int i = 0; i < regs->count; i++) {
+         ralloc_free(regs->regs[i].conflicts);
+         regs->regs[i].conflicts = NULL;
+      }
+   } else {
+      for (unsigned int r = 0; r < reg_count; r++) {
+         struct ra_reg *reg = &regs->regs[r];
+         blob_copy_bytes(blob, reg->conflicts, BITSET_WORDS(reg_count) *
+                         sizeof(BITSET_WORD));
+      }
    }
 
    assert(regs->classes == NULL);
@@ -366,6 +459,7 @@ ra_set_deserialize(void *mem_ctx, struct blob_reader *blob)
 
       blob_copy_bytes(blob, class->regs, BITSET_WORDS(reg_count) *
                       sizeof(BITSET_WORD));
+      class->contig_len = blob_read_uint32(blob);
       class->p = blob_read_uint32(blob);
       class->q = ralloc_array(regs->classes[c], unsigned int, class_count);
@@ -695,14 +789,31 @@ ra_simplify(struct ra_graph *g)
    g->tmp.stack_optimistic_start = stack_optimistic_start;
 }
 
+bool
+ra_class_allocations_conflict(struct ra_class *c1, unsigned int r1,
+                              struct ra_class *c2, unsigned int r2)
+{
+   if (c1->contig_len) {
+      assert(c2->contig_len);
+
+      int r1_end = r1 + c1->contig_len;
+      int r2_end = r2 + c2->contig_len;
+      return !(r2 >= r1_end || r1 >= r2_end);
+   } else {
+      return BITSET_TEST(c1->regset->regs[r1].conflicts, r2);
+   }
+}
+
 static bool
 ra_any_neighbors_conflict(struct ra_graph *g, unsigned int n, unsigned int r)
 {
    util_dynarray_foreach(&g->nodes[n].adjacency_list, unsigned int, n2p) {
       unsigned int n2 = *n2p;
 
+      /* If our adjacent node is in the stack, it's not allocated yet. */
       if (!BITSET_TEST(g->tmp.in_stack, n2) &&
-          BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) {
+          ra_class_allocations_conflict(g->regs->classes[g->nodes[n].class], r,
+                                        g->regs->classes[g->nodes[n2].class], g->nodes[n2].reg)) {
          return true;
       }
    }
@@ -728,12 +839,19 @@ ra_compute_available_regs(struct ra_graph *g, unsigned int n, BITSET_WORD *regs)
     * already colored.
     */
    util_dynarray_foreach(&g->nodes[n].adjacency_list, unsigned int, n2p) {
-      unsigned int n2 = *n2p;
-      unsigned int r = g->nodes[n2].reg;
+      struct ra_node *n2 = &g->nodes[*n2p];
+      struct ra_class *n2c = g->regs->classes[n2->class];
 
-      if (!BITSET_TEST(g->tmp.in_stack, n2)) {
-         for (int j = 0; j < BITSET_WORDS(g->regs->count); j++)
-            regs[j] &= ~g->regs->regs[r].conflicts[j];
+      if (!BITSET_TEST(g->tmp.in_stack, *n2p)) {
+         if (c->contig_len) {
+            int start = MAX2(0, (int)n2->reg - c->contig_len + 1);
+            int end = MIN2(g->regs->count, n2->reg + n2c->contig_len);
+            for (unsigned i = start; i < end; i++)
+               BITSET_CLEAR(regs, i);
+         } else {
+            for (int j = 0; j < BITSET_WORDS(g->regs->count); j++)
+               regs[j] &= ~g->regs->regs[n2->reg].conflicts[j];
+         }
       }
    }
diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h
index 820f7569475..34dcc09650e 100644
--- a/src/util/register_allocate.h
+++ b/src/util/register_allocate.h
@@ -54,6 +54,7 @@ struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count,
                                  bool need_conflict_lists);
 void ra_set_allocate_round_robin(struct ra_regs *regs);
 struct ra_class *ra_alloc_reg_class(struct ra_regs *regs);
+struct ra_class *ra_alloc_contig_reg_class(struct ra_regs *regs, int contig_len);
 unsigned int ra_class_index(struct ra_class *c);
 void ra_add_reg_conflict(struct ra_regs *regs,
                          unsigned int r1, unsigned int r2);
diff --git a/src/util/register_allocate_internal.h b/src/util/register_allocate_internal.h
index de5a5564db9..b62b8e8143e 100644
--- a/src/util/register_allocate_internal.h
+++ b/src/util/register_allocate_internal.h
@@ -63,6 +63,12 @@ struct ra_class {
     */
   BITSET_WORD *regs;
 
+   /**
+    * If nonzero, an allocation of a register r in this class occupies
+    * the contiguous range [r, r + contig_len) of base registers.
+    */
+   int contig_len;
+
    /**
     * p(B) in Runeson/Nyström paper.
     *
@@ -164,6 +170,9 @@ struct ra_graph {
    } tmp;
 };
 
+bool ra_class_allocations_conflict(struct ra_class *c1, unsigned int r1,
+                                   struct ra_class *c2, unsigned int r2);
+
 #ifdef __cplusplus
 }  /* extern "C" */
 
diff --git a/src/util/register_allocate_test.cpp b/src/util/register_allocate_test.cpp
index 09b499d9a54..9a0cb3a12ae 100644
--- a/src/util/register_allocate_test.cpp
+++ b/src/util/register_allocate_test.cpp
@@ -45,6 +45,44 @@ ra_test::~ra_test()
    ralloc_free(mem_ctx);
 }
 
+void
+thumb_checks(struct ra_regs *regs, unsigned reg32_base, unsigned reg64_base)
+{
+   struct ra_class *reg32low = ra_get_class_from_index(regs, 0);
+   struct ra_class *reg64low = ra_get_class_from_index(regs, 1);
+   struct ra_class *reg96 = ra_get_class_from_index(regs, 2);
+
+   /* Table 4.1 */
+   ASSERT_EQ(reg32low->p, 8);
+   ASSERT_EQ(reg32low->q[reg32low->index], 1);
+   ASSERT_EQ(reg32low->q[reg64low->index], 2);
+   ASSERT_EQ(reg32low->q[reg96->index], 3);
+   ASSERT_EQ(reg64low->p, 8);
+   ASSERT_EQ(reg64low->q[reg32low->index], 2);
+   ASSERT_EQ(reg64low->q[reg64low->index], 3);
+   ASSERT_EQ(reg64low->q[reg96->index], 4);
+   ASSERT_EQ(reg96->p, 2);
+   ASSERT_EQ(reg96->q[reg32low->index], 2);
+   ASSERT_EQ(reg96->q[reg64low->index], 2);
+   ASSERT_EQ(reg96->q[reg96->index], 2);
+
+   /* Individual regs should conflict with themselves, but not with their neighbors. */
+   for (int i = 0; i < 7; i++) {
+      ASSERT_FALSE(ra_class_allocations_conflict(reg32low, reg32_base + i, reg32low, reg32_base + i + 1));
+      ASSERT_TRUE(ra_class_allocations_conflict(reg32low, reg32_base + i, reg32low, reg32_base + i));
+   }
+
+   /* Check that reg64low conflicts with the pairs of reg32low but not neighbors. */
+   ASSERT_TRUE(ra_class_allocations_conflict(reg64low, reg64_base + 0, reg32low, reg32_base + 0));
+   ASSERT_TRUE(ra_class_allocations_conflict(reg64low, reg64_base + 0, reg32low, reg32_base + 1));
+   ASSERT_FALSE(ra_class_allocations_conflict(reg64low, reg64_base + 0, reg32low, reg32_base + 2));
+
+   ASSERT_FALSE(ra_class_allocations_conflict(reg64low, reg64_base + 1, reg32low, reg32_base + 0));
+   ASSERT_TRUE(ra_class_allocations_conflict(reg64low, reg64_base + 1, reg32low, reg32_base + 1));
+   ASSERT_TRUE(ra_class_allocations_conflict(reg64low, reg64_base + 1, reg32low, reg32_base + 2));
+   ASSERT_FALSE(ra_class_allocations_conflict(reg64low, reg64_base + 1, reg32low, reg32_base + 3));
+}
+
 TEST_F(ra_test, thumb)
 {
    struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, 100, true);
@@ -53,6 +91,7 @@ TEST_F(ra_test, thumb)
    int next_vreg = 16;
 
    /* reg32low is any of the low 8 registers. */
+   unsigned int reg32_base = next_vreg;
    struct ra_class *reg32low = ra_alloc_reg_class(regs);
    for (int i = 0; i < 8; i++) {
       int vreg = next_vreg++;
@@ -61,6 +100,7 @@ TEST_F(ra_test, thumb)
    }
 
    /* reg64low is pairs of the low 8 registers (with wraparound!)
     */
+   unsigned int reg64_base = next_vreg;
    struct ra_class *reg64low = ra_alloc_reg_class(regs);
    for (int i = 0; i < 8; i++) {
       int vreg = next_vreg++;
@@ -80,17 +120,92 @@ TEST_F(ra_test, thumb)
 
    ra_set_finalize(regs, NULL);
 
-   /* Table 4.1 */
-   ASSERT_EQ(reg32low->p, 8);
-   ASSERT_EQ(reg32low->q[reg32low->index], 1);
-   ASSERT_EQ(reg32low->q[reg64low->index], 2);
-   ASSERT_EQ(reg32low->q[reg96->index], 3);
-   ASSERT_EQ(reg64low->p, 8);
-   ASSERT_EQ(reg64low->q[reg32low->index], 2);
-   ASSERT_EQ(reg64low->q[reg64low->index], 3);
-   ASSERT_EQ(reg64low->q[reg96->index], 4);
-   ASSERT_EQ(reg96->p, 2);
-   ASSERT_EQ(reg96->q[reg32low->index], 2);
-   ASSERT_EQ(reg96->q[reg64low->index], 2);
-   ASSERT_EQ(reg96->q[reg96->index], 2);
+   thumb_checks(regs, reg32_base, reg64_base);
+}
+
+TEST_F(ra_test, thumb_contigregs)
+{
+   struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, 16, true);
+
+   /* reg32low is any of the low 8 registers. */
+   struct ra_class *reg32low = ra_alloc_contig_reg_class(regs, 1);
+   for (int i = 0; i < 8; i++)
+      ra_class_add_reg(reg32low, i);
+
+   /* reg64low is pairs of the low 8 registers (we're ignoring the wraparound thing here). */
+   struct ra_class *reg64low = ra_alloc_contig_reg_class(regs, 2);
+   for (int i = 0; i < 8; i++)
+      ra_class_add_reg(reg64low, i);
+
+   /* reg96 is one of either r[0..2] or r[1..3]. */
+   struct ra_class *reg96 = ra_alloc_contig_reg_class(regs, 3);
+   for (int i = 0; i < 2; i++)
+      ra_class_add_reg(reg96, i);
+
+   ra_set_finalize(regs, NULL);
+
+   thumb_checks(regs, 0, 0);
+}
+
+TEST_F(ra_test, nonintersect_contigregs)
+{
+   struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, 16, true);
+
+   struct ra_class *low = ra_alloc_contig_reg_class(regs, 1);
+   for (int i = 0; i < 8; i++)
+      ra_class_add_reg(low, i);
+
+   struct ra_class *high = ra_alloc_contig_reg_class(regs, 1);
+   for (int i = 8; i < 16; i++)
+      ra_class_add_reg(high, i);
+
+   ra_set_finalize(regs, NULL);
+
+   ASSERT_EQ(low->q[low->index], 1);
+   ASSERT_EQ(low->q[high->index], 0);
+   ASSERT_EQ(high->q[low->index], 0);
+   ASSERT_EQ(high->q[high->index], 1);
+}
+
+TEST_F(ra_test, aligned_contigregs)
+{
+   int base_regs = 32;
+   struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, base_regs, true);
+
+   struct ra_class *c1 = ra_alloc_contig_reg_class(regs, 1);
+   for (int i = 0; i < base_regs; i++)
+      ra_class_add_reg(c1, i);
+
+   struct ra_class *c2 = ra_alloc_contig_reg_class(regs, 2);
+   for (int i = 8; i < base_regs; i += 2)
+      ra_class_add_reg(c2, i);
+
+   struct ra_class *c4 = ra_alloc_contig_reg_class(regs, 4);
+   for (int i = 8; i < base_regs; i += 4)
+      ra_class_add_reg(c4, i);
+
+   ra_set_finalize(regs, NULL);
+
+   ASSERT_EQ(c1->q[c1->index], 1);
+   ASSERT_EQ(c1->q[c2->index], 2);
+   ASSERT_EQ(c1->q[c4->index], 4);
+   ASSERT_EQ(c2->q[c1->index], 1);
+   ASSERT_EQ(c2->q[c2->index], 1);
+   ASSERT_EQ(c2->q[c4->index], 2);
+   ASSERT_EQ(c4->q[c1->index], 1);
+   ASSERT_EQ(c4->q[c2->index], 1);
+   ASSERT_EQ(c4->q[c4->index], 1);
+
+   /* Check conflicts for a c4 allocation at i against other classes. */
+   for (int i = 0; i < base_regs; i += 4) {
+      for (int j = 0; j < base_regs; j++) {
+         ASSERT_EQ(ra_class_allocations_conflict(c4, i, c1, j),
+                   j >= i && j < i + 4);
+      }
+
+      for (int j = 0; j < base_regs; j += 2) {
+         ASSERT_EQ(ra_class_allocations_conflict(c4, i, c2, j),
+                   j >= i && j < i + 4);
+      }
+   }
 }
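Reviewer note (not part of the patch): the new entry point is easiest to see end to end with a small usage sketch. The register counts, class shapes, and the helper name allocate_three_values() below are invented for illustration; the ra_* calls are the existing allocator API plus the ra_alloc_contig_reg_class() added here.

/* Sketch: an all-contiguous register set with 16 base registers, a width-1
 * "single" class and an aligned width-2 "pair" class, used to color three
 * mutually interfering values.
 */
#include "util/ralloc.h"
#include "util/register_allocate.h"

static unsigned
allocate_three_values(void)
{
   void *mem_ctx = ralloc_context(NULL);
   struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, 16, true);

   /* Width-1 class: any of r0..r15. */
   struct ra_class *single = ra_alloc_contig_reg_class(regs, 1);
   for (unsigned i = 0; i < 16; i++)
      ra_class_add_reg(single, i);

   /* Width-2 class: a base register r stands for the group r..r+1, so only
    * even bases are added to keep the pairs aligned.
    */
   struct ra_class *pair = ra_alloc_contig_reg_class(regs, 2);
   for (unsigned i = 0; i < 16; i += 2)
      ra_class_add_reg(pair, i);

   /* q values are computed here; with contiguous classes the per-register
    * conflict lists are freed since they are no longer needed.
    */
   ra_set_finalize(regs, NULL);

   /* Three interfering values: two singles and one pair. */
   struct ra_graph *g = ra_alloc_interference_graph(regs, 3);
   ra_set_node_class(g, 0, single);
   ra_set_node_class(g, 1, single);
   ra_set_node_class(g, 2, pair);
   ra_add_node_interference(g, 0, 1);
   ra_add_node_interference(g, 0, 2);
   ra_add_node_interference(g, 1, 2);

   bool ok = ra_allocate(g);
   unsigned pair_base = ok ? ra_get_node_reg(g, 2) : ~0u;

   ralloc_free(mem_ctx);
   return pair_base; /* base of the two-register group, or ~0u on failure */
}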
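A second illustrative-only sketch, showing the serialize/deserialize path that now carries the is_contig byte and each class's contig_len. copy_reg_set() is a hypothetical helper built on Mesa's util/blob.h; `regs` is assumed to be a finalized set like the one above.

/* Sketch: round-trip a finalized reg set through a blob.  When the set uses
 * contiguous classes, the per-register conflict bitsets are skipped on the
 * wire and freed on deserialization, matching the patch above.
 */
#include "util/blob.h"
#include "util/register_allocate.h"

static struct ra_regs *
copy_reg_set(void *mem_ctx, const struct ra_regs *regs)
{
   struct blob blob;
   blob_init(&blob);
   ra_set_serialize(regs, &blob);

   struct blob_reader reader;
   blob_reader_init(&reader, blob.data, blob.size);
   struct ra_regs *copy = ra_set_deserialize(mem_ctx, &reader);

   blob_finish(&blob);
   return copy;
}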