From 47bd462b0caa46a2dd1fb88e57f6a968aff6cfc4 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 3 Jun 2015 14:10:28 -0400 Subject: [PATCH 01/11] awesome control flow bugfixes/clarifications --- src/glsl/nir/spirv_to_nir.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1fc1b8bc5dc..979df2019e0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1355,20 +1355,11 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - if (block->block != NULL) { - /* We've already visited this block once before so this is a - * back-edge. Back-edges are only allowed to point to a loop - * merge. - */ - assert(block == cont_block); - return; - } - if (block->merge_op == SpvOpLoopMerge) { /* This is the jump into a loop. */ - cont_block = block; - break_block = vtn_value(b, block->merge_block_id, - vtn_value_type_block)->block; + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; nir_loop *loop = nir_loop_create(b->shader); nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); @@ -1379,7 +1370,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, block->merge_op = SpvOpNop; nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, block, break_block, cont_block, NULL); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); nir_builder_insert_after_cf_list(&b->nb, old_list); block = break_block; @@ -1411,8 +1402,16 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ return; } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. + */ block = branch_block; continue; } @@ -1454,7 +1453,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, &jump->instr); block = then_block; } else { - /* Conventional if statement */ + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. 
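+          * We emit a nir_if for the condition and walk the then and else
+          * blocks separately, each ending at the merge block, and then
+          * continue processing at the merge block itself.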
+ */ assert(block->merge_op == SpvOpSelectionMerge); struct vtn_block *merge_block = vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; From bf5a6156591797cd4783e73201298254b14914af Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 17 Jun 2015 16:25:38 -0700 Subject: [PATCH 02/11] composites composites composites --- src/glsl/nir/spirv_to_nir.c | 659 ++++++++++++++++++++++++---- src/glsl/nir/spirv_to_nir_private.h | 21 +- 2 files changed, 593 insertions(+), 87 deletions(-) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 979df2019e0..e84d7564300 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -28,23 +28,93 @@ #include "spirv_to_nir_private.h" #include "nir_vla.h" -nir_ssa_def * +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = ralloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { struct vtn_value *val = vtn_untyped_value(b, value_id); switch (val->value_type) { - case vtn_value_type_constant: { - assert(glsl_type_is_vector_or_scalar(val->type)); - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components); - - for (unsigned i = 0; i < 
num_components; i++) - load->value.u[0] = val->constant->value.u[0]; - - nir_builder_instr_insert(&b->nb, &load->instr); - return &load->def; - } + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->type); case vtn_value_type_ssa: return val->ssa; @@ -451,6 +521,204 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, } } +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, + nir_deref *dest_deref_tail, struct vtn_ssa_value *src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = 
glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src); + vtn_variable_store(b, val, dest); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + static void vtn_handle_variables(struct 
vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -552,7 +820,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); } tail->child = &deref_arr->deref; break; @@ -578,12 +847,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); + vtn_variable_copy(b, src, dest); break; } @@ -596,61 +860,15 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, return; } - assert(glsl_type_is_vector_or_scalar(src_type)); struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); - load->num_components = glsl_get_vector_elements(src_type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, - val->name); - - nir_builder_instr_insert(&b->nb, &load->instr); - val->type = src_type; - - if (src->var->data.mode == nir_var_uniform && - glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
- */ - val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->ssa = &load->dest.ssa; - } + val->ssa = vtn_variable_load(b, src); break; } case SpvOpStore: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; - struct vtn_value *src_val = vtn_untyped_value(b, w[2]); - if (src_val->value_type == vtn_value_type_ssa) { - assert(glsl_type_is_vector_or_scalar(dest_type)); - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->src[0] = nir_src_for_ssa(src_val->ssa); - store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); - store->num_components = glsl_get_vector_elements(dest_type); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - assert(src_val->value_type == vtn_value_type_constant); - - nir_variable *const_tmp = rzalloc(b->shader, nir_variable); - const_tmp->type = dest_type; - const_tmp->name = "const_temp"; - const_tmp->data.mode = nir_var_local; - const_tmp->data.read_only = true; - exec_list_push_tail(&b->impl->locals, &const_tmp->node); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_var_create(copy, const_tmp); - - nir_builder_instr_insert(&b->nb, ©->instr); - } + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); break; } @@ -674,7 +892,7 @@ static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { nir_tex_src src; - src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); src.src_type = type; return src; } @@ -715,7 +933,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, /* All these types have the coordinate as their first real argument */ struct vtn_value *coord = vtn_value(b, w[4], vtn_value_type_ssa); coord_components = glsl_get_vector_elements(coord->type); - p->src = nir_src_for_ssa(coord->ssa); + p->src = nir_src_for_ssa(coord->ssa->def); p->src_type = nir_tex_src_coord; p++; break; @@ -786,7 +1004,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->sampler = sampler; nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = &instr->dest.ssa; + val->ssa->def = &instr->dest.ssa; + val->ssa->type = val->type; nir_builder_instr_insert(&b->nb, &instr->instr); } @@ -804,12 +1023,13 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa->type = val->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 3; nir_ssa_def *src[4]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3]); + src[i] = vtn_ssa_value(b, w[i + 3])->def; /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. 
@@ -921,24 +1141,24 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; case SpvOpFwidth: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); return; case SpvOpFwidthFine: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); return; case SpvOpFwidthCoarse: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); return; case SpvOpVectorTimesScalar: /* The builder will take care of splatting for us. */ - val->ssa = nir_fmul(&b->nb, src[0], src[1]); + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); return; case SpvOpSRem: @@ -966,7 +1186,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); @@ -974,6 +1194,263 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + nir_alu_src alu_src; + alu_src.src = nir_src_for_ssa(src); + alu_src.swizzle[0] = index; + return nir_fmov_alu(&b->nb, alu_src, 1); +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + + return vec; +} + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, 
src, insert, 0);
+   for (unsigned i = 1; i < src->num_components; i++)
+      dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+                       vtn_vector_insert(b, src, insert, i), dest);
+
+   return dest;
+}
+
+static nir_ssa_def *
+vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
+                   nir_ssa_def *src0, nir_ssa_def *src1,
+                   const uint32_t *indices)
+{
+   nir_alu_instr *vec = create_vec(b->shader, num_components);
+
+   nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
+   nir_builder_instr_insert(&b->nb, &undef->instr);
+
+   for (unsigned i = 0; i < num_components; i++) {
+      uint32_t index = indices[i];
+      if (index == 0xffffffff) {
+         vec->src[i].src = nir_src_for_ssa(&undef->def);
+      } else if (index < src0->num_components) {
+         vec->src[i].src = nir_src_for_ssa(src0);
+         vec->src[i].swizzle[0] = index;
+      } else {
+         vec->src[i].src = nir_src_for_ssa(src1);
+         vec->src[i].swizzle[0] = index - src0->num_components;
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &vec->instr);
+
+   return &vec->dest.dest.ssa;
+}
+
+/*
+ * Concatenates a number of vectors/scalars together to produce a vector
+ */
+static nir_ssa_def *
+vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
+                     unsigned num_srcs, nir_ssa_def **srcs)
+{
+   nir_alu_instr *vec = create_vec(b->shader, num_components);
+
+   unsigned dest_idx = 0;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      nir_ssa_def *src = srcs[i];
+      for (unsigned j = 0; j < src->num_components; j++) {
+         vec->src[dest_idx].src = nir_src_for_ssa(src);
+         vec->src[dest_idx].swizzle[0] = j;
+         dest_idx++;
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &vec->instr);
+
+   return &vec->dest.dest.ssa;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
+{
+   struct vtn_ssa_value *dest = ralloc(mem_ctx, struct vtn_ssa_value);
+   dest->type = src->type;
+
+   if (glsl_type_is_vector_or_scalar(src->type)) {
+      dest->def = src->def;
+   } else {
+      unsigned elems = glsl_get_length(src->type);
+
+      dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
+      for (unsigned i = 0; i < elems; i++)
+         dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
+   }
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
+                     struct vtn_ssa_value *insert, const uint32_t *indices,
+                     unsigned num_indices)
+{
+   struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
+
+   struct vtn_ssa_value *cur = dest;
+   unsigned i;
+   for (i = 0; i < num_indices - 1; i++) {
+      cur = cur->elems[indices[i]];
+   }
+
+   if (glsl_type_is_vector_or_scalar(cur->type)) {
+      /* According to the SPIR-V spec, OpCompositeInsert may work down to
+       * the component granularity. In that case, the last index will be
+       * the index at which to insert the scalar into the vector.
+       */
+
+      cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
+   } else {
+      cur->elems[indices[i]] = insert;
+   }
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
+                      const uint32_t *indices, unsigned num_indices)
+{
+   struct vtn_ssa_value *cur = src;
+   for (unsigned i = 0; i < num_indices; i++) {
+      if (glsl_type_is_vector_or_scalar(cur->type)) {
+         assert(i == num_indices - 1);
+         /* According to the SPIR-V spec, OpCompositeExtract may work down to
+          * the component granularity. The last index will be the index of the
+          * vector to extract.
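+          * For example, indices (2, 3) applied to a mat4 first select
+          * column 2 (a vec4) and then component 3 (the w component) of
+          * that column.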
+          */
+
+         struct vtn_ssa_value *ret = ralloc(b, struct vtn_ssa_value);
+         ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
+         ret->def = vtn_vector_extract(b, cur->def, indices[i]);
+         return ret;
+      } else {
+         cur = cur->elems[indices[i]];
+      }
+   }
+
+   return cur;
+}
+
+static void
+vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
+                     const uint32_t *w, unsigned count)
+{
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+
+   switch (opcode) {
+   case SpvOpVectorExtractDynamic:
+      val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
+                                                 vtn_ssa_value(b, w[4])->def);
+      break;
+
+   case SpvOpVectorInsertDynamic:
+      val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
+                                                vtn_ssa_value(b, w[4])->def,
+                                                vtn_ssa_value(b, w[5])->def);
+      break;
+
+   case SpvOpVectorShuffle:
+      val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(val->type),
+                                         vtn_ssa_value(b, w[3])->def,
+                                         vtn_ssa_value(b, w[4])->def,
+                                         w + 5);
+      break;
+
+   case SpvOpCompositeConstruct: {
+      val->ssa = ralloc(b, struct vtn_ssa_value);
+      unsigned elems = count - 3;
+      if (glsl_type_is_vector_or_scalar(val->type)) {
+         nir_ssa_def *srcs[4];
+         for (unsigned i = 0; i < elems; i++)
+            srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
+         val->ssa->def =
+            vtn_vector_construct(b, glsl_get_vector_elements(val->type),
+                                 elems, srcs);
+      } else {
+         val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+         for (unsigned i = 0; i < elems; i++)
+            val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
+      }
+      break;
+   }
+   case SpvOpCompositeExtract:
+      val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
+                                       w + 4, count - 4);
+      break;
+
+   case SpvOpCompositeInsert:
+      val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
+                                      vtn_ssa_value(b, w[3]),
+                                      w + 5, count - 5);
+      break;
+
+   case SpvOpCopyObject:
+      val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
+      break;
+
+   default:
+      unreachable("unknown composite operation");
+   }
+
+   val->ssa->type = val->type;
+}
+
 static bool
 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
                                 const uint32_t *w, unsigned count)
@@ -1341,6 +1818,16 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
       vtn_handle_matrix_alu(b, opcode, w, count);
       break;
 
+   case SpvOpVectorExtractDynamic:
+   case SpvOpVectorInsertDynamic:
+   case SpvOpVectorShuffle:
+   case SpvOpCompositeConstruct:
+   case SpvOpCompositeExtract:
+   case SpvOpCompositeInsert:
+   case SpvOpCopyObject:
+      vtn_handle_composite(b, opcode, w, count);
+      break;
+
    default:
       unreachable("Unhandled opcode");
    }
@@ -1425,7 +1912,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
             vtn_value(b, w[3], vtn_value_type_block)->block;
 
          nir_if *if_stmt = nir_if_create(b->shader);
-         if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1]));
+         if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
          nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node);
 
          if (then_block == break_block) {
@@ -1563,6 +2050,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
 
    foreach_list_typed(struct vtn_function, func, node, &b->functions) {
       b->impl = nir_function_impl_create(func->overload);
+      b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
       nir_builder_init(&b->nb, b->impl);
       nir_builder_insert_after_cf_list(&b->nb, &b->impl->body);
       vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL);
diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h
index 
d2b364bdfeb..a4760620d46 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -65,6 +65,15 @@ struct vtn_function { typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + const struct glsl_type *type; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -77,7 +86,7 @@ struct vtn_value { nir_deref_var *deref; struct vtn_function *func; struct vtn_block *block; - nir_ssa_def *ssa; + struct vtn_ssa_value *ssa; vtn_instruction_handler ext_handler; }; }; @@ -96,6 +105,14 @@ struct vtn_builder { nir_function_impl *impl; struct vtn_block *block; + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + unsigned value_id_bound; struct vtn_value *values; @@ -134,7 +151,7 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } -nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, From de4c31a085507719c9985035c885d72bd52cf3ac Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:33:08 -0700 Subject: [PATCH 03/11] fix glsl450 for composites --- src/glsl/nir/spirv_glsl450_to_nir.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 3b9d0940aad..100fde9ce7f 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -140,12 +140,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = val->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 5; nir_ssa_def *src[3]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5]); + src[i] = vtn_ssa_value(b, w[i + 5])->def; nir_op op; switch (entrypoint) { @@ -158,16 +160,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Ceil: op = nir_op_fceil; break; case Fract: op = nir_op_ffract; break; case Radians: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); return; case Degrees: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); return; case Sin: op = nir_op_fsin; break; case Cos: op = nir_op_fcos; break; case Tan: - val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), - nir_fcos(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; case Exp2: op = nir_op_fexp2; break; @@ -180,7 +182,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Max: op = nir_op_fmax; break; case Mix: op = nir_op_flrp; 
break; case Step: - val->ssa = nir_sge(&b->nb, src[1], src[0]); + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; case FloatBitsToInt: @@ -188,7 +190,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case IntBitsToFloat: case UintBitsToFloat: /* Probably going to be removed from the final version of the spec. */ - val->ssa = src[0]; + val->ssa->def = src[0]; return; case Fma: op = nir_op_ffma; break; @@ -207,13 +209,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; case Length: - val->ssa = build_length(&b->nb, src[0]); + val->ssa->def = build_length(&b->nb, src[0]); return; case Distance: - val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); return; case Normalize: - val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; case UaddCarry: op = nir_op_uadd_carry; break; @@ -256,7 +258,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); From 0e86ab7c0a438d86c2177a0e02847798ef81e343 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:12 -0700 Subject: [PATCH 04/11] nir/types: add a helper to transpose a matrix type --- src/glsl/nir/nir_types.cpp | 7 +++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 35421506545..d44d48095da 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -270,3 +270,10 @@ glsl_function_type(const glsl_type *return_type, { return glsl_type::get_function_instance(return_type, params, num_params); } + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index ceb131c9f47..60e1d9d96fc 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -103,6 +103,8 @@ const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); + #ifdef __cplusplus } #endif From 22854a60efc3ee2442b9150e2bf328e7441d6794 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:55 -0700 Subject: [PATCH 05/11] nir/builder: add a nir_fdot() convenience function --- src/glsl/nir/nir_builder.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 9223e838095..cffe38abf0e 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -240,6 +240,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. 
*/ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. From d0fc04aacf12ad834f9542289f3c915d5ec9b945 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 18:51:51 -0700 Subject: [PATCH 06/11] nir/types: be less strict about constructing matrix types --- src/glsl/nir/nir_types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index d44d48095da..3c00bdb3c18 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -240,7 +240,7 @@ glsl_vector_type(enum glsl_base_type base_type, unsigned components) const glsl_type * glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) { - assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + assert(rows >= 1 && rows <= 4 && columns >= 1 && columns <= 4); return glsl_type::get_instance(base_type, rows, columns); } From 841aab6f50c88d54d1a3a68a311621f948d09126 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 18:52:44 -0700 Subject: [PATCH 07/11] matrices matrices matrices --- src/glsl/nir/spirv_to_nir.c | 291 +++++++++++++++++++++++++--- src/glsl/nir/spirv_to_nir_private.h | 5 + 2 files changed, 268 insertions(+), 28 deletions(-) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e84d7564300..a0a75040771 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -37,7 +37,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, if (entry) return entry->data; - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = type; switch (glsl_get_base_type(type)) { @@ -63,7 +63,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); for (unsigned i = 0; i < columns; i++) { - struct vtn_ssa_value *col_val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); col_val->type = glsl_get_column_type(val->type); nir_load_const_instr *load = nir_load_const_instr_create(b->shader, rows); @@ -516,6 +516,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, case SpvDecorationFPFastMathMode: case SpvDecorationLinkageAttributes: case SpvDecorationSpecId: + break; default: unreachable("Unhandled variable decoration"); } @@ -525,7 +526,7 @@ static struct vtn_ssa_value * _vtn_variable_load(struct vtn_builder *b, nir_deref_var *src_deref, nir_deref *src_deref_tail) { - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = src_deref_tail->type; /* The deref tail may contain a deref to select a component of a vector (in @@ -1010,11 +1011,264 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct 
vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+   val->type = type;
+
+   if (!glsl_type_is_vector_or_scalar(type)) {
+      unsigned elems = glsl_get_length(type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+      for (unsigned i = 0; i < elems; i++) {
+         const struct glsl_type *child_type;
+
+         switch (glsl_get_base_type(type)) {
+         case GLSL_TYPE_INT:
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_BOOL:
+         case GLSL_TYPE_FLOAT:
+         case GLSL_TYPE_DOUBLE:
+            child_type = glsl_get_column_type(type);
+            break;
+         case GLSL_TYPE_ARRAY:
+            child_type = glsl_get_array_element(type);
+            break;
+         case GLSL_TYPE_STRUCT:
+            child_type = glsl_get_struct_field(type, i);
+            break;
+         default:
+            unreachable("unknown base type");
+         }
+
+         val->elems[i] = vtn_create_ssa_value(b, child_type);
+      }
+   }
+
+   return val;
+}
+
+static nir_alu_instr *
+create_vec(void *mem_ctx, unsigned num_components)
+{
+   nir_op op;
+   switch (num_components) {
+   case 1: op = nir_op_fmov; break;
+   case 2: op = nir_op_vec2; break;
+   case 3: op = nir_op_vec3; break;
+   case 4: op = nir_op_vec4; break;
+   default: unreachable("bad vector size");
+   }
+
+   nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op);
+   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+
+   return vec;
+}
+
+static struct vtn_ssa_value *
+vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+   if (src->transposed)
+      return src->transposed;
+
+   struct vtn_ssa_value *dest =
+      vtn_create_ssa_value(b, glsl_transposed_type(src->type));
+
+   for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
+      nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type));
+      if (glsl_type_is_vector_or_scalar(src->type)) {
+         vec->src[0].src = nir_src_for_ssa(src->def);
+         vec->src[0].swizzle[0] = i;
+      } else {
+         for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
+            vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
+            vec->src[j].swizzle[0] = i;
+         }
+      }
+      nir_builder_instr_insert(&b->nb, &vec->instr);
+      dest->elems[i]->def = &vec->dest.dest.ssa;
+   }
+
+   dest->transposed = src;
+
+   return dest;
+}
+
+/*
+ * Normally, column vectors in SPIR-V correspond to a single NIR SSA
+ * definition. But for matrix multiplies, we want to do one routine for
+ * multiplying a matrix by a matrix and then pretend that vectors are matrices
+ * with one column. So we "wrap" these things, and unwrap the result before we
+ * send it off.
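+ * A wrapped vector keeps its original vector type but gains a one-element
+ * elems array whose single entry is the unwrapped value, so the code below
+ * can treat it uniformly as a one-column matrix.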
+ */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. 
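+       * Concretely, the loop below computes column i of the result as a
+       * linear combination of the columns of src0, weighted by the
+       * components of column i of src1.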
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + static void vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Matrix math not handled"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } } static void @@ -1197,29 +1451,10 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, static nir_ssa_def * vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) { - nir_alu_src alu_src; - alu_src.src = nir_src_for_ssa(src); - alu_src.swizzle[0] = index; - return nir_fmov_alu(&b->nb, alu_src, 1); + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); } -static nir_alu_instr * -create_vec(void *mem_ctx, unsigned num_components) -{ - nir_op op; - switch (num_components) { - case 1: op = nir_op_fmov; break; - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("bad vector size"); - } - - nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); - - return vec; -} static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, @@ -1320,7 +1555,7 @@ vtn_vector_construct(struct vtn_builder *b, unsigned num_components, static struct vtn_ssa_value * vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) { - struct vtn_ssa_value 
*dest = ralloc(mem_ctx, struct vtn_ssa_value); + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); dest->type = src->type; if (glsl_type_is_vector_or_scalar(src->type)) { @@ -1376,7 +1611,7 @@ vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, * vector to extract. */ - struct vtn_ssa_value *ret = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); ret->def = vtn_vector_extract(b, cur->def, indices[i]); return ret; @@ -1413,7 +1648,7 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, break; case SpvOpCompositeConstruct: { - val->ssa = ralloc(b, struct vtn_ssa_value); + val->ssa = rzalloc(b, struct vtn_ssa_value); unsigned elems = count - 3; if (glsl_type_is_vector_or_scalar(val->type)) { nir_ssa_def *srcs[4]; diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index a4760620d46..937c45b08c2 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -71,6 +71,11 @@ struct vtn_ssa_value { struct vtn_ssa_value **elems; }; + /* For matrices, a transposed version of the value, or NULL if it hasn't + * been computed + */ + struct vtn_ssa_value *transposed; + const struct glsl_type *type; }; From e9c21d0ca01eaada2d61bd73e97f59cd2835a0b0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 22 Jun 2015 11:59:55 -0700 Subject: [PATCH 08/11] unbreak things --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a0a75040771..e0f280ceaae 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1277,7 +1277,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - val->ssa->type = val->type; + val->ssa = vtn_create_ssa_value(b, val->type); /* Collect the various SSA sources */ unsigned num_inputs = count - 3; From 9a3dda101e88142cb3b3eebb18883edecb21b375 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 22 Jun 2015 13:53:08 -0700 Subject: [PATCH 09/11] nir/vtn: fix emitting code after loops When we're done emitting the code for a loop, we need to visit the new break block, which is the merge block of the current loop, rather than the old merge block, which is the merge block of the loop containing the one we just emitted code for. 
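
For example, with one loop nested inside another, after emitting the inner
loop we have to continue walking at the inner loop's merge block; continuing
at the old break_block would resume at the outer loop's merge block and skip
the rest of the outer loop's body.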
--- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e0f280ceaae..4aabf3cc4e0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2095,7 +2095,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; + block = new_break_block; continue; } From fe1269cf28784450f0032432eb1f32e379beec17 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 23 Jun 2015 10:34:22 -0700 Subject: [PATCH 10/11] nir/builder: add support for inserting before/after blocks --- src/glsl/nir/nir_builder.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index cffe38abf0e..7d449262585 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -28,6 +28,10 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + + nir_block *before_block; + nir_block *after_block; + nir_instr *before_instr; nir_instr *after_instr; @@ -48,6 +52,30 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_block = NULL; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = block; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_after_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = block; build->before_instr = NULL; build->after_instr = NULL; } @@ -56,6 +84,8 @@ static inline void nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = before_instr; build->after_instr = NULL; } @@ -64,6 +94,8 @@ static inline void nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = NULL; build->after_instr = after_instr; } @@ -73,6 +105,10 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { if (build->cf_node_list) { nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_block) { + nir_instr_insert_before_block(build->before_block, instr); + } else if (build->after_block) { + nir_instr_insert_after_block(build->after_block, instr); } else if (build->before_instr) { nir_instr_insert_before(build->before_instr, instr); } else { From dee4a94e69d724e984386f81fe1897bc6e3a021c Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 23 Jun 2015 10:34:55 -0700 Subject: [PATCH 11/11] nir/vtn: add support for phi nodes --- src/glsl/nir/spirv_to_nir.c | 121 ++++++++++++++++++++++++++++ src/glsl/nir/spirv_to_nir_private.h | 8 ++ 2 files changed, 129 insertions(+) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 4aabf3cc4e0..a5e9c4aa8ae 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1686,6 +1686,112 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp 
opcode, val->ssa->type = val->type; } +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_phi_node_create(b, val->type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + nir_builder_insert_before_block(&b->nb, block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1835,6 +1941,7 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, } case SpvOpFunctionEnd: + b->func->end = w; b->func = NULL; break; @@ -2063,6 +2170,10 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, 
vtn_handle_composite(b, opcode, w, count); break; + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + default: unreachable("Unhandled opcode"); } @@ -2106,6 +2217,12 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); + nir_cf_node *cur_cf_node = + exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list), + node); + nir_block *cur_block = nir_cf_node_as_block(cur_cf_node); + _mesa_hash_table_insert(b->block_table, cur_block, block); + switch (branch_op) { case SpvOpBranch: { struct vtn_block *branch_block = @@ -2287,9 +2404,13 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = nir_function_impl_create(func->overload); b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); } ralloc_free(b); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 937c45b08c2..b157e023a68 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -60,6 +60,8 @@ struct vtn_function { nir_function_overload *overload; struct vtn_block *start_block; + + const uint32_t *end; }; typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, @@ -118,6 +120,12 @@ struct vtn_builder { */ struct hash_table *const_table; + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. + */ + struct hash_table *block_table; + unsigned value_id_bound; struct vtn_value *values;