diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 9223e838095..7d449262585 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -28,6 +28,10 @@ struct exec_list;
 
 typedef struct nir_builder {
    struct exec_list *cf_node_list;
+
+   nir_block *before_block;
+   nir_block *after_block;
+
    nir_instr *before_instr;
    nir_instr *after_instr;
 
@@ -48,6 +52,30 @@ nir_builder_insert_after_cf_list(nir_builder *build,
                                  struct exec_list *cf_node_list)
 {
    build->cf_node_list = cf_node_list;
+   build->before_block = NULL;
+   build->after_block = NULL;
+   build->before_instr = NULL;
+   build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_block(nir_builder *build,
+                                nir_block *block)
+{
+   build->cf_node_list = NULL;
+   build->before_block = block;
+   build->after_block = NULL;
+   build->before_instr = NULL;
+   build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_after_block(nir_builder *build,
+                                nir_block *block)
+{
+   build->cf_node_list = NULL;
+   build->before_block = NULL;
+   build->after_block = block;
    build->before_instr = NULL;
    build->after_instr = NULL;
 }
@@ -56,6 +84,8 @@ static inline void
 nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
 {
    build->cf_node_list = NULL;
+   build->before_block = NULL;
+   build->after_block = NULL;
    build->before_instr = before_instr;
    build->after_instr = NULL;
 }
@@ -64,6 +94,8 @@ static inline void
 nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
 {
    build->cf_node_list = NULL;
+   build->before_block = NULL;
+   build->after_block = NULL;
    build->before_instr = NULL;
    build->after_instr = after_instr;
 }
@@ -73,6 +105,10 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
 {
    if (build->cf_node_list) {
       nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+   } else if (build->before_block) {
+      nir_instr_insert_before_block(build->before_block, instr);
+   } else if (build->after_block) {
+      nir_instr_insert_after_block(build->after_block, instr);
    } else if (build->before_instr) {
       nir_instr_insert_before(build->before_instr, instr);
    } else {
@@ -240,6 +276,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
                      nir_imov_alu(build, alu_src, num_components);
 }
 
+/* Selects the right fdot given the number of components in each source. */
+static inline nir_ssa_def *
+nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+   assert(src0->num_components == src1->num_components);
+   switch (src0->num_components) {
+   case 1: return nir_fmul(build, src0, src1);
+   case 2: return nir_fdot2(build, src0, src1);
+   case 3: return nir_fdot3(build, src0, src1);
+   case 4: return nir_fdot4(build, src0, src1);
+   default:
+      unreachable("bad component size");
+   }
+
+   return NULL;
+}
+
 /**
  * Turns a nir_src into a nir_ssa_def * so it can be passed to
  * nir_build_alu()-based builder calls.
diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 35421506545..3c00bdb3c18 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -240,7 +240,7 @@ glsl_vector_type(enum glsl_base_type base_type, unsigned components)
 const glsl_type *
 glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
 {
-   assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4);
+   assert(rows >= 1 && rows <= 4 && columns >= 1 && columns <= 4);
    return glsl_type::get_instance(base_type, rows, columns);
 }
 
@@ -270,3 +270,10 @@ glsl_function_type(const glsl_type *return_type,
 {
    return glsl_type::get_function_instance(return_type, params, num_params);
 }
+
+const glsl_type *
+glsl_transposed_type(const struct glsl_type *type)
+{
+   return glsl_type::get_instance(type->base_type, type->matrix_columns,
+                                  type->vector_elements);
+}
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index ceb131c9f47..60e1d9d96fc 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -103,6 +103,8 @@ const struct glsl_type * glsl_function_type(const struct glsl_type *return_type,
                                             const struct glsl_function_param *params,
                                             unsigned num_params);
 
+const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c
index 3b9d0940aad..100fde9ce7f 100644
--- a/src/glsl/nir/spirv_glsl450_to_nir.c
+++ b/src/glsl/nir/spirv_glsl450_to_nir.c
@@ -140,12 +140,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
 {
    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+   val->ssa = rzalloc(b, struct vtn_ssa_value);
+   val->ssa->type = val->type;
 
    /* Collect the various SSA sources */
    unsigned num_inputs = count - 5;
    nir_ssa_def *src[3];
    for (unsigned i = 0; i < num_inputs; i++)
-      src[i] = vtn_ssa_value(b, w[i + 5]);
+      src[i] = vtn_ssa_value(b, w[i + 5])->def;
 
    nir_op op;
    switch (entrypoint) {
@@ -158,16 +160,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
    case Ceil:        op = nir_op_fceil;         break;
    case Fract:       op = nir_op_ffract;        break;
    case Radians:
-      val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251));
+      val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251));
       return;
    case Degrees:
-      val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131));
+      val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131));
       return;
    case Sin:         op = nir_op_fsin;       break;
    case Cos:         op = nir_op_fcos;       break;
    case Tan:
-      val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]),
-                                  nir_fcos(&b->nb, src[0]));
+      val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]),
+                               nir_fcos(&b->nb, src[0]));
       return;
    case Pow:         op = nir_op_fpow;       break;
    case Exp2:        op = nir_op_fexp2;      break;
@@ -180,7 +182,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
    case Max:         op = nir_op_fmax;       break;
    case Mix:         op = nir_op_flrp;       break;
    case Step:
-      val->ssa = nir_sge(&b->nb, src[1], src[0]);
+      val->ssa->def = nir_sge(&b->nb, src[1], src[0]);
       return;
 
    case FloatBitsToInt:
@@ -188,7 +190,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
    case IntBitsToFloat:
    case UintBitsToFloat:
       /* Probably going to be removed from the final version of the spec. */
-      val->ssa = src[0];
+      val->ssa->def = src[0];
       return;
 
    case Fma:         op = nir_op_ffma;       break;
@@ -207,13 +209,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
    case UnpackHalf2x16:    op = nir_op_unpack_half_2x16;    break;
 
    case Length:
-      val->ssa = build_length(&b->nb, src[0]);
+      val->ssa->def = build_length(&b->nb, src[0]);
       return;
    case Distance:
-      val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1]));
+      val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1]));
       return;
    case Normalize:
-      val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0]));
+      val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0]));
       return;
 
    case UaddCarry:         op = nir_op_uadd_carry;          break;
@@ -256,7 +258,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint,
    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
    nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
                      glsl_get_vector_elements(val->type), val->name);
-   val->ssa = &instr->dest.dest.ssa;
+   val->ssa->def = &instr->dest.dest.ssa;
 
    for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
       instr->src[i].src = nir_src_for_ssa(src[i]);
diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c
index d8523fd5a52..a47c683ffdf 100644
--- a/src/glsl/nir/spirv_to_nir.c
+++ b/src/glsl/nir/spirv_to_nir.c
@@ -28,23 +28,93 @@
 #include "spirv_to_nir_private.h"
 #include "nir_vla.h"
 
-nir_ssa_def *
+static struct vtn_ssa_value *
+vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
+                    const struct glsl_type *type)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
+
+   if (entry)
+      return entry->data;
+
+   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+   val->type = type;
+
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+      if (glsl_type_is_vector_or_scalar(type)) {
+         unsigned num_components = glsl_get_vector_elements(val->type);
+         nir_load_const_instr *load =
+            nir_load_const_instr_create(b->shader, num_components);
+
+         for (unsigned i = 0; i < num_components; i++)
+            load->value.u[i] = constant->value.u[i];
+
+         nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+         val->def = &load->def;
+      } else {
+         assert(glsl_type_is_matrix(type));
+         unsigned rows = glsl_get_vector_elements(val->type);
+         unsigned columns = glsl_get_matrix_columns(val->type);
+         val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
+
+         for (unsigned i = 0; i < columns; i++) {
+            struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
+            col_val->type = glsl_get_column_type(val->type);
+            nir_load_const_instr *load =
+               nir_load_const_instr_create(b->shader, rows);
+
+            for (unsigned j = 0; j < rows; j++)
+               load->value.u[j] = constant->value.u[rows * i + j];
+
+            nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+            col_val->def = &load->def;
+
+            val->elems[i] = col_val;
+         }
+      }
+      break;
+
+   case GLSL_TYPE_ARRAY: {
+      unsigned elems = glsl_get_length(val->type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+      const struct glsl_type *elem_type = glsl_get_array_element(val->type);
+      for (unsigned i = 0; i < elems; i++)
+         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+                                             elem_type);
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT: {
+      unsigned elems = glsl_get_length(val->type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+      for (unsigned i = 0; i < elems; i++) {
+         const struct glsl_type *elem_type =
+            glsl_get_struct_field(val->type, i);
+         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+                                             elem_type);
+      }
+      break;
+   }
+
+   default:
+      unreachable("bad constant type");
+   }
+
+   return val;
+}
+
+struct vtn_ssa_value *
 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
 {
    struct vtn_value *val = vtn_untyped_value(b, value_id);
    switch (val->value_type) {
-   case vtn_value_type_constant: {
-      assert(glsl_type_is_vector_or_scalar(val->type));
-      unsigned num_components = glsl_get_vector_elements(val->type);
-      nir_load_const_instr *load =
-         nir_load_const_instr_create(b->shader, num_components);
-
-      for (unsigned i = 0; i < num_components; i++)
-         load->value.u[0] = val->constant->value.u[0];
-
-      nir_builder_instr_insert(&b->nb, &load->instr);
-      return &load->def;
-   }
+   case vtn_value_type_constant:
+      return vtn_const_ssa_value(b, val->constant, val->type);
 
    case vtn_value_type_ssa:
       return val->ssa;
@@ -512,11 +582,210 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val,
    case SpvDecorationFPFastMathMode:
    case SpvDecorationLinkageAttributes:
    case SpvDecorationSpecId:
+      break;
    default:
       unreachable("Unhandled variable decoration");
    }
 }
 
+static struct vtn_ssa_value *
+_vtn_variable_load(struct vtn_builder *b,
+                   nir_deref_var *src_deref, nir_deref *src_deref_tail)
+{
+   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+   val->type = src_deref_tail->type;
+
+   /* The deref tail may contain a deref to select a component of a vector (in
+    * other words, it might not be an actual tail) so we have to save it away
+    * here since we overwrite it later.
+    */
+   nir_deref *old_child = src_deref_tail->child;
+
+   if (glsl_type_is_vector_or_scalar(val->type)) {
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
+      load->variables[0] =
+         nir_deref_as_var(nir_copy_deref(load, &src_deref->deref));
+      load->num_components = glsl_get_vector_elements(val->type);
+      nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL);
+
+      nir_builder_instr_insert(&b->nb, &load->instr);
+
+      if (src_deref->var->data.mode == nir_var_uniform &&
+          glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) {
+         /* Uniform boolean loads need to be fixed up since they're defined
+          * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE.
+          */
+         val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0));
+      } else {
+         val->def = &load->dest.ssa;
+      }
+   } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY ||
+              glsl_type_is_matrix(val->type)) {
+      unsigned elems = glsl_get_length(val->type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+
+      nir_deref_array *deref = nir_deref_array_create(b);
+      deref->deref_array_type = nir_deref_array_type_direct;
+      deref->deref.type = glsl_get_array_element(val->type);
+      src_deref_tail->child = &deref->deref;
+      for (unsigned i = 0; i < elems; i++) {
+         deref->base_offset = i;
+         val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref);
+      }
+   } else {
+      assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT);
+      unsigned elems = glsl_get_length(val->type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+
+      nir_deref_struct *deref = nir_deref_struct_create(b, 0);
+      src_deref_tail->child = &deref->deref;
+      for (unsigned i = 0; i < elems; i++) {
+         deref->index = i;
+         deref->deref.type = glsl_get_struct_field(val->type, i);
+         val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref);
+      }
+   }
+
+   src_deref_tail->child = old_child;
+
+   return val;
+}
+
+static void
+_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref,
+                    nir_deref *dest_deref_tail, struct vtn_ssa_value *src)
+{
+   nir_deref *old_child = dest_deref_tail->child;
+
+   if (glsl_type_is_vector_or_scalar(src->type)) {
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
+      store->variables[0] =
+         nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref));
+      store->src[0] = nir_src_for_ssa(src->def);
+
+      nir_builder_instr_insert(&b->nb, &store->instr);
+   } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY ||
+              glsl_type_is_matrix(src->type)) {
+      unsigned elems = glsl_get_length(src->type);
+
+      nir_deref_array *deref = nir_deref_array_create(b);
+      deref->deref_array_type = nir_deref_array_type_direct;
+      deref->deref.type = glsl_get_array_element(src->type);
+      dest_deref_tail->child = &deref->deref;
+      for (unsigned i = 0; i < elems; i++) {
+         deref->base_offset = i;
+         _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]);
+      }
+   } else {
+      assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT);
+      unsigned elems = glsl_get_length(src->type);
+
+      nir_deref_struct *deref = nir_deref_struct_create(b, 0);
+      dest_deref_tail->child = &deref->deref;
+      for (unsigned i = 0; i < elems; i++) {
+         deref->index = i;
+         deref->deref.type = glsl_get_struct_field(src->type, i);
+         _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]);
+      }
+   }
+
+   dest_deref_tail->child = old_child;
+}
+
+/*
+ * Gets the NIR-level deref tail, which may have as a child an array deref
+ * selecting which component due to OpAccessChain supporting per-component
+ * indexing in SPIR-V.
+ */
+
+static nir_deref *
+get_deref_tail(nir_deref_var *deref)
+{
+   nir_deref *cur = &deref->deref;
+   while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
+      cur = cur->child;
+
+   return cur;
+}
+
+static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b,
+                                       nir_ssa_def *src, unsigned index);
+
+static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b,
+                                               nir_ssa_def *src,
+                                               nir_ssa_def *index);
+
+static struct vtn_ssa_value *
+vtn_variable_load(struct vtn_builder *b, nir_deref_var *src)
+{
+   nir_deref *src_tail = get_deref_tail(src);
+   struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail);
+
+   if (src_tail->child) {
+      nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
+      assert(vec_deref->deref.child == NULL);
+      val->type = vec_deref->deref.type;
+      if (vec_deref->deref_array_type == nir_deref_array_type_direct)
+         val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
+      else
+         val->def = vtn_vector_extract_dynamic(b, val->def,
+                                               vec_deref->indirect.ssa);
+   }
+
+   return val;
+}
+
+static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b,
+                                       nir_ssa_def *src, nir_ssa_def *insert,
+                                       unsigned index);
+
+static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b,
+                                               nir_ssa_def *src,
+                                               nir_ssa_def *insert,
+                                               nir_ssa_def *index);
+static void
+vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+                   nir_deref_var *dest)
+{
+   nir_deref *dest_tail = get_deref_tail(dest);
+   if (dest_tail->child) {
+      struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail);
+      nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
+      assert(deref->deref.child == NULL);
+      if (deref->deref_array_type == nir_deref_array_type_direct)
+         val->def = vtn_vector_insert(b, val->def, src->def,
+                                      deref->base_offset);
+      else
+         val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
+                                              deref->indirect.ssa);
+      _vtn_variable_store(b, dest, dest_tail, val);
+   } else {
+      _vtn_variable_store(b, dest, dest_tail, src);
+   }
+}
+
+static void
+vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src,
+                  nir_deref_var *dest)
+{
+   nir_deref *src_tail = get_deref_tail(src);
+
+   if (src_tail->child) {
+      assert(get_deref_tail(dest)->child);
+      struct vtn_ssa_value *val = vtn_variable_load(b, src);
+      vtn_variable_store(b, val, dest);
+   } else {
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var);
+      copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref));
+      copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref));
+
+      nir_builder_instr_insert(&b->nb, &copy->instr);
+   }
+}
+
 static void
 vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
                      const uint32_t *w, unsigned count)
@@ -645,7 +914,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
                assert(idx_val->value_type == vtn_value_type_ssa);
                deref_arr->deref_array_type = nir_deref_array_type_indirect;
                deref_arr->base_offset = 0;
-               deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1]));
+               deref_arr->indirect =
+                  nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
             }
             tail->child = &deref_arr->deref;
             break;
@@ -671,12 +941,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
       nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref;
       nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref;
 
-      nir_intrinsic_instr *copy =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var);
-      copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref));
-      copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref));
-
-      nir_builder_instr_insert(&b->nb, &copy->instr);
+      vtn_variable_copy(b, src, dest);
       break;
    }
 
@@ -689,61 +954,15 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
          return;
       }
 
-      assert(glsl_type_is_vector_or_scalar(src_type));
       struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
-
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
-      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref));
-      load->num_components = glsl_get_vector_elements(src_type);
-      nir_ssa_dest_init(&load->instr, &load->dest, load->num_components,
-                        val->name);
-
-      nir_builder_instr_insert(&b->nb, &load->instr);
-      val->type = src_type;
-
-      if (src->var->data.mode == nir_var_uniform &&
-          glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) {
-         /* Uniform boolean loads need to be fixed up since they're defined
-          * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE.
-          */
-         val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0));
-      } else {
-         val->ssa = &load->dest.ssa;
-      }
+      val->ssa = vtn_variable_load(b, src);
       break;
    }
 
    case SpvOpStore: {
       nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref;
-      const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type;
-      struct vtn_value *src_val = vtn_untyped_value(b, w[2]);
-      if (src_val->value_type == vtn_value_type_ssa) {
-         assert(glsl_type_is_vector_or_scalar(dest_type));
-         nir_intrinsic_instr *store =
-            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
-         store->src[0] = nir_src_for_ssa(src_val->ssa);
-         store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref));
-         store->num_components = glsl_get_vector_elements(dest_type);
-
-         nir_builder_instr_insert(&b->nb, &store->instr);
-      } else {
-         assert(src_val->value_type == vtn_value_type_constant);
-
-         nir_variable *const_tmp = rzalloc(b->shader, nir_variable);
-         const_tmp->type = dest_type;
-         const_tmp->name = "const_temp";
-         const_tmp->data.mode = nir_var_local;
-         const_tmp->data.read_only = true;
-         exec_list_push_tail(&b->impl->locals, &const_tmp->node);
-
-         nir_intrinsic_instr *copy =
-            nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var);
-         copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref));
-         copy->variables[1] = nir_deref_var_create(copy, const_tmp);
-
-         nir_builder_instr_insert(&b->nb, &copy->instr);
-      }
+      struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
+      vtn_variable_store(b, src, dest);
       break;
    }
 
@@ -767,7 +986,7 @@ static nir_tex_src
 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
 {
    nir_tex_src src;
-   src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa);
+   src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def);
    src.src_type = type;
    return src;
 }
@@ -804,13 +1023,16 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
    case SpvOpTextureGather:
    case SpvOpTextureGatherOffset:
    case SpvOpTextureGatherOffsets:
-   case SpvOpTextureQueryLod:
+   case SpvOpTextureQueryLod: {
       /* All these types have the coordinate as their first real argument */
-      coord_components = glsl_get_vector_elements(b->values[w[4]].type);
-      p->src = nir_src_for_ssa(vtn_ssa_value(b, w[4]));
+      struct vtn_ssa_value *coord = vtn_ssa_value(b, w[4]);
+      coord_components = glsl_get_vector_elements(coord->type);
+      p->src = nir_src_for_ssa(coord->def);
       p->src_type = nir_tex_src_coord;
       p++;
       break;
+   }
+
    default:
       break;
    }
@@ -877,16 +1099,270 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
    instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
 
    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
-   val->ssa = &instr->dest.ssa;
+   val->ssa->def = &instr->dest.ssa;
+   val->ssa->type = val->type;
 
    nir_builder_instr_insert(&b->nb, &instr->instr);
 }
 
+static struct vtn_ssa_value *
+vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
+{
+   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+   val->type = type;
+   
+   if (!glsl_type_is_vector_or_scalar(type)) {
+      unsigned elems = glsl_get_length(type);
+      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+      for (unsigned i = 0; i < elems; i++) {
+         const struct glsl_type *child_type;
+
+         switch (glsl_get_base_type(type)) {
+         case GLSL_TYPE_INT:
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_BOOL:
+         case GLSL_TYPE_FLOAT:
+         case GLSL_TYPE_DOUBLE:
+            child_type = glsl_get_column_type(type);
+            break;
+         case GLSL_TYPE_ARRAY:
+            child_type = glsl_get_array_element(type);
+            break;
+         case GLSL_TYPE_STRUCT:
+            child_type = glsl_get_struct_field(type, i);
+            break;
+         default:
+            unreachable("unkown base type");
+         }
+
+         val->elems[i] = vtn_create_ssa_value(b, child_type);
+      }
+   }
+
+   return val;
+}
+
+static nir_alu_instr *
+create_vec(void *mem_ctx, unsigned num_components)
+{
+   nir_op op;
+   switch (num_components) {
+   case 1: op = nir_op_fmov; break;
+   case 2: op = nir_op_vec2; break;
+   case 3: op = nir_op_vec3; break;
+   case 4: op = nir_op_vec4; break;
+   default: unreachable("bad vector size");
+   }
+
+   nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op);
+   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+
+   return vec;
+}
+
+static struct vtn_ssa_value *
+vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+   if (src->transposed)
+      return src->transposed;
+
+   struct vtn_ssa_value *dest =
+      vtn_create_ssa_value(b, glsl_transposed_type(src->type));
+
+   for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
+      nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type));
+      if (glsl_type_is_vector_or_scalar(src->type)) {
+          vec->src[0].src = nir_src_for_ssa(src->def);
+          vec->src[0].swizzle[0] = i;
+      } else {
+         for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
+            vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
+            vec->src[j].swizzle[0] = i;
+         }
+      }
+      nir_builder_instr_insert(&b->nb, &vec->instr);
+      dest->elems[i]->def = &vec->dest.dest.ssa;
+   }
+
+   dest->transposed = src;
+
+   return dest;
+}
+
+/*
+ * Normally, column vectors in SPIR-V correspond to a single NIR SSA
+ * definition. But for matrix multiplies, we want to do one routine for
+ * multiplying a matrix by a matrix and then pretend that vectors are matrices
+ * with one column. So we "wrap" these things, and unwrap the result before we
+ * send it off.
+ */
+
+static struct vtn_ssa_value *
+vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
+{
+   if (val == NULL)
+      return NULL;
+
+   if (glsl_type_is_matrix(val->type))
+      return val;
+
+   struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
+   dest->type = val->type;
+   dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
+   dest->elems[0] = val;
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_unwrap_matrix(struct vtn_ssa_value *val)
+{
+   if (glsl_type_is_matrix(val->type))
+         return val;
+
+   return val->elems[0];
+}
+
+static struct vtn_ssa_value *
+vtn_matrix_multiply(struct vtn_builder *b,
+                    struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
+{
+
+   struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0);
+   struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1);
+   struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed);
+   struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed);
+
+   unsigned src0_rows = glsl_get_vector_elements(src0->type);
+   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
+   unsigned src1_columns = glsl_get_matrix_columns(src1->type);
+
+   struct vtn_ssa_value *dest =
+      vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type),
+                                               src0_rows, src1_columns));
+
+   dest = vtn_wrap_matrix(b, dest);
+
+   bool transpose_result = false;
+   if (src0_transpose && src1_transpose) {
+      /* transpose(A) * transpose(B) = transpose(B * A) */
+      src1 = src0_transpose;
+      src0 = src1_transpose;
+      src0_transpose = NULL;
+      src1_transpose = NULL;
+      transpose_result = true;
+   }
+
+   if (src0_transpose && !src1_transpose &&
+       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
+      /* We already have the rows of src0 and the columns of src1 available,
+       * so we can just take the dot product of each row with each column to
+       * get the result.
+       */
+
+      for (unsigned i = 0; i < src1_columns; i++) {
+         nir_alu_instr *vec = create_vec(b, src0_rows);
+         for (unsigned j = 0; j < src0_rows; j++) {
+            vec->src[j].src =
+               nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def,
+                                        src1->elems[i]->def));
+         }
+
+         nir_builder_instr_insert(&b->nb, &vec->instr);
+         dest->elems[i]->def = &vec->dest.dest.ssa;
+      }
+   } else {
+      /* We don't handle the case where src1 is transposed but not src0, since
+       * the general case only uses individual components of src1 so the
+       * optimizer should chew through the transpose we emitted for src1.
+       */
+
+      for (unsigned i = 0; i < src1_columns; i++) {
+         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
+         dest->elems[i]->def =
+            nir_fmul(&b->nb, src0->elems[0]->def,
+                     vtn_vector_extract(b, src1->elems[i]->def, 0));
+         for (unsigned j = 1; j < src0_columns; j++) {
+            dest->elems[i]->def =
+               nir_fadd(&b->nb, dest->elems[i]->def,
+                        nir_fmul(&b->nb, src0->elems[j]->def,
+                                 vtn_vector_extract(b,
+                                                    src1->elems[i]->def, j)));
+         }
+      }
+   }
+   
+   dest = vtn_unwrap_matrix(dest);
+
+   if (transpose_result)
+      dest = vtn_transpose(b, dest);
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_mat_times_scalar(struct vtn_builder *b,
+                     struct vtn_ssa_value *mat,
+                     nir_ssa_def *scalar)
+{
+   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
+   for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
+      if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
+         dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
+      else
+         dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
+   }
+
+   return dest;
+}
+
 static void
 vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
                       const uint32_t *w, unsigned count)
 {
-   unreachable("Matrix math not handled");
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+
+   switch (opcode) {
+   case SpvOpTranspose: {
+      struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]);
+      val->ssa = vtn_transpose(b, src);
+      break;
+   }
+
+   case SpvOpOuterProduct: {
+      struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]);
+      struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]);
+
+      val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1));
+      break;
+   }
+
+   case SpvOpMatrixTimesScalar: {
+      struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]);
+      struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]);
+
+      if (mat->transposed) {
+         val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed,
+                                                          scalar->def));
+      } else {
+         val->ssa = vtn_mat_times_scalar(b, mat, scalar->def);
+      }
+      break;
+   }
+
+   case SpvOpVectorTimesMatrix:
+   case SpvOpMatrixTimesVector:
+   case SpvOpMatrixTimesMatrix: {
+      struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]);
+      struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]);
+
+      val->ssa = vtn_matrix_multiply(b, src0, src1);
+      break;
+   }
+
+   default: unreachable("unknown matrix opcode");
+   }
 }
 
 static void
@@ -895,12 +1371,13 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 {
    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+   val->ssa = vtn_create_ssa_value(b, val->type);
 
    /* Collect the various SSA sources */
    unsigned num_inputs = count - 3;
    nir_ssa_def *src[4];
    for (unsigned i = 0; i < num_inputs; i++)
-      src[i] = vtn_ssa_value(b, w[i + 3]);
+      src[i] = vtn_ssa_value(b, w[i + 3])->def;
 
    /* Indicates that the first two arguments should be swapped.  This is
     * used for implementing greater-than and less-than-or-equal.
@@ -1012,24 +1489,24 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
    case SpvOpDPdxCoarse:   op = nir_op_fddx_coarse;   break;
    case SpvOpDPdyCoarse:   op = nir_op_fddy_coarse;   break;
    case SpvOpFwidth:
-      val->ssa = nir_fadd(&b->nb,
-                          nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
-                          nir_fabs(&b->nb, nir_fddx(&b->nb, src[1])));
+      val->ssa->def = nir_fadd(&b->nb,
+                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
+                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[1])));
       return;
    case SpvOpFwidthFine:
-      val->ssa = nir_fadd(&b->nb,
-                          nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
-                          nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1])));
+      val->ssa->def = nir_fadd(&b->nb,
+                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
+                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1])));
       return;
    case SpvOpFwidthCoarse:
-      val->ssa = nir_fadd(&b->nb,
-                          nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
-                          nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1])));
+      val->ssa->def = nir_fadd(&b->nb,
+                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
+                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1])));
       return;
 
    case SpvOpVectorTimesScalar:
       /* The builder will take care of splatting for us. */
-      val->ssa = nir_fmul(&b->nb, src[0], src[1]);
+      val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
       return;
 
    case SpvOpSRem:
@@ -1057,7 +1534,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
    nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
                      glsl_get_vector_elements(val->type), val->name);
-   val->ssa = &instr->dest.dest.ssa;
+   val->ssa->def = &instr->dest.dest.ssa;
 
    for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
       instr->src[i].src = nir_src_for_ssa(src[i]);
@@ -1065,6 +1542,350 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
    nir_builder_instr_insert(&b->nb, &instr->instr);
 }
 
+static nir_ssa_def *
+vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
+{
+   unsigned swiz[4] = { index };
+   return nir_swizzle(&b->nb, src, swiz, 1, true);
+}
+
+
+static nir_ssa_def *
+vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
+                  unsigned index)
+{
+   nir_alu_instr *vec = create_vec(b->shader, src->num_components);
+
+   for (unsigned i = 0; i < src->num_components; i++) {
+      if (i == index) {
+         vec->src[i].src = nir_src_for_ssa(insert);
+      } else {
+         vec->src[i].src = nir_src_for_ssa(src);
+         vec->src[i].swizzle[0] = i;
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &vec->instr);
+
+   return &vec->dest.dest.ssa;
+}
+
+static nir_ssa_def *
+vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+                           nir_ssa_def *index)
+{
+   nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
+   for (unsigned i = 1; i < src->num_components; i++)
+      dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+                       vtn_vector_extract(b, src, i), dest);
+
+   return dest;
+}
+
+static nir_ssa_def *
+vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+                          nir_ssa_def *insert, nir_ssa_def *index)
+{
+   nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
+   for (unsigned i = 1; i < src->num_components; i++)
+      dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+                       vtn_vector_insert(b, src, insert, i), dest);
+
+   return dest;
+}
+
+static nir_ssa_def *
+vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
+                   nir_ssa_def *src0, nir_ssa_def *src1,
+                   const uint32_t *indices)
+{
+   nir_alu_instr *vec = create_vec(b->shader, num_components);
+
+   nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
+   nir_builder_instr_insert(&b->nb, &undef->instr);
+
+   for (unsigned i = 0; i < num_components; i++) {
+      uint32_t index = indices[i];
+      if (index == 0xffffffff) {
+         vec->src[i].src = nir_src_for_ssa(&undef->def);
+      } else if (index < src0->num_components) {
+         vec->src[i].src = nir_src_for_ssa(src0);
+         vec->src[i].swizzle[0] = index;
+      } else {
+         vec->src[i].src = nir_src_for_ssa(src1);
+         vec->src[i].swizzle[0] = index - src0->num_components;
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &vec->instr);
+
+   return &vec->dest.dest.ssa;
+}
+
+/*
+ * Concatentates a number of vectors/scalars together to produce a vector
+ */
+static nir_ssa_def *
+vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
+                     unsigned num_srcs, nir_ssa_def **srcs)
+{
+   nir_alu_instr *vec = create_vec(b->shader, num_components);
+
+   unsigned dest_idx = 0;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      nir_ssa_def *src = srcs[i];
+      for (unsigned j = 0; j < src->num_components; j++) {
+         vec->src[dest_idx].src = nir_src_for_ssa(src);
+         vec->src[dest_idx].swizzle[0] = j;
+         dest_idx++;
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &vec->instr);
+
+   return &vec->dest.dest.ssa;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
+{
+   struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
+   dest->type = src->type;
+
+   if (glsl_type_is_vector_or_scalar(src->type)) {
+      dest->def = src->def;
+   } else {
+      unsigned elems = glsl_get_length(src->type);
+
+      dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
+      for (unsigned i = 0; i < elems; i++)
+         dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
+   }
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
+                     struct vtn_ssa_value *insert, const uint32_t *indices,
+                     unsigned num_indices)
+{
+   struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
+
+   struct vtn_ssa_value *cur = dest;
+   unsigned i;
+   for (i = 0; i < num_indices - 1; i++) {
+      cur = cur->elems[indices[i]];
+   }
+
+   if (glsl_type_is_vector_or_scalar(cur->type)) {
+      /* According to the SPIR-V spec, OpCompositeInsert may work down to
+       * the component granularity. In that case, the last index will be
+       * the index to insert the scalar into the vector.
+       */
+
+      cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
+   } else {
+      cur->elems[indices[i]] = insert;
+   }
+
+   return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
+                      const uint32_t *indices, unsigned num_indices)
+{
+   struct vtn_ssa_value *cur = src;
+   for (unsigned i = 0; i < num_indices; i++) {
+      if (glsl_type_is_vector_or_scalar(cur->type)) {
+         assert(i == num_indices - 1);
+         /* According to the SPIR-V spec, OpCompositeExtract may work down to
+          * the component granularity. The last index will be the index of the
+          * vector to extract.
+          */
+
+         struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value);
+         ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
+         ret->def = vtn_vector_extract(b, cur->def, indices[i]);
+         return ret;
+      }
+   }
+
+   return cur;
+}
+
+static void
+vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
+                     const uint32_t *w, unsigned count)
+{
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+
+   switch (opcode) {
+   case SpvOpVectorExtractDynamic:
+      val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
+                                                 vtn_ssa_value(b, w[4])->def);
+      break;
+
+   case SpvOpVectorInsertDynamic:
+      val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
+                                                vtn_ssa_value(b, w[4])->def,
+                                                vtn_ssa_value(b, w[5])->def);
+      break;
+
+   case SpvOpVectorShuffle:
+      val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(val->type),
+                                         vtn_ssa_value(b, w[3])->def,
+                                         vtn_ssa_value(b, w[4])->def,
+                                         w + 5);
+      break;
+
+   case SpvOpCompositeConstruct: {
+      val->ssa = rzalloc(b, struct vtn_ssa_value);
+      unsigned elems = count - 3;
+      if (glsl_type_is_vector_or_scalar(val->type)) {
+         nir_ssa_def *srcs[4];
+         for (unsigned i = 0; i < elems; i++)
+            srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
+         val->ssa->def =
+            vtn_vector_construct(b, glsl_get_vector_elements(val->type),
+                                 elems, srcs);
+      } else {
+         val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+         for (unsigned i = 0; i < elems; i++)
+            val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
+      }
+      break;
+   }
+   case SpvOpCompositeExtract:
+      val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
+                                       w + 4, count - 4);
+      break;
+
+   case SpvOpCompositeInsert:
+      val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
+                                      vtn_ssa_value(b, w[3]),
+                                      w + 5, count - 5);
+      break;
+
+   case SpvOpCopyObject:
+      val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
+      break;
+
+   default:
+      unreachable("unknown composite operation");
+   }
+
+   val->ssa->type = val->type;
+}
+
+static void
+vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val)
+{
+   if (glsl_type_is_vector_or_scalar(val->type)) {
+      nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+      nir_ssa_dest_init(&phi->instr, &phi->dest,
+                        glsl_get_vector_elements(val->type), NULL);
+      exec_list_make_empty(&phi->srcs);
+      nir_builder_instr_insert(&b->nb, &phi->instr);
+      val->def = &phi->dest.ssa;
+   } else {
+      unsigned elems = glsl_get_length(val->type);
+      for (unsigned i = 0; i < elems; i++)
+         vtn_phi_node_init(b, val->elems[i]);
+   }
+}
+
+static struct vtn_ssa_value *
+vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type)
+{
+   struct vtn_ssa_value *val = vtn_create_ssa_value(b, type);
+   vtn_phi_node_init(b, val);
+   return val;
+}
+
+static void
+vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w)
+{
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+   val->ssa = vtn_phi_node_create(b, val->type);
+}
+
+static void
+vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred,
+                     struct vtn_ssa_value *val)
+{
+   assert(phi->type == val->type);
+   if (glsl_type_is_vector_or_scalar(phi->type)) {
+      nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr);
+      nir_phi_src *src = ralloc(phi_instr, nir_phi_src);
+      src->pred = (nir_block *) pred;
+      src->src = nir_src_for_ssa(val->def);
+      exec_list_push_tail(&phi_instr->srcs, &src->node);
+   } else {
+      unsigned elems = glsl_get_length(phi->type);
+      for (unsigned i = 0; i < elems; i++)
+         vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]);
+   }
+}
+
+static struct vtn_ssa_value *
+vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block,
+                     const struct glsl_type *type, const uint32_t *w,
+                     unsigned count)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block);
+   if (entry) {
+      struct vtn_block *spv_block = entry->data;
+      for (unsigned off = 4; off < count; off += 2) {
+         if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) {
+            return vtn_ssa_value(b, w[off - 1]);
+         }
+      }
+   }
+
+   nir_builder_insert_before_block(&b->nb, block);
+   struct vtn_ssa_value *phi = vtn_phi_node_create(b, type);
+
+   struct set_entry *entry2;
+   set_foreach(block->predecessors, entry2) {
+      nir_block *pred = (nir_block *) entry2->key;
+      struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w,
+                                                       count);
+      vtn_phi_node_add_src(phi, pred, val);
+   }
+
+   return phi;
+}
+
+static bool
+vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
+                           const uint32_t *w, unsigned count)
+{
+   if (opcode == SpvOpLabel) {
+      b->block = vtn_value(b, w[1], vtn_value_type_block)->block;
+      return true;
+   }
+
+   if (opcode != SpvOpPhi)
+      return true;
+
+   struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa;
+
+   struct set_entry *entry;
+   set_foreach(b->block->block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+
+      struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w,
+                                                       count);
+      vtn_phi_node_add_src(phi, pred, val);
+   }
+
+   return true;
+}
+
 static bool
 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
                                 const uint32_t *w, unsigned count)
@@ -1214,6 +2035,7 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode,
    }
 
    case SpvOpFunctionEnd:
+      b->func->end = w;
       b->func = NULL;
       break;
 
@@ -1432,6 +2254,20 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
       vtn_handle_matrix_alu(b, opcode, w, count);
       break;
 
+   case SpvOpVectorExtractDynamic:
+   case SpvOpVectorInsertDynamic:
+   case SpvOpVectorShuffle:
+   case SpvOpCompositeConstruct:
+   case SpvOpCompositeExtract:
+   case SpvOpCompositeInsert:
+   case SpvOpCopyObject:
+      vtn_handle_composite(b, opcode, w, count);
+      break;
+
+   case SpvOpPhi:
+      vtn_handle_phi_first_pass(b, w);
+      break;
+
    default:
       unreachable("Unhandled opcode");
    }
@@ -1446,20 +2282,11 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
 {
    struct vtn_block *block = start;
    while (block != end_block) {
-      if (block->block != NULL) {
-         /* We've already visited this block once before so this is a
-          * back-edge.  Back-edges are only allowed to point to a loop
-          * merge.
-          */
-         assert(block == cont_block);
-         return;
-      }
-
       if (block->merge_op == SpvOpLoopMerge) {
          /* This is the jump into a loop. */
-         cont_block = block;
-         break_block = vtn_value(b, block->merge_block_id,
-                                 vtn_value_type_block)->block;
+         struct vtn_block *new_cont_block = block;
+         struct vtn_block *new_break_block =
+            vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
 
          nir_loop *loop = nir_loop_create(b->shader);
          nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node);
@@ -1470,10 +2297,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
          block->merge_op = SpvOpNop;
 
          nir_builder_insert_after_cf_list(&b->nb, &loop->body);
-         vtn_walk_blocks(b, block, break_block, cont_block, NULL);
+         vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL);
 
          nir_builder_insert_after_cf_list(&b->nb, old_list);
-         block = break_block;
+         block = new_break_block;
          continue;
       }
 
@@ -1484,6 +2311,12 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
       vtn_foreach_instruction(b, block->label, block->branch,
                               vtn_handle_body_instruction);
 
+      nir_cf_node *cur_cf_node =
+         exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list),
+                        node);
+      nir_block *cur_block = nir_cf_node_as_block(cur_cf_node);
+      _mesa_hash_table_insert(b->block_table, cur_block, block);
+
       switch (branch_op) {
       case SpvOpBranch: {
          struct vtn_block *branch_block =
@@ -1502,8 +2335,16 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
 
             return;
          } else if (branch_block == end_block) {
+            /* We're branching to the merge block of an if, since for loops
+             * and functions end_block == NULL, so we're done here.
+             */
             return;
          } else {
+            /* We're branching to another block, and according to the rules,
+             * we can only branch to another block with one predecessor (so
+             * we're the only one jumping to it) so we can just process it
+             * next.
+             */
             block = branch_block;
             continue;
          }
@@ -1517,7 +2358,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
             vtn_value(b, w[3], vtn_value_type_block)->block;
 
          nir_if *if_stmt = nir_if_create(b->shader);
-         if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1]));
+         if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
          nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node);
 
          if (then_block == break_block) {
@@ -1545,7 +2386,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
                                            &jump->instr);
             block = then_block;
          } else {
-            /* Conventional if statement */
+            /* According to the rules we're branching to two blocks that don't
+             * have any other predecessors, so we can handle this as a
+             * conventional if.
+             */
             assert(block->merge_op == SpvOpSelectionMerge);
             struct vtn_block *merge_block =
                vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
@@ -1652,9 +2496,15 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
 
    foreach_list_typed(struct vtn_function, func, node, &b->functions) {
       b->impl = nir_function_impl_create(func->overload);
+      b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
+      b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
       nir_builder_init(&b->nb, b->impl);
       nir_builder_insert_after_cf_list(&b->nb, &b->impl->body);
       vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL);
+      vtn_foreach_instruction(b, func->start_block->label, func->end,
+                              vtn_handle_phi_second_pass);
    }
 
    ralloc_free(b);
diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h
index d2b364bdfeb..b157e023a68 100644
--- a/src/glsl/nir/spirv_to_nir_private.h
+++ b/src/glsl/nir/spirv_to_nir_private.h
@@ -60,11 +60,27 @@ struct vtn_function {
 
    nir_function_overload *overload;
    struct vtn_block *start_block;
+
+   const uint32_t *end;
 };
 
 typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
                                         const uint32_t *, unsigned);
 
+struct vtn_ssa_value {
+   union {
+      nir_ssa_def *def;
+      struct vtn_ssa_value **elems;
+   };
+
+   /* For matrices, a transposed version of the value, or NULL if it hasn't
+    * been computed
+    */
+   struct vtn_ssa_value *transposed;
+
+   const struct glsl_type *type;
+};
+
 struct vtn_value {
    enum vtn_value_type value_type;
    const char *name;
@@ -77,7 +93,7 @@ struct vtn_value {
       nir_deref_var *deref;
       struct vtn_function *func;
       struct vtn_block *block;
-      nir_ssa_def *ssa;
+      struct vtn_ssa_value *ssa;
       vtn_instruction_handler ext_handler;
    };
 };
@@ -96,6 +112,20 @@ struct vtn_builder {
    nir_function_impl *impl;
    struct vtn_block *block;
 
+   /*
+    * In SPIR-V, constants are global, whereas in NIR, the load_const
+    * instruction we use is per-function. So while we parse each function, we
+    * keep a hash table of constants we've resolved to nir_ssa_value's so
+    * far, and we lazily resolve them when we see them used in a function.
+    */
+   struct hash_table *const_table;
+
+   /*
+    * Map from nir_block to the vtn_block which ends with it -- used for
+    * handling phi nodes.
+    */
+   struct hash_table *block_table;
+
    unsigned value_id_bound;
    struct vtn_value *values;
 
@@ -134,7 +164,7 @@ vtn_value(struct vtn_builder *b, uint32_t value_id,
    return val;
 }
 
-nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
+struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
 
 typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
                                           struct vtn_value *,