ir3: Reformat source with clang-format

Generated using:

cd src/freedreno/ir3 && clang-format -i {**,.}/*.c {**,.}/*.h -style=file

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11801>
Authored by Connor Abbott on 2021-07-09 14:50:05 +02:00, committed by Marge Bot
parent 082871bb35
commit 177138d8cb
52 changed files with 18722 additions and 18389 deletions


@@ -21,15 +21,15 @@
* SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <util/log.h>
#include <util/u_debug.h>
#include "isa/isa.h"
@@ -39,125 +39,120 @@
static enum debug_t debug;
static const char *levels[] = {
"",
"\t",
"\t\t",
"\t\t\t",
"\t\t\t\t",
"\t\t\t\t\t",
"\t\t\t\t\t\t",
"\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t\t",
"x",
"x",
"x",
"x",
"x",
"x",
"",
"\t",
"\t\t",
"\t\t\t",
"\t\t\t\t",
"\t\t\t\t\t",
"\t\t\t\t\t\t",
"\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t\t",
"x",
"x",
"x",
"x",
"x",
"x",
};
struct disasm_ctx {
   FILE *out;
   struct isa_decode_options *options;
   unsigned level;
   unsigned extra_cycles;

   /**
    * nop_count/has_end used to detect the real end of shader. Since
    * in some cases there can be a epilogue following an `end` we look
    * for a sequence of `nop`s following the `end`
    */
   int nop_count; /* number of nop's since non-nop instruction: */
   bool has_end;  /* have we seen end instruction */

   int cur_n;       /* current instr # */
   int cur_opc_cat; /* current opc_cat */

   int sfu_delay;

   /**
    * State accumulated decoding fields of the current instruction,
    * handled after decoding is complete (ie. at start of next instr)
    */
   struct {
      bool ss;
      uint8_t nop;
      uint8_t repeat;
   } last;

   /**
    * State accumulated decoding fields of src or dst register
    */
   struct {
      bool half;
      bool r;
      enum {
         FILE_GPR = 1,
         FILE_CONST = 2,
      } file;
      unsigned num;
   } reg;

   struct shader_stats *stats;
};
static void
print_stats(struct disasm_ctx *ctx)
{
   if (ctx->options->gpu_id >= 600) {
      /* handle MERGEREGS case.. this isn't *entirely* accurate, as
       * you can have shader stages not using merged register file,
       * but it is good enough for a guestimate:
       */
      unsigned n = (ctx->stats->halfreg + 1) / 2;

      ctx->stats->halfreg = 0;
      ctx->stats->fullreg = MAX2(ctx->stats->fullreg, n);
   }

   unsigned instructions = ctx->cur_n + ctx->extra_cycles + 1;

   fprintf(ctx->out, "%sStats:\n", levels[ctx->level]);
   fprintf(ctx->out,
           "%s- shaderdb: %u instr, %u nops, %u non-nops, %u mov, %u cov\n",
           levels[ctx->level], instructions, ctx->stats->nops,
           instructions - ctx->stats->nops, ctx->stats->mov_count,
           ctx->stats->cov_count);
   fprintf(ctx->out,
           "%s- shaderdb: %u last-baryf, %d half, %d full, %u constlen\n",
           levels[ctx->level], ctx->stats->last_baryf,
           DIV_ROUND_UP(ctx->stats->halfreg, 4),
           DIV_ROUND_UP(ctx->stats->fullreg, 4),
           DIV_ROUND_UP(ctx->stats->constlen, 4));
   fprintf(
      ctx->out,
      "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7\n",
      levels[ctx->level], ctx->stats->instrs_per_cat[0],
      ctx->stats->instrs_per_cat[1], ctx->stats->instrs_per_cat[2],
      ctx->stats->instrs_per_cat[3], ctx->stats->instrs_per_cat[4],
      ctx->stats->instrs_per_cat[5], ctx->stats->instrs_per_cat[6],
      ctx->stats->instrs_per_cat[7]);
   fprintf(ctx->out, "%s- shaderdb: %u sstall, %u (ss), %u (sy)\n",
           levels[ctx->level], ctx->stats->sstall, ctx->stats->ss,
           ctx->stats->sy);
}
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6
static const struct opc_info {
   const char *name;
} opcs[1 << (3 + NOPC_BITS)] = {
#define OPC(cat, opc, name) [(opc)] = {#name}
/* clang-format off */
/* category 0: */
OPC(0, OPC_NOP, nop),
@@ -359,96 +354,96 @@ static const struct opc_info {
#undef OPC
};
#define GETINFO(instr)                                                         \
   (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))

const char *
disasm_a3xx_instr_name(opc_t opc)
{
   if (opc_cat(opc) == -1)
      return "??meta??";
   return opcs[opc].name;
}
static void
disasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val)
{
   struct disasm_ctx *ctx = d;

   if (!strcmp(field_name, "NAME")) {
      if (!strcmp("nop", val->str)) {
         if (ctx->has_end) {
            ctx->nop_count++;
            if (ctx->nop_count > 3) {
               ctx->options->stop = true;
            }
         }
         ctx->stats->nops += 1 + ctx->last.repeat;
      } else {
         ctx->nop_count = 0;
      }

      if (!strcmp("end", val->str)) {
         ctx->has_end = true;
         ctx->nop_count = 0;
      } else if (!strcmp("chsh", val->str)) {
         ctx->options->stop = true;
      } else if (!strcmp("bary.f", val->str)) {
         ctx->stats->last_baryf = ctx->cur_n;
      }
   } else if (!strcmp(field_name, "REPEAT")) {
      ctx->extra_cycles += val->num;
      ctx->stats->instrs_per_cat[ctx->cur_opc_cat] += val->num;
      ctx->last.repeat = val->num;
   } else if (!strcmp(field_name, "NOP")) {
      ctx->extra_cycles += val->num;
      ctx->stats->instrs_per_cat[0] += val->num;
      ctx->stats->nops += val->num;
      ctx->last.nop = val->num;
   } else if (!strcmp(field_name, "SY")) {
      ctx->stats->sy += val->num;
   } else if (!strcmp(field_name, "SS")) {
      ctx->stats->ss += val->num;
      ctx->last.ss = !!val->num;
   } else if (!strcmp(field_name, "CONST")) {
      ctx->reg.num = val->num;
      ctx->reg.file = FILE_CONST;
   } else if (!strcmp(field_name, "GPR")) {
      /* don't count GPR regs r48.x (shared) or higher: */
      if (val->num < 48) {
         ctx->reg.num = val->num;
         ctx->reg.file = FILE_GPR;
      }
   } else if (!strcmp(field_name, "SRC_R") || !strcmp(field_name, "SRC1_R") ||
              !strcmp(field_name, "SRC2_R") || !strcmp(field_name, "SRC3_R")) {
      ctx->reg.r = val->num;
   } else if (!strcmp(field_name, "DST")) {
      /* Dest register is always repeated
       *
       * Note that this doesn't really properly handle instructions
       * that write multiple components.. the old disasm didn't handle
       * that case either.
       */
      ctx->reg.r = true;
   } else if (strstr(field_name, "HALF")) {
      ctx->reg.half = val->num;
   } else if (!strcmp(field_name, "SWIZ")) {
      unsigned num = (ctx->reg.num << 2) | val->num;
      if (ctx->reg.r)
         num += ctx->last.repeat;

      if (ctx->reg.file == FILE_CONST) {
         ctx->stats->constlen = MAX2(ctx->stats->constlen, num);
      } else if (ctx->reg.file == FILE_GPR) {
         if (ctx->reg.half) {
            ctx->stats->halfreg = MAX2(ctx->stats->halfreg, num);
         } else {
            ctx->stats->fullreg = MAX2(ctx->stats->fullreg, num);
         }
      }

      memset(&ctx->reg, 0, sizeof(ctx->reg));
   }
}
/**
@@ -458,103 +453,105 @@ disasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val)
static void
disasm_handle_last(struct disasm_ctx *ctx)
{
   if (ctx->last.ss) {
      ctx->stats->sstall += ctx->sfu_delay;
      ctx->sfu_delay = 0;
   }

   if (ctx->cur_opc_cat == 4) {
      ctx->sfu_delay = 10;
   } else {
      int n = MIN2(ctx->sfu_delay, 1 + ctx->last.repeat + ctx->last.nop);
      ctx->sfu_delay -= n;
   }

   memset(&ctx->last, 0, sizeof(ctx->last));
}
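/*
 * Illustrative only (not part of the original source): a rough walk-through
 * of the (ss) stall accounting above, assuming each instruction costs
 * 1 + repeat + nop cycles.  Suppose a cat4 (SFU) instruction is followed by
 * a cat2 instruction with (nop2), and then by an instruction flagged (ss):
 *
 *    after the cat4 op is handled:          sfu_delay = 10
 *    after the cat2 op (1 + 0 + 2 cycles):  sfu_delay = 10 - 3 = 7
 *    when the (ss) op is handled:           stats->sstall += 7, sfu_delay = 0
 */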
static void
disasm_instr_cb(void *d, unsigned n, uint64_t instr)
{
   struct disasm_ctx *ctx = d;
   uint32_t *dwords = (uint32_t *)&instr;
   unsigned opc_cat = instr >> 61;

   /* There are some cases where we can get instr_cb called multiple
    * times per instruction (like when we need an extra line for branch
    * target labels), don't update stats in these cases:
    */
   if (n != ctx->cur_n) {
      if (n > 0) {
         disasm_handle_last(ctx);
      }
      ctx->stats->instrs_per_cat[opc_cat]++;
      ctx->cur_n = n;

      /* mov vs cov stats are a bit harder to fish out of the field
       * names, because current ir3-cat1.xml doesn't use {NAME} for
       * this distinction. So for now just handle this case with
       * some hand-coded parsing:
       */
      if (opc_cat == 1) {
         unsigned opc = (instr >> 57) & 0x3;
         unsigned src_type = (instr >> 50) & 0x7;
         unsigned dst_type = (instr >> 46) & 0x7;

         if (opc == 0) {
            if (src_type == dst_type) {
               ctx->stats->mov_count++;
            } else {
               ctx->stats->cov_count++;
            }
         }
      }
   }

   ctx->cur_opc_cat = opc_cat;

   if (debug & PRINT_RAW) {
      fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
              opc_cat, n, ctx->extra_cycles + n, dwords[1], dwords[0]);
   }
}
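/*
 * Illustrative only (not from the original source): with the hand-coded cat1
 * parsing above, a same-type move such as "mov.f32f32" (src_type == dst_type)
 * bumps stats->mov_count, while a type-converting "cov.f32f16"
 * (src_type != dst_type) bumps stats->cov_count instead.
 */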
int
disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
                 unsigned gpu_id, struct shader_stats *stats)
{
   struct isa_decode_options decode_options = {
      .gpu_id = gpu_id,
      .show_errors = true,
      .max_errors = 5,
      .branch_labels = true,
      .field_cb = disasm_field_cb,
      .instr_cb = disasm_instr_cb,
   };
   struct disasm_ctx ctx = {
      .out = out,
      .level = level,
      .options = &decode_options,
      .stats = stats,
      .cur_n = -1,
   };

   memset(stats, 0, sizeof(*stats));

   decode_options.cbdata = &ctx;

   isa_decode(dwords, sizedwords * 4, out, &decode_options);

   disasm_handle_last(&ctx);

   if (debug & PRINT_STATS)
      print_stats(&ctx);

   return 0;
}
void
disasm_a3xx_set_debug(enum debug_t d)
{
   debug = d;
}
#include <setjmp.h>
@@ -564,34 +561,38 @@ static jmp_buf jmp_env;
void
ir3_assert_handler(const char *expr, const char *file, int line,
                   const char *func)
{
   mesa_loge("%s:%u: %s: Assertion `%s' failed.", file, line, func, expr);
   if (jmp_env_valid)
      longjmp(jmp_env, 1);
   abort();
}
#define TRY(x)                                                                 \
   do {                                                                        \
      assert(!jmp_env_valid);                                                  \
      if (setjmp(jmp_env) == 0) {                                              \
         jmp_env_valid = true;                                                 \
         x;                                                                    \
      }                                                                        \
      jmp_env_valid = false;                                                   \
   } while (0)
int
disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
            unsigned gpu_id)
{
   struct shader_stats stats;
   return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
}
int
try_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
                unsigned gpu_id)
{
   struct shader_stats stats;
   int ret = -1;
   TRY(ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats));
   return ret;
}
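/*
 * A minimal usage sketch (not part of this change); the shader binary, size,
 * and gpu_id below are hypothetical placeholders:
 */
#if 0
static void
dump_shader_example(uint32_t *bin, int sizedwords)
{
   struct shader_stats stats;

   /* collect per-shader stats while disassembling to stdout: */
   disasm_a3xx_stat(bin, sizedwords, 0, stdout, 630, &stats);

   /* or, when the input may be malformed, use the setjmp-guarded variant: */
   if (try_disasm_a3xx(bin, sizedwords, 0, stdout, 630) != 0)
      fprintf(stderr, "disassembly failed\n");
}
#endif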

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -33,67 +33,65 @@
* Handlers for instructions changed/added in a4xx:
*/
/* src[] = { buffer_index, offset }. No const_index */
static void
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                         struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *ldgb, *src0, *src1, *byte_offset, *offset;

   struct ir3_instruction *ssbo = ir3_ssbo_to_ibo(ctx, intr->src[0]);

   byte_offset = ir3_get_src(ctx, &intr->src[1])[0];
   offset = ir3_get_src(ctx, &intr->src[2])[0];

   /* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
   src0 = ir3_collect(ctx, byte_offset, create_immed(b, 0));
   src1 = offset;

   ldgb = ir3_LDGB(b, ssbo, 0, src0, 0, src1, 0);
   ldgb->dsts[0]->wrmask = MASK(intr->num_components);
   ldgb->cat6.iim_val = intr->num_components;
   ldgb->cat6.d = 4;
   ldgb->cat6.type = TYPE_U32;
   ldgb->barrier_class = IR3_BARRIER_BUFFER_R;
   ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W;

   ir3_split_dest(b, dst, ldgb, 0, intr->num_components);
}
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
static void
emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stgb, *src0, *src1, *src2, *byte_offset, *offset;
   unsigned wrmask = nir_intrinsic_write_mask(intr);
   unsigned ncomp = ffs(~wrmask) - 1;

   assert(wrmask == BITFIELD_MASK(intr->num_components));

   struct ir3_instruction *ssbo = ir3_ssbo_to_ibo(ctx, intr->src[1]);

   byte_offset = ir3_get_src(ctx, &intr->src[2])[0];
   offset = ir3_get_src(ctx, &intr->src[3])[0];

   /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
    * nir already *= 4:
    */
   src0 = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
   src1 = offset;
   src2 = ir3_collect(ctx, byte_offset, create_immed(b, 0));

   stgb = ir3_STGB(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
   stgb->cat6.iim_val = ncomp;
   stgb->cat6.d = 4;
   stgb->cat6.type = TYPE_U32;
   stgb->barrier_class = IR3_BARRIER_BUFFER_W;
   stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

   array_insert(b, b->keeps, stgb);
}
/*
@@ -116,229 +114,228 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
static struct ir3_instruction *
emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *byte_offset,
      *offset;
   type_t type = TYPE_U32;

   ssbo = ir3_ssbo_to_ibo(ctx, intr->src[0]);

   byte_offset = ir3_get_src(ctx, &intr->src[1])[0];
   offset = ir3_get_src(ctx, &intr->src[3])[0];

   /* src0 is data (or uvec2(data, compare))
    * src1 is offset
    * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset)
    *
    * Note that nir already multiplies the offset by four
    */
   src0 = ir3_get_src(ctx, &intr->src[2])[0];
   src1 = offset;
   src2 = ir3_collect(ctx, byte_offset, create_immed(b, 0));

   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add_ir3:
      atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_imin_ir3:
      atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      type = TYPE_S32;
      break;
   case nir_intrinsic_ssbo_atomic_umin_ir3:
      atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_imax_ir3:
      atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      type = TYPE_S32;
      break;
   case nir_intrinsic_ssbo_atomic_umax_ir3:
      atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_and_ir3:
      atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_or_ir3:
      atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_xor_ir3:
      atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
      atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
      /* for cmpxchg, src0 is [ui]vec2(data, compare): */
      src0 = ir3_collect(ctx, ir3_get_src(ctx, &intr->src[3])[0], src0);
      src1 = ir3_get_src(ctx, &intr->src[4])[0];
      atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
      break;
   default:
      unreachable("boo");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 4;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

   /* even if nothing consume the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   return atomic;
}
static struct ir3_instruction *
get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr,
                 struct ir3_instruction *const *coords, bool byteoff)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *offset;
   unsigned index = nir_src_as_uint(instr->src[0]);
   unsigned ncoords = ir3_get_image_coords(instr, NULL);

   /* to calculate the byte offset (yes, uggg) we need (up to) three
    * const values to know the bytes per pixel, and y and z stride:
    */
   const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
   unsigned cb = regid(const_state->offsets.image_dims, 0) +
                 const_state->image_dims.off[index];

   debug_assert(const_state->image_dims.mask & (1 << index));

   /* offset = coords.x * bytes_per_pixel: */
   offset = ir3_MUL_S24(b, coords[0], 0, create_uniform(b, cb + 0), 0);
   if (ncoords > 1) {
      /* offset += coords.y * y_pitch: */
      offset =
         ir3_MAD_S24(b, create_uniform(b, cb + 1), 0, coords[1], 0, offset, 0);
   }
   if (ncoords > 2) {
      /* offset += coords.z * z_pitch: */
      offset =
         ir3_MAD_S24(b, create_uniform(b, cb + 2), 0, coords[2], 0, offset, 0);
   }

   if (!byteoff) {
      /* Some cases, like atomics, seem to use dword offset instead
       * of byte offsets.. blob just puts an extra shr.b in there
       * in those cases:
       */
      offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
   }

   return ir3_collect(ctx, offset, create_immed(b, 0));
}
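/*
 * Illustrative only (hypothetical image_dims constants): for a 2D RGBA8
 * image with bytes_per_pixel = 4 and y_pitch = 256, coords (3, 2) give
 *
 *    offset = 3 * 4 + 2 * 256 = 524 bytes
 *
 * and with byteoff == false the extra shr.b yields 524 >> 2 = 131, the
 * dword offset used by the atomic paths.
 */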
/* src[] = { index, coord, sample_index, value }. const_index[] = {} */
static void
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stib, *offset;
   struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]);
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   unsigned ncomp =
      ir3_get_num_components_for_image_format(nir_intrinsic_format(intr));

   /* src0 is value
    * src1 is coords
    * src2 is 64b byte offset
    */

   offset = get_image_offset(ctx, intr, coords, true);

   /* NOTE: stib seems to take byte offset, but stgb.typed can be used
    * too and takes a dword offset.. not quite sure yet why blob uses
    * one over the other in various cases.
    */

   stib = ir3_STIB(b, ibo, 0, ir3_create_collect(ctx, value, ncomp), 0,
                   ir3_create_collect(ctx, coords, ncoords), 0, offset, 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = ncoords;
   stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   stib->cat6.typed = true;
   stib->barrier_class = IR3_BARRIER_IMAGE_W;
   stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;

   array_insert(b, b->keeps, stib);
}
/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */
static struct ir3_instruction *
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *src0, *src1, *src2;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *image = ir3_image_to_ibo(ctx, intr->src[0]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);

   /* src0 is value (or uvec2(value, compare))
    * src1 is coords
    * src2 is 64b byte offset
    */
   src0 = ir3_get_src(ctx, &intr->src[3])[0];
   src1 = ir3_create_collect(ctx, coords, ncoords);
   src2 = get_image_offset(ctx, intr, coords, false);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_atomic_add:
      atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
      atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
      atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_and:
      atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_or:
      atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_xor:
      atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_exchange:
      atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   case nir_intrinsic_image_atomic_comp_swap:
      /* for cmpxchg, src0 is [ui]vec2(data, compare): */
      src0 = ir3_collect(ctx, ir3_get_src(ctx, &intr->src[4])[0], src0);
      atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
      break;
   default:
      unreachable("boo");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = ncoords;
   atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   atomic->cat6.typed = true;
   atomic->barrier_class = IR3_BARRIER_IMAGE_W;
   atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;

   /* even if nothing consume the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   return atomic;
}
const struct ir3_context_funcs ir3_a4xx_funcs = {
   .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
   .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
   .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
   .emit_intrinsic_store_image = emit_intrinsic_store_image,
   .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
   .emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
   .emit_intrinsic_load_global_ir3 = NULL,
   .emit_intrinsic_store_global_ir3 = NULL,
};


@@ -40,53 +40,53 @@
/* src[] = { buffer_index, offset }. No const_index */
static void
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                         struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *offset;
   struct ir3_instruction *ldib;

   offset = ir3_get_src(ctx, &intr->src[2])[0];

   ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = 1;
   ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? TYPE_U16 : TYPE_U32;
   ldib->barrier_class = IR3_BARRIER_BUFFER_R;
   ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
static void
emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stib, *val, *offset;
   unsigned wrmask = nir_intrinsic_write_mask(intr);
   unsigned ncomp = ffs(~wrmask) - 1;

   assert(wrmask == BITFIELD_MASK(intr->num_components));

   /* src0 is offset, src1 is value:
    */
   val = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
   offset = ir3_get_src(ctx, &intr->src[3])[0];

   stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0, val, 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = 1;
   stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32;
   stib->barrier_class = IR3_BARRIER_BUFFER_W;
   stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
   ir3_handle_bindless_cat6(stib, intr->src[1]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(b, b->keeps, stib);
}
/*
@@ -109,329 +109,321 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
static struct ir3_instruction *
emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy;
   type_t type = TYPE_U32;

   ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);

   data = ir3_get_src(ctx, &intr->src[2])[0];

   /* So this gets a bit creative:
    *
    * src0 - vecN offset/coords
    * src1.x - is actually destination register
    * src1.y - is 'data' except for cmpxchg where src2.y is 'compare'
    * src1.z - is 'data' for cmpxchg
    *
    * The combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    *
    * Note that nir already multiplies the offset by four
    */
   dummy = create_immed(b, 0);

   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap_ir3) {
      src0 = ir3_get_src(ctx, &intr->src[4])[0];
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[3])[0];
      src1 = ir3_collect(ctx, dummy, compare, data);
   } else {
      src0 = ir3_get_src(ctx, &intr->src[3])[0];
      src1 = ir3_collect(ctx, dummy, data);
   }

   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add_ir3:
      atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_imin_ir3:
      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_intrinsic_ssbo_atomic_umin_ir3:
      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_imax_ir3:
      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_intrinsic_ssbo_atomic_umax_ir3:
      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_and_ir3:
      atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_or_ir3:
      atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_xor_ir3:
      atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
      atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
      atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   default:
      unreachable("boo");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 1;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consume the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   struct ir3_instruction *split;
   ir3_split_dest(b, &split, atomic, 0, 1);
   return split;
}
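/*
 * Illustrative only (not from the original source): for a plain ssbo atomic
 * add of value v at offset off, the sequence built above is roughly
 *
 *    src1   = collect(dummy_immed, v)      // dummy occupies component .x
 *    atomic = ATOMIC_ADD_G(ibo, off, src1)
 *    result = split(atomic, component 0)
 *
 * with atomic->dsts[0] register-tied to the collect so RA assigns them the
 * same vec2, which is what lets src1.x stand in for the destination.
 */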
/* src[] = { deref, coord, sample_index }. const_index[] = {} */
static void
emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *ldib;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);

   ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
                   ir3_create_collect(ctx, coords, ncoords), 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = ncoords;
   ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   ldib->cat6.typed = true;
   ldib->barrier_class = IR3_BARRIER_IMAGE_R;
   ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}
/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
static void
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stib;
   struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]);
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   enum pipe_format format = nir_intrinsic_format(intr);
   unsigned ncomp = ir3_get_num_components_for_image_format(format);

   /* src0 is offset, src1 is value:
    */
   stib = ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
                   ir3_create_collect(ctx, coords, ncoords), 0,
                   ir3_create_collect(ctx, value, ncomp), 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = ncoords;
   stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   stib->cat6.typed = true;
   stib->barrier_class = IR3_BARRIER_IMAGE_W;
   stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(stib, intr->src[0]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(b, b->keeps, stib);
}
/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */
static struct ir3_instruction *
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0];
   unsigned ncoords = ir3_get_image_coords(intr, NULL);

   ibo = ir3_image_to_ibo(ctx, intr->src[0]);

   /* So this gets a bit creative:
    *
    * src0 - vecN offset/coords
    * src1.x - is actually destination register
    * src1.y - is 'value' except for cmpxchg where src2.y is 'compare'
    * src1.z - is 'value' for cmpxchg
    *
    * The combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    */
   dummy = create_immed(b, 0);
   src0 = ir3_create_collect(ctx, coords, ncoords);

   if (intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ||
       intr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap) {
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0];
      src1 = ir3_collect(ctx, dummy, compare, value);
   } else {
      src1 = ir3_collect(ctx, dummy, value);
   }

   switch (intr->intrinsic) {
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_add:
      atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_and:
      atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_or:
      atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_xor:
      atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_exchange:
      atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
      atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
      break;
   default:
      unreachable("boo");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = ncoords;
   atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   atomic->cat6.typed = true;
   atomic->barrier_class = IR3_BARRIER_IMAGE_W;
   atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consume the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   struct ir3_instruction *split;
   ir3_split_dest(b, &split, atomic, 0, 1);
   return split;
}
static void
emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]);
   struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
   resinfo->cat6.iim_val = 1;
   resinfo->cat6.d = intr->num_components;
   resinfo->cat6.type = TYPE_U32;
   resinfo->cat6.typed = false;
   /* resinfo has no writemask and always writes out 3 components: */
   compile_assert(ctx, intr->num_components <= 3);
   resinfo->dsts[0]->wrmask = MASK(3);
   ir3_handle_bindless_cat6(resinfo, intr->src[0]);

   ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}
static void
emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
                               nir_intrinsic_instr *intr,
                               struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   unsigned dest_components = nir_intrinsic_dest_components(intr);
   struct ir3_instruction *addr, *offset;

   addr = ir3_collect(ctx, ir3_get_src(ctx, &intr->src[0])[0],
                      ir3_get_src(ctx, &intr->src[0])[1]);

   offset = ir3_get_src(ctx, &intr->src[1])[0];

   struct ir3_instruction *load =
      ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                create_immed(b, 0), 0, create_immed(b, dest_components), 0);
   load->cat6.type = TYPE_U32;
   load->dsts[0]->wrmask = MASK(dest_components);

   load->barrier_class = IR3_BARRIER_BUFFER_R;
   load->barrier_conflict = IR3_BARRIER_BUFFER_W;

   ir3_split_dest(b, dst, load, 0, dest_components);
}
static void
emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
nir_intrinsic_instr *intr)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *value, *addr, *offset;
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
struct ir3_block *b = ctx->block;
struct ir3_instruction *value, *addr, *offset;
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
addr = ir3_collect(ctx,
ir3_get_src(ctx, &intr->src[1])[0],
ir3_get_src(ctx, &intr->src[1])[1]);
addr = ir3_collect(ctx, ir3_get_src(ctx, &intr->src[1])[0],
ir3_get_src(ctx, &intr->src[1])[1]);
offset = ir3_get_src(ctx, &intr->src[2])[0];
offset = ir3_get_src(ctx, &intr->src[2])[0];
value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
struct ir3_instruction *stg =
ir3_STG_A(b,
addr, 0,
offset, 0,
create_immed(b, 0), 0,
create_immed(b, 0), 0,
value, 0,
create_immed(b, ncomp), 0);
stg->cat6.type = TYPE_U32;
stg->cat6.iim_val = 1;
struct ir3_instruction *stg =
ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0);
stg->cat6.type = TYPE_U32;
stg->cat6.iim_val = 1;
array_insert(b, b->keeps, stg);
array_insert(b, b->keeps, stg);
stg->barrier_class = IR3_BARRIER_BUFFER_W;
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
stg->barrier_class = IR3_BARRIER_BUFFER_W;
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
}
const struct ir3_context_funcs ir3_a6xx_funcs = {
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
.emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
.emit_intrinsic_load_image = emit_intrinsic_load_image,
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size,
.emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
.emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
.emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
.emit_intrinsic_load_image = emit_intrinsic_load_image,
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size,
.emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
.emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
};


@@ -42,270 +42,274 @@
* so that we don't have to rewrite (and keep track of) users.
*/
#include "ir3.h"
#include <stdlib.h>
#include "ir3.h"
struct array_state {
struct ir3_register *live_in_definition;
struct ir3_register *live_out_definition;
bool constructed;
bool optimized;
struct ir3_register *live_in_definition;
struct ir3_register *live_out_definition;
bool constructed;
bool optimized;
};
struct array_ctx {
struct array_state *states;
struct ir3 *ir;
unsigned array_count;
struct array_state *states;
struct ir3 *ir;
unsigned array_count;
};
static struct array_state *
get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
return &ctx->states[ctx->array_count * block->index + id];
return &ctx->states[ctx->array_count * block->index + id];
}
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr);
static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
struct ir3_block *block,
struct ir3_array *arr);
static struct ir3_register *
read_value_end(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr)
read_value_end(struct array_ctx *ctx, struct ir3_block *block,
struct ir3_array *arr)
{
struct array_state *state = get_state(ctx, block, arr->id);
if (state->live_out_definition)
return state->live_out_definition;
struct array_state *state = get_state(ctx, block, arr->id);
if (state->live_out_definition)
return state->live_out_definition;
state->live_out_definition = read_value_beginning(ctx, block, arr);
return state->live_out_definition;
state->live_out_definition = read_value_beginning(ctx, block, arr);
return state->live_out_definition;
}
/* Roughly equivalent to readValueRecursive from the paper: */
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr)
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
struct ir3_array *arr)
{
struct array_state *state = get_state(ctx, block, arr->id);
struct array_state *state = get_state(ctx, block, arr->id);
if (state->constructed)
return state->live_in_definition;
if (state->constructed)
return state->live_in_definition;
if (block->predecessors_count == 0) {
state->constructed = true;
return NULL;
}
if (block->predecessors_count == 0) {
state->constructed = true;
return NULL;
}
if (block->predecessors_count == 1) {
state->live_in_definition = read_value_end(ctx, block->predecessors[0], arr);
state->constructed = true;
return state->live_in_definition;
}
if (block->predecessors_count == 1) {
state->live_in_definition =
read_value_end(ctx, block->predecessors[0], arr);
state->constructed = true;
return state->live_in_definition;
}
unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
struct ir3_instruction *phi =
ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
list_del(&phi->node);
list_add(&phi->node, &block->instr_list);
unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
struct ir3_instruction *phi =
ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
list_del(&phi->node);
list_add(&phi->node, &block->instr_list);
struct ir3_register *dst = __ssa_dst(phi);
dst->flags |= flags;
dst->array.id = arr->id;
dst->size = arr->length;
struct ir3_register *dst = __ssa_dst(phi);
dst->flags |= flags;
dst->array.id = arr->id;
dst->size = arr->length;
state->live_in_definition = phi->dsts[0];
state->constructed = true;
state->live_in_definition = phi->dsts[0];
state->constructed = true;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_register *src = read_value_end(ctx, block->predecessors[i], arr);
struct ir3_register *src_reg;
if (src) {
src_reg = __ssa_src(phi, src->instr, flags);
} else {
src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
}
src_reg->array.id = arr->id;
src_reg->size = arr->length;
}
return phi->dsts[0];
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_register *src =
read_value_end(ctx, block->predecessors[i], arr);
struct ir3_register *src_reg;
if (src) {
src_reg = __ssa_src(phi, src->instr, flags);
} else {
src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
}
src_reg->array.id = arr->id;
src_reg->size = arr->length;
}
return phi->dsts[0];
}
static struct ir3_register *
remove_trivial_phi(struct ir3_instruction *phi)
{
/* Break cycles */
if (phi->data)
return phi->data;
/* Break cycles */
if (phi->data)
return phi->data;
phi->data = phi->dsts[0];
phi->data = phi->dsts[0];
struct ir3_register *unique_def = NULL;
bool unique = true;
for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
struct ir3_register *src = phi->srcs[i];
struct ir3_register *unique_def = NULL;
bool unique = true;
for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
struct ir3_register *src = phi->srcs[i];
/* If there are any undef sources, then the remaining sources may not
* dominate the phi node, even if they are all equal. So we need to
* bail out in this case.
*
* This seems to be a bug in the original paper.
*/
if (!src->def) {
unique = false;
break;
}
/* If there are any undef sources, then the remaining sources may not
* dominate the phi node, even if they are all equal. So we need to
* bail out in this case.
*
* This seems to be a bug in the original paper.
*/
if (!src->def) {
unique = false;
break;
}
struct ir3_instruction *src_instr = src->def->instr;
struct ir3_instruction *src_instr = src->def->instr;
/* phi sources which point to the phi itself don't count for
* figuring out if the phi is trivial
*/
if (src_instr == phi)
continue;
/* phi sources which point to the phi itself don't count for
* figuring out if the phi is trivial
*/
if (src_instr == phi)
continue;
if (src_instr->opc == OPC_META_PHI) {
src->def = remove_trivial_phi(src->def->instr);
}
if (src_instr->opc == OPC_META_PHI) {
src->def = remove_trivial_phi(src->def->instr);
}
if (unique_def) {
if (unique_def != src->def) {
unique = false;
break;
}
} else {
unique_def = src->def;
}
}
if (unique_def) {
if (unique_def != src->def) {
unique = false;
break;
}
} else {
unique_def = src->def;
}
}
if (unique) {
phi->data = unique_def;
return unique_def;
} else {
return phi->dsts[0];
}
if (unique) {
phi->data = unique_def;
return unique_def;
} else {
return phi->dsts[0];
}
}
static struct ir3_register *
lookup_value(struct ir3_register *reg)
{
if (reg->instr->opc == OPC_META_PHI)
return reg->instr->data;
return reg;
if (reg->instr->opc == OPC_META_PHI)
return reg->instr->data;
return reg;
}
static struct ir3_register *
lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
struct array_state *state = get_state(ctx, block, id);
if (state->live_in_definition)
return lookup_value(state->live_in_definition);
struct array_state *state = get_state(ctx, block, id);
if (state->live_in_definition)
return lookup_value(state->live_in_definition);
return NULL;
return NULL;
}
bool
ir3_array_to_ssa(struct ir3 *ir)
{
struct array_ctx ctx = {};
struct array_ctx ctx = {};
foreach_array (array, &ir->array_list) {
ctx.array_count = MAX2(ctx.array_count, array->id + 1);
}
foreach_array (array, &ir->array_list) {
ctx.array_count = MAX2(ctx.array_count, array->id + 1);
}
if (ctx.array_count == 0)
return false;
if (ctx.array_count == 0)
return false;
unsigned i = 0;
foreach_block (block, &ir->block_list) {
block->index = i++;
}
unsigned i = 0;
foreach_block (block, &ir->block_list) {
block->index = i++;
}
ctx.ir = ir;
ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
ctx.ir = ir;
ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
foreach_dst (dst, instr) {
if (dst->flags & IR3_REG_ARRAY) {
struct array_state *state =
get_state(&ctx, block, dst->array.id);
state->live_out_definition = dst;
}
}
}
}
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
foreach_dst (dst, instr) {
if (dst->flags & IR3_REG_ARRAY) {
struct array_state *state =
get_state(&ctx, block, dst->array.id);
state->live_out_definition = dst;
}
}
}
}
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI)
continue;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI)
continue;
foreach_dst (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
foreach_dst (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
/* Construct any phi nodes necessary to read this value */
read_value_beginning(&ctx, block, arr);
}
}
foreach_src (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
/* Construct any phi nodes necessary to read this value */
read_value_beginning(&ctx, block, arr);
}
}
foreach_src (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
/* Construct any phi nodes necessary to read this value */
read_value_beginning(&ctx, block, arr);
}
}
}
}
/* Construct any phi nodes necessary to read this value */
read_value_beginning(&ctx, block, arr);
}
}
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI)
remove_trivial_phi(instr);
else
break;
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI)
remove_trivial_phi(instr);
else
break;
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI) {
if (!(instr->flags & IR3_REG_ARRAY))
continue;
if (instr->data != instr->dsts[0]) {
list_del(&instr->node);
continue;
}
for (unsigned i = 0; i < instr->srcs_count; i++) {
instr->srcs[i] = lookup_value(instr->srcs[i]);
}
} else {
foreach_dst (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY)) {
if (!reg->tied) {
struct ir3_register *def =
lookup_live_in(&ctx, block, reg->array.id);
if (def)
ir3_reg_set_last_array(instr, reg, def);
}
reg->flags |= IR3_REG_SSA;
}
}
foreach_src (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY)) {
/* It is assumed that before calling
* ir3_array_to_ssa(), reg->def was set to the
* previous writer of the array within the current
* block or NULL if none.
*/
if (!reg->def) {
reg->def = lookup_live_in(&ctx, block, reg->array.id);
}
reg->flags |= IR3_REG_SSA;
}
}
}
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI) {
if (!(instr->flags & IR3_REG_ARRAY))
continue;
if (instr->data != instr->dsts[0]) {
list_del(&instr->node);
continue;
}
for (unsigned i = 0; i < instr->srcs_count; i++) {
instr->srcs[i] = lookup_value(instr->srcs[i]);
}
} else {
foreach_dst (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY)) {
if (!reg->tied) {
struct ir3_register *def =
lookup_live_in(&ctx, block, reg->array.id);
if (def)
ir3_reg_set_last_array(instr, reg, def);
}
reg->flags |= IR3_REG_SSA;
}
}
foreach_src (reg, instr) {
if ((reg->flags & IR3_REG_ARRAY)) {
/* It is assumed that before calling
* ir3_array_to_ssa(), reg->def was set to the
* previous writer of the array within the current
* block or NULL if none.
*/
if (!reg->def) {
reg->def = lookup_live_in(&ctx, block, reg->array.id);
}
reg->flags |= IR3_REG_SSA;
}
}
}
}
}
free(ctx.states);
return true;
free(ctx.states);
return true;
}


@@ -22,8 +22,8 @@
*/
#include "ir3_assembler.h"
#include "ir3_shader.h"
#include "ir3_parser.h"
#include "ir3_shader.h"
/**
* A helper to go from ir3 assembly to assembled shader. The shader has a
@@ -32,43 +32,43 @@
struct ir3_shader *
ir3_parse_asm(struct ir3_compiler *c, struct ir3_kernel_info *info, FILE *in)
{
struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
shader->compiler = c;
shader->type = MESA_SHADER_COMPUTE;
mtx_init(&shader->variants_lock, mtx_plain);
struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
shader->compiler = c;
shader->type = MESA_SHADER_COMPUTE;
mtx_init(&shader->variants_lock, mtx_plain);
struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
v->type = MESA_SHADER_COMPUTE;
v->shader = shader;
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
v->type = MESA_SHADER_COMPUTE;
v->shader = shader;
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
shader->variants = v;
shader->variant_count = 1;
shader->variants = v;
shader->variant_count = 1;
info->numwg = INVALID_REG;
info->numwg = INVALID_REG;
for (int i = 0; i < MAX_BUFS; i++) {
info->buf_addr_regs[i] = INVALID_REG;
}
for (int i = 0; i < MAX_BUFS; i++) {
info->buf_addr_regs[i] = INVALID_REG;
}
/* Provide a default local_size in case the shader doesn't set it, so that
* at least we don't crash.
*/
v->local_size[0] = v->local_size[1] = v->local_size[2] = 1;
/* Provide a default local_size in case the shader doesn't set it, so that
* at least we don't crash.
*/
v->local_size[0] = v->local_size[1] = v->local_size[2] = 1;
v->ir = ir3_parse(v, info, in);
if (!v->ir)
goto error;
v->ir = ir3_parse(v, info, in);
if (!v->ir)
goto error;
ir3_debug_print(v->ir, "AFTER PARSING");
ir3_debug_print(v->ir, "AFTER PARSING");
v->bin = ir3_shader_assemble(v);
if (!v->bin)
goto error;
v->bin = ir3_shader_assemble(v);
if (!v->bin)
goto error;
return shader;
return shader;
error:
ralloc_free(shader);
return NULL;
ralloc_free(shader);
return NULL;
}


@@ -30,17 +30,18 @@
#define MAX_BUFS 4
struct ir3_kernel_info {
uint32_t num_bufs;
uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */
uint32_t buf_addr_regs[MAX_BUFS];
uint32_t num_bufs;
uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */
uint32_t buf_addr_regs[MAX_BUFS];
/* driver-param uniforms: */
unsigned numwg;
/* driver-param uniforms: */
unsigned numwg;
};
struct ir3_shader;
struct ir3_compiler;
struct ir3_shader * ir3_parse_asm(struct ir3_compiler *c, struct ir3_kernel_info *info, FILE *in);
struct ir3_shader *ir3_parse_asm(struct ir3_compiler *c,
struct ir3_kernel_info *info, FILE *in);
#endif /* __IR3_ASSEMBLER_H__ */


@@ -26,75 +26,74 @@
#include "ir3.h"
static bool
is_safe_conv(struct ir3_instruction *instr, type_t src_type,
opc_t *src_opc)
is_safe_conv(struct ir3_instruction *instr, type_t src_type, opc_t *src_opc)
{
if (instr->opc != OPC_MOV)
return false;
if (instr->opc != OPC_MOV)
return false;
/* Only allow half->full or full->half without any type conversion (like
* int to float).
*/
if (type_size(instr->cat1.src_type) == type_size(instr->cat1.dst_type) ||
full_type(instr->cat1.src_type) != full_type(instr->cat1.dst_type))
return false;
/* Only allow half->full or full->half without any type conversion (like
* int to float).
*/
if (type_size(instr->cat1.src_type) == type_size(instr->cat1.dst_type) ||
full_type(instr->cat1.src_type) != full_type(instr->cat1.dst_type))
return false;
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
/* disallow conversions that cannot be folded into
* alu instructions:
*/
if (instr->cat1.round != ROUND_ZERO)
return false;
/* disallow conversions that cannot be folded into
* alu instructions:
*/
if (instr->cat1.round != ROUND_ZERO)
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return false;
/* Check that the source of the conv matches the type of the src
* instruction.
*/
if (src_type == instr->cat1.src_type)
return true;
/* Check that the source of the conv matches the type of the src
* instruction.
*/
if (src_type == instr->cat1.src_type)
return true;
/* We can handle mismatches with integer types by converting the opcode
* but not when an integer is reinterpreted as a float or vice-versa.
*/
if (type_float(src_type) != type_float(instr->cat1.src_type))
return false;
/* We can handle mismatches with integer types by converting the opcode
* but not when an integer is reinterpreted as a float or vice-versa.
*/
if (type_float(src_type) != type_float(instr->cat1.src_type))
return false;
/* We have types with mismatched signedness. Mismatches on the signedness
* don't matter when narrowing:
*/
if (type_size(instr->cat1.dst_type) < type_size(instr->cat1.src_type))
return true;
/* We have types with mismatched signedness. Mismatches on the signedness
* don't matter when narrowing:
*/
if (type_size(instr->cat1.dst_type) < type_size(instr->cat1.src_type))
return true;
/* Try swapping the opcode: */
bool can_swap = true;
*src_opc = ir3_try_swap_signedness(*src_opc, &can_swap);
return can_swap;
/* Try swapping the opcode: */
bool can_swap = true;
*src_opc = ir3_try_swap_signedness(*src_opc, &can_swap);
return can_swap;
}
static bool
all_uses_safe_conv(struct ir3_instruction *conv_src, type_t src_type)
{
opc_t opc = conv_src->opc;
bool first = true;
foreach_ssa_use (use, conv_src) {
opc_t new_opc = opc;
if (!is_safe_conv(use, src_type, &new_opc))
return false;
/* Check if multiple uses have conflicting requirements on the opcode.
*/
if (!first && opc != new_opc)
return false;
first = false;
opc = new_opc;
}
conv_src->opc = opc;
return true;
opc_t opc = conv_src->opc;
bool first = true;
foreach_ssa_use (use, conv_src) {
opc_t new_opc = opc;
if (!is_safe_conv(use, src_type, &new_opc))
return false;
/* Check if multiple uses have conflicting requirements on the opcode.
*/
if (!first && opc != new_opc)
return false;
first = false;
opc = new_opc;
}
conv_src->opc = opc;
return true;
}
/* For an instruction which has a conversion folded in, re-write the
@@ -105,74 +104,74 @@ all_uses_safe_conv(struct ir3_instruction *conv_src, type_t src_type)
static void
rewrite_src_uses(struct ir3_instruction *src)
{
foreach_ssa_use (use, src) {
assert(use->opc == OPC_MOV);
foreach_ssa_use (use, src) {
assert(use->opc == OPC_MOV);
if (is_half(src)) {
use->srcs[0]->flags |= IR3_REG_HALF;
} else {
use->srcs[0]->flags &= ~IR3_REG_HALF;
}
if (is_half(src)) {
use->srcs[0]->flags |= IR3_REG_HALF;
} else {
use->srcs[0]->flags &= ~IR3_REG_HALF;
}
use->cat1.src_type = use->cat1.dst_type;
}
use->cat1.src_type = use->cat1.dst_type;
}
}
static bool
try_conversion_folding(struct ir3_instruction *conv)
{
struct ir3_instruction *src;
struct ir3_instruction *src;
if (conv->opc != OPC_MOV)
return false;
if (conv->opc != OPC_MOV)
return false;
/* NOTE: we can have non-ssa srcs after copy propagation: */
src = ssa(conv->srcs[0]);
if (!src)
return false;
/* NOTE: we can have non-ssa srcs after copy propagation: */
src = ssa(conv->srcs[0]);
if (!src)
return false;
if (!is_alu(src))
return false;
if (!is_alu(src))
return false;
bool can_fold;
type_t base_type = ir3_output_conv_type(src, &can_fold);
if (!can_fold)
return false;
bool can_fold;
type_t base_type = ir3_output_conv_type(src, &can_fold);
if (!can_fold)
return false;
type_t src_type = ir3_output_conv_src_type(src, base_type);
type_t dst_type = ir3_output_conv_dst_type(src, base_type);
type_t src_type = ir3_output_conv_src_type(src, base_type);
type_t dst_type = ir3_output_conv_dst_type(src, base_type);
/* Avoid cases where we've already folded in a conversion. We assume that
* if there is a chain of conversions that's foldable then it's been
* folded in NIR already.
*/
if (src_type != dst_type)
return false;
/* Avoid cases where we've already folded in a conversion. We assume that
* if there is a chain of conversions that's foldable then it's been
* folded in NIR already.
*/
if (src_type != dst_type)
return false;
if (!all_uses_safe_conv(src, src_type))
return false;
if (!all_uses_safe_conv(src, src_type))
return false;
ir3_set_dst_type(src, is_half(conv));
rewrite_src_uses(src);
ir3_set_dst_type(src, is_half(conv));
rewrite_src_uses(src);
return true;
return true;
}
bool
ir3_cf(struct ir3 *ir)
{
void *mem_ctx = ralloc_context(NULL);
bool progress = false;
void *mem_ctx = ralloc_context(NULL);
bool progress = false;
ir3_find_ssa_uses(ir, mem_ctx, false);
ir3_find_ssa_uses(ir, mem_ctx, false);
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
progress |= try_conversion_folding(instr);
}
}
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
progress |= try_conversion_folding(instr);
}
}
ralloc_free(mem_ctx);
ralloc_free(mem_ctx);
return progress;
return progress;
}


@@ -51,8 +51,10 @@ static const struct debug_named_value shader_debug_options[] = {
/* clang-format on */
};
DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG", shader_debug_options, 0)
DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH", NULL)
DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG",
shader_debug_options, 0)
DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH",
NULL)
enum ir3_shader_debug ir3_shader_debug = 0;
const char *ir3_shader_override_path = NULL;
@@ -60,126 +62,127 @@ const char *ir3_shader_override_path = NULL;
void
ir3_compiler_destroy(struct ir3_compiler *compiler)
{
disk_cache_destroy(compiler->disk_cache);
ralloc_free(compiler);
disk_cache_destroy(compiler->disk_cache);
ralloc_free(compiler);
}
struct ir3_compiler *
ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id, bool robust_ubo_access)
ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
bool robust_ubo_access)
{
struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
ir3_shader_debug = debug_get_option_ir3_shader_debug();
ir3_shader_override_path =
!__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
ir3_shader_debug = debug_get_option_ir3_shader_debug();
ir3_shader_override_path =
!__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
if (ir3_shader_override_path) {
ir3_shader_debug |= IR3_DBG_NOCACHE;
}
if (ir3_shader_override_path) {
ir3_shader_debug |= IR3_DBG_NOCACHE;
}
compiler->dev = dev;
compiler->gpu_id = gpu_id;
compiler->robust_ubo_access = robust_ubo_access;
compiler->dev = dev;
compiler->gpu_id = gpu_id;
compiler->robust_ubo_access = robust_ubo_access;
/* All known GPU's have 32k local memory (aka shared) */
compiler->local_mem_size = 32 * 1024;
/* TODO see if older GPU's were different here */
compiler->branchstack_size = 64;
compiler->wave_granularity = 2;
compiler->max_waves = 16;
/* All known GPU's have 32k local memory (aka shared) */
compiler->local_mem_size = 32 * 1024;
/* TODO see if older GPU's were different here */
compiler->branchstack_size = 64;
compiler->wave_granularity = 2;
compiler->max_waves = 16;
if (compiler->gpu_id >= 600) {
compiler->samgq_workaround = true;
/* a6xx split the pipeline state into geometry and fragment state, in
* order to let the VS run ahead of the FS. As a result there are now
* separate const files for the fragment shader and everything
* else, and separate limits. There seems to be a shared limit, but
* it's higher than the vert or frag limits.
*
* TODO: The shared limit seems to be different on different models.
*/
compiler->max_const_pipeline = 640;
compiler->max_const_frag = 512;
compiler->max_const_geom = 512;
compiler->max_const_safe = 128;
if (compiler->gpu_id >= 600) {
compiler->samgq_workaround = true;
/* a6xx split the pipeline state into geometry and fragment state, in
* order to let the VS run ahead of the FS. As a result there are now
* separate const files for the fragment shader and everything
* else, and separate limits. There seems to be a shared limit, but
* it's higher than the vert or frag limits.
*
* TODO: The shared limit seems to be different on different models.
*/
compiler->max_const_pipeline = 640;
compiler->max_const_frag = 512;
compiler->max_const_geom = 512;
compiler->max_const_safe = 128;
/* Compute shaders don't share a const file with the FS. Instead they
* have their own file, which is smaller than the FS one.
*
* TODO: is this true on earlier gen's?
*/
compiler->max_const_compute = 256;
/* Compute shaders don't share a const file with the FS. Instead they
* have their own file, which is smaller than the FS one.
*
* TODO: is this true on earlier gen's?
*/
compiler->max_const_compute = 256;
/* TODO: implement clip+cull distances on earlier gen's */
compiler->has_clip_cull = true;
/* TODO: implement clip+cull distances on earlier gen's */
compiler->has_clip_cull = true;
/* TODO: implement private memory on earlier gen's */
compiler->has_pvtmem = true;
/* TODO: implement private memory on earlier gen's */
compiler->has_pvtmem = true;
if (compiler->gpu_id == 650)
compiler->tess_use_shared = true;
} else {
compiler->max_const_pipeline = 512;
compiler->max_const_geom = 512;
compiler->max_const_frag = 512;
compiler->max_const_compute = 512;
if (compiler->gpu_id == 650)
compiler->tess_use_shared = true;
} else {
compiler->max_const_pipeline = 512;
compiler->max_const_geom = 512;
compiler->max_const_frag = 512;
compiler->max_const_compute = 512;
/* Note: this will have to change if/when we support tess+GS on
* earlier gen's.
*/
compiler->max_const_safe = 256;
}
/* Note: this will have to change if/when we support tess+GS on
* earlier gen's.
*/
compiler->max_const_safe = 256;
}
if (compiler->gpu_id == 650) {
/* This changed mid-generation for a650, so that using r32.x and above
* requires using the smallest threadsize.
*/
compiler->reg_size_vec4 = 64;
} else if (compiler->gpu_id >= 600) {
compiler->reg_size_vec4 = 96;
} else if (compiler->gpu_id >= 400) {
/* On a4xx-a5xx, using r24.x and above requires using the smallest
* threadsize.
*/
compiler->reg_size_vec4 = 48;
} else {
/* TODO: confirm this */
compiler->reg_size_vec4 = 96;
}
if (compiler->gpu_id == 650) {
/* This changed mid-generation for a650, so that using r32.x and above
* requires using the smallest threadsize.
*/
compiler->reg_size_vec4 = 64;
} else if (compiler->gpu_id >= 600) {
compiler->reg_size_vec4 = 96;
} else if (compiler->gpu_id >= 400) {
/* On a4xx-a5xx, using r24.x and above requires using the smallest
* threadsize.
*/
compiler->reg_size_vec4 = 48;
} else {
/* TODO: confirm this */
compiler->reg_size_vec4 = 96;
}
if (compiler->gpu_id >= 600) {
compiler->threadsize_base = 64;
} else if (compiler->gpu_id >= 400) {
/* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
* 1.1 subgroupSize which is 32.
*/
compiler->threadsize_base = 32;
} else {
compiler->threadsize_base = 8;
}
if (compiler->gpu_id >= 600) {
compiler->threadsize_base = 64;
} else if (compiler->gpu_id >= 400) {
/* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
* 1.1 subgroupSize which is 32.
*/
compiler->threadsize_base = 32;
} else {
compiler->threadsize_base = 8;
}
if (compiler->gpu_id >= 400) {
/* need special handling for "flat" */
compiler->flat_bypass = true;
compiler->levels_add_one = false;
compiler->unminify_coords = false;
compiler->txf_ms_with_isaml = false;
compiler->array_index_add_half = true;
compiler->instr_align = 16;
compiler->const_upload_unit = 4;
} else {
/* no special handling for "flat" */
compiler->flat_bypass = false;
compiler->levels_add_one = true;
compiler->unminify_coords = true;
compiler->txf_ms_with_isaml = true;
compiler->array_index_add_half = false;
compiler->instr_align = 4;
compiler->const_upload_unit = 8;
}
if (compiler->gpu_id >= 400) {
/* need special handling for "flat" */
compiler->flat_bypass = true;
compiler->levels_add_one = false;
compiler->unminify_coords = false;
compiler->txf_ms_with_isaml = false;
compiler->array_index_add_half = true;
compiler->instr_align = 16;
compiler->const_upload_unit = 4;
} else {
/* no special handling for "flat" */
compiler->flat_bypass = false;
compiler->levels_add_one = true;
compiler->unminify_coords = true;
compiler->txf_ms_with_isaml = true;
compiler->array_index_add_half = false;
compiler->instr_align = 4;
compiler->const_upload_unit = 8;
}
ir3_disk_cache_init(compiler);
ir3_disk_cache_init(compiler);
return compiler;
return compiler;
}


@@ -36,167 +36,167 @@ struct ir3_ra_reg_set;
struct ir3_shader;
struct ir3_compiler {
struct fd_device *dev;
uint32_t gpu_id;
uint32_t shader_count;
struct fd_device *dev;
uint32_t gpu_id;
uint32_t shader_count;
struct disk_cache *disk_cache;
struct disk_cache *disk_cache;
/* If true, UBO accesses are assumed to be bounds-checked as defined by
* VK_EXT_robustness2 and optimizations may have to be more conservative.
*/
bool robust_ubo_access;
/* If true, UBO accesses are assumed to be bounds-checked as defined by
* VK_EXT_robustness2 and optimizations may have to be more conservative.
*/
bool robust_ubo_access;
/*
* Configuration options for things that are handled differently on
* different generations:
*/
/*
* Configuration options for things that are handled differently on
* different generations:
*/
/* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
bool flat_bypass;
/* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
bool flat_bypass;
/* on a3xx, we need to add one to # of array levels:
*/
bool levels_add_one;
/* on a3xx, we need to add one to # of array levels:
*/
bool levels_add_one;
/* on a3xx, we need to scale up integer coords for isaml based
* on LoD:
*/
bool unminify_coords;
/* on a3xx, we need to scale up integer coords for isaml based
* on LoD:
*/
bool unminify_coords;
/* on a3xx do txf_ms w/ isaml and scaled coords: */
bool txf_ms_with_isaml;
/* on a3xx do txf_ms w/ isaml and scaled coords: */
bool txf_ms_with_isaml;
/* on a4xx, for array textures we need to add 0.5 to the array
* index coordinate:
*/
bool array_index_add_half;
/* on a4xx, for array textures we need to add 0.5 to the array
* index coordinate:
*/
bool array_index_add_half;
/* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders:
*/
bool samgq_workaround;
/* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders:
*/
bool samgq_workaround;
/* on a650, vertex shader <-> tess control io uses LDL/STL */
bool tess_use_shared;
/* on a650, vertex shader <-> tess control io uses LDL/STL */
bool tess_use_shared;
/* The maximum number of constants, in vec4's, across the entire graphics
* pipeline.
*/
uint16_t max_const_pipeline;
/* The maximum number of constants, in vec4's, across the entire graphics
* pipeline.
*/
uint16_t max_const_pipeline;
/* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */
uint16_t max_const_geom;
/* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */
uint16_t max_const_geom;
/* The maximum number of constants, in vec4's, for FS. */
uint16_t max_const_frag;
/* The maximum number of constants, in vec4's, for FS. */
uint16_t max_const_frag;
/* A "safe" max constlen that can be applied to each shader in the
* pipeline which we guarantee will never exceed any combined limits.
*/
uint16_t max_const_safe;
/* A "safe" max constlen that can be applied to each shader in the
* pipeline which we guarantee will never exceed any combined limits.
*/
uint16_t max_const_safe;
/* The maximum number of constants, in vec4's, for compute shaders. */
uint16_t max_const_compute;
/* The maximum number of constants, in vec4's, for compute shaders. */
uint16_t max_const_compute;
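/* Illustrative sketch only (not in the original source): these limits are in
 * vec4 units, so e.g. the a6xx max_const_pipeline of 640 vec4 set above
 * corresponds to 640 * 4 = 2560 dwords, i.e. 10 KiB of constants across the
 * whole graphics pipeline.
 */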
/* Number of instructions that the shader's base address and length
* (instrlen divides instruction count by this) must be aligned to.
*/
uint32_t instr_align;
/* Number of instructions that the shader's base address and length
* (instrlen divides instruction count by this) must be aligned to.
*/
uint32_t instr_align;
/* on a3xx, the unit of indirect const load is higher than later gens (in
* vec4 units):
*/
uint32_t const_upload_unit;
/* on a3xx, the unit of indirect const load is higher than later gens (in
* vec4 units):
*/
uint32_t const_upload_unit;
/* The base number of threads per wave. Some stages may be able to double
* this.
*/
uint32_t threadsize_base;
/* The base number of threads per wave. Some stages may be able to double
* this.
*/
uint32_t threadsize_base;
/* On at least a6xx, waves are always launched in pairs. In calculations
* about occupancy, we pretend that each wave pair is actually one wave,
* which simplifies many of the calculations, but means we have to
* multiply threadsize_base by this number.
*/
uint32_t wave_granularity;
/* On at least a6xx, waves are always launched in pairs. In calculations
* about occupancy, we pretend that each wave pair is actually one wave,
* which simplifies many of the calculations, but means we have to
* multiply threadsize_base by this number.
*/
uint32_t wave_granularity;
/* The maximum number of simultaneous waves per core. */
uint32_t max_waves;
/* The maximum number of simultaneous waves per core. */
uint32_t max_waves;
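/* Illustrative sketch only, assuming the a6xx defaults set in
 * ir3_compiler_create() above: max_waves = 16, wave_granularity = 2 and
 * threadsize_base = 64 give 16 * 2 * 64 = 2048 invocations in flight per core
 * at the base threadsize.
 */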
/* This is theoretical maximum number of vec4 registers that one wave of
* the base threadsize could use. To get the actual size of the register
* file in bytes one would need to compute:
*
* reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4)
*
* However this number is more often what we actually need. For example, a
* max_reg more than half of this will result in a doubled threadsize
* being impossible (because double-sized waves take up twice as many
* registers). Also, the formula for the occupancy given a particular
* register footprint is simpler.
*
* It is in vec4 units because the register file is allocated
* with vec4 granularity, so it's in the same units as max_reg.
*/
uint32_t reg_size_vec4;
/* This is theoretical maximum number of vec4 registers that one wave of
* the base threadsize could use. To get the actual size of the register
* file in bytes one would need to compute:
*
* reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4)
*
* However this number is more often what we actually need. For example, a
* max_reg more than half of this will result in a doubled threadsize
* being impossible (because double-sized waves take up twice as many
* registers). Also, the formula for the occupancy given a particular
* register footprint is simpler.
*
* It is in vec4 units because the register file is allocated
* with vec4 granularity, so it's in the same units as max_reg.
*/
uint32_t reg_size_vec4;
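/* Illustrative sketch only, plugging the a6xx defaults set above into the
 * formula from the comment (reg_size_vec4 = 96, threadsize_base = 64,
 * wave_granularity = 2):
 *
 *    96 * 64 * 2 * 16 bytes = 196608 bytes = 192 KiB of register file.
 */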
/* The size of local memory in bytes */
uint32_t local_mem_size;
/* The size of local memory in bytes */
uint32_t local_mem_size;
/* The number of total branch stack entries, divided by wave_granularity. */
uint32_t branchstack_size;
/* The number of total branch stack entries, divided by wave_granularity. */
uint32_t branchstack_size;
/* Whether clip+cull distances are supported */
bool has_clip_cull;
/* Whether clip+cull distances are supported */
bool has_clip_cull;
/* Whether private memory is supported */
bool has_pvtmem;
/* Whether private memory is supported */
bool has_pvtmem;
};
void ir3_compiler_destroy(struct ir3_compiler *compiler);
struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
bool robust_ubo_access);
struct ir3_compiler *ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
bool robust_ubo_access);
void ir3_disk_cache_init(struct ir3_compiler *compiler);
void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
struct ir3_shader *shader);
struct ir3_shader *shader);
bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
struct ir3_shader_variant *v);
struct ir3_shader_variant *v);
void ir3_disk_cache_store(struct ir3_compiler *compiler,
struct ir3_shader_variant *v);
struct ir3_shader_variant *v);
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_shader_variant *so);
struct ir3_shader_variant *so);
/* gpu pointer size in units of 32bit registers/slots */
static inline
unsigned ir3_pointer_size(struct ir3_compiler *compiler)
static inline unsigned
ir3_pointer_size(struct ir3_compiler *compiler)
{
return (compiler->gpu_id >= 500) ? 2 : 1;
return (compiler->gpu_id >= 500) ? 2 : 1;
}
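/* Illustrative note (assumption, not stated in the source): the two register
 * slots returned for gpu_id >= 500 correspond to a 64-bit GPU address held in
 * a pair of 32-bit registers, while earlier gens use a single 32-bit slot.
 */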
enum ir3_shader_debug {
IR3_DBG_SHADER_VS = BITFIELD_BIT(0),
IR3_DBG_SHADER_TCS = BITFIELD_BIT(1),
IR3_DBG_SHADER_TES = BITFIELD_BIT(2),
IR3_DBG_SHADER_GS = BITFIELD_BIT(3),
IR3_DBG_SHADER_FS = BITFIELD_BIT(4),
IR3_DBG_SHADER_CS = BITFIELD_BIT(5),
IR3_DBG_DISASM = BITFIELD_BIT(6),
IR3_DBG_OPTMSGS = BITFIELD_BIT(7),
IR3_DBG_FORCES2EN = BITFIELD_BIT(8),
IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
IR3_DBG_NOFP16 = BITFIELD_BIT(10),
IR3_DBG_NOCACHE = BITFIELD_BIT(11),
IR3_DBG_SHADER_VS = BITFIELD_BIT(0),
IR3_DBG_SHADER_TCS = BITFIELD_BIT(1),
IR3_DBG_SHADER_TES = BITFIELD_BIT(2),
IR3_DBG_SHADER_GS = BITFIELD_BIT(3),
IR3_DBG_SHADER_FS = BITFIELD_BIT(4),
IR3_DBG_SHADER_CS = BITFIELD_BIT(5),
IR3_DBG_DISASM = BITFIELD_BIT(6),
IR3_DBG_OPTMSGS = BITFIELD_BIT(7),
IR3_DBG_FORCES2EN = BITFIELD_BIT(8),
IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
IR3_DBG_NOFP16 = BITFIELD_BIT(10),
IR3_DBG_NOCACHE = BITFIELD_BIT(11),
/* DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
IR3_DBG_RAMSGS = BITFIELD_BIT(21),
/* DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
IR3_DBG_RAMSGS = BITFIELD_BIT(21),
/* Only used for the disk-caching logic: */
IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
/* Only used for the disk-caching logic: */
IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
};
extern enum ir3_shader_debug ir3_shader_debug;
@@ -205,29 +205,35 @@ extern const char *ir3_shader_override_path;
static inline bool
shader_debug_enabled(gl_shader_stage type)
{
if (ir3_shader_debug & IR3_DBG_DISASM)
return true;
if (ir3_shader_debug & IR3_DBG_DISASM)
return true;
switch (type) {
case MESA_SHADER_VERTEX: return !!(ir3_shader_debug & IR3_DBG_SHADER_VS);
case MESA_SHADER_TESS_CTRL: return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS);
case MESA_SHADER_TESS_EVAL: return !!(ir3_shader_debug & IR3_DBG_SHADER_TES);
case MESA_SHADER_GEOMETRY: return !!(ir3_shader_debug & IR3_DBG_SHADER_GS);
case MESA_SHADER_FRAGMENT: return !!(ir3_shader_debug & IR3_DBG_SHADER_FS);
case MESA_SHADER_COMPUTE: return !!(ir3_shader_debug & IR3_DBG_SHADER_CS);
default:
debug_assert(0);
return false;
}
switch (type) {
case MESA_SHADER_VERTEX:
return !!(ir3_shader_debug & IR3_DBG_SHADER_VS);
case MESA_SHADER_TESS_CTRL:
return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS);
case MESA_SHADER_TESS_EVAL:
return !!(ir3_shader_debug & IR3_DBG_SHADER_TES);
case MESA_SHADER_GEOMETRY:
return !!(ir3_shader_debug & IR3_DBG_SHADER_GS);
case MESA_SHADER_FRAGMENT:
return !!(ir3_shader_debug & IR3_DBG_SHADER_FS);
case MESA_SHADER_COMPUTE:
return !!(ir3_shader_debug & IR3_DBG_SHADER_CS);
default:
debug_assert(0);
return false;
}
}
static inline void
ir3_debug_print(struct ir3 *ir, const char *when)
{
if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
mesa_logi("%s:", when);
ir3_print(ir);
}
if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
mesa_logi("%s:", when);
ir3_print(ir);
}
}
#endif /* IR3_COMPILER_H_ */

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -27,215 +27,250 @@
#ifndef IR3_CONTEXT_H_
#define IR3_CONTEXT_H_
#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
#include "ir3.h"
/* for conditionally setting boolean flag(s): */
#define COND(bool, val) ((bool) ? (val) : 0)
#define DBG(fmt, ...) \
do { mesa_logd("%s:%d: "fmt, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
#define DBG(fmt, ...) \
do { \
mesa_logd("%s:%d: " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
/**
* The context for compilation of a single shader.
*/
struct ir3_context {
struct ir3_compiler *compiler;
const struct ir3_context_funcs *funcs;
struct ir3_compiler *compiler;
const struct ir3_context_funcs *funcs;
struct nir_shader *s;
struct nir_shader *s;
struct nir_instr *cur_instr; /* current instruction, just for debug */
struct nir_instr *cur_instr; /* current instruction, just for debug */
struct ir3 *ir;
struct ir3_shader_variant *so;
struct ir3 *ir;
struct ir3_shader_variant *so;
/* Tables of scalar inputs/outputs. Because of the way varying packing
* works, we could have inputs w/ fractional location, which is a bit
* awkward to deal with unless we keep track of the split scalar in/
* out components.
*
* These *only* have inputs/outputs that are touched by load_*input and
* store_output.
*/
unsigned ninputs, noutputs;
struct ir3_instruction **inputs;
struct ir3_instruction **outputs;
/* Tables of scalar inputs/outputs. Because of the way varying packing
* works, we could have inputs w/ fractional location, which is a bit
* awkward to deal with unless we keep track of the split scalar in/
* out components.
*
* These *only* have inputs/outputs that are touched by load_*input and
* store_output.
*/
unsigned ninputs, noutputs;
struct ir3_instruction **inputs;
struct ir3_instruction **outputs;
struct ir3_block *block; /* the current block */
struct ir3_block *in_block; /* block created for shader inputs */
struct ir3_block *block; /* the current block */
struct ir3_block *in_block; /* block created for shader inputs */
nir_function_impl *impl;
nir_function_impl *impl;
/* For fragment shaders, varyings are not actual shader inputs,
* instead the hw passes an ij coord which is used with
* bary.f.
*
* But NIR doesn't know that, it still declares varyings as
* inputs. So we do all the input tracking normally and fix
* things up after compile_instructions()
*/
struct ir3_instruction *ij[IJ_COUNT];
/* For fragment shaders, varyings are not actual shader inputs,
* instead the hw passes an ij coord which is used with
* bary.f.
*
* But NIR doesn't know that, it still declares varyings as
* inputs. So we do all the input tracking normally and fix
* things up after compile_instructions()
*/
struct ir3_instruction *ij[IJ_COUNT];
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
struct ir3_instruction *frag_face, *frag_coord;
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
struct ir3_instruction *frag_face, *frag_coord;
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance, *draw_id, *view_index;
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
*draw_id, *view_index;
/* For fragment shaders: */
struct ir3_instruction *samp_id, *samp_mask_in;
/* For fragment shaders: */
struct ir3_instruction *samp_id, *samp_mask_in;
/* For geometry shaders: */
struct ir3_instruction *primitive_id;
struct ir3_instruction *gs_header;
/* For geometry shaders: */
struct ir3_instruction *primitive_id;
struct ir3_instruction *gs_header;
/* For tessellation shaders: */
struct ir3_instruction *patch_vertices_in;
struct ir3_instruction *tcs_header;
struct ir3_instruction *tess_coord;
/* For tessellation shaders: */
struct ir3_instruction *patch_vertices_in;
struct ir3_instruction *tcs_header;
struct ir3_instruction *tess_coord;
/* Compute shader inputs: */
struct ir3_instruction *local_invocation_id, *work_group_id;
/* Compute shader inputs: */
struct ir3_instruction *local_invocation_id, *work_group_id;
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
unsigned num_arrays;
unsigned num_arrays;
/* Tracking for max level of flowcontrol (branchstack) needed
* by a5xx+:
*/
unsigned stack, max_stack;
/* Tracking for max level of flowcontrol (branchstack) needed
* by a5xx+:
*/
unsigned stack, max_stack;
unsigned loop_id;
unsigned loop_id;
/* a common pattern for indirect addressing is to request the
* same address register multiple times. To avoid generating
* duplicate instruction sequences (which our backend does not
* try to clean up, since that should be done at the NIR stage)
* we cache the address value generated for a given src value:
*
* Note that we have to cache these per alignment, since the same
* src used for an array of vec1 cannot also be used for an
* array of vec4.
*/
struct hash_table *addr0_ht[4];
/* a common pattern for indirect addressing is to request the
* same address register multiple times. To avoid generating
* duplicate instruction sequences (which our backend does not
* try to clean up, since that should be done at the NIR stage)
* we cache the address value generated for a given src value:
*
* Note that we have to cache these per alignment, since the same
* src used for an array of vec1 cannot also be used for an
* array of vec4.
*/
struct hash_table *addr0_ht[4];
/* The same for a1.x. We only support immediate values for a1.x, as this
* is the only use so far.
*/
struct hash_table_u64 *addr1_ht;
/* The same for a1.x. We only support immediate values for a1.x, as this
* is the only use so far.
*/
struct hash_table_u64 *addr1_ht;
struct hash_table *sel_cond_conversions;
struct hash_table *sel_cond_conversions;
/* last dst array, for indirect we need to insert a var-store.
*/
struct ir3_instruction **last_dst;
unsigned last_dst_n;
/* last dst array, for indirect we need to insert a var-store.
*/
struct ir3_instruction **last_dst;
unsigned last_dst_n;
/* maps nir_block to ir3_block, mostly for the purposes of
* figuring out the block's successors
*/
struct hash_table *block_ht;
/* maps nir_block to ir3_block, mostly for the purposes of
* figuring out the block's successors
*/
struct hash_table *block_ht;
/* maps nir_block at the top of a loop to ir3_block collecting continue
* edges.
*/
struct hash_table *continue_block_ht;
/* maps nir_block at the top of a loop to ir3_block collecting continue
* edges.
*/
struct hash_table *continue_block_ht;
/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
unsigned astc_srgb;
/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
unsigned astc_srgb;
unsigned samples; /* bitmask of x,y sample shifts */
unsigned samples; /* bitmask of x,y sample shifts */
unsigned max_texture_index;
unsigned max_texture_index;
unsigned prefetch_limit;
unsigned prefetch_limit;
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fall back to TGSI compiler f/e
*/
bool error;
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fall back to TGSI compiler f/e
*/
bool error;
};
struct ir3_context_funcs {
void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
struct ir3_instruction * (*emit_intrinsic_atomic_ssbo)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_load_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
struct ir3_instruction * (*emit_intrinsic_atomic_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_image_size)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,
nir_intrinsic_instr *intr);
struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(
struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_load_image)(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_image)(struct ir3_context *ctx,
nir_intrinsic_instr *intr);
struct ir3_instruction *(*emit_intrinsic_atomic_image)(
struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_image_size)(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
nir_intrinsic_instr *intr);
};
extern const struct ir3_context_funcs ir3_a4xx_funcs;
extern const struct ir3_context_funcs ir3_a6xx_funcs;
struct ir3_context * ir3_context_init(struct ir3_compiler *compiler,
struct ir3_shader_variant *so);
struct ir3_context *ir3_context_init(struct ir3_compiler *compiler,
struct ir3_shader_variant *so);
void ir3_context_free(struct ir3_context *ctx);
struct ir3_instruction ** ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n);
struct ir3_instruction ** ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n);
struct ir3_instruction * const * ir3_get_src(struct ir3_context *ctx, nir_src *src);
struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
nir_ssa_def *dst, unsigned n);
struct ir3_instruction **ir3_get_dst(struct ir3_context *ctx, nir_dest *dst,
unsigned n);
struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
nir_src *src);
void ir3_put_dst(struct ir3_context *ctx, nir_dest *dst);
struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx,
struct ir3_instruction *const *arr, unsigned arrsz);
struct ir3_instruction *ir3_create_collect(struct ir3_context *ctx,
struct ir3_instruction *const *arr,
unsigned arrsz);
void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
struct ir3_instruction *src, unsigned base, unsigned n);
struct ir3_instruction *src, unsigned base, unsigned n);
void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
void ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin);
void emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void ir3_handle_nonuniform(struct ir3_instruction *instr,
nir_intrinsic_instr *intrin);
void emit_intrinsic_image_size_tex(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
#define ir3_collect(ctx, ...) ({ \
struct ir3_instruction *__arr[] = { __VA_ARGS__ }; \
ir3_create_collect(ctx, __arr, ARRAY_SIZE(__arr)); \
})
#define ir3_collect(ctx, ...) \
({ \
struct ir3_instruction *__arr[] = {__VA_ARGS__}; \
ir3_create_collect(ctx, __arr, ARRAY_SIZE(__arr)); \
})
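/* Usage sketch, mirroring how the a6xx backend above calls it (lo/hi are
 * placeholder names for the two halves of a 64-bit address):
 *
 *    struct ir3_instruction *addr = ir3_collect(ctx, lo, hi);
 *
 * which gathers the two scalar values into a single collect (vec2) source.
 */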
NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format, ...);
NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format,
...);
#define compile_assert(ctx, cond) do { \
if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \
} while (0)
#define compile_assert(ctx, cond) \
do { \
if (!(cond)) \
ir3_context_error((ctx), "failed assert: " #cond "\n"); \
} while (0)
struct ir3_instruction * ir3_get_addr0(struct ir3_context *ctx,
struct ir3_instruction *src, int align);
struct ir3_instruction * ir3_get_addr1(struct ir3_context *ctx,
unsigned const_val);
struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx,
struct ir3_instruction *src);
struct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx,
struct ir3_instruction *src, int align);
struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
unsigned const_val);
struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
struct ir3_instruction *src);
void ir3_declare_array(struct ir3_context *ctx, nir_register *reg);
struct ir3_array * ir3_get_array(struct ir3_context *ctx, nir_register *reg);
struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg);
struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
struct ir3_array *arr, int n, struct ir3_instruction *address);
void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
struct ir3_instruction *src, struct ir3_instruction *address);
struct ir3_array *arr, int n,
struct ir3_instruction *address);
void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
int n, struct ir3_instruction *src,
struct ir3_instruction *address);
static inline type_t utype_for_size(unsigned bit_size)
static inline type_t
utype_for_size(unsigned bit_size)
{
switch (bit_size) {
case 32: return TYPE_U32;
case 16: return TYPE_U16;
case 8: return TYPE_U8;
default: unreachable("bad bitsize"); return ~0;
}
switch (bit_size) {
case 32:
return TYPE_U32;
case 16:
return TYPE_U16;
case 8:
return TYPE_U8;
default:
unreachable("bad bitsize");
return ~0;
}
}
static inline type_t utype_src(nir_src src)
{ return utype_for_size(nir_src_bit_size(src)); }
static inline type_t
utype_src(nir_src src)
{
return utype_for_size(nir_src_bit_size(src));
}
static inline type_t utype_dst(nir_dest dst)
{ return utype_for_size(nir_dest_bit_size(dst)); }
static inline type_t
utype_dst(nir_dest dst)
{
return utype_for_size(nir_dest_bit_size(dst));
}
#endif /* IR3_CONTEXT_H_ */

File diff suppressed because it is too large


@ -36,7 +36,6 @@
* one. It is basically anything that is not SSA.
*/
/**
* Check if any instruction before `use` and after `src` writes to the
* specified array. If `offset` is negative, it is a relative (a0.x)
@ -48,186 +47,184 @@
* the correct array write.
*/
static bool
has_conflicting_write(struct ir3_instruction *src,
struct ir3_instruction *use,
struct ir3_register **def,
unsigned id, int offset)
has_conflicting_write(struct ir3_instruction *src, struct ir3_instruction *use,
struct ir3_register **def, unsigned id, int offset)
{
assert(src->block == use->block);
bool last_write = true;
assert(src->block == use->block);
bool last_write = true;
/* NOTE that since src and use are in the same block, src by
* definition appears in the block's instr_list before use:
*/
foreach_instr_rev (instr, &use->node) {
if (instr == src)
break;
/* NOTE that since src and use are in the same block, src by
* definition appears in the block's instr_list before use:
*/
foreach_instr_rev (instr, &use->node) {
if (instr == src)
break;
/* if we are looking at a RELATIV read, we can't move
* it past an a0.x write:
*/
if ((offset < 0) && (dest_regs(instr) > 0) &&
(instr->dsts[0]->num == regid(REG_A0, 0)))
return true;
/* if we are looking at a RELATIV read, we can't move
* it past an a0.x write:
*/
if ((offset < 0) && (dest_regs(instr) > 0) &&
(instr->dsts[0]->num == regid(REG_A0, 0)))
return true;
if (!writes_gpr(instr))
continue;
if (!writes_gpr(instr))
continue;
struct ir3_register *dst = instr->dsts[0];
if (!(dst->flags & IR3_REG_ARRAY))
continue;
struct ir3_register *dst = instr->dsts[0];
if (!(dst->flags & IR3_REG_ARRAY))
continue;
if (dst->array.id != id)
continue;
if (dst->array.id != id)
continue;
/*
* At this point, we have narrowed down an instruction
* that writes to the same array.. check if the write
* is to an array element that we care about:
*/
/*
* At this point, we have narrowed down an instruction
* that writes to the same array.. check if the write
* is to an array element that we care about:
*/
/* is write to an unknown array element? */
if (dst->flags & IR3_REG_RELATIV)
return true;
/* is write to an unknown array element? */
if (dst->flags & IR3_REG_RELATIV)
return true;
/* is read from an unknown array element? */
if (offset < 0)
return true;
/* is read from an unknown array element? */
if (offset < 0)
return true;
/* is write to same array element? */
if (dst->array.offset == offset)
return true;
/* is write to same array element? */
if (dst->array.offset == offset)
return true;
if (last_write)
*def = dst;
if (last_write)
*def = dst;
last_write = false;
}
last_write = false;
}
return false;
return false;
}
/* Can we fold the mov src into use without invalid flags? */
static bool
valid_flags(struct ir3_instruction *use, struct ir3_instruction *mov)
{
struct ir3_register *src = mov->srcs[0];
struct ir3_register *src = mov->srcs[0];
foreach_src_n (reg, n, use) {
if (ssa(reg) != mov)
continue;
foreach_src_n (reg, n, use) {
if (ssa(reg) != mov)
continue;
if (!ir3_valid_flags(use, n, reg->flags | src->flags))
return false;
}
if (!ir3_valid_flags(use, n, reg->flags | src->flags))
return false;
}
return true;
return true;
}
static bool
instr_cp_postsched(struct ir3_instruction *mov)
{
struct ir3_register *src = mov->srcs[0];
struct ir3_register *src = mov->srcs[0];
/* only consider mov's from "arrays", other cases we have
* already considered:
*/
if (!(src->flags & IR3_REG_ARRAY))
return false;
/* only consider mov's from "arrays", other cases we have
* already considered:
*/
if (!(src->flags & IR3_REG_ARRAY))
return false;
int offset = (src->flags & IR3_REG_RELATIV) ? -1 : src->array.offset;
int offset = (src->flags & IR3_REG_RELATIV) ? -1 : src->array.offset;
/* Once we move the array read directly into the consuming
* instruction(s), we will also need to update instructions
* that had a false-dep on the original mov to have deps
* on the consuming instructions:
*/
struct util_dynarray newdeps;
util_dynarray_init(&newdeps, mov->uses);
/* Once we move the array read directly into the consuming
* instruction(s), we will also need to update instructions
* that had a false-dep on the original mov to have deps
* on the consuming instructions:
*/
struct util_dynarray newdeps;
util_dynarray_init(&newdeps, mov->uses);
foreach_ssa_use (use, mov) {
if (use->block != mov->block)
continue;
foreach_ssa_use (use, mov) {
if (use->block != mov->block)
continue;
if (is_meta(use))
continue;
if (is_meta(use))
continue;
struct ir3_register *def = src->def;
if (has_conflicting_write(mov, use, &def, src->array.id, offset))
continue;
struct ir3_register *def = src->def;
if (has_conflicting_write(mov, use, &def, src->array.id, offset))
continue;
if (conflicts(mov->address, use->address))
continue;
if (conflicts(mov->address, use->address))
continue;
if (!valid_flags(use, mov))
continue;
if (!valid_flags(use, mov))
continue;
/* Ok, we've established that it is safe to remove this copy: */
/* Ok, we've established that it is safe to remove this copy: */
bool removed = false;
foreach_src_n (reg, n, use) {
if (ssa(reg) != mov)
continue;
bool removed = false;
foreach_src_n (reg, n, use) {
if (ssa(reg) != mov)
continue;
use->srcs[n] = ir3_reg_clone(mov->block->shader, src);
use->srcs[n] = ir3_reg_clone(mov->block->shader, src);
/* preserve (abs)/etc modifiers: */
use->srcs[n]->flags |= reg->flags;
/* preserve (abs)/etc modifiers: */
use->srcs[n]->flags |= reg->flags;
/* If we're sinking the array read past any writes, make
* sure to update it to point to the new previous write:
*/
use->srcs[n]->def = def;
/* If we're sinking the array read past any writes, make
* sure to update it to point to the new previous write:
*/
use->srcs[n]->def = def;
removed = true;
}
removed = true;
}
/* the use could have been only a false-dep, so only add to the newdeps
* array and update the address if we've actually updated a real src
* reg for the use:
*/
if (removed) {
if (src->flags & IR3_REG_RELATIV)
ir3_instr_set_address(use, mov->address->def->instr);
/* the use could have been only a false-dep, so only add to the newdeps
* array and update the address if we've actually updated a real src
* reg for the use:
*/
if (removed) {
if (src->flags & IR3_REG_RELATIV)
ir3_instr_set_address(use, mov->address->def->instr);
util_dynarray_append(&newdeps, struct ir3_instruction *, use);
util_dynarray_append(&newdeps, struct ir3_instruction *, use);
/* Remove the use from the src instruction: */
_mesa_set_remove_key(mov->uses, use);
}
}
/* Remove the use from the src instruction: */
_mesa_set_remove_key(mov->uses, use);
}
}
/* Once we have the complete set of instruction(s) that are now
* directly reading from the array, update any false-dep uses to
* now depend on these instructions. The only remaining uses at
* this point should be false-deps:
*/
foreach_ssa_use (use, mov) {
util_dynarray_foreach(&newdeps, struct ir3_instruction *, instrp) {
struct ir3_instruction *newdep = *instrp;
ir3_instr_add_dep(use, newdep);
}
}
/* Once we have the complete set of instruction(s) that are now
* directly reading from the array, update any false-dep uses to
* now depend on these instructions. The only remaining uses at
* this point should be false-deps:
*/
foreach_ssa_use (use, mov) {
util_dynarray_foreach (&newdeps, struct ir3_instruction *, instrp) {
struct ir3_instruction *newdep = *instrp;
ir3_instr_add_dep(use, newdep);
}
}
return util_dynarray_num_elements(&newdeps, struct ir3_instruction **) > 0;
return util_dynarray_num_elements(&newdeps, struct ir3_instruction **) > 0;
}
bool
ir3_cp_postsched(struct ir3 *ir)
{
void *mem_ctx = ralloc_context(NULL);
bool progress = false;
void *mem_ctx = ralloc_context(NULL);
bool progress = false;
ir3_find_ssa_uses(ir, mem_ctx, false);
ir3_find_ssa_uses(ir, mem_ctx, false);
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (is_same_type_mov(instr))
progress |= instr_cp_postsched(instr);
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (is_same_type_mov(instr))
progress |= instr_cp_postsched(instr);
}
}
ralloc_free(mem_ctx);
ralloc_free(mem_ctx);
return progress;
return progress;
}


@ -37,112 +37,109 @@
static uint32_t
hash_instr(const void *data)
{
const struct ir3_instruction *instr = data;
uint32_t hash = 0;
const struct ir3_instruction *instr = data;
uint32_t hash = 0;
hash = HASH(hash, instr->opc);
hash = HASH(hash, instr->dsts[0]->flags);
foreach_src (src, (struct ir3_instruction *) instr) {
if (src->flags & IR3_REG_CONST)
hash = HASH(hash, src->num);
else if (src->flags & IR3_REG_IMMED)
hash = HASH(hash, src->uim_val);
else
hash = HASH(hash, src->def);
}
hash = HASH(hash, instr->opc);
hash = HASH(hash, instr->dsts[0]->flags);
foreach_src (src, (struct ir3_instruction *)instr) {
if (src->flags & IR3_REG_CONST)
hash = HASH(hash, src->num);
else if (src->flags & IR3_REG_IMMED)
hash = HASH(hash, src->uim_val);
else
hash = HASH(hash, src->def);
}
return hash;
return hash;
}
static bool
instrs_equal(const struct ir3_instruction *i1, const struct ir3_instruction *i2)
{
if (i1->opc != i2->opc)
return false;
if (i1->opc != i2->opc)
return false;
if (i1->dsts_count != i2->dsts_count)
return false;
if (i1->dsts_count != i2->dsts_count)
return false;
if (i1->srcs_count != i2->srcs_count)
return false;
if (i1->srcs_count != i2->srcs_count)
return false;
if (i1->dsts[0]->flags != i2->dsts[0]->flags)
return false;
if (i1->dsts[0]->flags != i2->dsts[0]->flags)
return false;
for (unsigned i = 0; i < i1->srcs_count; i++) {
const struct ir3_register *i1_reg = i1->srcs[i], *i2_reg = i2->srcs[i];
for (unsigned i = 0; i < i1->srcs_count; i++) {
const struct ir3_register *i1_reg = i1->srcs[i], *i2_reg = i2->srcs[i];
if (i1_reg->flags != i2_reg->flags)
return false;
if (i1_reg->flags != i2_reg->flags)
return false;
if (i1_reg->flags & IR3_REG_CONST) {
if (i1_reg->num != i2_reg->num)
return false;
} else if (i1_reg->flags & IR3_REG_IMMED) {
if (i1_reg->uim_val != i2_reg->uim_val)
return false;
} else {
if (i1_reg->def != i2_reg->def)
return false;
}
}
if (i1_reg->flags & IR3_REG_CONST) {
if (i1_reg->num != i2_reg->num)
return false;
} else if (i1_reg->flags & IR3_REG_IMMED) {
if (i1_reg->uim_val != i2_reg->uim_val)
return false;
} else {
if (i1_reg->def != i2_reg->def)
return false;
}
}
return true;
return true;
}
static bool
instr_can_cse(const struct ir3_instruction *instr)
{
if (instr->opc != OPC_META_COLLECT)
return false;
if (instr->opc != OPC_META_COLLECT)
return false;
return true;
return true;
}
static bool
cmp_func(const void *data1, const void *data2)
{
return instrs_equal(data1, data2);
return instrs_equal(data1, data2);
}
bool
ir3_cse(struct ir3 *ir)
{
struct set *instr_set = _mesa_set_create(NULL, hash_instr, cmp_func);
foreach_block (block, &ir->block_list) {
_mesa_set_clear(instr_set, NULL);
struct set *instr_set = _mesa_set_create(NULL, hash_instr, cmp_func);
foreach_block (block, &ir->block_list) {
_mesa_set_clear(instr_set, NULL);
foreach_instr (instr, &block->instr_list) {
instr->data = NULL;
foreach_instr (instr, &block->instr_list) {
instr->data = NULL;
if (!instr_can_cse(instr))
continue;
if (!instr_can_cse(instr))
continue;
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found)
instr->data = (void *) entry->key;
}
}
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found)
instr->data = (void *)entry->key;
}
}
bool progress = false;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
foreach_src (src, instr) {
if ((src->flags & IR3_REG_SSA) && src->def &&
src->def->instr->data) {
progress = true;
struct ir3_instruction *instr = src->def->instr->data;
src->def = instr->dsts[0];
}
}
}
}
bool progress = false;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
foreach_src(src, instr) {
if ((src->flags & IR3_REG_SSA) &&
src->def &&
src->def->instr->data) {
progress = true;
struct ir3_instruction *instr = src->def->instr->data;
src->def = instr->dsts[0];
}
}
}
}
_mesa_set_destroy(instr_set, NULL);
return progress;
_mesa_set_destroy(instr_set, NULL);
return progress;
}
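/* Worked example (hypothetical values, NIR-style names used only for
 * illustration): two collects in one block with identical sources,
 *
 *    ssa_3 = collect ssa_1, ssa_2
 *    ...
 *    ssa_7 = collect ssa_1, ssa_2
 *
 * hash identically and compare equal, so the second one's instr->data ends
 * up pointing at the first. The final loop then redirects every source that
 * referenced ssa_7 to ssa_3's dst, leaving the duplicate collect for a later
 * DCE pass to drop.
 */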


@ -36,168 +36,168 @@
static void
mark_array_use(struct ir3_instruction *instr, struct ir3_register *reg)
{
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(instr->block->shader, reg->array.id);
arr->unused = false;
}
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(instr->block->shader, reg->array.id);
arr->unused = false;
}
}
static void
instr_dce(struct ir3_instruction *instr, bool falsedep)
{
/* don't mark falsedep's as used, but otherwise process them normally: */
if (!falsedep)
instr->flags &= ~IR3_INSTR_UNUSED;
/* don't mark falsedep's as used, but otherwise process them normally: */
if (!falsedep)
instr->flags &= ~IR3_INSTR_UNUSED;
if (ir3_instr_check_mark(instr))
return;
if (ir3_instr_check_mark(instr))
return;
if (writes_gpr(instr))
mark_array_use(instr, instr->dsts[0]); /* dst */
if (writes_gpr(instr))
mark_array_use(instr, instr->dsts[0]); /* dst */
foreach_src (reg, instr)
mark_array_use(instr, reg); /* src */
foreach_src (reg, instr)
mark_array_use(instr, reg); /* src */
foreach_ssa_src_n (src, i, instr) {
instr_dce(src, __is_false_dep(instr, i));
}
foreach_ssa_src_n (src, i, instr) {
instr_dce(src, __is_false_dep(instr, i));
}
}
static bool
remove_unused_by_block(struct ir3_block *block)
{
bool progress = false;
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
continue;
if (instr->flags & IR3_INSTR_UNUSED) {
if (instr->opc == OPC_META_SPLIT) {
struct ir3_instruction *src = ssa(instr->srcs[0]);
/* tex (cat5) instructions have a writemask, so we can
* mask off unused components. Other instructions do not.
*/
if (src && is_tex_or_prefetch(src) && (src->dsts[0]->wrmask > 1)) {
src->dsts[0]->wrmask &= ~(1 << instr->split.off);
}
}
bool progress = false;
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_END || instr->opc == OPC_CHSH ||
instr->opc == OPC_CHMASK)
continue;
if (instr->flags & IR3_INSTR_UNUSED) {
if (instr->opc == OPC_META_SPLIT) {
struct ir3_instruction *src = ssa(instr->srcs[0]);
/* tex (cat5) instructions have a writemask, so we can
* mask off unused components. Other instructions do not.
*/
if (src && is_tex_or_prefetch(src) && (src->dsts[0]->wrmask > 1)) {
src->dsts[0]->wrmask &= ~(1 << instr->split.off);
}
}
/* prune false-deps, etc: */
foreach_ssa_use (use, instr)
foreach_ssa_srcp_n (srcp, n, use)
if (*srcp == instr)
*srcp = NULL;
/* prune false-deps, etc: */
foreach_ssa_use (use, instr)
foreach_ssa_srcp_n (srcp, n, use)
if (*srcp == instr)
*srcp = NULL;
list_delinit(&instr->node);
progress = true;
}
}
return progress;
list_delinit(&instr->node);
progress = true;
}
}
return progress;
}
static bool
find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
{
unsigned i;
bool progress = false;
unsigned i;
bool progress = false;
ir3_clear_mark(ir);
ir3_clear_mark(ir);
/* initially mark everything as unused, we'll clear the flag as we
* visit the instructions:
*/
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
/* special case, if pre-fs texture fetch used, we cannot
* eliminate the barycentric i/j input
*/
if (so->num_sampler_prefetch &&
(instr->opc == OPC_META_INPUT) &&
(instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL))
continue;
instr->flags |= IR3_INSTR_UNUSED;
}
}
/* initially mark everything as unused, we'll clear the flag as we
* visit the instructions:
*/
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
/* special case, if pre-fs texture fetch used, we cannot
* eliminate the barycentric i/j input
*/
if (so->num_sampler_prefetch && (instr->opc == OPC_META_INPUT) &&
(instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL))
continue;
instr->flags |= IR3_INSTR_UNUSED;
}
}
foreach_array (arr, &ir->array_list)
arr->unused = true;
foreach_array (arr, &ir->array_list)
arr->unused = true;
foreach_block (block, &ir->block_list) {
for (i = 0; i < block->keeps_count; i++)
instr_dce(block->keeps[i], false);
foreach_block (block, &ir->block_list) {
for (i = 0; i < block->keeps_count; i++)
instr_dce(block->keeps[i], false);
/* We also need to account for if-condition: */
if (block->condition)
instr_dce(block->condition, false);
}
/* We also need to account for if-condition: */
if (block->condition)
instr_dce(block->condition, false);
}
/* remove un-used instructions: */
foreach_block (block, &ir->block_list) {
progress |= remove_unused_by_block(block);
}
/* remove un-used instructions: */
foreach_block (block, &ir->block_list) {
progress |= remove_unused_by_block(block);
}
/* remove un-used arrays: */
foreach_array_safe (arr, &ir->array_list) {
if (arr->unused)
list_delinit(&arr->node);
}
/* remove un-used arrays: */
foreach_array_safe (arr, &ir->array_list) {
if (arr->unused)
list_delinit(&arr->node);
}
/* fixup wrmask of split instructions to account for adjusted tex
* wrmask's:
*/
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_SPLIT)
continue;
/* fixup wrmask of split instructions to account for adjusted tex
* wrmask's:
*/
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_SPLIT)
continue;
struct ir3_instruction *src = ssa(instr->srcs[0]);
if (!is_tex_or_prefetch(src))
continue;
struct ir3_instruction *src = ssa(instr->srcs[0]);
if (!is_tex_or_prefetch(src))
continue;
instr->srcs[0]->wrmask = src->dsts[0]->wrmask;
}
}
instr->srcs[0]->wrmask = src->dsts[0]->wrmask;
}
}
for (i = 0; i < ir->a0_users_count; i++) {
struct ir3_instruction *instr = ir->a0_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->a0_users[i] = NULL;
}
for (i = 0; i < ir->a0_users_count; i++) {
struct ir3_instruction *instr = ir->a0_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->a0_users[i] = NULL;
}
for (i = 0; i < ir->a1_users_count; i++) {
struct ir3_instruction *instr = ir->a1_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->a1_users[i] = NULL;
}
for (i = 0; i < ir->a1_users_count; i++) {
struct ir3_instruction *instr = ir->a1_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->a1_users[i] = NULL;
}
for (i = 0; i < ir->predicates_count; i++) {
struct ir3_instruction *instr = ir->predicates[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->predicates[i] = NULL;
}
for (i = 0; i < ir->predicates_count; i++) {
struct ir3_instruction *instr = ir->predicates[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->predicates[i] = NULL;
}
/* cleanup unused inputs: */
foreach_input_n (in, n, ir)
if (in->flags & IR3_INSTR_UNUSED)
ir->inputs[n] = NULL;
/* cleanup unused inputs: */
foreach_input_n (in, n, ir)
if (in->flags & IR3_INSTR_UNUSED)
ir->inputs[n] = NULL;
return progress;
return progress;
}
bool
ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so)
{
void *mem_ctx = ralloc_context(NULL);
bool progress, made_progress = false;
void *mem_ctx = ralloc_context(NULL);
bool progress, made_progress = false;
ir3_find_ssa_uses(ir, mem_ctx, true);
ir3_find_ssa_uses(ir, mem_ctx, true);
do {
progress = find_and_remove_unused(ir, so);
made_progress |= progress;
} while (progress);
do {
progress = find_and_remove_unused(ir, so);
made_progress |= progress;
} while (progress);
ralloc_free(mem_ctx);
ralloc_free(mem_ctx);
return made_progress;
return made_progress;
}


@ -57,116 +57,112 @@
*/
int
ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n, bool soft)
struct ir3_instruction *consumer, unsigned n, bool soft)
{
/* generally don't count false dependencies, since this can just be
* something like a barrier, or SSBO store.
*/
if (__is_false_dep(consumer, n))
return 0;
/* generally don't count false dependencies, since this can just be
* something like a barrier, or SSBO store.
*/
if (__is_false_dep(consumer, n))
return 0;
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
* handled with sync bits
*/
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
* handled with sync bits
*/
if (is_meta(assigner) || is_meta(consumer))
return 0;
if (is_meta(assigner) || is_meta(consumer))
return 0;
if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
if (soft && is_sfu(assigner))
return SOFT_SS_NOPS;
if (soft && is_sfu(assigner))
return SOFT_SS_NOPS;
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
/* As far as we know, shader outputs don't need any delay. */
if (consumer->opc == OPC_END || consumer->opc == OPC_CHMASK)
return 0;
/* As far as we know, shader outputs don't need any delay. */
if (consumer->opc == OPC_END || consumer->opc == OPC_CHMASK)
return 0;
/* assigner must be alu: */
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
return 6;
} else {
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
* a full-reg is read as a half-reg or when a half-reg is read as a
* full-reg.
*/
bool mismatched_half =
(assigner->dsts[0]->flags & IR3_REG_HALF) !=
(consumer->srcs[n]->flags & IR3_REG_HALF);
unsigned penalty = mismatched_half ? 2 : 0;
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
(n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1 + penalty;
} else {
return 3 + penalty;
}
}
/* assigner must be alu: */
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
return 6;
} else {
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
* a full-reg is read as a half-reg or when a half-reg is read as a
* full-reg.
*/
bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) !=
(consumer->srcs[n]->flags & IR3_REG_HALF);
unsigned penalty = mismatched_half ? 2 : 0;
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1 + penalty;
} else {
return 3 + penalty;
}
}
}
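/* A few illustrative results (hypothetical instructions, assuming real data
 * dependencies on full, non-shared GPRs and soft == false):
 *
 *    alu add -> alu mul, src 0:   3   (generic alu -> alu)
 *    alu add -> cat3 mad, src 2:  1   (3rd src not needed on the first cycle)
 *    alu add -> cat5 sam, src 0:  6   (consumer is tex, worst case)
 *
 * A half<->full width mismatch on the alu -> alu case adds the 2-cycle
 * penalty, giving 5 instead of 3.
 */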
static bool
count_instruction(struct ir3_instruction *n)
{
/* NOTE: don't count branch/jump since we don't know yet if they will
* be eliminated later in resolve_jumps().. really should do that
* earlier so we don't have this constraint.
*/
return is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_B));
/* NOTE: don't count branch/jump since we don't know yet if they will
* be eliminated later in resolve_jumps().. really should do that
* earlier so we don't have this constraint.
*/
return is_alu(n) ||
(is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_B));
}
static unsigned
distance(struct ir3_block *block, struct ir3_instruction *instr,
unsigned maxd)
distance(struct ir3_block *block, struct ir3_instruction *instr, unsigned maxd)
{
unsigned d = 0;
unsigned d = 0;
/* Note that this relies on incrementally building up the block's
* instruction list.. but this is how scheduling and nopsched
* work.
*/
foreach_instr_rev (n, &block->instr_list) {
if ((n == instr) || (d >= maxd))
return MIN2(maxd, d + n->nop);
if (count_instruction(n))
d = MIN2(maxd, d + 1 + n->repeat + n->nop);
}
/* Note that this relies on incrementally building up the block's
* instruction list.. but this is how scheduling and nopsched
* work.
*/
foreach_instr_rev (n, &block->instr_list) {
if ((n == instr) || (d >= maxd))
return MIN2(maxd, d + n->nop);
if (count_instruction(n))
d = MIN2(maxd, d + 1 + n->repeat + n->nop);
}
return maxd;
return maxd;
}
static unsigned
delay_calc_srcn_prera(struct ir3_block *block,
struct ir3_instruction *assigner,
struct ir3_instruction *consumer,
unsigned srcn)
delay_calc_srcn_prera(struct ir3_block *block, struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned srcn)
{
unsigned delay = 0;
unsigned delay = 0;
if (assigner->opc == OPC_META_PHI)
return 0;
if (assigner->opc == OPC_META_PHI)
return 0;
if (is_meta(assigner)) {
foreach_src_n (src, n, assigner) {
unsigned d;
if (is_meta(assigner)) {
foreach_src_n (src, n, assigner) {
unsigned d;
if (!src->def)
continue;
if (!src->def)
continue;
d = delay_calc_srcn_prera(block, src->def->instr, consumer, srcn);
delay = MAX2(delay, d);
}
} else {
delay = ir3_delayslots(assigner, consumer, srcn, false);
delay -= distance(block, assigner, delay);
}
d = delay_calc_srcn_prera(block, src->def->instr, consumer, srcn);
delay = MAX2(delay, d);
}
} else {
delay = ir3_delayslots(assigner, consumer, srcn, false);
delay -= distance(block, assigner, delay);
}
return delay;
return delay;
}
/**
@ -176,19 +172,19 @@ delay_calc_srcn_prera(struct ir3_block *block,
unsigned
ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
{
unsigned delay = 0;
unsigned delay = 0;
foreach_src_n (src, i, instr) {
unsigned d = 0;
foreach_src_n (src, i, instr) {
unsigned d = 0;
if (src->def && src->def->instr->block == block) {
d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
}
if (src->def && src->def->instr->block == block) {
d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
}
delay = MAX2(delay, d);
}
delay = MAX2(delay, d);
}
return delay;
return delay;
}
/* Post-RA, we don't have arrays any more, so we have to be a bit careful here
@ -198,185 +194,186 @@ ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
static unsigned
post_ra_reg_elems(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_RELATIV)
return reg->size;
return reg_elems(reg);
if (reg->flags & IR3_REG_RELATIV)
return reg->size;
return reg_elems(reg);
}
static unsigned
post_ra_reg_num(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_RELATIV)
return reg->array.base;
return reg->num;
if (reg->flags & IR3_REG_RELATIV)
return reg->array.base;
return reg->num;
}
static unsigned
delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction *consumer,
unsigned assigner_n, unsigned consumer_n, bool soft, bool mergedregs)
delay_calc_srcn_postra(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned assigner_n,
unsigned consumer_n, bool soft, bool mergedregs)
{
struct ir3_register *src = consumer->srcs[consumer_n];
struct ir3_register *dst = assigner->dsts[assigner_n];
bool mismatched_half =
(src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
struct ir3_register *src = consumer->srcs[consumer_n];
struct ir3_register *dst = assigner->dsts[assigner_n];
bool mismatched_half =
(src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
/* In the mergedregs case or when the register is a special register,
* half-registers do not alias with full registers.
*/
if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) &&
mismatched_half)
return 0;
/* In the mergedregs case or when the register is a special register,
* half-registers do not alias with full registers.
*/
if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) &&
mismatched_half)
return 0;
unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src);
unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src);
unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst);
unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst);
unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src);
unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src);
unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst);
unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst);
if (dst_start >= src_end || src_start >= dst_end)
return 0;
if (dst_start >= src_end || src_start >= dst_end)
return 0;
unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, soft);
unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, soft);
if (assigner->repeat == 0 && consumer->repeat == 0)
return delay;
if (assigner->repeat == 0 && consumer->repeat == 0)
return delay;
/* If either side is a relative access, we can't really apply most of the
* reasoning below because we don't know which component aliases which.
* Just bail in this case.
*/
if ((src->flags & IR3_REG_RELATIV) || (dst->flags & IR3_REG_RELATIV))
return delay;
/* If either side is a relative access, we can't really apply most of the
* reasoning below because we don't know which component aliases which.
* Just bail in this case.
*/
if ((src->flags & IR3_REG_RELATIV) || (dst->flags & IR3_REG_RELATIV))
return delay;
/* MOVMSK seems to require that all users wait until the entire
* instruction is finished, so just bail here.
*/
if (assigner->opc == OPC_MOVMSK)
return delay;
/* MOVMSK seems to require that all users wait until the entire
* instruction is finished, so just bail here.
*/
if (assigner->opc == OPC_MOVMSK)
return delay;
/* TODO: Handle the combination of (rpt) and different component sizes
* better like below. This complicates things significantly because the
* components don't line up.
*/
if (mismatched_half)
return delay;
/* TODO: Handle the combination of (rpt) and different component sizes
* better like below. This complicates things significantly because the
* components don't line up.
*/
if (mismatched_half)
return delay;
/* If an instruction has a (rpt), then it acts as a sequence of
* instructions, reading its non-(r) sources at each cycle. First, get the
* register num for the first instruction where they interfere:
*/
/* If an instruction has a (rpt), then it acts as a sequence of
* instructions, reading its non-(r) sources at each cycle. First, get the
* register num for the first instruction where they interfere:
*/
unsigned first_num = MAX2(src_start, dst_start) / reg_elem_size(dst);
unsigned first_num = MAX2(src_start, dst_start) / reg_elem_size(dst);
/* Now, for that first conflicting half/full register, figure out the
* sub-instruction within assigner/consumer it corresponds to. For (r)
* sources, this should already return the correct answer of 0. However we
* have to special-case the multi-mov instructions, where the
* sub-instructions sometimes come from the src/dst indices instead.
*/
unsigned first_src_instr;
if (consumer->opc == OPC_SWZ || consumer->opc == OPC_GAT)
first_src_instr = consumer_n;
else
first_src_instr = first_num - src->num;
/* Now, for that first conflicting half/full register, figure out the
* sub-instruction within assigner/consumer it corresponds to. For (r)
* sources, this should already return the correct answer of 0. However we
* have to special-case the multi-mov instructions, where the
* sub-instructions sometimes come from the src/dst indices instead.
*/
unsigned first_src_instr;
if (consumer->opc == OPC_SWZ || consumer->opc == OPC_GAT)
first_src_instr = consumer_n;
else
first_src_instr = first_num - src->num;
unsigned first_dst_instr;
if (assigner->opc == OPC_SWZ || assigner->opc == OPC_SCT)
first_dst_instr = assigner_n;
else
first_dst_instr = first_num - dst->num;
unsigned first_dst_instr;
if (assigner->opc == OPC_SWZ || assigner->opc == OPC_SCT)
first_dst_instr = assigner_n;
else
first_dst_instr = first_num - dst->num;
/* The delay we return is relative to the *end* of assigner and the
* *beginning* of consumer, because it's the number of nops (or other
* things) needed between them. Any instructions after first_dst_instr
* subtract from the delay, and so do any instructions before
* first_src_instr. Calculate an offset to subtract from the non-rpt-aware
* delay to account for that.
*
* Now, a priori, we need to go through this process for every
* conflicting regnum and take the minimum of the offsets to make sure
* that the appropriate number of nop's is inserted for every conflicting
* pair of sub-instructions. However, as we go to the next conflicting
* regnum (if any), the number of instructions after first_dst_instr
* decreases by 1 and the number of source instructions before
* first_src_instr correspondingly increases by 1, so the offset stays the
* same for all conflicting registers.
*/
unsigned offset = first_src_instr + (assigner->repeat - first_dst_instr);
return offset > delay ? 0 : delay - offset;
/* The delay we return is relative to the *end* of assigner and the
* *beginning* of consumer, because it's the number of nops (or other
* things) needed between them. Any instructions after first_dst_instr
* subtract from the delay, and so do any instructions before
* first_src_instr. Calculate an offset to subtract from the non-rpt-aware
* delay to account for that.
*
* Now, a priori, we need to go through this process for every
* conflicting regnum and take the minimum of the offsets to make sure
* that the appropriate number of nop's is inserted for every conflicting
* pair of sub-instructions. However, as we go to the next conflicting
* regnum (if any), the number of instructions after first_dst_instr
* decreases by 1 and the number of source instructions before
* first_src_instr correspondingly increases by 1, so the offset stays the
* same for all conflicting registers.
*/
unsigned offset = first_src_instr + (assigner->repeat - first_dst_instr);
return offset > delay ? 0 : delay - offset;
}
static unsigned
delay_calc_postra(struct ir3_block *block,
struct ir3_instruction *start,
struct ir3_instruction *consumer,
unsigned distance, bool soft, bool pred, bool mergedregs)
delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
struct ir3_instruction *consumer, unsigned distance,
bool soft, bool pred, bool mergedregs)
{
unsigned delay = 0;
/* Search backwards starting at the instruction before start, unless it's
* NULL then search backwards from the block end.
*/
struct list_head *start_list = start ? start->node.prev : block->instr_list.prev;
list_for_each_entry_from_rev(struct ir3_instruction, assigner, start_list, &block->instr_list, node) {
if (count_instruction(assigner))
distance += assigner->nop;
unsigned delay = 0;
/* Search backwards starting at the instruction before start, unless it's
* NULL then search backwards from the block end.
*/
struct list_head *start_list =
start ? start->node.prev : block->instr_list.prev;
list_for_each_entry_from_rev (struct ir3_instruction, assigner, start_list,
&block->instr_list, node) {
if (count_instruction(assigner))
distance += assigner->nop;
if (distance + delay >= (soft ? SOFT_SS_NOPS : MAX_NOPS))
return delay;
if (distance + delay >= (soft ? SOFT_SS_NOPS : MAX_NOPS))
return delay;
if (is_meta(assigner))
continue;
if (is_meta(assigner))
continue;
unsigned new_delay = 0;
unsigned new_delay = 0;
foreach_dst_n (dst, dst_n, assigner) {
if (dst->wrmask == 0)
continue;
foreach_src_n (src, src_n, consumer) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
continue;
foreach_dst_n (dst, dst_n, assigner) {
if (dst->wrmask == 0)
continue;
foreach_src_n (src, src_n, consumer) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
continue;
unsigned src_delay =
delay_calc_srcn_postra(assigner, consumer, dst_n,
src_n, soft, mergedregs);
new_delay = MAX2(new_delay, src_delay);
}
}
unsigned src_delay = delay_calc_srcn_postra(
assigner, consumer, dst_n, src_n, soft, mergedregs);
new_delay = MAX2(new_delay, src_delay);
}
}
new_delay = new_delay > distance ? new_delay - distance : 0;
delay = MAX2(delay, new_delay);
new_delay = new_delay > distance ? new_delay - distance : 0;
delay = MAX2(delay, new_delay);
if (count_instruction(assigner))
distance += 1 + assigner->repeat;
}
if (count_instruction(assigner))
distance += 1 + assigner->repeat;
}
/* Note: this allows recursion into "block" if it has already been
* visited, but *not* recursion into its predecessors. We may have to
* visit the original block twice, for the loop case where we have to
* consider definitions in an earlier iteration of the same loop:
*
* while (...) {
* mov.u32u32 ..., r0.x
* ...
* mov.u32u32 r0.x, ...
* }
*
* However any other recursion would be unnecessary.
*/
/* Note: this allows recursion into "block" if it has already been
* visited, but *not* recursion into its predecessors. We may have to
* visit the original block twice, for the loop case where we have to
* consider definitions in an earlier iteration of the same loop:
*
* while (...) {
* mov.u32u32 ..., r0.x
* ...
* mov.u32u32 r0.x, ...
* }
*
* However any other recursion would be unnecessary.
*/
if (pred && block->data != block) {
block->data = block;
if (pred && block->data != block) {
block->data = block;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
unsigned pred_delay =
delay_calc_postra(pred, NULL, consumer, distance, soft, pred, mergedregs);
delay = MAX2(delay, pred_delay);
}
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
soft, pred, mergedregs);
delay = MAX2(delay, pred_delay);
}
block->data = NULL;
}
block->data = NULL;
}
return delay;
return delay;
}
/**
@ -392,9 +389,9 @@ delay_calc_postra(struct ir3_block *block,
*/
unsigned
ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
bool soft, bool mergedregs)
bool soft, bool mergedregs)
{
return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
}
/**
@ -403,9 +400,9 @@ ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
*/
unsigned
ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
bool mergedregs)
bool mergedregs)
{
return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
}
/**
@ -419,12 +416,11 @@ ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
void
ir3_remove_nops(struct ir3 *ir)
{
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_NOP) {
list_del(&instr->node);
}
}
}
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_NOP) {
list_del(&instr->node);
}
}
}
}


@ -48,183 +48,185 @@
void
ir3_disk_cache_init(struct ir3_compiler *compiler)
{
if (ir3_shader_debug & IR3_DBG_NOCACHE)
return;
if (ir3_shader_debug & IR3_DBG_NOCACHE)
return;
/* array length = print length + nul char + 1 extra to verify it's unused */
char renderer[7];
ASSERTED int len =
snprintf(renderer, sizeof(renderer), "FD%03d", compiler->gpu_id);
assert(len == sizeof(renderer) - 2);
/* array length = print length + nul char + 1 extra to verify it's unused */
char renderer[7];
ASSERTED int len =
snprintf(renderer, sizeof(renderer), "FD%03d", compiler->gpu_id);
assert(len == sizeof(renderer) - 2);
const struct build_id_note *note =
build_id_find_nhdr_for_addr(ir3_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const struct build_id_note *note =
build_id_find_nhdr_for_addr(ir3_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
uint64_t driver_flags = ir3_shader_debug;
if (compiler->robust_ubo_access)
driver_flags |= IR3_DBG_ROBUST_UBO_ACCESS;
compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
uint64_t driver_flags = ir3_shader_debug;
if (compiler->robust_ubo_access)
driver_flags |= IR3_DBG_ROBUST_UBO_ACCESS;
compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
}
void
ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
struct ir3_shader *shader)
struct ir3_shader *shader)
{
if (!compiler->disk_cache)
return;
if (!compiler->disk_cache)
return;
struct mesa_sha1 ctx;
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
_mesa_sha1_init(&ctx);
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names)
* so the serialized NIR is smaller, and also to let us detect more
* isomorphic shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, shader->nir, true);
_mesa_sha1_update(&ctx, blob.data, blob.size);
blob_finish(&blob);
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names)
* so the serialized NIR is smaller, and also to let us detect more
* isomorphic shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, shader->nir, true);
_mesa_sha1_update(&ctx, blob.data, blob.size);
blob_finish(&blob);
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.
*/
_mesa_sha1_update(&ctx, &shader->stream_output, sizeof(shader->stream_output));
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.
*/
_mesa_sha1_update(&ctx, &shader->stream_output,
sizeof(shader->stream_output));
_mesa_sha1_final(&ctx, shader->cache_key);
_mesa_sha1_final(&ctx, shader->cache_key);
}
static void
compute_variant_key(struct ir3_compiler *compiler,
struct ir3_shader_variant *v, cache_key cache_key)
compute_variant_key(struct ir3_compiler *compiler, struct ir3_shader_variant *v,
cache_key cache_key)
{
struct blob blob;
blob_init(&blob);
struct blob blob;
blob_init(&blob);
blob_write_bytes(&blob, &v->shader->cache_key, sizeof(v->shader->cache_key));
blob_write_bytes(&blob, &v->key, sizeof(v->key));
blob_write_uint8(&blob, v->binning_pass);
blob_write_bytes(&blob, &v->shader->cache_key, sizeof(v->shader->cache_key));
blob_write_bytes(&blob, &v->key, sizeof(v->key));
blob_write_uint8(&blob, v->binning_pass);
disk_cache_compute_key(compiler->disk_cache, blob.data, blob.size, cache_key);
disk_cache_compute_key(compiler->disk_cache, blob.data, blob.size,
cache_key);
blob_finish(&blob);
blob_finish(&blob);
}
static void
retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v)
{
blob_copy_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
blob_copy_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
/*
* pointers need special handling:
*/
/*
* pointers need special handling:
*/
v->bin = rzalloc_size(v, v->info.size);
blob_copy_bytes(blob, v->bin, v->info.size);
v->bin = rzalloc_size(v, v->info.size);
blob_copy_bytes(blob, v->bin, v->info.size);
if (!v->binning_pass) {
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
v->const_state->immediates = ralloc_size(v->const_state, immeds_sz);
blob_copy_bytes(blob, v->const_state->immediates, immeds_sz);
}
if (!v->binning_pass) {
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
v->const_state->immediates = ralloc_size(v->const_state, immeds_sz);
blob_copy_bytes(blob, v->const_state->immediates, immeds_sz);
}
}
static void
store_variant(struct blob *blob, struct ir3_shader_variant *v)
{
blob_write_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
blob_write_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
/*
* pointers need special handling:
*/
/*
* pointers need special handling:
*/
blob_write_bytes(blob, v->bin, v->info.size);
blob_write_bytes(blob, v->bin, v->info.size);
/* No saving constant_data, it's already baked into bin at this point. */
/* No saving constant_data, it's already baked into bin at this point. */
if (!v->binning_pass) {
blob_write_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
blob_write_bytes(blob, v->const_state->immediates, immeds_sz);
}
if (!v->binning_pass) {
blob_write_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
blob_write_bytes(blob, v->const_state->immediates, immeds_sz);
}
}
bool
ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
struct ir3_shader_variant *v)
struct ir3_shader_variant *v)
{
if (!compiler->disk_cache)
return false;
if (!compiler->disk_cache)
return false;
cache_key cache_key;
cache_key cache_key;
compute_variant_key(compiler, v, cache_key);
compute_variant_key(compiler, v, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving variant %s: ", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving variant %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(compiler->disk_cache, cache_key, &size);
size_t size;
void *buffer = disk_cache_get(compiler->disk_cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return false;
if (!buffer)
return false;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
retrieve_variant(&blob, v);
retrieve_variant(&blob, v);
if (v->binning)
retrieve_variant(&blob, v->binning);
if (v->binning)
retrieve_variant(&blob, v->binning);
free(buffer);
free(buffer);
return true;
return true;
}
void
ir3_disk_cache_store(struct ir3_compiler *compiler,
struct ir3_shader_variant *v)
struct ir3_shader_variant *v)
{
if (!compiler->disk_cache)
return;
if (!compiler->disk_cache)
return;
cache_key cache_key;
cache_key cache_key;
compute_variant_key(compiler, v, cache_key);
compute_variant_key(compiler, v, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing variant %s\n", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing variant %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
struct blob blob;
blob_init(&blob);
store_variant(&blob, v);
store_variant(&blob, v);
if (v->binning)
store_variant(&blob, v->binning);
if (v->binning)
store_variant(&blob, v->binning);
disk_cache_put(compiler->disk_cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
disk_cache_put(compiler->disk_cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
}
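/* Minimal sketch of the intended call pattern; the wrapper and
 * do_compile_variant() are hypothetical, shown only to illustrate the
 * retrieve-before-compile / store-after-compile ordering:
 */
static void
compile_variant_with_cache(struct ir3_compiler *compiler,
                           struct ir3_shader_variant *v)
{
   if (ir3_disk_cache_retrieve(compiler, v))
      return; /* cache hit: v->bin, const_state, etc. already filled in */

   do_compile_variant(v); /* placeholder for the real compile path */

   ir3_disk_cache_store(compiler, v); /* publish the freshly compiled variant */
}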


@ -35,92 +35,91 @@
static struct ir3_block *
intersect(struct ir3_block *b1, struct ir3_block *b2)
{
while (b1 != b2) {
/*
* Note, the comparisons here are the opposite of what the paper says
* because we index blocks from beginning -> end (i.e. reverse
* post-order) instead of post-order like they assume.
*/
while (b1->index > b2->index)
b1 = b1->imm_dom;
while (b2->index > b1->index)
b2 = b2->imm_dom;
}
while (b1 != b2) {
/*
* Note, the comparisons here are the opposite of what the paper says
* because we index blocks from beginning -> end (i.e. reverse
* post-order) instead of post-order like they assume.
*/
while (b1->index > b2->index)
b1 = b1->imm_dom;
while (b2->index > b1->index)
b2 = b2->imm_dom;
}
return b1;
return b1;
}
static bool
calc_dominance(struct ir3_block *block)
{
struct ir3_block *new_idom = NULL;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
struct ir3_block *new_idom = NULL;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
if (pred->imm_dom) {
if (new_idom)
new_idom = intersect(pred, new_idom);
else
new_idom = pred;
}
}
if (pred->imm_dom) {
if (new_idom)
new_idom = intersect(pred, new_idom);
else
new_idom = pred;
}
}
if (block->imm_dom != new_idom) {
block->imm_dom = new_idom;
return true;
}
if (block->imm_dom != new_idom) {
block->imm_dom = new_idom;
return true;
}
return false;
return false;
}
static unsigned
calc_dfs_indices(struct ir3_block *block, unsigned index)
{
block->dom_pre_index = index++;
for (unsigned i = 0; i < block->dom_children_count; i++)
index = calc_dfs_indices(block->dom_children[i], index);
block->dom_post_index = index++;
return index;
block->dom_pre_index = index++;
for (unsigned i = 0; i < block->dom_children_count; i++)
index = calc_dfs_indices(block->dom_children[i], index);
block->dom_post_index = index++;
return index;
}
void
ir3_calc_dominance(struct ir3 *ir)
{
unsigned i = 0;
foreach_block (block, &ir->block_list) {
block->index = i++;
if (block == ir3_start_block(ir))
block->imm_dom = block;
else
block->imm_dom = NULL;
block->dom_children = NULL;
block->dom_children_count = block->dom_children_sz = 0;
}
unsigned i = 0;
foreach_block (block, &ir->block_list) {
block->index = i++;
if (block == ir3_start_block(ir))
block->imm_dom = block;
else
block->imm_dom = NULL;
block->dom_children = NULL;
block->dom_children_count = block->dom_children_sz = 0;
}
bool progress = true;
while (progress) {
progress = false;
foreach_block (block, &ir->block_list) {
if (block != ir3_start_block(ir))
progress |= calc_dominance(block);
}
}
bool progress = true;
while (progress) {
progress = false;
foreach_block (block, &ir->block_list) {
if (block != ir3_start_block(ir))
progress |= calc_dominance(block);
}
}
ir3_start_block(ir)->imm_dom = NULL;
ir3_start_block(ir)->imm_dom = NULL;
foreach_block (block, &ir->block_list) {
if (block->imm_dom)
array_insert(block->imm_dom, block->imm_dom->dom_children, block);
}
foreach_block (block, &ir->block_list) {
if (block->imm_dom)
array_insert(block->imm_dom, block->imm_dom->dom_children, block);
}
calc_dfs_indices(ir3_start_block(ir), 0);
calc_dfs_indices(ir3_start_block(ir), 0);
}
/* Return true if a dominates b. This includes if a == b. */
bool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b)
bool
ir3_block_dominates(struct ir3_block *a, struct ir3_block *b)
{
return a->dom_pre_index <= b->dom_pre_index &&
a->dom_post_index >= b->dom_post_index;
return a->dom_pre_index <= b->dom_pre_index &&
a->dom_post_index >= b->dom_post_index;
}
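/* Worked example on a hypothetical dominator tree with start block A and
 * two children B and C:
 *
 *        A
 *       / \
 *      B   C
 *
 * calc_dfs_indices(A, 0) assigns:
 *    A: pre = 0, post = 5
 *    B: pre = 1, post = 2
 *    C: pre = 3, post = 4
 *
 * ir3_block_dominates(A, B): 0 <= 1 && 5 >= 2 -> true
 * ir3_block_dominates(B, C): 1 <= 3 && 2 >= 4 -> false
 * ir3_block_dominates(B, B): 1 <= 1 && 2 >= 2 -> true (a == b counts)
 */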


@ -26,7 +26,6 @@
#include "ir3_image.h"
/*
* SSBO/Image to/from IBO/tex hw mapping table:
*/
@ -34,57 +33,57 @@
void
ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
{
memset(mapping, IBO_INVALID, sizeof(*mapping));
mapping->num_tex = 0;
mapping->tex_base = num_textures;
memset(mapping, IBO_INVALID, sizeof(*mapping));
mapping->num_tex = 0;
mapping->tex_base = num_textures;
}
struct ir3_instruction *
ir3_ssbo_to_ibo(struct ir3_context *ctx, nir_src src)
{
if (ir3_bindless_resource(src)) {
ctx->so->bindless_ibo = true;
return ir3_get_src(ctx, &src)[0];
} else {
/* can this be non-const buffer_index? how do we handle that? */
int ssbo_idx = nir_src_as_uint(src);
return create_immed(ctx->block, ssbo_idx);
}
if (ir3_bindless_resource(src)) {
ctx->so->bindless_ibo = true;
return ir3_get_src(ctx, &src)[0];
} else {
/* can this be non-const buffer_index? how do we handle that? */
int ssbo_idx = nir_src_as_uint(src);
return create_immed(ctx->block, ssbo_idx);
}
}
unsigned
ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
{
if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) {
unsigned tex = mapping->num_tex++;
mapping->ssbo_to_tex[ssbo] = tex;
mapping->tex_to_image[tex] = IBO_SSBO | ssbo;
}
return mapping->ssbo_to_tex[ssbo] + mapping->tex_base;
if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) {
unsigned tex = mapping->num_tex++;
mapping->ssbo_to_tex[ssbo] = tex;
mapping->tex_to_image[tex] = IBO_SSBO | ssbo;
}
return mapping->ssbo_to_tex[ssbo] + mapping->tex_base;
}
struct ir3_instruction *
ir3_image_to_ibo(struct ir3_context *ctx, nir_src src)
{
if (ir3_bindless_resource(src)) {
ctx->so->bindless_ibo = true;
return ir3_get_src(ctx, &src)[0];
} else {
/* can this be non-const buffer_index? how do we handle that? */
int image_idx = nir_src_as_uint(src);
return create_immed(ctx->block, ctx->s->info.num_ssbos + image_idx);
}
if (ir3_bindless_resource(src)) {
ctx->so->bindless_ibo = true;
return ir3_get_src(ctx, &src)[0];
} else {
/* can this be non-const buffer_index? how do we handle that? */
int image_idx = nir_src_as_uint(src);
return create_immed(ctx->block, ctx->s->info.num_ssbos + image_idx);
}
}
unsigned
ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
{
if (mapping->image_to_tex[image] == IBO_INVALID) {
unsigned tex = mapping->num_tex++;
mapping->image_to_tex[image] = tex;
mapping->tex_to_image[tex] = image;
}
return mapping->image_to_tex[image] + mapping->tex_base;
if (mapping->image_to_tex[image] == IBO_INVALID) {
unsigned tex = mapping->num_tex++;
mapping->image_to_tex[image] = tex;
mapping->tex_to_image[tex] = image;
}
return mapping->image_to_tex[image] + mapping->tex_base;
}
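/* Small usage sketch (illustrative values; assumes a shader with 4 regular
 * textures that then references SSBO 0 and image 0, in that order):
 */
struct ir3_ibo_mapping m;
ir3_ibo_mapping_init(&m, 4); /* tex_base = 4, no IBO slots allocated yet */

unsigned a = ir3_ssbo_to_tex(&m, 0);  /* first IBO user -> slot 0 + tex_base = 4 */
unsigned b = ir3_image_to_tex(&m, 0); /* next distinct user -> slot 1 + tex_base = 5 */
unsigned c = ir3_ssbo_to_tex(&m, 0);  /* repeated lookup is stable -> still 4 */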
/* see tex_info() for equiv logic for texture instructions.. it would be
@ -93,87 +92,87 @@ ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
unsigned
ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp)
{
unsigned coords = nir_image_intrinsic_coord_components(instr);
unsigned flags = 0;
unsigned coords = nir_image_intrinsic_coord_components(instr);
unsigned flags = 0;
if (coords == 3)
flags |= IR3_INSTR_3D;
if (coords == 3)
flags |= IR3_INSTR_3D;
if (nir_intrinsic_image_array(instr))
flags |= IR3_INSTR_A;
if (nir_intrinsic_image_array(instr))
flags |= IR3_INSTR_A;
if (flagsp)
*flagsp = flags;
if (flagsp)
*flagsp = flags;
return coords;
return coords;
}
type_t
ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
int bit_size = info->has_dest ? nir_dest_bit_size(instr->dest) : 32;
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
int bit_size = info->has_dest ? nir_dest_bit_size(instr->dest) : 32;
nir_alu_type type = nir_type_uint;
switch (instr->intrinsic) {
case nir_intrinsic_image_load:
case nir_intrinsic_bindless_image_load:
type = nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr));
/* SpvOpAtomicLoad doesn't have dest type */
if (type == nir_type_invalid)
type = nir_type_uint;
break;
nir_alu_type type = nir_type_uint;
switch (instr->intrinsic) {
case nir_intrinsic_image_load:
case nir_intrinsic_bindless_image_load:
type = nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr));
/* SpvOpAtomicLoad doesn't have dest type */
if (type == nir_type_invalid)
type = nir_type_uint;
break;
case nir_intrinsic_image_store:
case nir_intrinsic_bindless_image_store:
type = nir_alu_type_get_base_type(nir_intrinsic_src_type(instr));
/* SpvOpAtomicStore doesn't have src type */
if (type == nir_type_invalid)
type = nir_type_uint;
break;
case nir_intrinsic_image_store:
case nir_intrinsic_bindless_image_store:
type = nir_alu_type_get_base_type(nir_intrinsic_src_type(instr));
/* SpvOpAtomicStore doesn't have src type */
if (type == nir_type_invalid)
type = nir_type_uint;
break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_inc_wrap:
case nir_intrinsic_bindless_image_atomic_inc_wrap:
type = nir_type_uint;
break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_inc_wrap:
case nir_intrinsic_bindless_image_atomic_inc_wrap:
type = nir_type_uint;
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_imax:
type = nir_type_int;
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_imax:
type = nir_type_int;
break;
default:
unreachable("Unhandled NIR image intrinsic");
}
default:
unreachable("Unhandled NIR image intrinsic");
}
switch (type) {
case nir_type_uint:
return bit_size == 16 ? TYPE_U16 : TYPE_U32;
case nir_type_int:
return bit_size == 16 ? TYPE_S16 : TYPE_S32;
case nir_type_float:
return bit_size == 16 ? TYPE_F16 : TYPE_F32;
default:
unreachable("bad type");
}
switch (type) {
case nir_type_uint:
return bit_size == 16 ? TYPE_U16 : TYPE_U32;
case nir_type_int:
return bit_size == 16 ? TYPE_S16 : TYPE_S32;
case nir_type_float:
return bit_size == 16 ? TYPE_F16 : TYPE_F32;
default:
unreachable("bad type");
}
}
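A small standalone sketch of the mapping above, using plain C strings rather than the real ir3 type_t values (illustrative only): the base type comes from the intrinsic, the width from the NIR bit size, and the atomics always resolve to an integer base type.

#include <assert.h>
#include <string.h>

/* Toy model only: mirrors the base-type/bit-size selection above. */
static const char *
image_type_name(char base /* 'u', 'i' or 'f' */, int bit_size)
{
   if (base == 'u')
      return bit_size == 16 ? "TYPE_U16" : "TYPE_U32";
   if (base == 'i')
      return bit_size == 16 ? "TYPE_S16" : "TYPE_S32";
   return bit_size == 16 ? "TYPE_F16" : "TYPE_F32";
}

int
main(void)
{
   assert(!strcmp(image_type_name('f', 16), "TYPE_F16")); /* fp16 image load  */
   assert(!strcmp(image_type_name('u', 32), "TYPE_U32")); /* image atomic add */
   return 0;
}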
/* Returns the number of components for the different image formats
@ -183,8 +182,8 @@ ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr)
unsigned
ir3_get_num_components_for_image_format(enum pipe_format format)
{
if (format == PIPE_FORMAT_NONE)
return 4;
else
return util_format_get_nr_components(format);
if (format == PIPE_FORMAT_NONE)
return 4;
else
return util_format_get_nr_components(format);
}

@ -29,14 +29,15 @@
#include "ir3_context.h"
void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping,
unsigned num_textures);
struct ir3_instruction *ir3_ssbo_to_ibo(struct ir3_context *ctx, nir_src src);
unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
struct ir3_instruction *ir3_image_to_ibo(struct ir3_context *ctx, nir_src src);
unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
unsigned ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp);
unsigned ir3_get_image_coords(const nir_intrinsic_instr *instr,
unsigned *flagsp);
type_t ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr);
unsigned ir3_get_num_components_for_image_format(enum pipe_format);

File diff suppressed because it is too large
@ -37,127 +37,130 @@
static bool
compute_block_liveness(struct ir3_liveness *live, struct ir3_block *block,
BITSET_WORD *tmp_live, unsigned bitset_words)
BITSET_WORD *tmp_live, unsigned bitset_words)
{
memcpy(tmp_live, live->live_out[block->index], bitset_words *
sizeof(BITSET_WORD));
memcpy(tmp_live, live->live_out[block->index],
bitset_words * sizeof(BITSET_WORD));
/* Process instructions */
foreach_instr_rev (instr, &block->instr_list) {
ra_foreach_dst(dst, instr) {
if (BITSET_TEST(tmp_live, dst->name))
dst->flags &= ~IR3_REG_UNUSED;
else
dst->flags |= IR3_REG_UNUSED;
BITSET_CLEAR(tmp_live, dst->name);
}
/* Process instructions */
foreach_instr_rev (instr, &block->instr_list) {
ra_foreach_dst (dst, instr) {
if (BITSET_TEST(tmp_live, dst->name))
dst->flags &= ~IR3_REG_UNUSED;
else
dst->flags |= IR3_REG_UNUSED;
BITSET_CLEAR(tmp_live, dst->name);
}
/* Phi node uses occur after the predecessor block */
if (instr->opc != OPC_META_PHI) {
ra_foreach_src(src, instr) {
if (BITSET_TEST(tmp_live, src->def->name))
src->flags &= ~IR3_REG_KILL;
else
src->flags |= IR3_REG_KILL;
}
/* Phi node uses occur after the predecessor block */
if (instr->opc != OPC_META_PHI) {
ra_foreach_src (src, instr) {
if (BITSET_TEST(tmp_live, src->def->name))
src->flags &= ~IR3_REG_KILL;
else
src->flags |= IR3_REG_KILL;
}
ra_foreach_src(src, instr) {
if (BITSET_TEST(tmp_live, src->def->name))
src->flags &= ~IR3_REG_FIRST_KILL;
else
src->flags |= IR3_REG_FIRST_KILL;
BITSET_SET(tmp_live, src->def->name);
}
}
}
ra_foreach_src (src, instr) {
if (BITSET_TEST(tmp_live, src->def->name))
src->flags &= ~IR3_REG_FIRST_KILL;
else
src->flags |= IR3_REG_FIRST_KILL;
BITSET_SET(tmp_live, src->def->name);
}
}
}
memcpy(live->live_in[block->index], tmp_live,
bitset_words * sizeof(BITSET_WORD));
memcpy(live->live_in[block->index], tmp_live,
bitset_words * sizeof(BITSET_WORD));
bool progress = false;
for (unsigned i = 0; i < block->predecessors_count; i++) {
const struct ir3_block *pred = block->predecessors[i];
for (unsigned j = 0; j < bitset_words; j++) {
if (tmp_live[j] & ~live->live_out[pred->index][j])
progress = true;
live->live_out[pred->index][j] |= tmp_live[j];
}
bool progress = false;
for (unsigned i = 0; i < block->predecessors_count; i++) {
const struct ir3_block *pred = block->predecessors[i];
for (unsigned j = 0; j < bitset_words; j++) {
if (tmp_live[j] & ~live->live_out[pred->index][j])
progress = true;
live->live_out[pred->index][j] |= tmp_live[j];
}
/* Process phi sources. */
foreach_instr (phi, &block->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if (!phi->srcs[i]->def)
continue;
unsigned name = phi->srcs[i]->def->name;
if (!BITSET_TEST(live->live_out[pred->index], name)) {
progress = true;
BITSET_SET(live->live_out[pred->index], name);
}
}
}
/* Process phi sources. */
foreach_instr (phi, &block->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if (!phi->srcs[i]->def)
continue;
unsigned name = phi->srcs[i]->def->name;
if (!BITSET_TEST(live->live_out[pred->index], name)) {
progress = true;
BITSET_SET(live->live_out[pred->index], name);
}
}
}
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
const struct ir3_block *pred = block->physical_predecessors[i];
unsigned name;
BITSET_FOREACH_SET(name, tmp_live, live->definitions_count) {
struct ir3_register *reg = live->definitions[name];
if (!(reg->flags & IR3_REG_SHARED))
continue;
if (!BITSET_TEST(live->live_out[pred->index], name)) {
progress = true;
BITSET_SET(live->live_out[pred->index], name);
}
}
}
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
const struct ir3_block *pred = block->physical_predecessors[i];
unsigned name;
BITSET_FOREACH_SET (name, tmp_live, live->definitions_count) {
struct ir3_register *reg = live->definitions[name];
if (!(reg->flags & IR3_REG_SHARED))
continue;
if (!BITSET_TEST(live->live_out[pred->index], name)) {
progress = true;
BITSET_SET(live->live_out[pred->index], name);
}
}
}
return progress;
return progress;
}
struct ir3_liveness *ir3_calc_liveness(struct ir3_shader_variant *v)
struct ir3_liveness *
ir3_calc_liveness(struct ir3_shader_variant *v)
{
struct ir3_liveness *live = rzalloc(NULL, struct ir3_liveness);
struct ir3_liveness *live = rzalloc(NULL, struct ir3_liveness);
/* Reserve name 0 to mean "doesn't have a name yet" to make the debug
* output nicer.
*/
array_insert(live, live->definitions, NULL);
/* Reserve name 0 to mean "doesn't have a name yet" to make the debug
* output nicer.
*/
array_insert(live, live->definitions, NULL);
/* Build definition <-> name mapping */
unsigned block_count = 0;
foreach_block (block, &v->ir->block_list) {
block->index = block_count++;
foreach_instr (instr, &block->instr_list) {
ra_foreach_dst(dst, instr) {
dst->name = live->definitions_count;
array_insert(live, live->definitions, dst);
}
}
}
/* Build definition <-> name mapping */
unsigned block_count = 0;
foreach_block (block, &v->ir->block_list) {
block->index = block_count++;
foreach_instr (instr, &block->instr_list) {
ra_foreach_dst (dst, instr) {
dst->name = live->definitions_count;
array_insert(live, live->definitions, dst);
}
}
}
live->block_count = block_count;
live->block_count = block_count;
unsigned bitset_words = BITSET_WORDS(live->definitions_count);
BITSET_WORD *tmp_live = ralloc_array(live, BITSET_WORD, bitset_words);
live->live_in = ralloc_array(live, BITSET_WORD *, block_count);
live->live_out = ralloc_array(live, BITSET_WORD *, block_count);
unsigned i = 0;
foreach_block (block, &v->ir->block_list) {
block->index = i++;
live->live_in[block->index] = rzalloc_array(live, BITSET_WORD, bitset_words);
live->live_out[block->index] = rzalloc_array(live, BITSET_WORD, bitset_words);
}
unsigned bitset_words = BITSET_WORDS(live->definitions_count);
BITSET_WORD *tmp_live = ralloc_array(live, BITSET_WORD, bitset_words);
live->live_in = ralloc_array(live, BITSET_WORD *, block_count);
live->live_out = ralloc_array(live, BITSET_WORD *, block_count);
unsigned i = 0;
foreach_block (block, &v->ir->block_list) {
block->index = i++;
live->live_in[block->index] =
rzalloc_array(live, BITSET_WORD, bitset_words);
live->live_out[block->index] =
rzalloc_array(live, BITSET_WORD, bitset_words);
}
bool progress = true;
while (progress) {
progress = false;
foreach_block_rev (block, &v->ir->block_list) {
progress |=
compute_block_liveness(live, block, tmp_live, bitset_words);
}
}
bool progress = true;
while (progress) {
progress = false;
foreach_block_rev (block, &v->ir->block_list) {
progress |=
compute_block_liveness(live, block, tmp_live, bitset_words);
}
}
return live;
return live;
}
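A minimal standalone sketch of the same backward fixed point on a toy three-block CFG, assuming plain C and ignoring phis, physical edges and shared registers (values are bits in a uint32_t; this pulls live-out from successors, which is the equivalent formulation of pushing live-in into predecessors as above).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NBLOCKS 3

struct toy_block {
   uint32_t def, use;     /* values defined / used in this block */
   int succ[2], nsucc;    /* successor block indices */
   uint32_t live_in, live_out;
};

static void
toy_liveness(struct toy_block *b)
{
   bool progress = true;
   while (progress) {
      progress = false;
      for (int i = NBLOCKS - 1; i >= 0; i--) {
         uint32_t out = 0;
         for (int s = 0; s < b[i].nsucc; s++)
            out |= b[b[i].succ[s]].live_in;
         uint32_t in = b[i].use | (out & ~b[i].def);
         if (in != b[i].live_in || out != b[i].live_out)
            progress = true;
         b[i].live_in = in;
         b[i].live_out = out;
      }
   }
}

int
main(void)
{
   /* 0 -> 1 -> 2, value 0 defined in block 0 and used in block 2 */
   struct toy_block b[NBLOCKS] = {
      {.def = 1u << 0, .use = 0, .succ = {1}, .nsucc = 1},
      {.def = 0, .use = 0, .succ = {2}, .nsucc = 1},
      {.def = 0, .use = 1u << 0, .nsucc = 0},
   };
   toy_liveness(b);
   printf("live_out(b0)=0x%x live_in(b2)=0x%x\n",
          (unsigned)b[0].live_out, (unsigned)b[2].live_in);
   return 0;
}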
/* Return true if "def" is live after "instr". It's assumed that "def"
@ -165,32 +168,31 @@ struct ir3_liveness *ir3_calc_liveness(struct ir3_shader_variant *v)
*/
bool
ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
struct ir3_instruction *instr)
struct ir3_instruction *instr)
{
/* If it's live out then it's definitely live at the instruction. */
if (BITSET_TEST(live->live_out[instr->block->index], def->name))
return true;
/* If it's live out then it's definitely live at the instruction. */
if (BITSET_TEST(live->live_out[instr->block->index], def->name))
return true;
/* If it's not live in and not defined in the same block then the live
* range can't extend to the instruction.
*/
if (def->instr->block != instr->block &&
!BITSET_TEST(live->live_in[instr->block->index], def->name))
return false;
/* If it's not live in and not defined in the same block then the live
* range can't extend to the instruction.
*/
if (def->instr->block != instr->block &&
!BITSET_TEST(live->live_in[instr->block->index], def->name))
return false;
/* Ok, now comes the tricky case, where "def" is killed somewhere in
* "instr"'s block and we have to check if it's before or after.
*/
foreach_instr_rev (test_instr, &instr->block->instr_list) {
if (test_instr == instr)
break;
/* Ok, now comes the tricky case, where "def" is killed somewhere in
* "instr"'s block and we have to check if it's before or after.
*/
foreach_instr_rev (test_instr, &instr->block->instr_list) {
if (test_instr == instr)
break;
for (unsigned i = 0; i < test_instr->srcs_count; i++) {
if (test_instr->srcs[i]->def == def)
return true;
}
}
for (unsigned i = 0; i < test_instr->srcs_count; i++) {
if (test_instr->srcs[i]->def == def)
return true;
}
}
return false;
return false;
}

@ -25,524 +25,542 @@
#include "ir3_shader.h"
struct copy_src {
unsigned flags;
union {
uint32_t imm;
physreg_t reg;
unsigned const_num;
};
unsigned flags;
union {
uint32_t imm;
physreg_t reg;
unsigned const_num;
};
};
struct copy_entry {
physreg_t dst;
unsigned flags;
bool done;
physreg_t dst;
unsigned flags;
bool done;
struct copy_src src;
struct copy_src src;
};
static unsigned
copy_entry_size(const struct copy_entry *entry)
{
return (entry->flags & IR3_REG_HALF) ? 1 : 2;
return (entry->flags & IR3_REG_HALF) ? 1 : 2;
}
static struct copy_src
get_copy_src(const struct ir3_register *reg, unsigned offset)
{
if (reg->flags & IR3_REG_IMMED) {
return (struct copy_src) {
.flags = IR3_REG_IMMED,
.imm = reg->uim_val,
};
} else if (reg->flags & IR3_REG_CONST) {
return (struct copy_src) {
.flags = IR3_REG_CONST,
.const_num = reg->num,
};
} else {
return (struct copy_src) {
.flags = 0,
.reg = ra_reg_get_physreg(reg) + offset,
};
}
if (reg->flags & IR3_REG_IMMED) {
return (struct copy_src){
.flags = IR3_REG_IMMED,
.imm = reg->uim_val,
};
} else if (reg->flags & IR3_REG_CONST) {
return (struct copy_src){
.flags = IR3_REG_CONST,
.const_num = reg->num,
};
} else {
return (struct copy_src){
.flags = 0,
.reg = ra_reg_get_physreg(reg) + offset,
};
}
}
static void
do_xor(struct ir3_instruction *instr, unsigned dst_num, unsigned src1_num, unsigned src2_num, unsigned flags)
do_xor(struct ir3_instruction *instr, unsigned dst_num, unsigned src1_num,
unsigned src2_num, unsigned flags)
{
struct ir3_instruction *xor = ir3_instr_create(instr->block, OPC_XOR_B, 1, 2);
ir3_dst_create(xor, dst_num, flags);
ir3_src_create(xor, src1_num, flags);
ir3_src_create(xor, src2_num, flags);
struct ir3_instruction * xor
= ir3_instr_create(instr->block, OPC_XOR_B, 1, 2);
ir3_dst_create(xor, dst_num, flags);
ir3_src_create(xor, src1_num, flags);
ir3_src_create(xor, src2_num, flags);
ir3_instr_move_before(xor, instr);
ir3_instr_move_before(xor, instr);
}
static void
do_swap(struct ir3_compiler *compiler, struct ir3_instruction *instr,
const struct copy_entry *entry)
const struct copy_entry *entry)
{
assert(!entry->src.flags);
assert(!entry->src.flags);
if (entry->flags & IR3_REG_HALF) {
/* We currently make sure to never emit parallel copies where the
* source/destination is a half-reg above the range accessible to half
* registers. However, when a full-reg source overlaps a half-reg
* destination or vice versa, it can be very, very complicated to come
* up with a series of "legal" swaps and copies to resolve the
* parallel copy. So here we provide a fallback to implement the
* "illegal" swap instead. This may also be useful for implementing
* "spilling" half-regs to the inaccessible space.
*/
if (entry->src.reg >= RA_HALF_SIZE) {
/* Choose a temporary that doesn't overlap src or dst */
physreg_t tmp = entry->dst < 2 ? 2 : 0;
if (entry->flags & IR3_REG_HALF) {
/* We currently make sure to never emit parallel copies where the
* source/destination is a half-reg above the range accessible to half
* registers. However, when a full-reg source overlaps a half-reg
* destination or vice versa, it can be very, very complicated to come
* up with a series of "legal" swaps and copies to resolve the
* parallel copy. So here we provide a fallback to implement the
* "illegal" swap instead. This may also be useful for implementing
* "spilling" half-regs to the inaccessible space.
*/
if (entry->src.reg >= RA_HALF_SIZE) {
/* Choose a temporary that doesn't overlap src or dst */
physreg_t tmp = entry->dst < 2 ? 2 : 0;
/* Swap src and the temporary */
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = entry->src.reg & ~1u },
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
/* Swap src and the temporary */
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = entry->src.reg & ~1u},
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
/* Do the original swap with src replaced with tmp */
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = tmp + (entry->src.reg & 1) },
.dst = entry->dst,
.flags = entry->flags,
});
/* Do the original swap with src replaced with tmp */
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = tmp + (entry->src.reg & 1)},
.dst = entry->dst,
.flags = entry->flags,
});
/* Swap src and the temporary back */
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = entry->src.reg & ~1u },
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
return;
}
/* Swap src and the temporary back */
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = entry->src.reg & ~1u},
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
return;
}
/* If dst is not addressable, we only need to swap the arguments and
* let the case above handle it.
*/
if (entry->dst >= RA_HALF_SIZE) {
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = entry->dst },
.dst = entry->src.reg,
.flags = entry->flags,
});
return;
}
}
/* If dst is not addressable, we only need to swap the arguments and
* let the case above handle it.
*/
if (entry->dst >= RA_HALF_SIZE) {
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = entry->dst},
.dst = entry->src.reg,
.flags = entry->flags,
});
return;
}
}
unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
/* a5xx+ is known to support swz, which enables us to swap two registers
* in-place. If unsupported we emulate it using the xor trick.
*/
if (compiler->gpu_id < 500) {
/* Shared regs only exist since a5xx, so we don't have to provide a
* fallback path for them.
*/
assert(!(entry->flags & IR3_REG_SHARED));
do_xor(instr, dst_num, dst_num, src_num, entry->flags);
do_xor(instr, src_num, src_num, dst_num, entry->flags);
do_xor(instr, dst_num, dst_num, src_num, entry->flags);
} else {
/* Use a macro for shared regs because any shared reg writes need to
* be wrapped in a getone block to work correctly. Writing shared regs
* with multiple threads active does not work, even if they all return
* the same value.
*/
unsigned opc = (entry->flags & IR3_REG_SHARED) ? OPC_SWZ_SHARED_MACRO : OPC_SWZ;
struct ir3_instruction *swz = ir3_instr_create(instr->block, opc, 2, 2);
ir3_dst_create(swz, dst_num, entry->flags);
ir3_dst_create(swz, src_num, entry->flags);
ir3_src_create(swz, src_num, entry->flags);
ir3_src_create(swz, dst_num, entry->flags);
swz->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
swz->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
swz->repeat = 1;
ir3_instr_move_before(swz, instr);
}
/* a5xx+ is known to support swz, which enables us to swap two registers
* in-place. If unsupported we emulate it using the xor trick.
*/
if (compiler->gpu_id < 500) {
/* Shared regs only exist since a5xx, so we don't have to provide a
* fallback path for them.
*/
assert(!(entry->flags & IR3_REG_SHARED));
do_xor(instr, dst_num, dst_num, src_num, entry->flags);
do_xor(instr, src_num, src_num, dst_num, entry->flags);
do_xor(instr, dst_num, dst_num, src_num, entry->flags);
} else {
/* Use a macro for shared regs because any shared reg writes need to
* be wrapped in a getone block to work correctly. Writing shared regs
* with multiple threads active does not work, even if they all return
* the same value.
*/
unsigned opc =
(entry->flags & IR3_REG_SHARED) ? OPC_SWZ_SHARED_MACRO : OPC_SWZ;
struct ir3_instruction *swz = ir3_instr_create(instr->block, opc, 2, 2);
ir3_dst_create(swz, dst_num, entry->flags);
ir3_dst_create(swz, src_num, entry->flags);
ir3_src_create(swz, src_num, entry->flags);
ir3_src_create(swz, dst_num, entry->flags);
swz->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
swz->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
swz->repeat = 1;
ir3_instr_move_before(swz, instr);
}
}
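For the pre-a5xx path, the three do_xor() calls are the classic in-place xor swap; a standalone check of the identity, assuming the two registers are distinct (which the parallel-copy entries guarantee, since destinations never overlap):

#include <assert.h>
#include <stdint.h>

static void
xor_swap(uint32_t *dst, uint32_t *src)
{
   *dst ^= *src;   /* dst = d0 ^ s0             */
   *src ^= *dst;   /* src = s0 ^ (d0 ^ s0) = d0 */
   *dst ^= *src;   /* dst = (d0 ^ s0) ^ d0 = s0 */
}

int
main(void)
{
   uint32_t d = 0x1234, s = 0xabcd;
   xor_swap(&d, &s);
   assert(d == 0xabcd && s == 0x1234);
   return 0;
}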
static void
do_copy(struct ir3_compiler *compiler, struct ir3_instruction *instr,
const struct copy_entry *entry)
const struct copy_entry *entry)
{
if (entry->flags & IR3_REG_HALF) {
/* See do_swap() for why this is here. */
if (entry->dst >= RA_HALF_SIZE) {
/* TODO: is there a hw instruction we can use for this case? */
physreg_t tmp = !entry->src.flags && entry->src.reg < 2 ? 2 : 0;
if (entry->flags & IR3_REG_HALF) {
/* See do_swap() for why this is here. */
if (entry->dst >= RA_HALF_SIZE) {
/* TODO: is there a hw instruction we can use for this case? */
physreg_t tmp = !entry->src.flags && entry->src.reg < 2 ? 2 : 0;
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = entry->dst & ~1u },
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = entry->dst & ~1u},
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
do_copy(compiler, instr, &(struct copy_entry) {
.src = entry->src,
.dst = tmp + (entry->dst & 1),
.flags = entry->flags,
});
do_copy(compiler, instr,
&(struct copy_entry){
.src = entry->src,
.dst = tmp + (entry->dst & 1),
.flags = entry->flags,
});
do_swap(compiler, instr, &(struct copy_entry) {
.src = { .reg = entry->dst & ~1u },
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
return;
}
do_swap(compiler, instr,
&(struct copy_entry){
.src = {.reg = entry->dst & ~1u},
.dst = tmp,
.flags = entry->flags & ~IR3_REG_HALF,
});
return;
}
if (!entry->src.flags && entry->src.reg >= RA_HALF_SIZE) {
unsigned src_num =
ra_physreg_to_num(entry->src.reg & ~1u, entry->flags & ~IR3_REG_HALF);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
if (!entry->src.flags && entry->src.reg >= RA_HALF_SIZE) {
unsigned src_num = ra_physreg_to_num(entry->src.reg & ~1u,
entry->flags & ~IR3_REG_HALF);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
if (entry->src.reg % 2 == 0) {
/* cov.u32u16 dst, src */
struct ir3_instruction *cov = ir3_instr_create(instr->block, OPC_MOV, 1, 1);
ir3_dst_create(cov, dst_num, entry->flags);
ir3_src_create(cov, src_num, entry->flags & ~IR3_REG_HALF);
cov->cat1.dst_type = TYPE_U16;
cov->cat1.src_type = TYPE_U32;
ir3_instr_move_before(cov, instr);
} else {
/* shr.b dst, src, h(16) */
struct ir3_instruction *shr = ir3_instr_create(instr->block, OPC_SHR_B, 1, 2);
ir3_dst_create(shr, dst_num, entry->flags);
ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF);
ir3_src_create(shr, 0, entry->flags | IR3_REG_IMMED)->uim_val = 16;
ir3_instr_move_before(shr, instr);
}
return;
}
}
if (entry->src.reg % 2 == 0) {
/* cov.u32u16 dst, src */
struct ir3_instruction *cov =
ir3_instr_create(instr->block, OPC_MOV, 1, 1);
ir3_dst_create(cov, dst_num, entry->flags);
ir3_src_create(cov, src_num, entry->flags & ~IR3_REG_HALF);
cov->cat1.dst_type = TYPE_U16;
cov->cat1.src_type = TYPE_U32;
ir3_instr_move_before(cov, instr);
} else {
/* shr.b dst, src, h(16) */
struct ir3_instruction *shr =
ir3_instr_create(instr->block, OPC_SHR_B, 1, 2);
ir3_dst_create(shr, dst_num, entry->flags);
ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF);
ir3_src_create(shr, 0, entry->flags | IR3_REG_IMMED)->uim_val = 16;
ir3_instr_move_before(shr, instr);
}
return;
}
}
unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);
/* Similar to the swap case, we have to use a macro for shared regs. */
unsigned opc = (entry->flags & IR3_REG_SHARED) ? OPC_READ_FIRST_MACRO : OPC_MOV;
struct ir3_instruction *mov = ir3_instr_create(instr->block, opc, 1, 1);
ir3_dst_create(mov, dst_num, entry->flags);
ir3_src_create(mov, src_num, entry->flags | entry->src.flags);
mov->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
mov->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
if (entry->src.flags & IR3_REG_IMMED)
mov->srcs[0]->uim_val = entry->src.imm;
else if (entry->src.flags & IR3_REG_CONST)
mov->srcs[0]->num = entry->src.const_num;
ir3_instr_move_before(mov, instr);
/* Similar to the swap case, we have to use a macro for shared regs. */
unsigned opc =
(entry->flags & IR3_REG_SHARED) ? OPC_READ_FIRST_MACRO : OPC_MOV;
struct ir3_instruction *mov = ir3_instr_create(instr->block, opc, 1, 1);
ir3_dst_create(mov, dst_num, entry->flags);
ir3_src_create(mov, src_num, entry->flags | entry->src.flags);
mov->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
mov->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
if (entry->src.flags & IR3_REG_IMMED)
mov->srcs[0]->uim_val = entry->src.imm;
else if (entry->src.flags & IR3_REG_CONST)
mov->srcs[0]->num = entry->src.const_num;
ir3_instr_move_before(mov, instr);
}
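What the cov.u32u16 / shr.b pair above computes, on plain integers: an even half register is the low 16 bits of the containing full register and an odd one is the high 16 bits (standalone sketch, not the hardware encoding):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint32_t full = 0xbeefcafe;
   uint16_t even_half = (uint16_t)full;           /* cov.u32u16 dst, src */
   uint16_t odd_half = (uint16_t)(full >> 16);    /* shr.b dst, src, 16  */
   assert(even_half == 0xcafe && odd_half == 0xbeef);
   return 0;
}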
struct copy_ctx {
/* For each physreg, the number of pending copy entries that use it as a
* source. Once this drops to zero, then the physreg is unblocked and can
* be moved to.
*/
unsigned physreg_use_count[RA_MAX_FILE_SIZE];
/* For each physreg, the number of pending copy entries that use it as a
* source. Once this drops to zero, then the physreg is unblocked and can
* be moved to.
*/
unsigned physreg_use_count[RA_MAX_FILE_SIZE];
/* For each physreg, the pending copy_entry that uses it as a dest. */
struct copy_entry *physreg_dst[RA_MAX_FILE_SIZE];
/* For each physreg, the pending copy_entry that uses it as a dest. */
struct copy_entry *physreg_dst[RA_MAX_FILE_SIZE];
struct copy_entry entries[RA_MAX_FILE_SIZE];
unsigned entry_count;
struct copy_entry entries[RA_MAX_FILE_SIZE];
unsigned entry_count;
};
static bool
entry_blocked(struct copy_entry *entry, struct copy_ctx *ctx)
{
for (unsigned i = 0; i < copy_entry_size(entry); i++) {
if (ctx->physreg_use_count[entry->dst + i] != 0)
return true;
}
for (unsigned i = 0; i < copy_entry_size(entry); i++) {
if (ctx->physreg_use_count[entry->dst + i] != 0)
return true;
}
return false;
return false;
}
static void
split_32bit_copy(struct copy_ctx *ctx, struct copy_entry *entry)
{
assert(!entry->done);
assert(!(entry->flags & (IR3_REG_IMMED | IR3_REG_CONST)));
assert(copy_entry_size(entry) == 2);
struct copy_entry *new_entry = &ctx->entries[ctx->entry_count++];
assert(!entry->done);
assert(!(entry->flags & (IR3_REG_IMMED | IR3_REG_CONST)));
assert(copy_entry_size(entry) == 2);
struct copy_entry *new_entry = &ctx->entries[ctx->entry_count++];
new_entry->dst = entry->dst + 1;
new_entry->src.flags = entry->src.flags;
new_entry->src.reg = entry->src.reg + 1;
new_entry->done = false;
entry->flags |= IR3_REG_HALF;
new_entry->flags = entry->flags;
ctx->physreg_dst[entry->dst + 1] = new_entry;
new_entry->dst = entry->dst + 1;
new_entry->src.flags = entry->src.flags;
new_entry->src.reg = entry->src.reg + 1;
new_entry->done = false;
entry->flags |= IR3_REG_HALF;
new_entry->flags = entry->flags;
ctx->physreg_dst[entry->dst + 1] = new_entry;
}
static void
_handle_copies(struct ir3_compiler *compiler, struct ir3_instruction *instr,
struct copy_ctx *ctx)
struct copy_ctx *ctx)
{
/* Set up the bookkeeping */
memset(ctx->physreg_dst, 0, sizeof(ctx->physreg_dst));
memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));
/* Set up the bookkeeping */
memset(ctx->physreg_dst, 0, sizeof(ctx->physreg_dst));
memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
for (unsigned j = 0; j < copy_entry_size(entry); j++) {
if (!entry->src.flags)
ctx->physreg_use_count[entry->src.reg + j]++;
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
for (unsigned j = 0; j < copy_entry_size(entry); j++) {
if (!entry->src.flags)
ctx->physreg_use_count[entry->src.reg + j]++;
/* Copies should not have overlapping destinations. */
assert(!ctx->physreg_dst[entry->dst + j]);
ctx->physreg_dst[entry->dst + j] = entry;
}
}
/* Copies should not have overlapping destinations. */
assert(!ctx->physreg_dst[entry->dst + j]);
ctx->physreg_dst[entry->dst + j] = entry;
}
}
bool progress = true;
while (progress) {
progress = false;
bool progress = true;
while (progress) {
progress = false;
/* Step 1: resolve paths in the transfer graph. This means finding
* copies whose destinations aren't blocked by something else and then
* emitting them, continuing this process until every copy is blocked
* and there are only cycles left.
*
* TODO: We should note that src is also available in dst to unblock
* cycles that src is involved in.
*/
/* Step 1: resolve paths in the transfer graph. This means finding
* copies whose destinations aren't blocked by something else and then
* emitting them, continuing this process until every copy is blocked
* and there are only cycles left.
*
* TODO: We should note that src is also available in dst to unblock
* cycles that src is involved in.
*/
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (!entry->done && !entry_blocked(entry, ctx)) {
entry->done = true;
progress = true;
do_copy(compiler, instr, entry);
for (unsigned j = 0; j < copy_entry_size(entry); j++) {
if (!entry->src.flags)
ctx->physreg_use_count[entry->src.reg + j]--;
ctx->physreg_dst[entry->dst + j] = NULL;
}
}
}
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (!entry->done && !entry_blocked(entry, ctx)) {
entry->done = true;
progress = true;
do_copy(compiler, instr, entry);
for (unsigned j = 0; j < copy_entry_size(entry); j++) {
if (!entry->src.flags)
ctx->physreg_use_count[entry->src.reg + j]--;
ctx->physreg_dst[entry->dst + j] = NULL;
}
}
}
if (progress)
continue;
if (progress)
continue;
/* Step 2: Find partially blocked copies and split them. In the
* mergedregs case, we can split 32-bit copies that are only blocked on one
* 16-bit half, and splitting them helps get things moving.
*
* We can skip splitting copies if the source isn't a register,
* however, because it does not unblock anything and therefore doesn't
* contribute to making forward progress with step 1. These copies
* should still be resolved eventually in step 1 because they can't be
* part of a cycle.
*/
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (entry->done || entry->flags & IR3_REG_HALF)
continue;
/* Step 2: Find partially blocked copies and split them. In the
* mergedregs case, we can split 32-bit copies that are only blocked on one
* 16-bit half, and splitting them helps get things moving.
*
* We can skip splitting copies if the source isn't a register,
* however, because it does not unblock anything and therefore doesn't
* contribute to making forward progress with step 1. These copies
* should still be resolved eventually in step 1 because they can't be
* part of a cycle.
*/
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (entry->done || entry->flags & IR3_REG_HALF)
continue;
if (((ctx->physreg_use_count[entry->dst] == 0 ||
ctx->physreg_use_count[entry->dst + 1] == 0)) &&
!(entry->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
split_32bit_copy(ctx, entry);
progress = true;
}
}
}
if (((ctx->physreg_use_count[entry->dst] == 0 ||
ctx->physreg_use_count[entry->dst + 1] == 0)) &&
!(entry->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
split_32bit_copy(ctx, entry);
progress = true;
}
}
}
/* Step 3: resolve cycles through swapping.
*
* At this point, the transfer graph should consist of only cycles.
* The reason is that, given any physreg n_1 that's the source of a
* remaining entry, it has a destination n_2, which (because every
* copy is blocked) is the source of some other copy whose destination
* is n_3, and so we can follow the chain until we get a cycle. If we
* reached some other node than n_1:
*
* n_1 -> n_2 -> ... -> n_i
* ^ |
* |-------------|
*
* then n_2 would be the destination of 2 copies, which is illegal
* (checked above in an assert). So n_1 must be part of a cycle:
*
* n_1 -> n_2 -> ... -> n_i
* ^ |
* |---------------------|
*
* and this must be the only cycle n_1 is involved in, because any other
* path starting from n_1 would also have to end in n_1, resulting in
* a node somewhere along the way being the destination of 2 copies
* when the 2 paths merge.
*
* The way we resolve the cycle is through picking a copy (n_1, n_2)
* and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
* out of the cycle:
*
* n_1 -> ... -> n_i
* ^ |
* |--------------|
*
* and we can keep repeating this until the cycle is empty.
*/
/* Step 3: resolve cycles through swapping.
*
* At this point, the transfer graph should consist of only cycles.
* The reason is that, given any physreg n_1 that's the source of a
* remaining entry, it has a destination n_2, which (because every
* copy is blocked) is the source of some other copy whose destination
* is n_3, and so we can follow the chain until we get a cycle. If we
* reached some other node than n_1:
*
* n_1 -> n_2 -> ... -> n_i
* ^ |
* |-------------|
*
* then n_2 would be the destination of 2 copies, which is illegal
* (checked above in an assert). So n_1 must be part of a cycle:
*
* n_1 -> n_2 -> ... -> n_i
* ^ |
* |---------------------|
*
* and this must be the only cycle n_1 is involved in, because any other
* path starting from n_1 would also have to end in n_1, resulting in
* a node somewhere along the way being the destination of 2 copies
* when the 2 paths merge.
*
* The way we resolve the cycle is through picking a copy (n_1, n_2)
* and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
* out of the cycle:
*
* n_1 -> ... -> n_i
* ^ |
* |--------------|
*
* and we can keep repeating this until the cycle is empty.
*/
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (entry->done)
continue;
for (unsigned i = 0; i < ctx->entry_count; i++) {
struct copy_entry *entry = &ctx->entries[i];
if (entry->done)
continue;
assert(!entry->src.flags);
assert(!entry->src.flags);
/* catch trivial copies */
if (entry->dst == entry->src.reg) {
entry->done = true;
continue;
}
/* catch trivial copies */
if (entry->dst == entry->src.reg) {
entry->done = true;
continue;
}
do_swap(compiler, instr, entry);
do_swap(compiler, instr, entry);
/* Split any blocking copies whose sources are only partially
* contained within our destination.
*/
if (entry->flags & IR3_REG_HALF) {
for (unsigned j = 0; j < ctx->entry_count; j++) {
struct copy_entry *blocking = &ctx->entries[j];
/* Split any blocking copies whose sources are only partially
* contained within our destination.
*/
if (entry->flags & IR3_REG_HALF) {
for (unsigned j = 0; j < ctx->entry_count; j++) {
struct copy_entry *blocking = &ctx->entries[j];
if (blocking->done)
continue;
if (blocking->done)
continue;
if (blocking->src.reg <= entry->dst &&
blocking->src.reg + 1 >= entry->dst &&
!(blocking->flags & IR3_REG_HALF)) {
split_32bit_copy(ctx, blocking);
}
}
}
if (blocking->src.reg <= entry->dst &&
blocking->src.reg + 1 >= entry->dst &&
!(blocking->flags & IR3_REG_HALF)) {
split_32bit_copy(ctx, blocking);
}
}
}
/* Update sources of blocking copies.
*
* Note: at this point, every blocking copy's source should be
* contained within our destination.
*/
for (unsigned j = 0; j < ctx->entry_count; j++) {
struct copy_entry *blocking = &ctx->entries[j];
if (blocking->src.reg >= entry->dst &&
blocking->src.reg < entry->dst + copy_entry_size(entry)) {
blocking->src.reg = entry->src.reg + (blocking->src.reg - entry->dst);
}
}
}
/* Update sources of blocking copies.
*
* Note: at this point, every blocking copy's source should be
* contained within our destination.
*/
for (unsigned j = 0; j < ctx->entry_count; j++) {
struct copy_entry *blocking = &ctx->entries[j];
if (blocking->src.reg >= entry->dst &&
blocking->src.reg < entry->dst + copy_entry_size(entry)) {
blocking->src.reg =
entry->src.reg + (blocking->src.reg - entry->dst);
}
}
}
}
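A standalone model of step 3 for the pure-permutation case: each swap satisfies one destination, and redirecting the remaining sources shrinks the cycle by one until only trivial copies are left. Toy registers hold plain ints; immediates, constants and half/full splitting are ignored.

#include <assert.h>
#include <stdio.h>

#define NREGS 4

static void
swap(int *regs, int a, int b)
{
   int t = regs[a];
   regs[a] = regs[b];
   regs[b] = t;
}

int
main(void)
{
   /* dst i must end up with the value currently in regs[src[i]] */
   int src[NREGS] = {2, 0, 1, 3};   /* r0<-r2, r1<-r0, r2<-r1, r3<-r3 */
   int regs[NREGS] = {100, 101, 102, 103};
   int expect[NREGS];
   for (int i = 0; i < NREGS; i++)
      expect[i] = regs[src[i]];

   for (int i = 0; i < NREGS; i++) {
      /* trivial copy, nothing to do */
      if (src[i] == i)
         continue;
      /* swap the wanted value into place ... */
      swap(regs, i, src[i]);
      /* ... and redirect any pending copy that read from r_i to where
       * that value now lives (mirrors the "update sources" loop above).
       */
      for (int j = i + 1; j < NREGS; j++) {
         if (src[j] == i)
            src[j] = src[i];
      }
   }

   for (int i = 0; i < NREGS; i++)
      assert(regs[i] == expect[i]);
   printf("r0..r3 = %d %d %d %d\n", regs[0], regs[1], regs[2], regs[3]);
   return 0;
}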
static void
handle_copies(struct ir3_shader_variant *v, struct ir3_instruction *instr,
struct copy_entry *entries, unsigned entry_count)
struct copy_entry *entries, unsigned entry_count)
{
struct copy_ctx ctx;
struct copy_ctx ctx;
/* handle shared copies first */
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (entries[i].flags & IR3_REG_SHARED)
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
/* handle shared copies first */
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (entries[i].flags & IR3_REG_SHARED)
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
if (v->mergedregs) {
/* Half regs and full regs are in the same file, so handle everything
* at once.
*/
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (!(entries[i].flags & IR3_REG_SHARED))
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
} else {
/* There may be both half copies and full copies, so we have to split
* them up since they don't interfere.
*/
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (entries[i].flags & IR3_REG_HALF)
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
if (v->mergedregs) {
/* Half regs and full regs are in the same file, so handle everything
* at once.
*/
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (!(entries[i].flags & IR3_REG_SHARED))
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
} else {
/* There may be both half copies and full copies, so we have to split
* them up since they don't interfere.
*/
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (entries[i].flags & IR3_REG_HALF)
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (!(entries[i].flags & (IR3_REG_HALF | IR3_REG_SHARED)))
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
}
ctx.entry_count = 0;
for (unsigned i = 0; i < entry_count; i++) {
if (!(entries[i].flags & (IR3_REG_HALF | IR3_REG_SHARED)))
ctx.entries[ctx.entry_count++] = entries[i];
}
_handle_copies(v->shader->compiler, instr, &ctx);
}
}
void
ir3_lower_copies(struct ir3_shader_variant *v)
{
DECLARE_ARRAY(struct copy_entry, copies);
copies_count = copies_sz = 0;
copies = NULL;
DECLARE_ARRAY(struct copy_entry, copies);
copies_count = copies_sz = 0;
copies = NULL;
foreach_block (block, &v->ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PARALLEL_COPY) {
copies_count = 0;
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
struct ir3_register *src = instr->srcs[i];
unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
unsigned dst_physreg = ra_reg_get_physreg(dst);
for (unsigned j = 0; j < reg_elems(dst); j++) {
array_insert(NULL, copies, (struct copy_entry) {
.dst = dst_physreg + j * reg_elem_size(dst),
.src = get_copy_src(src, j * reg_elem_size(dst)),
.flags = flags,
});
}
}
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_COLLECT) {
copies_count = 0;
struct ir3_register *dst = instr->dsts[0];
unsigned flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
for (unsigned i = 0; i < instr->srcs_count; i++) {
struct ir3_register *src = instr->srcs[i];
array_insert(NULL, copies, (struct copy_entry) {
.dst = ra_num_to_physreg(dst->num + i, flags),
.src = get_copy_src(src, 0),
.flags = flags,
});
}
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_SPLIT) {
copies_count = 0;
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
array_insert(NULL, copies, (struct copy_entry) {
.dst = ra_reg_get_physreg(dst),
.src = get_copy_src(src, instr->split.off * reg_elem_size(dst)),
.flags = flags,
});
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_PHI) {
list_del(&instr->node);
}
}
}
foreach_block (block, &v->ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->opc == OPC_META_PARALLEL_COPY) {
copies_count = 0;
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
struct ir3_register *src = instr->srcs[i];
unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
unsigned dst_physreg = ra_reg_get_physreg(dst);
for (unsigned j = 0; j < reg_elems(dst); j++) {
array_insert(
NULL, copies,
(struct copy_entry){
.dst = dst_physreg + j * reg_elem_size(dst),
.src = get_copy_src(src, j * reg_elem_size(dst)),
.flags = flags,
});
}
}
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_COLLECT) {
copies_count = 0;
struct ir3_register *dst = instr->dsts[0];
unsigned flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
for (unsigned i = 0; i < instr->srcs_count; i++) {
struct ir3_register *src = instr->srcs[i];
array_insert(NULL, copies,
(struct copy_entry){
.dst = ra_num_to_physreg(dst->num + i, flags),
.src = get_copy_src(src, 0),
.flags = flags,
});
}
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_SPLIT) {
copies_count = 0;
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
array_insert(NULL, copies,
(struct copy_entry){
.dst = ra_reg_get_physreg(dst),
.src = get_copy_src(
src, instr->split.off * reg_elem_size(dst)),
.flags = flags,
});
handle_copies(v, instr, copies, copies_count);
list_del(&instr->node);
} else if (instr->opc == OPC_META_PHI) {
list_del(&instr->node);
}
}
}
if (copies)
ralloc_free(copies);
if (copies)
ralloc_free(copies);
}

@ -35,220 +35,224 @@
static void
replace_pred(struct ir3_block *block, struct ir3_block *old_pred,
struct ir3_block *new_pred)
struct ir3_block *new_pred)
{
for (unsigned i = 0; i < block->predecessors_count; i++) {
if (block->predecessors[i] == old_pred) {
block->predecessors[i] = new_pred;
return;
}
}
for (unsigned i = 0; i < block->predecessors_count; i++) {
if (block->predecessors[i] == old_pred) {
block->predecessors[i] = new_pred;
return;
}
}
}
static void
replace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred,
struct ir3_block *new_pred)
struct ir3_block *new_pred)
{
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
if (block->physical_predecessors[i] == old_pred) {
block->physical_predecessors[i] = new_pred;
return;
}
}
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
if (block->physical_predecessors[i] == old_pred) {
block->physical_predecessors[i] = new_pred;
return;
}
}
}
static void
mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
{
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags);
mov_dst->wrmask = dst->wrmask;
struct ir3_register *src =
ir3_src_create(mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED);
src->uim_val = immed;
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
mov->cat1.src_type = mov->cat1.dst_type;
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags);
mov_dst->wrmask = dst->wrmask;
struct ir3_register *src = ir3_src_create(
mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED);
src->uim_val = immed;
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
mov->cat1.src_type = mov->cat1.dst_type;
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
}
static struct ir3_block *
split_block(struct ir3 *ir, struct ir3_block *before_block,
struct ir3_instruction *instr, struct ir3_block **then)
struct ir3_instruction *instr, struct ir3_block **then)
{
struct ir3_block *then_block = ir3_block_create(ir);
struct ir3_block *after_block = ir3_block_create(ir);
list_add(&then_block->node, &before_block->node);
list_add(&after_block->node, &then_block->node);
struct ir3_block *then_block = ir3_block_create(ir);
struct ir3_block *after_block = ir3_block_create(ir);
list_add(&then_block->node, &before_block->node);
list_add(&after_block->node, &then_block->node);
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
after_block->successors[i] = before_block->successors[i];
if (after_block->successors[i])
replace_pred(after_block->successors[i], before_block, after_block);
}
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
after_block->successors[i] = before_block->successors[i];
if (after_block->successors[i])
replace_pred(after_block->successors[i], before_block, after_block);
}
for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors); i++) {
after_block->physical_successors[i] = before_block->physical_successors[i];
if (after_block->physical_successors[i]) {
replace_physical_pred(after_block->physical_successors[i],
before_block, after_block);
}
}
for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
i++) {
after_block->physical_successors[i] =
before_block->physical_successors[i];
if (after_block->physical_successors[i]) {
replace_physical_pred(after_block->physical_successors[i],
before_block, after_block);
}
}
before_block->successors[0] = then_block;
before_block->successors[1] = after_block;
before_block->physical_successors[0] = then_block;
before_block->physical_successors[1] = after_block;
ir3_block_add_predecessor(then_block, before_block);
ir3_block_add_predecessor(after_block, before_block);
ir3_block_add_physical_predecessor(then_block, before_block);
ir3_block_add_physical_predecessor(after_block, before_block);
before_block->successors[0] = then_block;
before_block->successors[1] = after_block;
before_block->physical_successors[0] = then_block;
before_block->physical_successors[1] = after_block;
ir3_block_add_predecessor(then_block, before_block);
ir3_block_add_predecessor(after_block, before_block);
ir3_block_add_physical_predecessor(then_block, before_block);
ir3_block_add_physical_predecessor(after_block, before_block);
then_block->successors[0] = after_block;
then_block->physical_successors[0] = after_block;
ir3_block_add_predecessor(after_block, then_block);
ir3_block_add_physical_predecessor(after_block, then_block);
then_block->successors[0] = after_block;
then_block->physical_successors[0] = after_block;
ir3_block_add_predecessor(after_block, then_block);
ir3_block_add_physical_predecessor(after_block, then_block);
foreach_instr_from_safe (rem_instr, &instr->node, &before_block->instr_list) {
list_del(&rem_instr->node);
list_addtail(&rem_instr->node, &after_block->instr_list);
rem_instr->block = after_block;
}
foreach_instr_from_safe (rem_instr, &instr->node,
&before_block->instr_list) {
list_del(&rem_instr->node);
list_addtail(&rem_instr->node, &after_block->instr_list);
rem_instr->block = after_block;
}
after_block->brtype = before_block->brtype;
after_block->condition = before_block->condition;
after_block->brtype = before_block->brtype;
after_block->condition = before_block->condition;
*then = then_block;
return after_block;
*then = then_block;
return after_block;
}
static bool
lower_block(struct ir3 *ir, struct ir3_block **block)
{
bool progress = false;
bool progress = false;
foreach_instr_safe (instr, &(*block)->instr_list) {
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
case OPC_ELECT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
break;
default:
continue;
}
foreach_instr_safe (instr, &(*block)->instr_list) {
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
case OPC_ELECT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
break;
default:
continue;
}
struct ir3_block *before_block = *block;
struct ir3_block *then_block;
struct ir3_block *after_block =
split_block(ir, before_block, instr, &then_block);
struct ir3_block *before_block = *block;
struct ir3_block *then_block;
struct ir3_block *after_block =
split_block(ir, before_block, instr, &then_block);
/* For ballot, the destination must be initialized to 0 before we do
* the movmsk because the condition may be 0 and then the movmsk will
* be skipped. Because it's a shared register we have to wrap the
* initialization in a getone block.
*/
if (instr->opc == OPC_BALLOT_MACRO) {
before_block->brtype = IR3_BRANCH_GETONE;
before_block->condition = NULL;
mov_immed(instr->dsts[0], then_block, 0);
before_block = after_block;
after_block = split_block(ir, before_block, instr, &then_block);
}
/* For ballot, the destination must be initialized to 0 before we do
* the movmsk because the condition may be 0 and then the movmsk will
* be skipped. Because it's a shared register we have to wrap the
* initialization in a getone block.
*/
if (instr->opc == OPC_BALLOT_MACRO) {
before_block->brtype = IR3_BRANCH_GETONE;
before_block->condition = NULL;
mov_immed(instr->dsts[0], then_block, 0);
before_block = after_block;
after_block = split_block(ir, before_block, instr, &then_block);
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
before_block->condition = instr->srcs[0]->def->instr;
break;
default:
before_block->condition = NULL;
break;
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
before_block->condition = instr->srcs[0]->def->instr;
break;
default:
before_block->condition = NULL;
break;
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
before_block->brtype = IR3_BRANCH_COND;
break;
case OPC_ANY_MACRO:
before_block->brtype = IR3_BRANCH_ANY;
break;
case OPC_ALL_MACRO:
before_block->brtype = IR3_BRANCH_ALL;
break;
case OPC_ELECT_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
before_block->brtype = IR3_BRANCH_GETONE;
break;
default:
unreachable("bad opcode");
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
before_block->brtype = IR3_BRANCH_COND;
break;
case OPC_ANY_MACRO:
before_block->brtype = IR3_BRANCH_ANY;
break;
case OPC_ALL_MACRO:
before_block->brtype = IR3_BRANCH_ALL;
break;
case OPC_ELECT_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
before_block->brtype = IR3_BRANCH_GETONE;
break;
default:
unreachable("bad opcode");
}
switch (instr->opc) {
case OPC_ALL_MACRO:
case OPC_ANY_MACRO:
case OPC_ELECT_MACRO:
mov_immed(instr->dsts[0], then_block, 1);
mov_immed(instr->dsts[0], before_block, 0);
break;
switch (instr->opc) {
case OPC_ALL_MACRO:
case OPC_ANY_MACRO:
case OPC_ELECT_MACRO:
mov_immed(instr->dsts[0], then_block, 1);
mov_immed(instr->dsts[0], before_block, 0);
break;
case OPC_BALLOT_MACRO: {
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
struct ir3_instruction *movmsk = ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
movmsk->repeat = comp_count - 1;
break;
}
case OPC_BALLOT_MACRO: {
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
struct ir3_instruction *movmsk =
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
movmsk->repeat = comp_count - 1;
break;
}
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO: {
struct ir3_instruction *mov = ir3_instr_create(then_block, OPC_MOV, 1, 1);
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
*new_src = *instr->srcs[src];
mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
break;
}
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO: {
struct ir3_instruction *mov =
ir3_instr_create(then_block, OPC_MOV, 1, 1);
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
*new_src = *instr->srcs[src];
mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
break;
}
case OPC_SWZ_SHARED_MACRO: {
struct ir3_instruction *swz =
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
swz->repeat = 1;
break;
}
case OPC_SWZ_SHARED_MACRO: {
struct ir3_instruction *swz =
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
swz->repeat = 1;
break;
}
default:
unreachable("bad opcode");
}
default:
unreachable("bad opcode");
}
*block = after_block;
list_delinit(&instr->node);
progress = true;
}
*block = after_block;
list_delinit(&instr->node);
progress = true;
}
return progress;
return progress;
}
bool
ir3_lower_subgroups(struct ir3 *ir)
{
bool progress = false;
bool progress = false;
foreach_block (block, &ir->block_list)
progress |= lower_block(ir, &block);
foreach_block (block, &ir->block_list)
progress |= lower_block(ir, &block);
return progress;
return progress;
}
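A scalar model of the control flow the elect-style macros expand into, from a single fibre's point of view: the destination is pre-initialized to 0 before the getone branch and overwritten with 1 only in the then block (illustrative sketch, not the hardware encoding):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint32_t
elect_model(bool elected)
{
   uint32_t dst = 0;   /* mov_immed(dst, before_block, 0), seen by all fibres */
   if (elected)        /* IR3_BRANCH_GETONE -> then_block                     */
      dst = 1;         /* mov_immed(dst, then_block, 1), elected fibre only   */
   return dst;
}

int
main(void)
{
   assert(elect_model(true) == 1);
   assert(elect_model(false) == 0);
   return 0;
}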

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "ir3_ra.h"
#include "ir3_compiler.h"
#include "ir3_ra.h"
#include "ralloc.h"
/* This pass "merges" compatible phi-web SSA values. First, we insert a bunch
@ -71,13 +71,13 @@
static unsigned
index_instrs(struct ir3_block *block, unsigned index)
{
foreach_instr (instr, &block->instr_list)
instr->ip = index++;
foreach_instr (instr, &block->instr_list)
instr->ip = index++;
for (unsigned i = 0; i < block->dom_children_count; i++)
index = index_instrs(block->dom_children[i], index);
for (unsigned i = 0; i < block->dom_children_count; i++)
index = index_instrs(block->dom_children[i], index);
return index;
return index;
}
/* Definitions within a merge set are ordered by instr->ip as set above: */
@ -85,27 +85,27 @@ index_instrs(struct ir3_block *block, unsigned index)
static bool
def_after(struct ir3_register *a, struct ir3_register *b)
{
return a->instr->ip > b->instr->ip;
return a->instr->ip > b->instr->ip;
}
static bool
def_dominates(struct ir3_register *a, struct ir3_register *b)
{
if (def_after(a, b)) {
return false;
} else if (a->instr->block == b->instr->block) {
return def_after(b, a);
} else {
return ir3_block_dominates(a->instr->block, b->instr->block);
}
if (def_after(a, b)) {
return false;
} else if (a->instr->block == b->instr->block) {
return def_after(b, a);
} else {
return ir3_block_dominates(a->instr->block, b->instr->block);
}
}
/* This represents a region inside a register. The offset is relative to the
* start of the register, and offset + size <= size(reg).
*/
struct def_value {
struct ir3_register *reg;
unsigned offset, size;
struct ir3_register *reg;
unsigned offset, size;
};
/* Chase any copies to get the source of a region inside a register. This is
@ -114,456 +114,452 @@ struct def_value {
static struct def_value
chase_copies(struct def_value value)
{
while (true) {
struct ir3_instruction *instr = value.reg->instr;
if (instr->opc == OPC_META_SPLIT) {
value.offset += instr->split.off * reg_elem_size(value.reg);
value.reg = instr->srcs[0]->def;
} else if (instr->opc == OPC_META_COLLECT) {
if (value.offset % reg_elem_size(value.reg) != 0 ||
value.size > reg_elem_size(value.reg) ||
value.offset + value.size > reg_size(value.reg))
break;
struct ir3_register *src = instr->srcs[value.offset / reg_elem_size(value.reg)];
if (!src->def)
break;
value.offset = 0;
value.reg = src->def;
} else {
/* TODO: parallelcopy */
break;
}
}
while (true) {
struct ir3_instruction *instr = value.reg->instr;
if (instr->opc == OPC_META_SPLIT) {
value.offset += instr->split.off * reg_elem_size(value.reg);
value.reg = instr->srcs[0]->def;
} else if (instr->opc == OPC_META_COLLECT) {
if (value.offset % reg_elem_size(value.reg) != 0 ||
value.size > reg_elem_size(value.reg) ||
value.offset + value.size > reg_size(value.reg))
break;
struct ir3_register *src =
instr->srcs[value.offset / reg_elem_size(value.reg)];
if (!src->def)
break;
value.offset = 0;
value.reg = src->def;
} else {
/* TODO: parallelcopy */
break;
}
}
return value;
return value;
}
/* This represents an entry in the merge set, and consists of a register +
* offset from the merge set base.
*/
struct merge_def {
struct ir3_register *reg;
unsigned offset;
struct ir3_register *reg;
unsigned offset;
};
static bool
can_skip_interference(const struct merge_def *a, const struct merge_def *b)
{
unsigned a_start = a->offset;
unsigned b_start = b->offset;
unsigned a_end = a_start + reg_size(a->reg);
unsigned b_end = b_start + reg_size(b->reg);
unsigned a_start = a->offset;
unsigned b_start = b->offset;
unsigned a_end = a_start + reg_size(a->reg);
unsigned b_end = b_start + reg_size(b->reg);
/* Registers that don't overlap never interfere */
if (a_end <= b_start || b_end <= a_start)
return true;
/* Registers that don't overlap never interfere */
if (a_end <= b_start || b_end <= a_start)
return true;
/* Disallow skipping interference unless one definition contains the
* other. This restriction is important for register allocation, because
* it means that at any given point in the program, the live values in a
* given merge set will form a tree. If they didn't, then one live value
* would partially overlap another, and they would have overlapping live
* ranges because they're live at the same point. This simplifies register
* allocation and spilling.
*/
if (!((a_start <= b_start && a_end >= b_end) ||
(b_start <= a_start && b_end >= a_end)))
return false;
/* Disallow skipping interference unless one definition contains the
* other. This restriction is important for register allocation, because
* it means that at any given point in the program, the live values in a
* given merge set will form a tree. If they didn't, then one live value
* would partially overlap another, and they would have overlapping live
* ranges because they're live at the same point. This simplifies register
* allocation and spilling.
*/
if (!((a_start <= b_start && a_end >= b_end) ||
(b_start <= a_start && b_end >= a_end)))
return false;
/* For each register, chase the intersection of a and b to find the
* ultimate source.
*/
unsigned start = MAX2(a_start, b_start);
unsigned end = MIN2(a_end, b_end);
struct def_value a_value =
chase_copies((struct def_value) {
.reg = a->reg,
.offset = start - a_start,
.size = end - start,
});
struct def_value b_value =
chase_copies((struct def_value) {
.reg = b->reg,
.offset = start - b_start,
.size = end - start,
});
return a_value.reg == b_value.reg && a_value.offset == b_value.offset;
/* For each register, chase the intersection of a and b to find the
* ultimate source.
*/
unsigned start = MAX2(a_start, b_start);
unsigned end = MIN2(a_end, b_end);
struct def_value a_value = chase_copies((struct def_value){
.reg = a->reg,
.offset = start - a_start,
.size = end - start,
});
struct def_value b_value = chase_copies((struct def_value){
.reg = b->reg,
.offset = start - b_start,
.size = end - start,
});
return a_value.reg == b_value.reg && a_value.offset == b_value.offset;
}
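As a hedged aside (not part of this commit), the containment rule described in the comment above can be sketched as a standalone check over [start, end) byte regions; the helper name is made up for illustration:

#include <stdbool.h>

/* Two regions of a merge set may share storage only when they do not
 * overlap at all, or when one fully contains the other, so that the live
 * values in a set always nest like a tree. */
static bool
regions_nest_or_disjoint(unsigned a_start, unsigned a_end,
                         unsigned b_start, unsigned b_end)
{
   if (a_end <= b_start || b_end <= a_start)
      return true; /* disjoint: never interfere */

   return (a_start <= b_start && a_end >= b_end) ||
          (b_start <= a_start && b_end >= a_end); /* containment */
}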
static struct ir3_merge_set *
get_merge_set(struct ir3_register *def)
{
if (def->merge_set)
return def->merge_set;
if (def->merge_set)
return def->merge_set;
struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set);
set->preferred_reg = ~0;
set->interval_start = ~0;
set->size = reg_size(def);
set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2;
set->regs_count = 1;
set->regs = ralloc(set, struct ir3_register *);
set->regs[0] = def;
struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set);
set->preferred_reg = ~0;
set->interval_start = ~0;
set->size = reg_size(def);
set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2;
set->regs_count = 1;
set->regs = ralloc(set, struct ir3_register *);
set->regs[0] = def;
return set;
return set;
}
/* Merges b into a */
static struct ir3_merge_set *
merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b,
int b_offset)
merge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset)
{
if (b_offset < 0)
return merge_merge_sets(b, a, -b_offset);
if (b_offset < 0)
return merge_merge_sets(b, a, -b_offset);
struct ir3_register **new_regs =
rzalloc_array(a, struct ir3_register *, a->regs_count + b->regs_count);
struct ir3_register **new_regs =
rzalloc_array(a, struct ir3_register *, a->regs_count + b->regs_count);
unsigned a_index = 0, b_index = 0, new_index = 0;
for (; a_index < a->regs_count || b_index < b->regs_count; new_index++) {
if (b_index < b->regs_count &&
(a_index == a->regs_count ||
def_after(a->regs[a_index], b->regs[b_index]))) {
new_regs[new_index] = b->regs[b_index++];
new_regs[new_index]->merge_set_offset += b_offset;
} else {
new_regs[new_index] = a->regs[a_index++];
}
new_regs[new_index]->merge_set = a;
}
unsigned a_index = 0, b_index = 0, new_index = 0;
for (; a_index < a->regs_count || b_index < b->regs_count; new_index++) {
if (b_index < b->regs_count &&
(a_index == a->regs_count ||
def_after(a->regs[a_index], b->regs[b_index]))) {
new_regs[new_index] = b->regs[b_index++];
new_regs[new_index]->merge_set_offset += b_offset;
} else {
new_regs[new_index] = a->regs[a_index++];
}
new_regs[new_index]->merge_set = a;
}
assert(new_index == a->regs_count + b->regs_count);
assert(new_index == a->regs_count + b->regs_count);
/* Technically this should be the lcm, but because alignment is only 1 or
* 2 so far this should be ok.
*/
a->alignment = MAX2(a->alignment, b->alignment);
a->regs_count += b->regs_count;
ralloc_free(a->regs);
a->regs = new_regs;
a->size = MAX2(a->size, b->size + b_offset);
/* Technically this should be the lcm, but because alignment is only 1 or
* 2 so far this should be ok.
*/
a->alignment = MAX2(a->alignment, b->alignment);
a->regs_count += b->regs_count;
ralloc_free(a->regs);
a->regs = new_regs;
a->size = MAX2(a->size, b->size + b_offset);
return a;
return a;
}
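The alignment note above ("technically this should be the lcm") holds because alignments are currently only 1 or 2; a small self-contained check (helper names are illustrative only, not driver code) confirms MAX2 and the lcm agree for those values:

#include <assert.h>

static unsigned
lcm_u(unsigned a, unsigned b)
{
   unsigned m = a;
   while (m % b)
      m += a;
   return m;
}

static void
check_alignment_merge(void)
{
   static const unsigned vals[] = {1, 2};
   for (unsigned i = 0; i < 2; i++) {
      for (unsigned j = 0; j < 2; j++) {
         unsigned max = vals[i] > vals[j] ? vals[i] : vals[j];
         assert(lcm_u(vals[i], vals[j]) == max);
      }
   }
}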
static bool
merge_sets_interfere(struct ir3_liveness *live,
struct ir3_merge_set *a, struct ir3_merge_set *b,
int b_offset)
merge_sets_interfere(struct ir3_liveness *live, struct ir3_merge_set *a,
struct ir3_merge_set *b, int b_offset)
{
if (b_offset < 0)
return merge_sets_interfere(live, b, a, -b_offset);
if (b_offset < 0)
return merge_sets_interfere(live, b, a, -b_offset);
struct merge_def dom[a->regs_count + b->regs_count];
unsigned a_index = 0, b_index = 0;
int dom_index = -1;
struct merge_def dom[a->regs_count + b->regs_count];
unsigned a_index = 0, b_index = 0;
int dom_index = -1;
/* Reject trying to merge the sets if the alignment doesn't work out */
if (b_offset % a->alignment != 0)
return true;
/* Reject trying to merge the sets if the alignment doesn't work out */
if (b_offset % a->alignment != 0)
return true;
while (a_index < a->regs_count || b_index < b->regs_count) {
struct merge_def current;
if (a_index == a->regs_count) {
current.reg = b->regs[b_index];
current.offset = current.reg->merge_set_offset + b_offset;
b_index++;
} else if (b_index == b->regs_count) {
current.reg = a->regs[a_index];
current.offset = current.reg->merge_set_offset;
a_index++;
} else {
if (def_after(b->regs[b_index], a->regs[a_index])) {
current.reg = a->regs[a_index];
current.offset = current.reg->merge_set_offset;
a_index++;
} else {
current.reg = b->regs[b_index];
current.offset = current.reg->merge_set_offset + b_offset;
b_index++;
}
}
while (a_index < a->regs_count || b_index < b->regs_count) {
struct merge_def current;
if (a_index == a->regs_count) {
current.reg = b->regs[b_index];
current.offset = current.reg->merge_set_offset + b_offset;
b_index++;
} else if (b_index == b->regs_count) {
current.reg = a->regs[a_index];
current.offset = current.reg->merge_set_offset;
a_index++;
} else {
if (def_after(b->regs[b_index], a->regs[a_index])) {
current.reg = a->regs[a_index];
current.offset = current.reg->merge_set_offset;
a_index++;
} else {
current.reg = b->regs[b_index];
current.offset = current.reg->merge_set_offset + b_offset;
b_index++;
}
}
while (dom_index >= 0 &&
!def_dominates(dom[dom_index].reg, current.reg)) {
dom_index--;
}
while (dom_index >= 0 &&
!def_dominates(dom[dom_index].reg, current.reg)) {
dom_index--;
}
/* TODO: in the original paper, just dom[dom_index] needs to be
* checked for interference. We implement the value-chasing extension
* as well as support for sub-registers, which complicates this
* significantly because it's no longer the case that if a dominates b
* dominates c and a and b don't interfere then we only need to check
* interference between b and c to be sure a and c don't interfere --
* this means we may have to check for interference against values
* higher in the stack than dom[dom_index]. In the paper there's a
* description of a way to do less interference tests with the
* value-chasing extension, but we'd have to come up with something
* ourselves for handling the similar problems that come up with
* allowing values to contain subregisters. For now we just test
* everything in the stack.
*/
for (int i = 0; i <= dom_index; i++) {
if (can_skip_interference(&current, &dom[i]))
continue;
/* TODO: in the original paper, just dom[dom_index] needs to be
* checked for interference. We implement the value-chasing extension
* as well as support for sub-registers, which complicates this
* significantly because it's no longer the case that if a dominates b
* dominates c and a and b don't interfere then we only need to check
* interference between b and c to be sure a and c don't interfere --
* this means we may have to check for interference against values
* higher in the stack than dom[dom_index]. In the paper there's a
* description of a way to do less interference tests with the
* value-chasing extension, but we'd have to come up with something
* ourselves for handling the similar problems that come up with
* allowing values to contain subregisters. For now we just test
* everything in the stack.
*/
for (int i = 0; i <= dom_index; i++) {
if (can_skip_interference(&current, &dom[i]))
continue;
/* Ok, now we actually have to check interference. Since we know
* that dom[i] dominates current, this boils down to checking
* whether dom[i] is live after current.
*/
if (ir3_def_live_after(live, dom[i].reg, current.reg->instr))
return true;
}
/* Ok, now we actually have to check interference. Since we know
* that dom[i] dominates current, this boils down to checking
* whether dom[i] is live after current.
*/
if (ir3_def_live_after(live, dom[i].reg, current.reg->instr))
return true;
}
dom[++dom_index] = current;
}
dom[++dom_index] = current;
}
return false;
return false;
}
static void
try_merge_defs(struct ir3_liveness *live,
struct ir3_register *a, struct ir3_register *b,
unsigned b_offset)
try_merge_defs(struct ir3_liveness *live, struct ir3_register *a,
struct ir3_register *b, unsigned b_offset)
{
struct ir3_merge_set *a_set = get_merge_set(a);
struct ir3_merge_set *b_set = get_merge_set(b);
struct ir3_merge_set *a_set = get_merge_set(a);
struct ir3_merge_set *b_set = get_merge_set(b);
if (a_set == b_set) {
/* Note: Even in this case we may not always successfully be able to
* coalesce this copy, if the offsets don't line up. But in any
* case, we can't do anything.
*/
return;
}
if (a_set == b_set) {
/* Note: Even in this case we may not always successfully be able to
* coalesce this copy, if the offsets don't line up. But in any
* case, we can't do anything.
*/
return;
}
int b_set_offset = a->merge_set_offset + b_offset - b->merge_set_offset;
int b_set_offset = a->merge_set_offset + b_offset - b->merge_set_offset;
if (!merge_sets_interfere(live, a_set, b_set, b_set_offset))
merge_merge_sets(a_set, b_set, b_set_offset);
if (!merge_sets_interfere(live, a_set, b_set, b_set_offset))
merge_merge_sets(a_set, b_set, b_set_offset);
}
static void
coalesce_phi(struct ir3_liveness *live,
struct ir3_instruction *phi)
coalesce_phi(struct ir3_liveness *live, struct ir3_instruction *phi)
{
for (unsigned i = 0; i < phi->srcs_count; i++) {
if (phi->srcs[i]->def)
try_merge_defs(live, phi->dsts[0], phi->srcs[i]->def, 0);
}
for (unsigned i = 0; i < phi->srcs_count; i++) {
if (phi->srcs[i]->def)
try_merge_defs(live, phi->dsts[0], phi->srcs[i]->def, 0);
}
}
static void
aggressive_coalesce_parallel_copy(struct ir3_liveness *live,
struct ir3_instruction *pcopy)
struct ir3_instruction *pcopy)
{
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
if (!(pcopy->srcs[i]->flags & IR3_REG_SSA))
continue;
try_merge_defs(live, pcopy->dsts[i], pcopy->srcs[i]->def, 0);
}
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
if (!(pcopy->srcs[i]->flags & IR3_REG_SSA))
continue;
try_merge_defs(live, pcopy->dsts[i], pcopy->srcs[i]->def, 0);
}
}
static void
aggressive_coalesce_split(struct ir3_liveness *live,
struct ir3_instruction *split)
struct ir3_instruction *split)
{
try_merge_defs(live, split->srcs[0]->def, split->dsts[0],
split->split.off * reg_elem_size(split->dsts[0]));
try_merge_defs(live, split->srcs[0]->def, split->dsts[0],
split->split.off * reg_elem_size(split->dsts[0]));
}
static void
aggressive_coalesce_collect(struct ir3_liveness *live,
struct ir3_instruction *collect)
struct ir3_instruction *collect)
{
for (unsigned i = 0, offset = 0; i < collect->srcs_count;
offset += reg_elem_size(collect->srcs[i]), i++) {
if (!(collect->srcs[i]->flags & IR3_REG_SSA))
continue;
try_merge_defs(live, collect->dsts[0], collect->srcs[i]->def, offset);
}
for (unsigned i = 0, offset = 0; i < collect->srcs_count;
offset += reg_elem_size(collect->srcs[i]), i++) {
if (!(collect->srcs[i]->flags & IR3_REG_SSA))
continue;
try_merge_defs(live, collect->dsts[0], collect->srcs[i]->def, offset);
}
}
static void
create_parallel_copy(struct ir3_block *block)
{
for (unsigned i = 0; i < 2; i++) {
if (!block->successors[i])
continue;
for (unsigned i = 0; i < 2; i++) {
if (!block->successors[i])
continue;
struct ir3_block *succ = block->successors[i];
struct ir3_block *succ = block->successors[i];
unsigned pred_idx = ir3_block_get_pred_index(succ, block);
unsigned pred_idx = ir3_block_get_pred_index(succ, block);
unsigned phi_count = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
unsigned phi_count = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
/* Avoid undef */
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
/* Avoid undef */
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
/* We don't support critical edges. If we were to support them,
* we'd need to insert parallel copies after the phi node to solve
* the lost-copy problem.
*/
assert(i == 0 && !block->successors[1]);
phi_count++;
}
/* We don't support critical edges. If we were to support them,
* we'd need to insert parallel copies after the phi node to solve
* the lost-copy problem.
*/
assert(i == 0 && !block->successors[1]);
phi_count++;
}
if (phi_count == 0)
continue;
if (phi_count == 0)
continue;
struct ir3_register *src[phi_count];
unsigned j = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
src[j++] = phi->srcs[pred_idx];
}
assert(j == phi_count);
struct ir3_register *src[phi_count];
unsigned j = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
src[j++] = phi->srcs[pred_idx];
}
assert(j == phi_count);
struct ir3_instruction *pcopy =
ir3_instr_create(block, OPC_META_PARALLEL_COPY, phi_count, phi_count);
struct ir3_instruction *pcopy =
ir3_instr_create(block, OPC_META_PARALLEL_COPY, phi_count, phi_count);
for (j = 0; j < phi_count; j++) {
struct ir3_register *reg = __ssa_dst(pcopy);
reg->flags |= src[j]->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
reg->size = reg_elems(src[j]);
}
for (j = 0; j < phi_count; j++) {
struct ir3_register *reg = __ssa_dst(pcopy);
reg->flags |= src[j]->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
reg->size = reg_elems(src[j]);
}
for (j = 0; j < phi_count; j++) {
pcopy->srcs[pcopy->srcs_count++] = ir3_reg_clone(block->shader, src[j]);
}
for (j = 0; j < phi_count; j++) {
pcopy->srcs[pcopy->srcs_count++] =
ir3_reg_clone(block->shader, src[j]);
}
j = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
phi->srcs[pred_idx]->def = pcopy->dsts[j];
phi->srcs[pred_idx]->flags = pcopy->dsts[j]->flags;
j++;
}
assert(j == phi_count);
}
j = 0;
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
phi->srcs[pred_idx]->def = pcopy->dsts[j];
phi->srcs[pred_idx]->flags = pcopy->dsts[j]->flags;
j++;
}
assert(j == phi_count);
}
}
void
ir3_create_parallel_copies(struct ir3 *ir)
{
foreach_block (block, &ir->block_list) {
create_parallel_copy(block);
}
foreach_block (block, &ir->block_list) {
create_parallel_copy(block);
}
}
static void
index_merge_sets(struct ir3 *ir)
{
unsigned offset = 0;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
unsigned offset = 0;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
unsigned dst_offset;
struct ir3_merge_set *merge_set = dst->merge_set;
unsigned size = reg_size(dst);
if (merge_set) {
if (merge_set->interval_start == ~0) {
merge_set->interval_start = offset;
offset += merge_set->size;
}
dst_offset = merge_set->interval_start + dst->merge_set_offset;
} else {
dst_offset = offset;
offset += size;
}
unsigned dst_offset;
struct ir3_merge_set *merge_set = dst->merge_set;
unsigned size = reg_size(dst);
if (merge_set) {
if (merge_set->interval_start == ~0) {
merge_set->interval_start = offset;
offset += merge_set->size;
}
dst_offset = merge_set->interval_start + dst->merge_set_offset;
} else {
dst_offset = offset;
offset += size;
}
dst->interval_start = dst_offset;
dst->interval_end = dst_offset + size;
}
}
}
dst->interval_start = dst_offset;
dst->interval_end = dst_offset + size;
}
}
}
}
#define RESET "\x1b[0m"
#define BLUE "\x1b[0;34m"
#define SYN_SSA(x) BLUE x RESET
#define RESET "\x1b[0m"
#define BLUE "\x1b[0;34m"
#define SYN_SSA(x) BLUE x RESET
static void
dump_merge_sets(struct ir3 *ir)
{
printf("merge sets:\n");
struct set *merge_sets = _mesa_pointer_set_create(NULL);
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
printf("merge sets:\n");
struct set *merge_sets = _mesa_pointer_set_create(NULL);
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *dst = instr->dsts[i];
struct ir3_merge_set *merge_set = dst->merge_set;
if (!merge_set || _mesa_set_search(merge_sets, merge_set))
continue;
struct ir3_merge_set *merge_set = dst->merge_set;
if (!merge_set || _mesa_set_search(merge_sets, merge_set))
continue;
printf("merge set, size %u, align %u:\n", merge_set->size, merge_set->alignment);
for (unsigned j = 0; j < merge_set->regs_count; j++) {
struct ir3_register *reg = merge_set->regs[j];
printf("\t"SYN_SSA("ssa_%u")":%u, offset %u\n", reg->instr->serialno,
reg->name, reg->merge_set_offset);
}
printf("merge set, size %u, align %u:\n", merge_set->size,
merge_set->alignment);
for (unsigned j = 0; j < merge_set->regs_count; j++) {
struct ir3_register *reg = merge_set->regs[j];
printf("\t" SYN_SSA("ssa_%u") ":%u, offset %u\n",
reg->instr->serialno, reg->name, reg->merge_set_offset);
}
_mesa_set_add(merge_sets, merge_set);
}
}
}
_mesa_set_add(merge_sets, merge_set);
}
}
}
ralloc_free(merge_sets);
ralloc_free(merge_sets);
}
void
ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir)
{
index_instrs(ir3_start_block(ir), 0);
index_instrs(ir3_start_block(ir), 0);
/* First pass: coalesce phis, which must be together. */
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_PHI)
break;
/* First pass: coalesce phis, which must be together. */
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_PHI)
break;
coalesce_phi(live, instr);
}
}
coalesce_phi(live, instr);
}
}
/* Second pass: aggressively coalesce parallelcopy, split, collect */
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
switch (instr->opc) {
case OPC_META_SPLIT:
aggressive_coalesce_split(live, instr);
break;
case OPC_META_COLLECT:
aggressive_coalesce_collect(live, instr);
break;
case OPC_META_PARALLEL_COPY:
aggressive_coalesce_parallel_copy(live, instr);
break;
default:
break;
}
}
}
/* Second pass: aggressively coalesce parallelcopy, split, collect */
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
switch (instr->opc) {
case OPC_META_SPLIT:
aggressive_coalesce_split(live, instr);
break;
case OPC_META_COLLECT:
aggressive_coalesce_collect(live, instr);
break;
case OPC_META_PARALLEL_COPY:
aggressive_coalesce_parallel_copy(live, instr);
break;
default:
break;
}
}
}
index_merge_sets(ir);
index_merge_sets(ir);
if (ir3_shader_debug & IR3_DBG_RAMSGS)
dump_merge_sets(ir);
if (ir3_shader_debug & IR3_DBG_RAMSGS)
dump_merge_sets(ir);
}

File diff suppressed because it is too large


@ -43,15 +43,19 @@ bool ir3_nir_move_varying_inputs(nir_shader *shader);
int ir3_nir_coord_offset(nir_ssa_def *ssa);
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
void ir3_nir_lower_to_explicit_output(nir_shader *shader,
struct ir3_shader_variant *v, unsigned topology);
void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v);
void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
struct ir3_shader_variant *v,
unsigned topology);
void ir3_nir_lower_to_explicit_input(nir_shader *shader,
struct ir3_shader_variant *v);
void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
unsigned topology);
void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,
unsigned topology);
void ir3_nir_lower_gs(nir_shader *shader);
const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler);
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
@ -59,29 +63,30 @@ void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state);
struct ir3_const_state *const_state);
bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
bool ir3_nir_fixup_load_uniform(nir_shader *nir);
nir_ssa_def *
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
nir_ssa_def *ir3_nir_try_propagate_bit_shift(nir_builder *b,
nir_ssa_def *offset,
int32_t shift);
static inline nir_intrinsic_instr *
ir3_bindless_resource(nir_src src)
{
if (!src.is_ssa)
return NULL;
if (!src.is_ssa)
return NULL;
if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
return NULL;
if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
return NULL;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.ssa->parent_instr);
if (intrin->intrinsic != nir_intrinsic_bindless_resource_ir3)
return NULL;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.ssa->parent_instr);
if (intrin->intrinsic != nir_intrinsic_bindless_resource_ir3)
return NULL;
return intrin;
return intrin;
}
#endif /* IR3_NIR_H_ */


@ -21,54 +21,55 @@
* SOFTWARE.
*/
#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
static inline bool
get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr, uint32_t alignment, struct ir3_ubo_range *r)
get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
uint32_t alignment, struct ir3_ubo_range *r)
{
uint32_t offset = nir_intrinsic_range_base(instr);
uint32_t size = nir_intrinsic_range(instr);
uint32_t offset = nir_intrinsic_range_base(instr);
uint32_t size = nir_intrinsic_range(instr);
/* If the offset is constant, the range is trivial (and NIR may not have
* figured it out).
*/
if (nir_src_is_const(instr->src[1])) {
offset = nir_src_as_uint(instr->src[1]);
size = nir_intrinsic_dest_components(instr) * 4;
}
/* If the offset is constant, the range is trivial (and NIR may not have
* figured it out).
*/
if (nir_src_is_const(instr->src[1])) {
offset = nir_src_as_uint(instr->src[1]);
size = nir_intrinsic_dest_components(instr) * 4;
}
/* If we haven't figured out the range accessed in the UBO, bail. */
if (size == ~0)
return false;
/* If we haven't figured out the range accessed in the UBO, bail. */
if (size == ~0)
return false;
r->start = ROUND_DOWN_TO(offset, alignment * 16);
r->end = ALIGN(offset + size, alignment * 16);
r->start = ROUND_DOWN_TO(offset, alignment * 16);
r->end = ALIGN(offset + size, alignment * 16);
return true;
return true;
}
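As a quick worked example of the rounding above (values chosen purely for illustration; const_upload_unit is assumed to be 1, i.e. 16-byte units): an 8-byte load at byte offset 36 produces the range [32, 48).

#include <assert.h>

static void
example_ubo_range(void)
{
   const unsigned alignment = 1;        /* assumed const_upload_unit */
   const unsigned unit = alignment * 16;
   const unsigned offset = 36, size = 8;

   unsigned start = (offset / unit) * unit;                    /* ROUND_DOWN_TO */
   unsigned end = ((offset + size + unit - 1) / unit) * unit;  /* ALIGN */

   assert(start == 32 && end == 48);
}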
static bool
get_ubo_info(nir_intrinsic_instr *instr, struct ir3_ubo_info *ubo)
{
if (nir_src_is_const(instr->src[0])) {
ubo->block = nir_src_as_uint(instr->src[0]);
ubo->bindless_base = 0;
ubo->bindless = false;
return true;
} else {
nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
if (rsrc && nir_src_is_const(rsrc->src[0])) {
ubo->block = nir_src_as_uint(rsrc->src[0]);
ubo->bindless_base = nir_intrinsic_desc_set(rsrc);
ubo->bindless = true;
return true;
}
}
return false;
if (nir_src_is_const(instr->src[0])) {
ubo->block = nir_src_as_uint(instr->src[0]);
ubo->bindless_base = 0;
ubo->bindless = false;
return true;
} else {
nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
if (rsrc && nir_src_is_const(rsrc->src[0])) {
ubo->block = nir_src_as_uint(rsrc->src[0]);
ubo->bindless_base = nir_intrinsic_desc_set(rsrc);
ubo->bindless = true;
return true;
}
}
return false;
}
/**
@ -76,24 +77,23 @@ get_ubo_info(nir_intrinsic_instr *instr, struct ir3_ubo_info *ubo)
*/
static const struct ir3_ubo_range *
get_existing_range(nir_intrinsic_instr *instr,
const struct ir3_ubo_analysis_state *state,
struct ir3_ubo_range *r)
const struct ir3_ubo_analysis_state *state,
struct ir3_ubo_range *r)
{
struct ir3_ubo_info ubo = {};
struct ir3_ubo_info ubo = {};
if (!get_ubo_info(instr, &ubo))
return NULL;
if (!get_ubo_info(instr, &ubo))
return NULL;
for (int i = 0; i < state->num_enabled; i++) {
const struct ir3_ubo_range *range = &state->range[i];
if (!memcmp(&range->ubo, &ubo, sizeof(ubo)) &&
r->start >= range->start &&
r->end <= range->end) {
return range;
}
}
for (int i = 0; i < state->num_enabled; i++) {
const struct ir3_ubo_range *range = &state->range[i];
if (!memcmp(&range->ubo, &ubo, sizeof(ubo)) && r->start >= range->start &&
r->end <= range->end) {
return range;
}
}
return NULL;
return NULL;
}
/**
@ -103,26 +103,26 @@ get_existing_range(nir_intrinsic_instr *instr,
static void
merge_neighbors(struct ir3_ubo_analysis_state *state, int index)
{
struct ir3_ubo_range *a = &state->range[index];
struct ir3_ubo_range *a = &state->range[index];
/* index is always the first slot that would have neighbored/overlapped with
* the new range.
*/
for (int i = index + 1; i < state->num_enabled; i++) {
struct ir3_ubo_range *b = &state->range[i];
if (memcmp(&a->ubo, &b->ubo, sizeof(a->ubo)))
continue;
/* index is always the first slot that would have neighbored/overlapped with
* the new range.
*/
for (int i = index + 1; i < state->num_enabled; i++) {
struct ir3_ubo_range *b = &state->range[i];
if (memcmp(&a->ubo, &b->ubo, sizeof(a->ubo)))
continue;
if (a->start > b->end || a->end < b->start)
continue;
if (a->start > b->end || a->end < b->start)
continue;
/* Merge B into A. */
a->start = MIN2(a->start, b->start);
a->end = MAX2(a->end, b->end);
/* Merge B into A. */
a->start = MIN2(a->start, b->start);
a->end = MAX2(a->end, b->end);
/* Swap the last enabled range into B's now unused slot */
*b = state->range[--state->num_enabled];
}
/* Swap the last enabled range into B's now unused slot */
*b = state->range[--state->num_enabled];
}
}
/**
@ -134,59 +134,59 @@ merge_neighbors(struct ir3_ubo_analysis_state *state, int index)
*/
static void
gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
struct ir3_ubo_analysis_state *state, uint32_t alignment,
uint32_t *upload_remaining)
struct ir3_ubo_analysis_state *state, uint32_t alignment,
uint32_t *upload_remaining)
{
if (ir3_shader_debug & IR3_DBG_NOUBOOPT)
return;
if (ir3_shader_debug & IR3_DBG_NOUBOOPT)
return;
struct ir3_ubo_info ubo = {};
if (!get_ubo_info(instr, &ubo))
return;
struct ir3_ubo_info ubo = {};
if (!get_ubo_info(instr, &ubo))
return;
struct ir3_ubo_range r;
if (!get_ubo_load_range(nir, instr, alignment, &r))
return;
struct ir3_ubo_range r;
if (!get_ubo_load_range(nir, instr, alignment, &r))
return;
/* See if there's an existing range for this UBO we want to merge into. */
for (int i = 0; i < state->num_enabled; i++) {
struct ir3_ubo_range *plan_r = &state->range[i];
if (memcmp(&plan_r->ubo, &ubo, sizeof(ubo)))
continue;
/* See if there's an existing range for this UBO we want to merge into. */
for (int i = 0; i < state->num_enabled; i++) {
struct ir3_ubo_range *plan_r = &state->range[i];
if (memcmp(&plan_r->ubo, &ubo, sizeof(ubo)))
continue;
/* Don't extend existing uploads unless they're
* neighboring/overlapping.
*/
if (r.start > plan_r->end || r.end < plan_r->start)
continue;
/* Don't extend existing uploads unless they're
* neighboring/overlapping.
*/
if (r.start > plan_r->end || r.end < plan_r->start)
continue;
r.start = MIN2(r.start, plan_r->start);
r.end = MAX2(r.end, plan_r->end);
r.start = MIN2(r.start, plan_r->start);
r.end = MAX2(r.end, plan_r->end);
uint32_t added = (plan_r->start - r.start) + (r.end - plan_r->end);
if (added >= *upload_remaining)
return;
uint32_t added = (plan_r->start - r.start) + (r.end - plan_r->end);
if (added >= *upload_remaining)
return;
plan_r->start = r.start;
plan_r->end = r.end;
*upload_remaining -= added;
plan_r->start = r.start;
plan_r->end = r.end;
*upload_remaining -= added;
merge_neighbors(state, i);
return;
}
merge_neighbors(state, i);
return;
}
if (state->num_enabled == ARRAY_SIZE(state->range))
return;
if (state->num_enabled == ARRAY_SIZE(state->range))
return;
uint32_t added = r.end - r.start;
if (added >= *upload_remaining)
return;
uint32_t added = r.end - r.start;
if (added >= *upload_remaining)
return;
struct ir3_ubo_range *plan_r = &state->range[state->num_enabled++];
plan_r->ubo = ubo;
plan_r->start = r.start;
plan_r->end = r.end;
*upload_remaining -= added;
struct ir3_ubo_range *plan_r = &state->range[state->num_enabled++];
plan_r->ubo = ubo;
plan_r->start = r.start;
plan_r->end = r.end;
*upload_remaining -= added;
}
/* For indirect offset, it is common to see a pattern of multiple
@ -197,7 +197,8 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
*
* Detect this, and peel out the const_offset part, to end up with:
*
* vec4 32 ssa_34 = intrinsic load_uniform (ssa_base) (base=N+const_offset, 0, 0)
* vec4 32 ssa_34 = intrinsic load_uniform (ssa_base) (base=N+const_offset,
* 0, 0)
*
* Or similarly:
*
@ -207,7 +208,8 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
* Can be converted to:
*
* vec1 32 ssa_base = imul24 a, b
* vec4 32 ssa_34 = intrinsic load_uniform (ssa_base) (base=N+const_offset, 0, 0)
* vec4 32 ssa_34 = intrinsic load_uniform (ssa_base) (base=N+const_offset,
* 0, 0)
*
* This gives the other opt passes something much easier to work
* with (ie. not requiring value range tracking)
@ -215,38 +217,38 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
static void
handle_partial_const(nir_builder *b, nir_ssa_def **srcp, int *offp)
{
if ((*srcp)->parent_instr->type != nir_instr_type_alu)
return;
if ((*srcp)->parent_instr->type != nir_instr_type_alu)
return;
nir_alu_instr *alu = nir_instr_as_alu((*srcp)->parent_instr);
nir_alu_instr *alu = nir_instr_as_alu((*srcp)->parent_instr);
if (alu->op == nir_op_imad24_ir3) {
/* This case is slightly more complicated as we need to
* replace the imad24_ir3 with an imul24:
*/
if (!nir_src_is_const(alu->src[2].src))
return;
if (alu->op == nir_op_imad24_ir3) {
/* This case is slightly more complicated as we need to
* replace the imad24_ir3 with an imul24:
*/
if (!nir_src_is_const(alu->src[2].src))
return;
*offp += nir_src_as_uint(alu->src[2].src);
*srcp = nir_imul24(b, nir_ssa_for_alu_src(b, alu, 0),
nir_ssa_for_alu_src(b, alu, 1));
*offp += nir_src_as_uint(alu->src[2].src);
*srcp = nir_imul24(b, nir_ssa_for_alu_src(b, alu, 0),
nir_ssa_for_alu_src(b, alu, 1));
return;
}
return;
}
if (alu->op != nir_op_iadd)
return;
if (alu->op != nir_op_iadd)
return;
if (!(alu->src[0].src.is_ssa && alu->src[1].src.is_ssa))
return;
if (!(alu->src[0].src.is_ssa && alu->src[1].src.is_ssa))
return;
if (nir_src_is_const(alu->src[0].src)) {
*offp += nir_src_as_uint(alu->src[0].src);
*srcp = alu->src[1].src.ssa;
} else if (nir_src_is_const(alu->src[1].src)) {
*srcp = alu->src[0].src.ssa;
*offp += nir_src_as_uint(alu->src[1].src);
}
if (nir_src_is_const(alu->src[0].src)) {
*offp += nir_src_as_uint(alu->src[0].src);
*srcp = alu->src[1].src.ssa;
} else if (nir_src_is_const(alu->src[1].src)) {
*srcp = alu->src[0].src.ssa;
*offp += nir_src_as_uint(alu->src[1].src);
}
}
/* Tracks the maximum bindful UBO accessed so that we reduce the UBO
@ -255,258 +257,256 @@ handle_partial_const(nir_builder *b, nir_ssa_def **srcp, int *offp)
static void
track_ubo_use(nir_intrinsic_instr *instr, nir_builder *b, int *num_ubos)
{
if (ir3_bindless_resource(instr->src[0])) {
assert(!b->shader->info.first_ubo_is_default_ubo); /* only set for GL */
return;
}
if (ir3_bindless_resource(instr->src[0])) {
assert(!b->shader->info.first_ubo_is_default_ubo); /* only set for GL */
return;
}
if (nir_src_is_const(instr->src[0])) {
int block = nir_src_as_uint(instr->src[0]);
*num_ubos = MAX2(*num_ubos, block + 1);
} else {
*num_ubos = b->shader->info.num_ubos;
}
if (nir_src_is_const(instr->src[0])) {
int block = nir_src_as_uint(instr->src[0]);
*num_ubos = MAX2(*num_ubos, block + 1);
} else {
*num_ubos = b->shader->info.num_ubos;
}
}
static bool
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
const struct ir3_ubo_analysis_state *state,
int *num_ubos, uint32_t alignment)
const struct ir3_ubo_analysis_state *state,
int *num_ubos, uint32_t alignment)
{
b->cursor = nir_before_instr(&instr->instr);
b->cursor = nir_before_instr(&instr->instr);
struct ir3_ubo_range r;
if (!get_ubo_load_range(b->shader, instr, alignment, &r)) {
track_ubo_use(instr, b, num_ubos);
return false;
}
struct ir3_ubo_range r;
if (!get_ubo_load_range(b->shader, instr, alignment, &r)) {
track_ubo_use(instr, b, num_ubos);
return false;
}
/* We don't lower dynamic block index UBO loads to load_uniform, but we
* could probably with some effort determine a block stride in number of
* registers.
*/
const struct ir3_ubo_range *range = get_existing_range(instr, state, &r);
if (!range) {
track_ubo_use(instr, b, num_ubos);
return false;
}
/* We don't lower dynamic block index UBO loads to load_uniform, but we
* could probably with some effort determine a block stride in number of
* registers.
*/
const struct ir3_ubo_range *range = get_existing_range(instr, state, &r);
if (!range) {
track_ubo_use(instr, b, num_ubos);
return false;
}
nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
int const_offset = 0;
nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
int const_offset = 0;
handle_partial_const(b, &ubo_offset, &const_offset);
handle_partial_const(b, &ubo_offset, &const_offset);
/* UBO offset is in bytes, but uniform offset is in units of
* dwords, so we need to divide by 4 (right-shift by 2). For ldc the
* offset is in units of 16 bytes, so we need to multiply by 4. And
* also the same for the constant part of the offset:
*/
const int shift = -2;
nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
nir_ssa_def *uniform_offset = NULL;
if (new_offset) {
uniform_offset = new_offset;
} else {
uniform_offset = shift > 0 ?
nir_ishl(b, ubo_offset, nir_imm_int(b, shift)) :
nir_ushr(b, ubo_offset, nir_imm_int(b, -shift));
}
/* UBO offset is in bytes, but uniform offset is in units of
* dwords, so we need to divide by 4 (right-shift by 2). For ldc the
* offset is in units of 16 bytes, so we need to multiply by 4. And
* also the same for the constant part of the offset:
*/
const int shift = -2;
nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
nir_ssa_def *uniform_offset = NULL;
if (new_offset) {
uniform_offset = new_offset;
} else {
uniform_offset = shift > 0
? nir_ishl(b, ubo_offset, nir_imm_int(b, shift))
: nir_ushr(b, ubo_offset, nir_imm_int(b, -shift));
}
debug_assert(!(const_offset & 0x3));
const_offset >>= 2;
debug_assert(!(const_offset & 0x3));
const_offset >>= 2;
const int range_offset = ((int)range->offset - (int)range->start) / 4;
const_offset += range_offset;
const int range_offset = ((int)range->offset - (int)range->start) / 4;
const_offset += range_offset;
/* The range_offset could be negative: if only part of the UBO
* block is accessed, range->start can be greater than range->offset.
* But we can't underflow const_offset. If necessary we need to
* insert nir instructions to compensate (which can hopefully be
* optimized away)
*/
if (const_offset < 0) {
uniform_offset = nir_iadd_imm(b, uniform_offset, const_offset);
const_offset = 0;
}
/* The range_offset could be negative: if only part of the UBO
* block is accessed, range->start can be greater than range->offset.
* But we can't underflow const_offset. If necessary we need to
* insert nir instructions to compensate (which can hopefully be
* optimized away)
*/
if (const_offset < 0) {
uniform_offset = nir_iadd_imm(b, uniform_offset, const_offset);
const_offset = 0;
}
nir_ssa_def *uniform =
nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size, uniform_offset, .base = const_offset);
nir_ssa_def *uniform =
nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
uniform_offset, .base = const_offset);
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
uniform);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, uniform);
nir_instr_remove(&instr->instr);
nir_instr_remove(&instr->instr);
return true;
return true;
}
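As a small worked example of the byte-to-dword conversion above (numbers invented for illustration): a constant byte offset of 48 becomes 12 dwords, and if the range mapping would make the constant part negative, the remainder is folded back into the indirect offset instead.

#include <assert.h>

static void
example_offset_conversion(void)
{
   int const_offset = 48 >> 2;          /* bytes -> dwords = 12 */
   int range_offset = (16 - 64) / 4;    /* offset 16, start 64 -> -12 */

   const_offset += range_offset;        /* 0 here; if negative, the    */
   assert(const_offset == 0);           /* remainder would be iadd'ed  */
}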
static bool
instr_is_load_ubo(nir_instr *instr)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_op op = nir_instr_as_intrinsic(instr)->intrinsic;
nir_intrinsic_op op = nir_instr_as_intrinsic(instr)->intrinsic;
/* nir_lower_ubo_vec4 happens after this pass. */
assert(op != nir_intrinsic_load_ubo_vec4);
/* nir_lower_ubo_vec4 happens after this pass. */
assert(op != nir_intrinsic_load_ubo_vec4);
return op == nir_intrinsic_load_ubo;
return op == nir_intrinsic_load_ubo;
}
void
ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
{
struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
struct ir3_compiler *compiler = v->shader->compiler;
struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
struct ir3_compiler *compiler = v->shader->compiler;
/* Limit our uploads to the amount of constant buffer space available in
* the hardware, minus what the shader compiler may need for various
* driver params. We do this UBO-to-push-constant before the real
* allocation of the driver params' const space, because UBO pointers can
* be driver params but this pass usually eliminates them.
*/
struct ir3_const_state worst_case_const_state = { };
ir3_setup_const_state(nir, v, &worst_case_const_state);
const uint32_t max_upload = (ir3_max_const(v) -
worst_case_const_state.offsets.immediate) * 16;
/* Limit our uploads to the amount of constant buffer space available in
* the hardware, minus what the shader compiler may need for various
* driver params. We do this UBO-to-push-constant before the real
* allocation of the driver params' const space, because UBO pointers can
* be driver params but this pass usually eliminates them.
*/
struct ir3_const_state worst_case_const_state = {};
ir3_setup_const_state(nir, v, &worst_case_const_state);
const uint32_t max_upload =
(ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
memset(state, 0, sizeof(*state));
memset(state, 0, sizeof(*state));
uint32_t upload_remaining = max_upload;
nir_foreach_function (function, nir) {
if (function->impl) {
nir_foreach_block (block, function->impl) {
nir_foreach_instr (instr, block) {
if (instr_is_load_ubo(instr))
gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
state, compiler->const_upload_unit,
&upload_remaining);
}
}
}
}
uint32_t upload_remaining = max_upload;
nir_foreach_function (function, nir) {
if (function->impl) {
nir_foreach_block (block, function->impl) {
nir_foreach_instr (instr, block) {
if (instr_is_load_ubo(instr))
gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state,
compiler->const_upload_unit,
&upload_remaining);
}
}
}
}
/* For now, everything we upload is accessed statically and thus will be
* used by the shader. Once we can upload dynamically indexed data, we may
* upload sparsely accessed arrays, at which point we probably want to
* give priority to smaller UBOs, on the assumption that big UBOs will be
* accessed dynamically. Alternatively, we can track statically and
* dynamically accessed ranges separately and upload static ranges
* first.
*/
/* For now, everything we upload is accessed statically and thus will be
* used by the shader. Once we can upload dynamically indexed data, we may
* upload sparsely accessed arrays, at which point we probably want to
* give priority to smaller UBOs, on the assumption that big UBOs will be
* accessed dynamically. Alternatively, we can track statically and
* dynamically accessed ranges separately and upload static ranges
* first.
*/
uint32_t offset = v->shader->num_reserved_user_consts * 16;
for (uint32_t i = 0; i < state->num_enabled; i++) {
uint32_t range_size = state->range[i].end - state->range[i].start;
uint32_t offset = v->shader->num_reserved_user_consts * 16;
for (uint32_t i = 0; i < state->num_enabled; i++) {
uint32_t range_size = state->range[i].end - state->range[i].start;
debug_assert(offset <= max_upload);
state->range[i].offset = offset;
assert(offset <= max_upload);
offset += range_size;
}
state->size = offset;
debug_assert(offset <= max_upload);
state->range[i].offset = offset;
assert(offset <= max_upload);
offset += range_size;
}
state->size = offset;
}
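For a rough feel of the budget computed above (all numbers are assumptions for the sake of the example, not real hardware values): with 256 vec4 constants available and 8 vec4s reserved for worst-case immediates, the pass may promote up to (256 - 8) * 16 = 3968 bytes of UBO data to push constants.

#include <assert.h>

static void
example_upload_budget(void)
{
   const unsigned max_const_vec4 = 256;   /* assumed ir3_max_const() value */
   const unsigned immediates_vec4 = 8;    /* assumed worst-case immediates */

   unsigned max_upload = (max_const_vec4 - immediates_vec4) * 16; /* bytes */
   assert(max_upload == 3968);
}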
bool
ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
{
struct ir3_compiler *compiler = v->shader->compiler;
/* For the binning pass variant, we re-use the corresponding draw-pass
* variants const_state and ubo state. To make these clear, in this
* pass it is const (read-only)
*/
const struct ir3_const_state *const_state = ir3_const_state(v);
const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
struct ir3_compiler *compiler = v->shader->compiler;
/* For the binning pass variant, we re-use the corresponding draw-pass
* variants const_state and ubo state. To make these clear, in this
* pass it is const (read-only)
*/
const struct ir3_const_state *const_state = ir3_const_state(v);
const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
int num_ubos = 0;
bool progress = false;
nir_foreach_function (function, nir) {
if (function->impl) {
nir_builder builder;
nir_builder_init(&builder, function->impl);
nir_foreach_block (block, function->impl) {
nir_foreach_instr_safe (instr, block) {
if (!instr_is_load_ubo(instr))
continue;
progress |=
lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
&builder, state, &num_ubos,
compiler->const_upload_unit);
}
}
int num_ubos = 0;
bool progress = false;
nir_foreach_function (function, nir) {
if (function->impl) {
nir_builder builder;
nir_builder_init(&builder, function->impl);
nir_foreach_block (block, function->impl) {
nir_foreach_instr_safe (instr, block) {
if (!instr_is_load_ubo(instr))
continue;
progress |= lower_ubo_load_to_uniform(
nir_instr_as_intrinsic(instr), &builder, state, &num_ubos,
compiler->const_upload_unit);
}
}
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
}
/* Update the num_ubos field for GL (first_ubo_is_default_ubo). With
* Vulkan's bindless, we don't use the num_ubos field, so we can leave it
* incremented.
*/
if (nir->info.first_ubo_is_default_ubo)
nir->info.num_ubos = num_ubos;
nir_metadata_preserve(
function->impl, nir_metadata_block_index | nir_metadata_dominance);
}
}
/* Update the num_ubos field for GL (first_ubo_is_default_ubo). With
* Vulkan's bindless, we don't use the num_ubos field, so we can leave it
* incremented.
*/
if (nir->info.first_ubo_is_default_ubo)
nir->info.num_ubos = num_ubos;
return progress;
return progress;
}
static bool
fixup_load_uniform_filter(const nir_instr *instr, const void *arg)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_uniform;
if (instr->type != nir_instr_type_intrinsic)
return false;
return nir_instr_as_intrinsic(instr)->intrinsic ==
nir_intrinsic_load_uniform;
}
static nir_ssa_def *
fixup_load_uniform_instr(struct nir_builder *b, nir_instr *instr, void *arg)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
/* We don't need to worry about non-indirect case: */
if (nir_src_is_const(intr->src[0]))
return NULL;
/* We don't need to worry about non-indirect case: */
if (nir_src_is_const(intr->src[0]))
return NULL;
const unsigned base_offset_limit = (1 << 9); /* 9 bits */
unsigned base_offset = nir_intrinsic_base(intr);
const unsigned base_offset_limit = (1 << 9); /* 9 bits */
unsigned base_offset = nir_intrinsic_base(intr);
/* Or cases where base offset is lower than the hw limit: */
if (base_offset < base_offset_limit)
return NULL;
/* Or cases where base offset is lower than the hw limit: */
if (base_offset < base_offset_limit)
return NULL;
b->cursor = nir_before_instr(instr);
b->cursor = nir_before_instr(instr);
nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[0], 1);
nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[0], 1);
/* We'd like to avoid a sequence like:
*
* vec4 32 ssa_18 = intrinsic load_uniform (ssa_4) (1024, 0, 0)
* vec4 32 ssa_19 = intrinsic load_uniform (ssa_4) (1072, 0, 0)
* vec4 32 ssa_20 = intrinsic load_uniform (ssa_4) (1120, 0, 0)
*
* From turning into a unique offset value (which requires reloading
* a0.x for each instruction). So instead of just adding the constant
* base_offset to the non-const offset, be a bit more clever and only
* extract the part that cannot be encoded. Afterwards CSE should
* turn the result into:
*
* vec1 32 ssa_5 = load_const (1024)
* vec4 32 ssa_6 = iadd ssa4_, ssa_5
* vec4 32 ssa_18 = intrinsic load_uniform (ssa_5) (0, 0, 0)
* vec4 32 ssa_19 = intrinsic load_uniform (ssa_5) (48, 0, 0)
* vec4 32 ssa_20 = intrinsic load_uniform (ssa_5) (96, 0, 0)
*/
unsigned new_base_offset = base_offset % base_offset_limit;
/* We'd like to avoid a sequence like:
*
* vec4 32 ssa_18 = intrinsic load_uniform (ssa_4) (1024, 0, 0)
* vec4 32 ssa_19 = intrinsic load_uniform (ssa_4) (1072, 0, 0)
* vec4 32 ssa_20 = intrinsic load_uniform (ssa_4) (1120, 0, 0)
*
* From turning into a unique offset value (which requires reloading
* a0.x for each instruction). So instead of just adding the constant
* base_offset to the non-const offset, be a bit more clever and only
* extract the part that cannot be encoded. Afterwards CSE should
* turn the result into:
*
* vec1 32 ssa_5 = load_const (1024)
* vec4 32 ssa_6 = iadd ssa4_, ssa_5
* vec4 32 ssa_18 = intrinsic load_uniform (ssa_5) (0, 0, 0)
* vec4 32 ssa_19 = intrinsic load_uniform (ssa_5) (48, 0, 0)
* vec4 32 ssa_20 = intrinsic load_uniform (ssa_5) (96, 0, 0)
*/
unsigned new_base_offset = base_offset % base_offset_limit;
nir_intrinsic_set_base(intr, new_base_offset);
offset = nir_iadd_imm(b, offset, base_offset - new_base_offset);
nir_intrinsic_set_base(intr, new_base_offset);
offset = nir_iadd_imm(b, offset, base_offset - new_base_offset);
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(offset));
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(offset));
return NIR_LOWER_INSTR_PROGRESS;
return NIR_LOWER_INSTR_PROGRESS;
}
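Plugging the comment's numbers into the split above (base_offset_limit is 1 << 9 = 512, as in the code): bases 1024, 1072 and 1120 all fold the same 1024 into the indirect offset, leaving encodable bases 0, 48 and 96, so CSE can share a single iadd.

#include <assert.h>

static void
example_base_split(void)
{
   const unsigned limit = 1u << 9;   /* base_offset_limit */
   const unsigned bases[] = {1024, 1072, 1120};

   for (unsigned i = 0; i < 3; i++) {
      unsigned new_base = bases[i] % limit;    /* 0, 48, 96 */
      unsigned folded = bases[i] - new_base;   /* always 1024 */
      assert(folded == 1024 && new_base < limit);
   }
}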
/**
@ -520,59 +520,59 @@ fixup_load_uniform_instr(struct nir_builder *b, nir_instr *instr, void *arg)
bool
ir3_nir_fixup_load_uniform(nir_shader *nir)
{
return nir_shader_lower_instructions(nir,
fixup_load_uniform_filter, fixup_load_uniform_instr,
NULL);
return nir_shader_lower_instructions(nir, fixup_load_uniform_filter,
fixup_load_uniform_instr, NULL);
}
static nir_ssa_def *
ir3_nir_lower_load_const_instr(nir_builder *b, nir_instr *in_instr, void *data)
{
struct ir3_const_state *const_state = data;
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in_instr);
struct ir3_const_state *const_state = data;
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in_instr);
/* Pick a UBO index to use as our constant data. Skip UBO 0 since that's
* reserved for gallium's cb0.
*/
if (const_state->constant_data_ubo == -1) {
if (b->shader->info.num_ubos == 0)
b->shader->info.num_ubos++;
const_state->constant_data_ubo = b->shader->info.num_ubos++;
}
/* Pick a UBO index to use as our constant data. Skip UBO 0 since that's
* reserved for gallium's cb0.
*/
if (const_state->constant_data_ubo == -1) {
if (b->shader->info.num_ubos == 0)
b->shader->info.num_ubos++;
const_state->constant_data_ubo = b->shader->info.num_ubos++;
}
unsigned num_components = instr->num_components;
if (nir_dest_bit_size(instr->dest) == 16) {
/* We can't do 16b loads -- either from LDC (32-bit only in any of our
* traces, and disasm that doesn't look like it really supports it) or
* from the constant file (where CONSTANT_DEMOTION_ENABLE means we get
* automatic 32b-to-16b conversions when we ask for 16b from it).
* Instead, we'll load 32b from a UBO and unpack from there.
*/
num_components = DIV_ROUND_UP(num_components, 2);
}
unsigned base = nir_intrinsic_base(instr);
nir_ssa_def *index = nir_imm_int(b, const_state->constant_data_ubo);
nir_ssa_def *offset = nir_iadd_imm(b, nir_ssa_for_src(b, instr->src[0], 1), base);
unsigned num_components = instr->num_components;
if (nir_dest_bit_size(instr->dest) == 16) {
/* We can't do 16b loads -- either from LDC (32-bit only in any of our
* traces, and disasm that doesn't look like it really supports it) or
* from the constant file (where CONSTANT_DEMOTION_ENABLE means we get
* automatic 32b-to-16b conversions when we ask for 16b from it).
* Instead, we'll load 32b from a UBO and unpack from there.
*/
num_components = DIV_ROUND_UP(num_components, 2);
}
unsigned base = nir_intrinsic_base(instr);
nir_ssa_def *index = nir_imm_int(b, const_state->constant_data_ubo);
nir_ssa_def *offset =
nir_iadd_imm(b, nir_ssa_for_src(b, instr->src[0], 1), base);
nir_ssa_def *result =
nir_load_ubo(b, num_components, 32, index, offset,
.align_mul = nir_intrinsic_align_mul(instr),
.align_offset = nir_intrinsic_align_offset(instr),
.range_base = base,
.range = nir_intrinsic_range(instr));
nir_ssa_def *result =
nir_load_ubo(b, num_components, 32, index, offset,
.align_mul = nir_intrinsic_align_mul(instr),
.align_offset = nir_intrinsic_align_offset(instr),
.range_base = base, .range = nir_intrinsic_range(instr));
if (nir_dest_bit_size(instr->dest) == 16) {
result = nir_bitcast_vector(b, result, 16);
result = nir_channels(b, result, BITSET_MASK(instr->num_components));
}
if (nir_dest_bit_size(instr->dest) == 16) {
result = nir_bitcast_vector(b, result, 16);
result = nir_channels(b, result, BITSET_MASK(instr->num_components));
}
return result;
return result;
}
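A quick check of the 16-bit path above: a 3-component 16-bit destination is fetched as DIV_ROUND_UP(3, 2) = 2 components of 32 bits, then bitcast to 16 bits (giving 4 channels) and masked down to the original 3.

#include <assert.h>

static void
example_16bit_packing(void)
{
   const unsigned requested = 3;               /* 16-bit components */
   unsigned fetched32 = (requested + 1) / 2;   /* DIV_ROUND_UP(3, 2) = 2 */
   unsigned unpacked16 = fetched32 * 2;        /* after bitcast: 4 */

   assert(fetched32 == 2 && unpacked16 >= requested);
}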
static bool
ir3_lower_load_const_filter(const nir_instr *instr, const void *data)
{
return (instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_constant);
return (instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic ==
nir_intrinsic_load_constant);
}
/* Lowers load_constant intrinsics to UBO accesses so we can run them through
@ -581,26 +581,26 @@ ir3_lower_load_const_filter(const nir_instr *instr, const void *data)
bool
ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v)
{
struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_const_state *const_state = ir3_const_state(v);
const_state->constant_data_ubo = -1;
const_state->constant_data_ubo = -1;
bool progress = nir_shader_lower_instructions(nir,
ir3_lower_load_const_filter, ir3_nir_lower_load_const_instr,
const_state);
bool progress = nir_shader_lower_instructions(
nir, ir3_lower_load_const_filter, ir3_nir_lower_load_const_instr,
const_state);
if (progress) {
struct ir3_compiler *compiler = v->shader->compiler;
if (progress) {
struct ir3_compiler *compiler = v->shader->compiler;
/* Save a copy of the NIR constant data to the variant for
* inclusion in the final assembly.
*/
v->constant_data_size = align(nir->constant_data_size,
compiler->const_upload_unit * 4 * sizeof(uint32_t));
v->constant_data = rzalloc_size(v, v->constant_data_size);
memcpy(v->constant_data, nir->constant_data,
nir->constant_data_size);
}
/* Save a copy of the NIR constant data to the variant for
* inclusion in the final assembly.
*/
v->constant_data_size =
align(nir->constant_data_size,
compiler->const_upload_unit * 4 * sizeof(uint32_t));
v->constant_data = rzalloc_size(v, v->constant_data_size);
memcpy(v->constant_data, nir->constant_data, nir->constant_data_size);
}
return progress;
return progress;
}


@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/**
* This pass moves to NIR certain offset computations for different I/O
@ -34,7 +34,6 @@
* holds the result of the original byte-offset source divided by 4.
*/
/* Returns the ir3-specific intrinsic opcode corresponding to an SSBO
* instruction that is handled by this pass. It also conveniently returns
* the offset source index in @offset_src_idx.
@ -44,269 +43,269 @@
*/
static int
get_ir3_intrinsic_for_ssbo_intrinsic(unsigned intrinsic,
uint8_t *offset_src_idx)
uint8_t *offset_src_idx)
{
debug_assert(offset_src_idx);
debug_assert(offset_src_idx);
*offset_src_idx = 1;
*offset_src_idx = 1;
switch (intrinsic) {
case nir_intrinsic_store_ssbo:
*offset_src_idx = 2;
return nir_intrinsic_store_ssbo_ir3;
case nir_intrinsic_load_ssbo:
return nir_intrinsic_load_ssbo_ir3;
case nir_intrinsic_ssbo_atomic_add:
return nir_intrinsic_ssbo_atomic_add_ir3;
case nir_intrinsic_ssbo_atomic_imin:
return nir_intrinsic_ssbo_atomic_imin_ir3;
case nir_intrinsic_ssbo_atomic_umin:
return nir_intrinsic_ssbo_atomic_umin_ir3;
case nir_intrinsic_ssbo_atomic_imax:
return nir_intrinsic_ssbo_atomic_imax_ir3;
case nir_intrinsic_ssbo_atomic_umax:
return nir_intrinsic_ssbo_atomic_umax_ir3;
case nir_intrinsic_ssbo_atomic_and:
return nir_intrinsic_ssbo_atomic_and_ir3;
case nir_intrinsic_ssbo_atomic_or:
return nir_intrinsic_ssbo_atomic_or_ir3;
case nir_intrinsic_ssbo_atomic_xor:
return nir_intrinsic_ssbo_atomic_xor_ir3;
case nir_intrinsic_ssbo_atomic_exchange:
return nir_intrinsic_ssbo_atomic_exchange_ir3;
case nir_intrinsic_ssbo_atomic_comp_swap:
return nir_intrinsic_ssbo_atomic_comp_swap_ir3;
default:
break;
}
switch (intrinsic) {
case nir_intrinsic_store_ssbo:
*offset_src_idx = 2;
return nir_intrinsic_store_ssbo_ir3;
case nir_intrinsic_load_ssbo:
return nir_intrinsic_load_ssbo_ir3;
case nir_intrinsic_ssbo_atomic_add:
return nir_intrinsic_ssbo_atomic_add_ir3;
case nir_intrinsic_ssbo_atomic_imin:
return nir_intrinsic_ssbo_atomic_imin_ir3;
case nir_intrinsic_ssbo_atomic_umin:
return nir_intrinsic_ssbo_atomic_umin_ir3;
case nir_intrinsic_ssbo_atomic_imax:
return nir_intrinsic_ssbo_atomic_imax_ir3;
case nir_intrinsic_ssbo_atomic_umax:
return nir_intrinsic_ssbo_atomic_umax_ir3;
case nir_intrinsic_ssbo_atomic_and:
return nir_intrinsic_ssbo_atomic_and_ir3;
case nir_intrinsic_ssbo_atomic_or:
return nir_intrinsic_ssbo_atomic_or_ir3;
case nir_intrinsic_ssbo_atomic_xor:
return nir_intrinsic_ssbo_atomic_xor_ir3;
case nir_intrinsic_ssbo_atomic_exchange:
return nir_intrinsic_ssbo_atomic_exchange_ir3;
case nir_intrinsic_ssbo_atomic_comp_swap:
return nir_intrinsic_ssbo_atomic_comp_swap_ir3;
default:
break;
}
return -1;
return -1;
}
static nir_ssa_def *
check_and_propagate_bit_shift32(nir_builder *b, nir_alu_instr *alu_instr,
int32_t direction, int32_t shift)
int32_t direction, int32_t shift)
{
debug_assert(alu_instr->src[1].src.is_ssa);
nir_ssa_def *shift_ssa = alu_instr->src[1].src.ssa;
debug_assert(alu_instr->src[1].src.is_ssa);
nir_ssa_def *shift_ssa = alu_instr->src[1].src.ssa;
/* Only propagate if the shift is a const value so we can check value range
* statically.
*/
nir_const_value *const_val = nir_src_as_const_value(alu_instr->src[1].src);
if (!const_val)
return NULL;
/* Only propagate if the shift is a const value so we can check value range
* statically.
*/
nir_const_value *const_val = nir_src_as_const_value(alu_instr->src[1].src);
if (!const_val)
return NULL;
int32_t current_shift = const_val[0].i32 * direction;
int32_t new_shift = current_shift + shift;
int32_t current_shift = const_val[0].i32 * direction;
int32_t new_shift = current_shift + shift;
/* If the merge would reverse the direction, bail out.
* e.g, 'x << 2' then 'x >> 4' is not 'x >> 2'.
*/
if (current_shift * new_shift < 0)
return NULL;
/* If the merge would reverse the direction, bail out.
* e.g, 'x << 2' then 'x >> 4' is not 'x >> 2'.
*/
if (current_shift * new_shift < 0)
return NULL;
/* If the propagation would overflow an int32_t, bail out too to be on the
* safe side.
*/
if (new_shift < -31 || new_shift > 31)
return NULL;
/* If the propagation would overflow an int32_t, bail out too to be on the
* safe side.
*/
if (new_shift < -31 || new_shift > 31)
return NULL;
/* Add or subtract shift depending on the final direction (SHR vs. SHL). */
if (shift * direction < 0)
shift_ssa = nir_isub(b, shift_ssa, nir_imm_int(b, abs(shift)));
else
shift_ssa = nir_iadd(b, shift_ssa, nir_imm_int(b, abs(shift)));
/* Add or subtract shift depending on the final direction (SHR vs. SHL). */
if (shift * direction < 0)
shift_ssa = nir_isub(b, shift_ssa, nir_imm_int(b, abs(shift)));
else
shift_ssa = nir_iadd(b, shift_ssa, nir_imm_int(b, abs(shift)));
return shift_ssa;
return shift_ssa;
}
nir_ssa_def *
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift)
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset,
int32_t shift)
{
nir_instr *offset_instr = offset->parent_instr;
if (offset_instr->type != nir_instr_type_alu)
return NULL;
nir_instr *offset_instr = offset->parent_instr;
if (offset_instr->type != nir_instr_type_alu)
return NULL;
nir_alu_instr *alu = nir_instr_as_alu(offset_instr);
nir_ssa_def *shift_ssa;
nir_ssa_def *new_offset = NULL;
nir_alu_instr *alu = nir_instr_as_alu(offset_instr);
nir_ssa_def *shift_ssa;
nir_ssa_def *new_offset = NULL;
/* the first src could be something like ssa_18.x, but we only want
* the single component. Otherwise the ishl/ishr/ushr could turn
* into a vec4 operation:
*/
nir_ssa_def *src0 = nir_mov_alu(b, alu->src[0], 1);
/* the first src could be something like ssa_18.x, but we only want
* the single component. Otherwise the ishl/ishr/ushr could turn
* into a vec4 operation:
*/
nir_ssa_def *src0 = nir_mov_alu(b, alu->src[0], 1);
switch (alu->op) {
case nir_op_ishl:
shift_ssa = check_and_propagate_bit_shift32(b, alu, 1, shift);
if (shift_ssa)
new_offset = nir_ishl(b, src0, shift_ssa);
break;
case nir_op_ishr:
shift_ssa = check_and_propagate_bit_shift32(b, alu, -1, shift);
if (shift_ssa)
new_offset = nir_ishr(b, src0, shift_ssa);
break;
case nir_op_ushr:
shift_ssa = check_and_propagate_bit_shift32(b, alu, -1, shift);
if (shift_ssa)
new_offset = nir_ushr(b, src0, shift_ssa);
break;
default:
return NULL;
}
switch (alu->op) {
case nir_op_ishl:
shift_ssa = check_and_propagate_bit_shift32(b, alu, 1, shift);
if (shift_ssa)
new_offset = nir_ishl(b, src0, shift_ssa);
break;
case nir_op_ishr:
shift_ssa = check_and_propagate_bit_shift32(b, alu, -1, shift);
if (shift_ssa)
new_offset = nir_ishr(b, src0, shift_ssa);
break;
case nir_op_ushr:
shift_ssa = check_and_propagate_bit_shift32(b, alu, -1, shift);
if (shift_ssa)
new_offset = nir_ushr(b, src0, shift_ssa);
break;
default:
return NULL;
}
return new_offset;
return new_offset;
}
static bool
lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
{
unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
int shift = 2;
unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
int shift = 2;
bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
nir_ssa_def *new_dest = NULL;
bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
nir_ssa_def *new_dest = NULL;
/* for 16-bit ssbo access, offset is in 16-bit words instead of dwords */
if ((has_dest && intrinsic->dest.ssa.bit_size == 16) ||
(!has_dest && intrinsic->src[0].ssa->bit_size == 16))
shift = 1;
/* for 16-bit ssbo access, offset is in 16-bit words instead of dwords */
if ((has_dest && intrinsic->dest.ssa.bit_size == 16) ||
(!has_dest && intrinsic->src[0].ssa->bit_size == 16))
shift = 1;
/* Here we create a new intrinsic and copy over all contents from the old one. */
/* Here we create a new intrinsic and copy over all contents from the old
* one. */
nir_intrinsic_instr *new_intrinsic;
nir_src *target_src;
nir_intrinsic_instr *new_intrinsic;
nir_src *target_src;
b->cursor = nir_before_instr(&intrinsic->instr);
b->cursor = nir_before_instr(&intrinsic->instr);
/* 'offset_src_idx' holds the index of the source that represents the offset. */
new_intrinsic =
nir_intrinsic_instr_create(b->shader, ir3_ssbo_opcode);
/* 'offset_src_idx' holds the index of the source that represents the offset. */
new_intrinsic = nir_intrinsic_instr_create(b->shader, ir3_ssbo_opcode);
debug_assert(intrinsic->src[offset_src_idx].is_ssa);
nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;
debug_assert(intrinsic->src[offset_src_idx].is_ssa);
nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;
/* Since we don't have value range checking, we first try to propagate
* the division by 4 ('offset >> 2') into another bit-shift instruction that
* possibly defines the offset. If that's the case, we emit a similar
* instruction adjusting (merging) the shift value.
*
* Here we use the convention that shifting right is negative while shifting
* left is positive. So 'x / 4' ~ 'x >> 2' or 'x << -2'.
*/
nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -shift);
/* Since we don't have value range checking, we first try to propagate
* the division by 4 ('offset >> 2') into another bit-shift instruction that
* possibly defines the offset. If that's the case, we emit a similar
* instruction adjusting (merging) the shift value.
*
* Here we use the convention that shifting right is negative while shifting
* left is positive. So 'x / 4' ~ 'x >> 2' or 'x << -2'.
*/
nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -shift);
/* The new source that will hold the dword-offset is always the last
* one for every intrinsic.
*/
target_src = &new_intrinsic->src[num_srcs];
*target_src = nir_src_for_ssa(offset);
/* The new source that will hold the dword-offset is always the last
* one for every intrinsic.
*/
target_src = &new_intrinsic->src[num_srcs];
*target_src = nir_src_for_ssa(offset);
if (has_dest) {
debug_assert(intrinsic->dest.is_ssa);
nir_ssa_def *dest = &intrinsic->dest.ssa;
nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
dest->num_components, dest->bit_size, NULL);
new_dest = &new_intrinsic->dest.ssa;
}
if (has_dest) {
debug_assert(intrinsic->dest.is_ssa);
nir_ssa_def *dest = &intrinsic->dest.ssa;
nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
dest->num_components, dest->bit_size, NULL);
new_dest = &new_intrinsic->dest.ssa;
}
for (unsigned i = 0; i < num_srcs; i++)
new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);
for (unsigned i = 0; i < num_srcs; i++)
new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);
nir_intrinsic_copy_const_indices(new_intrinsic, intrinsic);
nir_intrinsic_copy_const_indices(new_intrinsic, intrinsic);
new_intrinsic->num_components = intrinsic->num_components;
new_intrinsic->num_components = intrinsic->num_components;
/* If we managed to propagate the division by 4, just use the new offset
* register and don't emit the SHR.
*/
if (new_offset)
offset = new_offset;
else
offset = nir_ushr(b, offset, nir_imm_int(b, shift));
/* If we managed to propagate the division by 4, just use the new offset
* register and don't emit the SHR.
*/
if (new_offset)
offset = new_offset;
else
offset = nir_ushr(b, offset, nir_imm_int(b, shift));
/* Insert the new intrinsic right before the old one. */
nir_builder_instr_insert(b, &new_intrinsic->instr);
/* Insert the new intrinsic right before the old one. */
nir_builder_instr_insert(b, &new_intrinsic->instr);
/* Replace the last source of the new intrinsic by the result of
* the offset divided by 4.
*/
nir_instr_rewrite_src(&new_intrinsic->instr,
target_src,
nir_src_for_ssa(offset));
/* Replace the last source of the new intrinsic by the result of
* the offset divided by 4.
*/
nir_instr_rewrite_src(&new_intrinsic->instr, target_src,
nir_src_for_ssa(offset));
if (has_dest) {
/* Replace the uses of the original destination by that
* of the new intrinsic.
*/
nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
new_dest);
}
if (has_dest) {
/* Replace the uses of the original destination by that
* of the new intrinsic.
*/
nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, new_dest);
}
/* Finally remove the original intrinsic. */
nir_instr_remove(&intrinsic->instr);
/* Finally remove the original intrinsic. */
nir_instr_remove(&intrinsic->instr);
return true;
return true;
}
static bool
lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, int gpu_id)
lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx,
int gpu_id)
{
bool progress = false;
bool progress = false;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
/* SSBO */
int ir3_intrinsic;
uint8_t offset_src_idx;
ir3_intrinsic = get_ir3_intrinsic_for_ssbo_intrinsic(intr->intrinsic,
&offset_src_idx);
if (ir3_intrinsic != -1) {
progress |= lower_offset_for_ssbo(intr, b, (unsigned) ir3_intrinsic,
offset_src_idx);
}
}
/* SSBO */
int ir3_intrinsic;
uint8_t offset_src_idx;
ir3_intrinsic =
get_ir3_intrinsic_for_ssbo_intrinsic(intr->intrinsic, &offset_src_idx);
if (ir3_intrinsic != -1) {
progress |= lower_offset_for_ssbo(intr, b, (unsigned)ir3_intrinsic,
offset_src_idx);
}
}
return progress;
return progress;
}
static bool
lower_io_offsets_func(nir_function_impl *impl, int gpu_id)
{
void *mem_ctx = ralloc_parent(impl);
nir_builder b;
nir_builder_init(&b, impl);
void *mem_ctx = ralloc_parent(impl);
nir_builder b;
nir_builder_init(&b, impl);
bool progress = false;
nir_foreach_block_safe (block, impl) {
progress |= lower_io_offsets_block(block, &b, mem_ctx, gpu_id);
}
bool progress = false;
nir_foreach_block_safe (block, impl) {
progress |= lower_io_offsets_block(block, &b, mem_ctx, gpu_id);
}
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
}
if (progress) {
nir_metadata_preserve(impl,
nir_metadata_block_index | nir_metadata_dominance);
}
return progress;
return progress;
}
bool
ir3_nir_lower_io_offsets(nir_shader *shader, int gpu_id)
{
bool progress = false;
bool progress = false;
nir_foreach_function (function, shader) {
if (function->impl)
progress |= lower_io_offsets_func(function->impl, gpu_id);
}
nir_foreach_function (function, shader) {
if (function->impl)
progress |= lower_io_offsets_func(function->impl, gpu_id);
}
return progress;
return progress;
}
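
The shift-merging rule used by ir3_nir_try_propagate_bit_shift can be illustrated outside of NIR with plain integers. This is a minimal standalone sketch of the idea only, with made-up helper names, assuming left shifts are encoded as positive amounts and right shifts as negative ones (the 'x / 4' ~ 'x >> 2' ~ 'x << -2' convention from the comments above):

#include <assert.h>
#include <stdint.h>

/* Encode "x << n" as +n and "x >> n" as -n.  Two shifts can be merged
 * only if the merged amount keeps the same direction and stays in range.
 */
static int
merge_shift(int32_t current, int32_t extra, int32_t *merged)
{
   int32_t new_shift = current + extra;
   if (current * new_shift < 0)      /* direction would flip, e.g. <<2 then >>4 */
      return 0;
   if (new_shift < -31 || new_shift > 31)
      return 0;
   *merged = new_shift;
   return 1;
}

int
main(void)
{
   int32_t m;
   /* (x << 4) followed by "divide by 4" (i.e. -2) merges into x << 2 */
   assert(merge_shift(4, -2, &m) && m == 2);
   /* (x << 2) followed by >> 4 would reverse the direction: not merged */
   assert(!merge_shift(2, -4, &m));
   return 0;
}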


@@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/**
* This pass lowers load_barycentric_at_offset to dsx.3d/dsy.3d and alu
@@ -32,75 +32,72 @@
static nir_ssa_def *
load(nir_builder *b, unsigned ncomp, nir_intrinsic_op op)
{
nir_intrinsic_instr *load_size = nir_intrinsic_instr_create(b->shader, op);
nir_ssa_dest_init(&load_size->instr, &load_size->dest, ncomp, 32, NULL);
nir_builder_instr_insert(b, &load_size->instr);
nir_intrinsic_instr *load_size = nir_intrinsic_instr_create(b->shader, op);
nir_ssa_dest_init(&load_size->instr, &load_size->dest, ncomp, 32, NULL);
nir_builder_instr_insert(b, &load_size->instr);
return &load_size->dest.ssa;
return &load_size->dest.ssa;
}
static nir_ssa_def *
ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b,
nir_instr *instr, void *data)
ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
void *data)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
#define chan(var, c) nir_channel(b, var, c)
nir_ssa_def *off = intr->src[0].ssa;
nir_ssa_def *ij = load(b, 2, nir_intrinsic_load_barycentric_pixel);
nir_ssa_def *s = load(b, 1, nir_intrinsic_load_size_ir3);
nir_ssa_def *off = intr->src[0].ssa;
nir_ssa_def *ij = load(b, 2, nir_intrinsic_load_barycentric_pixel);
nir_ssa_def *s = load(b, 1, nir_intrinsic_load_size_ir3);
s = nir_frcp(b, s);
s = nir_frcp(b, s);
/* scaled ij with s as 3rd component: */
nir_ssa_def *sij = nir_vec3(b,
nir_fmul(b, chan(ij, 0), s),
nir_fmul(b, chan(ij, 1), s),
s);
/* scaled ij with s as 3rd component: */
nir_ssa_def *sij =
nir_vec3(b, nir_fmul(b, chan(ij, 0), s), nir_fmul(b, chan(ij, 1), s), s);
nir_ssa_def *foo = nir_fddx(b, sij);
nir_ssa_def *bar = nir_fddy(b, sij);
nir_ssa_def *foo = nir_fddx(b, sij);
nir_ssa_def *bar = nir_fddy(b, sij);
if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
b->shader->info.fs.needs_quad_helper_invocations = true;
if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
b->shader->info.fs.needs_quad_helper_invocations = true;
nir_ssa_def *x, *y, *z, *i, *j;
nir_ssa_def *x, *y, *z, *i, *j;
x = nir_ffma(b, chan(off, 0), chan(foo, 0), chan(sij, 0));
y = nir_ffma(b, chan(off, 0), chan(foo, 1), chan(sij, 1));
z = nir_ffma(b, chan(off, 0), chan(foo, 2), chan(sij, 2));
x = nir_ffma(b, chan(off, 0), chan(foo, 0), chan(sij, 0));
y = nir_ffma(b, chan(off, 0), chan(foo, 1), chan(sij, 1));
z = nir_ffma(b, chan(off, 0), chan(foo, 2), chan(sij, 2));
x = nir_ffma(b, chan(off, 1), chan(bar, 0), x);
y = nir_ffma(b, chan(off, 1), chan(bar, 1), y);
z = nir_ffma(b, chan(off, 1), chan(bar, 2), z);
x = nir_ffma(b, chan(off, 1), chan(bar, 0), x);
y = nir_ffma(b, chan(off, 1), chan(bar, 1), y);
z = nir_ffma(b, chan(off, 1), chan(bar, 2), z);
/* convert back into primitive space: */
z = nir_frcp(b, z);
i = nir_fmul(b, z, x);
j = nir_fmul(b, z, y);
/* convert back into primitive space: */
z = nir_frcp(b, z);
i = nir_fmul(b, z, x);
j = nir_fmul(b, z, y);
ij = nir_vec2(b, i, j);
ij = nir_vec2(b, i, j);
return ij;
return ij;
}
static bool
ir3_nir_lower_load_barycentric_at_offset_filter(const nir_instr *instr,
const void *data)
const void *data)
{
return (instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic ==
nir_intrinsic_load_barycentric_at_offset);
return (instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic ==
nir_intrinsic_load_barycentric_at_offset);
}
bool
ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader)
{
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
return nir_shader_lower_instructions(shader,
ir3_nir_lower_load_barycentric_at_offset_filter,
ir3_nir_lower_load_barycentric_at_offset_instr,
NULL);
return nir_shader_lower_instructions(
shader, ir3_nir_lower_load_barycentric_at_offset_filter,
ir3_nir_lower_load_barycentric_at_offset_instr, NULL);
}
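
For reference, the arithmetic this lowering builds (scale the barycentrics, offset them along their screen-space derivatives, divide the scale factor back out) can be sketched on the CPU with plain floats. The helper below is a rough illustration only; the derivative values are simply passed in as parameters, since outside a fragment shader there is nothing to take dsx/dsy of:

#include <stdio.h>

struct vec3 { float x, y, z; };

/* Adjust scaled barycentrics by a pixel offset using their screen-space
 * derivatives, then divide the scale component back out, mirroring the
 * ffma/rcp chain emitted by the pass.
 */
static void
interp_at_offset(struct vec3 sij, struct vec3 ddx, struct vec3 ddy,
                 float off_x, float off_y, float *i, float *j)
{
   float x = sij.x + off_x * ddx.x + off_y * ddy.x;
   float y = sij.y + off_x * ddx.y + off_y * ddy.y;
   float z = sij.z + off_x * ddx.z + off_y * ddy.z;
   *i = x / z;   /* convert back into primitive space */
   *j = y / z;
}

int
main(void)
{
   struct vec3 sij = {0.25f, 0.25f, 1.0f};
   struct vec3 ddx = {0.01f, 0.0f,  0.0f};
   struct vec3 ddy = {0.0f,  0.01f, 0.0f};
   float i, j;
   interp_at_offset(sij, ddx, ddy, 0.5f, 0.5f, &i, &j);
   printf("i=%f j=%f\n", i, j);
   return 0;
}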


@@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/**
* This pass lowers load_barycentric_at_sample to load_sample_pos_from_id
@@ -35,61 +35,60 @@
static nir_ssa_def *
load_sample_pos(nir_builder *b, nir_ssa_def *samp_id)
{
return nir_load_sample_pos_from_id(b, 32, samp_id);
return nir_load_sample_pos_from_id(b, 32, samp_id);
}
static nir_ssa_def *
lower_load_barycentric_at_sample(nir_builder *b, nir_intrinsic_instr *intr)
{
nir_ssa_def *pos = load_sample_pos(b, intr->src[0].ssa);
nir_ssa_def *pos = load_sample_pos(b, intr->src[0].ssa);
return nir_load_barycentric_at_offset(b, 32, pos);
return nir_load_barycentric_at_offset(b, 32, pos);
}
static nir_ssa_def *
lower_load_sample_pos(nir_builder *b, nir_intrinsic_instr *intr)
{
nir_ssa_def *pos = load_sample_pos(b, nir_load_sample_id(b));
nir_ssa_def *pos = load_sample_pos(b, nir_load_sample_id(b));
/* Note that gl_SamplePosition is offset by +vec2(0.5, 0.5) vs the
* offset passed to interpolateAtOffset(). See
* dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_offset.at_sample_position.default_framebuffer
* for example.
*/
nir_ssa_def *half = nir_imm_float(b, 0.5);
return nir_fadd(b, pos, nir_vec2(b, half, half));
/* Note that gl_SamplePosition is offset by +vec2(0.5, 0.5) vs the
* offset passed to interpolateAtOffset(). See
* dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_offset.at_sample_position.default_framebuffer
* for example.
*/
nir_ssa_def *half = nir_imm_float(b, 0.5);
return nir_fadd(b, pos, nir_vec2(b, half, half));
}
static nir_ssa_def *
ir3_nir_lower_load_barycentric_at_sample_instr(nir_builder *b,
nir_instr *instr, void *data)
ir3_nir_lower_load_barycentric_at_sample_instr(nir_builder *b, nir_instr *instr,
void *data)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_sample_pos)
return lower_load_sample_pos(b, intr);
else
return lower_load_barycentric_at_sample(b, intr);
if (intr->intrinsic == nir_intrinsic_load_sample_pos)
return lower_load_sample_pos(b, intr);
else
return lower_load_barycentric_at_sample(b, intr);
}
static bool
ir3_nir_lower_load_barycentric_at_sample_filter(const nir_instr *instr,
const void *data)
const void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
return (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
intr->intrinsic == nir_intrinsic_load_sample_pos);
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
return (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
intr->intrinsic == nir_intrinsic_load_sample_pos);
}
bool
ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader)
{
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
return nir_shader_lower_instructions(shader,
ir3_nir_lower_load_barycentric_at_sample_filter,
ir3_nir_lower_load_barycentric_at_sample_instr,
NULL);
return nir_shader_lower_instructions(
shader, ir3_nir_lower_load_barycentric_at_sample_filter,
ir3_nir_lower_load_barycentric_at_sample_instr, NULL);
}
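
The +0.5 adjustment is the only non-obvious part of this lowering. A tiny sketch of the convention, assuming (as the pass implies) that load_sample_pos_from_id returns positions relative to the pixel center while gl_SamplePosition is defined on [0, 1]:

#include <stdio.h>

/* gl_SamplePosition lives in [0,1] within the pixel, while the offset fed
 * to interpolateAtOffset() is relative to the pixel center, so the two
 * differ by +0.5 in each component.
 */
int
main(void)
{
   float sample_pos_from_id[2] = {-0.125f, 0.375f}; /* center-relative */
   float gl_sample_position[2];
   for (int c = 0; c < 2; c++)
      gl_sample_position[c] = sample_pos_from_id[c] + 0.5f;
   printf("gl_SamplePosition = (%f, %f)\n",
          gl_sample_position[0], gl_sample_position[1]);
   return 0;
}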

File diff suppressed because it is too large.


@@ -31,97 +31,97 @@
static int
coord_offset(nir_ssa_def *ssa)
{
nir_instr *parent_instr = ssa->parent_instr;
nir_instr *parent_instr = ssa->parent_instr;
/* The coordinate of a texture sampling instruction eligible for
* pre-fetch is either going to be a load_interpolated_input/
* load_input, or a vec2 assembling non-swizzled components of
* a load_interpolated_input/load_input (due to varying packing)
*/
/* The coordinate of a texture sampling instruction eligible for
* pre-fetch is either going to be a load_interpolated_input/
* load_input, or a vec2 assembling non-swizzled components of
* a load_interpolated_input/load_input (due to varying packing)
*/
if (parent_instr->type == nir_instr_type_alu) {
nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
if (parent_instr->type == nir_instr_type_alu) {
nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
if (alu->op != nir_op_vec2)
return -1;
if (alu->op != nir_op_vec2)
return -1;
if (!alu->src[0].src.is_ssa)
return -1;
if (!alu->src[0].src.is_ssa)
return -1;
int base_offset = coord_offset(alu->src[0].src.ssa) +
alu->src[0].swizzle[0];
int base_offset =
coord_offset(alu->src[0].src.ssa) + alu->src[0].swizzle[0];
/* NOTE it might be possible to support more than 2D? */
for (int i = 1; i < 2; i++) {
if (!alu->src[i].src.is_ssa)
return -1;
/* NOTE it might be possible to support more than 2D? */
for (int i = 1; i < 2; i++) {
if (!alu->src[i].src.is_ssa)
return -1;
int nth_offset = coord_offset(alu->src[i].src.ssa) +
alu->src[i].swizzle[0];
int nth_offset =
coord_offset(alu->src[i].src.ssa) + alu->src[i].swizzle[0];
if (nth_offset != (base_offset + i))
return -1;
}
if (nth_offset != (base_offset + i))
return -1;
}
return base_offset;
}
return base_offset;
}
if (parent_instr->type != nir_instr_type_intrinsic)
return -1;
if (parent_instr->type != nir_instr_type_intrinsic)
return -1;
nir_intrinsic_instr *input = nir_instr_as_intrinsic(parent_instr);
nir_intrinsic_instr *input = nir_instr_as_intrinsic(parent_instr);
if (input->intrinsic != nir_intrinsic_load_interpolated_input)
return -1;
if (input->intrinsic != nir_intrinsic_load_interpolated_input)
return -1;
/* limit to load_barycentric_pixel, other interpolation modes don't seem
* to be supported:
*/
if (!input->src[0].is_ssa)
return -1;
/* limit to load_barycentric_pixel, other interpolation modes don't seem
* to be supported:
*/
if (!input->src[0].is_ssa)
return -1;
nir_intrinsic_instr *interp =
nir_instr_as_intrinsic(input->src[0].ssa->parent_instr);
nir_intrinsic_instr *interp =
nir_instr_as_intrinsic(input->src[0].ssa->parent_instr);
if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel)
return -1;
if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel)
return -1;
/* we also need a const input offset: */
if (!nir_src_is_const(input->src[1]))
return -1;
/* we also need a const input offset: */
if (!nir_src_is_const(input->src[1]))
return -1;
unsigned base = nir_src_as_uint(input->src[1]) + nir_intrinsic_base(input);
unsigned comp = nir_intrinsic_component(input);
unsigned base = nir_src_as_uint(input->src[1]) + nir_intrinsic_base(input);
unsigned comp = nir_intrinsic_component(input);
return (4 * base) + comp;
return (4 * base) + comp;
}
int
ir3_nir_coord_offset(nir_ssa_def *ssa)
{
assert (ssa->num_components == 2);
return coord_offset(ssa);
assert(ssa->num_components == 2);
return coord_offset(ssa);
}
static bool
has_src(nir_tex_instr *tex, nir_tex_src_type type)
{
return nir_tex_instr_src_index(tex, type) >= 0;
return nir_tex_instr_src_index(tex, type) >= 0;
}
static bool
ok_bindless_src(nir_tex_instr *tex, nir_tex_src_type type)
{
int idx = nir_tex_instr_src_index(tex, type);
assert(idx >= 0);
nir_intrinsic_instr *bindless = ir3_bindless_resource(tex->src[idx].src);
int idx = nir_tex_instr_src_index(tex, type);
assert(idx >= 0);
nir_intrinsic_instr *bindless = ir3_bindless_resource(tex->src[idx].src);
/* TODO from SP_FS_BINDLESS_PREFETCH[n] it looks like this limit should
* be 1<<8 ?
*/
return nir_src_is_const(bindless->src[0]) &&
(nir_src_as_uint(bindless->src[0]) < (1 << 16));
/* TODO from SP_FS_BINDLESS_PREFETCH[n] it looks like this limit should
* be 1<<8 ?
*/
return nir_src_is_const(bindless->src[0]) &&
(nir_src_as_uint(bindless->src[0]) < (1 << 16));
}
/**
@@ -134,107 +134,103 @@ ok_bindless_src(nir_tex_instr *tex, nir_tex_src_type type)
static bool
ok_tex_samp(nir_tex_instr *tex)
{
if (has_src(tex, nir_tex_src_texture_handle)) {
/* bindless case: */
if (has_src(tex, nir_tex_src_texture_handle)) {
/* bindless case: */
assert(has_src(tex, nir_tex_src_sampler_handle));
assert(has_src(tex, nir_tex_src_sampler_handle));
return ok_bindless_src(tex, nir_tex_src_texture_handle) &&
ok_bindless_src(tex, nir_tex_src_sampler_handle);
} else {
assert(!has_src(tex, nir_tex_src_texture_offset));
assert(!has_src(tex, nir_tex_src_sampler_offset));
return ok_bindless_src(tex, nir_tex_src_texture_handle) &&
ok_bindless_src(tex, nir_tex_src_sampler_handle);
} else {
assert(!has_src(tex, nir_tex_src_texture_offset));
assert(!has_src(tex, nir_tex_src_sampler_offset));
return (tex->texture_index <= 0x1f) &&
(tex->sampler_index <= 0xf);
}
return (tex->texture_index <= 0x1f) && (tex->sampler_index <= 0xf);
}
}
static bool
lower_tex_prefetch_block(nir_block *block)
{
bool progress = false;
bool progress = false;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_tex)
continue;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_tex)
continue;
nir_tex_instr *tex = nir_instr_as_tex(instr);
if (tex->op != nir_texop_tex)
continue;
nir_tex_instr *tex = nir_instr_as_tex(instr);
if (tex->op != nir_texop_tex)
continue;
if (has_src(tex, nir_tex_src_bias) ||
has_src(tex, nir_tex_src_lod) ||
has_src(tex, nir_tex_src_comparator) ||
has_src(tex, nir_tex_src_projector) ||
has_src(tex, nir_tex_src_offset) ||
has_src(tex, nir_tex_src_ddx) ||
has_src(tex, nir_tex_src_ddy) ||
has_src(tex, nir_tex_src_ms_index) ||
has_src(tex, nir_tex_src_texture_offset) ||
has_src(tex, nir_tex_src_sampler_offset))
continue;
if (has_src(tex, nir_tex_src_bias) || has_src(tex, nir_tex_src_lod) ||
has_src(tex, nir_tex_src_comparator) ||
has_src(tex, nir_tex_src_projector) ||
has_src(tex, nir_tex_src_offset) || has_src(tex, nir_tex_src_ddx) ||
has_src(tex, nir_tex_src_ddy) || has_src(tex, nir_tex_src_ms_index) ||
has_src(tex, nir_tex_src_texture_offset) ||
has_src(tex, nir_tex_src_sampler_offset))
continue;
/* only prefetch for simple 2d tex fetch case */
if (tex->sampler_dim != GLSL_SAMPLER_DIM_2D || tex->is_array)
continue;
/* only prefetch for simple 2d tex fetch case */
if (tex->sampler_dim != GLSL_SAMPLER_DIM_2D || tex->is_array)
continue;
if (!ok_tex_samp(tex))
continue;
if (!ok_tex_samp(tex))
continue;
int idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
/* First source should be the sampling coordinate. */
nir_tex_src *coord = &tex->src[idx];
debug_assert(coord->src.is_ssa);
int idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
/* First source should be the sampling coordinate. */
nir_tex_src *coord = &tex->src[idx];
debug_assert(coord->src.is_ssa);
if (ir3_nir_coord_offset(coord->src.ssa) >= 0) {
tex->op = nir_texop_tex_prefetch;
if (ir3_nir_coord_offset(coord->src.ssa) >= 0) {
tex->op = nir_texop_tex_prefetch;
progress |= true;
}
}
progress |= true;
}
}
return progress;
return progress;
}
static bool
lower_tex_prefetch_func(nir_function_impl *impl)
{
/* Only instructions in the outer-most block are considered
* eligible for pre-dispatch, because they need to be move-able
* to the beginning of the shader to avoid locking down the
* register holding the pre-fetched result for too long.
*/
nir_block *block = nir_start_block(impl);
if (!block)
return false;
/* Only instructions in the outer-most block are considered
* eligible for pre-dispatch, because they need to be move-able
* to the beginning of the shader to avoid locking down the
* register holding the pre-fetched result for too long.
*/
nir_block *block = nir_start_block(impl);
if (!block)
return false;
bool progress = lower_tex_prefetch_block(block);
bool progress = lower_tex_prefetch_block(block);
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
}
if (progress) {
nir_metadata_preserve(impl,
nir_metadata_block_index | nir_metadata_dominance);
}
return progress;
return progress;
}
bool
ir3_nir_lower_tex_prefetch(nir_shader *shader)
{
bool progress = false;
bool progress = false;
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_foreach_function (function, shader) {
/* Only texture sampling instructions inside the main function
* are eligible for pre-dispatch.
*/
if (!function->impl || !function->is_entrypoint)
continue;
nir_foreach_function (function, shader) {
/* Only texture sampling instructions inside the main function
* are eligible for pre-dispatch.
*/
if (!function->impl || !function->is_entrypoint)
continue;
progress |= lower_tex_prefetch_func(function->impl);
}
progress |= lower_tex_prefetch_func(function->impl);
}
return progress;
return progress;
}
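
The flat offset returned by coord_offset() is just the vec4 varying slot index times four plus the component; a trivial sketch:

#include <assert.h>

/* A varying input is addressed by a vec4 slot ("base") plus a component
 * within that slot; the prefetch encoding wants a flat scalar offset.
 */
static int
flat_coord_offset(unsigned base, unsigned comp)
{
   return 4 * base + comp;
}

int
main(void)
{
   assert(flat_coord_offset(0, 0) == 0); /* first slot, .x */
   assert(flat_coord_offset(2, 1) == 9); /* third slot, .y */
   return 0;
}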


@@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the
* gather results, rather than before. As a result, it must be emulated with
@@ -32,70 +32,68 @@
static nir_ssa_def *
ir3_nir_lower_tg4_to_tex_instr(nir_builder *b, nir_instr *instr, void *data)
{
nir_tex_instr *tg4 = nir_instr_as_tex(instr);
static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} };
nir_tex_instr *tg4 = nir_instr_as_tex(instr);
static const int offsets[3][2] = {{0, 1}, {1, 1}, {1, 0}};
nir_ssa_def *results[4];
int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset);
for (int i = 0; i < 4; i++) {
int num_srcs = tg4->num_srcs + 1 /* lod */;
if (offset_index < 0 && i < 3)
num_srcs++;
nir_ssa_def *results[4];
int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset);
for (int i = 0; i < 4; i++) {
int num_srcs = tg4->num_srcs + 1 /* lod */;
if (offset_index < 0 && i < 3)
num_srcs++;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs);
tex->op = nir_texop_txl;
tex->sampler_dim = tg4->sampler_dim;
tex->coord_components = tg4->coord_components;
tex->is_array = tg4->is_array;
tex->is_shadow = tg4->is_shadow;
tex->is_new_style_shadow = tg4->is_new_style_shadow;
tex->texture_index = tg4->texture_index;
tex->sampler_index = tg4->sampler_index;
tex->dest_type = tg4->dest_type;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs);
tex->op = nir_texop_txl;
tex->sampler_dim = tg4->sampler_dim;
tex->coord_components = tg4->coord_components;
tex->is_array = tg4->is_array;
tex->is_shadow = tg4->is_shadow;
tex->is_new_style_shadow = tg4->is_new_style_shadow;
tex->texture_index = tg4->texture_index;
tex->sampler_index = tg4->sampler_index;
tex->dest_type = tg4->dest_type;
for (int j = 0; j < tg4->num_srcs; j++) {
nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex);
tex->src[j].src_type = tg4->src[j].src_type;
}
if (i != 3) {
nir_ssa_def *offset =
nir_vec2(b, nir_imm_int(b, offsets[i][0]),
nir_imm_int(b, offsets[i][1]));
if (offset_index < 0) {
tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset);
tex->src[tg4->num_srcs].src_type = nir_tex_src_offset;
} else {
assert(nir_tex_instr_src_size(tex, offset_index) == 2);
nir_ssa_def *orig = nir_ssa_for_src(
b, tex->src[offset_index].src, 2);
tex->src[offset_index].src =
nir_src_for_ssa(nir_iadd(b, orig, offset));
}
}
tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
tex->src[num_srcs - 1].src_type = nir_tex_src_lod;
for (int j = 0; j < tg4->num_srcs; j++) {
nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex);
tex->src[j].src_type = tg4->src[j].src_type;
}
if (i != 3) {
nir_ssa_def *offset = nir_vec2(b, nir_imm_int(b, offsets[i][0]),
nir_imm_int(b, offsets[i][1]));
if (offset_index < 0) {
tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset);
tex->src[tg4->num_srcs].src_type = nir_tex_src_offset;
} else {
assert(nir_tex_instr_src_size(tex, offset_index) == 2);
nir_ssa_def *orig =
nir_ssa_for_src(b, tex->src[offset_index].src, 2);
tex->src[offset_index].src =
nir_src_for_ssa(nir_iadd(b, orig, offset));
}
}
tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
tex->src[num_srcs - 1].src_type = nir_tex_src_lod;
nir_ssa_dest_init(&tex->instr, &tex->dest,
nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex),
32, NULL);
nir_builder_instr_insert(b, &tex->instr);
results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
}
results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
}
return nir_vec(b, results, 4);
return nir_vec(b, results, 4);
}
static bool
ir3_nir_lower_tg4_to_tex_filter(const nir_instr *instr, const void *data)
{
return (instr->type == nir_instr_type_tex &&
nir_instr_as_tex(instr)->op == nir_texop_tg4);
return (instr->type == nir_instr_type_tex &&
nir_instr_as_tex(instr)->op == nir_texop_tg4);
}
bool
ir3_nir_lower_tg4_to_tex(nir_shader *shader)
{
return nir_shader_lower_instructions(shader,
ir3_nir_lower_tg4_to_tex_filter,
ir3_nir_lower_tg4_to_tex_instr, NULL);
return nir_shader_lower_instructions(shader, ir3_nir_lower_tg4_to_tex_filter,
ir3_nir_lower_tg4_to_tex_instr, NULL);
}
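
The offsets table matches the usual textureGather() ordering of the 2x2 footprint: (0,1), (1,1), (1,0) and finally the un-offset (0,0) tap. A small CPU-side sketch of what the four emitted txl instructions collectively fetch, using a toy image and a hypothetical layout:

#include <stdio.h>

/* textureGather() returns the 2x2 footprint in the order
 * (0,1), (1,1), (1,0), (0,0) relative to the base texel; the lowering
 * emulates that with four point-sampled fetches.
 */
static const int gather_offsets[4][2] = {{0, 1}, {1, 1}, {1, 0}, {0, 0}};

int
main(void)
{
   float img[2][2] = {{1.0f, 2.0f},   /* img[y][x] */
                      {3.0f, 4.0f}};
   int bx = 0, by = 0;                /* base texel of the footprint */
   float gathered[4];
   for (int i = 0; i < 4; i++)
      gathered[i] = img[by + gather_offsets[i][1]][bx + gather_offsets[i][0]];
   for (int i = 0; i < 4; i++)
      printf("component %d = %f\n", i, gathered[i]);
   return 0;
}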


@@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/**
* This pass moves varying fetches (and the instructions they depend on
@@ -46,25 +46,23 @@
*/
typedef struct {
nir_block *start_block;
bool precondition_failed;
nir_block *start_block;
bool precondition_failed;
} precond_state;
typedef struct {
nir_shader *shader;
nir_block *start_block;
nir_shader *shader;
nir_block *start_block;
} state;
static void check_precondition_instr(precond_state *state, nir_instr *instr);
static void move_instruction_to_start_block(state *state, nir_instr *instr);
static bool
check_precondition_src(nir_src *src, void *state)
{
check_precondition_instr(state, src->ssa->parent_instr);
return true;
check_precondition_instr(state, src->ssa->parent_instr);
return true;
}
/* Recursively check if there is even a single dependency which
@@ -73,163 +71,163 @@ check_precondition_src(nir_src *src, void *state)
static void
check_precondition_instr(precond_state *state, nir_instr *instr)
{
if (instr->block == state->start_block)
return;
if (instr->block == state->start_block)
return;
switch (instr->type) {
case nir_instr_type_alu:
case nir_instr_type_deref:
case nir_instr_type_load_const:
case nir_instr_type_ssa_undef:
/* These could be safely moved around */
break;
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (!nir_intrinsic_can_reorder(intr)) {
state->precondition_failed = true;
return;
}
break;
}
default:
state->precondition_failed = true;
return;
}
switch (instr->type) {
case nir_instr_type_alu:
case nir_instr_type_deref:
case nir_instr_type_load_const:
case nir_instr_type_ssa_undef:
/* These could be safely moved around */
break;
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (!nir_intrinsic_can_reorder(intr)) {
state->precondition_failed = true;
return;
}
break;
}
default:
state->precondition_failed = true;
return;
}
nir_foreach_src(instr, check_precondition_src, state);
nir_foreach_src(instr, check_precondition_src, state);
}
static void
check_precondition_block(precond_state *state, nir_block *block)
{
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_input:
break;
default:
continue;
}
switch (intr->intrinsic) {
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_input:
break;
default:
continue;
}
check_precondition_instr(state, instr);
check_precondition_instr(state, instr);
if (state->precondition_failed)
return;
}
if (state->precondition_failed)
return;
}
}
static bool
move_src(nir_src *src, void *state)
{
/* At this point we shouldn't have any non-ssa src: */
debug_assert(src->is_ssa);
move_instruction_to_start_block(state, src->ssa->parent_instr);
return true;
/* At this point we shouldn't have any non-ssa src: */
debug_assert(src->is_ssa);
move_instruction_to_start_block(state, src->ssa->parent_instr);
return true;
}
static void
move_instruction_to_start_block(state *state, nir_instr *instr)
{
/* nothing to do if the instruction is already in the start block */
if (instr->block == state->start_block)
return;
/* nothing to do if the instruction is already in the start block */
if (instr->block == state->start_block)
return;
/* first move (recursively) all src's to ensure they appear before
* load*_input that we are trying to move:
*/
nir_foreach_src(instr, move_src, state);
/* first move (recursively) all src's to ensure they appear before
* load*_input that we are trying to move:
*/
nir_foreach_src(instr, move_src, state);
/* and then move the instruction itself:
*/
exec_node_remove(&instr->node);
exec_list_push_tail(&state->start_block->instr_list, &instr->node);
instr->block = state->start_block;
/* and then move the instruction itself:
*/
exec_node_remove(&instr->node);
exec_list_push_tail(&state->start_block->instr_list, &instr->node);
instr->block = state->start_block;
}
static bool
move_varying_inputs_block(state *state, nir_block *block)
{
bool progress = false;
bool progress = false;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_input:
/* TODO any others to handle? */
break;
default:
continue;
}
switch (intr->intrinsic) {
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_input:
/* TODO any others to handle? */
break;
default:
continue;
}
debug_assert(intr->dest.is_ssa);
debug_assert(intr->dest.is_ssa);
move_instruction_to_start_block(state, instr);
move_instruction_to_start_block(state, instr);
progress = true;
}
progress = true;
}
return progress;
return progress;
}
bool
ir3_nir_move_varying_inputs(nir_shader *shader)
{
bool progress = false;
bool progress = false;
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_foreach_function (function, shader) {
precond_state state;
nir_foreach_function (function, shader) {
precond_state state;
if (!function->impl)
continue;
if (!function->impl)
continue;
state.precondition_failed = false;
state.start_block = nir_start_block(function->impl);
state.precondition_failed = false;
state.start_block = nir_start_block(function->impl);
nir_foreach_block (block, function->impl) {
if (block == state.start_block)
continue;
nir_foreach_block (block, function->impl) {
if (block == state.start_block)
continue;
check_precondition_block(&state, block);
check_precondition_block(&state, block);
if (state.precondition_failed)
return false;
}
}
if (state.precondition_failed)
return false;
}
}
nir_foreach_function (function, shader) {
state state;
nir_foreach_function (function, shader) {
state state;
if (!function->impl)
continue;
if (!function->impl)
continue;
state.shader = shader;
state.start_block = nir_start_block(function->impl);
state.shader = shader;
state.start_block = nir_start_block(function->impl);
bool progress = false;
nir_foreach_block (block, function->impl) {
/* don't need to move anything that is already in the first block */
if (block == state.start_block)
continue;
progress |= move_varying_inputs_block(&state, block);
}
bool progress = false;
nir_foreach_block (block, function->impl) {
/* don't need to move anything that is already in the first block */
if (block == state.start_block)
continue;
progress |= move_varying_inputs_block(&state, block);
}
if (progress) {
nir_metadata_preserve(function->impl,
nir_metadata_block_index | nir_metadata_dominance);
}
}
if (progress) {
nir_metadata_preserve(
function->impl, nir_metadata_block_index | nir_metadata_dominance);
}
}
return progress;
return progress;
}
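
The move itself is a plain post-order walk: an instruction is only placed in the start block after all of its sources have been placed there, so sources always end up ahead of their users. A toy sketch of that ordering, with a hand-built three-node DAG standing in for load_interpolated_input and its sources:

#include <stdio.h>

struct node {
   const char *name;
   struct node *src[2];
   int moved;
};

/* Move sources first, recursively, then the node itself. */
static void
move_to_start(struct node *n)
{
   if (!n || n->moved)
      return;
   for (int i = 0; i < 2; i++)
      move_to_start(n->src[i]);
   n->moved = 1;
   printf("moved %s\n", n->name);
}

int
main(void)
{
   struct node bary = {"load_barycentric_pixel",   {0, 0}, 0};
   struct node off  = {"const_offset",             {0, 0}, 0};
   struct node in   = {"load_interpolated_input",  {&bary, &off}, 0};
   move_to_start(&in);  /* prints the sources first, then the load */
   return 0;
}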

File diff suppressed because it is too large.


@@ -33,425 +33,452 @@
#define PTRID(x) ((unsigned long)(x))
/* ansi escape sequences: */
#define RESET "\x1b[0m"
#define RED "\x1b[0;31m"
#define GREEN "\x1b[0;32m"
#define BLUE "\x1b[0;34m"
#define MAGENTA "\x1b[0;35m"
#define RESET "\x1b[0m"
#define RED "\x1b[0;31m"
#define GREEN "\x1b[0;32m"
#define BLUE "\x1b[0;34m"
#define MAGENTA "\x1b[0;35m"
/* syntax coloring, mostly to make it easier to see different sorts of
* srcs (immediate, constant, ssa, array, ...)
*/
#define SYN_REG(x) RED x RESET
#define SYN_IMMED(x) GREEN x RESET
#define SYN_CONST(x) GREEN x RESET
#define SYN_SSA(x) BLUE x RESET
#define SYN_ARRAY(x) MAGENTA x RESET
#define SYN_REG(x) RED x RESET
#define SYN_IMMED(x) GREEN x RESET
#define SYN_CONST(x) GREEN x RESET
#define SYN_SSA(x) BLUE x RESET
#define SYN_ARRAY(x) MAGENTA x RESET
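
The SYN_* macros rely on C string-literal concatenation to paste the ANSI escape codes around the format string at compile time; a minimal sketch:

#include <stdio.h>

#define RESET "\x1b[0m"
#define RED   "\x1b[0;31m"
/* RED x RESET concatenates into a single literal with the color codes
 * wrapped around the caller's format string.
 */
#define SYN_REG(x) RED x RESET

int
main(void)
{
   printf(SYN_REG("r%u.%c") "\n", 0u, 'x');  /* prints "r0.x" in red */
   return 0;
}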
static const char *
type_name(type_t type)
{
static const char *type_names[] = {
[TYPE_F16] = "f16",
[TYPE_F32] = "f32",
[TYPE_U16] = "u16",
[TYPE_U32] = "u32",
[TYPE_S16] = "s16",
[TYPE_S32] = "s32",
[TYPE_U8] = "u8",
[TYPE_S8] = "s8",
};
return type_names[type];
static const char *type_names[] = {
[TYPE_F16] = "f16", [TYPE_F32] = "f32", [TYPE_U16] = "u16",
[TYPE_U32] = "u32", [TYPE_S16] = "s16", [TYPE_S32] = "s32",
[TYPE_U8] = "u8", [TYPE_S8] = "s8",
};
return type_names[type];
}
static void print_instr_name(struct log_stream *stream, struct ir3_instruction *instr, bool flags)
static void
print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
bool flags)
{
if (!instr)
return;
if (!instr)
return;
#ifdef DEBUG
mesa_log_stream_printf(stream, "%04u:", instr->serialno);
mesa_log_stream_printf(stream, "%04u:", instr->serialno);
#endif
mesa_log_stream_printf(stream, "%04u:", instr->name);
mesa_log_stream_printf(stream, "%04u:", instr->ip);
if (instr->flags & IR3_INSTR_UNUSED) {
mesa_log_stream_printf(stream, "XXX: ");
} else {
mesa_log_stream_printf(stream, "%03u: ", instr->use_count);
}
mesa_log_stream_printf(stream, "%04u:", instr->name);
mesa_log_stream_printf(stream, "%04u:", instr->ip);
if (instr->flags & IR3_INSTR_UNUSED) {
mesa_log_stream_printf(stream, "XXX: ");
} else {
mesa_log_stream_printf(stream, "%03u: ", instr->use_count);
}
if (flags) {
mesa_log_stream_printf(stream, "\t");
if (instr->flags & IR3_INSTR_SY)
mesa_log_stream_printf(stream, "(sy)");
if (instr->flags & IR3_INSTR_SS)
mesa_log_stream_printf(stream, "(ss)");
if (instr->flags & IR3_INSTR_JP)
mesa_log_stream_printf(stream, "(jp)");
if (instr->repeat)
mesa_log_stream_printf(stream, "(rpt%d)", instr->repeat);
if (instr->nop)
mesa_log_stream_printf(stream, "(nop%d)", instr->nop);
if (instr->flags & IR3_INSTR_UL)
mesa_log_stream_printf(stream, "(ul)");
} else {
mesa_log_stream_printf(stream, " ");
}
if (flags) {
mesa_log_stream_printf(stream, "\t");
if (instr->flags & IR3_INSTR_SY)
mesa_log_stream_printf(stream, "(sy)");
if (instr->flags & IR3_INSTR_SS)
mesa_log_stream_printf(stream, "(ss)");
if (instr->flags & IR3_INSTR_JP)
mesa_log_stream_printf(stream, "(jp)");
if (instr->repeat)
mesa_log_stream_printf(stream, "(rpt%d)", instr->repeat);
if (instr->nop)
mesa_log_stream_printf(stream, "(nop%d)", instr->nop);
if (instr->flags & IR3_INSTR_UL)
mesa_log_stream_printf(stream, "(ul)");
} else {
mesa_log_stream_printf(stream, " ");
}
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT: mesa_log_stream_printf(stream, "_meta:in"); break;
case OPC_META_SPLIT: mesa_log_stream_printf(stream, "_meta:split"); break;
case OPC_META_COLLECT: mesa_log_stream_printf(stream, "_meta:collect"); break;
case OPC_META_TEX_PREFETCH: mesa_log_stream_printf(stream, "_meta:tex_prefetch"); break;
case OPC_META_PARALLEL_COPY: mesa_log_stream_printf(stream, "_meta:parallel_copy"); break;
case OPC_META_PHI: mesa_log_stream_printf(stream, "_meta:phi"); break;
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT:
mesa_log_stream_printf(stream, "_meta:in");
break;
case OPC_META_SPLIT:
mesa_log_stream_printf(stream, "_meta:split");
break;
case OPC_META_COLLECT:
mesa_log_stream_printf(stream, "_meta:collect");
break;
case OPC_META_TEX_PREFETCH:
mesa_log_stream_printf(stream, "_meta:tex_prefetch");
break;
case OPC_META_PARALLEL_COPY:
mesa_log_stream_printf(stream, "_meta:parallel_copy");
break;
case OPC_META_PHI:
mesa_log_stream_printf(stream, "_meta:phi");
break;
/* shouldn't hit here.. just for debugging: */
default: mesa_log_stream_printf(stream, "_meta:%d", instr->opc); break;
}
} else if (opc_cat(instr->opc) == 1) {
if (instr->opc == OPC_MOV) {
if (instr->cat1.src_type == instr->cat1.dst_type)
mesa_log_stream_printf(stream, "mov");
else
mesa_log_stream_printf(stream, "cov");
} else {
mesa_log_stream_printf(stream, "%s", disasm_a3xx_instr_name(instr->opc));
}
/* shouldn't hit here.. just for debugging: */
default:
mesa_log_stream_printf(stream, "_meta:%d", instr->opc);
break;
}
} else if (opc_cat(instr->opc) == 1) {
if (instr->opc == OPC_MOV) {
if (instr->cat1.src_type == instr->cat1.dst_type)
mesa_log_stream_printf(stream, "mov");
else
mesa_log_stream_printf(stream, "cov");
} else {
mesa_log_stream_printf(stream, "%s",
disasm_a3xx_instr_name(instr->opc));
}
if (instr->opc != OPC_MOVMSK) {
mesa_log_stream_printf(stream, ".%s%s", type_name(instr->cat1.src_type),
type_name(instr->cat1.dst_type));
}
} else if (instr->opc == OPC_B) {
const char *name[8] = {
[BRANCH_PLAIN] = "br",
[BRANCH_OR] = "brao",
[BRANCH_AND] = "braa",
[BRANCH_CONST] = "brac",
[BRANCH_ANY] = "bany",
[BRANCH_ALL] = "ball",
[BRANCH_X] = "brax",
};
mesa_log_stream_printf(stream, "%s", name[instr->cat0.brtype]);
} else {
mesa_log_stream_printf(stream, "%s", disasm_a3xx_instr_name(instr->opc));
if (instr->flags & IR3_INSTR_3D)
mesa_log_stream_printf(stream, ".3d");
if (instr->flags & IR3_INSTR_A)
mesa_log_stream_printf(stream, ".a");
if (instr->flags & IR3_INSTR_O)
mesa_log_stream_printf(stream, ".o");
if (instr->flags & IR3_INSTR_P)
mesa_log_stream_printf(stream, ".p");
if (instr->flags & IR3_INSTR_S)
mesa_log_stream_printf(stream, ".s");
if (instr->flags & IR3_INSTR_A1EN)
mesa_log_stream_printf(stream, ".a1en");
if (instr->opc == OPC_LDC)
mesa_log_stream_printf(stream, ".offset%d", instr->cat6.d);
if (instr->flags & IR3_INSTR_B) {
mesa_log_stream_printf(stream, ".base%d",
is_tex(instr) ? instr->cat5.tex_base : instr->cat6.base);
}
if (instr->flags & IR3_INSTR_S2EN)
mesa_log_stream_printf(stream, ".s2en");
if (instr->opc != OPC_MOVMSK) {
mesa_log_stream_printf(stream, ".%s%s",
type_name(instr->cat1.src_type),
type_name(instr->cat1.dst_type));
}
} else if (instr->opc == OPC_B) {
const char *name[8] = {
[BRANCH_PLAIN] = "br", [BRANCH_OR] = "brao", [BRANCH_AND] = "braa",
[BRANCH_CONST] = "brac", [BRANCH_ANY] = "bany", [BRANCH_ALL] = "ball",
[BRANCH_X] = "brax",
};
mesa_log_stream_printf(stream, "%s", name[instr->cat0.brtype]);
} else {
mesa_log_stream_printf(stream, "%s", disasm_a3xx_instr_name(instr->opc));
if (instr->flags & IR3_INSTR_3D)
mesa_log_stream_printf(stream, ".3d");
if (instr->flags & IR3_INSTR_A)
mesa_log_stream_printf(stream, ".a");
if (instr->flags & IR3_INSTR_O)
mesa_log_stream_printf(stream, ".o");
if (instr->flags & IR3_INSTR_P)
mesa_log_stream_printf(stream, ".p");
if (instr->flags & IR3_INSTR_S)
mesa_log_stream_printf(stream, ".s");
if (instr->flags & IR3_INSTR_A1EN)
mesa_log_stream_printf(stream, ".a1en");
if (instr->opc == OPC_LDC)
mesa_log_stream_printf(stream, ".offset%d", instr->cat6.d);
if (instr->flags & IR3_INSTR_B) {
mesa_log_stream_printf(
stream, ".base%d",
is_tex(instr) ? instr->cat5.tex_base : instr->cat6.base);
}
if (instr->flags & IR3_INSTR_S2EN)
mesa_log_stream_printf(stream, ".s2en");
static const char *cond[0x7] = {
"lt",
"le",
"gt",
"ge",
"eq",
"ne",
};
static const char *cond[0x7] = {
"lt", "le", "gt", "ge", "eq", "ne",
};
switch (instr->opc) {
case OPC_CMPS_F:
case OPC_CMPS_U:
case OPC_CMPS_S:
case OPC_CMPV_F:
case OPC_CMPV_U:
case OPC_CMPV_S:
mesa_log_stream_printf(stream, ".%s", cond[instr->cat2.condition & 0x7]);
break;
default:
break;
}
}
switch (instr->opc) {
case OPC_CMPS_F:
case OPC_CMPS_U:
case OPC_CMPS_S:
case OPC_CMPV_F:
case OPC_CMPV_U:
case OPC_CMPV_S:
mesa_log_stream_printf(stream, ".%s",
cond[instr->cat2.condition & 0x7]);
break;
default:
break;
}
}
}
static void print_ssa_def_name(struct log_stream *stream, struct ir3_register *reg)
static void
print_ssa_def_name(struct log_stream *stream, struct ir3_register *reg)
{
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u"), reg->instr->serialno);
if (reg->name != 0)
mesa_log_stream_printf(stream, ":%u", reg->name);
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u"), reg->instr->serialno);
if (reg->name != 0)
mesa_log_stream_printf(stream, ":%u", reg->name);
}
static void print_ssa_name(struct log_stream *stream, struct ir3_register *reg, bool dst)
static void
print_ssa_name(struct log_stream *stream, struct ir3_register *reg, bool dst)
{
if (!dst) {
if (!reg->def)
mesa_log_stream_printf(stream, SYN_SSA("undef"));
else
print_ssa_def_name(stream, reg->def);
} else {
print_ssa_def_name(stream, reg);
}
if (!dst) {
if (!reg->def)
mesa_log_stream_printf(stream, SYN_SSA("undef"));
else
print_ssa_def_name(stream, reg->def);
} else {
print_ssa_def_name(stream, reg);
}
if (reg->num != INVALID_REG && !(reg->flags & IR3_REG_ARRAY))
mesa_log_stream_printf(stream, "("SYN_REG("r%u.%c")")", reg_num(reg), "xyzw"[reg_comp(reg)]);
if (reg->num != INVALID_REG && !(reg->flags & IR3_REG_ARRAY))
mesa_log_stream_printf(stream, "(" SYN_REG("r%u.%c") ")", reg_num(reg),
"xyzw"[reg_comp(reg)]);
}
static void print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
struct ir3_register *reg, bool dest)
static void
print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
struct ir3_register *reg, bool dest)
{
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
mesa_log_stream_printf(stream, "(absneg)");
else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
mesa_log_stream_printf(stream, "(neg)");
else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
mesa_log_stream_printf(stream, "(abs)");
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
mesa_log_stream_printf(stream, "(absneg)");
else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
mesa_log_stream_printf(stream, "(neg)");
else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
mesa_log_stream_printf(stream, "(abs)");
if (reg->flags & IR3_REG_FIRST_KILL)
mesa_log_stream_printf(stream, "(kill)");
if (reg->flags & IR3_REG_UNUSED)
mesa_log_stream_printf(stream, "(unused)");
if (reg->flags & IR3_REG_FIRST_KILL)
mesa_log_stream_printf(stream, "(kill)");
if (reg->flags & IR3_REG_UNUSED)
mesa_log_stream_printf(stream, "(unused)");
if (reg->flags & IR3_REG_R)
mesa_log_stream_printf(stream, "(r)");
if (reg->flags & IR3_REG_R)
mesa_log_stream_printf(stream, "(r)");
/* Right now all instructions that use tied registers only have one
* destination register, so we can just print (tied) as if it's a flag,
* although it's more convenient for RA if it's a pointer.
*/
if (reg->tied)
printf("(tied)");
/* Right now all instructions that use tied registers only have one
* destination register, so we can just print (tied) as if it's a flag,
* although it's more convenient for RA if it's a pointer.
*/
if (reg->tied)
printf("(tied)");
if (reg->flags & IR3_REG_SHARED)
mesa_log_stream_printf(stream, "s");
if (reg->flags & IR3_REG_HALF)
mesa_log_stream_printf(stream, "h");
if (reg->flags & IR3_REG_SHARED)
mesa_log_stream_printf(stream, "s");
if (reg->flags & IR3_REG_HALF)
mesa_log_stream_printf(stream, "h");
if (reg->flags & IR3_REG_IMMED) {
mesa_log_stream_printf(stream, SYN_IMMED("imm[%f,%d,0x%x]"), reg->fim_val, reg->iim_val, reg->iim_val);
} else if (reg->flags & IR3_REG_ARRAY) {
if (reg->flags & IR3_REG_SSA) {
print_ssa_name(stream, reg, dest);
mesa_log_stream_printf(stream, ":");
}
mesa_log_stream_printf(stream, SYN_ARRAY("arr[id=%u, offset=%d, size=%u]"), reg->array.id,
reg->array.offset, reg->size);
if (reg->array.base != INVALID_REG)
mesa_log_stream_printf(stream, "("SYN_REG("r%u.%c")")", reg->array.base >> 2,
"xyzw"[reg->array.base & 0x3]);
} else if (reg->flags & IR3_REG_SSA) {
print_ssa_name(stream, reg, dest);
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_CONST)
mesa_log_stream_printf(stream, SYN_CONST("c<a0.x + %d>"), reg->array.offset);
else
mesa_log_stream_printf(stream, SYN_REG("r<a0.x + %d>")" (%u)", reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_CONST)
mesa_log_stream_printf(stream, SYN_CONST("c%u.%c"), reg_num(reg), "xyzw"[reg_comp(reg)]);
else
mesa_log_stream_printf(stream, SYN_REG("r%u.%c"), reg_num(reg), "xyzw"[reg_comp(reg)]);
}
if (reg->flags & IR3_REG_IMMED) {
mesa_log_stream_printf(stream, SYN_IMMED("imm[%f,%d,0x%x]"), reg->fim_val,
reg->iim_val, reg->iim_val);
} else if (reg->flags & IR3_REG_ARRAY) {
if (reg->flags & IR3_REG_SSA) {
print_ssa_name(stream, reg, dest);
mesa_log_stream_printf(stream, ":");
}
mesa_log_stream_printf(stream,
SYN_ARRAY("arr[id=%u, offset=%d, size=%u]"),
reg->array.id, reg->array.offset, reg->size);
if (reg->array.base != INVALID_REG)
mesa_log_stream_printf(stream, "(" SYN_REG("r%u.%c") ")",
reg->array.base >> 2,
"xyzw"[reg->array.base & 0x3]);
} else if (reg->flags & IR3_REG_SSA) {
print_ssa_name(stream, reg, dest);
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_CONST)
mesa_log_stream_printf(stream, SYN_CONST("c<a0.x + %d>"),
reg->array.offset);
else
mesa_log_stream_printf(stream, SYN_REG("r<a0.x + %d>") " (%u)",
reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_CONST)
mesa_log_stream_printf(stream, SYN_CONST("c%u.%c"), reg_num(reg),
"xyzw"[reg_comp(reg)]);
else
mesa_log_stream_printf(stream, SYN_REG("r%u.%c"), reg_num(reg),
"xyzw"[reg_comp(reg)]);
}
if (reg->wrmask > 0x1)
mesa_log_stream_printf(stream, " (wrmask=0x%x)", reg->wrmask);
if (reg->wrmask > 0x1)
mesa_log_stream_printf(stream, " (wrmask=0x%x)", reg->wrmask);
}
static void
tab(struct log_stream *stream, int lvl)
{
for (int i = 0; i < lvl; i++)
mesa_log_stream_printf(stream, "\t");
for (int i = 0; i < lvl; i++)
mesa_log_stream_printf(stream, "\t");
}
static void
print_instr(struct log_stream *stream, struct ir3_instruction *instr, int lvl)
{
tab(stream, lvl);
tab(stream, lvl);
print_instr_name(stream, instr, true);
print_instr_name(stream, instr, true);
if (is_tex(instr)) {
mesa_log_stream_printf(stream, " (%s)(", type_name(instr->cat5.type));
for (unsigned i = 0; i < 4; i++)
if (instr->dsts[0]->wrmask & (1 << i))
mesa_log_stream_printf(stream, "%c", "xyzw"[i]);
mesa_log_stream_printf(stream, ")");
} else if ((instr->srcs_count > 0 || instr->dsts_count > 0) && (instr->opc != OPC_B)) {
/* NOTE the b(ranch) instruction has a suffix, which is
* handled below
*/
mesa_log_stream_printf(stream, " ");
}
if (is_tex(instr)) {
mesa_log_stream_printf(stream, " (%s)(", type_name(instr->cat5.type));
for (unsigned i = 0; i < 4; i++)
if (instr->dsts[0]->wrmask & (1 << i))
mesa_log_stream_printf(stream, "%c", "xyzw"[i]);
mesa_log_stream_printf(stream, ")");
} else if ((instr->srcs_count > 0 || instr->dsts_count > 0) &&
(instr->opc != OPC_B)) {
/* NOTE the b(ranch) instruction has a suffix, which is
* handled below
*/
mesa_log_stream_printf(stream, " ");
}
if (!is_flow(instr) || instr->opc == OPC_END || instr->opc == OPC_CHMASK) {
bool first = true;
foreach_dst (reg, instr) {
if (reg->wrmask == 0)
continue;
if (!first)
mesa_log_stream_printf(stream, ", ");
print_reg_name(stream, instr, reg, true);
first = false;
}
foreach_src (reg, instr) {
if (!first)
mesa_log_stream_printf(stream, ", ");
print_reg_name(stream, instr, reg, false);
first = false;
}
}
if (!is_flow(instr) || instr->opc == OPC_END || instr->opc == OPC_CHMASK) {
bool first = true;
foreach_dst (reg, instr) {
if (reg->wrmask == 0)
continue;
if (!first)
mesa_log_stream_printf(stream, ", ");
print_reg_name(stream, instr, reg, true);
first = false;
}
foreach_src (reg, instr) {
if (!first)
mesa_log_stream_printf(stream, ", ");
print_reg_name(stream, instr, reg, false);
first = false;
}
}
if (is_tex(instr) && !(instr->flags & IR3_INSTR_S2EN)) {
if (!!(instr->flags & IR3_INSTR_B)) {
if (!!(instr->flags & IR3_INSTR_A1EN)) {
mesa_log_stream_printf(stream, ", s#%d", instr->cat5.samp);
} else {
mesa_log_stream_printf(stream, ", s#%d, t#%d", instr->cat5.samp & 0xf,
instr->cat5.samp >> 4);
}
} else {
mesa_log_stream_printf(stream, ", s#%d, t#%d", instr->cat5.samp, instr->cat5.tex);
}
}
if (is_tex(instr) && !(instr->flags & IR3_INSTR_S2EN)) {
if (!!(instr->flags & IR3_INSTR_B)) {
if (!!(instr->flags & IR3_INSTR_A1EN)) {
mesa_log_stream_printf(stream, ", s#%d", instr->cat5.samp);
} else {
mesa_log_stream_printf(stream, ", s#%d, t#%d",
instr->cat5.samp & 0xf,
instr->cat5.samp >> 4);
}
} else {
mesa_log_stream_printf(stream, ", s#%d, t#%d", instr->cat5.samp,
instr->cat5.tex);
}
}
if (instr->opc == OPC_META_SPLIT) {
mesa_log_stream_printf(stream, ", off=%d", instr->split.off);
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
mesa_log_stream_printf(stream, ", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
instr->prefetch.samp, instr->prefetch.input_offset);
}
if (instr->opc == OPC_META_SPLIT) {
mesa_log_stream_printf(stream, ", off=%d", instr->split.off);
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
mesa_log_stream_printf(stream, ", tex=%d, samp=%d, input_offset=%d",
instr->prefetch.tex, instr->prefetch.samp,
instr->prefetch.input_offset);
}
if (is_flow(instr) && instr->cat0.target) {
/* the predicate register src is implied: */
if (instr->opc == OPC_B) {
static const struct {
const char *suffix;
int nsrc;
bool idx;
} brinfo[7] = {
[BRANCH_PLAIN] = { "r", 1, false },
[BRANCH_OR] = { "rao", 2, false },
[BRANCH_AND] = { "raa", 2, false },
[BRANCH_CONST] = { "rac", 0, true },
[BRANCH_ANY] = { "any", 1, false },
[BRANCH_ALL] = { "all", 1, false },
[BRANCH_X] = { "rax", 0, false },
};
if (is_flow(instr) && instr->cat0.target) {
/* the predicate register src is implied: */
if (instr->opc == OPC_B) {
static const struct {
const char *suffix;
int nsrc;
bool idx;
} brinfo[7] = {
[BRANCH_PLAIN] = {"r", 1, false}, [BRANCH_OR] = {"rao", 2, false},
[BRANCH_AND] = {"raa", 2, false}, [BRANCH_CONST] = {"rac", 0, true},
[BRANCH_ANY] = {"any", 1, false}, [BRANCH_ALL] = {"all", 1, false},
[BRANCH_X] = {"rax", 0, false},
};
mesa_log_stream_printf(stream, "%s", brinfo[instr->cat0.brtype].suffix);
if (brinfo[instr->cat0.brtype].idx) {
mesa_log_stream_printf(stream, ".%u", instr->cat0.idx);
}
if (brinfo[instr->cat0.brtype].nsrc >= 1) {
mesa_log_stream_printf(stream, " %sp0.%c (",
instr->cat0.inv1 ? "!" : "",
"xyzw"[instr->cat0.comp1 & 0x3]);
print_reg_name(stream, instr, instr->srcs[0], false);
mesa_log_stream_printf(stream, "), ");
}
if (brinfo[instr->cat0.brtype].nsrc >= 2) {
mesa_log_stream_printf(stream, " %sp0.%c (",
instr->cat0.inv2 ? "!" : "",
"xyzw"[instr->cat0.comp2 & 0x3]);
print_reg_name(stream, instr, instr->srcs[1], false);
mesa_log_stream_printf(stream, "), ");
}
}
mesa_log_stream_printf(stream, " target=block%u", block_id(instr->cat0.target));
}
mesa_log_stream_printf(stream, "%s",
brinfo[instr->cat0.brtype].suffix);
if (brinfo[instr->cat0.brtype].idx) {
mesa_log_stream_printf(stream, ".%u", instr->cat0.idx);
}
if (brinfo[instr->cat0.brtype].nsrc >= 1) {
mesa_log_stream_printf(stream, " %sp0.%c (",
instr->cat0.inv1 ? "!" : "",
"xyzw"[instr->cat0.comp1 & 0x3]);
print_reg_name(stream, instr, instr->srcs[0], false);
mesa_log_stream_printf(stream, "), ");
}
if (brinfo[instr->cat0.brtype].nsrc >= 2) {
mesa_log_stream_printf(stream, " %sp0.%c (",
instr->cat0.inv2 ? "!" : "",
"xyzw"[instr->cat0.comp2 & 0x3]);
print_reg_name(stream, instr, instr->srcs[1], false);
mesa_log_stream_printf(stream, "), ");
}
}
mesa_log_stream_printf(stream, " target=block%u",
block_id(instr->cat0.target));
}
if (instr->deps_count) {
mesa_log_stream_printf(stream, ", false-deps:");
unsigned n = 0;
for (unsigned i = 0; i < instr->deps_count; i++) {
if (!instr->deps[i])
continue;
if (n++ > 0)
mesa_log_stream_printf(stream, ", ");
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u"), instr->deps[i]->serialno);
}
}
if (instr->deps_count) {
mesa_log_stream_printf(stream, ", false-deps:");
unsigned n = 0;
for (unsigned i = 0; i < instr->deps_count; i++) {
if (!instr->deps[i])
continue;
if (n++ > 0)
mesa_log_stream_printf(stream, ", ");
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u"),
instr->deps[i]->serialno);
}
}
mesa_log_stream_printf(stream, "\n");
mesa_log_stream_printf(stream, "\n");
}
void ir3_print_instr(struct ir3_instruction *instr)
void
ir3_print_instr(struct ir3_instruction *instr)
{
struct log_stream *stream = mesa_log_streami();
print_instr(stream, instr, 0);
mesa_log_stream_destroy(stream);
struct log_stream *stream = mesa_log_streami();
print_instr(stream, instr, 0);
mesa_log_stream_destroy(stream);
}
static void
print_block(struct ir3_block *block, int lvl)
{
struct log_stream *stream = mesa_log_streami();
struct log_stream *stream = mesa_log_streami();
tab(stream, lvl); mesa_log_stream_printf(stream, "block%u {\n", block_id(block));
tab(stream, lvl);
mesa_log_stream_printf(stream, "block%u {\n", block_id(block));
if (block->predecessors_count > 0) {
tab(stream, lvl+1);
mesa_log_stream_printf(stream, "pred: ");
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
if (i != 0)
mesa_log_stream_printf(stream, ", ");
mesa_log_stream_printf(stream, "block%u", block_id(pred));
}
mesa_log_stream_printf(stream, "\n");
}
if (block->predecessors_count > 0) {
tab(stream, lvl + 1);
mesa_log_stream_printf(stream, "pred: ");
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
if (i != 0)
mesa_log_stream_printf(stream, ", ");
mesa_log_stream_printf(stream, "block%u", block_id(pred));
}
mesa_log_stream_printf(stream, "\n");
}
foreach_instr (instr, &block->instr_list) {
print_instr(stream, instr, lvl+1);
}
foreach_instr (instr, &block->instr_list) {
print_instr(stream, instr, lvl + 1);
}
tab(stream, lvl+1); mesa_log_stream_printf(stream, "/* keeps:\n");
for (unsigned i = 0; i < block->keeps_count; i++) {
print_instr(stream, block->keeps[i], lvl+2);
}
tab(stream, lvl+1); mesa_log_stream_printf(stream, " */\n");
tab(stream, lvl + 1);
mesa_log_stream_printf(stream, "/* keeps:\n");
for (unsigned i = 0; i < block->keeps_count; i++) {
print_instr(stream, block->keeps[i], lvl + 2);
}
tab(stream, lvl + 1);
mesa_log_stream_printf(stream, " */\n");
if (block->successors[1]) {
/* leading into if/else: */
tab(stream, lvl+1);
mesa_log_stream_printf(stream, "/* succs: if ");
switch (block->brtype) {
case IR3_BRANCH_COND:
break;
case IR3_BRANCH_ANY:
printf("any ");
break;
case IR3_BRANCH_ALL:
printf("all ");
break;
case IR3_BRANCH_GETONE:
printf("getone ");
break;
}
if (block->condition)
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u")" ", block->condition->serialno);
mesa_log_stream_printf(stream, "block%u; else block%u; */\n",
block_id(block->successors[0]),
block_id(block->successors[1]));
} else if (block->successors[0]) {
tab(stream, lvl+1);
mesa_log_stream_printf(stream, "/* succs: block%u; */\n",
block_id(block->successors[0]));
}
tab(stream, lvl); mesa_log_stream_printf(stream, "}\n");
if (block->successors[1]) {
/* leading into if/else: */
tab(stream, lvl + 1);
mesa_log_stream_printf(stream, "/* succs: if ");
switch (block->brtype) {
case IR3_BRANCH_COND:
break;
case IR3_BRANCH_ANY:
printf("any ");
break;
case IR3_BRANCH_ALL:
printf("all ");
break;
case IR3_BRANCH_GETONE:
printf("getone ");
break;
}
if (block->condition)
mesa_log_stream_printf(stream, SYN_SSA("ssa_%u") " ",
block->condition->serialno);
mesa_log_stream_printf(stream, "block%u; else block%u; */\n",
block_id(block->successors[0]),
block_id(block->successors[1]));
} else if (block->successors[0]) {
tab(stream, lvl + 1);
mesa_log_stream_printf(stream, "/* succs: block%u; */\n",
block_id(block->successors[0]));
}
tab(stream, lvl);
mesa_log_stream_printf(stream, "}\n");
}
void
ir3_print(struct ir3 *ir)
{
foreach_block (block, &ir->block_list)
print_block(block, 0);
foreach_block (block, &ir->block_list)
print_block(block, 0);
}


@@ -24,62 +24,68 @@
#ifndef _IR3_RA_H
#define _IR3_RA_H
#include "util/rb_tree.h"
#include "ir3.h"
#include "ir3_compiler.h"
#include "util/rb_tree.h"
#ifdef DEBUG
#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)
#else
#define RA_DEBUG 0
#endif
#define d(fmt, ...) do { if (RA_DEBUG) { \
printf("RA: "fmt"\n", ##__VA_ARGS__); \
} } while (0)
#define d(fmt, ...) \
do { \
if (RA_DEBUG) { \
printf("RA: " fmt "\n", ##__VA_ARGS__); \
} \
} while (0)
#define di(instr, fmt, ...) do { if (RA_DEBUG) { \
printf("RA: "fmt": ", ##__VA_ARGS__); \
ir3_print_instr(instr); \
} } while (0)
#define di(instr, fmt, ...) \
do { \
if (RA_DEBUG) { \
printf("RA: " fmt ": ", ##__VA_ARGS__); \
ir3_print_instr(instr); \
} \
} while (0)
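
Both macros are no-ops unless the build defines DEBUG and the IR3_DBG_RAMSGS shader-debug
flag is set at runtime; a minimal usage sketch (debug_assignment and its arguments are
hypothetical, not part of this change):

static void
debug_assignment(struct ir3_instruction *instr, unsigned physreg)
{
   d("assigning physreg %u", physreg); /* "RA: assigning physreg N"              */
   di(instr, "processing");            /* "RA: processing: " then the instr dump */
}
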
typedef uint16_t physreg_t;
static inline unsigned
ra_physreg_to_num(physreg_t physreg, unsigned flags)
{
if (!(flags & IR3_REG_HALF))
physreg /= 2;
if (flags & IR3_REG_SHARED)
physreg += 48 * 4;
return physreg;
if (!(flags & IR3_REG_HALF))
physreg /= 2;
if (flags & IR3_REG_SHARED)
physreg += 48 * 4;
return physreg;
}
static inline physreg_t
ra_num_to_physreg(unsigned num, unsigned flags)
{
if (flags & IR3_REG_SHARED)
num -= 48 * 4;
if (!(flags & IR3_REG_HALF))
num *= 2;
return num;
if (flags & IR3_REG_SHARED)
num -= 48 * 4;
if (!(flags & IR3_REG_HALF))
num *= 2;
return num;
}
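
A physreg thus indexes half-register-sized slots: a full register occupies two consecutive
slots, and shared registers live 48 * 4 above the ordinary register numbers. A few
illustrative round-trips (physreg_mapping_examples and the concrete values are made up for
illustration):

static void
physreg_mapping_examples(void)
{
   assert(ra_num_to_physreg(5, 0) == 10);           /* full r1.y -> slots 10,11 */
   assert(ra_num_to_physreg(5, IR3_REG_HALF) == 5); /* half hr1.y -> slot 5     */
   assert(ra_physreg_to_num(10, 0) == 5);           /* and back again           */
   assert(ra_num_to_physreg(48 * 4 + 1, IR3_REG_SHARED) == 2); /* shared r48.y  */
}
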
static inline unsigned
ra_reg_get_num(const struct ir3_register *reg)
{
return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
}
static inline physreg_t
ra_reg_get_physreg(const struct ir3_register *reg)
{
return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
}
static inline bool
def_is_gpr(const struct ir3_register *reg)
{
return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;
return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;
}
/* Note: don't count undef as a source.
@@ -87,16 +93,14 @@ def_is_gpr(const struct ir3_register *reg)
static inline bool
ra_reg_is_src(const struct ir3_register *reg)
{
return (reg->flags & IR3_REG_SSA) && reg->def &&
def_is_gpr(reg->def);
return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);
}
static inline bool
ra_reg_is_dst(const struct ir3_register *reg)
{
return (reg->flags & IR3_REG_SSA) &&
def_is_gpr(reg) &&
((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&
((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
}
/* Iterators for sources and destinations which:
@@ -105,53 +109,54 @@ ra_reg_is_dst(const struct ir3_register *reg)
* - Consider array destinations as both a source and a destination
*/
#define ra_foreach_src(__srcreg, __instr) \
for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
for (unsigned __cnt = (__instr)->srcs_count, __i = 0; __i < __cnt; __i++) \
if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
#define ra_foreach_src(__srcreg, __instr) \
for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
for (unsigned __cnt = (__instr)->srcs_count, __i = 0; __i < __cnt; \
__i++) \
if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
#define ra_foreach_src_rev(__srcreg, __instr) \
for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0; __i--) \
if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
#define ra_foreach_src_rev(__srcreg, __instr) \
for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0; \
__i--) \
if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
#define ra_foreach_dst(__dstreg, __instr) \
for (struct ir3_register *__dstreg = (void *)~0; __dstreg; __dstreg = NULL) \
for (unsigned __cnt = (__instr)->dsts_count, __i = 0; __i < __cnt; __i++) \
if (ra_reg_is_dst((__dstreg = (__instr)->dsts[__i])))
#define ra_foreach_dst(__dstreg, __instr) \
for (struct ir3_register *__dstreg = (void *)~0; __dstreg; __dstreg = NULL) \
for (unsigned __cnt = (__instr)->dsts_count, __i = 0; __i < __cnt; \
__i++) \
if (ra_reg_is_dst((__dstreg = (__instr)->dsts[__i])))
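
As a usage sketch (count_gpr_operands is hypothetical, not part of this commit), a
consumer of these iterators only ever sees SSA sources whose def is a GPR and GPR
destinations with a non-empty write-mask:

static void
count_gpr_operands(struct ir3_instruction *instr, unsigned *nsrc, unsigned *ndst)
{
   ra_foreach_src (src, instr)
      (*nsrc)++; /* undef and non-GPR sources are skipped     */
   ra_foreach_dst (dst, instr)
      (*ndst)++; /* non-GPR or zero-wrmask dests are skipped  */
}
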
#define RA_HALF_SIZE (4 * 48)
#define RA_FULL_SIZE (4 * 48 * 2)
#define RA_SHARED_SIZE (2 * 4 * 8)
#define RA_HALF_SIZE (4 * 48)
#define RA_FULL_SIZE (4 * 48 * 2)
#define RA_SHARED_SIZE (2 * 4 * 8)
#define RA_MAX_FILE_SIZE RA_FULL_SIZE
struct ir3_liveness {
unsigned block_count;
DECLARE_ARRAY(struct ir3_register *, definitions);
DECLARE_ARRAY(BITSET_WORD *, live_out);
DECLARE_ARRAY(BITSET_WORD *, live_in);
unsigned block_count;
DECLARE_ARRAY(struct ir3_register *, definitions);
DECLARE_ARRAY(BITSET_WORD *, live_out);
DECLARE_ARRAY(BITSET_WORD *, live_in);
};
struct ir3_liveness *ir3_calc_liveness(struct ir3_shader_variant *v);
bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
struct ir3_instruction *instr);
struct ir3_instruction *instr);
void ir3_create_parallel_copies(struct ir3 *ir);
void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);
struct ir3_pressure {
unsigned full, half, shared;
unsigned full, half, shared;
};
void ir3_calc_pressure(struct ir3_shader_variant *v,
struct ir3_liveness *live,
struct ir3_pressure *max_pressure);
void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
struct ir3_pressure *max_pressure);
void ir3_ra_validate(struct ir3_shader_variant *v,
unsigned full_size, unsigned half_size, unsigned block_count);
void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
unsigned half_size, unsigned block_count);
void ir3_lower_copies(struct ir3_shader_variant *v);
@@ -176,91 +181,90 @@ void ir3_lower_copies(struct ir3_shader_variant *v);
*/
struct ir3_reg_interval {
struct rb_node node;
struct rb_node node;
struct rb_tree children;
struct rb_tree children;
struct ir3_reg_interval *parent;
struct ir3_reg_interval *parent;
struct ir3_register *reg;
struct ir3_register *reg;
bool inserted;
bool inserted;
};
struct ir3_reg_ctx {
/* The tree of top-level intervals in the forest. */
struct rb_tree intervals;
/* The tree of top-level intervals in the forest. */
struct rb_tree intervals;
/* Users of ir3_reg_ctx need to keep around additional state that is
* modified when top-level intervals are added or removed. For register
* pressure tracking, this is just the register pressure, but for RA we
* need to keep track of the physreg of each top-level interval. These
* callbacks provide a place to let users deriving from ir3_reg_ctx update
* their state when top-level intervals are inserted/removed.
*/
/* Users of ir3_reg_ctx need to keep around additional state that is
* modified when top-level intervals are added or removed. For register
* pressure tracking, this is just the register pressure, but for RA we
* need to keep track of the physreg of each top-level interval. These
* callbacks provide a place to let users deriving from ir3_reg_ctx update
* their state when top-level intervals are inserted/removed.
*/
/* Called when an interval is added and it turns out to be at the top
* level.
*/
void (*interval_add)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
/* Called when an interval is added and it turns out to be at the top
* level.
*/
void (*interval_add)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
/* Called when an interval is deleted from the top level. */
void (*interval_delete)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
/* Called when an interval is deleted from the top level. */
void (*interval_delete)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
/* Called when an interval is deleted and its child becomes top-level.
*/
void (*interval_readd)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *parent,
struct ir3_reg_interval *child);
/* Called when an interval is deleted and its child becomes top-level.
*/
void (*interval_readd)(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *parent,
struct ir3_reg_interval *child);
};
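
Concretely, a user embeds ir3_reg_ctx in its own context and recovers the outer struct
inside the callbacks with the usual container-of pattern; a minimal sketch (my_ctx and
my_interval_add are hypothetical, the pressure-tracking pass later in this change wires
itself up the same way):

struct my_ctx {
   struct ir3_reg_ctx reg_ctx;
   unsigned cur_size; /* example per-user state kept in sync by the callbacks */
};

static void
my_interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *interval)
{
   struct my_ctx *ctx = rb_node_data(struct my_ctx, _ctx, reg_ctx);
   ctx->cur_size += reg_size(interval->reg);
}
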
static inline struct ir3_reg_interval *
ir3_rb_node_to_interval(struct rb_node *node)
{
return rb_node_data(struct ir3_reg_interval, node, node);
return rb_node_data(struct ir3_reg_interval, node, node);
}
static inline const struct ir3_reg_interval *
ir3_rb_node_to_interval_const(const struct rb_node *node)
{
return rb_node_data(struct ir3_reg_interval, node, node);
return rb_node_data(struct ir3_reg_interval, node, node);
}
static inline struct ir3_reg_interval *
ir3_reg_interval_next(struct ir3_reg_interval *interval)
{
struct rb_node *next = rb_node_next(&interval->node);
return next ? ir3_rb_node_to_interval(next) : NULL;
struct rb_node *next = rb_node_next(&interval->node);
return next ? ir3_rb_node_to_interval(next) : NULL;
}
static inline struct ir3_reg_interval *
ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)
{
return interval ? ir3_reg_interval_next(interval) : NULL;
return interval ? ir3_reg_interval_next(interval) : NULL;
}
static inline void
ir3_reg_interval_init(struct ir3_reg_interval *interval, struct ir3_register *reg)
ir3_reg_interval_init(struct ir3_reg_interval *interval,
struct ir3_register *reg)
{
rb_tree_init(&interval->children);
interval->reg = reg;
interval->parent = NULL;
interval->inserted = false;
rb_tree_init(&interval->children);
interval->reg = reg;
interval->parent = NULL;
interval->inserted = false;
}
void
ir3_reg_interval_dump(struct ir3_reg_interval *interval);
void ir3_reg_interval_dump(struct ir3_reg_interval *interval);
void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
struct ir3_reg_interval *interval);
void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
struct ir3_reg_interval *interval);
void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval);
struct ir3_reg_interval *interval);
#endif


@@ -73,59 +73,61 @@
*/
#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF ((struct ir3_register *)(uintptr_t)1)
#define UNDEF ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)
struct reg_state {
struct ir3_register *def;
unsigned offset;
struct ir3_register *def;
unsigned offset;
};
struct file_state {
struct reg_state regs[RA_MAX_FILE_SIZE];
struct reg_state regs[RA_MAX_FILE_SIZE];
};
struct reaching_state {
struct file_state half, full, shared;
struct file_state half, full, shared;
};
struct ra_val_ctx {
struct ir3_instruction *current_instr;
struct ir3_instruction *current_instr;
struct reaching_state reaching;
struct reaching_state *block_reaching;
unsigned block_count;
struct reaching_state reaching;
struct reaching_state *block_reaching;
unsigned block_count;
unsigned full_size, half_size;
unsigned full_size, half_size;
bool merged_regs;
bool merged_regs;
bool failed;
bool failed;
};
static void
validate_error(struct ra_val_ctx *ctx, const char *condstr)
{
fprintf(stderr, "ra validation fail: %s\n", condstr);
fprintf(stderr, " -> for instruction: ");
ir3_print_instr(ctx->current_instr);
abort();
fprintf(stderr, "ra validation fail: %s\n", condstr);
fprintf(stderr, " -> for instruction: ");
ir3_print_instr(ctx->current_instr);
abort();
}
#define validate_assert(ctx, cond) do { \
if (!(cond)) { \
validate_error(ctx, #cond); \
} } while (0)
#define validate_assert(ctx, cond) \
do { \
if (!(cond)) { \
validate_error(ctx, #cond); \
} \
} while (0)
static unsigned
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SHARED)
return RA_SHARED_SIZE;
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
return ctx->full_size;
else
return ctx->half_size;
if (reg->flags & IR3_REG_SHARED)
return RA_SHARED_SIZE;
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
return ctx->full_size;
else
return ctx->half_size;
}
/* Validate simple things, like the registers being in-bounds. This way we
@@ -135,438 +137,434 @@ get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
ctx->current_instr = instr;
ra_foreach_dst (dst, instr) {
unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
if (dst->tied)
validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
}
ctx->current_instr = instr;
ra_foreach_dst (dst, instr) {
unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
if (dst->tied)
validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
}
ra_foreach_src (src, instr) {
unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
validate_assert(ctx, src_max <= get_file_size(ctx, src));
}
ra_foreach_src (src, instr) {
unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
validate_assert(ctx, src_max <= get_file_size(ctx, src));
}
}
/* This is the lattice operator. */
static bool
merge_reg(struct reg_state *dst, const struct reg_state *src)
{
if (dst->def == UNKNOWN) {
*dst = *src;
return src->def != UNKNOWN;
} else if (dst->def == OVERDEF) {
return false;
} else {
if (src->def == UNKNOWN)
return false;
else if (src->def == OVERDEF) {
*dst = *src;
return true;
} else {
if (dst->def != src->def || dst->offset != src->offset) {
dst->def = OVERDEF;
dst->offset = 0;
return true;
} else {
return false;
}
}
}
if (dst->def == UNKNOWN) {
*dst = *src;
return src->def != UNKNOWN;
} else if (dst->def == OVERDEF) {
return false;
} else {
if (src->def == UNKNOWN)
return false;
else if (src->def == OVERDEF) {
*dst = *src;
return true;
} else {
if (dst->def != src->def || dst->offset != src->offset) {
dst->def = OVERDEF;
dst->offset = 0;
return true;
} else {
return false;
}
}
}
}
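
The states order as UNKNOWN below any single (def, offset) below OVERDEF: merging into an
unknown slot copies the incoming state, two distinct reaching definitions (or the same
definition at different offsets) collapse to OVERDEF, and OVERDEF absorbs everything. A
sketch of the call pattern (merge_reg_example and its two defs are hypothetical):

static void
merge_reg_example(struct ir3_register *def_a, struct ir3_register *def_b)
{
   struct reg_state dst = {.def = UNKNOWN};
   struct reg_state src = {.def = def_a, .offset = 0};

   merge_reg(&dst, &src); /* dst becomes (def_a, 0), returns true (progress) */
   merge_reg(&dst, &src); /* same state merged again, returns false          */

   src.def = def_b;
   merge_reg(&dst, &src); /* a second def reaches -> OVERDEF, returns true   */
}
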
static bool
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
{
bool progress = false;
for (unsigned i = 0; i < size; i++)
progress |= merge_reg(&dst->regs[i], &src->regs[i]);
return progress;
bool progress = false;
for (unsigned i = 0; i < size; i++)
progress |= merge_reg(&dst->regs[i], &src->regs[i]);
return progress;
}
static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
const struct reaching_state *src)
const struct reaching_state *src)
{
bool progress = false;
progress |= merge_file(&dst->full, &src->full, ctx->full_size);
progress |= merge_file(&dst->half, &src->half, ctx->half_size);
return progress;
bool progress = false;
progress |= merge_file(&dst->full, &src->full, ctx->full_size);
progress |= merge_file(&dst->half, &src->half, ctx->half_size);
return progress;
}
static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
const struct reaching_state *src)
const struct reaching_state *src)
{
return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}
static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SHARED)
return &ctx->reaching.shared;
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
return &ctx->reaching.full;
else
return &ctx->reaching.half;
if (reg->flags & IR3_REG_SHARED)
return &ctx->reaching.shared;
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
return &ctx->reaching.full;
else
return &ctx->reaching.half;
}
static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
ra_foreach_dst (dst, instr) {
struct file_state *file = ra_val_get_file(ctx, dst);
physreg_t physreg = ra_reg_get_physreg(dst);
for (unsigned i = 0; i < reg_size(dst); i++) {
file->regs[physreg + i] = (struct reg_state) {
.def = dst,
.offset = i,
};
}
}
ra_foreach_dst (dst, instr) {
struct file_state *file = ra_val_get_file(ctx, dst);
physreg_t physreg = ra_reg_get_physreg(dst);
for (unsigned i = 0; i < reg_size(dst); i++) {
file->regs[physreg + i] = (struct reg_state){
.def = dst,
.offset = i,
};
}
}
}
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
struct ir3_register *dst = split->dsts[0];
struct ir3_register *src = split->srcs[0];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
physreg_t src_physreg = ra_reg_get_physreg(src);
struct file_state *file = ra_val_get_file(ctx, dst);
struct ir3_register *dst = split->dsts[0];
struct ir3_register *src = split->srcs[0];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
physreg_t src_physreg = ra_reg_get_physreg(src);
struct file_state *file = ra_val_get_file(ctx, dst);
unsigned offset = split->split.off * reg_elem_size(src);
for (unsigned i = 0; i < reg_elem_size(src); i++) {
file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
}
unsigned offset = split->split.off * reg_elem_size(src);
for (unsigned i = 0; i < reg_elem_size(src); i++) {
file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
}
}
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
struct ir3_register *dst = collect->dsts[0];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
struct ir3_register *dst = collect->dsts[0];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
unsigned size = reg_size(dst);
struct reg_state srcs[size];
unsigned size = reg_size(dst);
struct reg_state srcs[size];
for (unsigned i = 0; i < collect->srcs_count; i++) {
struct ir3_register *src = collect->srcs[i];
unsigned dst_offset = i * reg_elem_size(dst);
for (unsigned j = 0; j < reg_elem_size(dst); j++) {
if (!ra_reg_is_src(src)) {
srcs[dst_offset + j] = (struct reg_state) {
.def = dst,
.offset = dst_offset + j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[dst_offset + j] = file->regs[src_physreg + j];
}
}
}
for (unsigned i = 0; i < collect->srcs_count; i++) {
struct ir3_register *src = collect->srcs[i];
unsigned dst_offset = i * reg_elem_size(dst);
for (unsigned j = 0; j < reg_elem_size(dst); j++) {
if (!ra_reg_is_src(src)) {
srcs[dst_offset + j] = (struct reg_state){
.def = dst,
.offset = dst_offset + j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[dst_offset + j] = file->regs[src_physreg + j];
}
}
}
for (unsigned i = 0; i < size; i++)
file->regs[dst_physreg + i] = srcs[i];
for (unsigned i = 0; i < size; i++)
file->regs[dst_physreg + i] = srcs[i];
}
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
unsigned size = 0;
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
size += reg_size(pcopy->srcs[i]);
}
unsigned size = 0;
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
size += reg_size(pcopy->srcs[i]);
}
struct reg_state srcs[size];
struct reg_state srcs[size];
unsigned offset = 0;
for (unsigned i = 0; i < pcopy->srcs_count; i++) {
struct ir3_register *dst = pcopy->dsts[i];
struct ir3_register *src = pcopy->srcs[i];
struct file_state *file = ra_val_get_file(ctx, dst);
unsigned offset = 0;
for (unsigned i = 0; i < pcopy->srcs_count; i++) {
struct ir3_register *dst = pcopy->dsts[i];
struct ir3_register *src = pcopy->srcs[i];
struct file_state *file = ra_val_get_file(ctx, dst);
for (unsigned j = 0; j < reg_size(dst); j++) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
srcs[offset + j] = (struct reg_state) {
.def = dst,
.offset = j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[offset + j] = file->regs[src_physreg + j];
}
}
for (unsigned j = 0; j < reg_size(dst); j++) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
srcs[offset + j] = (struct reg_state){
.def = dst,
.offset = j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[offset + j] = file->regs[src_physreg + j];
}
}
offset += reg_size(dst);
}
assert(offset == size);
offset += reg_size(dst);
}
assert(offset == size);
offset = 0;
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
struct ir3_register *dst = pcopy->dsts[i];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
offset = 0;
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
struct ir3_register *dst = pcopy->dsts[i];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
for (unsigned j = 0; j < reg_size(dst); j++)
file->regs[dst_physreg + j] = srcs[offset + j];
for (unsigned j = 0; j < reg_size(dst); j++)
file->regs[dst_physreg + j] = srcs[offset + j];
offset += reg_size(dst);
}
assert(offset == size);
offset += reg_size(dst);
}
assert(offset == size);
}
static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
if (instr->opc == OPC_META_SPLIT)
propagate_split(ctx, instr);
else if (instr->opc == OPC_META_COLLECT)
propagate_collect(ctx, instr);
else if (instr->opc == OPC_META_PARALLEL_COPY)
propagate_parallelcopy(ctx, instr);
else
propagate_normal_instr(ctx, instr);
if (instr->opc == OPC_META_SPLIT)
propagate_split(ctx, instr);
else if (instr->opc == OPC_META_COLLECT)
propagate_collect(ctx, instr);
else if (instr->opc == OPC_META_PARALLEL_COPY)
propagate_parallelcopy(ctx, instr);
else
propagate_normal_instr(ctx, instr);
}
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
ctx->reaching = ctx->block_reaching[block->index];
ctx->reaching = ctx->block_reaching[block->index];
foreach_instr (instr, &block->instr_list) {
propagate_instr(ctx, instr);
}
foreach_instr (instr, &block->instr_list) {
propagate_instr(ctx, instr);
}
bool progress = false;
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->successors[i];
if (!succ)
continue;
progress |= merge_state(ctx,
&ctx->block_reaching[succ->index],
&ctx->reaching);
}
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->physical_successors[i];
if (!succ)
continue;
progress |= merge_state_physical(ctx,
&ctx->block_reaching[succ->index],
&ctx->reaching);
}
return progress;
bool progress = false;
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->successors[i];
if (!succ)
continue;
progress |=
merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
}
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->physical_successors[i];
if (!succ)
continue;
progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
&ctx->reaching);
}
return progress;
}
static void
chase_definition(struct reg_state *state)
{
while (true) {
struct ir3_instruction *instr = state->def->instr;
switch (instr->opc) {
case OPC_META_SPLIT: {
struct ir3_register *new_def = instr->srcs[0]->def;
unsigned offset = instr->split.off * reg_elem_size(new_def);
*state = (struct reg_state) {
.def = new_def,
.offset = state->offset + offset,
};
break;
}
case OPC_META_COLLECT: {
unsigned src_idx = state->offset / reg_elem_size(state->def);
unsigned src_offset = state->offset % reg_elem_size(state->def);
struct ir3_register *new_def = instr->srcs[src_idx]->def;
if (new_def) {
*state = (struct reg_state) {
.def = new_def,
.offset = src_offset,
};
} else {
/* Bail on immed/const */
return;
}
break;
}
case OPC_META_PARALLEL_COPY: {
unsigned dst_idx = ~0;
for (unsigned i = 0; i < instr->dsts_count; i++) {
if (instr->dsts[i] == state->def) {
dst_idx = i;
break;
}
}
assert(dst_idx != ~0);
while (true) {
struct ir3_instruction *instr = state->def->instr;
switch (instr->opc) {
case OPC_META_SPLIT: {
struct ir3_register *new_def = instr->srcs[0]->def;
unsigned offset = instr->split.off * reg_elem_size(new_def);
*state = (struct reg_state){
.def = new_def,
.offset = state->offset + offset,
};
break;
}
case OPC_META_COLLECT: {
unsigned src_idx = state->offset / reg_elem_size(state->def);
unsigned src_offset = state->offset % reg_elem_size(state->def);
struct ir3_register *new_def = instr->srcs[src_idx]->def;
if (new_def) {
*state = (struct reg_state){
.def = new_def,
.offset = src_offset,
};
} else {
/* Bail on immed/const */
return;
}
break;
}
case OPC_META_PARALLEL_COPY: {
unsigned dst_idx = ~0;
for (unsigned i = 0; i < instr->dsts_count; i++) {
if (instr->dsts[i] == state->def) {
dst_idx = i;
break;
}
}
assert(dst_idx != ~0);
struct ir3_register *new_def = instr->srcs[dst_idx]->def;
if (new_def) {
state->def = new_def;
} else {
/* Bail on immed/const */
return;
}
break;
}
default:
return;
}
}
struct ir3_register *new_def = instr->srcs[dst_idx]->def;
if (new_def) {
state->def = new_def;
} else {
/* Bail on immed/const */
return;
}
break;
}
default:
return;
}
}
}
static void
dump_reg_state(struct reg_state *state)
{
if (state->def == UNDEF) {
fprintf(stderr, "no reaching definition");
} else if (state->def == OVERDEF) {
fprintf(stderr, "more than one reaching definition or partial definition");
} else {
/* The analysis should always remove UNKNOWN eventually. */
assert(state->def != UNKNOWN);
if (state->def == UNDEF) {
fprintf(stderr, "no reaching definition");
} else if (state->def == OVERDEF) {
fprintf(stderr,
"more than one reaching definition or partial definition");
} else {
/* The analysis should always remove UNKNOWN eventually. */
assert(state->def != UNKNOWN);
fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u",
state->def->instr->serialno, state->def->name,
(state->def->flags & IR3_REG_HALF) ? "h" : "",
state->def->num / 4, "xyzw"[state->def->num % 4],
state->offset);
}
fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
state->def->num / 4, "xyzw"[state->def->num % 4],
            state->offset);
}
}
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src)
struct ir3_register *src)
{
struct file_state *file = ra_val_get_file(ctx, src);
physreg_t physreg = ra_reg_get_physreg(src);
for (unsigned i = 0; i < reg_size(src); i++) {
struct reg_state expected = (struct reg_state) {
.def = src->def,
.offset = i,
};
chase_definition(&expected);
struct file_state *file = ra_val_get_file(ctx, src);
physreg_t physreg = ra_reg_get_physreg(src);
for (unsigned i = 0; i < reg_size(src); i++) {
struct reg_state expected = (struct reg_state){
.def = src->def,
.offset = i,
};
chase_definition(&expected);
struct reg_state actual = file->regs[physreg + i];
struct reg_state actual = file->regs[physreg + i];
if (expected.def != actual.def ||
expected.offset != actual.offset) {
fprintf(stderr, "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
src->def->instr->serialno, src->def->name, i);
fprintf(stderr, "expected: ");
dump_reg_state(&expected);
fprintf(stderr, "\n");
fprintf(stderr, "actual: ");
dump_reg_state(&actual);
fprintf(stderr, "\n");
fprintf(stderr, "-> for instruction: ");
ir3_print_instr(instr);
ctx->failed = true;
}
}
if (expected.def != actual.def || expected.offset != actual.offset) {
fprintf(
stderr,
"ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
src->def->instr->serialno, src->def->name, i);
fprintf(stderr, "expected: ");
dump_reg_state(&expected);
fprintf(stderr, "\n");
fprintf(stderr, "actual: ");
dump_reg_state(&actual);
fprintf(stderr, "\n");
fprintf(stderr, "-> for instruction: ");
ir3_print_instr(instr);
ctx->failed = true;
}
}
}
static void
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
if (instr->opc == OPC_META_SPLIT ||
instr->opc == OPC_META_COLLECT ||
instr->opc == OPC_META_PARALLEL_COPY ||
instr->opc == OPC_META_PHI) {
return;
}
if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
return;
}
ra_foreach_src (src, instr) {
check_reaching_src(ctx, instr, src);
}
ra_foreach_src (src, instr) {
check_reaching_src(ctx, instr, src);
}
}
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
ctx->reaching = ctx->block_reaching[block->index];
ctx->reaching = ctx->block_reaching[block->index];
foreach_instr (instr, &block->instr_list) {
check_reaching_instr(ctx, instr);
propagate_instr(ctx, instr);
}
foreach_instr (instr, &block->instr_list) {
check_reaching_instr(ctx, instr);
propagate_instr(ctx, instr);
}
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->successors[i];
if (!succ)
continue;
for (unsigned i = 0; i < 2; i++) {
struct ir3_block *succ = block->successors[i];
if (!succ)
continue;
unsigned pred_idx = ir3_block_get_pred_index(succ, block);
foreach_instr (instr, &succ->instr_list) {
if (instr->opc != OPC_META_PHI)
break;
if (instr->srcs[pred_idx]->def)
check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
}
}
unsigned pred_idx = ir3_block_get_pred_index(succ, block);
foreach_instr (instr, &succ->instr_list) {
if (instr->opc != OPC_META_PHI)
break;
if (instr->srcs[pred_idx]->def)
check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
}
}
}
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
ctx->block_reaching =
rzalloc_array(ctx, struct reaching_state, ctx->block_count);
ctx->block_reaching =
rzalloc_array(ctx, struct reaching_state, ctx->block_count);
struct reaching_state *start = &ctx->block_reaching[0];
for (unsigned i = 0; i < ctx->full_size; i++)
start->full.regs[i].def = UNDEF;
for (unsigned i = 0; i < ctx->half_size; i++)
start->half.regs[i].def = UNDEF;
for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
start->shared.regs[i].def = UNDEF;
struct reaching_state *start = &ctx->block_reaching[0];
for (unsigned i = 0; i < ctx->full_size; i++)
start->full.regs[i].def = UNDEF;
for (unsigned i = 0; i < ctx->half_size; i++)
start->half.regs[i].def = UNDEF;
for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
start->shared.regs[i].def = UNDEF;
bool progress;
do {
progress = false;
foreach_block (block, &ir->block_list) {
progress |= propagate_block(ctx, block);
}
} while (progress);
bool progress;
do {
progress = false;
foreach_block (block, &ir->block_list) {
progress |= propagate_block(ctx, block);
}
} while (progress);
foreach_block (block, &ir->block_list) {
check_reaching_block(ctx, block);
}
foreach_block (block, &ir->block_list) {
check_reaching_block(ctx, block);
}
if (ctx->failed) {
fprintf(stderr, "failing shader:\n");
ir3_print(ir);
abort();
}
if (ctx->failed) {
fprintf(stderr, "failing shader:\n");
ir3_print(ir);
abort();
}
}
void
ir3_ra_validate(struct ir3_shader_variant *v,
unsigned full_size, unsigned half_size, unsigned block_count)
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
# define VALIDATE 0
#define VALIDATE 0
#else
# define VALIDATE 1
#define VALIDATE 1
#endif
if (!VALIDATE)
return;
if (!VALIDATE)
return;
struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
ctx->merged_regs = v->mergedregs;
ctx->full_size = full_size;
ctx->half_size = half_size;
ctx->block_count = block_count;
struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
ctx->merged_regs = v->mergedregs;
ctx->full_size = full_size;
ctx->half_size = half_size;
ctx->block_count = block_count;
foreach_block (block, &v->ir->block_list) {
foreach_instr (instr, &block->instr_list) {
validate_simple(ctx, instr);
}
}
foreach_block (block, &v->ir->block_list) {
foreach_instr (instr, &block->instr_list) {
validate_simple(ctx, instr);
}
}
check_reaching_defs(ctx, v->ir);
check_reaching_defs(ctx, v->ir);
ralloc_free(ctx);
ralloc_free(ctx);
}


@@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"
#include "util/rb_tree.h"
/*
* This pass does one thing so far:
@@ -36,326 +36,330 @@
*/
struct ra_spill_interval {
struct ir3_reg_interval interval;
struct ir3_reg_interval interval;
};
struct ra_spill_ctx {
struct ir3_reg_ctx reg_ctx;
struct ir3_reg_ctx reg_ctx;
struct ra_spill_interval *intervals;
struct ra_spill_interval *intervals;
struct ir3_pressure cur_pressure, max_pressure;
struct ir3_pressure cur_pressure, max_pressure;
struct ir3_liveness *live;
struct ir3_liveness *live;
const struct ir3_compiler *compiler;
const struct ir3_compiler *compiler;
};
static void
ra_spill_interval_init(struct ra_spill_interval *interval, struct ir3_register *reg)
ra_spill_interval_init(struct ra_spill_interval *interval,
struct ir3_register *reg)
{
ir3_reg_interval_init(&interval->interval, reg);
ir3_reg_interval_init(&interval->interval, reg);
}
static void
ra_pressure_add(struct ir3_pressure *pressure, struct ra_spill_interval *interval)
ra_pressure_add(struct ir3_pressure *pressure,
struct ra_spill_interval *interval)
{
unsigned size = reg_size(interval->interval.reg);
if (interval->interval.reg->flags & IR3_REG_SHARED)
pressure->shared += size;
else if (interval->interval.reg->flags & IR3_REG_HALF)
pressure->half += size;
else
pressure->full += size;
unsigned size = reg_size(interval->interval.reg);
if (interval->interval.reg->flags & IR3_REG_SHARED)
pressure->shared += size;
else if (interval->interval.reg->flags & IR3_REG_HALF)
pressure->half += size;
else
pressure->full += size;
}
static void
ra_pressure_sub(struct ir3_pressure *pressure, struct ra_spill_interval *interval)
ra_pressure_sub(struct ir3_pressure *pressure,
struct ra_spill_interval *interval)
{
unsigned size = reg_size(interval->interval.reg);
if (interval->interval.reg->flags & IR3_REG_SHARED)
pressure->shared -= size;
else if (interval->interval.reg->flags & IR3_REG_HALF)
pressure->half -= size;
else
pressure->full -= size;
unsigned size = reg_size(interval->interval.reg);
if (interval->interval.reg->flags & IR3_REG_SHARED)
pressure->shared -= size;
else if (interval->interval.reg->flags & IR3_REG_HALF)
pressure->half -= size;
else
pressure->full -= size;
}
static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
return rb_node_data(struct ra_spill_interval, interval, interval);
return rb_node_data(struct ra_spill_interval, interval, interval);
}
static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}
static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
ra_pressure_add(&ctx->cur_pressure, interval);
ra_pressure_add(&ctx->cur_pressure, interval);
}
static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
ra_pressure_sub(&ctx->cur_pressure, interval);
ra_pressure_sub(&ctx->cur_pressure, interval);
}
static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
struct ir3_reg_interval *_child)
struct ir3_reg_interval *_child)
{
interval_add(_ctx, _child);
interval_add(_ctx, _child);
}
static void
spill_ctx_init(struct ra_spill_ctx *ctx)
{
rb_tree_init(&ctx->reg_ctx.intervals);
ctx->reg_ctx.interval_add = interval_add;
ctx->reg_ctx.interval_delete = interval_delete;
ctx->reg_ctx.interval_readd = interval_readd;
rb_tree_init(&ctx->reg_ctx.intervals);
ctx->reg_ctx.interval_add = interval_add;
ctx->reg_ctx.interval_delete = interval_delete;
ctx->reg_ctx.interval_readd = interval_readd;
}
static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval)
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
struct ra_spill_interval *interval)
{
ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}
static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval)
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
struct ra_spill_interval *interval)
{
ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}
static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
ra_spill_interval_init(interval, dst);
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
ra_spill_interval_init(interval, dst);
}
static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
if (interval->interval.inserted)
return;
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
if (interval->interval.inserted)
return;
ra_spill_ctx_insert(ctx, interval);
ra_spill_ctx_insert(ctx, interval);
/* For precolored inputs, make sure we leave enough registers to allow for
* holes in the inputs. It can happen that the binning shader has a lower
* register pressure than the main shader, but the main shader decided to
* add holes between the inputs which means that the binning shader has a
* higher register demand.
*/
if (dst->instr->opc == OPC_META_INPUT &&
dst->num != INVALID_REG) {
physreg_t physreg = ra_reg_get_physreg(dst);
physreg_t max = physreg + reg_size(dst);
/* For precolored inputs, make sure we leave enough registers to allow for
* holes in the inputs. It can happen that the binning shader has a lower
* register pressure than the main shader, but the main shader decided to
* add holes between the inputs which means that the binning shader has a
* higher register demand.
*/
if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
physreg_t physreg = ra_reg_get_physreg(dst);
physreg_t max = physreg + reg_size(dst);
if (interval->interval.reg->flags & IR3_REG_SHARED)
ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
else if (interval->interval.reg->flags & IR3_REG_HALF)
ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
else
ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
}
if (interval->interval.reg->flags & IR3_REG_SHARED)
ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
else if (interval->interval.reg->flags & IR3_REG_HALF)
ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
else
ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
}
}
static void
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *src)
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src)
{
if (!(src->flags & IR3_REG_FIRST_KILL))
return;
if (!(src->flags & IR3_REG_FIRST_KILL))
return;
struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
if (!interval->interval.inserted || interval->interval.parent ||
!rb_tree_is_empty(&interval->interval.children))
return;
if (!interval->interval.inserted || interval->interval.parent ||
!rb_tree_is_empty(&interval->interval.children))
return;
ra_spill_ctx_remove(ctx, interval);
ra_spill_ctx_remove(ctx, interval);
}
static void
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *src)
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src)
{
if (!(src->flags & IR3_REG_FIRST_KILL))
return;
if (!(src->flags & IR3_REG_FIRST_KILL))
return;
struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
if (!interval->interval.inserted)
return;
if (!interval->interval.inserted)
return;
ra_spill_ctx_remove(ctx, interval);
ra_spill_ctx_remove(ctx, interval);
}
static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
struct ra_spill_interval *interval = &ctx->intervals[dst->name];
if (!interval->interval.inserted)
return;
if (!interval->interval.inserted)
return;
ra_spill_ctx_remove(ctx, interval);
ra_spill_ctx_remove(ctx, interval);
}
static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
d("pressure:");
d("\tfull: %u", ctx->cur_pressure.full);
d("\thalf: %u", ctx->cur_pressure.half);
d("\tshared: %u", ctx->cur_pressure.shared);
d("pressure:");
d("\tfull: %u", ctx->cur_pressure.full);
d("\thalf: %u", ctx->cur_pressure.half);
d("\tshared: %u", ctx->cur_pressure.shared);
ctx->max_pressure.full =
MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
ctx->max_pressure.half =
MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
ctx->max_pressure.shared =
MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
ctx->max_pressure.full =
MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
ctx->max_pressure.half =
MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
ctx->max_pressure.shared =
MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
}
static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
if (RA_DEBUG) {
printf("processing: ");
ir3_print_instr(instr);
}
if (RA_DEBUG) {
printf("processing: ");
ir3_print_instr(instr);
}
ra_foreach_dst(dst, instr) {
init_dst(ctx, dst);
}
ra_foreach_dst (dst, instr) {
init_dst(ctx, dst);
}
/* Handle tied destinations. If a destination is tied to a source and that
* source is live-through, then we need to allocate a new register for the
* destination which is live-through itself and cannot overlap the
* sources.
*/
/* Handle tied destinations. If a destination is tied to a source and that
* source is live-through, then we need to allocate a new register for the
* destination which is live-through itself and cannot overlap the
* sources.
*/
ra_foreach_dst(dst, instr) {
struct ir3_register *tied_src = dst->tied;
if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
insert_dst(ctx, dst);
}
ra_foreach_dst (dst, instr) {
struct ir3_register *tied_src = dst->tied;
if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
insert_dst(ctx, dst);
}
update_max_pressure(ctx);
update_max_pressure(ctx);
ra_foreach_src(src, instr) {
if (src->flags & IR3_REG_FIRST_KILL)
remove_src_early(ctx, instr, src);
}
ra_foreach_src (src, instr) {
if (src->flags & IR3_REG_FIRST_KILL)
remove_src_early(ctx, instr, src);
}
ra_foreach_dst (dst, instr) {
insert_dst(ctx, dst);
}
ra_foreach_dst(dst, instr) {
insert_dst(ctx, dst);
}
update_max_pressure(ctx);
update_max_pressure(ctx);
for (unsigned i = 0; i < instr->srcs_count; i++) {
if (ra_reg_is_src(instr->srcs[i]) &&
(instr->srcs[i]->flags & IR3_REG_FIRST_KILL))
remove_src(ctx, instr, instr->srcs[i]);
}
for (unsigned i = 0; i < instr->dsts_count; i++) {
if (ra_reg_is_dst(instr->dsts[i]) &&
(instr->dsts[i]->flags & IR3_REG_UNUSED))
remove_dst(ctx, instr->dsts[i]);
}
for (unsigned i = 0; i < instr->srcs_count; i++) {
if (ra_reg_is_src(instr->srcs[i]) &&
(instr->srcs[i]->flags & IR3_REG_FIRST_KILL))
remove_src(ctx, instr, instr->srcs[i]);
}
for (unsigned i = 0; i < instr->dsts_count; i++) {
if (ra_reg_is_dst(instr->dsts[i]) &&
(instr->dsts[i]->flags & IR3_REG_UNUSED))
remove_dst(ctx, instr->dsts[i]);
}
}
static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
init_dst(ctx, instr->dsts[0]);
insert_dst(ctx, instr->dsts[0]);
init_dst(ctx, instr->dsts[0]);
insert_dst(ctx, instr->dsts[0]);
}
static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
ra_foreach_src(src, instr)
remove_src(ctx, instr, src);
if (instr->dsts[0]->flags & IR3_REG_UNUSED)
remove_dst(ctx, instr->dsts[0]);
ra_foreach_src (src, instr)
remove_src(ctx, instr, src);
if (instr->dsts[0]->flags & IR3_REG_UNUSED)
remove_dst(ctx, instr->dsts[0]);
}
static void
handle_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def)
{
struct ra_spill_interval *interval = &ctx->intervals[def->name];
ra_spill_interval_init(interval, def);
insert_dst(ctx, def);
struct ra_spill_interval *interval = &ctx->intervals[def->name];
ra_spill_interval_init(interval, def);
insert_dst(ctx, def);
}
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
rb_tree_init(&ctx->reg_ctx.intervals);
memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
rb_tree_init(&ctx->reg_ctx.intervals);
unsigned name;
BITSET_FOREACH_SET(name, ctx->live->live_in[block->index],
ctx->live->definitions_count) {
struct ir3_register *reg = ctx->live->definitions[name];
handle_live_in(ctx, reg);
}
unsigned name;
BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
ctx->live->definitions_count) {
struct ir3_register *reg = ctx->live->definitions[name];
handle_live_in(ctx, reg);
}
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
instr->opc != OPC_META_TEX_PREFETCH)
break;
handle_input_phi(ctx, instr);
}
foreach_instr (instr, &block->instr_list) {
if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
instr->opc != OPC_META_TEX_PREFETCH)
break;
handle_input_phi(ctx, instr);
}
update_max_pressure(ctx);
update_max_pressure(ctx);
foreach_instr (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
instr->opc == OPC_META_TEX_PREFETCH)
remove_input_phi(ctx, instr);
else
handle_instr(ctx, instr);
}
foreach_instr (instr, &block->instr_list) {
if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
instr->opc == OPC_META_TEX_PREFETCH)
remove_input_phi(ctx, instr);
else
handle_instr(ctx, instr);
}
}
void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
struct ir3_pressure *max_pressure)
struct ir3_pressure *max_pressure)
{
struct ra_spill_ctx ctx = {};
ctx.live = live;
ctx.intervals = calloc(live->definitions_count, sizeof(*ctx.intervals));
ctx.compiler = v->shader->compiler;
spill_ctx_init(&ctx);
struct ra_spill_ctx ctx = {};
ctx.live = live;
ctx.intervals = calloc(live->definitions_count, sizeof(*ctx.intervals));
ctx.compiler = v->shader->compiler;
spill_ctx_init(&ctx);
foreach_block (block, &v->ir->block_list) {
handle_block(&ctx, block);
}
foreach_block (block, &v->ir->block_list) {
handle_block(&ctx, block);
}
assert(ctx.cur_pressure.full == 0);
assert(ctx.cur_pressure.half == 0);
assert(ctx.cur_pressure.shared == 0);
assert(ctx.cur_pressure.full == 0);
assert(ctx.cur_pressure.half == 0);
assert(ctx.cur_pressure.shared == 0);
free(ctx.intervals);
free(ctx.intervals);
*max_pressure = ctx.max_pressure;
*max_pressure = ctx.max_pressure;
}
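
Putting the pass together, a caller computes liveness first and then asks for the
worst-case pressure; roughly (a hypothetical call sequence for some shader variant v):

   struct ir3_liveness *live = ir3_calc_liveness(v);
   struct ir3_pressure max_pressure;
   ir3_calc_pressure(v, live, &max_pressure);
   /* max_pressure.full / .half / .shared now hold the per-file maxima */
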


@@ -28,61 +28,64 @@
#include "ir3.h"
struct ir3_validate_ctx {
struct ir3 *ir;
struct ir3 *ir;
/* Current instruction being validated: */
struct ir3_instruction *current_instr;
/* Current instruction being validated: */
struct ir3_instruction *current_instr;
/* Set of instructions found so far, used to validate that we
* don't have SSA uses that occur before def's
*/
struct set *defs;
/* Set of instructions found so far, used to validate that we
* don't have SSA uses that occur before def's
*/
struct set *defs;
};
static void
validate_error(struct ir3_validate_ctx *ctx, const char *condstr)
{
fprintf(stderr, "validation fail: %s\n", condstr);
fprintf(stderr, " -> for instruction: ");
ir3_print_instr(ctx->current_instr);
abort();
fprintf(stderr, "validation fail: %s\n", condstr);
fprintf(stderr, " -> for instruction: ");
ir3_print_instr(ctx->current_instr);
abort();
}
#define validate_assert(ctx, cond) do { \
if (!(cond)) { \
validate_error(ctx, #cond); \
} } while (0)
#define validate_assert(ctx, cond) \
do { \
if (!(cond)) { \
validate_error(ctx, #cond); \
} \
} while (0)
static unsigned
reg_class_flags(struct ir3_register *reg)
{
return reg->flags & (IR3_REG_HALF | IR3_REG_SHARED);
return reg->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}
static void
validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *reg)
struct ir3_register *reg)
{
if (!(reg->flags & IR3_REG_SSA) || !reg->def)
return;
if (!(reg->flags & IR3_REG_SSA) || !reg->def)
return;
struct ir3_register *src = reg->def;
struct ir3_register *src = reg->def;
validate_assert(ctx, _mesa_set_search(ctx->defs, src->instr));
validate_assert(ctx, src->wrmask == reg->wrmask);
validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
validate_assert(ctx, _mesa_set_search(ctx->defs, src->instr));
validate_assert(ctx, src->wrmask == reg->wrmask);
validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
if (reg->tied) {
validate_assert(ctx, reg->tied->tied == reg);
bool found = false;
foreach_dst (dst, instr) {
if (dst == reg->tied) {
found = true;
break;
}
}
validate_assert(ctx, found && "tied register not in the same instruction");
}
if (reg->tied) {
validate_assert(ctx, reg->tied->tied == reg);
bool found = false;
foreach_dst (dst, instr) {
if (dst == reg->tied) {
found = true;
break;
}
}
validate_assert(ctx,
found && "tied register not in the same instruction");
}
}
/* phi sources are logically read at the end of the predecessor basic block,
@@ -90,275 +93,280 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
* use comes after the definition for loop phis.
*/
static void
validate_phi_src(struct ir3_validate_ctx *ctx, struct ir3_block *block, struct ir3_block *pred)
validate_phi_src(struct ir3_validate_ctx *ctx, struct ir3_block *block,
struct ir3_block *pred)
{
unsigned pred_idx = ir3_block_get_pred_index(block, pred);
unsigned pred_idx = ir3_block_get_pred_index(block, pred);
foreach_instr (phi, &block->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
foreach_instr (phi, &block->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
ctx->current_instr = phi;
validate_assert(ctx, phi->srcs_count == block->predecessors_count);
validate_src(ctx, phi, phi->srcs[pred_idx]);
}
ctx->current_instr = phi;
validate_assert(ctx, phi->srcs_count == block->predecessors_count);
validate_src(ctx, phi, phi->srcs[pred_idx]);
}
}
static void
validate_phi(struct ir3_validate_ctx *ctx, struct ir3_instruction *phi)
{
_mesa_set_add(ctx->defs, phi);
validate_assert(ctx, phi->dsts_count == 1);
validate_assert(ctx, is_dest_gpr(phi->dsts[0]));
_mesa_set_add(ctx->defs, phi);
validate_assert(ctx, phi->dsts_count == 1);
validate_assert(ctx, is_dest_gpr(phi->dsts[0]));
}
static void
validate_dst(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *reg)
struct ir3_register *reg)
{
if (reg->tied) {
validate_assert(ctx, reg->tied->tied == reg);
validate_assert(ctx, reg_class_flags(reg->tied) == reg_class_flags(reg));
validate_assert(ctx, reg->tied->wrmask == reg->wrmask);
if (reg->flags & IR3_REG_ARRAY) {
validate_assert(ctx, reg->tied->array.base == reg->array.base);
validate_assert(ctx, reg->tied->size == reg->size);
}
bool found = false;
foreach_src (src, instr) {
if (src == reg->tied) {
found = true;
break;
}
}
validate_assert(ctx, found && "tied register not in the same instruction");
}
if (reg->tied) {
validate_assert(ctx, reg->tied->tied == reg);
validate_assert(ctx, reg_class_flags(reg->tied) == reg_class_flags(reg));
validate_assert(ctx, reg->tied->wrmask == reg->wrmask);
if (reg->flags & IR3_REG_ARRAY) {
validate_assert(ctx, reg->tied->array.base == reg->array.base);
validate_assert(ctx, reg->tied->size == reg->size);
}
bool found = false;
foreach_src (src, instr) {
if (src == reg->tied) {
found = true;
break;
}
}
validate_assert(ctx,
found && "tied register not in the same instruction");
}
if (reg->flags & IR3_REG_SSA)
validate_assert(ctx, reg->instr == instr);
if (reg->flags & IR3_REG_SSA)
validate_assert(ctx, reg->instr == instr);
if (reg->flags & IR3_REG_RELATIV)
validate_assert(ctx, instr->address);
if (reg->flags & IR3_REG_RELATIV)
validate_assert(ctx, instr->address);
}
#define validate_reg_size(ctx, reg, type) \
validate_assert(ctx, type_size(type) == (((reg)->flags & IR3_REG_HALF) ? 16 : 32))
#define validate_reg_size(ctx, reg, type) \
validate_assert( \
ctx, type_size(type) == (((reg)->flags & IR3_REG_HALF) ? 16 : 32))
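validate_reg_size simply ties a register's IR3_REG_HALF flag to the bit width of the declared type: half registers go with 16-bit types, full registers with 32-bit types. Restated as a tiny standalone predicate (assuming type_size() yields the width in bits, which is what the macro compares against):

#include <stdbool.h>

/* standalone restatement of the macro's condition */
static bool
reg_size_matches(bool reg_is_half, unsigned type_bits)
{
   return type_bits == (reg_is_half ? 16 : 32);
}

/* e.g. a dst carrying IR3_REG_HALF must come with a 16-bit cat1 dst_type:
 *   reg_size_matches(true, 16)  -> true
 *   reg_size_matches(true, 32)  -> false (validate_assert would fire)
 */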
static void
validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
{
struct ir3_register *last_reg = NULL;
struct ir3_register *last_reg = NULL;
foreach_src_n (reg, n, instr) {
if (reg->flags & IR3_REG_RELATIV)
validate_assert(ctx, instr->address);
foreach_src_n (reg, n, instr) {
if (reg->flags & IR3_REG_RELATIV)
validate_assert(ctx, instr->address);
validate_src(ctx, instr, reg);
validate_src(ctx, instr, reg);
/* Validate that all src's are either half or full.
*
* Note: tex instructions w/ .s2en are a bit special in that the
* tex/samp src reg is half-reg for non-bindless and full for
* bindless, irrespective of the precision of other srcs. The
* tex/samp src is the first src reg when .s2en is set
*/
if (reg->tied) {
/* must have the same size as the destination, handled in
* validate_reg().
*/
} else if (reg == instr->address) {
validate_assert(ctx, reg->flags & IR3_REG_HALF);
} else if ((instr->flags & IR3_INSTR_S2EN) && (n < 2)) {
if (n == 0) {
if (instr->flags & IR3_INSTR_B)
validate_assert(ctx, !(reg->flags & IR3_REG_HALF));
else
validate_assert(ctx, reg->flags & IR3_REG_HALF);
}
} else if (opc_cat(instr->opc) == 6) {
/* handled below */
} else if (opc_cat(instr->opc) == 0) {
/* end/chmask/etc are allowed to have different size sources */
} else if (n > 0) {
validate_assert(ctx, (last_reg->flags & IR3_REG_HALF) == (reg->flags & IR3_REG_HALF));
}
/* Validate that all src's are either half or full.
*
* Note: tex instructions w/ .s2en are a bit special in that the
* tex/samp src reg is half-reg for non-bindless and full for
* bindless, irrespective of the precision of other srcs. The
* tex/samp src is the first src reg when .s2en is set
*/
if (reg->tied) {
/* must have the same size as the destination, handled in
* validate_reg().
*/
} else if (reg == instr->address) {
validate_assert(ctx, reg->flags & IR3_REG_HALF);
} else if ((instr->flags & IR3_INSTR_S2EN) && (n < 2)) {
if (n == 0) {
if (instr->flags & IR3_INSTR_B)
validate_assert(ctx, !(reg->flags & IR3_REG_HALF));
else
validate_assert(ctx, reg->flags & IR3_REG_HALF);
}
} else if (opc_cat(instr->opc) == 6) {
/* handled below */
} else if (opc_cat(instr->opc) == 0) {
/* end/chmask/etc are allowed to have different size sources */
} else if (n > 0) {
validate_assert(ctx, (last_reg->flags & IR3_REG_HALF) ==
(reg->flags & IR3_REG_HALF));
}
last_reg = reg;
}
last_reg = reg;
}
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *reg = instr->dsts[i];
for (unsigned i = 0; i < instr->dsts_count; i++) {
struct ir3_register *reg = instr->dsts[i];
validate_dst(ctx, instr, reg);
}
validate_dst(ctx, instr, reg);
}
_mesa_set_add(ctx->defs, instr);
_mesa_set_add(ctx->defs, instr);
/* Check that src/dst types match the register types, and for
* instructions that have different opcodes depending on type,
* that the opcodes are correct.
*/
switch (opc_cat(instr->opc)) {
case 1: /* move instructions */
if (instr->opc == OPC_MOVMSK || instr->opc == OPC_BALLOT_MACRO) {
validate_assert(ctx, instr->dsts_count == 1);
validate_assert(ctx, instr->dsts[0]->flags & IR3_REG_SHARED);
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_HALF));
validate_assert(ctx, util_is_power_of_two_or_zero(instr->dsts[0]->wrmask + 1));
} else if (instr->opc == OPC_ANY_MACRO || instr->opc == OPC_ALL_MACRO ||
instr->opc == OPC_READ_FIRST_MACRO ||
instr->opc == OPC_READ_COND_MACRO) {
/* nothing yet */
} else if (instr->opc == OPC_ELECT_MACRO) {
validate_assert(ctx, instr->dsts_count == 1);
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
} else {
foreach_dst (dst, instr)
validate_reg_size(ctx, dst, instr->cat1.dst_type);
foreach_src (src, instr) {
if (!src->tied && src != instr->address)
validate_reg_size(ctx, src, instr->cat1.src_type);
}
/* Check that src/dst types match the register types, and for
* instructions that have different opcodes depending on type,
* that the opcodes are correct.
*/
switch (opc_cat(instr->opc)) {
case 1: /* move instructions */
if (instr->opc == OPC_MOVMSK || instr->opc == OPC_BALLOT_MACRO) {
validate_assert(ctx, instr->dsts_count == 1);
validate_assert(ctx, instr->dsts[0]->flags & IR3_REG_SHARED);
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_HALF));
validate_assert(
ctx, util_is_power_of_two_or_zero(instr->dsts[0]->wrmask + 1));
} else if (instr->opc == OPC_ANY_MACRO || instr->opc == OPC_ALL_MACRO ||
instr->opc == OPC_READ_FIRST_MACRO ||
instr->opc == OPC_READ_COND_MACRO) {
/* nothing yet */
} else if (instr->opc == OPC_ELECT_MACRO) {
validate_assert(ctx, instr->dsts_count == 1);
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
} else {
foreach_dst (dst, instr)
validate_reg_size(ctx, dst, instr->cat1.dst_type);
foreach_src (src, instr) {
if (!src->tied && src != instr->address)
validate_reg_size(ctx, src, instr->cat1.src_type);
}
switch (instr->opc) {
case OPC_SWZ:
validate_assert(ctx, instr->srcs_count == 2);
validate_assert(ctx, instr->dsts_count == 2);
break;
case OPC_GAT:
validate_assert(ctx, instr->srcs_count == 4);
validate_assert(ctx, instr->dsts_count == 1);
break;
case OPC_SCT:
validate_assert(ctx, instr->srcs_count == 1);
validate_assert(ctx, instr->dsts_count == 4);
break;
default:
break;
}
}
switch (instr->opc) {
case OPC_SWZ:
validate_assert(ctx, instr->srcs_count == 2);
validate_assert(ctx, instr->dsts_count == 2);
break;
case OPC_GAT:
validate_assert(ctx, instr->srcs_count == 4);
validate_assert(ctx, instr->dsts_count == 1);
break;
case OPC_SCT:
validate_assert(ctx, instr->srcs_count == 1);
validate_assert(ctx, instr->dsts_count == 4);
break;
default:
break;
}
}
if (instr->opc != OPC_MOV)
validate_assert(ctx, !instr->address);
if (instr->opc != OPC_MOV)
validate_assert(ctx, !instr->address);
break;
case 3:
/* Validate that cat3 opc matches the src type. We've already checked that all
* the src regs are same type
*/
if (instr->srcs[0]->flags & IR3_REG_HALF) {
validate_assert(ctx, instr->opc == cat3_half_opc(instr->opc));
} else {
validate_assert(ctx, instr->opc == cat3_full_opc(instr->opc));
}
break;
case 4:
/* Validate that cat4 opc matches the dst type: */
if (instr->dsts[0]->flags & IR3_REG_HALF) {
validate_assert(ctx, instr->opc == cat4_half_opc(instr->opc));
} else {
validate_assert(ctx, instr->opc == cat4_full_opc(instr->opc));
}
break;
case 5:
validate_reg_size(ctx, instr->dsts[0], instr->cat5.type);
break;
case 6:
switch (instr->opc) {
case OPC_RESINFO:
case OPC_RESFMT:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
break;
case OPC_L2G:
case OPC_G2L:
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
break;
case OPC_STG:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
break;
case OPC_STG_A:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[4], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[5]->flags & IR3_REG_HALF));
break;
case OPC_STL:
case OPC_STP:
case OPC_STLW:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[1], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
break;
case OPC_STIB:
if (instr->flags & IR3_INSTR_B) {
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
} else {
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[1], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
}
break;
default:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
if (instr->srcs_count > 1)
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
break;
}
}
break;
case 3:
/* Validate that cat3 opc matches the src type. We've already checked
* that all the src regs are same type
*/
if (instr->srcs[0]->flags & IR3_REG_HALF) {
validate_assert(ctx, instr->opc == cat3_half_opc(instr->opc));
} else {
validate_assert(ctx, instr->opc == cat3_full_opc(instr->opc));
}
break;
case 4:
/* Validate that cat4 opc matches the dst type: */
if (instr->dsts[0]->flags & IR3_REG_HALF) {
validate_assert(ctx, instr->opc == cat4_half_opc(instr->opc));
} else {
validate_assert(ctx, instr->opc == cat4_full_opc(instr->opc));
}
break;
case 5:
validate_reg_size(ctx, instr->dsts[0], instr->cat5.type);
break;
case 6:
switch (instr->opc) {
case OPC_RESINFO:
case OPC_RESFMT:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
break;
case OPC_L2G:
case OPC_G2L:
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
break;
case OPC_STG:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
break;
case OPC_STG_A:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[4], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[5]->flags & IR3_REG_HALF));
break;
case OPC_STL:
case OPC_STP:
case OPC_STLW:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[1], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
break;
case OPC_STIB:
if (instr->flags & IR3_INSTR_B) {
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
} else {
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[1], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
}
break;
default:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
if (instr->srcs_count > 1)
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
break;
}
}
}
void
ir3_validate(struct ir3 *ir)
{
#ifdef NDEBUG
# define VALIDATE 0
#define VALIDATE 0
#else
# define VALIDATE 1
#define VALIDATE 1
#endif
if (!VALIDATE)
return;
if (!VALIDATE)
return;
struct ir3_validate_ctx *ctx = ralloc_size(NULL, sizeof(*ctx));
struct ir3_validate_ctx *ctx = ralloc_size(NULL, sizeof(*ctx));
ctx->ir = ir;
ctx->defs = _mesa_pointer_set_create(ctx);
ctx->ir = ir;
ctx->defs = _mesa_pointer_set_create(ctx);
foreach_block (block, &ir->block_list) {
/* We require that the first block does not have any predecessors,
* which allows us to assume that phi nodes and meta:input's do not
* appear in the same basic block.
*/
validate_assert(ctx,
block != ir3_start_block(ir) || block->predecessors_count == 0);
foreach_block (block, &ir->block_list) {
/* We require that the first block does not have any predecessors,
* which allows us to assume that phi nodes and meta:input's do not
* appear in the same basic block.
*/
validate_assert(
ctx, block != ir3_start_block(ir) || block->predecessors_count == 0);
struct ir3_instruction *prev = NULL;
foreach_instr (instr, &block->instr_list) {
ctx->current_instr = instr;
if (instr->opc == OPC_META_PHI) {
/* phis must be the first in the block */
validate_assert(ctx, prev == NULL || prev->opc == OPC_META_PHI);
validate_phi(ctx, instr);
} else {
validate_instr(ctx, instr);
}
prev = instr;
}
struct ir3_instruction *prev = NULL;
foreach_instr (instr, &block->instr_list) {
ctx->current_instr = instr;
if (instr->opc == OPC_META_PHI) {
/* phis must be the first in the block */
validate_assert(ctx, prev == NULL || prev->opc == OPC_META_PHI);
validate_phi(ctx, instr);
} else {
validate_instr(ctx, instr);
}
prev = instr;
}
for (unsigned i = 0; i < 2; i++) {
if (block->successors[i])
validate_phi_src(ctx, block->successors[i], block);
}
}
for (unsigned i = 0; i < 2; i++) {
if (block->successors[i])
validate_phi_src(ctx, block->successors[i], block);
}
}
ralloc_free(ctx);
ralloc_free(ctx);
}
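Because VALIDATE collapses to 0 under NDEBUG, ir3_validate() is free in release builds and can be called after every IR-mutating step. A minimal usage sketch, assuming ir3.h from this tree; the pass names are placeholders, not real ir3 passes:

static void
run_passes(struct ir3 *ir)
{
   lower_something(ir);      /* hypothetical pass */
   ir3_validate(ir);         /* aborts and prints the offending instruction on failure */

   schedule_something(ir);   /* hypothetical pass */
   ir3_validate(ir);
}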

@ -32,100 +32,100 @@
typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);
typedef struct {
bool mergedregs;
regmaskstate_t mask;
bool mergedregs;
regmaskstate_t mask;
} regmask_t;
static inline bool
__regmask_get(regmask_t *regmask, bool half, unsigned n)
{
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
return BITSET_TEST(regmask->mask, n);
} else {
n *= 2;
return BITSET_TEST(regmask->mask, n) ||
BITSET_TEST(regmask->mask, n+1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
return BITSET_TEST(regmask->mask, n);
}
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
return BITSET_TEST(regmask->mask, n);
} else {
n *= 2;
return BITSET_TEST(regmask->mask, n) ||
BITSET_TEST(regmask->mask, n + 1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
return BITSET_TEST(regmask->mask, n);
}
}
static inline void
__regmask_set(regmask_t *regmask, bool half, unsigned n)
{
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
BITSET_SET(regmask->mask, n);
} else {
n *= 2;
BITSET_SET(regmask->mask, n);
BITSET_SET(regmask->mask, n+1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
BITSET_SET(regmask->mask, n);
}
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
BITSET_SET(regmask->mask, n);
} else {
n *= 2;
BITSET_SET(regmask->mask, n);
BITSET_SET(regmask->mask, n + 1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
BITSET_SET(regmask->mask, n);
}
}
static inline void
__regmask_clear(regmask_t *regmask, bool half, unsigned n)
{
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
BITSET_CLEAR(regmask->mask, n);
} else {
n *= 2;
BITSET_CLEAR(regmask->mask, n);
BITSET_CLEAR(regmask->mask, n+1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
BITSET_CLEAR(regmask->mask, n);
}
if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full-precision register
* using two half-precision slots:
*/
if (half) {
BITSET_CLEAR(regmask->mask, n);
} else {
n *= 2;
BITSET_CLEAR(regmask->mask, n);
BITSET_CLEAR(regmask->mask, n + 1);
}
} else {
/* pre a6xx case, with separate register file for half and full
* precision:
*/
if (half)
n += MAX_REG;
BITSET_CLEAR(regmask->mask, n);
}
}
static inline void
regmask_init(regmask_t *regmask, bool mergedregs)
{
memset(&regmask->mask, 0, sizeof(regmask->mask));
regmask->mergedregs = mergedregs;
memset(&regmask->mask, 0, sizeof(regmask->mask));
regmask->mergedregs = mergedregs;
}
static inline void
regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
assert(dst->mergedregs == a->mergedregs);
assert(dst->mergedregs == b->mergedregs);
assert(dst->mergedregs == a->mergedregs);
assert(dst->mergedregs == b->mergedregs);
for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
dst->mask[i] = a->mask[i] | b->mask[i];
for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
dst->mask[i] = a->mask[i] | b->mask[i];
}
#endif /* REGMASK_H_ */
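With mergedregs set (a6xx and later), a full register n occupies the two half-register slots 2n and 2n+1, so a write of full r1 must conflict with hr2 and hr3. A small usage sketch of the helpers above, assuming this header ("regmask.h") and the MAX_REG/BITSET definitions it depends on:

#include <assert.h>
#include "regmask.h"

static void
regmask_example(void)
{
   regmask_t live;
   regmask_init(&live, true /* mergedregs: a6xx+ merged register file */);

   __regmask_set(&live, false /* full */, 1);   /* mark full r1 as written */

   assert(__regmask_get(&live, true /* half */, 2));    /* aliases hr2 ... */
   assert(__regmask_get(&live, true /* half */, 3));    /* ... and hr3 */
   assert(!__regmask_get(&live, true /* half */, 0));   /* hr0 is untouched */
}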

@ -42,8 +42,8 @@
/* clang-format on */
static const struct test {
const char *asmstr;
unsigned expected_delay;
const char *asmstr;
unsigned expected_delay;
} tests[] = {
/* clang-format off */
TEST(6,
@ -101,16 +101,16 @@ static const struct test {
static struct ir3_shader *
parse_asm(struct ir3_compiler *c, const char *asmstr)
{
struct ir3_kernel_info info = {};
FILE *in = fmemopen((void *)asmstr, strlen(asmstr), "r");
struct ir3_shader *shader = ir3_parse_asm(c, &info, in);
struct ir3_kernel_info info = {};
FILE *in = fmemopen((void *)asmstr, strlen(asmstr), "r");
struct ir3_shader *shader = ir3_parse_asm(c, &info, in);
fclose(in);
fclose(in);
if (!shader)
errx(-1, "assembler failed");
if (!shader)
errx(-1, "assembler failed");
return shader;
return shader;
}
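parse_asm leans on fmemopen(3) to feed a string constant to the FILE*-based assembler; the same POSIX call is handy whenever a parser only accepts a stream. A self-contained illustration, independent of ir3:

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>

int
main(void)
{
   const char *src = "add.f r0.x, r0.y, r0.z\nend\n";
   FILE *in = fmemopen((void *)src, strlen(src), "r");   /* read the string as a stream */

   char line[128];
   while (fgets(line, sizeof(line), in))
      fputs(line, stdout);

   fclose(in);
   return 0;
}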
/**
@ -124,71 +124,70 @@ parse_asm(struct ir3_compiler *c, const char *asmstr)
static void
fixup_wrmask(struct ir3 *ir)
{
struct ir3_block *block = ir3_start_block(ir);
struct ir3_block *block = ir3_start_block(ir);
foreach_instr_safe (instr, &block->instr_list) {
instr->dsts[0]->wrmask = MASK(instr->repeat + 1);
foreach_src (reg, instr) {
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
continue;
foreach_instr_safe (instr, &block->instr_list) {
instr->dsts[0]->wrmask = MASK(instr->repeat + 1);
foreach_src (reg, instr) {
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
continue;
if (reg->flags & IR3_REG_R)
reg->wrmask = MASK(instr->repeat + 1);
else
reg->wrmask = 1;
}
}
if (reg->flags & IR3_REG_R)
reg->wrmask = MASK(instr->repeat + 1);
else
reg->wrmask = 1;
}
}
}
int
main(int argc, char **argv)
{
struct ir3_compiler *c;
int result = 0;
struct ir3_compiler *c;
int result = 0;
c = ir3_compiler_create(NULL, 630, false);
c = ir3_compiler_create(NULL, 630, false);
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
const struct test *test = &tests[i];
struct ir3_shader *shader = parse_asm(c, test->asmstr);
struct ir3 *ir = shader->variants->ir;
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
const struct test *test = &tests[i];
struct ir3_shader *shader = parse_asm(c, test->asmstr);
struct ir3 *ir = shader->variants->ir;
fixup_wrmask(ir);
fixup_wrmask(ir);
ir3_debug_print(ir, "AFTER fixup_wrmask");
ir3_debug_print(ir, "AFTER fixup_wrmask");
struct ir3_block *block =
list_first_entry(&ir->block_list, struct ir3_block, node);
struct ir3_instruction *last = NULL;
struct ir3_block *block =
list_first_entry(&ir->block_list, struct ir3_block, node);
struct ir3_instruction *last = NULL;
foreach_instr_rev (instr, &block->instr_list) {
if (is_meta(instr))
continue;
last = instr;
break;
}
foreach_instr_rev (instr, &block->instr_list) {
if (is_meta(instr))
continue;
last = instr;
break;
}
/* The delay calc is expecting the instr to not yet be added to the
* block, so remove it from the block so that it doesn't get counted
* in the distance from assigner:
*/
list_delinit(&last->node);
/* The delay calc is expecting the instr to not yet be added to the
* block, so remove it from the block so that it doesn't get counted
* in the distance from assigner:
*/
list_delinit(&last->node);
unsigned n = ir3_delay_calc_exact(block, last, true);
unsigned n = ir3_delay_calc_exact(block, last, true);
if (n != test->expected_delay) {
printf("%d: FAIL: Expected delay %u, but got %u, for:\n%s\n",
i, test->expected_delay, n, test->asmstr);
result = -1;
} else {
printf("%d: PASS\n", i);
}
if (n != test->expected_delay) {
printf("%d: FAIL: Expected delay %u, but got %u, for:\n%s\n", i,
test->expected_delay, n, test->asmstr);
result = -1;
} else {
printf("%d: PASS\n", i);
}
ir3_shader_destroy(shader);
}
ir3_shader_destroy(shader);
}
ir3_compiler_destroy(c);
ir3_compiler_destroy(c);
return result;
return result;
}

@ -48,15 +48,16 @@
/* clang-format on */
static const struct test {
int gpu_id;
const char *instr;
const char *expected;
/**
* Do we expect asm parse fail (ie. for things not (yet) supported by ir3_parser.y)
*/
bool parse_fail;
int gpu_id;
const char *instr;
const char *expected;
/**
* Do we expect asm parse fail (ie. for things not (yet) supported by
* ir3_parser.y)
*/
bool parse_fail;
} tests[] = {
/* clang-format off */
/* clang-format off */
/* cat0 */
INSTR_6XX(00000000_00000000, "nop"),
INSTR_6XX(00000200_00000000, "(rpt2)nop"),
@ -351,128 +352,132 @@ static const struct test {
INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
INSTR_6XX(e09a0000_00000000, "fence.r.w"),
INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
/* clang-format on */
/* clang-format on */
};
static void
trim(char *string)
{
for (int len = strlen(string); len > 0 && string[len - 1] == '\n'; len--)
string[len - 1] = 0;
for (int len = strlen(string); len > 0 && string[len - 1] == '\n'; len--)
string[len - 1] = 0;
}
int
main(int argc, char **argv)
{
int retval = 0;
int decode_fails = 0, asm_fails = 0, encode_fails = 0;
const int output_size = 4096;
char *disasm_output = malloc(output_size);
FILE *fdisasm = fmemopen(disasm_output, output_size, "w+");
if (!fdisasm) {
fprintf(stderr, "failed to fmemopen\n");
return 1;
}
int retval = 0;
int decode_fails = 0, asm_fails = 0, encode_fails = 0;
const int output_size = 4096;
char *disasm_output = malloc(output_size);
FILE *fdisasm = fmemopen(disasm_output, output_size, "w+");
if (!fdisasm) {
fprintf(stderr, "failed to fmemopen\n");
return 1;
}
struct ir3_compiler *compilers[10] = {};
struct ir3_compiler *compilers[10] = {};
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
const struct test *test = &tests[i];
printf("Testing a%d %s: \"%s\"...\n",
test->gpu_id, test->instr, test->expected);
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
const struct test *test = &tests[i];
printf("Testing a%d %s: \"%s\"...\n", test->gpu_id, test->instr,
test->expected);
rewind(fdisasm);
memset(disasm_output, 0, output_size);
rewind(fdisasm);
memset(disasm_output, 0, output_size);
/*
* Test disassembly:
*/
/*
* Test disassembly:
*/
uint32_t code[2] = {
strtoll(&test->instr[9], NULL, 16),
strtoll(&test->instr[0], NULL, 16),
};
isa_decode(code, 8, fdisasm, &(struct isa_decode_options){
.gpu_id = test->gpu_id,
.show_errors = true,
});
fflush(fdisasm);
uint32_t code[2] = {
strtoll(&test->instr[9], NULL, 16),
strtoll(&test->instr[0], NULL, 16),
};
isa_decode(code, 8, fdisasm,
&(struct isa_decode_options){
.gpu_id = test->gpu_id,
.show_errors = true,
});
fflush(fdisasm);
trim(disasm_output);
trim(disasm_output);
if (strcmp(disasm_output, test->expected) != 0) {
printf("FAIL: disasm\n");
printf(" Expected: \"%s\"\n", test->expected);
printf(" Got: \"%s\"\n", disasm_output);
retval = 1;
decode_fails++;
continue;
}
if (strcmp(disasm_output, test->expected) != 0) {
printf("FAIL: disasm\n");
printf(" Expected: \"%s\"\n", test->expected);
printf(" Got: \"%s\"\n", disasm_output);
retval = 1;
decode_fails++;
continue;
}
/*
* Test assembly, which should result in the identical binary:
*/
/*
* Test assembly, which should result in the identical binary:
*/
unsigned gen = test->gpu_id / 100;
if (!compilers[gen]) {
compilers[gen] = ir3_compiler_create(NULL, test->gpu_id, false);
}
unsigned gen = test->gpu_id / 100;
if (!compilers[gen]) {
compilers[gen] = ir3_compiler_create(NULL, test->gpu_id, false);
}
FILE *fasm = fmemopen((void *)test->expected, strlen(test->expected), "r");
FILE *fasm =
fmemopen((void *)test->expected, strlen(test->expected), "r");
struct ir3_kernel_info info = {};
struct ir3_shader *shader = ir3_parse_asm(compilers[gen], &info, fasm);
fclose(fasm);
if (!shader) {
printf("FAIL: %sexpected assembler fail\n", test->parse_fail ? "" : "un");
asm_fails++;
/* If this is an instruction that the asm parser is not expected
* to handle, don't count it as a fail.
*/
if (!test->parse_fail)
retval = 1;
continue;
} else if (test->parse_fail) {
/* If asm parse starts passing, and we don't expect that, flag
* it as a fail so we don't forget to update the test vector:
*/
printf("FAIL: unexpected parse success, please remove '.parse_fail=true'\n");
retval = 1;
}
struct ir3_kernel_info info = {};
struct ir3_shader *shader = ir3_parse_asm(compilers[gen], &info, fasm);
fclose(fasm);
if (!shader) {
printf("FAIL: %sexpected assembler fail\n",
test->parse_fail ? "" : "un");
asm_fails++;
/* If this is an instruction that the asm parser is not expected
* to handle, don't count it as a fail.
*/
if (!test->parse_fail)
retval = 1;
continue;
} else if (test->parse_fail) {
/* If asm parse starts passing, and we don't expect that, flag
* it as a fail so we don't forget to update the test vector:
*/
printf(
"FAIL: unexpected parse success, please remove '.parse_fail=true'\n");
retval = 1;
}
struct ir3_shader_variant *v = shader->variants;
if (memcmp(v->bin, code, sizeof(code))) {
printf("FAIL: assembler\n");
printf(" Expected: %08x_%08x\n", code[1], code[0]);
printf(" Got: %08x_%08x\n", v->bin[1], v->bin[0]);
retval = 1;
encode_fails++;
}
struct ir3_shader_variant *v = shader->variants;
if (memcmp(v->bin, code, sizeof(code))) {
printf("FAIL: assembler\n");
printf(" Expected: %08x_%08x\n", code[1], code[0]);
printf(" Got: %08x_%08x\n", v->bin[1], v->bin[0]);
retval = 1;
encode_fails++;
}
ir3_shader_destroy(shader);
}
ir3_shader_destroy(shader);
}
if (decode_fails)
printf("%d/%d decode fails\n", decode_fails, (int)ARRAY_SIZE(tests));
if (asm_fails)
printf("%d/%d assembler fails\n", asm_fails, (int)ARRAY_SIZE(tests));
if (encode_fails)
printf("%d/%d encode fails\n", encode_fails, (int)ARRAY_SIZE(tests));
if (decode_fails)
printf("%d/%d decode fails\n", decode_fails, (int)ARRAY_SIZE(tests));
if (asm_fails)
printf("%d/%d assembler fails\n", asm_fails, (int)ARRAY_SIZE(tests));
if (encode_fails)
printf("%d/%d encode fails\n", encode_fails, (int)ARRAY_SIZE(tests));
if (retval) {
printf("FAILED!\n");
} else {
printf("PASSED!\n");
}
if (retval) {
printf("FAILED!\n");
} else {
printf("PASSED!\n");
}
for (unsigned i = 0; i < ARRAY_SIZE(compilers); i++) {
if (!compilers[i])
continue;
ir3_compiler_destroy(compilers[i]);
}
for (unsigned i = 0; i < ARRAY_SIZE(compilers); i++) {
if (!compilers[i])
continue;
ir3_compiler_destroy(compilers[i]);
}
fclose(fdisasm);
free(disasm_output);
fclose(fdisasm);
free(disasm_output);
return retval;
return retval;
}
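For reference, each test vector's "hiword_loword" string is split back into the two 32-bit dwords exactly as the decode path above consumes them: strtoll stops at the underscore for the high word, and the low word starts at offset 9. A standalone sketch of that split:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
   const char *instr = "00000200_00000000";   /* "(rpt2)nop" from the table above */

   uint32_t code[2] = {
      strtoll(&instr[9], NULL, 16),   /* low dword:  0x00000000 */
      strtoll(&instr[0], NULL, 16),   /* high dword: 0x00000200, parsing stops at '_' */
   };

   printf("%08x_%08x\n", code[1], code[0]);   /* prints the original form back */
   return 0;
}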