diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 112573dc613..74bd1cd7a9b 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -645,19 +645,32 @@ get_instruction_priority(const struct v3d_device_info *devinfo,
                 return next_score;
         next_score++;
 
+        /* Empirical testing shows that using priorities to hide latency of
+         * TMU operations when scheduling QPU leads to slightly worse
+         * performance, even at 2 threads. We think this is because the thread
+         * switching is already quite effective at hiding latency and NIR
+         * scheduling (and possibly TMU pipelining too) are sufficient to hide
+         * TMU latency, so piling up on that here doesn't provide any benefits
+         * and instead may cause us to postpone critical paths that depend on
+         * the TMU results.
+         */
+#if 0
         /* Schedule texture read results collection late to hide latency. */
         if (v3d_qpu_waits_on_tmu(inst))
                 return next_score;
         next_score++;
+#endif
 
         /* Default score for things that aren't otherwise special. */
         baseline_score = next_score;
         next_score++;
 
+#if 0
         /* Schedule texture read setup early to hide their latency better. */
         if (v3d_qpu_writes_tmu(devinfo, inst))
                 return next_score;
         next_score++;
+#endif
 
         /* We should increase the maximum if we assert here */
         assert(next_score < MAX_SCHEDULE_PRIORITY);