turnip: Add debug option to print gmem load/store skip stats
Setting TU_DEBUG=log_skip_gmem_ops prints statistics about skipped GMEM loads/stores once per second. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15974>
This commit is contained in:
parent
0c489f18cb
commit
725ae34458
|
@ -2808,6 +2808,22 @@ tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs, bool load)
|
||||
{
|
||||
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
|
||||
|
||||
if (!unlikely(cmd->device->physical_device->instance->debug_flags &
|
||||
TU_DEBUG_LOG_SKIP_GMEM_OPS))
|
||||
return;
|
||||
|
||||
uint64_t result_iova;
|
||||
if (load)
|
||||
result_iova = global_iova(cmd, dbg_gmem_taken_loads);
|
||||
else
|
||||
result_iova = global_iova(cmd, dbg_gmem_taken_stores);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
|
||||
tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2815,6 +2831,22 @@ tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs, bool load)
|
||||
{
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
if (!unlikely(cmd->device->physical_device->instance->debug_flags &
|
||||
TU_DEBUG_LOG_SKIP_GMEM_OPS))
|
||||
return;
|
||||
|
||||
uint64_t result_iova;
|
||||
if (load)
|
||||
result_iova = global_iova(cmd, dbg_gmem_total_loads);
|
||||
else
|
||||
result_iova = global_iova(cmd, dbg_gmem_total_stores);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
|
||||
tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -340,6 +340,7 @@ static const struct debug_control tu_debug_options[] = {
|
|||
{ "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
|
||||
{ "rast_order", TU_DEBUG_RAST_ORDER },
|
||||
{ "unaligned_store", TU_DEBUG_UNALIGNED_STORE },
|
||||
{ "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
|
@ -1810,6 +1811,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
struct tu6_global *global = device->global_bo->map;
|
||||
tu_init_clear_blit_shaders(device);
|
||||
global->predicate = 0;
|
||||
global->dbg_one = (uint32_t)-1;
|
||||
global->dbg_gmem_total_loads = 0;
|
||||
global->dbg_gmem_taken_loads = 0;
|
||||
global->dbg_gmem_total_stores = 0;
|
||||
global->dbg_gmem_taken_stores = 0;
|
||||
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
|
||||
&(VkClearColorValue) {}, false);
|
||||
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK],
|
||||
|
|
|
@ -1143,6 +1143,11 @@ tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
|
|||
submit->perf_pass_index : ~0;
|
||||
struct tu_queue_submit submit_req;
|
||||
|
||||
if (unlikely(queue->device->physical_device->instance->debug_flags &
|
||||
TU_DEBUG_LOG_SKIP_GMEM_OPS)) {
|
||||
tu_dbg_log_gmem_load_store_skips(queue->device);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&queue->device->submit_mutex);
|
||||
|
||||
VkResult ret = tu_queue_submit_create_locked(queue, submit,
|
||||
|
|
|
@ -362,6 +362,11 @@ tu_QueueSubmit(VkQueue _queue,
|
|||
TU_FROM_HANDLE(tu_syncobj, fence, _fence);
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
if (unlikely(queue->device->physical_device->instance->debug_flags &
|
||||
TU_DEBUG_LOG_SKIP_GMEM_OPS)) {
|
||||
tu_dbg_log_gmem_load_store_skips(queue->device);
|
||||
}
|
||||
|
||||
uint32_t max_entry_count = 0;
|
||||
for (uint32_t i = 0; i < submitCount; ++i) {
|
||||
const VkSubmitInfo *submit = pSubmits + i;
|
||||
|
|
|
@ -269,6 +269,7 @@ enum tu_debug_flags
|
|||
TU_DEBUG_RAST_ORDER = 1 << 14,
|
||||
TU_DEBUG_UNALIGNED_STORE = 1 << 15,
|
||||
TU_DEBUG_LAYOUT = 1 << 16,
|
||||
/* Each debug flag needs its own bit: 1 << 15 is TU_DEBUG_UNALIGNED_STORE and
 * 1 << 16 is TU_DEBUG_LAYOUT, so this option takes the next free one. */
TU_DEBUG_LOG_SKIP_GMEM_OPS = 1 << 17,
|
||||
};
|
||||
|
||||
struct tu_instance
|
||||
|
@ -490,6 +491,12 @@ struct tu6_global
|
|||
/* To know when renderpass stats for autotune are valid */
|
||||
volatile uint32_t autotune_fence;
|
||||
|
||||
volatile uint32_t dbg_one;
|
||||
volatile uint32_t dbg_gmem_total_loads;
|
||||
volatile uint32_t dbg_gmem_taken_loads;
|
||||
volatile uint32_t dbg_gmem_total_stores;
|
||||
volatile uint32_t dbg_gmem_taken_stores;
|
||||
|
||||
/* note: larger global bo will be used for customBorderColors */
|
||||
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
|
||||
};
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/timespec.h"
|
||||
#include "vk_enum_to_str.h"
|
||||
|
||||
void PRINTFLIKE(3, 4)
|
||||
|
@ -216,3 +217,55 @@ tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
|
|||
tu_tiling_config_update_pipe_layout(fb, device);
|
||||
tu_tiling_config_update_pipes(fb, device);
|
||||
}
|
||||
|
||||
void
|
||||
tu_dbg_log_gmem_load_store_skips(struct tu_device *device)
|
||||
{
|
||||
static uint32_t last_skipped_loads = 0;
|
||||
static uint32_t last_skipped_stores = 0;
|
||||
static uint32_t last_total_loads = 0;
|
||||
static uint32_t last_total_stores = 0;
|
||||
static struct timespec last_time = {};
|
||||
|
||||
pthread_mutex_lock(&device->submit_mutex);
|
||||
|
||||
struct timespec current_time;
|
||||
clock_gettime(CLOCK_MONOTONIC, ¤t_time);
|
||||
|
||||
if (timespec_sub_to_nsec(¤t_time, &last_time) > 1000 * 1000 * 1000) {
|
||||
last_time = current_time;
|
||||
} else {
|
||||
pthread_mutex_unlock(&device->submit_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
struct tu6_global *global = device->global_bo->map;
|
||||
|
||||
uint32_t current_taken_loads = global->dbg_gmem_taken_loads;
|
||||
uint32_t current_taken_stores = global->dbg_gmem_taken_stores;
|
||||
uint32_t current_total_loads = global->dbg_gmem_total_loads;
|
||||
uint32_t current_total_stores = global->dbg_gmem_total_stores;
|
||||
|
||||
uint32_t skipped_loads = current_total_loads - current_taken_loads;
|
||||
uint32_t skipped_stores = current_total_stores - current_taken_stores;
|
||||
|
||||
uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads;
|
||||
uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores;
|
||||
|
||||
uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads;
|
||||
uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores;
|
||||
|
||||
mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n",
|
||||
current_time_frame_total_loads,
|
||||
current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f);
|
||||
mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n",
|
||||
current_time_frame_total_stores,
|
||||
current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f);
|
||||
|
||||
last_skipped_loads = skipped_loads;
|
||||
last_skipped_stores = skipped_stores;
|
||||
last_total_loads = current_total_loads;
|
||||
last_total_stores = current_total_stores;
|
||||
|
||||
pthread_mutex_unlock(&device->submit_mutex);
|
||||
}
|
|
@ -326,4 +326,7 @@ tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val,
|
|||
#undef PACK_F
|
||||
}
|
||||
|
||||
void
|
||||
tu_dbg_log_gmem_load_store_skips(struct tu_device *device);
|
||||
|
||||
#endif /* TU_UTIL_H */
|
||||
|
|
Loading…
Reference in New Issue