turnip: Add debug option to print gmem load/store skip stats

TU_DEBUG=log_skip_gmem_ops prints stats about skipped
gmem loads/stores once every second.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15974>
This commit is contained in:
Danylo Piliaiev 2022-04-15 17:26:48 +03:00 committed by Marge Bot
parent 0c489f18cb
commit 725ae34458
7 changed files with 111 additions and 0 deletions

View File

@ -2808,6 +2808,22 @@ tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
struct tu_cs *cs, bool load)
{
/* Opens a conditional-execution region in cs (PRED_TEST mode) and, when the
 * TU_DEBUG_LOG_SKIP_GMEM_OPS debug flag is set, emits a packet that bumps the
 * "taken" counter for loads (load == true) or stores (load == false).
 */
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
/* Nothing more to emit unless the stats debug option is enabled. */
if (!unlikely(cmd->device->physical_device->instance->debug_flags &
TU_DEBUG_LOG_SKIP_GMEM_OPS))
return;
/* Address of the counter to update in the global BO. */
uint64_t result_iova;
if (load)
result_iova = global_iova(cmd, dbg_gmem_taken_loads);
else
result_iova = global_iova(cmd, dbg_gmem_taken_stores);
/* CP_MEM_TO_MEM with the counter as both destination and first source and
 * dbg_one as the second source, with NEG_B set. dbg_one is initialised to
 * (uint32_t)-1 in tu_CreateDevice, so this presumably computes
 * counter - (-1), i.e. counter + 1 -- TODO confirm against the packet docs.
 * The packet is emitted after tu_cond_exec_start(), so it is presumably
 * executed only when the conditional region is taken.
 */
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
tu_cs_emit_qw(cs, result_iova);
tu_cs_emit_qw(cs, result_iova);
tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
}
static void
@ -2815,6 +2831,22 @@ tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd,
struct tu_cs *cs, bool load)
{
/* Closes the conditional-execution region in cs and, when the
 * TU_DEBUG_LOG_SKIP_GMEM_OPS debug flag is set, emits a packet that bumps the
 * "total" counter for loads (load == true) or stores (load == false).
 */
tu_cond_exec_end(cs);
/* Nothing more to emit unless the stats debug option is enabled. */
if (!unlikely(cmd->device->physical_device->instance->debug_flags &
TU_DEBUG_LOG_SKIP_GMEM_OPS))
return;
/* Address of the counter to update in the global BO. */
uint64_t result_iova;
if (load)
result_iova = global_iova(cmd, dbg_gmem_total_loads);
else
result_iova = global_iova(cmd, dbg_gmem_total_stores);
/* Same CP_MEM_TO_MEM sequence as for the "taken" counters: destination and
 * first source are the counter, second source is dbg_one with NEG_B set
 * (presumably counter - (-1) == counter + 1 -- TODO confirm). It is emitted
 * after tu_cond_exec_end(), i.e. outside the conditional region.
 */
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
tu_cs_emit_qw(cs, result_iova);
tu_cs_emit_qw(cs, result_iova);
tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
}
void

View File

@ -340,6 +340,7 @@ static const struct debug_control tu_debug_options[] = {
{ "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
{ "rast_order", TU_DEBUG_RAST_ORDER },
{ "unaligned_store", TU_DEBUG_UNALIGNED_STORE },
{ "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS },
{ NULL, 0 }
};
@ -1810,6 +1811,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
struct tu6_global *global = device->global_bo->map;
tu_init_clear_blit_shaders(device);
global->predicate = 0;
global->dbg_one = (uint32_t)-1;
global->dbg_gmem_total_loads = 0;
global->dbg_gmem_taken_loads = 0;
global->dbg_gmem_total_stores = 0;
global->dbg_gmem_taken_stores = 0;
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
&(VkClearColorValue) {}, false);
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK],

View File

@ -1143,6 +1143,11 @@ tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
submit->perf_pass_index : ~0;
struct tu_queue_submit submit_req;
if (unlikely(queue->device->physical_device->instance->debug_flags &
TU_DEBUG_LOG_SKIP_GMEM_OPS)) {
tu_dbg_log_gmem_load_store_skips(queue->device);
}
pthread_mutex_lock(&queue->device->submit_mutex);
VkResult ret = tu_queue_submit_create_locked(queue, submit,

View File

@ -362,6 +362,11 @@ tu_QueueSubmit(VkQueue _queue,
TU_FROM_HANDLE(tu_syncobj, fence, _fence);
VkResult result = VK_SUCCESS;
if (unlikely(queue->device->physical_device->instance->debug_flags &
TU_DEBUG_LOG_SKIP_GMEM_OPS)) {
tu_dbg_log_gmem_load_store_skips(queue->device);
}
uint32_t max_entry_count = 0;
for (uint32_t i = 0; i < submitCount; ++i) {
const VkSubmitInfo *submit = pSubmits + i;

View File

@ -269,6 +269,7 @@ enum tu_debug_flags
TU_DEBUG_RAST_ORDER = 1 << 14,
TU_DEBUG_UNALIGNED_STORE = 1 << 15,
TU_DEBUG_LAYOUT = 1 << 16,
/* Each debug flag needs its own bit: 1 << 15 is TU_DEBUG_UNALIGNED_STORE and
 * 1 << 16 is TU_DEBUG_LAYOUT, so this option takes the next free one.
 */
TU_DEBUG_LOG_SKIP_GMEM_OPS = 1 << 17,
};
struct tu_instance
@ -490,6 +491,12 @@ struct tu6_global
/* To know when renderpass stats for autotune are valid */
volatile uint32_t autotune_fence;
volatile uint32_t dbg_one;
volatile uint32_t dbg_gmem_total_loads;
volatile uint32_t dbg_gmem_taken_loads;
volatile uint32_t dbg_gmem_total_stores;
volatile uint32_t dbg_gmem_taken_stores;
/* note: larger global bo will be used for customBorderColors */
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};

View File

@ -31,6 +31,7 @@
#include <string.h>
#include "util/u_math.h"
#include "util/timespec.h"
#include "vk_enum_to_str.h"
void PRINTFLIKE(3, 4)
@ -216,3 +217,55 @@ tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
tu_tiling_config_update_pipe_layout(fb, device);
tu_tiling_config_update_pipes(fb, device);
}
/* Returns part/total as a percentage, or 0 when total is 0 so that an
 * interval without any counted operations is not reported as NaN.
 */
static float
tu_dbg_gmem_percent(uint32_t part, uint32_t total)
{
   if (total == 0)
      return 0.f;

   return part / (float) total * 100.f;
}

/**
 * Logs, at most once per second, how many GMEM loads and stores were recorded
 * since the previous report and what percentage of them was skipped.
 *
 * The numbers come from the dbg_gmem_* counters in the device's global BO:
 * "total" minus "taken" is treated as the number of skipped operations.
 *
 * All work is done with device->submit_mutex held. The bookkeeping of the
 * previous report lives in function-level statics, i.e. there is one copy
 * per process rather than one per device.
 *
 * \param device  device whose global BO counters are read.
 */
void
tu_dbg_log_gmem_load_store_skips(struct tu_device *device)
{
   static uint32_t last_skipped_loads = 0;
   static uint32_t last_skipped_stores = 0;
   static uint32_t last_total_loads = 0;
   static uint32_t last_total_stores = 0;
   static struct timespec last_time = {0};

   pthread_mutex_lock(&device->submit_mutex);

   struct timespec current_time;
   clock_gettime(CLOCK_MONOTONIC, &current_time);

   /* Rate limit: report only when more than one second has passed. */
   if (timespec_sub_to_nsec(&current_time, &last_time) <= 1000 * 1000 * 1000) {
      pthread_mutex_unlock(&device->submit_mutex);
      return;
   }
   last_time = current_time;

   struct tu6_global *global = device->global_bo->map;

   /* Snapshot the running counters once; the fields are volatile. */
   uint32_t current_taken_loads = global->dbg_gmem_taken_loads;
   uint32_t current_taken_stores = global->dbg_gmem_taken_stores;
   uint32_t current_total_loads = global->dbg_gmem_total_loads;
   uint32_t current_total_stores = global->dbg_gmem_total_stores;

   uint32_t skipped_loads = current_total_loads - current_taken_loads;
   uint32_t skipped_stores = current_total_stores - current_taken_stores;

   /* Unsigned subtraction keeps the per-interval deltas correct even if the
    * 32-bit running counters wrap around.
    */
   uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads;
   uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores;

   uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads;
   uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores;

   mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n",
             current_time_frame_total_loads,
             tu_dbg_gmem_percent(current_time_frame_skipped_loads,
                                 current_time_frame_total_loads));

   mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n",
             current_time_frame_total_stores,
             tu_dbg_gmem_percent(current_time_frame_skipped_stores,
                                 current_time_frame_total_stores));

   last_skipped_loads = skipped_loads;
   last_skipped_stores = skipped_stores;
   last_total_loads = current_total_loads;
   last_total_stores = current_total_stores;

   pthread_mutex_unlock(&device->submit_mutex);
}

View File

@ -326,4 +326,7 @@ tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val,
#undef PACK_F
}
/* Logs GMEM load/store skip statistics read from the device's global BO.
 * The implementation rate-limits itself to at most one report per second and
 * takes device->submit_mutex internally, so the caller must not hold it.
 */
void
tu_dbg_log_gmem_load_store_skips(struct tu_device *device);
#endif /* TU_UTIL_H */