winsys/radeon: consolidate tracing into winsys v2

This moves the tracing timeout and printing into the winsys and adds
a debug environment variable for it (R600_DEBUG=trace_cs).

Lots of files are touched because of the winsys API changes.

v2: Do not write the lockup file if the IB unique id does not match the last one

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
This commit is contained in:
Jerome Glisse 2013-04-23 19:22:33 -04:00
parent 53fbae7eac
commit abb96fdea7
16 changed files with 68 additions and 95 deletions

View File

@ -379,7 +379,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
sizeof(struct pipe_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
r300->cs = rws->cs_create(rws, RING_GFX);
r300->cs = rws->cs_create(rws, RING_GFX, NULL);
if (r300->cs == NULL)
goto fail;

View File

@ -52,7 +52,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags)
}
r300->flush_counter++;
r300->rws->cs_flush(r300->cs, flags);
r300->rws->cs_flush(r300->cs, flags, 0);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
@ -100,11 +100,11 @@ void r300_flush(struct pipe_context *pipe,
* and we cannot emit an empty CS. Let's write to some reg. */
CS_LOCALS(r300);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
r300->rws->cs_flush(r300->cs, flags);
r300->rws->cs_flush(r300->cs, flags, 0);
} else {
/* Even if hw is not dirty, we should at least reset the CS in case
* the space checking failed for the first draw operation. */
r300->rws->cs_flush(r300->cs, flags);
r300->rws->cs_flush(r300->cs, flags, 0);
}
}

View File

@ -463,7 +463,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
}
ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);
ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);
ctx->flags = 0;

View File

@ -137,21 +137,17 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
for (i = 0; i < R600_NUM_ATOMS; i++) {
if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
num_dw += ctx->atoms[i]->num_dw;
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
#endif
}
}
/* The upper-bound of how much space a draw command would take. */
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
#endif
}
/* Count in queries_suspend. */
@ -339,37 +335,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
}
/* Flush the CS. */
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
struct r600_screen *rscreen = ctx->screen;
unsigned i;
for (i = 0; i < cs->cdw; i++) {
fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
}
rscreen->cs_count++;
}
#endif
ctx->ws->cs_flush(ctx->rings.gfx.cs, flags);
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
struct r600_screen *rscreen = ctx->screen;
unsigned i;
for (i = 0; i < 10; i++) {
usleep(5);
if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
break;
}
}
if (i == 10) {
fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
} else {
fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
}
}
#endif
ctx->ws->cs_flush(ctx->rings.gfx.cs, flags, ctx->screen->cs_count++);
}
void r600_begin_new_cs(struct r600_context *ctx)

View File

@ -45,6 +45,7 @@ static const struct debug_named_value debug_options[] = {
{ "texdepth", DBG_TEX_DEPTH, "Print texture depth info" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
{ "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
/* shaders */
{ "fs", DBG_FS, "Print fetch shaders" },
@ -212,7 +213,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags)
}
rctx->rings.dma.flushing = true;
rctx->ws->cs_flush(cs, flags);
rctx->ws->cs_flush(cs, flags, 0);
rctx->rings.dma.flushing = false;
}
@ -430,14 +431,18 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
goto fail;
}
rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
if (rscreen->trace_bo) {
rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX, rscreen->trace_bo->cs_buf);
} else {
rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
}
rctx->rings.gfx.flush = r600_flush_gfx_ring;
rctx->ws->cs_set_flush_callback(rctx->rings.gfx.cs, r600_flush_from_winsys, rctx);
rctx->rings.gfx.flushing = false;
rctx->rings.dma.cs = NULL;
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA);
rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
rctx->ws->cs_set_flush_callback(rctx->rings.dma.cs, r600_flush_dma_from_winsys, rctx);
rctx->rings.dma.flushing = false;
@ -958,12 +963,10 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->fences.bo, NULL);
}
#if R600_TRACE_CS
if (rscreen->trace_bo) {
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
}
#endif
pipe_mutex_destroy(rscreen->fences.mutex);
rscreen->ws->destroy(rscreen->ws);
@ -1308,9 +1311,8 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->global_pool = compute_memory_pool_new(rscreen);
#if R600_TRACE_CS
rscreen->cs_count = 0;
if (rscreen->info.drm_minor >= 28) {
if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
@ -1320,7 +1322,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
#endif
/* Create the auxiliary context. */
pipe_mutex_init(rscreen->aux_context_lock);

View File

@ -37,8 +37,6 @@
#define R600_NUM_ATOMS 41
#define R600_TRACE_CS 0
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 34
@ -245,7 +243,8 @@ typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx,
/* logging */
#define DBG_TEX_DEPTH (1 << 0)
#define DBG_COMPUTE (1 << 1)
#define DBG_VM (1 << 2)
#define DBG_VM (1 << 2)
#define DBG_TRACE_CS (1 << 3)
/* shaders */
#define DBG_FS (1 << 8)
#define DBG_VS (1 << 9)
@ -284,11 +283,9 @@ struct r600_screen {
* XXX: Not sure if this is the best place for global_pool. Also,
* it's not thread safe, so it won't work with multiple contexts. */
struct compute_memory_pool *global_pool;
#if R600_TRACE_CS
struct r600_resource *trace_bo;
uint32_t *trace_ptr;
unsigned cs_count;
#endif
r600g_dma_blit_t dma_blit;
/* Auxiliary context. Mainly used to initialize resources.
@ -654,19 +651,15 @@ static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
cs->cdw += cb->num_dw;
}
#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx);
#endif
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(rctx, atom);
atom->dirty = false;
#if R600_TRACE_CS
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
#endif
}
static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)

View File

@ -1488,11 +1488,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
}
#if R600_TRACE_CS
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
#endif
/* Set the depth buffer as dirty. */
if (rctx->framebuffer.state.zsbuf) {
@ -1764,7 +1762,6 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->context.draw_vbo = r600_draw_vbo;
}
#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx)
{
struct r600_screen *rscreen = rctx->screen;
@ -1782,4 +1779,3 @@ void r600_trace_emit(struct r600_context *rctx)
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
}
#endif

View File

@ -109,7 +109,7 @@ static void flush(struct ruvd_decoder *dec)
while(dec->cs->cdw % 16)
pm4[dec->cs->cdw++] = RUVD_PKT2();
dec->ws->cs_flush(dec->cs, 0);
dec->ws->cs_flush(dec->cs, 0, 0);
}
/* add a new set register command to the IB */
@ -859,7 +859,7 @@ struct pipe_video_decoder *ruvd_create_decoder(struct pipe_context *context,
dec->set_dtb = set_dtb;
dec->stream_handle = alloc_stream_handle();
dec->ws = ws;
dec->cs = ws->cs_create(ws, RING_UVD);
dec->cs = ws->cs_create(ws, RING_UVD, NULL);
if (!dec->cs) {
RUVD_ERR("Can't get command submission context.\n");
goto error;

View File

@ -225,7 +225,7 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
#endif
/* Flush the CS. */
ctx->ws->cs_flush(ctx->cs, flags);
ctx->ws->cs_flush(ctx->cs, flags, 0);
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {

View File

@ -200,7 +200,7 @@ static void radeonsi_launch_grid(
}
#endif
rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE);
rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE, 0);
rctx->ws->buffer_wait(program->shader.bo->buf, 0);
FREE(pm4);

View File

@ -233,7 +233,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case TAHITI:
si_init_state_functions(rctx);
LIST_INITHEAD(&rctx->active_query_list);
rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
rctx->max_db = 8;
si_init_config(rctx);
break;

View File

@ -167,7 +167,9 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
}
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
enum ring_type ring_type,
struct radeon_winsys_cs_handle *trace_buf)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_drm_cs *cs;
@ -179,6 +181,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
pipe_semaphore_init(&cs->flush_completed, 0);
cs->ws = ws;
cs->trace_buf = (struct radeon_bo*)trace_buf;
if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
FREE(cs);
@ -413,7 +416,7 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
OUT_CS(&cs->base, index * RELOC_DWORDS);
}
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
unsigned i;
@ -432,9 +435,9 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
}
}
#if RADEON_CS_DUMP_ON_LOCKUP
radeon_dump_cs_on_lockup(csc);
#endif
if (cs->trace_buf) {
radeon_dump_cs_on_lockup(cs, csc);
}
for (i = 0; i < csc->crelocs; i++)
p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
@ -458,7 +461,7 @@ void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_cs_context *tmp;
@ -474,6 +477,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
cs->csc = cs->cst;
cs->cst = tmp;
cs->cst->cs_trace_id = cs_trace_id;
/* If the CS is not empty or overflowed, emit it in a separate thread. */
if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
unsigned i, crelocs = cs->cst->crelocs;
@ -536,7 +541,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
}
}
pipe_mutex_unlock(cs->ws->cs_stack_lock);
radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
}
} else {
radeon_cs_context_cleanup(cs->cst);

View File

@ -30,8 +30,6 @@
#include "radeon_drm_bo.h"
#include <radeon_drm.h>
#define RADEON_CS_DUMP_ON_LOCKUP 0
struct radeon_cs_context {
uint32_t buf[RADEON_MAX_CMDBUF_DWORDS];
@ -41,6 +39,8 @@ struct radeon_cs_context {
uint64_t chunk_array[3];
uint32_t flags[2];
uint32_t cs_trace_id;
/* Relocs. */
unsigned nrelocs;
unsigned crelocs;
@ -78,6 +78,7 @@ struct radeon_drm_cs {
int flush_started;
pipe_semaphore flush_completed;
struct radeon_bo *trace_buf;
};
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
@ -121,10 +122,8 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc);
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
#if RADEON_CS_DUMP_ON_LOCKUP
void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc);
#endif
void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
#endif

View File

@ -34,18 +34,18 @@
#include "radeon_drm_cs.h"
#include "radeon_drm_bo.h"
#if RADEON_CS_DUMP_ON_LOCKUP
static bool dumped = false;
#define RADEON_CS_DUMP_AFTER_MS_TIMEOUT 500
void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
struct drm_radeon_gem_busy args;
FILE *dump;
unsigned i, lockup;
uint32_t *ptr;
char fname[32];
/* only dump the first cs to cause a lockup */
if (!csc->crelocs || dumped) {
if (!csc->crelocs) {
/* can not determine if there was a lockup if no bo were use by
* the cs and most likely in such case no lockup occurs
*/
@ -54,19 +54,27 @@ void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
memset(&args, 0, sizeof(args));
args.handle = csc->relocs_bo[0]->handle;
for (i = 0; i < 10; i++) {
usleep(5);
for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
usleep(1);
lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
if (!lockup) {
break;
}
}
if (!lockup || i < 10) {
if (!lockup || i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT) {
return;
}
ptr = radeon_bo_do_map(cs->trace_buf);
fprintf(stderr, "timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x\n", ptr[1], ptr[0]);
if (csc->cs_trace_id != ptr[1]) {
return;
}
/* ok we are most likely facing a lockup write the standalone replay file */
dump = fopen("radeon_lockup.c", "w");
snprintf(fname, sizeof(fname), "rlockup_0x%08x.c", csc->cs_trace_id);
dump = fopen(fname, "w");
if (dump == NULL) {
return;
}
@ -74,8 +82,9 @@ void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
fprintf(dump, " * in same directory. You can find radeon_ctx.h in mesa tree :\n");
fprintf(dump, " * mesa/src/gallium/winsys/radeon/tools/radeon_ctx.h\n");
fprintf(dump, " * Build with :\n");
fprintf(dump, " * gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm\n");
fprintf(dump, " * gcc -O0 -g %s -ldrm -o rlockup_0x%08x -I/usr/include/libdrm\n", fname, csc->cs_trace_id);
fprintf(dump, " */\n");
fprintf(dump, " /* timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x*/\n", ptr[1], ptr[0]);
fprintf(dump, "#include <stdio.h>\n");
fprintf(dump, "#include <stdint.h>\n");
fprintf(dump, "#include \"radeon_ctx.h\"\n");
@ -107,6 +116,7 @@ void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
}
fprintf(dump, "};\n\n");
fprintf(dump, "/* cs %d dw */\n", csc->chunks[0].length_dw);
fprintf(dump, "static uint32_t cs[] = {\n");
ptr = csc->buf;
for (i = 0; i < csc->chunks[0].length_dw; i++) {
@ -147,6 +157,4 @@ void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
fprintf(dump, " bo_wait(&ctx, bo[0]);\n");
fprintf(dump, "}\n");
fclose(dump);
dumped = true;
}
#endif

View File

@ -551,7 +551,7 @@ next:
pipe_mutex_unlock(ws->cs_stack_lock);
if (cs) {
radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
pipe_mutex_lock(ws->cs_stack_lock);
for (i = 1; i < p_atomic_read(&ws->ncs); i++) {

View File

@ -368,8 +368,12 @@ struct radeon_winsys {
* Create a command stream.
*
* \param ws The winsys this function is called from.
* \param ring_type The ring type (GFX, DMA, UVD)
* \param trace_buf Trace buffer when tracing is enabled
*/
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
enum ring_type ring_type,
struct radeon_winsys_cs_handle *trace_buf);
/**
* Destroy a command stream.
@ -425,10 +429,11 @@ struct radeon_winsys {
/**
* Flush a command stream.
*
* \param cs A command stream to flush.
* \param flags, RADEON_FLUSH_ASYNC or 0.
* \param cs A command stream to flush.
* \param flags, RADEON_FLUSH_ASYNC or 0.
* \param cs_trace_id A unique identifiant for the cs
*/
void (*cs_flush)(struct radeon_winsys_cs *cs, unsigned flags);
void (*cs_flush)(struct radeon_winsys_cs *cs, unsigned flags, uint32_t cs_trace_id);
/**
* Set a flush callback which is called from winsys when flush is