radeon: convert software queries to the new style

Software queries are all queries that require neither suspend/resume
nor explicit handling of result buffers.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
Nicolai Hähnle 2015-11-18 11:55:09 +01:00
parent 829a9808a9
commit 019106760d
1 changed file with 194 additions and 172 deletions
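
For context: the "new style" gives each query type a small vtable of operations (destroy, begin, end, get_result) that the shared entry points dispatch through, instead of switching on the query type in every code path. A minimal sketch of such a caller, assuming the entry-point shape used elsewhere in r600_query.c (the exact call sites are not part of this diff):

	static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
	{
		struct r600_common_context *rctx = (struct r600_common_context *)ctx;
		struct r600_query *rquery = (struct r600_query *)query;

		/* Software queries get sw_query_ops from r600_query_sw_create(). */
		rquery->ops->destroy(rctx, rquery);
	}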

@@ -51,15 +51,195 @@ struct r600_query {
 	unsigned num_cs_dw;
 	/* linked list of queries */
 	struct list_head list;
-	/* for custom non-GPU queries */
-	uint64_t begin_result;
-	uint64_t end_result;
-	/* Fence for GPU_FINISHED. */
-	struct pipe_fence_handle *fence;
 	/* For transform feedback: which stream the query is for */
 	unsigned stream;
 };
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+	struct r600_query b;
+
+	uint64_t begin_result;
+	uint64_t end_result;
+	/* Fence for GPU_FINISHED. */
+	struct pipe_fence_handle *fence;
+};
+
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+				  struct r600_query *rquery)
+{
+	struct pipe_screen *screen = rctx->b.screen;
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	screen->fence_reference(screen, &query->fence, NULL);
+	FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+	switch (type) {
+	case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+	case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+	case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+	case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+	default: unreachable("query type does not correspond to winsys id");
+	}
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+				   struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+	case PIPE_QUERY_GPU_FINISHED:
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->begin_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		query->begin_result = 0;
+		break;
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
+	case R600_QUERY_GPU_LOAD:
+		query->begin_result = r600_gpu_load_begin(rctx->screen);
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_begin: bad query type");
+	}
+
+	return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		break;
+	case PIPE_QUERY_GPU_FINISHED:
+		rctx->b.flush(&rctx->b, &query->fence, 0);
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->end_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
+	case R600_QUERY_GPU_LOAD:
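+		/* r600_gpu_load_end() already returns the final load value;
+		 * begin_result is zeroed so the end - begin subtraction in
+		 * get_result passes it through unchanged. */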
+		query->end_result = r600_gpu_load_end(rctx->screen,
+						      query->begin_result);
+		query->begin_result = 0;
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_end: bad query type");
+	}
+}
+
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+					struct r600_query *rquery,
+					boolean wait,
+					union pipe_query_result *result)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = FALSE;
+		return TRUE;
+	case PIPE_QUERY_GPU_FINISHED: {
+		struct pipe_screen *screen = rctx->b.screen;
+		result->b = screen->fence_finish(screen, query->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0);
+		return result->b;
+	}
+	}
+
+	result->u64 = query->end_result - query->begin_result;
+
+	switch (query->b.type) {
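+	/* Normalize the raw winsys units: BUFFER_WAIT_TIME is reported in
+	 * nanoseconds and GPU_TEMPERATURE in millidegrees, the clocks in
+	 * MHz; convert to microseconds, degrees, and Hz respectively. */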
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_GPU_TEMPERATURE:
+		result->u64 /= 1000;
+		break;
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		result->u64 *= 1000000;
+		break;
+	}
+
+	return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = {
+	.destroy = r600_query_sw_destroy,
+	.begin = r600_query_sw_begin,
+	.end = r600_query_sw_end,
+	.get_result = r600_query_sw_get_result
+};
+
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+					       unsigned query_type)
+{
+	struct r600_query_sw *query;
+
+	query = CALLOC_STRUCT(r600_query_sw);
+	if (query == NULL)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &sw_query_ops;
+
+	return (struct pipe_query *)query;
+}
 
 static void r600_do_destroy_query(struct r600_common_context *, struct r600_query *);
 static boolean r600_do_begin_query(struct r600_common_context *, struct r600_query *);
 static void r600_do_end_query(struct r600_common_context *, struct r600_query *);
@@ -82,8 +262,7 @@ static bool r600_is_timer_query(unsigned type)
 static bool r600_query_needs_begin(unsigned type)
 {
-	return type != PIPE_QUERY_GPU_FINISHED &&
-	       type != PIPE_QUERY_TIMESTAMP;
+	return type != PIPE_QUERY_TIMESTAMP;
 }
 
 static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
@@ -91,27 +270,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
 	unsigned j, i, num_results, buf_size = 4096;
 	uint32_t *results;
 
-	/* Non-GPU queries. */
-	switch (type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		return NULL;
-	}
-
 	/* Queries are normally read by the CPU after
 	 * being written by the gpu, hence staging is probably a good
 	 * usage pattern.
@@ -380,7 +538,11 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *query;
-	bool skip_allocation = false;
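+
+	/* TIMESTAMP_DISJOINT, GPU_FINISHED and all driver-specific queries
+	 * (every R600_QUERY_* value lies above PIPE_QUERY_DRIVER_SPECIFIC)
+	 * are implemented in software, without a GPU buffer. */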
+	if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+	    query_type == PIPE_QUERY_GPU_FINISHED ||
+	    query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+		return r600_query_sw_create(ctx, query_type);
+
 	query = CALLOC_STRUCT(r600_query);
 	if (query == NULL)
@@ -417,38 +579,18 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
 		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
 		query->num_cs_dw = 6;
 		break;
-	/* Non-GPU queries and queries not requiring a buffer. */
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		skip_allocation = true;
-		break;
 	default:
 		assert(0);
 		FREE(query);
 		return NULL;
 	}
 
-	if (!skip_allocation) {
-		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
-		if (!query->buffer.buf) {
-			FREE(query);
-			return NULL;
-		}
-	}
+	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+	if (!query->buffer.buf) {
+		FREE(query);
+		return NULL;
+	}
 
 	return (struct pipe_query*)query;
 }
@@ -496,42 +638,6 @@ static boolean r600_do_begin_query(struct r600_common_context *rctx,
 		return false;
 	}
 
-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return true;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->begin_result = rctx->num_draw_calls;
-		return true;
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->begin_result = 0;
-		return true;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return true;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return true;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return true;
-	case R600_QUERY_GPU_LOAD:
-		rquery->begin_result = r600_gpu_load_begin(rctx->screen);
-		return true;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
-		return true;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return true;
-	}
-
 	/* Discard the old query buffers. */
 	while (prev) {
 		struct r600_query_buffer *qbuf = prev;
@@ -570,57 +676,6 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 static void r600_do_end_query(struct r600_common_context *rctx,
 			      struct r600_query *rquery)
 {
-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return;
-	case PIPE_QUERY_GPU_FINISHED:
-		rctx->b.flush(&rctx->b, &rquery->fence, 0);
-		return;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->end_result = rctx->num_draw_calls;
-		return;
-	case R600_QUERY_REQUESTED_VRAM:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
-		return;
-	case R600_QUERY_REQUESTED_GTT:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
-		return;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return;
-	case R600_QUERY_VRAM_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
-		return;
-	case R600_QUERY_GTT_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
-		return;
-	case R600_QUERY_GPU_TEMPERATURE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
-		return;
-	case R600_QUERY_CURRENT_GPU_SCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
-		return;
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
-		return;
-	case R600_QUERY_GPU_LOAD:
-		rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
-		return;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
-		return;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return;
-	}
-
 	r600_emit_query_end(rctx, rquery);
 
 	if (r600_query_needs_begin(rquery->type))
@@ -651,42 +706,9 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 					    boolean wait,
 					    union pipe_query_result *result)
 {
-	struct pipe_screen *screen = ctx->b.screen;
 	unsigned results_base = 0;
 	char *map;
 
-	/* Non-GPU queries. */
-	switch (query->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		/* Convert from cycles per millisecond to cycles per second (Hz). */
-		result->timestamp_disjoint.frequency =
-			(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
-		result->timestamp_disjoint.disjoint = FALSE;
-		return TRUE;
-	case PIPE_QUERY_GPU_FINISHED:
-		result->b = screen->fence_finish(screen, query->fence,
-						 wait ? PIPE_TIMEOUT_INFINITE : 0);
-		return result->b;
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		result->u64 = query->end_result - query->begin_result;
-		return TRUE;
-	case R600_QUERY_GPU_LOAD:
-		result->u64 = query->end_result;
-		return TRUE;
-	}
-
 	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
 					      PIPE_TRANSFER_READ |
 					      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));