freedreno/a6xx: Implement primitive count queries on GPU

The driver can't determine PIPE_QUERY_PRIMITIVES_GENERATED or
PIPE_QUERY_PRIMITIVES_EMITTED once we support geometry or
tessellation, since these stages add primitives at runtime.  Use the
WRITE_PRIMITIVE_COUNTS event to write back the primitive counts and
implement a hw query for this.

Reviewed-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Kristian H. Kristensen 2019-09-05 15:12:23 -07:00
parent 1acf8d2354
commit 30ab3e39fd
15 changed files with 128 additions and 18 deletions

View File

@ -2394,6 +2394,10 @@ to upconvert to 32b float internally?
<bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/>
<bitfield name="B_EN" pos="23" type="boolean"/>
</reg32>
<reg32 offset="0x9218" name="VPC_SO_STREAM_COUNTS_LO"/>
<reg32 offset="0x9219" name="VPC_SO_STREAM_COUNTS_HI"/>
<array offset="0x921a" name="VPC_SO" stride="7" length="4">
<reg32 offset="0" name="BUFFER_BASE_LO"/>
<reg32 offset="1" name="BUFFER_BASE_HI"/>

View File

@ -15,6 +15,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
<value name="VIZQUERY_START" value="7"/> <!-- on a2xx (??) -->
<value name="VIZQUERY_END" value="8"/>
<value name="SC_WAIT_WC" value="9"/>
<value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/>
<value name="RST_PIX_CNT" value="13"/>
<value name="RST_VTX_CNT" value="14"/>
<value name="TILE_FLUSH" value="15"/>

View File

@ -218,7 +218,7 @@ fd2_create_batch_query(struct pipe_context *pctx,
counters_per_group[entry->gid]++;
}
q = fd_acc_create_query2(ctx, 0, &perfcntr);
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */

View File

@ -433,7 +433,7 @@ fd5_create_batch_query(struct pipe_context *pctx,
counters_per_group[entry->gid]++;
}
q = fd_acc_create_query2(ctx, 0, &perfcntr);
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */

View File

@ -252,6 +252,96 @@ static const struct fd_acc_sample_provider timestamp = {
.result = timestamp_accumulate_result,
};
/*
 * Layout of the sample buffer shared by the primitive count queries.
 *
 * start[] and stop[] each hold four {generated, emitted} pairs of 64-bit
 * counters.  The resume hook points VPC_SO_STREAM_COUNTS at start[0] and the
 * pause hook points it at stop[0] before emitting WRITE_PRIMITIVE_COUNTS;
 * the pause hook then adds (stop - start) for entry aq->base.index into
 * result.
 *
 * NOTE(review): the four entries presumably map to the four streamout
 * streams -- confirm against the hardware documentation.
 */
struct PACKED fd6_primitives_sample {
struct {
uint64_t generated, emitted;
} start[4], stop[4], result;
};
/*
 * Emit a relocation into `ring` that addresses `field` inside the query's
 * fd6_primitives_sample buffer (the bo behind (aq)->prsc).  `field` is
 * pasted straight into offsetof(), so any member designator works, e.g.
 * start[0] or result.emitted.
 *
 * primitives_relocw goes through OUT_RELOCW and is used in this file for
 * addresses the GPU writes to; primitives_reloc goes through OUT_RELOC and
 * is used for the operands that are only read.
 *
 * The expansions carry no trailing semicolon on purpose: the caller supplies
 * it, so the macros stay usable as the sole statement of an if/else branch.
 */
#define primitives_relocw(ring, aq, field) \
	OUT_RELOCW(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)

#define primitives_reloc(ring, aq, field) \
	OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)
/*
 * Resume hook for the primitive count queries: aim the stream-counts
 * address registers at the start[] snapshot area of the sample buffer and
 * ask the hardware to write the primitive counts there.
 */
static void
primitive_counts_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *cs = batch->draw;

	/* LO/HI register pair <- address of start[0] */
	OUT_PKT4(cs, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
	primitives_relocw(cs, aq, start[0]);

	fd6_event_write(batch, cs, WRITE_PRIMITIVE_COUNTS, false);
}
/*
 * Pause hook for the primitive count queries: snapshot the counters into
 * stop[], then let the CP fold the delta for this query's entry
 * (aq->base.index) into the running totals kept in result.
 */
static void
primitive_counts_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *cs = batch->draw;
	const unsigned stream = aq->base.index;
	/*
	 * Flag word shared by both CP_MEM_TO_MEM packets below.
	 * NOTE(review): 0x80000000 has no named define in this file --
	 * presumably a synchronisation flag; confirm against the register
	 * database.
	 */
	const uint32_t accumulate = CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C | 0x80000000;

	/* LO/HI register pair <- address of stop[0] */
	OUT_PKT4(cs, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
	primitives_relocw(cs, aq, stop[0]);

	fd6_event_write(batch, cs, WRITE_PRIMITIVE_COUNTS, false);
	fd6_event_write(batch, cs, CACHE_FLUSH_TS, true);

	/* result.emitted += stop.emitted - start.emitted */
	OUT_PKT7(cs, CP_MEM_TO_MEM, 9);
	OUT_RING(cs, accumulate);
	primitives_relocw(cs, aq, result.emitted);
	primitives_reloc(cs, aq, result.emitted);
	primitives_reloc(cs, aq, stop[stream].emitted);
	primitives_reloc(cs, aq, start[stream].emitted);

	/* result.generated += stop.generated - start.generated */
	OUT_PKT7(cs, CP_MEM_TO_MEM, 9);
	OUT_RING(cs, accumulate);
	primitives_relocw(cs, aq, result.generated);
	primitives_reloc(cs, aq, result.generated);
	primitives_reloc(cs, aq, stop[stream].generated);
	primitives_reloc(cs, aq, start[stream].generated);
}
static void
primitives_generated_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd6_primitives_sample *ps = buf;
result->u64 = ps->result.generated;
}
/* Accumulating sample provider backing PIPE_QUERY_PRIMITIVES_GENERATED. */
static const struct fd_acc_sample_provider primitives_generated = {
	.query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
	.size = sizeof(struct fd6_primitives_sample),
	.active = FD_STAGE_DRAW,
	/* resume/pause are shared with the PRIMITIVES_EMITTED provider */
	.pause = primitive_counts_pause,
	.resume = primitive_counts_resume,
	.result = primitives_generated_result,
};
static void
primitives_emitted_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result)
{
struct fd6_primitives_sample *ps = buf;
result->u64 = ps->result.emitted;
}
/* Accumulating sample provider backing PIPE_QUERY_PRIMITIVES_EMITTED. */
static const struct fd_acc_sample_provider primitives_emitted = {
	.query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
	.size = sizeof(struct fd6_primitives_sample),
	.active = FD_STAGE_DRAW,
	/* resume/pause are shared with the PRIMITIVES_GENERATED provider */
	.pause = primitive_counts_pause,
	.resume = primitive_counts_resume,
	.result = primitives_emitted_result,
};
/*
* Performance Counter (batch) queries:
*
@ -433,7 +523,7 @@ fd6_create_batch_query(struct pipe_context *pctx,
counters_per_group[entry->gid]++;
}
q = fd_acc_create_query2(ctx, 0, &perfcntr);
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */
@ -463,4 +553,7 @@ fd6_query_context_init(struct pipe_context *pctx)
fd_acc_query_register_provider(pctx, &time_elapsed);
fd_acc_query_register_provider(pctx, &timestamp);
fd_acc_query_register_provider(pctx, &primitives_generated);
fd_acc_query_register_provider(pctx, &primitives_emitted);
}

View File

@ -55,7 +55,7 @@ enum fd_render_stage {
FD_STAGE_ALL = 0xff,
};
#define MAX_HW_SAMPLE_PROVIDERS 5
#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

View File

@ -331,7 +331,7 @@ struct fd_context {
void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
/* query: */
struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type);
struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type, unsigned index);
void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles);
void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
struct fd_ringbuffer *ring);

View File

@ -41,11 +41,12 @@ static struct pipe_query *
fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
{
/*
 * NOTE(review): this hunk shows the removed and the added lines side by
 * side, so `q` appears to be declared twice and both call orders are
 * present.  The lines that pass `index` look like the post-change version:
 * ask ctx->create_query first (when the hook is set) and fall back to
 * fd_sw_create_query() only if no query was produced -- confirm against
 * the applied file.
 */
struct fd_context *ctx = fd_context(pctx);
struct fd_query *q;
struct fd_query *q = NULL;
q = fd_sw_create_query(ctx, query_type);
if (!q && ctx->create_query)
q = ctx->create_query(ctx, query_type);
if (ctx->create_query)
q = ctx->create_query(ctx, query_type, index);
if (!q)
q = fd_sw_create_query(ctx, query_type, index);
return (struct pipe_query *) q;
}

View File

@ -46,6 +46,7 @@ struct fd_query {
const struct fd_query_funcs *funcs;
bool active;
int type;
unsigned index;
};
static inline struct fd_query *
@ -102,6 +103,12 @@ int pidx(unsigned query_type)
return 3;
case PIPE_QUERY_TIMESTAMP:
return 4;
case PIPE_QUERY_PRIMITIVES_GENERATED:
return 5;
case PIPE_QUERY_PRIMITIVES_EMITTED:
return 6;
default:
return -1;
}

View File

@ -173,7 +173,7 @@ static const struct fd_query_funcs acc_query_funcs = {
struct fd_query *
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
const struct fd_acc_sample_provider *provider)
unsigned index, const struct fd_acc_sample_provider *provider)
{
struct fd_acc_query *aq;
struct fd_query *q;
@ -192,19 +192,21 @@ fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
q = &aq->base;
q->funcs = &acc_query_funcs;
q->type = query_type;
q->index = index;
return q;
}
/*
 * Look up the sample provider slot for query_type via pidx() and build an
 * accumulating query from it with fd_acc_create_query2().  Returns NULL when
 * pidx() yields a negative slot or no provider is registered in that slot.
 *
 * NOTE(review): this hunk carries both the old two-argument form and the new
 * form that forwards `index` -- confirm which lines survive in the applied
 * file.
 */
struct fd_query *
fd_acc_create_query(struct fd_context *ctx, unsigned query_type)
fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
unsigned index)
{
int idx = pidx(query_type);
if ((idx < 0) || !ctx->acc_sample_providers[idx])
return NULL;
return fd_acc_create_query2(ctx, query_type,
return fd_acc_create_query2(ctx, query_type, index,
ctx->acc_sample_providers[idx]);
}

View File

@ -97,9 +97,10 @@ fd_acc_query(struct fd_query *q)
return (struct fd_acc_query *)q;
}
struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type);
struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
unsigned index);
struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
const struct fd_acc_sample_provider *provider);
unsigned index, const struct fd_acc_sample_provider *provider);
void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage);
void fd_acc_query_register_provider(struct pipe_context *pctx,
const struct fd_acc_sample_provider *provider);

View File

@ -266,7 +266,7 @@ static const struct fd_query_funcs hw_query_funcs = {
};
struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
struct fd_hw_query *hq;
struct fd_query *q;
@ -289,6 +289,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
q = &hq->base;
q->funcs = &hw_query_funcs;
q->type = query_type;
q->index = index;
return q;
}

View File

@ -136,7 +136,7 @@ fd_hw_query(struct fd_query *q)
return (struct fd_hw_query *)q;
}
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index);
/* helper for sample providers: */
struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
/* don't call directly, use fd_hw_sample_reference() */

View File

@ -162,7 +162,7 @@ static const struct fd_query_funcs sw_query_funcs = {
};
struct fd_query *
fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
fd_sw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
struct fd_sw_query *sq;
struct fd_query *q;

View File

@ -48,6 +48,6 @@ fd_sw_query(struct fd_query *q)
}
struct fd_query * fd_sw_create_query(struct fd_context *ctx,
unsigned query_type);
unsigned query_type, unsigned index);
#endif /* FREEDRENO_QUERY_SW_H_ */