freedreno/a6xx: Implement primitive count queries on GPU
The driver can't determine PIPE_QUERY_PRIMITIVES_GENERATED or PIPE_QUERY_PRIMITIVES_EMITTED once we support geometry or tessellation, since these stages add primitives at runtime. Use the WRITE_PRIMITIVE_COUNTS event to write back the primitive counts and implement a hw query for this. Reviewed-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
1acf8d2354
commit
30ab3e39fd
|
@ -2394,6 +2394,10 @@ to upconvert to 32b float internally?
|
||||||
<bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/>
|
<bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/>
|
||||||
<bitfield name="B_EN" pos="23" type="boolean"/>
|
<bitfield name="B_EN" pos="23" type="boolean"/>
|
||||||
</reg32>
|
</reg32>
|
||||||
|
|
||||||
|
<reg32 offset="0x9218" name="VPC_SO_STREAM_COUNTS_LO"/>
|
||||||
|
<reg32 offset="0x9219" name="VPC_SO_STREAM_COUNTS_HI"/>
|
||||||
|
|
||||||
<array offset="0x921a" name="VPC_SO" stride="7" length="4">
|
<array offset="0x921a" name="VPC_SO" stride="7" length="4">
|
||||||
<reg32 offset="0" name="BUFFER_BASE_LO"/>
|
<reg32 offset="0" name="BUFFER_BASE_LO"/>
|
||||||
<reg32 offset="1" name="BUFFER_BASE_HI"/>
|
<reg32 offset="1" name="BUFFER_BASE_HI"/>
|
||||||
|
|
|
@ -15,6 +15,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
|
||||||
<value name="VIZQUERY_START" value="7"/> <!-- on a2xx (??) -->
|
<value name="VIZQUERY_START" value="7"/> <!-- on a2xx (??) -->
|
||||||
<value name="VIZQUERY_END" value="8"/>
|
<value name="VIZQUERY_END" value="8"/>
|
||||||
<value name="SC_WAIT_WC" value="9"/>
|
<value name="SC_WAIT_WC" value="9"/>
|
||||||
|
<value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/>
|
||||||
<value name="RST_PIX_CNT" value="13"/>
|
<value name="RST_PIX_CNT" value="13"/>
|
||||||
<value name="RST_VTX_CNT" value="14"/>
|
<value name="RST_VTX_CNT" value="14"/>
|
||||||
<value name="TILE_FLUSH" value="15"/>
|
<value name="TILE_FLUSH" value="15"/>
|
||||||
|
|
|
@ -218,7 +218,7 @@ fd2_create_batch_query(struct pipe_context *pctx,
|
||||||
counters_per_group[entry->gid]++;
|
counters_per_group[entry->gid]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
q = fd_acc_create_query2(ctx, 0, &perfcntr);
|
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
|
||||||
aq = fd_acc_query(q);
|
aq = fd_acc_query(q);
|
||||||
|
|
||||||
/* sample buffer size is based on # of queries: */
|
/* sample buffer size is based on # of queries: */
|
||||||
|
|
|
@ -433,7 +433,7 @@ fd5_create_batch_query(struct pipe_context *pctx,
|
||||||
counters_per_group[entry->gid]++;
|
counters_per_group[entry->gid]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
q = fd_acc_create_query2(ctx, 0, &perfcntr);
|
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
|
||||||
aq = fd_acc_query(q);
|
aq = fd_acc_query(q);
|
||||||
|
|
||||||
/* sample buffer size is based on # of queries: */
|
/* sample buffer size is based on # of queries: */
|
||||||
|
|
|
@ -252,6 +252,96 @@ static const struct fd_acc_sample_provider timestamp = {
|
||||||
.result = timestamp_accumulate_result,
|
.result = timestamp_accumulate_result,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Layout of the buffer backing a primitive-count query.
 *
 * Each slot is a (generated, emitted) pair of 64-bit counters.  start[]
 * and stop[] are the destinations programmed when the query resumes and
 * pauses; result accumulates "stop - start" over every resume/pause
 * cycle.  The snapshot arrays have four slots and the pause handler picks
 * one using the query's index.
 * NOTE(review): presumably one slot per stream-out stream -- confirm.
 */
struct PACKED fd6_primitives_sample {
	struct {
		uint64_t generated;
		uint64_t emitted;
	} start[4], stop[4], result;
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Emit a relocation that refers to `field` of struct fd6_primitives_sample
 * inside the query's backing buffer (aq->prsc).  primitives_relocw() goes
 * through OUT_RELOCW and primitives_reloc() through OUT_RELOC.
 * NOTE(review): presumably the writable / read-only variants -- confirm.
 *
 * `field` is a member designator relative to the sample struct, e.g.
 * start[0] or result.emitted.
 *
 * Neither macro ends in a semicolon: the caller supplies it, so that an
 * invocation is exactly one statement and stays correct when used as the
 * unbraced body of an if/else.
 */
#define primitives_relocw(ring, aq, field) \
	OUT_RELOCW(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)
#define primitives_reloc(ring, aq, field) \
	OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)
|
||||||
|
|
||||||
|
static void
|
||||||
|
primitive_counts_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||||
|
{
|
||||||
|
struct fd_ringbuffer *ring = batch->draw;
|
||||||
|
|
||||||
|
OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
|
||||||
|
primitives_relocw(ring, aq, start[0]);
|
||||||
|
|
||||||
|
fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
primitive_counts_pause(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||||
|
{
|
||||||
|
struct fd_ringbuffer *ring = batch->draw;
|
||||||
|
|
||||||
|
OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
|
||||||
|
primitives_relocw(ring, aq, stop[0]);
|
||||||
|
|
||||||
|
fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
|
||||||
|
|
||||||
|
fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
|
||||||
|
|
||||||
|
/* result += stop - start: */
|
||||||
|
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||||
|
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
|
||||||
|
CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
|
||||||
|
primitives_relocw(ring, aq, result.emitted);
|
||||||
|
primitives_reloc(ring, aq, result.emitted);
|
||||||
|
primitives_reloc(ring, aq, stop[aq->base.index].emitted);
|
||||||
|
primitives_reloc(ring, aq, start[aq->base.index].emitted);
|
||||||
|
|
||||||
|
/* result += stop - start: */
|
||||||
|
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||||
|
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
|
||||||
|
CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
|
||||||
|
primitives_relocw(ring, aq, result.generated);
|
||||||
|
primitives_reloc(ring, aq, result.generated);
|
||||||
|
primitives_reloc(ring, aq, stop[aq->base.index].generated);
|
||||||
|
primitives_reloc(ring, aq, start[aq->base.index].generated);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
primitives_generated_result(struct fd_acc_query *aq, void *buf,
|
||||||
|
union pipe_query_result *result)
|
||||||
|
{
|
||||||
|
struct fd6_primitives_sample *ps = buf;
|
||||||
|
|
||||||
|
result->u64 = ps->result.generated;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct fd_acc_sample_provider primitives_generated = {
|
||||||
|
.query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
|
||||||
|
.active = FD_STAGE_DRAW,
|
||||||
|
.size = sizeof(struct fd6_primitives_sample),
|
||||||
|
.resume = primitive_counts_resume,
|
||||||
|
.pause = primitive_counts_pause,
|
||||||
|
.result = primitives_generated_result,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
primitives_emitted_result(struct fd_acc_query *aq, void *buf,
|
||||||
|
union pipe_query_result *result)
|
||||||
|
{
|
||||||
|
struct fd6_primitives_sample *ps = buf;
|
||||||
|
|
||||||
|
result->u64 = ps->result.emitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct fd_acc_sample_provider primitives_emitted = {
|
||||||
|
.query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
|
||||||
|
.active = FD_STAGE_DRAW,
|
||||||
|
.size = sizeof(struct fd6_primitives_sample),
|
||||||
|
.resume = primitive_counts_resume,
|
||||||
|
.pause = primitive_counts_pause,
|
||||||
|
.result = primitives_emitted_result,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Performance Counter (batch) queries:
|
* Performance Counter (batch) queries:
|
||||||
*
|
*
|
||||||
|
@ -433,7 +523,7 @@ fd6_create_batch_query(struct pipe_context *pctx,
|
||||||
counters_per_group[entry->gid]++;
|
counters_per_group[entry->gid]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
q = fd_acc_create_query2(ctx, 0, &perfcntr);
|
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
|
||||||
aq = fd_acc_query(q);
|
aq = fd_acc_query(q);
|
||||||
|
|
||||||
/* sample buffer size is based on # of queries: */
|
/* sample buffer size is based on # of queries: */
|
||||||
|
@ -463,4 +553,7 @@ fd6_query_context_init(struct pipe_context *pctx)
|
||||||
|
|
||||||
fd_acc_query_register_provider(pctx, &time_elapsed);
|
fd_acc_query_register_provider(pctx, &time_elapsed);
|
||||||
fd_acc_query_register_provider(pctx, &timestamp);
|
fd_acc_query_register_provider(pctx, &timestamp);
|
||||||
|
|
||||||
|
fd_acc_query_register_provider(pctx, &primitives_generated);
|
||||||
|
fd_acc_query_register_provider(pctx, &primitives_emitted);
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,7 @@ enum fd_render_stage {
|
||||||
FD_STAGE_ALL = 0xff,
|
FD_STAGE_ALL = 0xff,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MAX_HW_SAMPLE_PROVIDERS 5
|
#define MAX_HW_SAMPLE_PROVIDERS 7
|
||||||
struct fd_hw_sample_provider;
|
struct fd_hw_sample_provider;
|
||||||
struct fd_hw_sample;
|
struct fd_hw_sample;
|
||||||
|
|
||||||
|
|
|
@ -331,7 +331,7 @@ struct fd_context {
|
||||||
void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
|
void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
|
||||||
|
|
||||||
/* query: */
|
/* query: */
|
||||||
struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type);
|
struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type, unsigned index);
|
||||||
void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles);
|
void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles);
|
||||||
void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
|
void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
|
||||||
struct fd_ringbuffer *ring);
|
struct fd_ringbuffer *ring);
|
||||||
|
|
|
@ -41,11 +41,12 @@ static struct pipe_query *
|
||||||
fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
|
fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
|
||||||
{
|
{
|
||||||
struct fd_context *ctx = fd_context(pctx);
|
struct fd_context *ctx = fd_context(pctx);
|
||||||
struct fd_query *q;
|
struct fd_query *q = NULL;
|
||||||
|
|
||||||
q = fd_sw_create_query(ctx, query_type);
|
if (ctx->create_query)
|
||||||
if (!q && ctx->create_query)
|
q = ctx->create_query(ctx, query_type, index);
|
||||||
q = ctx->create_query(ctx, query_type);
|
if (!q)
|
||||||
|
q = fd_sw_create_query(ctx, query_type, index);
|
||||||
|
|
||||||
return (struct pipe_query *) q;
|
return (struct pipe_query *) q;
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,6 +46,7 @@ struct fd_query {
|
||||||
const struct fd_query_funcs *funcs;
|
const struct fd_query_funcs *funcs;
|
||||||
bool active;
|
bool active;
|
||||||
int type;
|
int type;
|
||||||
|
unsigned index;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct fd_query *
|
static inline struct fd_query *
|
||||||
|
@ -102,6 +103,12 @@ int pidx(unsigned query_type)
|
||||||
return 3;
|
return 3;
|
||||||
case PIPE_QUERY_TIMESTAMP:
|
case PIPE_QUERY_TIMESTAMP:
|
||||||
return 4;
|
return 4;
|
||||||
|
|
||||||
|
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||||
|
return 5;
|
||||||
|
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||||
|
return 6;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -173,7 +173,7 @@ static const struct fd_query_funcs acc_query_funcs = {
|
||||||
|
|
||||||
struct fd_query *
|
struct fd_query *
|
||||||
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
|
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
|
||||||
const struct fd_acc_sample_provider *provider)
|
unsigned index, const struct fd_acc_sample_provider *provider)
|
||||||
{
|
{
|
||||||
struct fd_acc_query *aq;
|
struct fd_acc_query *aq;
|
||||||
struct fd_query *q;
|
struct fd_query *q;
|
||||||
|
@ -192,19 +192,21 @@ fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
|
||||||
q = &aq->base;
|
q = &aq->base;
|
||||||
q->funcs = &acc_query_funcs;
|
q->funcs = &acc_query_funcs;
|
||||||
q->type = query_type;
|
q->type = query_type;
|
||||||
|
q->index = index;
|
||||||
|
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct fd_query *
|
struct fd_query *
|
||||||
fd_acc_create_query(struct fd_context *ctx, unsigned query_type)
|
fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
|
||||||
|
unsigned index)
|
||||||
{
|
{
|
||||||
int idx = pidx(query_type);
|
int idx = pidx(query_type);
|
||||||
|
|
||||||
if ((idx < 0) || !ctx->acc_sample_providers[idx])
|
if ((idx < 0) || !ctx->acc_sample_providers[idx])
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
return fd_acc_create_query2(ctx, query_type,
|
return fd_acc_create_query2(ctx, query_type, index,
|
||||||
ctx->acc_sample_providers[idx]);
|
ctx->acc_sample_providers[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -97,9 +97,10 @@ fd_acc_query(struct fd_query *q)
|
||||||
return (struct fd_acc_query *)q;
|
return (struct fd_acc_query *)q;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type);
|
struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
|
||||||
|
unsigned index);
|
||||||
struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
|
struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
|
||||||
const struct fd_acc_sample_provider *provider);
|
unsigned index, const struct fd_acc_sample_provider *provider);
|
||||||
void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage);
|
void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage);
|
||||||
void fd_acc_query_register_provider(struct pipe_context *pctx,
|
void fd_acc_query_register_provider(struct pipe_context *pctx,
|
||||||
const struct fd_acc_sample_provider *provider);
|
const struct fd_acc_sample_provider *provider);
|
||||||
|
|
|
@ -266,7 +266,7 @@ static const struct fd_query_funcs hw_query_funcs = {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct fd_query *
|
struct fd_query *
|
||||||
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
|
fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
|
||||||
{
|
{
|
||||||
struct fd_hw_query *hq;
|
struct fd_hw_query *hq;
|
||||||
struct fd_query *q;
|
struct fd_query *q;
|
||||||
|
@ -289,6 +289,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
|
||||||
q = &hq->base;
|
q = &hq->base;
|
||||||
q->funcs = &hw_query_funcs;
|
q->funcs = &hw_query_funcs;
|
||||||
q->type = query_type;
|
q->type = query_type;
|
||||||
|
q->index = index;
|
||||||
|
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
|
@ -136,7 +136,7 @@ fd_hw_query(struct fd_query *q)
|
||||||
return (struct fd_hw_query *)q;
|
return (struct fd_hw_query *)q;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
|
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index);
|
||||||
/* helper for sample providers: */
|
/* helper for sample providers: */
|
||||||
struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
|
struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
|
||||||
/* don't call directly, use fd_hw_sample_reference() */
|
/* don't call directly, use fd_hw_sample_reference() */
|
||||||
|
|
|
@ -162,7 +162,7 @@ static const struct fd_query_funcs sw_query_funcs = {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct fd_query *
|
struct fd_query *
|
||||||
fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
|
fd_sw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
|
||||||
{
|
{
|
||||||
struct fd_sw_query *sq;
|
struct fd_sw_query *sq;
|
||||||
struct fd_query *q;
|
struct fd_query *q;
|
||||||
|
|
|
@ -48,6 +48,6 @@ fd_sw_query(struct fd_query *q)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct fd_query * fd_sw_create_query(struct fd_context *ctx,
|
struct fd_query * fd_sw_create_query(struct fd_context *ctx,
|
||||||
unsigned query_type);
|
unsigned query_type, unsigned index);
|
||||||
|
|
||||||
#endif /* FREEDRENO_QUERY_SW_H_ */
|
#endif /* FREEDRENO_QUERY_SW_H_ */
|
||||||
|
|
Loading…
Reference in New Issue