anv: add perfetto source
v2: Increase custom stall data (Felix) Fixup build (Felix) v3: Add API enum (Rohan) Fixup old comment (Rohan) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
This commit is contained in:
parent
b70143f4e3
commit
e760c5b37b
|
@ -161,6 +161,14 @@ set of HW counters :
|
|||
|
||||
INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
|
||||
|
||||
Vulkan applications can also be instrumented to be Perfetto producers.
|
||||
To enable this for a given application, set the environment variable as
|
||||
follows:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
PERFETTO_TRACE=1 my_vulkan_app
|
||||
|
||||
Panfrost
|
||||
^^^^^^^^
|
||||
|
||||
|
|
|
@ -0,0 +1,585 @@
|
|||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "common/intel_gem.h"
|
||||
#include "perf/intel_perf.h"
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_process.h"
|
||||
|
||||
#include "intel_driver_ds.h"
|
||||
#include "intel_pps_priv.h"
|
||||
#include "intel_tracepoints.h"
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
#include "util/u_perfetto.h"
|
||||
|
||||
#include "intel_tracepoints_perfetto.h"
|
||||
|
||||
/* Just naming stages */
|
||||
static const struct {
|
||||
const char *name;
|
||||
|
||||
/* Tells us if a given stage is pipelined. This is used to build stacks of
|
||||
* pipelined elements so that the perfetto UI doesn't get confused by elements
|
||||
* ending out of order.
|
||||
*/
|
||||
bool pipelined;
|
||||
|
||||
/* The perfetto UI requires that there is a parent-child relationship
|
||||
* within a row of elements. Which means that all children elements must
|
||||
* end within the lifespan of their parent.
|
||||
*
|
||||
* Some elements like stalls and command buffers follow that relationship,
|
||||
* but not all. This tells us in which UI row the elements should live.
|
||||
*/
|
||||
enum intel_ds_queue_stage draw_stage;
|
||||
} intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = {
|
||||
/* Order must match the enum! */
|
||||
{
|
||||
"cmd-buffer",
|
||||
false,
|
||||
INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
|
||||
},
|
||||
{
|
||||
"stall",
|
||||
false,
|
||||
INTEL_DS_QUEUE_STAGE_STALL,
|
||||
},
|
||||
{
|
||||
"compute",
|
||||
true,
|
||||
INTEL_DS_QUEUE_STAGE_COMPUTE,
|
||||
},
|
||||
{
|
||||
"render-pass",
|
||||
true,
|
||||
INTEL_DS_QUEUE_STAGE_RENDER_PASS,
|
||||
},
|
||||
{
|
||||
"blorp",
|
||||
true,
|
||||
INTEL_DS_QUEUE_STAGE_BLORP,
|
||||
},
|
||||
{
|
||||
"draw",
|
||||
true,
|
||||
INTEL_DS_QUEUE_STAGE_DRAW,
|
||||
},
|
||||
};
|
||||
|
||||
/* Per-sequence incremental state of the data source. */
struct IntelRenderpassIncrementalState {
   /* Starts out true; the tracing callbacks in this file emit the
    * descriptors and then reset it to false.
    */
   bool was_cleared{true};
};
|
||||
|
||||
struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits {
|
||||
using IncrementalStateType = IntelRenderpassIncrementalState;
|
||||
};
|
||||
|
||||
/* Perfetto data source through which the render stage events are emitted. */
class IntelRenderpassDataSource : public perfetto::DataSource<IntelRenderpassDataSource,
                                                              IntelRenderpassTraits> {
public:
   /* Hook in which custom configuration based on the TraceConfig carried by
    * SetupArgs could be applied. Nothing to do at the moment.
    */
   void OnSetup(const SetupArgs &) override
   {
   }

   /* Hook that can be used to initialize the GPU driver, enable counters,
    * etc. StartArgs contains the DataSourceDescriptor, which can be extended.
    */
   void OnStart(const StartArgs &) override
   {
      u_trace_perfetto_start();
      PERFETTO_LOG("Tracing started");
   }

   /* Undoes OnStart and pushes a last, empty packet followed by a flush. */
   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Tracing stopped");

      // Undo any initialization done in OnStart.
      u_trace_perfetto_stop();
      // TODO we should perhaps block until queued traces are flushed?

      Trace([](IntelRenderpassDataSource::TraceContext tctx) {
         auto last_packet = tctx.NewTracePacket();
         last_packet->Finalize();
         tctx.Flush();
      });
   }
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
|
||||
|
||||
using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;
|
||||
|
||||
enum InternedGpuRenderStageSpecification_RenderStageCategory
|
||||
i915_engine_class_to_category(enum drm_i915_gem_engine_class engine_class)
|
||||
{
|
||||
switch (engine_class) {
|
||||
case I915_ENGINE_CLASS_RENDER:
|
||||
return InternedGpuRenderStageSpecification_RenderStageCategory::
|
||||
InternedGpuRenderStageSpecification_RenderStageCategory_GRAPHICS;
|
||||
default:
|
||||
return InternedGpuRenderStageSpecification_RenderStageCategory::InternedGpuRenderStageSpecification_RenderStageCategory_OTHER;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx,
|
||||
struct intel_ds_device *device)
|
||||
{
|
||||
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint64_t gpu_ts = intel_device_info_timebase_scale(&device->info,
|
||||
intel_read_gpu_timestamp(device->fd));
|
||||
|
||||
if (cpu_ts < device->next_clock_sync_ns)
|
||||
return;
|
||||
|
||||
PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);
|
||||
|
||||
device->sync_gpu_ts = gpu_ts;
|
||||
device->next_clock_sync_ns = cpu_ts + 1000000000ull;
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
packet->set_timestamp(cpu_ts);
|
||||
|
||||
auto event = packet->set_clock_snapshot();
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
clock->set_timestamp(cpu_ts);
|
||||
}
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(device->gpu_clock_id);
|
||||
clock->set_timestamp(gpu_ts);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
|
||||
struct intel_ds_device *device)
|
||||
{
|
||||
struct intel_ds_queue *queue;
|
||||
|
||||
PERFETTO_LOG("Sending renderstage descriptors");
|
||||
|
||||
device->event_id = 0;
|
||||
u_vector_foreach(queue, &device->queues) {
|
||||
for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
|
||||
queue->stages[s].start_ns = 0;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
|
||||
packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
|
||||
|
||||
auto interned_data = packet->set_interned_data();
|
||||
|
||||
{
|
||||
auto desc = interned_data->add_graphics_contexts();
|
||||
desc->set_iid(device->iid);
|
||||
desc->set_pid(getpid());
|
||||
switch (device->api) {
|
||||
case INTEL_DS_API_OPENGL:
|
||||
desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_OPEN_GL);
|
||||
break;
|
||||
case INTEL_DS_API_VULKAN:
|
||||
desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_VULKAN);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit all the IID picked at device/queue creation. */
|
||||
u_vector_foreach(queue, &device->queues) {
|
||||
for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
|
||||
{
|
||||
/* We put the stage number in there so that all rows are order
|
||||
* by intel_ds_queue_stage.
|
||||
*/
|
||||
char name[100];
|
||||
snprintf(name, sizeof(name), "%.10s-%s-%u-%s",
|
||||
util_get_process_name(),
|
||||
queue->name, s, intel_queue_stage_desc[s].name);
|
||||
|
||||
auto desc = interned_data->add_gpu_specifications();
|
||||
desc->set_iid(queue->stages[s].queue_iid);
|
||||
desc->set_name(name);
|
||||
}
|
||||
{
|
||||
auto desc = interned_data->add_gpu_specifications();
|
||||
desc->set_iid(queue->stages[s].stage_iid);
|
||||
desc->set_name(intel_queue_stage_desc[s].name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
device->next_clock_sync_ns = 0;
|
||||
sync_timestamp(ctx, device);
|
||||
}
|
||||
|
||||
typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
|
||||
|
||||
static void
|
||||
begin_event(struct intel_ds_queue *queue, uint64_t ts_ns,
|
||||
enum intel_ds_queue_stage stage_id)
|
||||
{
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!queue->device->sync_gpu_ts) {
|
||||
queue->stages[stage_id].start_ns = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
queue->stages[stage_id].start_ns = ts_ns;
|
||||
}
|
||||
|
||||
static void
|
||||
end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
|
||||
enum intel_ds_queue_stage stage_id,
|
||||
uint32_t submission_id, const void* payload = nullptr,
|
||||
trace_payload_as_extra_func payload_as_extra = nullptr)
|
||||
{
|
||||
struct intel_ds_device *device = queue->device;
|
||||
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!device->sync_gpu_ts)
|
||||
return;
|
||||
|
||||
struct intel_ds_stage *stage = &queue->stages[stage_id];
|
||||
uint64_t start_ns = stage->start_ns;
|
||||
|
||||
if (!start_ns)
|
||||
return;
|
||||
|
||||
uint64_t evt_id = device->event_id++;
|
||||
|
||||
IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, queue->device);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
sync_timestamp(tctx, queue->device);
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(start_ns);
|
||||
packet->set_timestamp_clock_id(queue->device->gpu_clock_id);
|
||||
|
||||
assert(ts_ns >= start_ns);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_gpu_id(queue->device->gpu_id);
|
||||
|
||||
event->set_hw_queue_iid(stage->queue_iid);
|
||||
event->set_stage_iid(stage->stage_iid);
|
||||
event->set_context(queue->device->iid);
|
||||
event->set_event_id(evt_id);
|
||||
event->set_duration(ts_ns - start_ns);
|
||||
event->set_submission_id(submission_id);
|
||||
|
||||
if (payload && payload_as_extra) {
|
||||
payload_as_extra(event, payload);
|
||||
}
|
||||
});
|
||||
|
||||
stage->start_ns = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
|
||||
const struct trace_intel_end_stall *payload)
|
||||
{
|
||||
char buf[256];
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
data->set_name("stall_reason");
|
||||
|
||||
snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s",
|
||||
(payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
|
||||
(payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
|
||||
(payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
|
||||
(payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
|
||||
(payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
|
||||
(payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
|
||||
(payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
|
||||
(payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
|
||||
(payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
|
||||
(payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
|
||||
(payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
|
||||
(payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
|
||||
(payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
|
||||
(payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
|
||||
payload->reason ? payload->reason : "unknown");
|
||||
|
||||
assert(strlen(buf) > 0);
|
||||
|
||||
data->set_value(buf);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
/*
|
||||
* Trace callbacks, called from u_trace once the timestamps from GPU have been
|
||||
* collected.
|
||||
*/
|
||||
|
||||
/* Define the intel_ds_begin_<event_name>() / intel_ds_end_<event_name>()
 * pair of callbacks, both reporting on the given queue stage. The end
 * callback forwards the payload to the matching
 * trace_payload_as_extra_intel_end_<event_name>() helper.
 */
#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage)                        \
   void                                                                      \
   intel_ds_begin_##event_name(struct intel_ds_device *device,               \
                               uint64_t ts_ns,                               \
                               const void *flush_data,                       \
                               const struct trace_intel_begin_##event_name *payload) \
   {                                                                         \
      const struct intel_ds_flush_data *flush_info =                         \
         (const struct intel_ds_flush_data *) flush_data;                    \
      begin_event(flush_info->queue, ts_ns, stage);                          \
   }                                                                         \
                                                                             \
   void                                                                      \
   intel_ds_end_##event_name(struct intel_ds_device *device,                 \
                             uint64_t ts_ns,                                 \
                             const void *flush_data,                         \
                             const struct trace_intel_end_##event_name *payload) \
   {                                                                         \
      const struct intel_ds_flush_data *flush_info =                         \
         (const struct intel_ds_flush_data *) flush_data;                    \
      end_event(flush_info->queue, ts_ns, stage,                             \
                flush_info->submission_id,                                   \
                payload,                                                     \
                (trace_payload_as_extra_func)                                \
                &trace_payload_as_extra_intel_end_##event_name);             \
   }
|
||||
|
||||
|
||||
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
|
||||
|
||||
void
|
||||
intel_ds_begin_stall(struct intel_ds_device *device,
|
||||
uint64_t ts_ns,
|
||||
const void *flush_data,
|
||||
const struct trace_intel_begin_stall *payload)
|
||||
{
|
||||
const struct intel_ds_flush_data *flush =
|
||||
(const struct intel_ds_flush_data *) flush_data;
|
||||
begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
|
||||
}
|
||||
|
||||
void
|
||||
intel_ds_end_stall(struct intel_ds_device *device,
|
||||
uint64_t ts_ns,
|
||||
const void *flush_data,
|
||||
const struct trace_intel_end_stall *payload)
|
||||
{
|
||||
const struct intel_ds_flush_data *flush =
|
||||
(const struct intel_ds_flush_data *) flush_data;
|
||||
end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL, flush->submission_id,
|
||||
payload,
|
||||
(trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
|
||||
}
|
||||
|
||||
/* Return the current boot time clock value in nanoseconds, to be handed
 * back to intel_ds_end_submit() as start_ts. @queue is unused.
 */
uint64_t
intel_ds_begin_submit(struct intel_ds_queue *queue)
{
   const uint64_t now_ns = perfetto::base::GetBootTimeNs().count();
   return now_ns;
}
|
||||
|
||||
void
|
||||
intel_ds_end_submit(struct intel_ds_queue *queue,
|
||||
uint64_t start_ts)
|
||||
{
|
||||
if (!u_trace_context_actively_tracing(&queue->device->trace_context)) {
|
||||
queue->device->sync_gpu_ts = 0;
|
||||
queue->device->next_clock_sync_ns = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint32_t submission_id = queue->submission_id++;
|
||||
|
||||
IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, queue->device);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
sync_timestamp(tctx, queue->device);
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(start_ts);
|
||||
|
||||
auto event = packet->set_vulkan_api_event();
|
||||
auto submit = event->set_vk_queue_submit();
|
||||
|
||||
// submit->set_pid(os_get_pid());
|
||||
// submit->set_tid(os_get_tid());
|
||||
submit->set_duration_ns(end_ts - start_ts);
|
||||
submit->set_vk_queue((uintptr_t) queue);
|
||||
submit->set_submission_id(submission_id);
|
||||
});
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
/* One time initialization, run through call_once() by
 * intel_driver_ds_init(). With perfetto support compiled in, it initializes
 * perfetto and registers the "gpu.renderstages.intel" data source; otherwise
 * it does nothing.
 */
static void
intel_driver_ds_init_once(void)
{
#ifdef HAVE_PERFETTO
   util_perfetto_init();

   perfetto::DataSourceDescriptor descriptor;
   descriptor.set_name("gpu.renderstages.intel");
   IntelRenderpassDataSource::Register(descriptor);
#endif
}
|
||||
|
||||
static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT;
|
||||
|
||||
/* Hand out a new identifier on each call, starting at 1. The counter is a
 * plain function-local static with no synchronization.
 */
static uint64_t get_iid()
{
   static uint64_t next_iid = 1;

   const uint64_t assigned = next_iid;
   next_iid += 1;
   return assigned;
}
|
||||
|
||||
void
|
||||
intel_driver_ds_init(void)
|
||||
{
|
||||
call_once(&intel_driver_ds_once_flag,
|
||||
intel_driver_ds_init_once);
|
||||
}
|
||||
|
||||
void
|
||||
intel_ds_device_init(struct intel_ds_device *device,
|
||||
struct intel_device_info *devinfo,
|
||||
int drm_fd,
|
||||
uint32_t gpu_id,
|
||||
enum intel_ds_api api)
|
||||
{
|
||||
memset(device, 0, sizeof(*device));
|
||||
|
||||
assert(gpu_id < 128);
|
||||
device->gpu_id = gpu_id;
|
||||
device->gpu_clock_id = intel_pps_clock_id(gpu_id);
|
||||
device->fd = drm_fd;
|
||||
device->info = *devinfo;
|
||||
device->iid = get_iid();
|
||||
device->api = api;
|
||||
u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue));
|
||||
}
|
||||
|
||||
void
|
||||
intel_ds_device_fini(struct intel_ds_device *device)
|
||||
{
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
u_vector_finish(&device->queues);
|
||||
}
|
||||
|
||||
struct intel_ds_queue *
|
||||
intel_ds_device_add_queue(struct intel_ds_device *device,
|
||||
const char *fmt_name,
|
||||
...)
|
||||
{
|
||||
struct intel_ds_queue *queue =
|
||||
(struct intel_ds_queue *) u_vector_add(&device->queues);
|
||||
va_list ap;
|
||||
|
||||
memset(queue, 0, sizeof(*queue));
|
||||
|
||||
queue->device = device;
|
||||
queue->queue_id = u_vector_length(&device->queues) - 1;
|
||||
|
||||
va_start(ap, fmt_name);
|
||||
vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
|
||||
va_end(ap);
|
||||
|
||||
for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
|
||||
queue->stages[s].queue_iid = get_iid();
|
||||
queue->stages[s].stage_iid = get_iid();
|
||||
}
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
|
||||
struct intel_ds_queue *queue,
|
||||
uint64_t submission_id)
|
||||
{
|
||||
memset(data, 0, sizeof(*data));
|
||||
|
||||
data->queue = queue;
|
||||
data->submission_id = submission_id;
|
||||
|
||||
u_trace_init(&data->trace, &queue->device->trace_context);
|
||||
}
|
||||
|
||||
void intel_ds_flush_data_fini(struct intel_ds_flush_data *data)
|
||||
{
|
||||
u_trace_fini(&data->trace);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INTEL_DRIVER_DS_H
|
||||
#define INTEL_DRIVER_DS_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
#include "util/u_vector.h"
|
||||
|
||||
#include "dev/intel_device_info.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Graphics API a device is used with. */
enum intel_ds_api {
   INTEL_DS_API_OPENGL = 0,
   INTEL_DS_API_VULKAN = 1,
};
|
||||
|
||||
enum intel_ds_stall_flag {
|
||||
INTEL_DS_DEPTH_CACHE_FLUSH_BIT = BITFIELD_BIT(0),
|
||||
INTEL_DS_DATA_CACHE_FLUSH_BIT = BITFIELD_BIT(1),
|
||||
INTEL_DS_HDC_PIPELINE_FLUSH_BIT = BITFIELD_BIT(2),
|
||||
INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT = BITFIELD_BIT(3),
|
||||
INTEL_DS_TILE_CACHE_FLUSH_BIT = BITFIELD_BIT(4),
|
||||
INTEL_DS_STATE_CACHE_INVALIDATE_BIT = BITFIELD_BIT(5),
|
||||
INTEL_DS_CONST_CACHE_INVALIDATE_BIT = BITFIELD_BIT(6),
|
||||
INTEL_DS_VF_CACHE_INVALIDATE_BIT = BITFIELD_BIT(7),
|
||||
INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT = BITFIELD_BIT(8),
|
||||
INTEL_DS_INST_CACHE_INVALIDATE_BIT = BITFIELD_BIT(9),
|
||||
INTEL_DS_STALL_AT_SCOREBOARD_BIT = BITFIELD_BIT(10),
|
||||
INTEL_DS_DEPTH_STALL_BIT = BITFIELD_BIT(11),
|
||||
INTEL_DS_CS_STALL_BIT = BITFIELD_BIT(12),
|
||||
};
|
||||
|
||||
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */
|
||||
typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags);
|
||||
|
||||
/* Stages of a queue. The values are used as indices into per stage arrays
 * (intel_ds_queue::stages, and the stage description table of
 * intel_driver_ds.cc which must be kept in the same order).
 */
enum intel_ds_queue_stage {
   INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
   INTEL_DS_QUEUE_STAGE_STALL,
   INTEL_DS_QUEUE_STAGE_COMPUTE,
   INTEL_DS_QUEUE_STAGE_RENDER_PASS,
   INTEL_DS_QUEUE_STAGE_BLORP,
   INTEL_DS_QUEUE_STAGE_DRAW,

   /* Number of stages, keep last. */
   INTEL_DS_QUEUE_STAGE_N_STAGES,
};
|
||||
|
||||
struct intel_ds_device {
|
||||
struct intel_device_info info;
|
||||
|
||||
/* DRM fd */
|
||||
int fd;
|
||||
|
||||
/* API of this device */
|
||||
enum intel_ds_api api;
|
||||
|
||||
/* GPU identifier (minor number) */
|
||||
uint32_t gpu_id;
|
||||
|
||||
/* Clock identifier for this device. */
|
||||
uint32_t gpu_clock_id;
|
||||
|
||||
/* The timestamp at the point where we first emitted the clock_sync..
|
||||
* this will be a *later* timestamp that the first GPU traces (since
|
||||
* we capture the first clock_sync from the CPU *after* the first GPU
|
||||
* tracepoints happen). To avoid confusing perfetto we need to drop
|
||||
* the GPU traces with timestamps before this.
|
||||
*/
|
||||
uint64_t sync_gpu_ts;
|
||||
|
||||
/* Next timestamp after which we should resend a clock correlation. */
|
||||
uint64_t next_clock_sync_ns;
|
||||
|
||||
/* Unique perfetto identifier for the context */
|
||||
uint64_t iid;
|
||||
|
||||
/* Event ID generator */
|
||||
uint64_t event_id;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
|
||||
/* List of intel_ds_queue */
|
||||
struct u_vector queues;
|
||||
};
|
||||
|
||||
/* Tracing state of one stage of a queue. */
struct intel_ds_stage {
   /* Unique hw_queue IID */
   uint64_t queue_iid;

   /* Unique stage IID */
   uint64_t stage_iid;

   /* Start timestamp of the last work element, 0 when there is none to
    * report.
    */
   uint64_t start_ns;
};
|
||||
|
||||
struct intel_ds_queue {
|
||||
/* Device this queue belongs to */
|
||||
struct intel_ds_device *device;
|
||||
|
||||
/* Unique queue ID across the device */
|
||||
uint32_t queue_id;
|
||||
|
||||
/* Unique name of the queue */
|
||||
char name[80];
|
||||
|
||||
/* Counter incremented on each intel_ds_end_submit() call */
|
||||
uint64_t submission_id;
|
||||
|
||||
struct intel_ds_stage stages[INTEL_DS_QUEUE_STAGE_N_STAGES];
|
||||
};
|
||||
|
||||
struct intel_ds_flush_data {
|
||||
struct intel_ds_queue *queue;
|
||||
|
||||
/* u_trace element in which we copy other traces in case we deal with
|
||||
* reusable command buffers.
|
||||
*/
|
||||
struct u_trace trace;
|
||||
|
||||
/* Unique submission ID associated with the trace */
|
||||
uint64_t submission_id;
|
||||
};
|
||||
|
||||
void intel_driver_ds_init(void);
|
||||
|
||||
void intel_ds_device_init(struct intel_ds_device *device,
|
||||
struct intel_device_info *devinfo,
|
||||
int drm_fd,
|
||||
uint32_t gpu_id,
|
||||
enum intel_ds_api api);
|
||||
void intel_ds_device_fini(struct intel_ds_device *device);
|
||||
|
||||
struct intel_ds_queue *intel_ds_device_add_queue(struct intel_ds_device *device,
|
||||
const char *fmt_name,
|
||||
...);
|
||||
|
||||
void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
|
||||
struct intel_ds_queue *queue,
|
||||
uint64_t submission_id);
|
||||
|
||||
void intel_ds_flush_data_fini(struct intel_ds_flush_data *data);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue);
|
||||
void intel_ds_end_submit(struct intel_ds_queue *queue,
|
||||
uint64_t start_ts);
|
||||
|
||||
#else
|
||||
|
||||
/* Stub used when perfetto support is not compiled in: there is no
 * timestamp to capture, always returns 0.
 */
static inline uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue)
{
   (void) queue;
   return 0;
}
|
||||
|
||||
/* Stub used when perfetto support is not compiled in: does nothing. */
static inline void intel_ds_end_submit(struct intel_ds_queue *queue,
                                       uint64_t start_ts)
{
   (void) queue;
   (void) start_ts;
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* INTEL_DRIVER_DS_H */
|
|
@ -34,14 +34,18 @@ def define_tracepoints(args):
|
|||
from u_trace import TracepointArg as Arg
|
||||
from u_trace import TracepointArgStruct as ArgStruct
|
||||
|
||||
Header('anv_private.h', scope=HeaderScope.SOURCE)
|
||||
Header('intel_driver_ds.h', scope=HeaderScope.SOURCE)
|
||||
Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)
|
||||
Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER)
|
||||
|
||||
def begin_end_tp(name, tp_args=[], tp_struct=None, end_pipelined=True):
|
||||
Tracepoint('begin_{0}'.format(name))
|
||||
Tracepoint('end_{0}'.format(name),
|
||||
def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, end_pipelined=True):
|
||||
Tracepoint('intel_begin_{0}'.format(name),
|
||||
tp_perfetto='intel_ds_begin_{0}'.format(name))
|
||||
Tracepoint('intel_end_{0}'.format(name),
|
||||
args=tp_args,
|
||||
tp_struct=tp_struct,
|
||||
tp_perfetto='intel_ds_end_{0}'.format(name),
|
||||
tp_print=tp_print,
|
||||
end_of_pipe=end_pipelined)
|
||||
|
||||
|
||||
|
@ -73,8 +77,8 @@ def define_tracepoints(args):
|
|||
Arg(type='uint32_t', name='height', var='height', c_format='%u'),
|
||||
Arg(type='enum isl_aux_op', name='hiz_op', var='hiz_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
|
||||
Arg(type='enum isl_aux_op', name='fast_clear_op', var='fast_clear_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
|
||||
Arg(type='enum blorp_shader_type', name='type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'),
|
||||
Arg(type='enum blorp_shader_pipeline', name='pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),])
|
||||
Arg(type='enum blorp_shader_type', name='blorp_type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'),
|
||||
Arg(type='enum blorp_shader_pipeline', name='blorp_pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),])
|
||||
|
||||
begin_end_tp('draw',
|
||||
tp_args=[ArgStruct(type='uint32_t', var='count'),],
|
||||
|
@ -110,42 +114,58 @@ def define_tracepoints(args):
|
|||
ArgStruct(type='uint32_t', var='group_z'),],
|
||||
tp_struct=[Arg(type='uint32_t', name='group_x', var='group_x', c_format='%u'),
|
||||
Arg(type='uint32_t', name='group_y', var='group_y', c_format='%u'),
|
||||
Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),])
|
||||
Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),],
|
||||
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
|
||||
|
||||
def flag_bits(args):
|
||||
bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')]
|
||||
for a in args:
|
||||
bits.append(Arg(type='bool', name=a[1], var='__entry->flags & INTEL_DS_{0}_BIT'.format(a[0]), c_format='%u'))
|
||||
return bits
|
||||
|
||||
def stall_args(args):
|
||||
fmt = ''
|
||||
exprs = []
|
||||
for a in args:
|
||||
fmt += '%s'
|
||||
exprs.append('(__entry->flags & ANV_PIPE_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
|
||||
exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
|
||||
fmt += ' : %s'
|
||||
exprs.append('__entry->reason ? __entry->reason : "unknown"')
|
||||
fmt = [fmt]
|
||||
fmt += exprs
|
||||
return fmt
|
||||
|
||||
Tracepoint('stall',
|
||||
args=[ArgStruct(type='uint32_t', var='flags'),],
|
||||
tp_struct=[Arg(type='uint32_t', name='flags', var='flags', c_format='0x%x'),],
|
||||
tp_print=stall_args([['DEPTH_CACHE_FLUSH', 'depth_flush'],
|
||||
stall_flags = [['DEPTH_CACHE_FLUSH', 'depth_flush'],
|
||||
['DATA_CACHE_FLUSH', 'dc_flush'],
|
||||
['HDC_PIPELINE_FLUSH', 'hdc_flush'],
|
||||
['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'],
|
||||
['TILE_CACHE_FLUSH', 'tile_flush'],
|
||||
['STATE_CACHE_INVALIDATE', 'state_inval'],
|
||||
['CONSTANT_CACHE_INVALIDATE', 'const_inval'],
|
||||
['CONST_CACHE_INVALIDATE', 'const_inval'],
|
||||
['VF_CACHE_INVALIDATE', 'vf_inval'],
|
||||
['TEXTURE_CACHE_INVALIDATE', 'tex_inval'],
|
||||
['INSTRUCTION_CACHE_INVALIDATE', 'ic_inval'],
|
||||
['INST_CACHE_INVALIDATE', 'ic_inval'],
|
||||
['STALL_AT_SCOREBOARD', 'pb_stall'],
|
||||
['DEPTH_STALL', 'depth_stall'],
|
||||
['CS_STALL', 'cs_stall'],
|
||||
]))
|
||||
['CS_STALL', 'cs_stall']]
|
||||
|
||||
begin_end_tp('stall',
|
||||
tp_args=[ArgStruct(type='uint32_t', var='flags'),
|
||||
ArgStruct(type='intel_ds_stall_cb_t', var='decode_cb'),
|
||||
ArgStruct(type='const char *', var='reason'),],
|
||||
tp_struct=[Arg(type='uint32_t', name='flags', var='decode_cb(flags)', c_format='0x%x'),
|
||||
Arg(type='const char *', name='reason', var='reason', c_format='%s'),],
|
||||
tp_print=stall_args(stall_flags),
|
||||
end_pipelined=False)
|
||||
|
||||
|
||||
def generate_code(args):
|
||||
from u_trace import utrace_generate
|
||||
from u_trace import utrace_generate_perfetto_utils
|
||||
|
||||
utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct anv_device *dev')
|
||||
utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr,
|
||||
ctx_param='struct intel_ds_device *dev')
|
||||
utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -153,6 +173,7 @@ def main():
|
|||
parser.add_argument('-p', '--import-path', required=True)
|
||||
parser.add_argument('--utrace-src', required=True)
|
||||
parser.add_argument('--utrace-hdr', required=True)
|
||||
parser.add_argument('--perfetto-hdr', required=True)
|
||||
args = parser.parse_args()
|
||||
sys.path.insert(0, args.import_path)
|
||||
define_tracepoints(args)
|
|
@ -3,11 +3,56 @@
|
|||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
pps_intel_sources = [
|
||||
'intel_pps_perf.cc',
|
||||
'intel_pps_driver.cc',
|
||||
intel_tracepoint_files = custom_target(
|
||||
'intel_tracepoints.[ch]',
|
||||
input : 'intel_tracepoints.py',
|
||||
output : ['intel_tracepoints.h',
|
||||
'intel_tracepoints_perfetto.h',
|
||||
'intel_tracepoints.c'],
|
||||
command : [
|
||||
prog_python, '@INPUT@',
|
||||
'-p', join_paths(meson.source_root(), 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--perfetto-hdr', '@OUTPUT1@',
|
||||
'--utrace-src', '@OUTPUT2@',
|
||||
],
|
||||
depend_files : u_trace_py,
|
||||
)
|
||||
|
||||
libintel_driver_ds_deps = [
|
||||
idep_mesautil,
|
||||
idep_nir_headers,
|
||||
]
|
||||
|
||||
if with_perfetto
|
||||
libintel_driver_ds_deps += dep_perfetto
|
||||
endif
|
||||
|
||||
idep_intel_driver_ds_headers = declare_dependency(
|
||||
sources : intel_tracepoint_files[0],
|
||||
include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel],
|
||||
)
|
||||
|
||||
libintel_driver_ds = static_library(
|
||||
'intel-driver-ds',
|
||||
sources : ['intel_driver_ds.cc', intel_tracepoint_files],
|
||||
include_directories : [inc_src, inc_include, inc_intel, inc_mapi, inc_mesa],
|
||||
link_with : [libintel_perf, libintel_dev],
|
||||
dependencies : libintel_driver_ds_deps,
|
||||
cpp_args : '-std=c++17',
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
|
||||
idep_intel_driver_ds = declare_dependency(
|
||||
link_with : libintel_driver_ds,
|
||||
include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel],
|
||||
)
|
||||
|
||||
if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto'))
|
||||
pps_intel_sources = files(
|
||||
'intel_pps_perf.cc',
|
||||
'intel_pps_driver.cc',
|
||||
)
|
||||
|
||||
pps_intel_lib = static_library(
|
||||
'pps-intel',
|
||||
|
@ -30,3 +75,4 @@ pps_datasources += pps_intel_dep
|
|||
if not with_datasources.contains('intel')
|
||||
with_datasources += 'intel'
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -28,6 +28,7 @@ subdir('isl')
|
|||
subdir('common')
|
||||
subdir('compiler')
|
||||
subdir('perf')
|
||||
subdir('ds')
|
||||
if with_intel_tools
|
||||
subdir('tools')
|
||||
endif
|
||||
|
@ -37,6 +38,3 @@ endif
|
|||
if with_intel_vk
|
||||
subdir('vulkan')
|
||||
endif
|
||||
if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto'))
|
||||
subdir('ds')
|
||||
endif
|
||||
|
|
|
@ -2374,10 +2374,15 @@ anv_queue_submit(struct vk_queue *vk_queue,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
uint64_t start_ts = intel_ds_begin_submit(queue->ds);
|
||||
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
result = anv_queue_submit_locked(queue, submit);
|
||||
/* Take submission ID under lock */
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
|
||||
intel_ds_end_submit(queue->ds, start_ts);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -302,7 +302,7 @@ static VkResult anv_create_cmd_buffer(
|
|||
|
||||
anv_measure_init(cmd_buffer);
|
||||
|
||||
u_trace_init(&cmd_buffer->trace, &device->trace_context);
|
||||
u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
|
||||
|
||||
*pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
|
||||
|
||||
|
@ -407,7 +407,7 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
|
|||
anv_measure_reset(cmd_buffer);
|
||||
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context);
|
||||
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -1126,6 +1126,8 @@ VkResult anv_CreateInstance(
|
|||
|
||||
anv_init_dri_options(instance);
|
||||
|
||||
intel_driver_ds_init();
|
||||
|
||||
*pInstance = anv_instance_to_handle(instance);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include "blorp/blorp.h"
|
||||
#include "compiler/brw_compiler.h"
|
||||
#include "compiler/brw_rt.h"
|
||||
#include "ds/intel_driver_ds.h"
|
||||
#include "util/bitset.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
|
@ -1061,7 +1062,11 @@ struct anv_queue {
|
|||
|
||||
const struct anv_queue_family * family;
|
||||
|
||||
uint32_t index_in_family;
|
||||
|
||||
uint32_t exec_flags;
|
||||
|
||||
struct intel_ds_queue * ds;
|
||||
};
|
||||
|
||||
struct anv_pipeline_cache {
|
||||
|
@ -1216,7 +1221,7 @@ struct anv_device {
|
|||
|
||||
struct intel_debug_block_frame *debug_frame_desc;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
struct intel_ds_device ds;
|
||||
};
|
||||
|
||||
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
|
||||
|
@ -2395,6 +2400,9 @@ enum anv_pipe_bits {
|
|||
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
|
||||
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
|
||||
|
||||
enum intel_ds_stall_flag
|
||||
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
|
||||
|
||||
static inline enum anv_pipe_bits
|
||||
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
|
||||
VkAccessFlags2KHR flags)
|
||||
|
@ -4549,16 +4557,25 @@ struct anv_memcpy_state {
|
|||
};
|
||||
|
||||
struct anv_utrace_flush_copy {
|
||||
struct u_trace trace;
|
||||
/* Needs to be the first field */
|
||||
struct intel_ds_flush_data ds;
|
||||
|
||||
/* Batch stuff to implement a copy of timestamps recorded in another
|
||||
* buffer.
|
||||
*/
|
||||
struct anv_reloc_list relocs;
|
||||
struct anv_batch batch;
|
||||
struct anv_bo *batch_bo;
|
||||
|
||||
/* Buffer of 64bits timestamps */
|
||||
struct anv_bo *trace_bo;
|
||||
|
||||
/* Syncobj to be signaled when the batch completes */
|
||||
struct vk_sync *sync;
|
||||
|
||||
/* Queue on which all the recorded traces are submitted */
|
||||
struct anv_queue *queue;
|
||||
|
||||
struct anv_memcpy_state memcpy_state;
|
||||
};
|
||||
|
||||
|
@ -4570,6 +4587,25 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
struct anv_cmd_buffer **cmd_buffers,
|
||||
struct anv_utrace_flush_copy **out_flush_data);
|
||||
|
||||
/* Perfetto submit hooks.
 *
 * With HAVE_PERFETTO defined these are only declared here. Without it,
 * they are replaced by inline no-op stubs (begin_submit returns 0), so
 * call sites do not need their own #ifdef.
 */
#ifdef HAVE_PERFETTO
|
||||
void anv_perfetto_init(void);
|
||||
uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
|
||||
void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
|
||||
uint64_t start_ts);
|
||||
#else
|
||||
/* Stubs used when HAVE_PERFETTO is not defined: all of them do nothing. */
static inline void anv_perfetto_init(void)
|
||||
{
|
||||
}
|
||||
static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void anv_perfetto_end_submit(struct anv_queue *queue,
|
||||
uint32_t submission_id,
|
||||
uint64_t start_ts)
|
||||
{}
|
||||
#endif
|
||||
|
||||
|
||||
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
|
||||
VK_FROM_HANDLE(__anv_type, __name, __handle)
|
||||
|
|
|
@ -48,6 +48,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue,
|
|||
assert(queue->vk.queue_family_index < pdevice->queue.family_count);
|
||||
queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
|
||||
|
||||
queue->index_in_family = index_in_family;
|
||||
|
||||
queue->exec_flags = exec_flags;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
|
@ -31,7 +31,7 @@ command_buffers_count_utraces(struct anv_device *device,
|
|||
struct anv_cmd_buffer **cmd_buffers,
|
||||
uint32_t *utrace_copies)
|
||||
{
|
||||
if (!u_trace_context_actively_tracing(&device->trace_context))
|
||||
if (!u_trace_context_actively_tracing(&device->ds.trace_context))
|
||||
return 0;
|
||||
|
||||
uint32_t utraces = 0;
|
||||
|
@ -51,10 +51,10 @@ anv_utrace_delete_flush_data(struct u_trace_context *utctx,
|
|||
void *flush_data)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_utrace_flush_copy *flush = flush_data;
|
||||
|
||||
u_trace_fini(&flush->trace);
|
||||
intel_ds_flush_data_fini(&flush->ds);
|
||||
|
||||
if (flush->trace_bo) {
|
||||
assert(flush->batch_bo);
|
||||
|
@ -76,7 +76,7 @@ anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
|
|||
uint32_t count)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_utrace_flush_copy *flush = cmdstream;
|
||||
struct anv_address from_addr = (struct anv_address) {
|
||||
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
|
||||
|
@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
if (!flush)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
u_trace_init(&flush->trace, &device->trace_context);
|
||||
intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
|
||||
|
||||
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
|
||||
0, 0, &flush->sync);
|
||||
|
@ -155,14 +155,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
} else {
|
||||
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
|
||||
u_trace_end_iterator(&cmd_buffers[i]->trace),
|
||||
&flush->trace,
|
||||
&flush->ds.trace,
|
||||
flush,
|
||||
anv_device_utrace_emit_copy_ts_buffer);
|
||||
}
|
||||
}
|
||||
anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
|
||||
|
||||
u_trace_flush(&flush->trace, flush, true);
|
||||
u_trace_flush(&flush->ds.trace, flush, true);
|
||||
|
||||
if (flush->batch.status != VK_SUCCESS) {
|
||||
result = flush->batch.status;
|
||||
|
@ -175,6 +175,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
|||
}
|
||||
}
|
||||
|
||||
flush->queue = queue;
|
||||
|
||||
*out_flush_data = flush;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
@ -196,7 +198,7 @@ static void *
|
|||
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
|
||||
struct anv_bo *bo = NULL;
|
||||
UNUSED VkResult result =
|
||||
|
@ -211,7 +213,7 @@ static void
|
|||
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_bo *bo = timestamps;
|
||||
|
||||
anv_device_release_bo(device, bo);
|
||||
|
@ -237,7 +239,7 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
|||
void *timestamps, unsigned idx, void *flush_data)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_bo *bo = timestamps;
|
||||
struct anv_utrace_flush_copy *flush = flush_data;
|
||||
|
||||
|
@ -261,19 +263,80 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
|||
return intel_device_info_timebase_scale(&device->info, ts[idx]);
|
||||
}
|
||||
|
||||
/* Return a short, static label for the engine class of a queue family.
 *
 * Recognised classes are render, copy, video and video-enhance; any other
 * engine class yields "unknown". The returned pointer is a string literal
 * and must not be freed.
 */
static const char *
|
||||
queue_family_to_name(const struct anv_queue_family *family)
|
||||
{
|
||||
switch (family->engine_class) {
|
||||
case I915_ENGINE_CLASS_RENDER:
|
||||
return "render";
|
||||
case I915_ENGINE_CLASS_COPY:
|
||||
return "copy";
|
||||
case I915_ENGINE_CLASS_VIDEO:
|
||||
return "video";
|
||||
case I915_ENGINE_CLASS_VIDEO_ENHANCE:
|
||||
return "video-enh";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_init(struct anv_device *device)
|
||||
{
|
||||
u_trace_context_init(&device->trace_context, device,
|
||||
intel_ds_device_init(&device->ds, &device->info, device->fd,
|
||||
device->physical->local_minor - 128,
|
||||
INTEL_DS_API_VULKAN);
|
||||
u_trace_context_init(&device->ds.trace_context,
|
||||
&device->ds,
|
||||
anv_utrace_create_ts_buffer,
|
||||
anv_utrace_destroy_ts_buffer,
|
||||
anv_utrace_record_ts,
|
||||
anv_utrace_read_ts,
|
||||
anv_utrace_delete_flush_data);
|
||||
|
||||
for (uint32_t q = 0; q < device->queue_count; q++) {
|
||||
struct anv_queue *queue = &device->queues[q];
|
||||
|
||||
queue->ds =
|
||||
intel_ds_device_add_queue(&device->ds, "%s%u",
|
||||
queue_family_to_name(queue->family),
|
||||
queue->index_in_family);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_finish(struct anv_device *device)
|
||||
{
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
intel_ds_device_fini(&device->ds);
|
||||
}
|
||||
|
||||
/* Translate a mask of ANV_PIPE_*_BIT flags into the matching mask of
 * INTEL_DS_*_BIT stall flags.
 *
 * Each anv bit that is set in `bits` and has an entry in the table below
 * contributes its INTEL_DS counterpart to the result. Bits with no table
 * entry are silently dropped, so the result is 0 when nothing matches.
 */
enum intel_ds_stall_flag
|
||||
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
|
||||
{
|
||||
/* One-to-one mapping from anv pipe bits to data-source stall flags. Note
 * that the INTEL_DS names abbreviate two of them: CONSTANT -> CONST and
 * INSTRUCTION -> INST.
 */
static const struct {
|
||||
enum anv_pipe_bits anv;
|
||||
enum intel_ds_stall_flag ds;
|
||||
} anv_to_ds_flags[] = {
|
||||
{ .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
|
||||
{ .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
|
||||
{ .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
|
||||
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
|
||||
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
|
||||
};
|
||||
|
||||
enum intel_ds_stall_flag ret = 0;
|
||||
/* Accumulate the INTEL_DS flag of every table entry whose anv bit is set. */
for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
|
||||
if (anv_to_ds_flags[i].anv & bits)
|
||||
ret |= anv_to_ds_flags[i].ds;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -107,7 +107,7 @@ VkResult anv_QueuePresentKHR(
|
|||
vk_semaphore_reset_temporary(&queue->device->vk, semaphore);
|
||||
}
|
||||
|
||||
u_trace_context_process(&queue->device->trace_context, true);
|
||||
u_trace_context_process(&device->ds.trace_context, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -34,13 +34,13 @@
|
|||
#include "common/intel_l3_config.h"
|
||||
#include "blorp/blorp_genX_exec.h"
|
||||
|
||||
#include "anv_tracepoints.h"
|
||||
#include "ds/intel_tracepoints.h"
|
||||
|
||||
static void blorp_measure_start(struct blorp_batch *_batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_begin_blorp(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_blorp(&cmd_buffer->trace, cmd_buffer);
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
params->snapshot_type,
|
||||
NULL, 0);
|
||||
|
@ -50,7 +50,7 @@ static void blorp_measure_end(struct blorp_batch *_batch,
|
|||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_end_blorp(&cmd_buffer->trace, cmd_buffer,
|
||||
trace_intel_end_blorp(&cmd_buffer->trace, cmd_buffer,
|
||||
params->x1 - params->x0,
|
||||
params->y1 - params->y0,
|
||||
params->hiz_op,
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#include "nir/nir_xfb_info.h"
|
||||
|
||||
#include "anv_tracepoints.h"
|
||||
#include "ds/intel_tracepoints.h"
|
||||
|
||||
/* We reserve :
|
||||
* - GPR 14 for secondary command buffer returns
|
||||
|
@ -1763,7 +1763,7 @@ genX(BeginCommandBuffer)(
|
|||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
|
||||
cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
|
||||
|
||||
trace_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
|
||||
|
||||
|
@ -1938,7 +1938,7 @@ genX(EndCommandBuffer)(
|
|||
|
||||
emit_isp_disable(cmd_buffer);
|
||||
|
||||
trace_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level);
|
||||
trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level);
|
||||
|
||||
anv_cmd_buffer_end_batch_buffer(cmd_buffer);
|
||||
|
||||
|
@ -2405,8 +2405,10 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
|
|||
else if (bits == 0)
|
||||
return;
|
||||
|
||||
if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS))
|
||||
trace_stall(&cmd_buffer->trace, cmd_buffer, bits);
|
||||
bool trace_flush =
|
||||
(bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS)) != 0;
|
||||
if (trace_flush)
|
||||
trace_intel_begin_stall(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
if ((GFX_VER >= 8 && GFX_VER <= 9) &&
|
||||
(bits & ANV_PIPE_CS_STALL_BIT) &&
|
||||
|
@ -2425,6 +2427,11 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
|
|||
cmd_buffer->device,
|
||||
cmd_buffer->state.current_pipeline,
|
||||
bits);
|
||||
|
||||
if (trace_flush) {
|
||||
trace_intel_end_stall(&cmd_buffer->trace, cmd_buffer, bits,
|
||||
anv_pipe_flush_bit_to_ds_stall_flag, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3963,7 +3970,7 @@ void genX(CmdDraw)(
|
|||
anv_measure_snapshot(cmd_buffer,
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw", count);
|
||||
trace_begin_draw(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -3993,7 +4000,7 @@ void genX(CmdDraw)(
|
|||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw(&cmd_buffer->trace, cmd_buffer, count);
|
||||
trace_intel_end_draw(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawMultiEXT)(
|
||||
|
@ -4018,7 +4025,7 @@ void genX(CmdDrawMultiEXT)(
|
|||
anv_measure_snapshot(cmd_buffer,
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw_multi", count);
|
||||
trace_begin_draw_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4051,7 +4058,7 @@ void genX(CmdDrawMultiEXT)(
|
|||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
trace_intel_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexed)(
|
||||
|
@ -4077,7 +4084,7 @@ void genX(CmdDrawIndexed)(
|
|||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indexed",
|
||||
count);
|
||||
trace_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4105,7 +4112,7 @@ void genX(CmdDrawIndexed)(
|
|||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
|
||||
|
||||
trace_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count);
|
||||
trace_intel_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawMultiIndexedEXT)(
|
||||
|
@ -4132,7 +4139,7 @@ void genX(CmdDrawMultiIndexedEXT)(
|
|||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indexed_multi",
|
||||
count);
|
||||
trace_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4220,7 +4227,7 @@ void genX(CmdDrawMultiIndexedEXT)(
|
|||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
|
||||
|
||||
trace_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
/* Auto-Draw / Indirect Registers */
|
||||
|
@ -4256,7 +4263,7 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
|||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indirect byte count",
|
||||
instanceCount);
|
||||
trace_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4300,7 +4307,7 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
|||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer,
|
||||
trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer,
|
||||
instanceCount);
|
||||
#endif /* GFX_VERx10 >= 75 */
|
||||
}
|
||||
|
@ -4358,7 +4365,7 @@ void genX(CmdDrawIndirect)(
|
|||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4393,7 +4400,7 @@ void genX(CmdDrawIndirect)(
|
|||
offset += stride;
|
||||
}
|
||||
|
||||
trace_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
trace_intel_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirect)(
|
||||
|
@ -4411,7 +4418,7 @@ void genX(CmdDrawIndexedIndirect)(
|
|||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4447,7 +4454,7 @@ void genX(CmdDrawIndexedIndirect)(
|
|||
offset += stride;
|
||||
}
|
||||
|
||||
trace_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
}
|
||||
|
||||
static struct mi_value
|
||||
|
@ -4574,7 +4581,7 @@ void genX(CmdDrawIndirectCount)(
|
|||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4616,7 +4623,7 @@ void genX(CmdDrawIndirectCount)(
|
|||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
|
||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirectCount)(
|
||||
|
@ -4638,7 +4645,7 @@ void genX(CmdDrawIndexedIndirectCount)(
|
|||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
|
@ -4681,7 +4688,8 @@ void genX(CmdDrawIndexedIndirectCount)(
|
|||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_end_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
|
||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
|
||||
cmd_buffer, maxDrawCount);
|
||||
|
||||
}
|
||||
|
||||
|
@ -5058,7 +5066,7 @@ void genX(CmdDispatchBase)(
|
|||
prog_data->local_size[0] * prog_data->local_size[1] *
|
||||
prog_data->local_size[2]);
|
||||
|
||||
trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
if (prog_data->uses_num_work_groups) {
|
||||
struct anv_state state =
|
||||
|
@ -5084,7 +5092,7 @@ void genX(CmdDispatchBase)(
|
|||
emit_cs_walker(cmd_buffer, pipeline, false, prog_data, groupCountX,
|
||||
groupCountY, groupCountZ);
|
||||
|
||||
trace_end_compute(&cmd_buffer->trace, cmd_buffer,
|
||||
trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer,
|
||||
groupCountX, groupCountY, groupCountZ);
|
||||
}
|
||||
|
||||
|
@ -5119,7 +5127,7 @@ void genX(CmdDispatchIndirect)(
|
|||
INTEL_SNAPSHOT_COMPUTE,
|
||||
"compute indirect",
|
||||
0);
|
||||
trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
if (prog_data->uses_num_work_groups) {
|
||||
cmd_buffer->state.compute.num_workgroups = addr;
|
||||
|
@ -5194,7 +5202,7 @@ void genX(CmdDispatchIndirect)(
|
|||
|
||||
emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
|
||||
|
||||
trace_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
|
||||
trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
|
@ -6800,7 +6808,7 @@ void genX(CmdBeginRenderPass2)(
|
|||
cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea;
|
||||
|
||||
anv_measure_beginrenderpass(cmd_buffer);
|
||||
trace_begin_render_pass(&cmd_buffer->trace, cmd_buffer);
|
||||
trace_intel_begin_render_pass(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass,
|
||||
framebuffer,
|
||||
|
@ -6843,7 +6851,7 @@ void genX(CmdEndRenderPass2)(
|
|||
|
||||
cmd_buffer_end_subpass(cmd_buffer);
|
||||
|
||||
trace_end_render_pass(&cmd_buffer->trace, cmd_buffer,
|
||||
trace_intel_end_render_pass(&cmd_buffer->trace, cmd_buffer,
|
||||
cmd_buffer->state.render_area.extent.width,
|
||||
cmd_buffer->state.render_area.extent.height,
|
||||
cmd_buffer->state.pass->attachment_count,
|
||||
|
|
|
@ -33,19 +33,6 @@ anv_entrypoints = custom_target(
|
|||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
|
||||
anv_tracepoints = custom_target(
|
||||
'anv_tracepoints.[ch]',
|
||||
input: 'anv_tracepoints.py',
|
||||
output: ['anv_tracepoints.h', 'anv_tracepoints.c'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'-p', join_paths(meson.source_root(), 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--utrace-src', '@OUTPUT1@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
intel_icd = custom_target(
|
||||
'intel_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
|
@ -78,7 +65,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
|
|||
_gfx_ver = g[0]
|
||||
libanv_per_hw_ver_libs += static_library(
|
||||
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_tracepoints[0]],
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
||||
],
|
||||
|
@ -90,7 +77,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
|
|||
dependencies : [
|
||||
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
|
||||
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
|
||||
idep_vulkan_runtime_headers,
|
||||
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
|
||||
],
|
||||
)
|
||||
endforeach
|
||||
|
@ -142,6 +129,8 @@ anv_flags = [
|
|||
c_sse2_args,
|
||||
]
|
||||
|
||||
anv_cpp_flags = []
|
||||
|
||||
if with_platform_x11
|
||||
anv_deps += dep_xcb_dri3
|
||||
endif
|
||||
|
@ -164,24 +153,27 @@ else
|
|||
libanv_files += files('anv_android_stubs.c')
|
||||
endif
|
||||
|
||||
anv_deps += idep_intel_driver_ds_headers
|
||||
|
||||
libanv_common = static_library(
|
||||
'anv_common',
|
||||
[
|
||||
libanv_files, anv_entrypoints, sha1_h,
|
||||
gen_xml_pack
|
||||
gen_xml_pack,
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
inc_util,
|
||||
],
|
||||
c_args : anv_flags,
|
||||
cpp_args : anv_cpp_flags,
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
dependencies : anv_deps,
|
||||
)
|
||||
|
||||
libvulkan_intel = shared_library(
|
||||
'vulkan_intel',
|
||||
[files('anv_gem.c'), anv_entrypoints[0], anv_tracepoints],
|
||||
[files('anv_gem.c'), anv_entrypoints[0]],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
],
|
||||
|
@ -193,6 +185,7 @@ libvulkan_intel = shared_library(
|
|||
dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
|
||||
idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
|
||||
idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
|
||||
idep_intel_driver_ds,
|
||||
],
|
||||
c_args : anv_flags,
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
|
@ -216,7 +209,7 @@ endif
|
|||
if with_tests
|
||||
libvulkan_intel_test = static_library(
|
||||
'vulkan_intel_test',
|
||||
[files('anv_gem_stubs.c'), anv_entrypoints[0], anv_tracepoints[0]],
|
||||
[files('anv_gem_stubs.c'), anv_entrypoints[0]],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
],
|
||||
|
@ -241,13 +234,13 @@ if with_tests
|
|||
'anv_@0@'.format(t),
|
||||
executable(
|
||||
t,
|
||||
['tests/@0@.c'.format(t), anv_entrypoints[0], anv_tracepoints[0]],
|
||||
['tests/@0@.c'.format(t), anv_entrypoints[0]],
|
||||
c_args : [ c_sse2_args ],
|
||||
link_with : libvulkan_intel_test,
|
||||
dependencies : [
|
||||
dep_libdrm, dep_thread, dep_m, dep_valgrind,
|
||||
idep_vulkan_util, idep_vulkan_wsi_headers,
|
||||
idep_vulkan_runtime,
|
||||
idep_vulkan_runtime, idep_intel_driver_ds,
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
|
|
Loading…
Reference in New Issue