From e760c5b37be938427a9c88182ea99f7f66721ca3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 21 Nov 2021 18:23:57 +0200 Subject: [PATCH] anv: add perfetto source v2: Increase custom stall data (Felix) Fixup build (Felix) v3: Add API enum (Rohan) Fixup old comment (Rohan) Signed-off-by: Lionel Landwerlin Reviewed-by: Rohan Garg Acked-by: Antonio Caggiano Part-of: --- docs/perfetto.rst | 8 + src/intel/ds/intel_driver_ds.cc | 585 ++++++++++++++++++ src/intel/ds/intel_driver_ds.h | 193 ++++++ .../intel_tracepoints.py} | 73 ++- src/intel/ds/meson.build | 94 ++- src/intel/meson.build | 4 +- src/intel/vulkan/anv_batch_chain.c | 5 + src/intel/vulkan/anv_cmd_buffer.c | 4 +- src/intel/vulkan/anv_device.c | 2 + src/intel/vulkan/anv_private.h | 40 +- src/intel/vulkan/anv_queue.c | 2 + src/intel/vulkan/anv_utrace.c | 87 ++- src/intel/vulkan/anv_wsi.c | 2 +- src/intel/vulkan/genX_blorp_exec.c | 18 +- src/intel/vulkan/genX_cmd_buffer.c | 82 +-- src/intel/vulkan/meson.build | 33 +- 16 files changed, 1096 insertions(+), 136 deletions(-) create mode 100644 src/intel/ds/intel_driver_ds.cc create mode 100644 src/intel/ds/intel_driver_ds.h rename src/intel/{vulkan/anv_tracepoints.py => ds/intel_tracepoints.py} (70%) diff --git a/docs/perfetto.rst b/docs/perfetto.rst index 689031d73e5..c9b39ec7f73 100644 --- a/docs/perfetto.rst +++ b/docs/perfetto.rst @@ -161,6 +161,14 @@ set of HW counters : INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer +Vulkan applications can also be instrumented to be Perfetto producers. +To enable this for a given application, set the environment variable as +follows : + +.. 
code-block:: console + + PERFETTO_TRACE=1 my_vulkan_app + Panfrost ^^^^^^^^ diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc new file mode 100644 index 00000000000..d8b1a6906cf --- /dev/null +++ b/src/intel/ds/intel_driver_ds.cc @@ -0,0 +1,585 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "common/intel_gem.h" +#include "perf/intel_perf.h" + +#include "util/hash_table.h" +#include "util/u_process.h" + +#include "intel_driver_ds.h" +#include "intel_pps_priv.h" +#include "intel_tracepoints.h" + +#ifdef HAVE_PERFETTO + +#include "util/u_perfetto.h" + +#include "intel_tracepoints_perfetto.h" + +/* Just naming stages */ +static const struct { + const char *name; + + /* Tells us if a given stage is pipelined. 
This is used to build stacks of + * pipelined elements so that the perfetto UI doesn't get confused by elements + * ending out of order. + */ + bool pipelined; + + /* The perfetto UI requires that there is a parent-child relationship + * within a row of elements. Which means that all children elements must + * end within the lifespan of their parent. + * + * Some elements like stalls and command buffers follow that relationship, + * but not all. This tells us in which UI row the elements should live. + */ + enum intel_ds_queue_stage draw_stage; +} intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = { + /* Order must match the enum! */ + { + "cmd-buffer", + false, + INTEL_DS_QUEUE_STAGE_CMD_BUFFER, + }, + { + "stall", + false, + INTEL_DS_QUEUE_STAGE_STALL, + }, + { + "compute", + true, + INTEL_DS_QUEUE_STAGE_COMPUTE, + }, + { + "render-pass", + true, + INTEL_DS_QUEUE_STAGE_RENDER_PASS, + }, + { + "blorp", + true, + INTEL_DS_QUEUE_STAGE_BLORP, + }, + { + "draw", + true, + INTEL_DS_QUEUE_STAGE_DRAW, + }, +}; + +struct IntelRenderpassIncrementalState { + bool was_cleared = true; +}; + +struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits { + using IncrementalStateType = IntelRenderpassIncrementalState; +}; + +class IntelRenderpassDataSource : public perfetto::DataSource { +public: + void OnSetup(const SetupArgs &) override + { + // Use this callback to apply any custom configuration to your data source + // based on the TraceConfig in SetupArgs. + } + + void OnStart(const StartArgs &) override + { + // This notification can be used to initialize the GPU driver, enable + // counters, etc. StartArgs will contains the DataSourceDescriptor, + // which can be extended. + u_trace_perfetto_start(); + PERFETTO_LOG("Tracing started"); + } + + void OnStop(const StopArgs &) override + { + PERFETTO_LOG("Tracing stopped"); + + // Undo any initialization done in OnStart. 
+ u_trace_perfetto_stop(); + // TODO we should perhaps block until queued traces are flushed? + + Trace([](IntelRenderpassDataSource::TraceContext ctx) { + auto packet = ctx.NewTracePacket(); + packet->Finalize(); + ctx.Flush(); + }); + } +}; + +PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource); +PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource); + +using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory; + +enum InternedGpuRenderStageSpecification_RenderStageCategory +i915_engine_class_to_category(enum drm_i915_gem_engine_class engine_class) +{ + switch (engine_class) { + case I915_ENGINE_CLASS_RENDER: + return InternedGpuRenderStageSpecification_RenderStageCategory:: + InternedGpuRenderStageSpecification_RenderStageCategory_GRAPHICS; + default: + return InternedGpuRenderStageSpecification_RenderStageCategory::InternedGpuRenderStageSpecification_RenderStageCategory_OTHER; + } +} + +static void +sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx, + struct intel_ds_device *device) +{ + uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); + uint64_t gpu_ts = intel_device_info_timebase_scale(&device->info, + intel_read_gpu_timestamp(device->fd)); + + if (cpu_ts < device->next_clock_sync_ns) + return; + + PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id); + + device->sync_gpu_ts = gpu_ts; + device->next_clock_sync_ns = cpu_ts + 1000000000ull; + + auto packet = ctx.NewTracePacket(); + + packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + packet->set_timestamp(cpu_ts); + + auto event = packet->set_clock_snapshot(); + { + auto clock = event->add_clocks(); + + clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + clock->set_timestamp(cpu_ts); + } + { + auto clock = event->add_clocks(); + + clock->set_clock_id(device->gpu_clock_id); + clock->set_timestamp(gpu_ts); + } +} + +static void 
+send_descriptors(IntelRenderpassDataSource::TraceContext &ctx, + struct intel_ds_device *device) +{ + struct intel_ds_queue *queue; + + PERFETTO_LOG("Sending renderstage descriptors"); + + device->event_id = 0; + u_vector_foreach(queue, &device->queues) { + for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) { + queue->stages[s].start_ns = 0; + } + } + + { + auto packet = ctx.NewTracePacket(); + + packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); + packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED); + + auto interned_data = packet->set_interned_data(); + + { + auto desc = interned_data->add_graphics_contexts(); + desc->set_iid(device->iid); + desc->set_pid(getpid()); + switch (device->api) { + case INTEL_DS_API_OPENGL: + desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_OPEN_GL); + break; + case INTEL_DS_API_VULKAN: + desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_VULKAN); + break; + default: + break; + } + } + + /* Emit all the IID picked at device/queue creation. */ + u_vector_foreach(queue, &device->queues) { + for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) { + { + /* We put the stage number in there so that all rows are ordered + * by intel_ds_queue_stage. 
+ */ + char name[100]; + snprintf(name, sizeof(name), "%.10s-%s-%u-%s", + util_get_process_name(), + queue->name, s, intel_queue_stage_desc[s].name); + + auto desc = interned_data->add_gpu_specifications(); + desc->set_iid(queue->stages[s].queue_iid); + desc->set_name(name); + } + { + auto desc = interned_data->add_gpu_specifications(); + desc->set_iid(queue->stages[s].stage_iid); + desc->set_name(intel_queue_stage_desc[s].name); + } + } + } + } + + device->next_clock_sync_ns = 0; + sync_timestamp(ctx, device); +} + +typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*); + +static void +begin_event(struct intel_ds_queue *queue, uint64_t ts_ns, + enum intel_ds_queue_stage stage_id) +{ + /* If we haven't managed to calibrate the alignment between GPU and CPU + * timestamps yet, then skip this trace, otherwise perfetto won't know + * what to do with it. + */ + if (!queue->device->sync_gpu_ts) { + queue->stages[stage_id].start_ns = 0; + return; + } + + queue->stages[stage_id].start_ns = ts_ns; +} + +static void +end_event(struct intel_ds_queue *queue, uint64_t ts_ns, + enum intel_ds_queue_stage stage_id, + uint32_t submission_id, const void* payload = nullptr, + trace_payload_as_extra_func payload_as_extra = nullptr) +{ + struct intel_ds_device *device = queue->device; + + /* If we haven't managed to calibrate the alignment between GPU and CPU + * timestamps yet, then skip this trace, otherwise perfetto won't know + * what to do with it. 
+ */ + if (!device->sync_gpu_ts) + return; + + struct intel_ds_stage *stage = &queue->stages[stage_id]; + uint64_t start_ns = stage->start_ns; + + if (!start_ns) + return; + + uint64_t evt_id = device->event_id++; + + IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) { + if (auto state = tctx.GetIncrementalState(); state->was_cleared) { + send_descriptors(tctx, queue->device); + state->was_cleared = false; + } + + sync_timestamp(tctx, queue->device); + + auto packet = tctx.NewTracePacket(); + + packet->set_timestamp(start_ns); + packet->set_timestamp_clock_id(queue->device->gpu_clock_id); + + assert(ts_ns >= start_ns); + + auto event = packet->set_gpu_render_stage_event(); + event->set_gpu_id(queue->device->gpu_id); + + event->set_hw_queue_iid(stage->queue_iid); + event->set_stage_iid(stage->stage_iid); + event->set_context(queue->device->iid); + event->set_event_id(evt_id); + event->set_duration(ts_ns - start_ns); + event->set_submission_id(submission_id); + + if (payload && payload_as_extra) { + payload_as_extra(event, payload); + } + }); + + stage->start_ns = 0; +} + +static void +custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event, + const struct trace_intel_end_stall *payload) +{ + char buf[256]; + + { + auto data = event->add_extra_data(); + data->set_name("stall_reason"); + + snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s", + (payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "", + (payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "", + (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "", + (payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "", + (payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "", + (payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "", + (payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? 
"+const_inv" : "", + (payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "", + (payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "", + (payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "", + (payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "", + (payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "", + (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "", + (payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "", + payload->reason ? payload->reason : "unknown"); + + assert(strlen(buf) > 0); + + data->set_value(buf); + } +} + +#endif /* HAVE_PERFETTO */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_PERFETTO + +/* + * Trace callbacks, called from u_trace once the timestamps from GPU have been + * collected. + */ + +#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage) \ + void \ + intel_ds_begin_##event_name(struct intel_ds_device *device, \ + uint64_t ts_ns, \ + const void *flush_data, \ + const struct trace_intel_begin_##event_name *payload) \ + { \ + const struct intel_ds_flush_data *flush = \ + (const struct intel_ds_flush_data *) flush_data; \ + begin_event(flush->queue, ts_ns, stage); \ + } \ + \ + void \ + intel_ds_end_##event_name(struct intel_ds_device *device, \ + uint64_t ts_ns, \ + const void *flush_data, \ + const struct trace_intel_end_##event_name *payload) \ + { \ + const struct intel_ds_flush_data *flush = \ + (const struct intel_ds_flush_data *) flush_data; \ + end_event(flush->queue, ts_ns, stage, flush->submission_id, \ + payload, \ + (trace_payload_as_extra_func) \ + &trace_payload_as_extra_intel_end_##event_name); \ + } \ + + +CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) +CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS) +CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP) +CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW) 
+CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE) + +void +intel_ds_begin_stall(struct intel_ds_device *device, + uint64_t ts_ns, + const void *flush_data, + const struct trace_intel_begin_stall *payload) +{ + const struct intel_ds_flush_data *flush = + (const struct intel_ds_flush_data *) flush_data; + begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL); +} + +void +intel_ds_end_stall(struct intel_ds_device *device, + uint64_t ts_ns, + const void *flush_data, + const struct trace_intel_end_stall *payload) +{ + const struct intel_ds_flush_data *flush = + (const struct intel_ds_flush_data *) flush_data; + end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL, flush->submission_id, + payload, + (trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall); +} + +uint64_t +intel_ds_begin_submit(struct intel_ds_queue *queue) +{ + return perfetto::base::GetBootTimeNs().count(); +} + +void +intel_ds_end_submit(struct intel_ds_queue *queue, + uint64_t start_ts) +{ + if (!u_trace_context_actively_tracing(&queue->device->trace_context)) { + queue->device->sync_gpu_ts = 0; + queue->device->next_clock_sync_ns = 0; + return; + } + + uint64_t end_ts = perfetto::base::GetBootTimeNs().count(); + uint32_t submission_id = queue->submission_id++; + + IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) { + if (auto state = 
tctx.GetIncrementalState(); state->was_cleared) { + send_descriptors(tctx, queue->device); + state->was_cleared = false; + } + + sync_timestamp(tctx, queue->device); + + auto packet = tctx.NewTracePacket(); + + packet->set_timestamp(start_ts); + + auto event = packet->set_vulkan_api_event(); + auto submit = event->set_vk_queue_submit(); + + // submit->set_pid(os_get_pid()); + // submit->set_tid(os_get_tid()); + submit->set_duration_ns(end_ts - start_ts); + submit->set_vk_queue((uintptr_t) queue); + submit->set_submission_id(submission_id); + }); +} + +#endif /* HAVE_PERFETTO */ + +static void +intel_driver_ds_init_once(void) +{ +#ifdef HAVE_PERFETTO + util_perfetto_init(); + perfetto::DataSourceDescriptor dsd; + dsd.set_name("gpu.renderstages.intel"); + IntelRenderpassDataSource::Register(dsd); +#endif +} + +static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT; + +static uint64_t get_iid() +{ + static uint64_t iid = 1; + return iid++; +} + +void +intel_driver_ds_init(void) +{ + call_once(&intel_driver_ds_once_flag, + intel_driver_ds_init_once); +} + +void +intel_ds_device_init(struct intel_ds_device *device, + struct intel_device_info *devinfo, + int drm_fd, + uint32_t gpu_id, + enum intel_ds_api api) +{ + memset(device, 0, sizeof(*device)); + + assert(gpu_id < 128); + device->gpu_id = gpu_id; + device->gpu_clock_id = intel_pps_clock_id(gpu_id); + device->fd = drm_fd; + device->info = *devinfo; + device->iid = get_iid(); + device->api = api; + u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue)); +} + +void +intel_ds_device_fini(struct intel_ds_device *device) +{ + u_trace_context_fini(&device->trace_context); + u_vector_finish(&device->queues); +} + +struct intel_ds_queue * +intel_ds_device_add_queue(struct intel_ds_device *device, + const char *fmt_name, + ...) 
+{ + struct intel_ds_queue *queue = + (struct intel_ds_queue *) u_vector_add(&device->queues); + va_list ap; + + memset(queue, 0, sizeof(*queue)); + + queue->device = device; + queue->queue_id = u_vector_length(&device->queues) - 1; + + va_start(ap, fmt_name); + vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap); + va_end(ap); + + for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) { + queue->stages[s].queue_iid = get_iid(); + queue->stages[s].stage_iid = get_iid(); + } + + return queue; +} + +void intel_ds_flush_data_init(struct intel_ds_flush_data *data, + struct intel_ds_queue *queue, + uint64_t submission_id) +{ + memset(data, 0, sizeof(*data)); + + data->queue = queue; + data->submission_id = submission_id; + + u_trace_init(&data->trace, &queue->device->trace_context); +} + +void intel_ds_flush_data_fini(struct intel_ds_flush_data *data) +{ + u_trace_fini(&data->trace); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h new file mode 100644 index 00000000000..8b833d19794 --- /dev/null +++ b/src/intel/ds/intel_driver_ds.h @@ -0,0 +1,193 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef INTEL_DRIVER_DS_H +#define INTEL_DRIVER_DS_H + +#include + +#include "util/macros.h" +#include "util/perf/u_trace.h" +#include "util/u_vector.h" + +#include "dev/intel_device_info.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum intel_ds_api { + INTEL_DS_API_OPENGL, + INTEL_DS_API_VULKAN, +}; + +enum intel_ds_stall_flag { + INTEL_DS_DEPTH_CACHE_FLUSH_BIT = BITFIELD_BIT(0), + INTEL_DS_DATA_CACHE_FLUSH_BIT = BITFIELD_BIT(1), + INTEL_DS_HDC_PIPELINE_FLUSH_BIT = BITFIELD_BIT(2), + INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT = BITFIELD_BIT(3), + INTEL_DS_TILE_CACHE_FLUSH_BIT = BITFIELD_BIT(4), + INTEL_DS_STATE_CACHE_INVALIDATE_BIT = BITFIELD_BIT(5), + INTEL_DS_CONST_CACHE_INVALIDATE_BIT = BITFIELD_BIT(6), + INTEL_DS_VF_CACHE_INVALIDATE_BIT = BITFIELD_BIT(7), + INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT = BITFIELD_BIT(8), + INTEL_DS_INST_CACHE_INVALIDATE_BIT = BITFIELD_BIT(9), + INTEL_DS_STALL_AT_SCOREBOARD_BIT = BITFIELD_BIT(10), + INTEL_DS_DEPTH_STALL_BIT = BITFIELD_BIT(11), + INTEL_DS_CS_STALL_BIT = BITFIELD_BIT(12), +}; + +/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. 
*/ +typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags); + +enum intel_ds_queue_stage { + INTEL_DS_QUEUE_STAGE_CMD_BUFFER, + INTEL_DS_QUEUE_STAGE_STALL, + INTEL_DS_QUEUE_STAGE_COMPUTE, + INTEL_DS_QUEUE_STAGE_RENDER_PASS, + INTEL_DS_QUEUE_STAGE_BLORP, + INTEL_DS_QUEUE_STAGE_DRAW, + INTEL_DS_QUEUE_STAGE_N_STAGES, +}; + +struct intel_ds_device { + struct intel_device_info info; + + /* DRM fd */ + int fd; + + /* API of this device */ + enum intel_ds_api api; + + /* GPU identifier (minor number) */ + uint32_t gpu_id; + + /* Clock identifier for this device. */ + uint32_t gpu_clock_id; + + /* The timestamp at the point where we first emitted the clock_sync.. + * this will be a *later* timestamp than the first GPU traces (since + * we capture the first clock_sync from the CPU *after* the first GPU + * tracepoints happen). To avoid confusing perfetto we need to drop + * the GPU traces with timestamps before this. + */ + uint64_t sync_gpu_ts; + + /* Next timestamp after which we should resend a clock correlation. 
*/ + uint64_t next_clock_sync_ns; + + /* Unique perfetto identifier for the context */ + uint64_t iid; + + /* Event ID generator */ + uint64_t event_id; + + struct u_trace_context trace_context; + + /* List of intel_ds_queue */ + struct u_vector queues; +}; + +struct intel_ds_stage { + /* Unique hw_queue IID */ + uint64_t queue_iid; + + /* Unique stage IID */ + uint64_t stage_iid; + + /* Start timestamp of the last work element */ + uint64_t start_ns; +}; + +struct intel_ds_queue { + /* Device this queue belongs to */ + struct intel_ds_device *device; + + /* Unique queue ID across the device */ + uint32_t queue_id; + + /* Unique name of the queue */ + char name[80]; + + /* Counter incremented on each intel_ds_end_submit() call */ + uint64_t submission_id; + + struct intel_ds_stage stages[INTEL_DS_QUEUE_STAGE_N_STAGES]; +}; + +struct intel_ds_flush_data { + struct intel_ds_queue *queue; + + /* u_trace element in which we copy other traces in case we deal with + * reusable command buffers. 
+ */ + struct u_trace trace; + + /* Unique submission ID associated with the trace */ + uint64_t submission_id; +}; + +void intel_driver_ds_init(void); + +void intel_ds_device_init(struct intel_ds_device *device, + struct intel_device_info *devinfo, + int drm_fd, + uint32_t gpu_id, + enum intel_ds_api api); +void intel_ds_device_fini(struct intel_ds_device *device); + +struct intel_ds_queue *intel_ds_device_add_queue(struct intel_ds_device *device, + const char *fmt_name, + ...); + +void intel_ds_flush_data_init(struct intel_ds_flush_data *data, + struct intel_ds_queue *queue, + uint64_t submission_id); + +void intel_ds_flush_data_fini(struct intel_ds_flush_data *data); + +#ifdef HAVE_PERFETTO + +uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue); +void intel_ds_end_submit(struct intel_ds_queue *queue, + uint64_t start_ts); + +#else + +static inline uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue) +{ + return 0; +} + +static inline void intel_ds_end_submit(struct intel_ds_queue *queue, + uint64_t start_ts) +{ +} + +#endif /* HAVE_PERFETTO */ + +#ifdef __cplusplus +} +#endif + +#endif /* INTEL_DRIVER_DS_H */ diff --git a/src/intel/vulkan/anv_tracepoints.py b/src/intel/ds/intel_tracepoints.py similarity index 70% rename from src/intel/vulkan/anv_tracepoints.py rename to src/intel/ds/intel_tracepoints.py index ef9a373aed6..9a776a22f17 100644 --- a/src/intel/vulkan/anv_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -34,14 +34,18 @@ def define_tracepoints(args): from u_trace import TracepointArg as Arg from u_trace import TracepointArgStruct as ArgStruct - Header('anv_private.h', scope=HeaderScope.SOURCE) + Header('intel_driver_ds.h', scope=HeaderScope.SOURCE) Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER) + Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER) - def begin_end_tp(name, tp_args=[], tp_struct=None, end_pipelined=True): - Tracepoint('begin_{0}'.format(name)) - Tracepoint('end_{0}'.format(name), + def 
begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, end_pipelined=True): + Tracepoint('intel_begin_{0}'.format(name), + tp_perfetto='intel_ds_begin_{0}'.format(name)) + Tracepoint('intel_end_{0}'.format(name), args=tp_args, tp_struct=tp_struct, + tp_perfetto='intel_ds_end_{0}'.format(name), + tp_print=tp_print, end_of_pipe=end_pipelined) @@ -73,8 +77,8 @@ def define_tracepoints(args): Arg(type='uint32_t', name='height', var='height', c_format='%u'), Arg(type='enum isl_aux_op', name='hiz_op', var='hiz_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'), Arg(type='enum isl_aux_op', name='fast_clear_op', var='fast_clear_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'), - Arg(type='enum blorp_shader_type', name='type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'), - Arg(type='enum blorp_shader_pipeline', name='pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),]) + Arg(type='enum blorp_shader_type', name='blorp_type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'), + Arg(type='enum blorp_shader_pipeline', name='blorp_pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),]) begin_end_tp('draw', tp_args=[ArgStruct(type='uint32_t', var='count'),], @@ -110,42 +114,58 @@ def define_tracepoints(args): ArgStruct(type='uint32_t', var='group_z'),], tp_struct=[Arg(type='uint32_t', name='group_x', var='group_x', c_format='%u'), Arg(type='uint32_t', name='group_y', var='group_y', c_format='%u'), - Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),]) + Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),], + tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) + + def flag_bits(args): + bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')] + for a in args: + bits.append(Arg(type='bool', name=a[1], 
var='__entry->flags & INTEL_DS_{0}_BIT'.format(a[0]), c_format='%u')) + return bits def stall_args(args): fmt = '' exprs = [] for a in args: fmt += '%s' - exprs.append('(__entry->flags & ANV_PIPE_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1])) + exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1])) + fmt += ' : %s' + exprs.append('__entry->reason ? __entry->reason : "unknown"') fmt = [fmt] fmt += exprs return fmt - Tracepoint('stall', - args=[ArgStruct(type='uint32_t', var='flags'),], - tp_struct=[Arg(type='uint32_t', name='flags', var='flags', c_format='0x%x'),], - tp_print=stall_args([['DEPTH_CACHE_FLUSH', 'depth_flush'], - ['DATA_CACHE_FLUSH', 'dc_flush'], - ['HDC_PIPELINE_FLUSH', 'hdc_flush'], - ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'], - ['TILE_CACHE_FLUSH', 'tile_flush'], - ['STATE_CACHE_INVALIDATE', 'state_inval'], - ['CONSTANT_CACHE_INVALIDATE', 'const_inval'], - ['VF_CACHE_INVALIDATE', 'vf_inval'], - ['TEXTURE_CACHE_INVALIDATE', 'tex_inval'], - ['INSTRUCTION_CACHE_INVALIDATE', 'ic_inval'], - ['STALL_AT_SCOREBOARD', 'pb_stall'], - ['DEPTH_STALL', 'depth_stall'], - ['CS_STALL', 'cs_stall'], - ])) + stall_flags = [['DEPTH_CACHE_FLUSH', 'depth_flush'], + ['DATA_CACHE_FLUSH', 'dc_flush'], + ['HDC_PIPELINE_FLUSH', 'hdc_flush'], + ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'], + ['TILE_CACHE_FLUSH', 'tile_flush'], + ['STATE_CACHE_INVALIDATE', 'state_inval'], + ['CONST_CACHE_INVALIDATE', 'const_inval'], + ['VF_CACHE_INVALIDATE', 'vf_inval'], + ['TEXTURE_CACHE_INVALIDATE', 'tex_inval'], + ['INST_CACHE_INVALIDATE', 'ic_inval'], + ['STALL_AT_SCOREBOARD', 'pb_stall'], + ['DEPTH_STALL', 'depth_stall'], + ['CS_STALL', 'cs_stall']] + begin_end_tp('stall', + tp_args=[ArgStruct(type='uint32_t', var='flags'), + ArgStruct(type='intel_ds_stall_cb_t', var='decode_cb'), + ArgStruct(type='const char *', var='reason'),], + tp_struct=[Arg(type='uint32_t', name='flags', var='decode_cb(flags)', c_format='0x%x'), + Arg(type='const char *', name='reason', 
var='reason', c_format='%s'),], + tp_print=stall_args(stall_flags), + end_pipelined=False) def generate_code(args): from u_trace import utrace_generate + from u_trace import utrace_generate_perfetto_utils - utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct anv_device *dev') + utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, + ctx_param='struct intel_ds_device *dev') + utrace_generate_perfetto_utils(hpath=args.perfetto_hdr) def main(): @@ -153,6 +173,7 @@ def main(): parser.add_argument('-p', '--import-path', required=True) parser.add_argument('--utrace-src', required=True) parser.add_argument('--utrace-hdr', required=True) + parser.add_argument('--perfetto-hdr', required=True) args = parser.parse_args() sys.path.insert(0, args.import_path) define_tracepoints(args) diff --git a/src/intel/ds/meson.build b/src/intel/ds/meson.build index b518bb7d901..15a4cd9cb86 100644 --- a/src/intel/ds/meson.build +++ b/src/intel/ds/meson.build @@ -3,30 +3,76 @@ # # SPDX-License-Identifier: MIT -pps_intel_sources = [ - 'intel_pps_perf.cc', - 'intel_pps_driver.cc', +intel_tracepoint_files = custom_target( + 'intel_tracepoints.[ch]', + input : 'intel_tracepoints.py', + output : ['intel_tracepoints.h', + 'intel_tracepoints_perfetto.h', + 'intel_tracepoints.c'], + command : [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/util/perf/'), + '--utrace-hdr', '@OUTPUT0@', + '--perfetto-hdr', '@OUTPUT1@', + '--utrace-src', '@OUTPUT2@', + ], + depend_files : u_trace_py, +) + +libintel_driver_ds_deps = [ + idep_mesautil, + idep_nir_headers, ] - -pps_intel_lib = static_library( - 'pps-intel', - sources: pps_intel_sources, - include_directories: [inc_tool, inc_src, inc_include, inc_intel], - link_with: [libintel_perf, libintel_dev], - dependencies: [dep_perfetto, dep_libdrm, idep_mesautil], - cpp_args: '-std=c++17' -) - -compile_args_pps_intel = ['-DPPS_INTEL'] - -pps_intel_dep = declare_dependency( - link_with: pps_intel_lib, - 
include_directories: [inc_tool, inc_include], - compile_args: compile_args_pps_intel, -) - -pps_datasources += pps_intel_dep -if not with_datasources.contains('intel') - with_datasources += 'intel' +if with_perfetto + libintel_driver_ds_deps += dep_perfetto +endif + +idep_intel_driver_ds_headers = declare_dependency( + sources : intel_tracepoint_files[0], + include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel], +) + +libintel_driver_ds = static_library( + 'intel-driver-ds', + sources : ['intel_driver_ds.cc', intel_tracepoint_files], + include_directories : [inc_src, inc_include, inc_intel, inc_mapi, inc_mesa], + link_with : [libintel_perf, libintel_dev], + dependencies : libintel_driver_ds_deps, + cpp_args : '-std=c++17', + gnu_symbol_visibility : 'hidden', +) + +idep_intel_driver_ds = declare_dependency( + link_with : libintel_driver_ds, + include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel], +) + +if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto')) + pps_intel_sources = files( + 'intel_pps_perf.cc', + 'intel_pps_driver.cc', + ) + + pps_intel_lib = static_library( + 'pps-intel', + sources: pps_intel_sources, + include_directories: [inc_tool, inc_src, inc_include, inc_intel], + link_with: [libintel_perf, libintel_dev], + dependencies: [dep_perfetto, dep_libdrm, idep_mesautil], + cpp_args: '-std=c++17' + ) + + compile_args_pps_intel = ['-DPPS_INTEL'] + + pps_intel_dep = declare_dependency( + link_with: pps_intel_lib, + include_directories: [inc_tool, inc_include], + compile_args: compile_args_pps_intel, + ) + + pps_datasources += pps_intel_dep + if not with_datasources.contains('intel') + with_datasources += 'intel' + endif endif diff --git a/src/intel/meson.build b/src/intel/meson.build index 4389de598c9..5d177553d89 100644 --- a/src/intel/meson.build +++ b/src/intel/meson.build @@ -28,6 +28,7 @@ subdir('isl') subdir('common') subdir('compiler') subdir('perf') +subdir('ds') if with_intel_tools 
subdir('tools') endif @@ -37,6 +38,3 @@ endif if with_intel_vk subdir('vulkan') endif -if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto')) - subdir('ds') -endif diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 85062f1ea87..15677320c4d 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -2374,10 +2374,15 @@ anv_queue_submit(struct vk_queue *vk_queue, return VK_SUCCESS; } + uint64_t start_ts = intel_ds_begin_submit(queue->ds); + pthread_mutex_lock(&device->mutex); result = anv_queue_submit_locked(queue, submit); + /* Take submission ID under lock */ pthread_mutex_unlock(&device->mutex); + intel_ds_end_submit(queue->ds, start_ts); + return result; } diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index d404b3a305b..d197f3e9dd0 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -302,7 +302,7 @@ static VkResult anv_create_cmd_buffer( anv_measure_init(cmd_buffer); - u_trace_init(&cmd_buffer->trace, &device->trace_context); + u_trace_init(&cmd_buffer->trace, &device->ds.trace_context); *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); @@ -407,7 +407,7 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer) anv_measure_reset(cmd_buffer); u_trace_fini(&cmd_buffer->trace); - u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context); + u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context); return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a11f35f0dd9..f3dad230768 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1126,6 +1126,8 @@ VkResult anv_CreateInstance( anv_init_dri_options(instance); + intel_driver_ds_init(); + *pInstance = anv_instance_to_handle(instance); return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h 
index 5e028f1dfbb..e2ae3957625 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -52,6 +52,7 @@ #include "blorp/blorp.h" #include "compiler/brw_compiler.h" #include "compiler/brw_rt.h" +#include "ds/intel_driver_ds.h" #include "util/bitset.h" #include "util/bitscan.h" #include "util/macros.h" @@ -1061,7 +1062,11 @@ struct anv_queue { const struct anv_queue_family * family; + uint32_t index_in_family; + uint32_t exec_flags; + + struct intel_ds_queue * ds; }; struct anv_pipeline_cache { @@ -1216,7 +1221,7 @@ struct anv_device { struct intel_debug_block_frame *debug_frame_desc; - struct u_trace_context trace_context; + struct intel_ds_device ds; }; #if defined(GFX_VERx10) && GFX_VERx10 >= 90 @@ -2395,6 +2400,9 @@ enum anv_pipe_bits { ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \ ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) +enum intel_ds_stall_flag +anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); + static inline enum anv_pipe_bits anv_pipe_flush_bits_for_access_flags(struct anv_device *device, VkAccessFlags2KHR flags) @@ -4549,16 +4557,25 @@ struct anv_memcpy_state { }; struct anv_utrace_flush_copy { - struct u_trace trace; + /* Needs to be the first field */ + struct intel_ds_flush_data ds; + /* Batch stuff to implement of copy of timestamps recorded in another + * buffer. 
+ */ struct anv_reloc_list relocs; struct anv_batch batch; struct anv_bo *batch_bo; + /* Buffer of 64bits timestamps */ struct anv_bo *trace_bo; + /* Syncobj to be signaled when the batch completes */ struct vk_sync *sync; + /* Queue on which all the recorded traces are submitted */ + struct anv_queue *queue; + struct anv_memcpy_state memcpy_state; }; @@ -4570,6 +4587,25 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, struct anv_cmd_buffer **cmd_buffers, struct anv_utrace_flush_copy **out_flush_data); +#ifdef HAVE_PERFETTO +void anv_perfetto_init(void); +uint64_t anv_perfetto_begin_submit(struct anv_queue *queue); +void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id, + uint64_t start_ts); +#else +static inline void anv_perfetto_init(void) +{ +} +static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue) +{ + return 0; +} +static inline void anv_perfetto_end_submit(struct anv_queue *queue, + uint32_t submission_id, + uint64_t start_ts) +{} +#endif + #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ VK_FROM_HANDLE(__anv_type, __name, __handle) diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index ccf799eb08c..93d835c68cb 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -48,6 +48,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, assert(queue->vk.queue_family_index < pdevice->queue.family_count); queue->family = &pdevice->queue.families[queue->vk.queue_family_index]; + queue->index_in_family = index_in_family; + queue->exec_flags = exec_flags; return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index 349fa554ffb..8fbabbb41a6 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -31,7 +31,7 @@ command_buffers_count_utraces(struct anv_device *device, struct anv_cmd_buffer **cmd_buffers, uint32_t *utrace_copies) { - if 
(!u_trace_context_actively_tracing(&device->trace_context)) + if (!u_trace_context_actively_tracing(&device->ds.trace_context)) return 0; uint32_t utraces = 0; @@ -51,10 +51,10 @@ anv_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data) { struct anv_device *device = - container_of(utctx, struct anv_device, trace_context); + container_of(utctx, struct anv_device, ds.trace_context); struct anv_utrace_flush_copy *flush = flush_data; - u_trace_fini(&flush->trace); + intel_ds_flush_data_fini(&flush->ds); if (flush->trace_bo) { assert(flush->batch_bo); @@ -76,7 +76,7 @@ anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx, uint32_t count) { struct anv_device *device = - container_of(utctx, struct anv_device, trace_context); + container_of(utctx, struct anv_device, ds.trace_context); struct anv_utrace_flush_copy *flush = cmdstream; struct anv_address from_addr = (struct anv_address) { .bo = ts_from, .offset = from_offset * sizeof(uint64_t) }; @@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, if (!flush) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - u_trace_init(&flush->trace, &device->trace_context); + intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id); result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, 0, 0, &flush->sync); @@ -155,14 +155,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, } else { u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), u_trace_end_iterator(&cmd_buffers[i]->trace), - &flush->trace, + &flush->ds.trace, flush, anv_device_utrace_emit_copy_ts_buffer); } } anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state); - u_trace_flush(&flush->trace, flush, true); + u_trace_flush(&flush->ds.trace, flush, true); if (flush->batch.status != VK_SUCCESS) { result = flush->batch.status; @@ -175,6 +175,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, } } + flush->queue = queue; + 
*out_flush_data = flush; return VK_SUCCESS; @@ -196,7 +198,7 @@ static void * anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b) { struct anv_device *device = - container_of(utctx, struct anv_device, trace_context); + container_of(utctx, struct anv_device, ds.trace_context); struct anv_bo *bo = NULL; UNUSED VkResult result = @@ -211,7 +213,7 @@ static void anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) { struct anv_device *device = - container_of(utctx, struct anv_device, trace_context); + container_of(utctx, struct anv_device, ds.trace_context); struct anv_bo *bo = timestamps; anv_device_release_bo(device, bo); @@ -237,7 +239,7 @@ anv_utrace_read_ts(struct u_trace_context *utctx, void *timestamps, unsigned idx, void *flush_data) { struct anv_device *device = - container_of(utctx, struct anv_device, trace_context); + container_of(utctx, struct anv_device, ds.trace_context); struct anv_bo *bo = timestamps; struct anv_utrace_flush_copy *flush = flush_data; @@ -261,19 +263,80 @@ anv_utrace_read_ts(struct u_trace_context *utctx, return intel_device_info_timebase_scale(&device->info, ts[idx]); } +static const char * +queue_family_to_name(const struct anv_queue_family *family) +{ + switch (family->engine_class) { + case I915_ENGINE_CLASS_RENDER: + return "render"; + case I915_ENGINE_CLASS_COPY: + return "copy"; + case I915_ENGINE_CLASS_VIDEO: + return "video"; + case I915_ENGINE_CLASS_VIDEO_ENHANCE: + return "video-enh"; + default: + return "unknown"; + } +} + void anv_device_utrace_init(struct anv_device *device) { - u_trace_context_init(&device->trace_context, device, + intel_ds_device_init(&device->ds, &device->info, device->fd, + device->physical->local_minor - 128, + INTEL_DS_API_VULKAN); + u_trace_context_init(&device->ds.trace_context, + &device->ds, anv_utrace_create_ts_buffer, anv_utrace_destroy_ts_buffer, anv_utrace_record_ts, anv_utrace_read_ts, anv_utrace_delete_flush_data); + + for (uint32_t q = 0; q < 
device->queue_count; q++) { + struct anv_queue *queue = &device->queues[q]; + + queue->ds = + intel_ds_device_add_queue(&device->ds, "%s%u", + queue_family_to_name(queue->family), + queue->index_in_family); + } } void anv_device_utrace_finish(struct anv_device *device) { - u_trace_context_fini(&device->trace_context); + intel_ds_device_fini(&device->ds); +} + +enum intel_ds_stall_flag +anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits) +{ + static const struct { + enum anv_pipe_bits anv; + enum intel_ds_stall_flag ds; + } anv_to_ds_flags[] = { + { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, }, + { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, }, + { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, }, + { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, }, + { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, }, + { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, }, + { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, }, + { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, }, + { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, }, + { .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, }, + { .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, }, + { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, }, + { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, }, + }; + + enum intel_ds_stall_flag ret = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) { + if (anv_to_ds_flags[i].anv & bits) + ret |= anv_to_ds_flags[i].ds; + } + + return ret; } diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index 037965a4ec8..67ffefd59e4 100644 
--- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -107,7 +107,7 @@ VkResult anv_QueuePresentKHR( vk_semaphore_reset_temporary(&queue->device->vk, semaphore); } - u_trace_context_process(&queue->device->trace_context, true); + u_trace_context_process(&device->ds.trace_context, true); return result; } diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index bbb90b4ace4..7cd7c5febda 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -34,13 +34,13 @@ #include "common/intel_l3_config.h" #include "blorp/blorp_genX_exec.h" -#include "anv_tracepoints.h" +#include "ds/intel_tracepoints.h" static void blorp_measure_start(struct blorp_batch *_batch, const struct blorp_params *params) { struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; - trace_begin_blorp(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_blorp(&cmd_buffer->trace, cmd_buffer); anv_measure_snapshot(cmd_buffer, params->snapshot_type, NULL, 0); @@ -50,13 +50,13 @@ static void blorp_measure_end(struct blorp_batch *_batch, const struct blorp_params *params) { struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; - trace_end_blorp(&cmd_buffer->trace, cmd_buffer, - params->x1 - params->x0, - params->y1 - params->y0, - params->hiz_op, - params->fast_clear_op, - params->shader_type, - params->shader_pipeline); + trace_intel_end_blorp(&cmd_buffer->trace, cmd_buffer, + params->x1 - params->x0, + params->y1 - params->y0, + params->hiz_op, + params->fast_clear_op, + params->shader_type, + params->shader_pipeline); } static void * diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1aa79f3805a..fda86b35048 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -38,7 +38,7 @@ #include "nir/nir_xfb_info.h" -#include "anv_tracepoints.h" +#include "ds/intel_tracepoints.h" /* We reserve : * - GPR 14 for secondary command buffer returns @@ -1763,7 
+1763,7 @@ genX(BeginCommandBuffer)( if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; - trace_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_emit_state_base_address)(cmd_buffer); @@ -1938,7 +1938,7 @@ genX(EndCommandBuffer)( emit_isp_disable(cmd_buffer); - trace_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level); + trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level); anv_cmd_buffer_end_batch_buffer(cmd_buffer); @@ -2405,8 +2405,10 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) else if (bits == 0) return; - if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS)) - trace_stall(&cmd_buffer->trace, cmd_buffer, bits); + bool trace_flush = + (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS)) != 0; + if (trace_flush) + trace_intel_begin_stall(&cmd_buffer->trace, cmd_buffer); if ((GFX_VER >= 8 && GFX_VER <= 9) && (bits & ANV_PIPE_CS_STALL_BIT) && @@ -2425,6 +2427,11 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->device, cmd_buffer->state.current_pipeline, bits); + + if (trace_flush) { + trace_intel_end_stall(&cmd_buffer->trace, cmd_buffer, bits, + anv_pipe_flush_bit_to_ds_stall_flag, NULL); + } } static void @@ -3963,7 +3970,7 @@ void genX(CmdDraw)( anv_measure_snapshot(cmd_buffer, INTEL_SNAPSHOT_DRAW, "draw", count); - trace_begin_draw(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -3993,7 +4000,7 @@ void genX(CmdDraw)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); - trace_end_draw(&cmd_buffer->trace, cmd_buffer, count); + trace_intel_end_draw(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawMultiEXT)( @@ -4018,7 +4025,7 @@ void 
genX(CmdDrawMultiEXT)( anv_measure_snapshot(cmd_buffer, INTEL_SNAPSHOT_DRAW, "draw_multi", count); - trace_begin_draw_multi(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_multi(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4051,7 +4058,7 @@ void genX(CmdDrawMultiEXT)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); - trace_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count); + trace_intel_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawIndexed)( @@ -4077,7 +4084,7 @@ void genX(CmdDrawIndexed)( INTEL_SNAPSHOT_DRAW, "draw indexed", count); - trace_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4105,7 +4112,7 @@ void genX(CmdDrawIndexed)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); - trace_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count); + trace_intel_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawMultiIndexedEXT)( @@ -4132,7 +4139,7 @@ void genX(CmdDrawMultiIndexedEXT)( INTEL_SNAPSHOT_DRAW, "draw indexed_multi", count); - trace_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4220,7 +4227,7 @@ void genX(CmdDrawMultiIndexedEXT)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); - trace_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count); + trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count); } /* Auto-Draw / Indirect Registers */ @@ -4256,7 +4263,7 @@ void genX(CmdDrawIndirectByteCountEXT)( INTEL_SNAPSHOT_DRAW, "draw indirect byte count", instanceCount); - trace_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4300,8 +4307,8 
@@ void genX(CmdDrawIndirectByteCountEXT)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); - trace_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer, - instanceCount); + trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer, + instanceCount); #endif /* GFX_VERx10 >= 75 */ } @@ -4358,7 +4365,7 @@ void genX(CmdDrawIndirect)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - trace_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4393,7 +4400,7 @@ void genX(CmdDrawIndirect)( offset += stride; } - trace_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); + trace_intel_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); } void genX(CmdDrawIndexedIndirect)( @@ -4411,7 +4418,7 @@ void genX(CmdDrawIndexedIndirect)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - trace_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4447,7 +4454,7 @@ void genX(CmdDrawIndexedIndirect)( offset += stride; } - trace_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); + trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); } static struct mi_value @@ -4574,7 +4581,7 @@ void genX(CmdDrawIndirectCount)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - trace_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4616,7 +4623,7 @@ void genX(CmdDrawIndirectCount)( mi_value_unref(&b, max); - trace_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount); + trace_intel_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount); } void genX(CmdDrawIndexedIndirectCount)( @@ -4638,7 +4645,7 @@ 
void genX(CmdDrawIndexedIndirectCount)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - trace_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4681,7 +4688,8 @@ void genX(CmdDrawIndexedIndirectCount)( mi_value_unref(&b, max); - trace_end_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount); + trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, + cmd_buffer, maxDrawCount); } @@ -5058,7 +5066,7 @@ void genX(CmdDispatchBase)( prog_data->local_size[0] * prog_data->local_size[1] * prog_data->local_size[2]); - trace_begin_compute(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer); if (prog_data->uses_num_work_groups) { struct anv_state state = @@ -5084,8 +5092,8 @@ void genX(CmdDispatchBase)( emit_cs_walker(cmd_buffer, pipeline, false, prog_data, groupCountX, groupCountY, groupCountZ); - trace_end_compute(&cmd_buffer->trace, cmd_buffer, - groupCountX, groupCountY, groupCountZ); + trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer, + groupCountX, groupCountY, groupCountZ); } #define GPGPU_DISPATCHDIMX 0x2500 @@ -5119,7 +5127,7 @@ void genX(CmdDispatchIndirect)( INTEL_SNAPSHOT_COMPUTE, "compute indirect", 0); - trace_begin_compute(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer); if (prog_data->uses_num_work_groups) { cmd_buffer->state.compute.num_workgroups = addr; @@ -5194,7 +5202,7 @@ void genX(CmdDispatchIndirect)( emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0); - trace_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0); + trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0); } #if GFX_VERx10 >= 125 @@ -6800,7 +6808,7 @@ void genX(CmdBeginRenderPass2)( cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea; anv_measure_beginrenderpass(cmd_buffer); - 
trace_begin_render_pass(&cmd_buffer->trace, cmd_buffer); + trace_intel_begin_render_pass(&cmd_buffer->trace, cmd_buffer); result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass, framebuffer, @@ -6843,13 +6851,13 @@ void genX(CmdEndRenderPass2)( cmd_buffer_end_subpass(cmd_buffer); - trace_end_render_pass(&cmd_buffer->trace, cmd_buffer, - cmd_buffer->state.render_area.extent.width, - cmd_buffer->state.render_area.extent.height, - cmd_buffer->state.pass->attachment_count, - cmd_buffer->state.pass->attachment_count > 0 ? - cmd_buffer->state.pass->attachments[0].samples : 0, - cmd_buffer->state.pass->subpass_count); + trace_intel_end_render_pass(&cmd_buffer->trace, cmd_buffer, + cmd_buffer->state.render_area.extent.width, + cmd_buffer->state.render_area.extent.height, + cmd_buffer->state.pass->attachment_count, + cmd_buffer->state.pass->attachment_count > 0 ? + cmd_buffer->state.pass->attachments[0].samples : 0, + cmd_buffer->state.pass->subpass_count); cmd_buffer->state.hiz_enabled = false; diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index be0308cacbe..8e6c702490a 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -33,19 +33,6 @@ anv_entrypoints = custom_target( depend_files : vk_entrypoints_gen_depend_files, ) -anv_tracepoints = custom_target( - 'anv_tracepoints.[ch]', - input: 'anv_tracepoints.py', - output: ['anv_tracepoints.h', 'anv_tracepoints.c'], - command: [ - prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/util/perf/'), - '--utrace-hdr', '@OUTPUT0@', - '--utrace-src', '@OUTPUT1@', - ], - depend_files: u_trace_py, -) - intel_icd = custom_target( 'intel_icd', input : [vk_icd_gen, vk_api_xml], @@ -78,7 +65,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']], _gfx_ver = g[0] libanv_per_hw_ver_libs += static_library( 'anv_per_hw_ver@0@'.format(_gfx_ver), - [anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_tracepoints[0]], + [anv_per_hw_ver_files, g[1], 
anv_entrypoints[0]], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel, ], @@ -90,7 +77,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']], dependencies : [ dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml, idep_vulkan_util_headers, idep_vulkan_wsi_headers, - idep_vulkan_runtime_headers, + idep_vulkan_runtime_headers, idep_intel_driver_ds_headers, ], ) endforeach @@ -142,6 +129,8 @@ anv_flags = [ c_sse2_args, ] +anv_cpp_flags = [] + if with_platform_x11 anv_deps += dep_xcb_dri3 endif @@ -164,24 +153,27 @@ else libanv_files += files('anv_android_stubs.c') endif +anv_deps += idep_intel_driver_ds_headers + libanv_common = static_library( 'anv_common', [ libanv_files, anv_entrypoints, sha1_h, - gen_xml_pack + gen_xml_pack, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, inc_util, ], c_args : anv_flags, + cpp_args : anv_cpp_flags, gnu_symbol_visibility : 'hidden', dependencies : anv_deps, ) libvulkan_intel = shared_library( 'vulkan_intel', - [files('anv_gem.c'), anv_entrypoints[0], anv_tracepoints], + [files('anv_gem.c'), anv_entrypoints[0]], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, ], @@ -193,6 +185,7 @@ libvulkan_intel = shared_library( dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common, idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime, idep_mesautil, idep_xmlconfig, + idep_intel_driver_ds, ], c_args : anv_flags, gnu_symbol_visibility : 'hidden', @@ -216,7 +209,7 @@ endif if with_tests libvulkan_intel_test = static_library( 'vulkan_intel_test', - [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_tracepoints[0]], + [files('anv_gem_stubs.c'), anv_entrypoints[0]], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, ], @@ -241,13 +234,13 @@ if with_tests 'anv_@0@'.format(t), 
executable( t, - ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_tracepoints[0]], + ['tests/@0@.c'.format(t), anv_entrypoints[0]], c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, dependencies : [ dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, idep_vulkan_wsi_headers, - idep_vulkan_runtime, + idep_vulkan_runtime, idep_intel_driver_ds, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,