crocus: initial gallium driver for Intel gfx 4-7
This is a gallium driver for the Intel gfx 4-7 GPUs. It was initially cloned from the iris driver by Ilia Mirkin, then I ported over large reams of code from i965 until it worked. Acked-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11146>
This commit is contained in:
parent
8da92b5c0a
commit
f3630548f1
|
@ -231,6 +231,7 @@ with_gallium_v3d = gallium_drivers.contains('v3d')
|
|||
with_gallium_panfrost = gallium_drivers.contains('panfrost')
|
||||
with_gallium_etnaviv = gallium_drivers.contains('etnaviv')
|
||||
with_gallium_tegra = gallium_drivers.contains('tegra')
|
||||
with_gallium_crocus = gallium_drivers.contains('crocus')
|
||||
with_gallium_iris = gallium_drivers.contains('iris')
|
||||
with_gallium_i915 = gallium_drivers.contains('i915')
|
||||
with_gallium_svga = gallium_drivers.contains('svga')
|
||||
|
@ -284,7 +285,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
|
|||
with_any_vk = _vulkan_drivers.length() != 0
|
||||
|
||||
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
|
||||
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris
|
||||
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
|
||||
|
||||
if with_swrast_vk and not with_gallium_softpipe
|
||||
error('swrast vulkan requires gallium swrast')
|
||||
|
@ -795,7 +796,7 @@ if with_gallium_st_nine
|
|||
error('The nine state tracker requires gallium softpipe/llvmpipe.')
|
||||
elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
|
||||
or with_gallium_r300 or with_gallium_svga or with_gallium_i915
|
||||
or with_gallium_iris)
|
||||
or with_gallium_iris or with_gallium_crocus)
|
||||
error('The nine state tracker requires at least one non-swrast gallium driver.')
|
||||
endif
|
||||
if not with_dri3
|
||||
|
|
|
@ -67,7 +67,7 @@ option(
|
|||
choices : [
|
||||
'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
|
||||
'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
|
||||
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi'
|
||||
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi', 'crocus'
|
||||
],
|
||||
description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
|
||||
)
|
||||
|
|
|
@ -70,6 +70,7 @@ static const struct pipe_loader_ops pipe_loader_drm_ops;
|
|||
static const struct drm_driver_descriptor *driver_descriptors[] = {
|
||||
&i915_driver_descriptor,
|
||||
&iris_driver_descriptor,
|
||||
&crocus_driver_descriptor,
|
||||
&nouveau_driver_descriptor,
|
||||
&r300_driver_descriptor,
|
||||
&r600_driver_descriptor,
|
||||
|
|
|
@ -112,6 +112,26 @@ DRM_DRIVER_DESCRIPTOR(iris, iris_driconf, ARRAY_SIZE(iris_driconf))
|
|||
DRM_DRIVER_DESCRIPTOR_STUB(iris)
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_CROCUS
|
||||
#include "crocus/drm/crocus_drm_public.h"
|
||||
|
||||
static struct pipe_screen *
|
||||
pipe_crocus_create_screen(int fd, const struct pipe_screen_config *config)
|
||||
{
|
||||
struct pipe_screen *screen;
|
||||
|
||||
screen = crocus_drm_screen_create(fd, config);
|
||||
return screen ? debug_screen_wrap(screen) : NULL;
|
||||
}
|
||||
|
||||
const driOptionDescription crocus_driconf[] = {
|
||||
#include "crocus/driinfo_crocus.h"
|
||||
};
|
||||
DRM_DRIVER_DESCRIPTOR(crocus, crocus_driconf, ARRAY_SIZE(crocus_driconf))
|
||||
#else
|
||||
DRM_DRIVER_DESCRIPTOR_STUB(crocus)
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_NOUVEAU
|
||||
#include "nouveau/drm/nouveau_drm_public.h"
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ struct pipe_screen_config;
|
|||
|
||||
extern const struct drm_driver_descriptor i915_driver_descriptor;
|
||||
extern const struct drm_driver_descriptor iris_driver_descriptor;
|
||||
extern const struct drm_driver_descriptor crocus_driver_descriptor;
|
||||
extern const struct drm_driver_descriptor nouveau_driver_descriptor;
|
||||
extern const struct drm_driver_descriptor r300_driver_descriptor;
|
||||
extern const struct drm_driver_descriptor r600_driver_descriptor;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_BATCH_DOT_H
|
||||
#define CROCUS_BATCH_DOT_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "common/intel_decoder.h"
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
|
||||
#include "crocus_fence.h"
|
||||
#include "crocus_fine_fence.h"
|
||||
|
||||
#include "crocus_bufmgr.h"
|
||||
/* The kernel assumes batchbuffers are smaller than 256kB. */
|
||||
#define MAX_BATCH_SIZE (256 * 1024)
|
||||
|
||||
/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
|
||||
* Address, which means that we can't put binding tables beyond 64kB. This
|
||||
* effectively limits the maximum statebuffer size to 64kB.
|
||||
*/
|
||||
#define MAX_STATE_SIZE (64 * 1024)
|
||||
|
||||
/* Our target batch size - flush approximately at this point. */
|
||||
#define BATCH_SZ (20 * 1024)
|
||||
#define STATE_SZ (16 * 1024)
|
||||
|
||||
enum crocus_batch_name {
|
||||
CROCUS_BATCH_RENDER,
|
||||
CROCUS_BATCH_COMPUTE,
|
||||
};
|
||||
|
||||
#define CROCUS_BATCH_COUNT 2
|
||||
|
||||
struct crocus_address {
|
||||
struct crocus_bo *bo;
|
||||
int32_t offset;
|
||||
uint32_t reloc_flags;
|
||||
};
|
||||
|
||||
struct crocus_reloc_list {
|
||||
struct drm_i915_gem_relocation_entry *relocs;
|
||||
int reloc_count;
|
||||
int reloc_array_size;
|
||||
};
|
||||
|
||||
struct crocus_growing_bo {
|
||||
struct crocus_bo *bo;
|
||||
void *map;
|
||||
void *map_next;
|
||||
struct crocus_bo *partial_bo;
|
||||
void *partial_bo_map;
|
||||
unsigned partial_bytes;
|
||||
struct crocus_reloc_list relocs;
|
||||
unsigned used;
|
||||
};
|
||||
|
||||
struct crocus_batch {
|
||||
struct crocus_context *ice;
|
||||
struct crocus_screen *screen;
|
||||
struct pipe_debug_callback *dbg;
|
||||
struct pipe_device_reset_callback *reset;
|
||||
|
||||
/** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
|
||||
enum crocus_batch_name name;
|
||||
|
||||
/** buffers: command, state */
|
||||
struct crocus_growing_bo command, state;
|
||||
|
||||
/** Size of the primary batch if we've moved on to a secondary. */
|
||||
unsigned primary_batch_size;
|
||||
|
||||
bool state_base_address_emitted;
|
||||
uint8_t pipe_controls_since_last_cs_stall;
|
||||
|
||||
uint32_t hw_ctx_id;
|
||||
|
||||
uint32_t valid_reloc_flags;
|
||||
|
||||
bool use_shadow_copy;
|
||||
bool no_wrap;
|
||||
|
||||
/** The validation list */
|
||||
struct drm_i915_gem_exec_object2 *validation_list;
|
||||
struct crocus_bo **exec_bos;
|
||||
int exec_count;
|
||||
int exec_array_size;
|
||||
|
||||
/** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
|
||||
* instruction is a MI_BATCH_BUFFER_END).
|
||||
*/
|
||||
bool noop_enabled;
|
||||
|
||||
/**
|
||||
* A list of crocus_syncobjs associated with this batch.
|
||||
*
|
||||
* The first list entry will always be a signalling sync-point, indicating
|
||||
* that this batch has completed. The others are likely to be sync-points
|
||||
* to wait on before executing the batch.
|
||||
*/
|
||||
struct util_dynarray syncobjs;
|
||||
|
||||
/** A list of drm_i915_exec_fences to have execbuf signal or wait on */
|
||||
struct util_dynarray exec_fences;
|
||||
|
||||
/** The amount of aperture space (in bytes) used by all exec_bos */
|
||||
int aperture_space;
|
||||
|
||||
struct {
|
||||
/** Uploader to use for sequence numbers */
|
||||
struct u_upload_mgr *uploader;
|
||||
|
||||
/** GPU buffer and CPU map where our seqno's will be written. */
|
||||
struct crocus_state_ref ref;
|
||||
uint32_t *map;
|
||||
|
||||
/** The sequence number to write the next time we add a fence. */
|
||||
uint32_t next;
|
||||
} fine_fences;
|
||||
|
||||
/** A seqno (and syncobj) for the last batch that was submitted. */
|
||||
struct crocus_fine_fence *last_fence;
|
||||
|
||||
/** List of other batches which we might need to flush to use a BO */
|
||||
struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];
|
||||
|
||||
struct {
|
||||
/**
|
||||
* Set of struct brw_bo * that have been rendered to within this
|
||||
* batchbuffer and would need flushing before being used from another
|
||||
* cache domain that isn't coherent with it (i.e. the sampler).
|
||||
*/
|
||||
struct hash_table *render;
|
||||
|
||||
/**
|
||||
* Set of struct brw_bo * that have been used as a depth buffer within
|
||||
* this batchbuffer and would need flushing before being used from
|
||||
* another cache domain that isn't coherent with it (i.e. the sampler).
|
||||
*/
|
||||
struct set *depth;
|
||||
} cache;
|
||||
|
||||
struct intel_batch_decode_ctx decoder;
|
||||
struct hash_table_u64 *state_sizes;
|
||||
|
||||
/** Have we emitted any draw calls to this batch? */
|
||||
bool contains_draw;
|
||||
|
||||
/** Batch contains fence signal operation. */
|
||||
bool contains_fence_signal;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
batch_has_fine_fence(struct crocus_batch *batch)
|
||||
{
|
||||
return !!batch->fine_fences.uploader;
|
||||
}
|
||||
|
||||
#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)
|
||||
void crocus_init_batch(struct crocus_context *ctx,
|
||||
enum crocus_batch_name name,
|
||||
int priority);
|
||||
void crocus_batch_free(struct crocus_batch *batch);
|
||||
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);
|
||||
|
||||
void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
|
||||
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)
|
||||
|
||||
bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);
|
||||
|
||||
bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);
|
||||
|
||||
#define RELOC_WRITE EXEC_OBJECT_WRITE
|
||||
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
|
||||
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
|
||||
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
|
||||
|
||||
void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
|
||||
bool writable);
|
||||
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
|
||||
struct crocus_bo *target, uint32_t target_offset,
|
||||
unsigned int reloc_flags);
|
||||
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
|
||||
struct crocus_bo *target, uint32_t target_offset,
|
||||
unsigned int reloc_flags);
|
||||
|
||||
enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);
|
||||
|
||||
void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
|
||||
unsigned used, unsigned new_size);
|
||||
|
||||
static inline unsigned
|
||||
crocus_batch_bytes_used(struct crocus_batch *batch)
|
||||
{
|
||||
return batch->command.map_next - batch->command.map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the current command buffer has \param size bytes of space
|
||||
* remaining. If not, this creates a secondary batch buffer and emits
|
||||
* a jump from the primary batch to the start of the secondary.
|
||||
*
|
||||
* Most callers want crocus_get_command_space() instead.
|
||||
*/
|
||||
static inline void
|
||||
crocus_require_command_space(struct crocus_batch *batch, unsigned size)
|
||||
{
|
||||
const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
|
||||
unsigned used = crocus_batch_bytes_used(batch);
|
||||
if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
|
||||
crocus_batch_flush(batch);
|
||||
} else if (used + size >= batch->command.bo->size) {
|
||||
const unsigned new_size =
|
||||
MIN2(batch->command.bo->size + batch->command.bo->size / 2,
|
||||
MAX_BATCH_SIZE);
|
||||
|
||||
crocus_grow_buffer(batch, false, used, new_size);
|
||||
batch->command.map_next = (void *)batch->command.map + used;
|
||||
assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate space in the current command buffer, and return a pointer
|
||||
* to the mapped area so the caller can write commands there.
|
||||
*
|
||||
* This should be called whenever emitting commands.
|
||||
*/
|
||||
static inline void *
|
||||
crocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
|
||||
{
|
||||
crocus_require_command_space(batch, bytes);
|
||||
void *map = batch->command.map_next;
|
||||
batch->command.map_next += bytes;
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to emit GPU commands - allocates space, copies them there.
|
||||
*/
|
||||
static inline void
|
||||
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
|
||||
{
|
||||
void *map = crocus_get_command_space(batch, size);
|
||||
memcpy(map, data, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a pointer to the batch's signalling syncobj. Does not refcount.
|
||||
*/
|
||||
static inline struct crocus_syncobj *
|
||||
crocus_batch_get_signal_syncobj(struct crocus_batch *batch)
|
||||
{
|
||||
/* The signalling syncobj is the first one in the list. */
|
||||
struct crocus_syncobj *syncobj =
|
||||
((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
|
||||
return syncobj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take a reference to the batch's signalling syncobj.
|
||||
*
|
||||
* Callers can use this to wait for the the current batch under construction
|
||||
* to complete (after flushing it).
|
||||
*/
|
||||
static inline void
|
||||
crocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
|
||||
struct crocus_syncobj **out_syncobj)
|
||||
{
|
||||
struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
|
||||
crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
|
||||
*/
|
||||
static inline void
|
||||
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
|
||||
uint32_t size)
|
||||
{
|
||||
if (ht) {
|
||||
_mesa_hash_table_u64_insert(ht, offset_from_base,
|
||||
(void *)(uintptr_t)size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
crocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
|
||||
{
|
||||
return (char *)p >= (char *)batch->state.map &&
|
||||
(char *)p < (char *)batch->state.map + batch->state.bo->size;
|
||||
}
|
||||
|
||||
static inline void
|
||||
crocus_require_statebuffer_space(struct crocus_batch *batch, int size)
|
||||
{
|
||||
if (batch->state.used + size >= STATE_SZ)
|
||||
crocus_batch_flush(batch);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,836 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "intel/blorp/blorp.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
|
||||
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond)
|
||||
{
|
||||
util_blitter_save_vertex_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_VERTEX]);
|
||||
util_blitter_save_tessctrl_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]);
|
||||
util_blitter_save_tesseval_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]);
|
||||
util_blitter_save_geometry_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]);
|
||||
util_blitter_save_so_targets(ice->blitter, ice->state.so_targets,
|
||||
(struct pipe_stream_output_target**)ice->state.so_target);
|
||||
util_blitter_save_vertex_buffer_slot(ice->blitter, ice->state.vertex_buffers);
|
||||
util_blitter_save_vertex_elements(ice->blitter, (void *)ice->state.cso_vertex_elements);
|
||||
if (op & CROCUS_SAVE_FRAGMENT_STATE) {
|
||||
util_blitter_save_blend(ice->blitter, ice->state.cso_blend);
|
||||
util_blitter_save_depth_stencil_alpha(ice->blitter, ice->state.cso_zsa);
|
||||
util_blitter_save_stencil_ref(ice->blitter, &ice->state.stencil_ref);
|
||||
util_blitter_save_fragment_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]);
|
||||
util_blitter_save_sample_mask(ice->blitter, ice->state.sample_mask);
|
||||
util_blitter_save_rasterizer(ice->blitter, ice->state.cso_rast);
|
||||
util_blitter_save_scissor(ice->blitter, &ice->state.scissors[0]);
|
||||
util_blitter_save_viewport(ice->blitter, &ice->state.viewports[0]);
|
||||
util_blitter_save_fragment_constant_buffer_slot(ice->blitter, &ice->state.shaders[MESA_SHADER_FRAGMENT].constbufs[0]);
|
||||
}
|
||||
|
||||
if (!render_cond)
|
||||
util_blitter_save_render_condition(ice->blitter,
|
||||
(struct pipe_query *)ice->condition.query,
|
||||
ice->condition.condition,
|
||||
ice->condition.mode);
|
||||
|
||||
// util_blitter_save_scissor(ice->blitter, &ice->scissors[0]);
|
||||
if (op & CROCUS_SAVE_FRAMEBUFFER)
|
||||
util_blitter_save_framebuffer(ice->blitter, &ice->state.framebuffer);
|
||||
|
||||
if (op & CROCUS_SAVE_TEXTURES) {
|
||||
util_blitter_save_fragment_sampler_states(ice->blitter, 1, (void **)ice->state.shaders[MESA_SHADER_FRAGMENT].samplers);
|
||||
util_blitter_save_fragment_sampler_views(ice->blitter, 1, (struct pipe_sampler_view **)ice->state.shaders[MESA_SHADER_FRAGMENT].textures);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for handling mirror image blits.
|
||||
*
|
||||
* If coord0 > coord1, swap them and return "true" (mirrored).
|
||||
*/
|
||||
static bool
|
||||
apply_mirror(float *coord0, float *coord1)
|
||||
{
|
||||
if (*coord0 > *coord1) {
|
||||
float tmp = *coord0;
|
||||
*coord0 = *coord1;
|
||||
*coord1 = tmp;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the number of pixels to clip for each side of a rect
|
||||
*
|
||||
* \param x0 The rect's left coordinate
|
||||
* \param y0 The rect's bottom coordinate
|
||||
* \param x1 The rect's right coordinate
|
||||
* \param y1 The rect's top coordinate
|
||||
* \param min_x The clipping region's left coordinate
|
||||
* \param min_y The clipping region's bottom coordinate
|
||||
* \param max_x The clipping region's right coordinate
|
||||
* \param max_y The clipping region's top coordinate
|
||||
* \param clipped_x0 The number of pixels to clip from the left side
|
||||
* \param clipped_y0 The number of pixels to clip from the bottom side
|
||||
* \param clipped_x1 The number of pixels to clip from the right side
|
||||
* \param clipped_y1 The number of pixels to clip from the top side
|
||||
*
|
||||
* \return false if we clip everything away, true otherwise
|
||||
*/
|
||||
static inline bool
|
||||
compute_pixels_clipped(float x0, float y0, float x1, float y1,
|
||||
float min_x, float min_y, float max_x, float max_y,
|
||||
float *clipped_x0, float *clipped_y0,
|
||||
float *clipped_x1, float *clipped_y1)
|
||||
{
|
||||
/* If we are going to clip everything away, stop. */
|
||||
if (!(min_x <= max_x &&
|
||||
min_y <= max_y &&
|
||||
x0 <= max_x &&
|
||||
y0 <= max_y &&
|
||||
min_x <= x1 &&
|
||||
min_y <= y1 &&
|
||||
x0 <= x1 &&
|
||||
y0 <= y1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (x0 < min_x)
|
||||
*clipped_x0 = min_x - x0;
|
||||
else
|
||||
*clipped_x0 = 0;
|
||||
if (max_x < x1)
|
||||
*clipped_x1 = x1 - max_x;
|
||||
else
|
||||
*clipped_x1 = 0;
|
||||
|
||||
if (y0 < min_y)
|
||||
*clipped_y0 = min_y - y0;
|
||||
else
|
||||
*clipped_y0 = 0;
|
||||
if (max_y < y1)
|
||||
*clipped_y1 = y1 - max_y;
|
||||
else
|
||||
*clipped_y1 = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clips a coordinate (left, right, top or bottom) for the src or dst rect
|
||||
* (whichever requires the largest clip) and adjusts the coordinate
|
||||
* for the other rect accordingly.
|
||||
*
|
||||
* \param mirror true if mirroring is required
|
||||
* \param src the source rect coordinate (for example src_x0)
|
||||
* \param dst0 the dst rect coordinate (for example dst_x0)
|
||||
* \param dst1 the opposite dst rect coordinate (for example dst_x1)
|
||||
* \param clipped_dst0 number of pixels to clip from the dst coordinate
|
||||
* \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
|
||||
* \param scale the src vs dst scale involved for that coordinate
|
||||
* \param is_left_or_bottom true if we are clipping the left or bottom sides
|
||||
* of the rect.
|
||||
*/
|
||||
static void
|
||||
clip_coordinates(bool mirror,
|
||||
float *src, float *dst0, float *dst1,
|
||||
float clipped_dst0,
|
||||
float clipped_dst1,
|
||||
float scale,
|
||||
bool is_left_or_bottom)
|
||||
{
|
||||
/* When clipping we need to add or subtract pixels from the original
|
||||
* coordinates depending on whether we are acting on the left/bottom
|
||||
* or right/top sides of the rect respectively. We assume we have to
|
||||
* add them in the code below, and multiply by -1 when we should
|
||||
* subtract.
|
||||
*/
|
||||
int mult = is_left_or_bottom ? 1 : -1;
|
||||
|
||||
if (!mirror) {
|
||||
*dst0 += clipped_dst0 * mult;
|
||||
*src += clipped_dst0 * scale * mult;
|
||||
} else {
|
||||
*dst1 -= clipped_dst1 * mult;
|
||||
*src += clipped_dst1 * scale * mult;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a scissor rectangle to blit coordinates.
|
||||
*
|
||||
* Returns true if the blit was entirely scissored away.
|
||||
*/
|
||||
static bool
|
||||
apply_blit_scissor(const struct pipe_scissor_state *scissor,
|
||||
float *src_x0, float *src_y0,
|
||||
float *src_x1, float *src_y1,
|
||||
float *dst_x0, float *dst_y0,
|
||||
float *dst_x1, float *dst_y1,
|
||||
bool mirror_x, bool mirror_y)
|
||||
{
|
||||
float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1;
|
||||
|
||||
/* Compute number of pixels to scissor away. */
|
||||
if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1,
|
||||
scissor->minx, scissor->miny,
|
||||
scissor->maxx, scissor->maxy,
|
||||
&clip_dst_x0, &clip_dst_y0,
|
||||
&clip_dst_x1, &clip_dst_y1))
|
||||
return true;
|
||||
|
||||
// XXX: comments assume source clipping, which we don't do
|
||||
|
||||
/* When clipping any of the two rects we need to adjust the coordinates
|
||||
* in the other rect considering the scaling factor involved. To obtain
|
||||
* the best precision we want to make sure that we only clip once per
|
||||
* side to avoid accumulating errors due to the scaling adjustment.
|
||||
*
|
||||
* For example, if src_x0 and dst_x0 need both to be clipped we want to
|
||||
* avoid the situation where we clip src_x0 first, then adjust dst_x0
|
||||
* accordingly but then we realize that the resulting dst_x0 still needs
|
||||
* to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are
|
||||
* applying scaling factors to adjust the coordinates in each clipping
|
||||
* pass we lose some precision and that can affect the results of the
|
||||
* blorp blit operation slightly. What we want to do here is detect the
|
||||
* rect that we should clip first for each side so that when we adjust
|
||||
* the other rect we ensure the resulting coordinate does not need to be
|
||||
* clipped again.
|
||||
*
|
||||
* The code below implements this by comparing the number of pixels that
|
||||
* we need to clip for each side of both rects considering the scales
|
||||
* involved. For example, clip_src_x0 represents the number of pixels
|
||||
* to be clipped for the src rect's left side, so if clip_src_x0 = 5,
|
||||
* clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more
|
||||
* from the dst rect so we should clip dst_x0 only and adjust src_x0.
|
||||
* This is because clipping 4 pixels in the dst is equivalent to
|
||||
* clipping 4 * 2 = 8 > 5 in the src.
|
||||
*/
|
||||
|
||||
if (*src_x0 == *src_x1 || *src_y0 == *src_y1
|
||||
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1)
|
||||
return true;
|
||||
|
||||
float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);
|
||||
float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0);
|
||||
|
||||
/* Clip left side */
|
||||
clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1,
|
||||
clip_dst_x0, clip_dst_x1, scale_x, true);
|
||||
|
||||
/* Clip right side */
|
||||
clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0,
|
||||
clip_dst_x1, clip_dst_x0, scale_x, false);
|
||||
|
||||
/* Clip bottom side */
|
||||
clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1,
|
||||
clip_dst_y0, clip_dst_y1, scale_y, true);
|
||||
|
||||
/* Clip top side */
|
||||
clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0,
|
||||
clip_dst_y1, clip_dst_y0, scale_y, false);
|
||||
|
||||
/* Check for invalid bounds
|
||||
* Can't blit for 0-dimensions
|
||||
*/
|
||||
return *src_x0 == *src_x1 || *src_y0 == *src_y1
|
||||
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
|
||||
struct isl_device *isl_dev,
|
||||
struct blorp_surf *surf,
|
||||
struct pipe_resource *p_res,
|
||||
enum isl_aux_usage aux_usage,
|
||||
unsigned level,
|
||||
bool is_render_target)
|
||||
{
|
||||
struct crocus_resource *res = (void *) p_res;
|
||||
|
||||
assert(!crocus_resource_unfinished_aux_import(res));
|
||||
|
||||
if (isl_aux_usage_has_hiz(aux_usage) &&
|
||||
!crocus_resource_level_has_hiz(res, level))
|
||||
aux_usage = ISL_AUX_USAGE_NONE;
|
||||
|
||||
*surf = (struct blorp_surf) {
|
||||
.surf = &res->surf,
|
||||
.addr = (struct blorp_address) {
|
||||
.buffer = res->bo,
|
||||
.offset = res->offset,
|
||||
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
|
||||
.mocs = crocus_mocs(res->bo, isl_dev),
|
||||
},
|
||||
.aux_usage = aux_usage,
|
||||
};
|
||||
|
||||
if (aux_usage != ISL_AUX_USAGE_NONE) {
|
||||
surf->aux_surf = &res->aux.surf;
|
||||
surf->aux_addr = (struct blorp_address) {
|
||||
.buffer = res->aux.bo,
|
||||
.offset = res->aux.offset,
|
||||
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
|
||||
.mocs = crocus_mocs(res->bo, isl_dev),
|
||||
};
|
||||
surf->clear_color =
|
||||
crocus_resource_get_clear_color(res);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tex_cache_flush_hack(struct crocus_batch *batch,
|
||||
enum isl_format view_format,
|
||||
enum isl_format surf_format)
|
||||
{
|
||||
/* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says:
|
||||
*
|
||||
* "Currently Sampler assumes that a surface would not have two
|
||||
* different format associate with it. It will not properly cache
|
||||
* the different views in the MT cache, causing a data corruption."
|
||||
*
|
||||
* We may need to handle this for texture views in general someday, but
|
||||
* for now we handle it here, as it hurts copies and blits particularly
|
||||
* badly because they ofter reinterpret formats.
|
||||
*
|
||||
* If the BO hasn't been referenced yet this batch, we assume that the
|
||||
* texture cache doesn't contain any relevant data nor need flushing.
|
||||
*
|
||||
* Icelake (Gen11+) claims to fix this issue, but seems to still have
|
||||
* issues with ASTC formats.
|
||||
*/
|
||||
bool need_flush = view_format != surf_format;
|
||||
if (!need_flush)
|
||||
return;
|
||||
|
||||
const char *reason =
|
||||
"workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads";
|
||||
|
||||
crocus_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL);
|
||||
crocus_emit_pipe_control_flush(batch, reason,
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
|
||||
}
|
||||
|
||||
static struct crocus_resource *
|
||||
crocus_resource_for_aspect(const struct intel_device_info *devinfo,
|
||||
struct pipe_resource *p_res, unsigned pipe_mask)
|
||||
{
|
||||
if (pipe_mask == PIPE_MASK_S) {
|
||||
struct crocus_resource *junk, *s_res;
|
||||
crocus_get_depth_stencil_resources(devinfo, p_res, &junk, &s_res);
|
||||
return s_res;
|
||||
} else {
|
||||
return (struct crocus_resource *)p_res;
|
||||
}
|
||||
}
|
||||
|
||||
static enum pipe_format
|
||||
pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask)
|
||||
{
|
||||
if (pipe_mask == PIPE_MASK_S) {
|
||||
return util_format_stencil_only(format);
|
||||
} else if (pipe_mask == PIPE_MASK_Z) {
|
||||
return util_format_get_depth_only(format);
|
||||
} else {
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_u_blitter(struct crocus_context *ice,
|
||||
const struct pipe_blit_info *info)
|
||||
{
|
||||
struct pipe_blit_info dinfo = *info;
|
||||
if (!util_format_has_alpha(dinfo.dst.resource->format))
|
||||
dinfo.mask &= ~PIPE_MASK_A;
|
||||
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
|
||||
util_blitter_blit(ice->blitter, &dinfo);
|
||||
}
|
||||
|
||||
/**
 * The pipe->blit() driver hook.
 *
 * This performs a blit between two surfaces, which copies data but may
 * also perform format conversion, scaling, flipping, and so on.
 *
 * Dispatch order:
 *   - gfx4/5: try the hardware BLT engine first, then util_blitter (with
 *     special depth/stencil fallbacks), only using BLORP for non-ZS,
 *     non-3D destinations.
 *   - gfx6: 3D-to-3D blits go through util_blitter.
 *   - everything else: BLORP.
 */
static void
crocus_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   enum blorp_batch_flags blorp_flags = 0;

   /* We don't support color masking. */
   assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA ||
          (info->mask & PIPE_MASK_RGBA) == 0);

   /* Conditional rendering: skip the blit entirely if the condition fails. */
   if (info->render_condition_enable)
      if (!crocus_check_conditional_render(ice))
         return;

   if (devinfo->ver <= 5) {
      /* Try the BLT engine first; on success there's nothing more to do. */
      if (!screen->vtbl.blit_blt(batch, info)) {

         /* Color (non-ZS), non-3D destinations can go straight to BLORP. */
         if (!util_format_is_depth_or_stencil(info->src.resource->format) &&
             info->dst.resource->target != PIPE_TEXTURE_3D)
            goto use_blorp;

         if (!util_blitter_is_blit_supported(ice->blitter, info)) {
            if (util_format_is_depth_or_stencil(info->src.resource->format)) {

               /* Depth/stencil fallback: blit depth via util_blitter,
                * clear stencil, then run the stencil software fallback.
                */
               struct pipe_blit_info depth_blit = *info;
               depth_blit.mask = PIPE_MASK_Z;
               crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
               util_blitter_blit(ice->blitter, &depth_blit);

               /* NOTE(review): dst_view comes from ctx->create_surface()
                * and is never released in this function — looks like a
                * pipe_surface reference leak; confirm against gallium
                * surface ownership rules.
                */
               struct pipe_surface *dst_view, dst_templ;
               util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z);
               dst_view = ctx->create_surface(ctx, info->dst.resource, &dst_templ);

               crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);

               util_blitter_clear_depth_stencil(ice->blitter, dst_view, PIPE_CLEAR_STENCIL,
                                                0, 0, info->dst.box.x, info->dst.box.y,
                                                info->dst.box.width, info->dst.box.height);
               crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
               util_blitter_stencil_fallback(ice->blitter,
                                             info->dst.resource,
                                             info->dst.level,
                                             &info->dst.box,
                                             info->src.resource,
                                             info->src.level,
                                             &info->src.box, NULL);

            }
            return;
         }

         crocus_u_blitter(ice, info);
      }
      return;
   }

   if (devinfo->ver == 6) {
      /* gfx6: 3D-to-3D blits take the util_blitter path. */
      if (info->src.resource->target == PIPE_TEXTURE_3D &&
          info->dst.resource->target == PIPE_TEXTURE_3D) {
         crocus_u_blitter(ice, info);
         return;
      }
   }

use_blorp:
   if (info->render_condition_enable) {
      if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
         blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }

   float src_x0 = info->src.box.x;
   float src_x1 = info->src.box.x + info->src.box.width;
   float src_y0 = info->src.box.y;
   float src_y1 = info->src.box.y + info->src.box.height;
   float dst_x0 = info->dst.box.x;
   float dst_x1 = info->dst.box.x + info->dst.box.width;
   float dst_y0 = info->dst.box.y;
   float dst_y1 = info->dst.box.y + info->dst.box.height;
   /* apply_mirror() normalizes negative-extent boxes, reporting whether
    * the axis is flipped.
    */
   bool mirror_x = apply_mirror(&src_x0, &src_x1);
   bool mirror_y = apply_mirror(&src_y0, &src_y1);
   enum blorp_filter filter;

   if (info->scissor_enable) {
      bool noop = apply_blit_scissor(&info->scissor,
                                     &src_x0, &src_y0, &src_x1, &src_y1,
                                     &dst_x0, &dst_y0, &dst_x1, &dst_y1,
                                     mirror_x, mirror_y);
      /* Fully scissored away: nothing to draw. */
      if (noop)
         return;
   }

   /* Select the blorp filter from the source/destination sizes and
    * sample counts.
    */
   if (abs(info->dst.box.width) == abs(info->src.box.width) &&
       abs(info->dst.box.height) == abs(info->src.box.height)) {
      if (info->src.resource->nr_samples > 1 &&
          info->dst.resource->nr_samples <= 1) {
         /* The OpenGL ES 3.2 specification, section 16.2.1, says:
          *
          *    "If the read framebuffer is multisampled (its effective
          *     value of SAMPLE_BUFFERS is one) and the draw framebuffer
          *     is not (its value of SAMPLE_BUFFERS is zero), the samples
          *     corresponding to each pixel location in the source are
          *     converted to a single sample before being written to the
          *     destination.  The filter parameter is ignored.  If the
          *     source formats are integer types or stencil values, a
          *     single sample’s value is selected for each pixel.  If the
          *     source formats are floating-point or normalized types,
          *     the sample values for each pixel are resolved in an
          *     implementation-dependent manner.  If the source formats
          *     are depth values, sample values are resolved in an
          *     implementation-dependent manner where the result will be
          *     between the minimum and maximum depth values in the pixel."
          *
          * When selecting a single sample, we always choose sample 0.
          */
         if (util_format_is_depth_or_stencil(info->src.format) ||
             util_format_is_pure_integer(info->src.format)) {
            filter = BLORP_FILTER_SAMPLE_0;
         } else {
            filter = BLORP_FILTER_AVERAGE;
         }
      } else {
         /* The OpenGL 4.6 specification, section 18.3.1, says:
          *
          *    "If the source and destination dimensions are identical,
          *     no filtering is applied."
          *
          * Using BLORP_FILTER_NONE will also handle the upsample case by
          * replicating the one value in the source to all values in the
          * destination.
          */
         filter = BLORP_FILTER_NONE;
      }
   } else if (info->filter == PIPE_TEX_FILTER_LINEAR) {
      filter = BLORP_FILTER_BILINEAR;
   } else {
      filter = BLORP_FILTER_NEAREST;
   }

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);

   /* Per-slice source depth step when scaling a 3D blit. */
   float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth;

   /* There is no interpolation to the pixel center during rendering, so
    * add the 0.5 offset ourselves here.
    */
   float depth_center_offset = 0;
   if (info->src.resource->target == PIPE_TEXTURE_3D)
      depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth;

   /* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is
    * used to represent the color aspect. */
   unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS);
   while (aspect_mask) {
      unsigned aspect = 1 << u_bit_scan(&aspect_mask);

      struct crocus_resource *src_res =
         crocus_resource_for_aspect(devinfo, info->src.resource, aspect);
      struct crocus_resource *dst_res =
         crocus_resource_for_aspect(devinfo, info->dst.resource, aspect);

      enum pipe_format src_pfmt =
         pipe_format_for_aspect(info->src.format, aspect);
      enum pipe_format dst_pfmt =
         pipe_format_for_aspect(info->dst.format, aspect);

      /* Make sure any deferred modifier/aux setup is done before use. */
      if (crocus_resource_unfinished_aux_import(src_res))
         crocus_resource_finish_aux_import(ctx->screen, src_res);
      if (crocus_resource_unfinished_aux_import(dst_res))
         crocus_resource_finish_aux_import(ctx->screen, dst_res);

      struct crocus_format_info src_fmt =
         crocus_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT);
      enum isl_aux_usage src_aux_usage =
         crocus_resource_texture_aux_usage(src_res);

      /* Resolve aux so the source can be sampled in src_fmt. */
      crocus_resource_prepare_texture(ice, src_res, src_fmt.fmt,
                                      info->src.level, 1, info->src.box.z,
                                      info->src.box.depth);
      //      crocus_emit_buffer_barrier_for(batch, src_res->bo,
      //                                   CROCUS_DOMAIN_OTHER_READ);

      struct crocus_format_info dst_fmt =
         crocus_format_for_usage(devinfo, dst_pfmt,
                                 ISL_SURF_USAGE_RENDER_TARGET_BIT);
      enum isl_aux_usage dst_aux_usage =
         crocus_resource_render_aux_usage(ice, dst_res, info->dst.level,
                                          dst_fmt.fmt, false);

      struct blorp_surf src_surf, dst_surf;
      crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
                                     &src_res->base, src_aux_usage,
                                     info->src.level, false);
      crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
                                     &dst_res->base, dst_aux_usage,
                                     info->dst.level, true);

      crocus_resource_prepare_render(ice, dst_res, info->dst.level,
                                     info->dst.box.z, info->dst.box.depth,
                                     dst_aux_usage);
      //      crocus_emit_buffer_barrier_for(batch, dst_res->bo,
      //                                     CROCUS_DOMAIN_RENDER_WRITE);

      /* If the batch already touched the source, its contents may sit in
       * the render cache; force a sampler-visible flush (see
       * tex_cache_flush_hack above).
       */
      if (crocus_batch_references(batch, src_res->bo))
         tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);

      if (dst_res->base.target == PIPE_BUFFER) {
         util_range_add(&dst_res->base, &dst_res->valid_buffer_range,
                        dst_x0, dst_x1);
      }

      struct isl_swizzle src_swiz = pipe_to_isl_swizzles(src_fmt.swizzles);
      struct isl_swizzle dst_swiz = pipe_to_isl_swizzles(dst_fmt.swizzles);

      /* One blorp_blit per destination slice. */
      for (int slice = 0; slice < info->dst.box.depth; slice++) {
         unsigned dst_z = info->dst.box.z + slice;
         float src_z = info->src.box.z + slice * src_z_step +
            depth_center_offset;

         crocus_batch_maybe_flush(batch, 1500);

         blorp_blit(&blorp_batch,
                    &src_surf, info->src.level, src_z,
                    src_fmt.fmt, src_swiz,
                    &dst_surf, info->dst.level, dst_z,
                    dst_fmt.fmt, dst_swiz,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    filter, mirror_x, mirror_y);

      }

      tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);

      crocus_resource_finish_render(ice, dst_res, info->dst.level,
                                    info->dst.box.z, info->dst.box.depth,
                                    dst_aux_usage);
   }

   blorp_batch_finish(&blorp_batch);

   crocus_flush_and_dirty_for_history(ice, batch, (struct crocus_resource *)
                                      info->dst.resource,
                                      PIPE_CONTROL_RENDER_TARGET_FLUSH,
                                      "cache history: post-blit");
}
|
||||
|
||||
/**
 * Choose the aux usage for one side of a resource copy.
 *
 * Only MCS keeps its aux usage across a copy; everything else is copied
 * uncompressed.  A stencil render target additionally drops MCS — a
 * stencil resolve must happen before resource copies or CPU access
 * (see HSD 1209978162).
 */
static void
get_copy_region_aux_settings(struct crocus_resource *res,
                             enum isl_aux_usage *out_aux_usage,
                             bool is_render_target)
{
   enum isl_aux_usage usage = ISL_AUX_USAGE_NONE;

   if (res->aux.usage == ISL_AUX_USAGE_MCS &&
       !(is_render_target && isl_surf_usage_is_stencil(res->surf.usage)))
      usage = res->aux.usage;

   *out_aux_usage = usage;
}
|
||||
|
||||
/**
 * Perform a GPU-based raw memory copy between compatible view classes.
 *
 * Does not perform any flushing - the new data may still be left in the
 * render cache, and old data may remain in other caches.
 *
 * Wraps blorp_copy() and blorp_buffer_copy().
 *
 * On gfx4/5 the BLT engine is tried first; on failure the copy falls
 * through to the BLORP paths below.
 */
void
crocus_copy_region(struct blorp_context *blorp,
                   struct crocus_batch *batch,
                   struct pipe_resource *dst,
                   unsigned dst_level,
                   unsigned dstx, unsigned dsty, unsigned dstz,
                   struct pipe_resource *src,
                   unsigned src_level,
                   const struct pipe_box *src_box)
{
   struct blorp_batch blorp_batch;
   struct crocus_context *ice = blorp->driver_ctx;
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_resource *src_res = (void *) src;
   struct crocus_resource *dst_res = (void *) dst;

   /* gfx4/5: use the BLT engine when it can handle the copy. */
   if (devinfo->ver <= 5) {
      if (screen->vtbl.copy_region_blt(batch, dst_res,
                                       dst_level, dstx, dsty, dstz,
                                       src_res, src_level, src_box))
         return;
   }
   enum isl_aux_usage src_aux_usage, dst_aux_usage;
   get_copy_region_aux_settings(src_res, &src_aux_usage,
                                false);
   get_copy_region_aux_settings(dst_res, &dst_aux_usage,
                                true);

   /* If the batch already touched the source, its contents may still be
    * in the render cache; flush so sampling sees the latest data.
    */
   if (crocus_batch_references(batch, src_res->bo))
      tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);

   if (dst->target == PIPE_BUFFER)
      util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);

   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
      /* Buffer-to-buffer: a raw byte copy via blorp_buffer_copy(). */
      struct blorp_address src_addr = {
         .buffer = crocus_resource_bo(src), .offset = src_box->x,
      };
      struct blorp_address dst_addr = {
         .buffer = crocus_resource_bo(dst), .offset = dstx,
         .reloc_flags = EXEC_OBJECT_WRITE,
      };

      crocus_batch_maybe_flush(batch, 1500);

      blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
      blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);
      blorp_batch_finish(&blorp_batch);
   } else {
      // XXX: what about one surface being a buffer and not the other?

      struct blorp_surf src_surf, dst_surf;
      crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
                                     src, src_aux_usage, src_level, false);
      crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
                                     dst, dst_aux_usage, dst_level, true);

      /* Resolve/prepare aux on both sides before the raw copy. */
      crocus_resource_prepare_access(ice, src_res, src_level, 1,
                                     src_box->z, src_box->depth,
                                     src_aux_usage, false);
      crocus_resource_prepare_access(ice, dst_res, dst_level, 1,
                                     dstz, src_box->depth,
                                     dst_aux_usage, false);

      blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);

      /* One blorp_copy per slice of the source box. */
      for (int slice = 0; slice < src_box->depth; slice++) {
         crocus_batch_maybe_flush(batch, 1500);

         blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,
                    &dst_surf, dst_level, dstz + slice,
                    src_box->x, src_box->y, dstx, dsty,
                    src_box->width, src_box->height);
      }
      blorp_batch_finish(&blorp_batch);

      crocus_resource_finish_write(ice, dst_res, dst_level, dstz,
                                   src_box->depth, dst_aux_usage);
   }

   tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
}
|
||||
|
||||
static struct crocus_batch *
|
||||
get_preferred_batch(struct crocus_context *ice, struct crocus_bo *bo)
|
||||
{
|
||||
/* If the compute batch is already using this buffer, we'd prefer to
|
||||
* continue queueing in the compute batch.
|
||||
*/
|
||||
if (crocus_batch_references(&ice->batches[CROCUS_BATCH_COMPUTE], bo))
|
||||
return &ice->batches[CROCUS_BATCH_COMPUTE];
|
||||
|
||||
/* Otherwise default to the render batch. */
|
||||
return &ice->batches[CROCUS_BATCH_RENDER];
|
||||
}
|
||||
|
||||
|
||||
/**
 * The pipe->resource_copy_region() driver hook.
 *
 * This implements ARB_copy_image semantics - a raw memory copy between
 * compatible view classes.
 *
 * Dispatches to one of three paths: MI_COPY_MEM_MEM for tiny buffer
 * copies, a CPU fallback for pre-gen6 depth/stencil, or crocus_copy_region()
 * (BLORP/BLT) for everything else.
 */
static void
crocus_resource_copy_region(struct pipe_context *ctx,
                            struct pipe_resource *p_dst,
                            unsigned dst_level,
                            unsigned dstx, unsigned dsty, unsigned dstz,
                            struct pipe_resource *p_src,
                            unsigned src_level,
                            const struct pipe_box *src_box)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_resource *src = (void *) p_src;
   struct crocus_resource *dst = (void *) p_dst;

   /* Finish any deferred modifier/aux setup before touching the data. */
   if (crocus_resource_unfinished_aux_import(src))
      crocus_resource_finish_aux_import(ctx->screen, src);
   if (crocus_resource_unfinished_aux_import(dst))
      crocus_resource_finish_aux_import(ctx->screen, dst);

   /* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */
   if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER &&
       (src_box->width % 4 == 0) && src_box->width <= 16 &&
       screen->vtbl.copy_mem_mem) {
      struct crocus_bo *dst_bo = crocus_resource_bo(p_dst);
      /* Stay on the compute batch if it already uses the destination BO. */
      batch = get_preferred_batch(ice, dst_bo);
      crocus_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));
      crocus_emit_pipe_control_flush(batch,
                                     "stall for MI_COPY_MEM_MEM copy_region",
                                     PIPE_CONTROL_CS_STALL);
      screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, crocus_resource_bo(p_src),
                                src_box->x, src_box->width);
      return;
   }

   /* Pre-gen6 depth/stencil copies fall back to the CPU path. */
   if (devinfo->ver < 6 && util_format_is_depth_or_stencil(p_dst->format)) {
      util_resource_copy_region(ctx, p_dst, dst_level, dstx, dsty, dstz,
                                p_src, src_level, src_box);
      return;
   }
   crocus_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz,
                      p_src, src_level, src_box);

   /* When stencil lives in a separate resource (gen6+ separate stencil),
    * the copy above only moved depth; copy the stencil halves too.
    */
   if (util_format_is_depth_and_stencil(p_dst->format) &&
       util_format_has_stencil(util_format_description(p_src->format)) &&
       devinfo->ver >= 6) {
      struct crocus_resource *junk, *s_src_res, *s_dst_res;
      crocus_get_depth_stencil_resources(devinfo, p_src, &junk, &s_src_res);
      crocus_get_depth_stencil_resources(devinfo, p_dst, &junk, &s_dst_res);

      crocus_copy_region(&ice->blorp, batch, &s_dst_res->base, dst_level, dstx,
                         dsty, dstz, &s_src_res->base, src_level, src_box);
   }

   crocus_flush_and_dirty_for_history(ice, batch, dst,
                                      PIPE_CONTROL_RENDER_TARGET_FLUSH,
                                      "cache history: post copy_region");
}
|
||||
|
||||
void
|
||||
crocus_init_blit_functions(struct pipe_context *ctx)
|
||||
{
|
||||
ctx->blit = crocus_blit;
|
||||
ctx->resource_copy_region = crocus_resource_copy_region;
|
||||
}
|
|
@ -0,0 +1,399 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_blorp.c
|
||||
*
|
||||
* ============================= GENXML CODE =============================
|
||||
* [This file is compiled once per generation.]
|
||||
* =======================================================================
|
||||
*
|
||||
* GenX specific code for working with BLORP (blitting, resolves, clears
|
||||
* on the 3D engine). This provides the driver-specific hooks needed to
|
||||
* implement the BLORP API.
|
||||
*
|
||||
* See crocus_blit.c, crocus_clear.c, and so on.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "crocus_batch.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_context.h"
|
||||
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "intel/common/intel_l3_config.h"
|
||||
|
||||
#include "blorp/blorp_genX_exec.h"
|
||||
|
||||
#if GFX_VER <= 5
|
||||
#include "gen4_blorp_exec.h"
|
||||
#endif
|
||||
|
||||
/**
 * Allocate @size bytes (aligned to @alignment) in the batch's dynamic
 * state buffer, returning a CPU pointer to the space.
 *
 * The allocated offset is stored in @out_offset; if @out_bo is non-NULL
 * the state buffer BO is returned there as well.  May flush the batch or
 * grow the state buffer to make room.
 */
static uint32_t *
stream_state(struct crocus_batch *batch,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset,
             struct crocus_bo **out_bo)
{
   uint32_t offset = ALIGN(batch->state.used, alignment);

   /* NOTE(review): after the flush below, the recomputed offset is not
    * re-checked against the buffer size before use — presumably a fresh
    * batch always has room for a single allocation; confirm.
    */
   if (offset + size >= STATE_SZ && !batch->no_wrap) {
      crocus_batch_flush(batch);
      offset = ALIGN(batch->state.used, alignment);
   } else if (offset + size >= batch->state.bo->size) {
      /* Grow by 1.5x, capped at MAX_STATE_SIZE. */
      const unsigned new_size =
         MIN2(batch->state.bo->size + batch->state.bo->size / 2,
              MAX_STATE_SIZE);
      crocus_grow_buffer(batch, true, batch->state.used, new_size);
      assert(offset + size < batch->state.bo->size);
   }

   crocus_record_state_size(batch->state_sizes, offset, size);

   batch->state.used = offset + size;
   *out_offset = offset;

   /* If the caller has asked for a BO, we leave them the responsibility of
    * adding bo->gtt_offset (say, by handing an address to genxml). If not,
    * we assume they want the offset from a base address.
    */
   if (out_bo)
      *out_bo = batch->state.bo;

   return (uint32_t *)batch->state.map + (offset >> 2);
}
|
||||
|
||||
/** Reserve space for @n command-stream dwords and return a pointer to it. */
static void *
blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;
   const unsigned bytes = n * sizeof(uint32_t);
   return crocus_get_command_space(batch, bytes);
}
|
||||
|
||||
static uint64_t
|
||||
blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location,
|
||||
struct blorp_address addr, uint32_t delta)
|
||||
{
|
||||
struct crocus_batch *batch = blorp_batch->driver_batch;
|
||||
uint32_t offset;
|
||||
|
||||
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
|
||||
offset = (char *)location - (char *)batch->state.map;
|
||||
return crocus_state_reloc(batch, offset,
|
||||
addr.buffer, addr.offset + delta,
|
||||
addr.reloc_flags);
|
||||
}
|
||||
|
||||
assert(!crocus_ptr_in_state_buffer(batch, location));
|
||||
|
||||
offset = (char *)location - (char *)batch->command.map;
|
||||
return crocus_command_reloc(batch, offset,
|
||||
addr.buffer, addr.offset + delta,
|
||||
addr.reloc_flags);
|
||||
}
|
||||
|
||||
/**
 * Record a relocation for a surface-state address at @ss_offset in the
 * state buffer, and patch the dword there with the relocated value.
 */
static void
blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset,
                    struct blorp_address addr, uint32_t delta)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;

   uint64_t value = crocus_state_reloc(batch, ss_offset, addr.buffer,
                                       addr.offset + delta,
                                       addr.reloc_flags);

   /* 32-bit addresses on these platforms: write the low dword in place. */
   uint32_t *dest = (uint32_t *)((char *)batch->state.map + ss_offset);
   *dest = value;
}
|
||||
|
||||
/**
 * blorp hook for embedding a surface address directly in surface state.
 *
 * Returns zero: blorp_surface_reloc() patches the real address in later.
 */
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address addr)
{
   /* We'll let blorp_surface_reloc write the address. */
   return 0ull;
}
|
||||
|
||||
#if GFX_VER >= 7
|
||||
static struct blorp_address
|
||||
blorp_get_surface_base_address(struct blorp_batch *blorp_batch)
|
||||
{
|
||||
struct crocus_batch *batch = blorp_batch->driver_batch;
|
||||
return (struct blorp_address) {
|
||||
.buffer = batch->state.bo,
|
||||
.offset = 0
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Allocate dynamic state space for blorp in the batch's state buffer.
 *
 * No BO is returned (NULL out_bo): callers use base-relative offsets.
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;

   return stream_state(batch, size, alignment, offset, NULL);
}
|
||||
|
||||
/**
 * Allocate a binding table plus @num_entries surface states for blorp.
 *
 * Each surface state's offset is stored both in @surface_offsets and in
 * the binding table itself; CPU pointers to the surface states go in
 * @surface_maps.
 */
static void
blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
                          unsigned num_entries,
                          unsigned state_size,
                          unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets,
                          void **surface_maps)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;

   /* The binding table is an array of 32-bit offsets, 32-byte aligned. */
   uint32_t *table = stream_state(batch, num_entries * sizeof(uint32_t), 32,
                                  bt_offset, NULL);

   for (unsigned i = 0; i < num_entries; i++) {
      surface_maps[i] = stream_state(batch, state_size, state_alignment,
                                     &surface_offsets[i], NULL);
      table[i] = surface_offsets[i];
   }
}
|
||||
|
||||
/**
 * Allocate vertex-buffer space in the state buffer and describe it as a
 * blorp_address for blorp's vertex setup.
 */
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,
                          uint32_t size,
                          struct blorp_address *addr)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;
   struct crocus_bo *vb_bo;
   uint32_t vb_offset;

   void *vb_map = stream_state(batch, size, 64, &vb_offset, &vb_bo);

   *addr = (struct blorp_address) {
      .buffer = vb_bo,
      .offset = vb_offset,
      .reloc_flags = RELOC_32BIT,
#if GFX_VER >= 7
      .mocs = crocus_mocs(vb_bo, &batch->screen->isl_dev),
#endif
   };

   return vb_map;
}
|
||||
|
||||
/**
 * blorp hook to invalidate the vertex fetch cache around 48-bit vertex
 * buffer address transitions.  Intentionally a no-op here — presumably
 * not needed on these pre-gen8 platforms; confirm against the gen8+
 * workaround this hook exists for.
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                           const struct blorp_address *addrs,
                                           UNUSED uint32_t *sizes,
                                           unsigned num_vbs)
{
}
|
||||
|
||||
static struct blorp_address
|
||||
blorp_get_workaround_address(struct blorp_batch *blorp_batch)
|
||||
{
|
||||
struct crocus_batch *batch = blorp_batch->driver_batch;
|
||||
|
||||
return (struct blorp_address) {
|
||||
.buffer = batch->ice->workaround_bo,
|
||||
.offset = batch->ice->workaround_offset,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * blorp hook to flush CPU writes in [start, start + size).  A no-op for
 * this driver (see comment below).
 */
static void
blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
                  UNUSED void *start,
                  UNUSED size_t size)
{
   /* All allocated states come from the batch which we will flush before we
    * submit it. There's nothing for us to do here.
    */
}
|
||||
|
||||
#if GFX_VER >= 7
/**
 * Return the L3 configuration blorp should use for 3D workloads (gen7+).
 */
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *blorp_batch)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;
   return batch->screen->l3_config_3d;
}
#else /* GFX_VER < 7 */
/**
 * Program the URB for blorp's VS/SF needs on pre-gen7 platforms:
 * gen4/5 use the fence-based URB layout; gen6 uses 3DSTATE_URB.
 */
static void
blorp_emit_urb_config(struct blorp_batch *blorp_batch,
                      unsigned vs_entry_size,
                      UNUSED unsigned sf_entry_size)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;
#if GFX_VER <= 5
   batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size);
#else
   genX(upload_urb)(batch, vs_entry_size, false, vs_entry_size);
#endif
}
#endif
|
||||
|
||||
/**
 * The blorp->exec() driver hook: run a blorp operation on a crocus batch.
 *
 * Emits the necessary cache flushes, reserves batch space, emits the
 * drawing rectangle, executes the blorp program, and then marks nearly
 * all driver state dirty since blorp has clobbered the 3D pipeline state.
 */
static void
crocus_blorp_exec(struct blorp_batch *blorp_batch,
                  const struct blorp_params *params)
{
   struct crocus_context *ice = blorp_batch->blorp->driver_ctx;
   struct crocus_batch *batch = blorp_batch->driver_batch;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   if (params->src.enabled)
      crocus_cache_flush_for_read(batch, params->src.addr.buffer);
   if (params->dst.enabled) {
      crocus_cache_flush_for_render(batch, params->dst.addr.buffer,
                                    params->dst.view.format,
                                    params->dst.aux_usage);
   }
   if (params->depth.enabled)
      crocus_cache_flush_for_depth(batch, params->depth.addr.buffer);
   if (params->stencil.enabled)
      crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer);

   /* Reserve enough space that the blorp emission cannot wrap the batch. */
   crocus_require_command_space(batch, 1400);
   crocus_require_statebuffer_space(batch, 600);
   batch->no_wrap = true;
#if GFX_VER == 6
   /* Emit workaround flushes when we switch from drawing to blorping. */
   crocus_emit_post_sync_nonzero_flush(batch);
#endif

#if GFX_VER >= 6
   crocus_emit_depth_stall_flushes(batch);
#endif

   blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
   }

   batch->screen->vtbl.update_surface_base_address(batch);
   crocus_handle_always_flush_cache(batch);

   batch->contains_draw = true;
   blorp_exec(blorp_batch, params);

   batch->no_wrap = false;
   crocus_handle_always_flush_cache(batch);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   /* Bits blorp did NOT clobber — these can be skipped when re-dirtying. */
   uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE |
                         CROCUS_DIRTY_GEN7_SO_BUFFERS |
                         CROCUS_DIRTY_SO_DECL_LIST |
                         CROCUS_DIRTY_LINE_STIPPLE |
                         CROCUS_ALL_DIRTY_FOR_COMPUTE |
                         CROCUS_DIRTY_GEN6_SCISSOR_RECT |
                         CROCUS_DIRTY_GEN75_VF |
                         CROCUS_DIRTY_SF_CL_VIEWPORT);

   uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               CROCUS_STAGE_DIRTY_UNCOMPILED_VS |
                               CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |
                               CROCUS_STAGE_DIRTY_UNCOMPILED_TES |
                               CROCUS_STAGE_DIRTY_UNCOMPILED_GS |
                               CROCUS_STAGE_DIRTY_UNCOMPILED_FS |
                               CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) {
      /* BLORP disabled tessellation, that's fine for the next draw */
      skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS |
                         CROCUS_STAGE_DIRTY_TES |
                         CROCUS_STAGE_DIRTY_CONSTANTS_TCS |
                         CROCUS_STAGE_DIRTY_CONSTANTS_TES |
                         CROCUS_STAGE_DIRTY_BINDINGS_TCS |
                         CROCUS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) {
      /* BLORP disabled geometry shaders, that's fine for the next draw */
      skip_stage_bits |= CROCUS_STAGE_DIRTY_GS |
                         CROCUS_STAGE_DIRTY_CONSTANTS_GS |
                         CROCUS_STAGE_DIRTY_BINDINGS_GS;
   }

   /* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if
    * BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set.
    */
   if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)
      skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER;

   if (!params->wm_prog_data)
      skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE;

   /* Dirty everything except the skip sets computed above. */
   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Blorp reprogrammed the URB; force reallocation on the next draw. */
   ice->urb.vsize = 0;
   ice->urb.gs_present = false;
   ice->urb.gsize = 0;
   ice->urb.tess_present = false;
   ice->urb.hsize = 0;
   ice->urb.dsize = 0;

   /* Record what blorp wrote so later cache tracking sees it. */
   if (params->dst.enabled) {
      crocus_render_cache_add_bo(batch, params->dst.addr.buffer,
                                 params->dst.view.format,
                                 params->dst.aux_usage);
   }
   if (params->depth.enabled)
      crocus_depth_cache_add_bo(batch, params->depth.addr.buffer);
   if (params->stencil.enabled)
      crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer);
}
|
||||
|
||||
/**
 * blorp hook called before each blorp operation for performance
 * measurement.  Intentionally empty — crocus does not implement
 * intel_measure instrumentation.
 */
static void
blorp_measure_start(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
}
|
||||
|
||||
void
|
||||
genX(init_blorp)(struct crocus_context *ice)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
|
||||
|
||||
blorp_init(&ice->blorp, ice, &screen->isl_dev);
|
||||
ice->blorp.compiler = screen->compiler;
|
||||
ice->blorp.lookup_shader = crocus_blorp_lookup_shader;
|
||||
ice->blorp.upload_shader = crocus_blorp_upload_shader;
|
||||
ice->blorp.exec = crocus_blorp_exec;
|
||||
}
|
|
@ -0,0 +1,337 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* blt command encoding for gen4/5 */
|
||||
#include "crocus_context.h"
|
||||
|
||||
#include "crocus_genx_macros.h"
|
||||
#include "crocus_genx_protos.h"
|
||||
#include "crocus_resource.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BLIT
|
||||
|
||||
#if GFX_VER <= 5
|
||||
|
||||
static bool validate_blit_for_blt(struct crocus_batch *batch,
|
||||
const struct pipe_blit_info *info)
|
||||
{
|
||||
/* If the source and destination are the same size with no mirroring,
|
||||
* the rectangles are within the size of the texture and there is no
|
||||
* scissor, then we can probably use the blit engine.
|
||||
*/
|
||||
if (info->dst.box.width != info->src.box.width ||
|
||||
info->dst.box.height != info->src.box.height)
|
||||
return false;
|
||||
|
||||
if (info->scissor_enable)
|
||||
return false;
|
||||
|
||||
if (info->dst.box.height < 0 || info->src.box.height < 0)
|
||||
return false;
|
||||
|
||||
if (info->dst.box.depth > 1 || info->src.box.depth > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
|
||||
{
|
||||
int pitch = res->surf.row_pitch_B;
|
||||
if (res->surf.tiling != ISL_TILING_LINEAR)
|
||||
pitch /= 4;
|
||||
return pitch;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
color_depth_for_cpp(int cpp)
|
||||
{
|
||||
switch (cpp) {
|
||||
case 4: return COLOR_DEPTH__32bit;
|
||||
case 2: return COLOR_DEPTH__565;
|
||||
case 1: return COLOR_DEPTH__8bit;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Emit a single XY_SRC_COPY_BLT command copying a w x h pixel region.
 *
 * Offsets/coordinates must already be adjusted for miplevel/layer; this
 * function only handles the >32bpp coordinate scaling and the tiled-pitch
 * unit conversion required by the command encoding.  Ends with an MI flush
 * so the copy is ordered against subsequent rendering.
 */
static bool emit_copy_blt(struct crocus_batch *batch,
                          struct crocus_resource *src,
                          struct crocus_resource *dst,
                          unsigned cpp,
                          int32_t src_pitch,
                          unsigned src_offset,
                          int32_t dst_pitch,
                          unsigned dst_offset,
                          uint16_t src_x, uint16_t src_y,
                          uint16_t dst_x, uint16_t dst_y,
                          uint16_t w, uint16_t h)

{
   uint32_t src_tile_w, src_tile_h;
   uint32_t dst_tile_w, dst_tile_h;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;

   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __func__,
       src, src_pitch, src_offset, src_x, src_y,
       dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
   isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);

   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
    * (X direction width of the Tile). This is ensured while allocating the
    * buffer object.
    */
   assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
   assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);

   /* For big formats (such as floating point), do the copy using 16 or
    * 32bpp and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   /* For tiled source and destination, pitch value should be specified
    * as a number of Dwords.
    */
   if (dst->surf.tiling != ISL_TILING_LINEAR)
      dst_pitch /= 4;

   if (src->surf.tiling != ISL_TILING_LINEAR)
      src_pitch /= 4;

   assert(cpp <= 4);
   crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
      xyblt.RasterOperation = 0xCC;   /* straight SRCCOPY ROP */
      xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
      xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
      xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
      xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
      xyblt.ColorDepth = color_depth_for_cpp(cpp);
      /* For 32bpp, write both bytes of each pixel's alpha/color mask. */
      xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
      xyblt.DestinationX1Coordinate = dst_x;
      xyblt.DestinationY1Coordinate = dst_y;
      xyblt.DestinationX2Coordinate = dst_x2;
      xyblt.DestinationY2Coordinate = dst_y2;
      xyblt.DestinationPitch = dst_pitch;
      xyblt.SourceX1Coordinate = src_x;
      xyblt.SourceY1Coordinate = src_y;
      xyblt.SourcePitch = src_pitch;
   };

   crocus_emit_mi_flush(batch);
   return true;
}
|
||||
|
||||
/**
 * Copy a region from src to dst using the gen4/5 BLT engine, if possible.
 *
 * Validates blitter restrictions (no Y-tiling, matching formats/cpp, pitch
 * limits), converts box coordinates into physical image coordinates
 * (mip/layer offsets, compressed-block units), then splits the copy into
 * 16k x 16k chunks each emitted as one XY_SRC_COPY_BLT.
 *
 * Returns false when the blitter cannot handle the copy; the caller is
 * expected to fall back to another path.
 */
static bool crocus_emit_blt(struct crocus_batch *batch,
                            struct crocus_resource *src,
                            struct crocus_resource *dst,
                            unsigned dst_level,
                            unsigned dst_x, unsigned dst_y,
                            unsigned dst_z,
                            unsigned src_level,
                            const struct pipe_box *src_box)
{
   const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
   unsigned src_cpp = src_fmtl->bpb / 8;
   const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
   const unsigned dst_cpp = dst_fmtl->bpb / 8;
   uint16_t src_x, src_y;
   uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
   uint32_t src_width = src_box->width, src_height = src_box->height;

   /* gen4/5 can't handle Y tiled blits. */
   if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
      return false;

   /* The blitter does raw copies; no format conversion. */
   if (src->surf.format != dst->surf.format)
      return false;

   if (src_cpp != dst_cpp)
      return false;

   src_x = src_box->x;
   src_y = src_box->y;

   assert(src_cpp == dst_cpp);

   /* Translate box coordinates to physical coordinates within the
    * miptree, then to compressed-block units where applicable.
    */
   crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
                                    &src_image_y);
   if (util_format_is_compressed(src->base.format)) {
      int bw = util_format_get_blockwidth(src->base.format);
      int bh = util_format_get_blockheight(src->base.format);
      assert(src_x % bw == 0);
      assert(src_y % bh == 0);
      src_x /= (int)bw;
      src_y /= (int)bh;
      src_width = DIV_ROUND_UP(src_width, (int)bw);
      src_height = DIV_ROUND_UP(src_height, (int)bh);
   }

   crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
                                    &dst_image_y);
   if (util_format_is_compressed(dst->base.format)) {
      int bw = util_format_get_blockwidth(dst->base.format);
      int bh = util_format_get_blockheight(dst->base.format);
      assert(dst_x % bw == 0);
      assert(dst_y % bh == 0);
      dst_x /= (int)bw;
      dst_y /= (int)bh;
   }
   src_x += src_image_x;
   src_y += src_image_y;
   dst_x += dst_image_x;
   dst_y += dst_image_y;

   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    * Data Size Limitations):
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan line’s worth of graphics data depends on the
    *    color depth.
    *
    * The blitter's pitch is a signed 16-bit integer, but measured in bytes
    * for linear surfaces and DWords for tiled surfaces. So the maximum
    * pitch is 32k linear and 128k tiled.
    */
   if (crocus_resource_blt_pitch(src) >= 32768 ||
       crocus_resource_blt_pitch(dst) >= 32768) {
      return false;
   }

   /* We need to split the blit into chunks that each fit within the blitter's
    * restrictions. We can't use a chunk size of 32768 because we need to
    * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
    * a nice round power of two, big enough that performance won't suffer, and
    * small enough to guarantee everything fits.
    */
   const uint32_t max_chunk_size = 16384;

   for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
      for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
         const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
         const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);

         /* Split each coordinate into a tile-aligned base offset plus an
          * intra-tile x/y that the BLT command can express directly.
          */
         ASSERTED uint32_t z_offset_el, array_offset;
         uint32_t src_offset, src_tile_x, src_tile_y;
         isl_tiling_get_intratile_offset_el(src->surf.tiling,
                                            src_cpp * 8, src->surf.row_pitch_B,
                                            src->surf.array_pitch_el_rows,
                                            src_x + chunk_x, src_y + chunk_y, 0, 0,
                                            &src_offset,
                                            &src_tile_x, &src_tile_y,
                                            &z_offset_el, &array_offset);
         assert(z_offset_el == 0);
         assert(array_offset == 0);

         uint32_t dst_offset, dst_tile_x, dst_tile_y;
         isl_tiling_get_intratile_offset_el(dst->surf.tiling,
                                            dst_cpp * 8, dst->surf.row_pitch_B,
                                            dst->surf.array_pitch_el_rows,
                                            dst_x + chunk_x, dst_y + chunk_y, 0, 0,
                                            &dst_offset,
                                            &dst_tile_x, &dst_tile_y,
                                            &z_offset_el, &array_offset);
         assert(z_offset_el == 0);
         assert(array_offset == 0);
         if (!emit_copy_blt(batch, src, dst,
                            src_cpp, src->surf.row_pitch_B,
                            src_offset,
                            dst->surf.row_pitch_B, dst_offset,
                            src_tile_x, src_tile_y,
                            dst_tile_x, dst_tile_y,
                            chunk_w, chunk_h)) {
            return false;
         }
      }
   }
   return true;
}
|
||||
|
||||
static bool crocus_blit_blt(struct crocus_batch *batch,
|
||||
const struct pipe_blit_info *info)
|
||||
{
|
||||
if (!validate_blit_for_blt(batch, info))
|
||||
return false;
|
||||
|
||||
return crocus_emit_blt(batch,
|
||||
(struct crocus_resource *)info->src.resource,
|
||||
(struct crocus_resource *)info->dst.resource,
|
||||
info->dst.level,
|
||||
info->dst.box.x,
|
||||
info->dst.box.y,
|
||||
info->dst.box.z,
|
||||
info->src.level,
|
||||
&info->src.box);
|
||||
}
|
||||
|
||||
|
||||
static bool crocus_copy_region_blt(struct crocus_batch *batch,
|
||||
struct crocus_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dstx, unsigned dsty, unsigned dstz,
|
||||
struct crocus_resource *src,
|
||||
unsigned src_level,
|
||||
const struct pipe_box *src_box)
|
||||
{
|
||||
if (dst->base.target == PIPE_BUFFER || src->base.target == PIPE_BUFFER)
|
||||
return false;
|
||||
return crocus_emit_blt(batch,
|
||||
src,
|
||||
dst,
|
||||
dst_level,
|
||||
dstx, dsty, dstz,
|
||||
src_level,
|
||||
src_box);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Hook up the BLT-engine vtbl entries for this screen.
 *
 * Only gen4/5 use the blitter path; on later gens the entries are NULL
 * so callers take a render-engine path instead.
 */
void
genX(init_blt)(struct crocus_screen *screen)
{
#if GFX_VER <= 5
   screen->vtbl.blit_blt = crocus_blit_blt;
   screen->vtbl.copy_region_blt = crocus_copy_region_blt;
#else
   screen->vtbl.blit_blt = NULL;
   screen->vtbl.copy_region_blt = NULL;
#endif
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,331 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_BUFMGR_H
|
||||
#define CROCUS_BUFMGR_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include "util/macros.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/list.h"
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
struct crocus_batch;
|
||||
struct intel_device_info;
|
||||
struct pipe_debug_callback;
|
||||
|
||||
#define CROCUS_BINDER_SIZE (64 * 1024)
|
||||
#define CROCUS_MAX_BINDERS 100
|
||||
|
||||
/**
 * A GEM buffer object managed by crocus_bufmgr.
 *
 * Reference-counted; see crocus_bo_reference()/crocus_bo_unreference().
 */
struct crocus_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct crocus_bufmgr *bufmgr;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BO's to the
    * same address in all contexts, for simplicity.
    */
   uint64_t gtt_offset;

   /**
    * The validation list index for this buffer, or -1 when not in a batch.
    * Note that a single buffer may be in multiple batches (contexts), and
    * this is a global field, which refers to the last batch using the BO.
    * It should not be considered authoritative, but can be used to avoid a
    * linear walk of the validation list in the common case by guessing that
    * exec_bos[bo->index] == bo and confirming whether that's the case.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   /** Reference count; the BO is freed when this drops to zero. */
   int refcount;
   /** Human-readable name, for debug output. */
   const char *name;

   /* Kernel-visible flags passed at execbuf time. */
   uint64_t kflags;

   /**
    * Kernel-assigned global name for this object
    *
    * List contains both flink named and prime fd'd objects
    */
   unsigned global_name;

   /**
    * Current tiling mode
    */
   uint32_t tiling_mode;
   uint32_t swizzle_mode;
   uint32_t stride;

   /* Timestamp used by the BO cache (see `head` below) — presumably set
    * when the BO is placed on the cache list; confirm in crocus_bufmgr.c. */
   time_t free_time;

   /** Mapped address for the buffer, saved across map/unmap cycles */
   void *map_cpu;
   /** GTT virtual address for the buffer, saved across map/unmap cycles */
   void *map_gtt;
   /** WC CPU address for the buffer, saved across map/unmap cycles */
   void *map_wc;

   /** BO cache list */
   struct list_head head;

   /** List of GEM handle exports of this buffer (bo_export) */
   struct list_head exports;

   /**
    * Boolean of whether this buffer can be re-used
    */
   bool reusable;

   /**
    * Boolean of whether this buffer has been shared with an external client.
    */
   bool external;

   /**
    * Boolean of whether this buffer is cache coherent
    */
   bool cache_coherent;

   /**
    * Boolean of whether this buffer points into user memory
    */
   bool userptr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;
};
|
||||
|
||||
#define BO_ALLOC_ZEROED (1 << 0)
|
||||
#define BO_ALLOC_COHERENT (1 << 1)
|
||||
|
||||
/**
|
||||
* Allocate a buffer object.
|
||||
*
|
||||
* Buffer objects are not necessarily initially mapped into CPU virtual
|
||||
* address space or graphics device aperture. They must be mapped
|
||||
* using crocus_bo_map() to be used by the CPU.
|
||||
*/
|
||||
struct crocus_bo *crocus_bo_alloc(struct crocus_bufmgr *bufmgr,
|
||||
const char *name, uint64_t size);
|
||||
|
||||
/**
|
||||
* Allocate a tiled buffer object.
|
||||
*
|
||||
* Alignment for tiled objects is set automatically; the 'flags'
|
||||
* argument provides a hint about how the object will be used initially.
|
||||
*
|
||||
* Valid tiling formats are:
|
||||
* I915_TILING_NONE
|
||||
* I915_TILING_X
|
||||
* I915_TILING_Y
|
||||
*/
|
||||
struct crocus_bo *crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr,
|
||||
const char *name, uint64_t size,
|
||||
uint32_t alignment,
|
||||
uint32_t tiling_mode, uint32_t pitch,
|
||||
unsigned flags);
|
||||
|
||||
struct crocus_bo *crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr,
|
||||
const char *name, void *ptr,
|
||||
size_t size);
|
||||
|
||||
/** Takes a reference on a buffer object */
|
||||
static inline void
crocus_bo_reference(struct crocus_bo *bo)
{
   /* Atomic: refcounts may be adjusted from multiple threads/contexts. */
   p_atomic_inc(&bo->refcount);
}
|
||||
|
||||
/**
|
||||
* Releases a reference on a buffer object, freeing the data if
|
||||
* no references remain.
|
||||
*/
|
||||
void crocus_bo_unreference(struct crocus_bo *bo);
|
||||
|
||||
#define MAP_READ PIPE_MAP_READ
|
||||
#define MAP_WRITE PIPE_MAP_WRITE
|
||||
#define MAP_ASYNC PIPE_MAP_UNSYNCHRONIZED
|
||||
#define MAP_PERSISTENT PIPE_MAP_PERSISTENT
|
||||
#define MAP_COHERENT PIPE_MAP_COHERENT
|
||||
/* internal */
|
||||
#define MAP_INTERNAL_MASK (0xff << 24)
|
||||
#define MAP_RAW (0x01 << 24)
|
||||
|
||||
#define MAP_FLAGS (MAP_READ | MAP_WRITE | MAP_ASYNC | \
|
||||
MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
|
||||
|
||||
/**
|
||||
* Maps the buffer into userspace.
|
||||
*
|
||||
* This function will block waiting for any existing execution on the
|
||||
* buffer to complete, first. The resulting mapping is returned.
|
||||
*/
|
||||
MUST_CHECK void *crocus_bo_map(struct pipe_debug_callback *dbg,
|
||||
struct crocus_bo *bo, unsigned flags);
|
||||
|
||||
/**
 * Counterpart to crocus_bo_map().  Currently a no-op that always returns 0:
 * mapped addresses are cached on the BO (map_cpu/map_gtt/map_wc) and are
 * kept across map/unmap cycles.
 */
|
||||
static inline int crocus_bo_unmap(struct crocus_bo *bo) { return 0; } /* no-op: map pointers persist on the BO */
|
||||
|
||||
/**
|
||||
* Waits for rendering to an object by the GPU to have completed.
|
||||
*
|
||||
* This is not required for any access to the BO by bo_map,
|
||||
* bo_subdata, etc. It is merely a way for the driver to implement
|
||||
* glFinish.
|
||||
*/
|
||||
void crocus_bo_wait_rendering(struct crocus_bo *bo);
|
||||
|
||||
/**
|
||||
* Unref a buffer manager instance.
|
||||
*/
|
||||
void crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr);
|
||||
|
||||
/**
|
||||
* Get the current tiling (and resulting swizzling) mode for the bo.
|
||||
*
|
||||
* \param buf Buffer to get tiling mode for
|
||||
* \param tiling_mode returned tiling mode
|
||||
* \param swizzle_mode returned swizzling mode
|
||||
*/
|
||||
int crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode,
|
||||
uint32_t *swizzle_mode);
|
||||
|
||||
/**
|
||||
* Create a visible name for a buffer which can be used by other apps
|
||||
*
|
||||
* \param buf Buffer to create a name for
|
||||
* \param name Returned name
|
||||
*/
|
||||
int crocus_bo_flink(struct crocus_bo *bo, uint32_t *name);
|
||||
|
||||
/**
|
||||
* Is this buffer shared with external clients (exported)?
|
||||
*/
|
||||
static inline bool
crocus_bo_is_external(const struct crocus_bo *bo)
{
   /* `external` is set once the BO has been shared with an external client. */
   return bo->external;
}
|
||||
|
||||
/**
|
||||
* Returns 1 if mapping the buffer for write could cause the process
|
||||
* to block, due to the object being active in the GPU.
|
||||
*/
|
||||
int crocus_bo_busy(struct crocus_bo *bo);
|
||||
|
||||
/**
|
||||
* Specify the volatility of the buffer.
|
||||
* \param bo Buffer to create a name for
|
||||
* \param madv The purgeable status
|
||||
*
|
||||
* Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
|
||||
* reclaimed under memory pressure. If you subsequently require the buffer,
|
||||
* then you must pass I915_MADV_WILLNEED to mark the buffer as required.
|
||||
*
|
||||
* Returns 1 if the buffer was retained, or 0 if it was discarded whilst
|
||||
* marked as I915_MADV_DONTNEED.
|
||||
*/
|
||||
int crocus_bo_madvise(struct crocus_bo *bo, int madv);
|
||||
|
||||
/* drm_bacon_bufmgr_gem.c */
|
||||
struct crocus_bufmgr *
|
||||
crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd,
|
||||
bool bo_reuse);
|
||||
int crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr);
|
||||
|
||||
struct crocus_bo *crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr,
|
||||
const char *name,
|
||||
unsigned handle);
|
||||
|
||||
int crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns);
|
||||
|
||||
uint32_t crocus_create_hw_context(struct crocus_bufmgr *bufmgr);
|
||||
uint32_t crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
|
||||
|
||||
#define CROCUS_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY - 1) / 2)
|
||||
#define CROCUS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
|
||||
#define CROCUS_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY + 1) / 2)
|
||||
|
||||
int crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
|
||||
uint32_t ctx_id, int priority);
|
||||
|
||||
void crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
|
||||
|
||||
int crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd);
|
||||
struct crocus_bo *crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr,
|
||||
int prime_fd, uint32_t tiling,
|
||||
uint32_t stride);
|
||||
|
||||
/**
|
||||
* Exports a bo as a GEM handle into a given DRM file descriptor
|
||||
* \param bo Buffer to export
|
||||
* \param drm_fd File descriptor where the new handle is created
|
||||
* \param out_handle Pointer to store the new handle
|
||||
*
|
||||
* Returns 0 if the buffer was successfully exported, a non zero error code
|
||||
* otherwise.
|
||||
*/
|
||||
int crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd,
|
||||
uint32_t *out_handle);
|
||||
|
||||
uint32_t crocus_bo_export_gem_handle(struct crocus_bo *bo);
|
||||
|
||||
int crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset,
|
||||
uint64_t *out);
|
||||
|
||||
int drm_ioctl(int fd, unsigned long request, void *arg);
|
||||
|
||||
#endif /* CROCUS_BUFMGR_H */
|
|
@ -0,0 +1,859 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "util/format_srgb.h"
|
||||
|
||||
static bool
|
||||
crocus_is_color_fast_clear_compatible(struct crocus_context *ice,
|
||||
enum isl_format format,
|
||||
const union isl_color_value color)
|
||||
{
|
||||
if (isl_format_has_int_channel(format)) {
|
||||
perf_debug(&ice->dbg, "Integer fast clear not enabled for %s",
|
||||
isl_format_get_name(format));
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (!isl_format_has_color_component(format, i)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (color.f32[i] != 0.0f && color.f32[i] != 1.0f) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Decide whether a color clear can be done as a fast (aux-based) clear.
 *
 * Rejects the fast path when fast clears are disabled via debug flag, the
 * resource's aux usage doesn't support them, the clear is partial, it is
 * conditional on a predicate, the render format differs from the resource
 * format (modulo sRGB), or the clear color itself isn't representable.
 */
static bool
can_fast_clear_color(struct crocus_context *ice,
                     struct pipe_resource *p_res,
                     unsigned level,
                     const struct pipe_box *box,
                     bool render_condition_enabled,
                     enum isl_format format,
                     enum isl_format render_format,
                     union isl_color_value color)
{
   struct crocus_resource *res = (void *) p_res;

   if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
      return false;

   if (!isl_aux_usage_has_fast_clears(res->aux.usage))
      return false;

   /* Check for partial clear */
   if (box->x > 0 || box->y > 0 ||
       box->width < minify(p_res->width0, level) ||
       box->height < minify(p_res->height0, level)) {
      return false;
   }

   /* Avoid conditional fast clears to maintain correct tracking of the aux
    * state (see iris_resource_finish_write for more info). Note that partial
    * fast clears (if they existed) would not pose a problem with conditional
    * rendering.
    */
   if (render_condition_enabled &&
       ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
      return false;
   }

   /* We store clear colors as floats or uints as needed. If there are
    * texture views in play, the formats will not properly be respected
    * during resolves because the resolve operations only know about the
    * resource and not the renderbuffer.
    */
   if (isl_format_srgb_to_linear(render_format) !=
       isl_format_srgb_to_linear(format)) {
      return false;
   }

   /* XXX: if (irb->mt->supports_fast_clear)
    * see intel_miptree_create_for_dri_image()
    */

   if (!crocus_is_color_fast_clear_compatible(ice, format, color))
      return false;

   return true;
}
|
||||
|
||||
/**
 * Massage a clear color into the value actually stored as the fast-clear
 * color for a resource.
 *
 * Replicates channels for intensity/luminance formats, zeroes channels the
 * format doesn't write, clamps to the format's representable range
 * (unorm/snorm/pure-int/packed-float), forces alpha to 1 when the format
 * has no alpha channel, and applies linear->sRGB conversion for sRGB
 * render formats.
 */
static union isl_color_value
convert_fast_clear_color(struct crocus_context *ice,
                         struct crocus_resource *res,
                         enum isl_format render_format,
                         const union isl_color_value color)
{
   union isl_color_value override_color = color;
   struct pipe_resource *p_res = (void *) res;

   const enum pipe_format format = p_res->format;
   const struct util_format_description *desc =
      util_format_description(format);
   unsigned colormask = util_format_colormask(desc);

   if (util_format_is_intensity(format) ||
       util_format_is_luminance(format) ||
       util_format_is_luminance_alpha(format)) {
      /* I/L formats replicate the single stored channel into G/B (and A
       * for intensity).
       */
      override_color.u32[1] = override_color.u32[0];
      override_color.u32[2] = override_color.u32[0];
      if (util_format_is_intensity(format))
         override_color.u32[3] = override_color.u32[0];
   } else {
      /* Zero out RGB channels the format doesn't actually store. */
      for (int chan = 0; chan < 3; chan++) {
         if (!(colormask & (1 << chan)))
            override_color.u32[chan] = 0;
      }
   }

   /* Clamp the clear color to what the format can represent. */
   if (util_format_is_unorm(format)) {
      for (int i = 0; i < 4; i++)
         override_color.f32[i] = CLAMP(override_color.f32[i], 0.0f, 1.0f);
   } else if (util_format_is_snorm(format)) {
      for (int i = 0; i < 4; i++)
         override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
   } else if (util_format_is_pure_uint(format)) {
      for (int i = 0; i < 4; i++) {
         unsigned bits = util_format_get_component_bits(
            format, UTIL_FORMAT_COLORSPACE_RGB, i);
         if (bits < 32) {
            uint32_t max = (1u << bits) - 1;
            override_color.u32[i] = MIN2(override_color.u32[i], max);
         }
      }
   } else if (util_format_is_pure_sint(format)) {
      for (int i = 0; i < 4; i++) {
         unsigned bits = util_format_get_component_bits(
            format, UTIL_FORMAT_COLORSPACE_RGB, i);
         if (bits < 32) {
            int32_t max = (1 << (bits - 1)) - 1;
            int32_t min = -(1 << (bits - 1));
            override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
         }
      }
   } else if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
              format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      /* these packed float formats only store unsigned values */
      for (int i = 0; i < 4; i++)
         override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
   }

   /* Formats without an alpha channel read back alpha = 1. */
   if (!(colormask & 1 << 3)) {
      if (util_format_is_pure_integer(format))
         override_color.u32[3] = 1;
      else
         override_color.f32[3] = 1.0f;
   }

   /* Handle linear to SRGB conversion */
   if (isl_format_is_srgb(render_format)) {
      for (int i = 0; i < 3; i++) {
         override_color.f32[i] =
            util_format_linear_to_srgb_float(override_color.f32[i]);
      }
   }

   return override_color;
}
|
||||
|
||||
static void
|
||||
fast_clear_color(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
unsigned level,
|
||||
const struct pipe_box *box,
|
||||
enum isl_format format,
|
||||
union isl_color_value color,
|
||||
enum blorp_batch_flags blorp_flags)
|
||||
{
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
struct pipe_resource *p_res = (void *) res;
|
||||
|
||||
color = convert_fast_clear_color(ice, res, format, color);
|
||||
|
||||
bool color_changed = !!memcmp(&res->aux.clear_color, &color,
|
||||
sizeof(color));
|
||||
|
||||
if (color_changed) {
|
||||
/* If we are clearing to a new clear value, we need to resolve fast
|
||||
* clears from other levels/layers first, since we can't have different
|
||||
* levels/layers with different fast clear colors.
|
||||
*/
|
||||
for (unsigned res_lvl = 0; res_lvl < res->surf.levels; res_lvl++) {
|
||||
const unsigned level_layers =
|
||||
crocus_get_num_logical_layers(res, res_lvl);
|
||||
for (unsigned layer = 0; layer < level_layers; layer++) {
|
||||
if (res_lvl == level &&
|
||||
layer >= box->z &&
|
||||
layer < box->z + box->depth) {
|
||||
/* We're going to clear this layer anyway. Leave it alone. */
|
||||
continue;
|
||||
}
|
||||
|
||||
enum isl_aux_state aux_state =
|
||||
crocus_resource_get_aux_state(res, res_lvl, layer);
|
||||
|
||||
if (aux_state != ISL_AUX_STATE_CLEAR &&
|
||||
aux_state != ISL_AUX_STATE_PARTIAL_CLEAR &&
|
||||
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
|
||||
/* This slice doesn't have any fast-cleared bits. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we got here, then the level may have fast-clear bits that use
|
||||
* the old clear value. We need to do a color resolve to get rid
|
||||
* of their use of the clear color before we can change it.
|
||||
* Fortunately, few applications ever change their clear color at
|
||||
* different levels/layers, so this shouldn't happen often.
|
||||
*/
|
||||
crocus_resource_prepare_access(ice, res,
|
||||
res_lvl, 1, layer, 1,
|
||||
res->aux.usage,
|
||||
false);
|
||||
perf_debug(&ice->dbg,
|
||||
"Resolving resource (%p) level %d, layer %d: color changing from "
|
||||
"(%0.2f, %0.2f, %0.2f, %0.2f) to "
|
||||
"(%0.2f, %0.2f, %0.2f, %0.2f)\n",
|
||||
res, res_lvl, layer,
|
||||
res->aux.clear_color.f32[0],
|
||||
res->aux.clear_color.f32[1],
|
||||
res->aux.clear_color.f32[2],
|
||||
res->aux.clear_color.f32[3],
|
||||
color.f32[0], color.f32[1], color.f32[2], color.f32[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
crocus_resource_set_clear_color(ice, res, color);
|
||||
|
||||
/* If the buffer is already in ISL_AUX_STATE_CLEAR, and the color hasn't
|
||||
* changed, the clear is redundant and can be skipped.
|
||||
*/
|
||||
const enum isl_aux_state aux_state =
|
||||
crocus_resource_get_aux_state(res, level, box->z);
|
||||
if (!color_changed && box->depth == 1 && aux_state == ISL_AUX_STATE_CLEAR)
|
||||
return;
|
||||
|
||||
/* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* "Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization."
|
||||
*
|
||||
* In other words, fast clear ops are not properly synchronized with
|
||||
* other drawing. We need to use a PIPE_CONTROL to ensure that the
|
||||
* contents of the previous draw hit the render target before we resolve
|
||||
* and again afterwards to ensure that the resolve is complete before we
|
||||
* do any more regular drawing.
|
||||
*/
|
||||
crocus_emit_end_of_pipe_sync(batch,
|
||||
"fast clear: pre-flush",
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH);
|
||||
|
||||
/* If we reach this point, we need to fast clear to change the state to
|
||||
* ISL_AUX_STATE_CLEAR, or to update the fast clear color (or both).
|
||||
*/
|
||||
blorp_flags |= color_changed ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
|
||||
|
||||
struct blorp_batch blorp_batch;
|
||||
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
|
||||
|
||||
struct blorp_surf surf;
|
||||
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
|
||||
p_res, res->aux.usage, level, true);
|
||||
|
||||
/* In newer gens (> 9), the hardware will do a linear -> sRGB conversion of
|
||||
* the clear color during the fast clear, if the surface format is of sRGB
|
||||
* type. We use the linear version of the surface format here to prevent
|
||||
* that from happening, since we already do our own linear -> sRGB
|
||||
* conversion in convert_fast_clear_color().
|
||||
*/
|
||||
blorp_fast_clear(&blorp_batch, &surf, isl_format_srgb_to_linear(format),
|
||||
ISL_SWIZZLE_IDENTITY,
|
||||
level, box->z, box->depth,
|
||||
box->x, box->y, box->x + box->width,
|
||||
box->y + box->height);
|
||||
blorp_batch_finish(&blorp_batch);
|
||||
crocus_emit_end_of_pipe_sync(batch,
|
||||
"fast clear: post flush",
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH);
|
||||
|
||||
crocus_resource_set_aux_state(ice, res, level, box->z,
|
||||
box->depth, ISL_AUX_STATE_CLEAR);
|
||||
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Clear a color resource: try the fast-clear path first, otherwise fall
 * back to a regular BLORP slow clear.
 *
 * Honors conditional rendering (may return without clearing, or attach a
 * predicate to the BLORP batch).  Interface matches what crocus_clear() and
 * crocus_clear_render_target() pass in.
 */
static void
clear_color(struct crocus_context *ice,
            struct pipe_resource *p_res,
            unsigned level,
            const struct pipe_box *box,
            bool render_condition_enabled,
            enum isl_format format,
            struct isl_swizzle swizzle,
            union isl_color_value color)
{
   struct crocus_resource *res = (void *) p_res;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   struct crocus_screen *screen = batch->screen;
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   enum blorp_batch_flags blorp_flags = 0;

   if (render_condition_enabled) {
      /* Condition known to be false: skip the clear entirely. */
      if (!crocus_check_conditional_render(ice))
         return;

      /* Condition lives in a GPU register: let BLORP predicate the blit. */
      if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
         blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }

   /* Buffer clears write box->x..x+width bytes; track the valid range. */
   if (p_res->target == PIPE_BUFFER)
      util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);

   crocus_batch_maybe_flush(batch, 1500);

   bool can_fast_clear = can_fast_clear_color(ice, p_res, level, box,
                                              render_condition_enabled,
                                              res->surf.format, format, color);
   if (can_fast_clear) {
      fast_clear_color(ice, res, level, box, format, color,
                       blorp_flags);
      return;
   }

   /* Slow path: full BLORP render-target clear with all channels enabled. */
   bool color_write_disable[4] = { false, false, false, false };
   enum isl_aux_usage aux_usage =
      crocus_resource_render_aux_usage(ice, res, format,
                                       false, false);

   crocus_resource_prepare_render(ice, res, level,
                                  box->z, box->depth, aux_usage);

   struct blorp_surf surf;
   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
                                  p_res, aux_usage, level, true);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);

   /* RGBX formats that can't be rendered to are cleared as their RGBA
    * equivalent (X channel written, but nothing reads it).
    */
   if (!isl_format_supports_rendering(devinfo, format) &&
       isl_format_is_rgbx(format))
      format = isl_format_rgbx_to_rgba(format);

   blorp_clear(&blorp_batch, &surf, format, swizzle,
               level, box->z, box->depth, box->x, box->y,
               box->x + box->width, box->y + box->height,
               color, color_write_disable);

   blorp_batch_finish(&blorp_batch);
   crocus_flush_and_dirty_for_history(ice, batch, res,
                                      PIPE_CONTROL_RENDER_TARGET_FLUSH,
                                      "cache history: post color clear");

   crocus_resource_finish_render(ice, res, level,
                                 box->z, box->depth, aux_usage);
}
|
||||
|
||||
/**
 * Decide whether a depth clear can use the HiZ fast-clear path.
 *
 * Requires gfx6+, no INTEL_DEBUG=nofc override, a full-surface (non-partial)
 * clear, no register-based conditional rendering, and HiZ enabled for this
 * level.  NOTE(review): the `depth` clear value parameter is currently
 * unused here — confirm whether a clear-value restriction was intended.
 */
static bool
can_fast_clear_depth(struct crocus_context *ice,
                     struct crocus_resource *res,
                     unsigned level,
                     const struct pipe_box *box,
                     bool render_condition_enabled,
                     float depth)
{
   struct pipe_resource *p_res = (void *) res;
   struct pipe_context *ctx = (void *) ice;
   struct crocus_screen *screen = (void *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* No HiZ fast-clear support before gfx6. */
   if (devinfo->ver < 6)
      return false;

   if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
      return false;

   /* Check for partial clears */
   if (box->x > 0 || box->y > 0 ||
       box->width < u_minify(p_res->width0, level) ||
       box->height < u_minify(p_res->height0, level)) {
      return false;
   }

   /* Avoid conditional fast clears to maintain correct tracking of the aux
    * state (see crocus_resource_finish_write for more info).  Note that
    * partial fast clears would not pose a problem with conditional rendering.
    */
   if (render_condition_enabled &&
       ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
      return false;
   }

   if (!crocus_resource_level_has_hiz(res, level))
      return false;

   if (res->base.format == PIPE_FORMAT_Z16_UNORM) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *  enabled (the legacy method of clearing must be performed):
       *
       *  - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
       *    width of the map (LOD0) is not multiple of 16, fast clear
       *    optimization must be disabled.
       */
      if (devinfo->ver == 6 &&
          (minify(res->surf.phys_level0_sa.width,
                  level) % 16) != 0)
         return false;
   }
   return true;
}
|
||||
|
||||
/**
 * Fast-clear a depth resource using HiZ.
 *
 * Quantizes the clear value to the buffer's precision, resolves any other
 * slices still referencing the old clear value (only one depth clear value
 * can be tracked per resource), records the new value, then performs HiZ
 * fast-clear ops on the requested layers and marks them ISL_AUX_STATE_CLEAR.
 */
static void
fast_clear_depth(struct crocus_context *ice,
                 struct crocus_resource *res,
                 unsigned level,
                 const struct pipe_box *box,
                 float depth)
{
   struct pipe_resource *p_res = (void *) res;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];

   /* Quantize the clear value to what can be stored in the actual depth
    * buffer.  This makes the following check more accurate because it now
    * checks if the actual depth bits will match.  It also prevents us from
    * getting a too-accurate depth value during depth testing or when sampling
    * with HiZ enabled.
    */
   const unsigned nbits = p_res->format == PIPE_FORMAT_Z16_UNORM ? 16 : 24;
   const uint32_t depth_max = (1 << nbits) - 1;
   depth = p_res->format == PIPE_FORMAT_Z32_FLOAT ? depth :
      (unsigned)(depth * depth_max) / (float)depth_max;

   bool update_clear_depth = false;

   /* If we're clearing to a new clear value, then we need to resolve any clear
    * flags out of the HiZ buffer into the real depth buffer.
    */
   if (res->aux.clear_color.f32[0] != depth) {
      for (unsigned res_level = 0; res_level < res->surf.levels; res_level++) {
         if (!crocus_resource_level_has_hiz(res, res_level))
            continue;

         const unsigned level_layers =
            crocus_get_num_logical_layers(res, res_level);
         for (unsigned layer = 0; layer < level_layers; layer++) {
            if (res_level == level &&
                layer >= box->z &&
                layer < box->z + box->depth) {
               /* We're going to clear this layer anyway.  Leave it alone. */
               continue;
            }

            enum isl_aux_state aux_state =
               crocus_resource_get_aux_state(res, res_level, layer);

            if (aux_state != ISL_AUX_STATE_CLEAR &&
                aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
               /* This slice doesn't have any fast-cleared bits. */
               continue;
            }

            /* If we got here, then the level may have fast-clear bits that
             * use the old clear value.  We need to do a depth resolve to get
             * rid of their use of the clear value before we can change it.
             * Fortunately, few applications ever change their depth clear
             * value so this shouldn't happen often.
             */
            crocus_hiz_exec(ice, batch, res, res_level, layer, 1,
                            ISL_AUX_OP_FULL_RESOLVE, false);
            crocus_resource_set_aux_state(ice, res, res_level, layer, 1,
                                          ISL_AUX_STATE_RESOLVED);
         }
      }
      /* Only f32[0] is meaningful for a depth clear value. */
      const union isl_color_value clear_value = { .f32 = {depth, } };
      crocus_resource_set_clear_color(ice, res, clear_value);
      update_clear_depth = true;
   }

   for (unsigned l = 0; l < box->depth; l++) {
      enum isl_aux_state aux_state =
         crocus_resource_level_has_hiz(res, level) ?
         crocus_resource_get_aux_state(res, level, box->z + l) :
         ISL_AUX_STATE_AUX_INVALID;
      /* Skip the HiZ op only when the slice is already CLEAR with the same
       * value; otherwise fast-clear (and push the new value if it changed).
       */
      if (update_clear_depth || aux_state != ISL_AUX_STATE_CLEAR) {
         if (aux_state == ISL_AUX_STATE_CLEAR) {
            perf_debug(&ice->dbg, "Performing HiZ clear just to update the "
                       "depth clear value\n");
         }
         crocus_hiz_exec(ice, batch, res, level,
                         box->z + l, 1, ISL_AUX_OP_FAST_CLEAR,
                         update_clear_depth);
      }
   }

   crocus_resource_set_aux_state(ice, res, level, box->z, box->depth,
                                 ISL_AUX_STATE_CLEAR);
   /* Depth buffer packet carries the clear value; re-emit it. */
   ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;
}
|
||||
|
||||
/**
 * Clear depth and/or stencil on a resource, fast-clearing depth via HiZ
 * when possible and falling back to a BLORP slow clear otherwise.
 *
 * Honors conditional rendering like clear_color().  After a successful
 * depth fast clear, only a pending stencil clear (if any) goes through the
 * slow path.
 */
static void
clear_depth_stencil(struct crocus_context *ice,
                    struct pipe_resource *p_res,
                    unsigned level,
                    const struct pipe_box *box,
                    bool render_condition_enabled,
                    bool clear_depth,
                    bool clear_stencil,
                    float depth,
                    uint8_t stencil)
{
   struct crocus_resource *res = (void *) p_res;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   struct crocus_screen *screen = batch->screen;
   enum blorp_batch_flags blorp_flags = 0;

   if (render_condition_enabled) {
      if (!crocus_check_conditional_render(ice))
         return;

      if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
         blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }

   crocus_batch_maybe_flush(batch, 1500);

   struct crocus_resource *z_res;
   struct crocus_resource *stencil_res;
   /* NOTE(review): z_surf/stencil_surf may be passed to
    * blorp_clear_depth_stencil() uninitialized when the corresponding clear
    * is disabled — presumably BLORP ignores them in that case; confirm.
    */
   struct blorp_surf z_surf;
   struct blorp_surf stencil_surf;

   crocus_get_depth_stencil_resources(&batch->screen->devinfo, p_res, &z_res, &stencil_res);
   if (z_res && clear_depth &&
       can_fast_clear_depth(ice, z_res, level, box, render_condition_enabled,
                            depth)) {
      fast_clear_depth(ice, z_res, level, box, depth);
      crocus_flush_and_dirty_for_history(ice, batch, res, 0,
                                         "cache history: post fast Z clear");
      /* Depth is done; only stencil (if requested) remains below. */
      clear_depth = false;
      z_res = NULL;
   }

   /* At this point, we might have fast cleared the depth buffer.  So if
    * there's no stencil clear pending, return early.
    */
   if (!(clear_depth || (clear_stencil && stencil_res))) {
      return;
   }

   if (clear_depth && z_res) {
      /* NOTE(review): this call passes (ice, z_res, level, format, false)
       * while clear_color() passes (ice, res, format, false, false) —
       * verify the crocus_resource_render_aux_usage prototype; one of the
       * two call sites looks inconsistent.
       */
      const enum isl_aux_usage aux_usage =
         crocus_resource_render_aux_usage(ice, z_res, level, z_res->surf.format,
                                          false);
      crocus_resource_prepare_render(ice, z_res, level, box->z, box->depth,
                                     aux_usage);
      crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
                                     &z_surf, &z_res->base, aux_usage,
                                     level, true);
   }

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);

   /* 0xff = clear all stencil bits; 0 = stencil clear disabled. */
   uint8_t stencil_mask = clear_stencil && stencil_res ? 0xff : 0;
   if (stencil_mask) {
      crocus_resource_prepare_access(ice, stencil_res, level, 1, box->z,
                                     box->depth, stencil_res->aux.usage, false);
      crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
                                     &stencil_surf, &stencil_res->base,
                                     stencil_res->aux.usage, level, true);
   }

   blorp_clear_depth_stencil(&blorp_batch, &z_surf, &stencil_surf,
                             level, box->z, box->depth,
                             box->x, box->y,
                             box->x + box->width,
                             box->y + box->height,
                             clear_depth && z_res, depth,
                             stencil_mask, stencil);

   blorp_batch_finish(&blorp_batch);
   crocus_flush_and_dirty_for_history(ice, batch, res, 0,
                                      "cache history: post slow ZS clear");

   if (clear_depth && z_res) {
      crocus_resource_finish_render(ice, z_res, level,
                                    box->z, box->depth, z_surf.aux_usage);
   }

   if (stencil_mask) {
      crocus_resource_finish_write(ice, stencil_res, level, box->z, box->depth,
                                   stencil_res->aux.usage);
   }
}
|
||||
|
||||
/**
|
||||
* The pipe->clear() driver hook.
|
||||
*
|
||||
* This clears buffers attached to the current draw framebuffer.
|
||||
*/
|
||||
static void
|
||||
crocus_clear(struct pipe_context *ctx,
|
||||
unsigned buffers,
|
||||
const struct pipe_scissor_state *scissor_state,
|
||||
const union pipe_color_union *p_color,
|
||||
double depth,
|
||||
unsigned stencil)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
|
||||
struct crocus_screen *screen = (void *) ctx->screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
assert(buffers != 0);
|
||||
|
||||
struct pipe_box box = {
|
||||
.width = cso_fb->width,
|
||||
.height = cso_fb->height,
|
||||
};
|
||||
|
||||
if (scissor_state) {
|
||||
box.x = scissor_state->minx;
|
||||
box.y = scissor_state->miny;
|
||||
box.width = MIN2(box.width, scissor_state->maxx - scissor_state->minx);
|
||||
box.height = MIN2(box.height, scissor_state->maxy - scissor_state->miny);
|
||||
}
|
||||
|
||||
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
|
||||
if (devinfo->ver < 6) {
|
||||
crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE, true);
|
||||
util_blitter_clear(ice->blitter, cso_fb->width, cso_fb->height,
|
||||
util_framebuffer_get_num_layers(cso_fb),
|
||||
buffers & PIPE_CLEAR_DEPTHSTENCIL, p_color, depth, stencil, false);
|
||||
} else {
|
||||
struct pipe_surface *psurf = cso_fb->zsbuf;
|
||||
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
|
||||
box.z = psurf->u.tex.first_layer;
|
||||
|
||||
clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box, true,
|
||||
buffers & PIPE_CLEAR_DEPTH,
|
||||
buffers & PIPE_CLEAR_STENCIL,
|
||||
depth, stencil);
|
||||
}
|
||||
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
|
||||
}
|
||||
|
||||
if (buffers & PIPE_CLEAR_COLOR) {
|
||||
/* pipe_color_union and isl_color_value are interchangeable */
|
||||
union isl_color_value *color = (void *) p_color;
|
||||
|
||||
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
|
||||
if (buffers & (PIPE_CLEAR_COLOR0 << i)) {
|
||||
struct pipe_surface *psurf = cso_fb->cbufs[i];
|
||||
struct crocus_surface *isurf = (void *) psurf;
|
||||
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1,
|
||||
box.z = psurf->u.tex.first_layer,
|
||||
|
||||
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
|
||||
true, isurf->view.format, isurf->view.swizzle,
|
||||
*color);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The pipe->clear_texture() driver hook.
|
||||
*
|
||||
* This clears the given texture resource.
|
||||
*/
|
||||
static void
|
||||
crocus_clear_texture(struct pipe_context *ctx,
|
||||
struct pipe_resource *p_res,
|
||||
unsigned level,
|
||||
const struct pipe_box *box,
|
||||
const void *data)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct crocus_screen *screen = (void *) ctx->screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
struct crocus_resource *res = (void *) p_res;
|
||||
|
||||
if (devinfo->ver < 6) {
|
||||
util_clear_texture(ctx, p_res,
|
||||
level, box, data);
|
||||
return;
|
||||
}
|
||||
|
||||
if (crocus_resource_unfinished_aux_import(res))
|
||||
crocus_resource_finish_aux_import(ctx->screen, res);
|
||||
|
||||
if (util_format_is_depth_or_stencil(p_res->format)) {
|
||||
const struct util_format_unpack_description *fmt_unpack =
|
||||
util_format_unpack_description(p_res->format);
|
||||
|
||||
float depth = 0.0;
|
||||
uint8_t stencil = 0;
|
||||
|
||||
if (fmt_unpack->unpack_z_float)
|
||||
fmt_unpack->unpack_z_float(&depth, 0, data, 0, 1, 1);
|
||||
|
||||
if (fmt_unpack->unpack_s_8uint)
|
||||
fmt_unpack->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
|
||||
|
||||
clear_depth_stencil(ice, p_res, level, box, true, true, true,
|
||||
depth, stencil);
|
||||
} else {
|
||||
union isl_color_value color;
|
||||
struct crocus_resource *res = (void *) p_res;
|
||||
enum isl_format format = res->surf.format;
|
||||
|
||||
if (!isl_format_supports_rendering(devinfo, format)) {
|
||||
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
|
||||
// XXX: actually just get_copy_format_for_bpb from BLORP
|
||||
// XXX: don't cut and paste this
|
||||
switch (fmtl->bpb) {
|
||||
case 8: format = ISL_FORMAT_R8_UINT; break;
|
||||
case 16: format = ISL_FORMAT_R8G8_UINT; break;
|
||||
case 24: format = ISL_FORMAT_R8G8B8_UINT; break;
|
||||
case 32: format = ISL_FORMAT_R8G8B8A8_UINT; break;
|
||||
case 48: format = ISL_FORMAT_R16G16B16_UINT; break;
|
||||
case 64: format = ISL_FORMAT_R16G16B16A16_UINT; break;
|
||||
case 96: format = ISL_FORMAT_R32G32B32_UINT; break;
|
||||
case 128: format = ISL_FORMAT_R32G32B32A32_UINT; break;
|
||||
default:
|
||||
unreachable("Unknown format bpb");
|
||||
}
|
||||
|
||||
/* No aux surfaces for non-renderable surfaces */
|
||||
assert(res->aux.usage == ISL_AUX_USAGE_NONE);
|
||||
}
|
||||
|
||||
isl_color_value_unpack(&color, format, data);
|
||||
|
||||
clear_color(ice, p_res, level, box, true, format,
|
||||
ISL_SWIZZLE_IDENTITY, color);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The pipe->clear_render_target() driver hook.
|
||||
*
|
||||
* This clears the given render target surface.
|
||||
*/
|
||||
static void
|
||||
crocus_clear_render_target(struct pipe_context *ctx,
|
||||
struct pipe_surface *psurf,
|
||||
const union pipe_color_union *p_color,
|
||||
unsigned dst_x, unsigned dst_y,
|
||||
unsigned width, unsigned height,
|
||||
bool render_condition_enabled)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct crocus_surface *isurf = (void *) psurf;
|
||||
struct pipe_box box = {
|
||||
.x = dst_x,
|
||||
.y = dst_y,
|
||||
.z = psurf->u.tex.first_layer,
|
||||
.width = width,
|
||||
.height = height,
|
||||
.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
|
||||
};
|
||||
|
||||
/* pipe_color_union and isl_color_value are interchangeable */
|
||||
union isl_color_value *color = (void *) p_color;
|
||||
|
||||
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
|
||||
render_condition_enabled,
|
||||
isurf->view.format, isurf->view.swizzle, *color);
|
||||
}
|
||||
|
||||
/**
 * The pipe->clear_depth_stencil() driver hook.
 *
 * This clears the given depth/stencil surface.
 *
 * Currently a deliberate no-op: the body below is disabled with `#if 0`
 * pending a working implementation (initial driver bring-up).
 * NOTE(review): a silent no-op here means partial depth/stencil surface
 * clears are dropped — confirm nothing relies on this hook yet.
 */
static void
crocus_clear_depth_stencil(struct pipe_context *ctx,
                           struct pipe_surface *psurf,
                           unsigned flags,
                           double depth,
                           unsigned stencil,
                           unsigned dst_x, unsigned dst_y,
                           unsigned width, unsigned height,
                           bool render_condition_enabled)
{
   return;
#if 0
   struct crocus_context *ice = (void *) ctx;
   struct pipe_box box = {
      .x = dst_x,
      .y = dst_y,
      .z = psurf->u.tex.first_layer,
      .width = width,
      .height = height,
      .depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
   };
   uint32_t blit_flags = 0;

   assert(util_format_is_depth_or_stencil(psurf->texture->format));

   crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE);
   util_blitter_clear(ice->blitter, width, height,
                      1, flags, NULL, depth, stencil, render_condition_enabled);
#if 0
   clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box,
                       render_condition_enabled,
                       flags & PIPE_CLEAR_DEPTH, flags & PIPE_CLEAR_STENCIL,
                       depth, stencil);
#endif
#endif
}
|
||||
|
||||
void
|
||||
crocus_init_clear_functions(struct pipe_context *ctx)
|
||||
{
|
||||
ctx->clear = crocus_clear;
|
||||
ctx->clear_texture = crocus_clear_texture;
|
||||
ctx->clear_render_target = crocus_clear_render_target;
|
||||
ctx->clear_depth_stencil = crocus_clear_depth_stencil;
|
||||
}
|
|
@ -0,0 +1,336 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
#include "common/intel_defines.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
|
||||
/**
|
||||
* The pipe->set_debug_callback() driver hook.
|
||||
*/
|
||||
static void
|
||||
crocus_set_debug_callback(struct pipe_context *ctx,
|
||||
const struct pipe_debug_callback *cb)
|
||||
{
|
||||
struct crocus_context *ice = (struct crocus_context *)ctx;
|
||||
|
||||
if (cb)
|
||||
ice->dbg = *cb;
|
||||
else
|
||||
memset(&ice->dbg, 0, sizeof(ice->dbg));
|
||||
}
|
||||
|
||||
/* Write a driver-identification blob into the workaround BO and remember
 * the aligned offset of the first free byte after it.  Returns false if the
 * BO could not be mapped.  The CAPTURE flag presumably gets the BO included
 * in GPU error-state dumps — NOTE(review): confirm kernel behavior.
 */
static bool
crocus_init_identifier_bo(struct crocus_context *ice)
{
   void *bo_map;

   bo_map = crocus_bo_map(NULL, ice->workaround_bo, MAP_READ | MAP_WRITE);
   if (!bo_map)
      return false;

   ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
   /* Assumes the workaround BO is at least 4096 bytes (allocated as 4096 in
    * crocus_create_context) — keep the two sizes in sync.
    */
   ice->workaround_offset = ALIGN(
      intel_debug_write_identifiers(bo_map, 4096, "Crocus") + 8, 8);

   crocus_bo_unmap(ice->workaround_bo);

   return true;
}
|
||||
|
||||
/**
 * Called from the batch module when it detects a GPU hang.
 *
 * In this case, we've lost our GEM context, and can't rely on any existing
 * state on the GPU.  We must mark everything dirty and wipe away any saved
 * assumptions about the last known state of the GPU.
 */
void
crocus_lost_context_state(struct crocus_batch *batch)
{
   /* The batch module doesn't have a crocus_context, because we want to
    * avoid introducing lots of layering violations.  Unfortunately, here
    * we do need to inform the context of batch catastrophe.  We know the
    * batch is one of our context's, so hackily claw our way back.
    */
   struct crocus_context *ice = batch->ice;
   struct crocus_screen *screen = batch->screen;
   /* Re-emit the invariant setup for whichever hardware context we lost. */
   if (batch->name == CROCUS_BATCH_RENDER) {
      screen->vtbl.init_render_context(batch);
   } else if (batch->name == CROCUS_BATCH_COMPUTE) {
      screen->vtbl.init_compute_context(batch);
   } else {
      unreachable("unhandled batch reset");
   }

   /* Forget everything we believed about GPU state. */
   ice->state.dirty = ~0ull;
   memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
   batch->state_base_address_emitted = false;
   screen->vtbl.lost_genx_state(ice, batch);
}
|
||||
|
||||
/**
 * The pipe->get_device_reset_status() driver hook.
 *
 * Queries each batch's hardware context for a GPU reset and reports the
 * worst outcome, invoking the app's reset callback (if installed) when a
 * reset occurred.
 */
static enum pipe_reset_status
crocus_get_device_reset_status(struct pipe_context *ctx)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;

   enum pipe_reset_status worst_reset = PIPE_NO_RESET;

   /* Check the reset status of each batch's hardware context, and take the
    * worst status (if one was guilty, proclaim guilt).
    */
   for (int i = 0; i < ice->batch_count; i++) {
      /* This will also recreate the hardware contexts as necessary, so any
       * future queries will show no resets.  We only want to report once.
       */
      enum pipe_reset_status batch_reset =
         crocus_batch_check_for_reset(&ice->batches[i]);

      if (batch_reset == PIPE_NO_RESET)
         continue;

      if (worst_reset == PIPE_NO_RESET) {
         worst_reset = batch_reset;
      } else {
         /* GUILTY < INNOCENT < UNKNOWN */
         worst_reset = MIN2(worst_reset, batch_reset);
      }
   }

   if (worst_reset != PIPE_NO_RESET && ice->reset.reset)
      ice->reset.reset(ice->reset.data, worst_reset);

   return worst_reset;
}
|
||||
|
||||
static void
|
||||
crocus_set_device_reset_callback(struct pipe_context *ctx,
|
||||
const struct pipe_device_reset_callback *cb)
|
||||
{
|
||||
struct crocus_context *ice = (struct crocus_context *)ctx;
|
||||
|
||||
if (cb)
|
||||
ice->reset = *cb;
|
||||
else
|
||||
memset(&ice->reset, 0, sizeof(ice->reset));
|
||||
}
|
||||
|
||||
/**
 * The pipe->get_sample_position() driver hook.
 *
 * Returns the x/y position of the given MSAA sample for the given sample
 * count.  The union overlays two views of the same 32 x/y float pairs:
 * `v` has the individually named fields the INTEL_SAMPLE_POS_*X macros
 * expect (they token-paste onto the `u.v._` prefix — presumably producing
 * u.v._0XOffset etc.; see intel_sample_positions.h), while `a` exposes the
 * same storage as plain arrays for indexed lookup.
 */
static void
crocus_get_sample_position(struct pipe_context *ctx,
                           unsigned sample_count,
                           unsigned sample_index,
                           float *out_value)
{
   union {
      struct {
         float x[16];
         float y[16];
      } a;
      struct {
         float _0XOffset, _1XOffset, _2XOffset, _3XOffset,
            _4XOffset, _5XOffset, _6XOffset, _7XOffset,
            _8XOffset, _9XOffset, _10XOffset, _11XOffset,
            _12XOffset, _13XOffset, _14XOffset, _15XOffset;
         float _0YOffset, _1YOffset, _2YOffset, _3YOffset,
            _4YOffset, _5YOffset, _6YOffset, _7YOffset,
            _8YOffset, _9YOffset, _10YOffset, _11YOffset,
            _12YOffset, _13YOffset, _14YOffset, _15YOffset;
      } v;
   } u;
   switch (sample_count) {
   case 1:  INTEL_SAMPLE_POS_1X(u.v._);  break;
   case 2:  INTEL_SAMPLE_POS_2X(u.v._);  break;
   case 4:  INTEL_SAMPLE_POS_4X(u.v._);  break;
   case 8:  INTEL_SAMPLE_POS_8X(u.v._);  break;
   case 16: INTEL_SAMPLE_POS_16X(u.v._); break;
   default: unreachable("invalid sample count");
   }

   out_value[0] = u.a.x[sample_index];
   out_value[1] = u.a.y[sample_index];
}
|
||||
|
||||
/**
 * Destroy a context, freeing any associated memory.
 */
static void
crocus_destroy_context(struct pipe_context *ctx)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   /* const_uploader aliases stream_uploader (see crocus_create_context),
    * so only the one uploader is destroyed here.
    */
   if (ctx->stream_uploader)
      u_upload_destroy(ctx->stream_uploader);

   if (ice->blitter)
      util_blitter_destroy(ice->blitter);
   screen->vtbl.destroy_state(ice);
   crocus_destroy_program_cache(ice);
   u_upload_destroy(ice->query_buffer_uploader);

   crocus_bo_unreference(ice->workaround_bo);

   slab_destroy_child(&ice->transfer_pool);

   /* The compute batch is only freed if it was ever initialized (its ->ice
    * back-pointer is set on init).
    */
   crocus_batch_free(&ice->batches[CROCUS_BATCH_RENDER]);
   if (ice->batches[CROCUS_BATCH_COMPUTE].ice)
      crocus_batch_free(&ice->batches[CROCUS_BATCH_COMPUTE]);

   /* ice is the ralloc context for everything hung off it. */
   ralloc_free(ice);
}
|
||||
|
||||
/* Invoke the generation-specific variant of `func` (gfx4_/gfx45_/gfx5_/
 * gfx6_/gfx7_/gfx75_ prefix) matching the device's verx10 value.
 */
#define genX_call(devinfo, func, ...) \
   switch ((devinfo)->verx10) { \
   case 75: \
      gfx75_##func(__VA_ARGS__); \
      break; \
   case 70: \
      gfx7_##func(__VA_ARGS__); \
      break; \
   case 60: \
      gfx6_##func(__VA_ARGS__); \
      break; \
   case 50: \
      gfx5_##func(__VA_ARGS__); \
      break; \
   case 45: \
      gfx45_##func(__VA_ARGS__); \
      break; \
   case 40: \
      gfx4_##func(__VA_ARGS__); \
      break; \
   default: \
      unreachable("Unknown hardware generation"); \
   }
|
||||
|
||||
/**
|
||||
* Create a context.
|
||||
*
|
||||
* This is where each context begins.
|
||||
*/
|
||||
struct pipe_context *
|
||||
crocus_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen*)pscreen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
struct crocus_context *ice = rzalloc(NULL, struct crocus_context);
|
||||
|
||||
if (!ice)
|
||||
return NULL;
|
||||
|
||||
struct pipe_context *ctx = &ice->ctx;
|
||||
|
||||
ctx->screen = pscreen;
|
||||
ctx->priv = priv;
|
||||
|
||||
ctx->stream_uploader = u_upload_create_default(ctx);
|
||||
if (!ctx->stream_uploader) {
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
ctx->const_uploader = ctx->stream_uploader;
|
||||
|
||||
ctx->destroy = crocus_destroy_context;
|
||||
ctx->set_debug_callback = crocus_set_debug_callback;
|
||||
ctx->set_device_reset_callback = crocus_set_device_reset_callback;
|
||||
ctx->get_device_reset_status = crocus_get_device_reset_status;
|
||||
ctx->get_sample_position = crocus_get_sample_position;
|
||||
|
||||
ice->shaders.urb_size = devinfo->urb.size;
|
||||
|
||||
crocus_init_context_fence_functions(ctx);
|
||||
crocus_init_blit_functions(ctx);
|
||||
crocus_init_clear_functions(ctx);
|
||||
crocus_init_program_functions(ctx);
|
||||
crocus_init_resource_functions(ctx);
|
||||
crocus_init_flush_functions(ctx);
|
||||
|
||||
crocus_init_program_cache(ice);
|
||||
|
||||
slab_create_child(&ice->transfer_pool, &screen->transfer_pool);
|
||||
|
||||
ice->query_buffer_uploader =
|
||||
u_upload_create(ctx, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING,
|
||||
0);
|
||||
|
||||
ice->workaround_bo =
|
||||
crocus_bo_alloc(screen->bufmgr, "workaround", 4096);
|
||||
if (!ice->workaround_bo)
|
||||
return NULL;
|
||||
|
||||
if (!crocus_init_identifier_bo(ice))
|
||||
return NULL;
|
||||
|
||||
genX_call(devinfo, init_state, ice);
|
||||
genX_call(devinfo, init_blorp, ice);
|
||||
genX_call(devinfo, init_query, ice);
|
||||
|
||||
ice->blitter = util_blitter_create(&ice->ctx);
|
||||
if (ice->blitter == NULL)
|
||||
return NULL;
|
||||
int priority = 0;
|
||||
if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
|
||||
priority = INTEL_CONTEXT_HIGH_PRIORITY;
|
||||
if (flags & PIPE_CONTEXT_LOW_PRIORITY)
|
||||
priority = INTEL_CONTEXT_LOW_PRIORITY;
|
||||
|
||||
ice->batch_count = devinfo->ver >= 7 ? CROCUS_BATCH_COUNT : 1;
|
||||
for (int i = 0; i < ice->batch_count; i++) {
|
||||
crocus_init_batch(ice, (enum crocus_batch_name) i,
|
||||
priority);
|
||||
}
|
||||
|
||||
ice->urb.size = devinfo->urb.size;
|
||||
screen->vtbl.init_render_context(&ice->batches[CROCUS_BATCH_RENDER]);
|
||||
if (ice->batch_count > 1)
|
||||
screen->vtbl.init_compute_context(&ice->batches[CROCUS_BATCH_COMPUTE]);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_sw_check_cond_render(struct crocus_context *ice)
|
||||
{
|
||||
struct crocus_query *q = ice->condition.query;
|
||||
union pipe_query_result result;
|
||||
|
||||
bool wait = ice->condition.mode == PIPE_RENDER_COND_WAIT ||
|
||||
ice->condition.mode == PIPE_RENDER_COND_BY_REGION_WAIT;
|
||||
if (!q)
|
||||
return true;
|
||||
|
||||
bool ret = ice->ctx.get_query_result(&ice->ctx, (void *)q, wait, &result);
|
||||
if (!ret)
|
||||
return true;
|
||||
|
||||
return ice->condition.condition ? result.u64 == 0 : result.u64 != 0;
|
||||
}
|
|
@ -0,0 +1,955 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CROCUS_CONTEXT_H
|
||||
#define CROCUS_CONTEXT_H
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "intel/blorp/blorp.h"
|
||||
#include "intel/dev/intel_debug.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "crocus_batch.h"
|
||||
#include "crocus_fence.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
#include "util/u_blitter.h"
|
||||
|
||||
struct crocus_bo;
struct crocus_context;
struct blorp_batch;
struct blorp_params;

/* 1 << 27 texels -- maximum texture buffer size. */
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
/* CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same. */
#define CROCUS_MAX_ABOS 16
#define CROCUS_MAX_SSBOS 16
#define CROCUS_MAX_VIEWPORTS 16
#define CROCUS_MAX_CLIP_PLANES 8

/* Domains for shader "system value" parameters (see BRW_PARAM below). */
enum crocus_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_IMAGE,
};

/* driconf "bo_reuse" option values. */
enum {
   DRI_CONF_BO_REUSE_DISABLED,
   DRI_CONF_BO_REUSE_ALL
};

/* Pack a parameter: domain in the top 8 bits, value in the low 24 bits. */
#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
/* Image params: image index in bits 8..23 of the value, offset below. */
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value)   (BRW_PARAM_VALUE(value) >> 8)
/* NOTE(review): the 0xf mask keeps only 4 bits of offset although
 * BRW_PARAM_IMAGE packs 8 -- presumably offsets are always < 16; confirm.
 */
#define BRW_PARAM_IMAGE_OFFSET(value)(BRW_PARAM_VALUE(value) & 0xf)

/**
 * Dirty flags.  When state changes, we flag some combination of these
 * to indicate that particular GPU commands need to be re-emitted.
 *
 * Each bit typically corresponds to a single 3DSTATE_* command packet, but
 * in rare cases they map to a group of related packets that need to be
 * emitted together.
 *
 * See crocus_upload_render_state().
 */
#define CROCUS_DIRTY_COLOR_CALC_STATE             (1ull << 0)
#define CROCUS_DIRTY_POLYGON_STIPPLE              (1ull << 1)
#define CROCUS_DIRTY_CC_VIEWPORT                  (1ull << 2)
#define CROCUS_DIRTY_SF_CL_VIEWPORT               (1ull << 3)
#define CROCUS_DIRTY_RASTER                       (1ull << 4)
#define CROCUS_DIRTY_CLIP                         (1ull << 5)
#define CROCUS_DIRTY_LINE_STIPPLE                 (1ull << 6)
#define CROCUS_DIRTY_VERTEX_ELEMENTS              (1ull << 7)
#define CROCUS_DIRTY_VERTEX_BUFFERS               (1ull << 8)
#define CROCUS_DIRTY_DRAWING_RECTANGLE            (1ull << 9)
#define CROCUS_DIRTY_GEN6_URB                     (1ull << 10)
#define CROCUS_DIRTY_DEPTH_BUFFER                 (1ull << 11)
#define CROCUS_DIRTY_WM                           (1ull << 12)
#define CROCUS_DIRTY_SO_DECL_LIST                 (1ull << 13)
#define CROCUS_DIRTY_STREAMOUT                    (1ull << 14)
#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR          (1ull << 15)
#define CROCUS_DIRTY_GEN4_CURBE                   (1ull << 16)
#define CROCUS_DIRTY_GEN4_URB_FENCE               (1ull << 17)
#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS      (1ull << 18)
#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS  (1ull << 19)
#define CROCUS_DIRTY_GEN6_BLEND_STATE             (1ull << 20)
#define CROCUS_DIRTY_GEN6_SCISSOR_RECT            (1ull << 21)
#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL        (1ull << 22)
#define CROCUS_DIRTY_GEN6_MULTISAMPLE             (1ull << 23)
#define CROCUS_DIRTY_GEN6_SAMPLE_MASK             (1ull << 24)
#define CROCUS_DIRTY_GEN7_SBE                     (1ull << 25)
#define CROCUS_DIRTY_GEN7_L3_CONFIG               (1ull << 26)
#define CROCUS_DIRTY_GEN7_SO_BUFFERS              (1ull << 27)
#define CROCUS_DIRTY_GEN75_VF                     (1ull << 28)
#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES  (1ull << 29)
#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30)
#define CROCUS_DIRTY_VF_STATISTICS                (1ull << 31)
#define CROCUS_DIRTY_GEN4_CLIP_PROG               (1ull << 32)
#define CROCUS_DIRTY_GEN4_SF_PROG                 (1ull << 33)
#define CROCUS_DIRTY_GEN4_FF_GS_PROG              (1ull << 34)
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS  (1ull << 35)
#define CROCUS_DIRTY_GEN6_SVBI                    (1ull << 36)

#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)

#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
|
||||
|
||||
/**
 * Per-stage dirty flags.  When state changes, we flag some combination of
 * these to indicate that particular GPU commands need to be re-emitted.
 * Unlike the CROCUS_DIRTY_* flags these are shader stage-specific and can be
 * indexed by shifting the mask by the shader stage index.
 *
 * See crocus_upload_render_state().
 */
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS      (1ull << 0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS     (1ull << 1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES     (1ull << 2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS      (1ull << 3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS      (1ull << 4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS      (1ull << 5)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS          (1ull << 6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS         (1ull << 7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES         (1ull << 8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS          (1ull << 9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS          (1ull << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS          (1ull << 11)
#define CROCUS_STAGE_DIRTY_VS                     (1ull << 12)
#define CROCUS_STAGE_DIRTY_TCS                    (1ull << 13)
#define CROCUS_STAGE_DIRTY_TES                    (1ull << 14)
#define CROCUS_STAGE_DIRTY_GS                     (1ull << 15)
#define CROCUS_STAGE_DIRTY_FS                     (1ull << 16)
#define CROCUS_STAGE_DIRTY_CS                     (1ull << 17)
/* Base shift for the per-stage CONSTANTS bits below (add the stage index). */
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS    18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS           (1ull << 18)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS          (1ull << 19)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES          (1ull << 20)
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS           (1ull << 21)
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS           (1ull << 22)
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS           (1ull << 23)
#define CROCUS_STAGE_DIRTY_BINDINGS_VS            (1ull << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS           (1ull << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES           (1ull << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS            (1ull << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS            (1ull << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS            (1ull << 29)

#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE (CROCUS_STAGE_DIRTY_CS | \
                                            CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS | \
                                            CROCUS_STAGE_DIRTY_UNCOMPILED_CS | \
                                            CROCUS_STAGE_DIRTY_CONSTANTS_CS | \
                                            CROCUS_STAGE_DIRTY_BINDINGS_CS)

#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)

#define CROCUS_ALL_STAGE_DIRTY_BINDINGS (CROCUS_STAGE_DIRTY_BINDINGS_VS | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_TCS | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_TES | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_GS | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_FS | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_CS)

#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS (CROCUS_STAGE_DIRTY_CONSTANTS_VS | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_TCS | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_TES | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_GS | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_FS)
|
||||
|
||||
/**
 * Non-orthogonal state (NOS) dependency flags.
 *
 * Shader programs may depend on non-orthogonal state.  These flags are
 * used to indicate that a shader's key depends on the state provided by
 * a certain Gallium CSO.  Changing any CSOs marked as a dependency will
 * cause the driver to re-compute the shader key, possibly triggering a
 * shader recompile.
 */
enum crocus_nos_dep {
   CROCUS_NOS_FRAMEBUFFER,
   CROCUS_NOS_DEPTH_STENCIL_ALPHA,
   CROCUS_NOS_RASTERIZER,
   CROCUS_NOS_BLEND,
   CROCUS_NOS_LAST_VUE_MAP,
   CROCUS_NOS_TEXTURES,
   CROCUS_NOS_VERTEX_ELEMENTS,
   CROCUS_NOS_COUNT,
};

struct crocus_depth_stencil_alpha_state;

/**
 * Cache IDs for the in-memory program cache (ice->shaders.cache).
 *
 * The first entries mirror gl_shader_stage; the trailing entries are for
 * internal (non-API) programs: blorp blits and the fixed-function
 * SF/clip/GS programs used on older generations.
 */
enum crocus_program_cache_id {
   CROCUS_CACHE_VS = MESA_SHADER_VERTEX,
   CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
   CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
   CROCUS_CACHE_GS = MESA_SHADER_GEOMETRY,
   CROCUS_CACHE_FS = MESA_SHADER_FRAGMENT,
   CROCUS_CACHE_CS = MESA_SHADER_COMPUTE,
   CROCUS_CACHE_BLORP,
   CROCUS_CACHE_SF,
   CROCUS_CACHE_CLIP,
   CROCUS_CACHE_FF_GS,
};
|
||||
|
||||
/** @{
 *
 * Defines for PIPE_CONTROL operations, which trigger cache flushes,
 * synchronization, pipelined memory writes, and so on.
 *
 * The bits here are not the actual hardware values.  The actual fields
 * move between various generations, so we just have flags for each
 * potential operation, and use genxml to encode the actual packet.
 */
enum pipe_control_flags
{
   /* Note: bit 0 is intentionally unused. */
   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
   PIPE_CONTROL_CS_STALL                        = (1 << 4),
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
   PIPE_CONTROL_TILE_CACHE_FLUSH                = (1 << 25),
};

/* All "flush dirty data out" operations, for convenience. */
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
   (PIPE_CONTROL_DEPTH_CACHE_FLUSH |  \
    PIPE_CONTROL_DATA_CACHE_FLUSH |   \
    PIPE_CONTROL_RENDER_TARGET_FLUSH)

/* All "invalidate read caches" operations, for convenience. */
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS  \
   (PIPE_CONTROL_STATE_CACHE_INVALIDATE |   \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE |   \
    PIPE_CONTROL_VF_CACHE_INVALIDATE |      \
    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
    PIPE_CONTROL_INSTRUCTION_INVALIDATE)

enum crocus_predicate_state {
   /* The first two states are used if we can determine whether to draw
    * without having to look at the values in the query object buffer.  This
    * will happen if there is no conditional render in progress, if the query
    * object is already completed or if something else has already added
    * samples to the preliminary result.
    */
   CROCUS_PREDICATE_STATE_RENDER,
   CROCUS_PREDICATE_STATE_DONT_RENDER,

   /* In this case whether to draw or not depends on the result of an
    * MI_PREDICATE command so the predicate enable bit needs to be checked.
    */
   CROCUS_PREDICATE_STATE_USE_BIT,
   /* In this case, either MI_PREDICATE doesn't exist or we lack the
    * necessary kernel features to use it.  Stall for the query result.
    */
   CROCUS_PREDICATE_STATE_STALL_FOR_QUERY,
};

/** @} */
|
||||
|
||||
/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See crocus_compiled_shader, which represents a compiled shader variant.
 */
struct crocus_uncompiled_shader {
   struct nir_shader *nir;

   struct pipe_stream_output_info stream_output;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* Unique ID for this shader, used in shader key hashing. */
   unsigned program_id;

   /** Bitfield of (1 << CROCUS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;

   /** Should we use ALT mode for math?  Useful for ARB programs. */
   bool use_alt_mode;

   /* NOTE(review): presumably set when the VS must pass the edge flag
    * through on gen4/5 -- confirm against the program-key code. */
   bool needs_edge_flag;

   /** Constant data scraped from the shader by nir_opt_large_constants */
   struct pipe_resource *const_data;

   /** Surface state for const_data */
   struct crocus_state_ref const_data_state;
};
|
||||
|
||||
/* Categories of surfaces in a shader's binding table, laid out in this
 * order (see struct crocus_binding_table below).
 */
enum crocus_surface_group {
   CROCUS_SURFACE_GROUP_RENDER_TARGET,
   CROCUS_SURFACE_GROUP_RENDER_TARGET_READ,
   CROCUS_SURFACE_GROUP_SOL,
   CROCUS_SURFACE_GROUP_CS_WORK_GROUPS,
   CROCUS_SURFACE_GROUP_TEXTURE,
   CROCUS_SURFACE_GROUP_TEXTURE_GATHER,
   CROCUS_SURFACE_GROUP_IMAGE,
   CROCUS_SURFACE_GROUP_UBO,
   CROCUS_SURFACE_GROUP_SSBO,

   CROCUS_SURFACE_GROUP_COUNT,
};

enum {
   /* Invalid value for a binding table index. */
   CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};

/* Layout of a shader's binding table: per-group sizes, starting offsets,
 * and which surfaces within each group are actually used.
 */
struct crocus_binding_table {
   uint32_t size_bytes;

   /** Number of surfaces in each group, before compacting. */
   uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];

   /** Initial offset of each group. */
   uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];

   /** Mask of surfaces used in each group. */
   uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
};
|
||||
|
||||
/**
 * A compiled shader variant, containing a pointer to the GPU assembly,
 * as well as program data and other packets needed by state upload.
 *
 * There can be several crocus_compiled_shader variants per API-level shader
 * (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
 */
struct crocus_compiled_shader {
   /** Reference to the uploaded assembly. */
   uint32_t offset;

   /* asm size in map */
   uint32_t map_size;

   /** The program data (owned by the program cache hash table) */
   struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   /** A list of system values to be uploaded as uniforms. */
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   /** Number of constbufs expected by the shader. */
   unsigned num_cbufs;

   /**
    * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
    * (the VUE-based information for transform feedback outputs).
    */
   uint32_t *streamout;

   struct crocus_binding_table bt;

   uint32_t bind_bo_offset;
   /* TODO: size this properly instead of hard-coding 128 entries. */
   uint32_t surf_offset[128];
};
|
||||
|
||||
/**
 * API context state that is replicated per shader stage.
 */
struct crocus_shader_state {
   /** Uniform Buffers */
   struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];

   /* True when system values changed and must be re-uploaded. */
   bool sysvals_need_upload;

   /** Shader Storage Buffers */
   struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];

   /** Shader Storage Images (image load store) */
   struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];

   struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
   struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];

   /** Bitfield of which constant buffers are bound (non-null). */
   uint32_t bound_cbufs;

   /** Bitfield of which image views are bound (non-null). */
   uint32_t bound_image_views;

   /** Bitfield of which sampler views are bound (non-null). */
   uint32_t bound_sampler_views;

   /** Bitfield of which shader storage buffers are bound (non-null). */
   uint32_t bound_ssbos;

   /** Bitfield of which shader storage buffers are writable. */
   uint32_t writable_ssbos;

   uint32_t sampler_offset;
};
|
||||
|
||||
/**
 * The API context (derived from pipe_context).
 *
 * Most driver state is tracked here.
 */
struct crocus_context {
   struct pipe_context ctx;

   /** A debug callback for KHR_debug output. */
   struct pipe_debug_callback dbg;

   /** A device reset status callback for notifying that the GPU is hosed. */
   struct pipe_device_reset_callback reset;

   /** Slab allocator for crocus_transfer_map objects. */
   struct slab_child_pool transfer_pool;

   struct blorp_context blorp;

   /* 1 on Gen4-6, CROCUS_BATCH_COUNT on Gen7+ (see crocus_create_context). */
   int batch_count;
   struct crocus_batch batches[CROCUS_BATCH_COUNT];

   struct u_upload_mgr *query_buffer_uploader;

   struct blitter_context *blitter;

   struct {
      struct {
         /**
          * Either the value of BaseVertex for indexed draw calls or the value
          * of the argument <first> for non-indexed draw calls.
          */
         int firstvertex;
         int baseinstance;
      } params;

      /**
       * Are the above values the ones stored in the draw_params buffer?
       * If so, we can compare them against new values to see if anything
       * changed.  If not, we need to assume they changed.
       */
      bool params_valid;

      /**
       * Resource and offset that stores draw_parameters from the indirect
       * buffer or to the buffer that stores the previous values for
       * non-indirect draws.
       */
      struct crocus_state_ref draw_params;

      struct {
         /**
          * The value of DrawID.  This always comes in from its own vertex
          * buffer since it's not part of the indirect draw parameters.
          */
         int drawid;

         /**
          * Stores if an indexed or non-indexed draw (~0/0).  Useful to
          * calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
          */
         int is_indexed_draw;
      } derived_params;

      /**
       * Resource and offset used for GL_ARB_shader_draw_parameters which
       * contains parameters that are not present in the indirect buffer as
       * drawid and is_indexed_draw.  They will go in their own vertex element.
       */
      struct crocus_state_ref derived_draw_params;
   } draw;

   struct {
      struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
      struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
      struct brw_vue_map *last_vue_map;

      /* Backing storage + allocator cursor for the program cache. */
      struct crocus_bo *cache_bo;
      uint32_t cache_next_offset;
      void *cache_bo_map;
      struct hash_table *cache;

      unsigned urb_size;

      /* gen 4/5 clip/sf progs */
      struct crocus_compiled_shader *clip_prog;
      struct crocus_compiled_shader *sf_prog;
      /* gen4/5 prims, gen6 streamout */
      struct crocus_compiled_shader *ff_gs_prog;
      uint32_t clip_offset;
      uint32_t sf_offset;
      uint32_t wm_offset;
      uint32_t vs_offset;
      uint32_t gs_offset;
      uint32_t cc_offset;

      /** Is a GS or TES outputting points or lines? */
      bool output_topology_is_points_or_lines;

      /* Track last VS URB entry size */
      unsigned last_vs_entry_size;

      /**
       * Scratch buffers for various sizes and stages.
       *
       * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
       * and shader stage.
       */
      struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
   } shaders;

   /* Conditional-rendering state (pipe_context::render_condition). */
   struct {
      struct crocus_query *query;
      bool condition;
      enum pipe_render_cond_flag mode;
   } condition;

   struct intel_perf_context *perf_ctx;

   struct {
      uint64_t dirty;
      uint64_t stage_dirty;
      uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];

      unsigned num_viewports;
      unsigned sample_mask;
      struct crocus_blend_state *cso_blend;
      struct crocus_rasterizer_state *cso_rast;
      struct crocus_depth_stencil_alpha_state *cso_zsa;
      struct crocus_vertex_element_state *cso_vertex_elements;
      struct pipe_blend_color blend_color;
      struct pipe_poly_stipple poly_stipple;
      struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
      struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
      struct pipe_stencil_ref stencil_ref;
      struct pipe_framebuffer_state framebuffer;
      struct pipe_clip_state clip_planes;

      float default_outer_level[4];
      float default_inner_level[2];

      /** Bitfield of which vertex buffers are bound (non-null). */
      uint32_t bound_vertex_buffers;
      struct pipe_vertex_buffer vertex_buffers[16];
      uint32_t vb_end[16];

      bool primitive_restart;
      unsigned cut_index;
      enum pipe_prim_type prim_mode:8;
      bool prim_is_points_or_lines;
      uint8_t vertices_per_patch;

      bool window_space_position;

      /** The last compute group size */
      uint32_t last_block[3];

      /** The last compute grid size */
      uint32_t last_grid[3];
      /** Reference to the BO containing the compute grid size */
      struct crocus_state_ref grid_size;

      /**
       * Array of aux usages for drawing, altered to account for any
       * self-dependencies from resources bound for sampling and rendering.
       */
      enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];

      /** Aux usage of the fb's depth buffer (which may or may not exist). */
      enum isl_aux_usage hiz_usage;

      /** Bitfield of whether color blending is enabled for RT[i] */
      uint8_t blend_enables;

      /** Are depth writes enabled?  (Depth buffer may or may not exist.) */
      bool depth_writes_enabled;

      /** Are stencil writes enabled?  (Stencil buffer may or may not exist.) */
      bool stencil_writes_enabled;

      /** GenX-specific current state */
      struct crocus_genx_state *genx;

      struct crocus_shader_state shaders[MESA_SHADER_STAGES];

      /** Does the vertex shader use shader draw parameters? */
      bool vs_uses_draw_params;
      bool vs_uses_derived_draw_params;
      bool vs_needs_sgvs_element;
      bool vs_uses_vertexid;
      bool vs_uses_instanceid;

      /** Does the vertex shader use the edge flag? */
      bool vs_needs_edge_flag;

      struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
      bool streamout_active;
      int so_targets;

      bool statistics_counters_enabled;

      /** Current conditional rendering mode */
      enum crocus_predicate_state predicate;
      bool predicate_supported;

      /**
       * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
       * render context that needs to be uploaded to the compute context.
       */
      struct crocus_bo *compute_predicate;

      /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
      bool prims_generated_query_active;

      /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
      uint32_t *streamout;

      /**
       * Resources containing streamed state which our render context
       * currently points to.  Used to re-add these to the validation
       * list when we start a new batch and haven't resubmitted commands.
       */
      struct {
         struct pipe_resource *res;
         uint32_t offset;
         uint32_t size;
         uint32_t index_size;
         bool prim_restart;
      } index_buffer;

      uint32_t sf_vp_address;
      uint32_t clip_vp_address;
      uint32_t cc_vp_address;

      uint32_t stats_wm;
      float global_depth_offset_clamp;

      uint32_t last_xfb_verts_per_prim;
      uint64_t svbi;
   } state;

   /* BRW_NEW_URB_ALLOCATIONS:
    */
   struct {
      uint32_t vsize; /* vertex size plus header in urb registers */
      uint32_t gsize; /* GS output size in urb registers */
      uint32_t hsize; /* Tessellation control output size in urb registers */
      uint32_t dsize; /* Tessellation evaluation output size in urb registers */
      uint32_t csize; /* constant buffer size in urb registers */
      uint32_t sfsize; /* setup data size in urb registers */

      bool constrained;

      uint32_t nr_vs_entries;
      uint32_t nr_hs_entries;
      uint32_t nr_ds_entries;
      uint32_t nr_gs_entries;
      uint32_t nr_clip_entries;
      uint32_t nr_sf_entries;
      uint32_t nr_cs_entries;

      uint32_t vs_start;
      uint32_t hs_start;
      uint32_t ds_start;
      uint32_t gs_start;
      uint32_t clip_start;
      uint32_t sf_start;
      uint32_t cs_start;
      /**
       * URB size in the current configuration.  The units this is expressed
       * in are somewhat inconsistent, see intel_device_info::urb::size.
       *
       * FINISHME: Represent the URB size consistently in KB on all platforms.
       */
      uint32_t size;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the GS.
       */
      bool gs_present;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the HS and DS.
       */
      bool tess_present;
   } urb;

   /* GEN4/5 curbe */
   struct {
      unsigned wm_start;
      unsigned wm_size;
      unsigned clip_start;
      unsigned clip_size;
      unsigned vs_start;
      unsigned vs_size;
      unsigned total_size;

      struct crocus_resource *curbe_res;
      unsigned curbe_offset;
   } curbe;

   /**
    * A buffer containing a marker + description of the driver.  This buffer
    * is added to all execbufs syscalls so that we can identify the driver
    * that generated a hang by looking at the content of the buffer in the
    * error state.  It is also used for hardware workarounds that require
    * scratch writes or reads from some unimportant memory.  To avoid
    * overriding the debug data, use the workaround_address field for
    * workarounds.
    */
   struct crocus_bo *workaround_bo;
   unsigned workaround_offset;
};
|
||||
|
||||
/* Report a performance warning: printed to the debug stream when
 * INTEL_DEBUG=perf is set, and forwarded to the KHR_debug callback
 * (`dbg`) when the application installed one.
 */
#define perf_debug(dbg, ...) do {                      \
   if (INTEL_DEBUG & DEBUG_PERF)                       \
      dbg_printf(__VA_ARGS__);                         \
   if (unlikely(dbg))                                  \
      pipe_debug_message(dbg, PERF_INFO, __VA_ARGS__); \
} while(0)
|
||||
|
||||
|
||||
/* crocus_context.c */
struct pipe_context *
crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags);

/* Called when the kernel reports the context was lost (GPU reset). */
void crocus_lost_context_state(struct crocus_batch *batch);

void crocus_init_blit_functions(struct pipe_context *ctx);
void crocus_init_clear_functions(struct pipe_context *ctx);
void crocus_init_program_functions(struct pipe_context *ctx);
void crocus_init_resource_functions(struct pipe_context *ctx);
bool crocus_update_compiled_shaders(struct crocus_context *ice);
void crocus_update_compiled_compute_shader(struct crocus_context *ice);
void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                                      unsigned threads, uint32_t *dst);


/* crocus_blit.c */
/* Bitmask telling crocus_blitter_begin() which state u_blitter must
 * save and restore around the blit.
 */
enum crocus_blitter_op
{
   CROCUS_SAVE_TEXTURES = 1,
   CROCUS_SAVE_FRAMEBUFFER = 2,
   CROCUS_SAVE_FRAGMENT_STATE = 4,
   CROCUS_DISABLE_RENDER_COND = 8,
};
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond);

void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
                                    struct isl_device *isl_dev,
                                    struct blorp_surf *surf,
                                    struct pipe_resource *p_res,
                                    enum isl_aux_usage aux_usage,
                                    unsigned level,
                                    bool is_render_target);
void crocus_copy_region(struct blorp_context *blorp,
                        struct crocus_batch *batch,
                        struct pipe_resource *dst,
                        unsigned dst_level,
                        unsigned dstx, unsigned dsty, unsigned dstz,
                        struct pipe_resource *src,
                        unsigned src_level,
                        const struct pipe_box *src_box);

/* crocus_draw.c */
void crocus_draw_vbo(struct pipe_context *ctx,
                     const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws);
void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *);

/* crocus_pipe_control.c */

void crocus_emit_pipe_control_flush(struct crocus_batch *batch,
                                    const char *reason, uint32_t flags);
|
||||
void crocus_emit_pipe_control_write(struct crocus_batch *batch,
|
||||
const char *reason, uint32_t flags,
|
||||
struct crocus_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void crocus_emit_mi_flush(struct crocus_batch *batch);
|
||||
void crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
|
||||
void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);
|
||||
void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
|
||||
const char *reason, uint32_t flags);
|
||||
void crocus_flush_all_caches(struct crocus_batch *batch);
|
||||
|
||||
#define crocus_handle_always_flush_cache(batch) \
|
||||
if (unlikely(batch->screen->driconf.always_flush_cache)) \
|
||||
crocus_flush_all_caches(batch);
|
||||
|
||||
void crocus_init_flush_functions(struct pipe_context *ctx);
|
||||
|
||||
/* crocus_program.c */
|
||||
const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
|
||||
gl_shader_stage stage);
|
||||
struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
|
||||
unsigned per_thread_scratch,
|
||||
gl_shader_stage stage);
|
||||
uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
|
||||
enum crocus_surface_group group,
|
||||
uint32_t index);
|
||||
uint32_t crocus_bti_to_group_index(const struct crocus_binding_table *bt,
|
||||
enum crocus_surface_group group,
|
||||
uint32_t bti);
|
||||
|
||||
/* crocus_disk_cache.c */
|
||||
|
||||
void crocus_disk_cache_store(struct disk_cache *cache,
|
||||
const struct crocus_uncompiled_shader *ish,
|
||||
const struct crocus_compiled_shader *shader,
|
||||
void *map,
|
||||
const void *prog_key,
|
||||
uint32_t prog_key_size);
|
||||
struct crocus_compiled_shader *
|
||||
crocus_disk_cache_retrieve(struct crocus_context *ice,
|
||||
const struct crocus_uncompiled_shader *ish,
|
||||
const void *prog_key,
|
||||
uint32_t prog_key_size);
|
||||
|
||||
/* crocus_program_cache.c */
|
||||
|
||||
void crocus_init_program_cache(struct crocus_context *ice);
|
||||
void crocus_destroy_program_cache(struct crocus_context *ice);
|
||||
void crocus_print_program_cache(struct crocus_context *ice);
|
||||
struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice,
|
||||
enum crocus_program_cache_id,
|
||||
uint32_t key_size,
|
||||
const void *key);
|
||||
struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice,
|
||||
enum crocus_program_cache_id,
|
||||
uint32_t key_size,
|
||||
const void *key,
|
||||
const void *assembly,
|
||||
uint32_t asm_size,
|
||||
struct brw_stage_prog_data *,
|
||||
uint32_t prog_data_size,
|
||||
uint32_t *streamout,
|
||||
enum brw_param_builtin *sysv,
|
||||
unsigned num_system_values,
|
||||
unsigned num_cbufs,
|
||||
const struct crocus_binding_table *bt);
|
||||
const void *crocus_find_previous_compile(const struct crocus_context *ice,
|
||||
enum crocus_program_cache_id cache_id,
|
||||
unsigned program_string_id);
|
||||
bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
|
||||
const void *key,
|
||||
uint32_t key_size,
|
||||
uint32_t *kernel_out,
|
||||
void *prog_data_out);
|
||||
bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
|
||||
uint32_t stage,
|
||||
const void *key, uint32_t key_size,
|
||||
const void *kernel, uint32_t kernel_size,
|
||||
const struct brw_stage_prog_data *prog_data,
|
||||
uint32_t prog_data_size,
|
||||
uint32_t *kernel_out,
|
||||
void *prog_data_out);
|
||||
|
||||
/* crocus_resolve.c */
|
||||
|
||||
void crocus_predraw_resolve_inputs(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
bool *draw_aux_buffer_disabled,
|
||||
gl_shader_stage stage,
|
||||
bool consider_framebuffer);
|
||||
void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
bool *draw_aux_buffer_disabled);
|
||||
void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
|
||||
struct crocus_batch *batch);
|
||||
void crocus_cache_sets_clear(struct crocus_batch *batch);
|
||||
void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
|
||||
void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
|
||||
void crocus_cache_flush_for_render(struct crocus_batch *batch,
|
||||
struct crocus_bo *bo,
|
||||
enum isl_format format,
|
||||
enum isl_aux_usage aux_usage);
|
||||
void crocus_render_cache_add_bo(struct crocus_batch *batch,
|
||||
struct crocus_bo *bo,
|
||||
enum isl_format format,
|
||||
enum isl_aux_usage aux_usage);
|
||||
void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
|
||||
void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
|
||||
int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
|
||||
struct pipe_driver_query_info *info);
|
||||
int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
|
||||
unsigned index,
|
||||
struct pipe_driver_query_group_info *info);
|
||||
|
||||
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);
|
||||
|
||||
bool crocus_sw_check_cond_render(struct crocus_context *ice);
|
||||
static inline bool crocus_check_conditional_render(struct crocus_context *ice)
|
||||
{
|
||||
if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
|
||||
return crocus_sw_check_cond_render(ice);
|
||||
return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
|
||||
}
|
||||
|
||||
#ifdef genX
|
||||
# include "crocus_genx_protos.h"
|
||||
#else
|
||||
# define genX(x) gfx4_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx45_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx5_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx6_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx7_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx75_##x
|
||||
# include "crocus_genx_protos.h"
|
||||
# undef genX
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CROCUS_DEFINES_H
|
||||
#define CROCUS_DEFINES_H
|
||||
|
||||
/**
|
||||
* @file crocus_defines.h
|
||||
*
|
||||
* Random hardware #defines that we're not using GENXML for.
|
||||
*/
|
||||
|
||||
#define MI_PREDICATE (0xC << 23)
|
||||
# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
|
||||
# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
|
||||
# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
|
||||
# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
|
||||
# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
|
||||
# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
|
||||
# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
|
||||
# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
|
||||
# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
|
||||
# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
|
||||
# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
|
||||
|
||||
/* Predicate registers */
|
||||
#define MI_PREDICATE_SRC0 0x2400
|
||||
#define MI_PREDICATE_SRC1 0x2408
|
||||
#define MI_PREDICATE_DATA 0x2410
|
||||
#define MI_PREDICATE_RESULT 0x2418
|
||||
#define MI_PREDICATE_RESULT_1 0x241C
|
||||
#define MI_PREDICATE_RESULT_2 0x2214
|
||||
|
||||
#define CS_GPR(n) (0x2600 + (n) * 8)
|
||||
|
||||
/* The number of bits in our TIMESTAMP queries. */
|
||||
#define TIMESTAMP_BITS 36
|
||||
|
||||
#endif
|
|
@ -0,0 +1,263 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_disk_cache.c
|
||||
*
|
||||
* Functions for interacting with the on-disk shader cache.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "util/blob.h"
|
||||
#include "util/build_id.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
|
||||
#include "crocus_context.h"
|
||||
|
||||
static bool debug = false;
|
||||
|
||||
/**
|
||||
* Compute a disk cache key for the given uncompiled shader and NOS key.
|
||||
*/
|
||||
static void
|
||||
crocus_disk_cache_compute_key(struct disk_cache *cache,
|
||||
const struct crocus_uncompiled_shader *ish,
|
||||
const void *orig_prog_key,
|
||||
uint32_t prog_key_size,
|
||||
cache_key cache_key)
|
||||
{
|
||||
/* Create a copy of the program key with program_string_id zeroed out.
|
||||
* It's essentially random data which we don't want to include in our
|
||||
* hashing and comparisons. We'll set a proper value on a cache hit.
|
||||
*/
|
||||
union brw_any_prog_key prog_key;
|
||||
memcpy(&prog_key, orig_prog_key, prog_key_size);
|
||||
prog_key.base.program_string_id = 0;
|
||||
|
||||
uint8_t data[sizeof(prog_key) + sizeof(ish->nir_sha1)];
|
||||
uint32_t data_size = prog_key_size + sizeof(ish->nir_sha1);
|
||||
|
||||
memcpy(data, ish->nir_sha1, sizeof(ish->nir_sha1));
|
||||
memcpy(data + sizeof(ish->nir_sha1), &prog_key, prog_key_size);
|
||||
|
||||
disk_cache_compute_key(cache, data, data_size, cache_key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store the given compiled shader in the disk cache.
|
||||
*
|
||||
* This should only be called on newly compiled shaders. No checking is
|
||||
* done to prevent repeated stores of the same shader.
|
||||
*/
|
||||
void
crocus_disk_cache_store(struct disk_cache *cache,
                        const struct crocus_uncompiled_shader *ish,
                        const struct crocus_compiled_shader *shader,
                        void *map,
                        const void *prog_key,
                        uint32_t prog_key_size)
{
#ifdef ENABLE_SHADER_CACHE
   /* No-op when the disk cache is disabled or failed to initialize. */
   if (!cache)
      return;

   gl_shader_stage stage = ish->nir->info.stage;
   const struct brw_stage_prog_data *prog_data = shader->prog_data;

   cache_key cache_key;
   crocus_disk_cache_compute_key(cache, ish, prog_key, prog_key_size, cache_key);

   if (debug) {
      char sha1[41];
      _mesa_sha1_format(sha1, cache_key);
      fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
   }

   struct blob blob;
   blob_init(&blob);

   /* We write the following data to the cache blob:
    *
    * 1. Prog data (must come first because it has the assembly size)
    * 2. Assembly code
    * 3. Number of entries in the system value array
    * 4. System value array
    * 5. Legacy param array (only used for compute workgroup ID)
    * 6. Binding table
    *
    * NOTE: this layout must stay in sync with the reads performed by
    * crocus_disk_cache_retrieve().
    */
   blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
   /* 'map' appears to be the mapped buffer holding the assembly, located
    * at shader->offset -- TODO confirm against the callers.
    */
   blob_write_bytes(&blob, map + shader->offset, shader->prog_data->program_size);
   blob_write_bytes(&blob, &shader->num_system_values, sizeof(unsigned));
   blob_write_bytes(&blob, shader->system_values,
                    shader->num_system_values * sizeof(enum brw_param_builtin));
   blob_write_bytes(&blob, prog_data->param,
                    prog_data->nr_params * sizeof(uint32_t));
   blob_write_bytes(&blob, &shader->bt, sizeof(shader->bt));

   disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
   blob_finish(&blob);
#endif
}
|
||||
|
||||
/**
|
||||
* Search for a compiled shader in the disk cache. If found, upload it
|
||||
* to the in-memory program cache so we can use it.
|
||||
*/
|
||||
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
                           const struct crocus_uncompiled_shader *ish,
                           const void *prog_key,
                           uint32_t key_size)
{
#ifdef ENABLE_SHADER_CACHE
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   struct disk_cache *cache = screen->disk_cache;
   gl_shader_stage stage = ish->nir->info.stage;

   if (!cache)
      return NULL;

   /* The key must be computed exactly as in crocus_disk_cache_store(). */
   cache_key cache_key;
   crocus_disk_cache_compute_key(cache, ish, prog_key, key_size, cache_key);

   if (debug) {
      char sha1[41];
      _mesa_sha1_format(sha1, cache_key);
      fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
   }

   /* disk_cache_get() returns a heap buffer we own (freed below). */
   size_t size;
   void *buffer = disk_cache_get(screen->disk_cache, cache_key, &size);

   if (debug)
      fprintf(stderr, "%s\n", buffer ? "found" : "missing");

   if (!buffer)
      return NULL;

   const uint32_t prog_data_size = brw_prog_data_size(stage);

   struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
   const void *assembly;
   uint32_t num_system_values;
   uint32_t *system_values = NULL;
   uint32_t *so_decls = NULL;

   /* Deserialize in the same order the store path wrote:
    * prog_data, assembly, system value count + array, params, binding table.
    */
   struct blob_reader blob;
   blob_reader_init(&blob, buffer, size);
   blob_copy_bytes(&blob, prog_data, prog_data_size);
   /* prog_data must be read first: it carries program_size. */
   assembly = blob_read_bytes(&blob, prog_data->program_size);
   num_system_values = blob_read_uint32(&blob);
   if (num_system_values) {
      system_values =
         ralloc_array(NULL, enum brw_param_builtin, num_system_values);
      blob_copy_bytes(&blob, system_values,
                      num_system_values * sizeof(enum brw_param_builtin));
   }

   /* The serialized prog_data contains stale pointers; reset them before
    * reading the param array back in.
    */
   prog_data->param = NULL;
   prog_data->pull_param = NULL;
   assert(prog_data->nr_pull_params == 0);

   if (prog_data->nr_params) {
      prog_data->param = ralloc_array(NULL, uint32_t, prog_data->nr_params);
      blob_copy_bytes(&blob, prog_data->param,
                      prog_data->nr_params * sizeof(uint32_t));
   }

   struct crocus_binding_table bt;
   blob_copy_bytes(&blob, &bt, sizeof(bt));

   /* Stream output declarations aren't serialized; rebuild them from the
    * shader's stream_output info (gen7+ VUE stages only).
    */
   if ((stage == MESA_SHADER_VERTEX ||
        stage == MESA_SHADER_TESS_EVAL ||
        stage == MESA_SHADER_GEOMETRY) && screen->devinfo.ver > 6) {
      struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
      so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
                                                  &vue_prog_data->vue_map);
   }

   /* System values and uniforms are stored in constant buffer 0, the
    * user-facing UBOs are indexed by one. So if any constant buffer is
    * needed, the constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = ish->nir->info.num_ubos;

   if (num_cbufs || ish->nir->num_uniforms)
      num_cbufs++;

   if (num_system_values)
      num_cbufs++;

   /* Upload our newly read shader to the in-memory program cache and
    * return it to the caller.
    */
   struct crocus_compiled_shader *shader =
      crocus_upload_shader(ice, stage, key_size, prog_key, assembly,
                           prog_data->program_size,
                           prog_data, prog_data_size, so_decls, system_values,
                           num_system_values, num_cbufs, &bt);

   free(buffer);

   return shader;
#else
   return NULL;
#endif
}
|
||||
|
||||
/**
|
||||
* Initialize the on-disk shader cache.
|
||||
*/
|
||||
void
crocus_disk_cache_init(struct crocus_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
   /* Honor the INTEL_DEBUG switches that disable the disk cache. */
   if (INTEL_DEBUG & DEBUG_DISK_CACHE_DISABLE_MASK)
      return;

   /* array length = print length + nul char + 1 extra to verify it's unused */
   char renderer[13];
   UNUSED int len =
      snprintf(renderer, sizeof(renderer), "crocus_%04x", screen->pci_id);
   assert(len == sizeof(renderer) - 2);

   /* Use the driver binary's own build-id as the cache "timestamp", so
    * entries from a different driver build won't match.
    */
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(crocus_disk_cache_init);
   assert(note && build_id_length(note) == 20); /* sha1 */

   const uint8_t *id_sha1 = build_id_data(note);
   assert(id_sha1);

   char timestamp[41];
   _mesa_sha1_format(timestamp, id_sha1);

   /* Compiler configuration also affects generated code; fold it in so
    * differently-configured compilers don't share entries.
    */
   const uint64_t driver_flags =
      brw_get_compiler_config_value(screen->compiler);
   screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}
|
|
@ -0,0 +1,511 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_draw.c
|
||||
*
|
||||
* The main driver hooks for drawing and launching compute shaders.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_draw.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "intel/compiler/brw_eu_defines.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_defines.h"
|
||||
#include "util/u_prim_restart.h"
|
||||
#include "indices/u_primconvert.h"
|
||||
#include "util/u_prim.h"
|
||||
|
||||
static bool
|
||||
prim_is_points_or_lines(enum pipe_prim_type mode)
|
||||
{
|
||||
/* We don't need to worry about adjacency - it can only be used with
|
||||
* geometry shaders, and we don't care about this info when GS is on.
|
||||
*/
|
||||
return mode == PIPE_PRIM_POINTS ||
|
||||
mode == PIPE_PRIM_LINES ||
|
||||
mode == PIPE_PRIM_LINE_LOOP ||
|
||||
mode == PIPE_PRIM_LINE_STRIP;
|
||||
}
|
||||
|
||||
static bool
|
||||
can_cut_index_handle_restart_index(struct crocus_context *ice,
|
||||
const struct pipe_draw_info *draw)
|
||||
{
|
||||
switch (draw->index_size) {
|
||||
case 1:
|
||||
return draw->restart_index == 0xff;
|
||||
case 2:
|
||||
return draw->restart_index == 0xffff;
|
||||
case 4:
|
||||
return draw->restart_index == 0xffffffff;
|
||||
default:
|
||||
unreachable("illegal index size\n");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
can_cut_index_handle_prim(struct crocus_context *ice,
|
||||
const struct pipe_draw_info *draw)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
/* Haswell can do it all. */
|
||||
if (devinfo->is_haswell)
|
||||
return true;
|
||||
|
||||
if (!can_cut_index_handle_restart_index(ice, draw))
|
||||
return false;
|
||||
|
||||
switch (draw->mode) {
|
||||
case PIPE_PRIM_POINTS:
|
||||
case PIPE_PRIM_LINES:
|
||||
case PIPE_PRIM_LINE_STRIP:
|
||||
case PIPE_PRIM_TRIANGLES:
|
||||
case PIPE_PRIM_TRIANGLE_STRIP:
|
||||
case PIPE_PRIM_LINES_ADJACENCY:
|
||||
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
|
||||
case PIPE_PRIM_TRIANGLES_ADJACENCY:
|
||||
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record the current primitive mode and restart information, flagging
|
||||
* related packets as dirty if necessary.
|
||||
*
|
||||
* This must be called before updating compiled shaders, because the patch
|
||||
* information informs the TCS key.
|
||||
*/
|
||||
static void
|
||||
crocus_update_draw_info(struct crocus_context *ice,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_start_count_bias *draw)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
|
||||
enum pipe_prim_type mode = info->mode;
|
||||
|
||||
if (screen->devinfo.ver < 6) {
|
||||
/* Slight optimization to avoid the GS program when not needed:
|
||||
*/
|
||||
struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
|
||||
if (mode == PIPE_PRIM_QUAD_STRIP && !rs_state->flatshade &&
|
||||
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
|
||||
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
|
||||
mode = PIPE_PRIM_TRIANGLE_STRIP;
|
||||
if (mode == PIPE_PRIM_QUADS &&
|
||||
draw->count == 4 &&
|
||||
!rs_state->flatshade &&
|
||||
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
|
||||
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
|
||||
mode = PIPE_PRIM_TRIANGLE_FAN;
|
||||
}
|
||||
|
||||
if (ice->state.prim_mode != mode) {
|
||||
ice->state.prim_mode = mode;
|
||||
|
||||
if (screen->devinfo.ver < 6)
|
||||
ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
|
||||
if (screen->devinfo.ver <= 6)
|
||||
ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
|
||||
|
||||
if (screen->devinfo.ver >= 7)
|
||||
ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
|
||||
|
||||
/* For XY Clip enables */
|
||||
bool points_or_lines = prim_is_points_or_lines(mode);
|
||||
if (points_or_lines != ice->state.prim_is_points_or_lines) {
|
||||
ice->state.prim_is_points_or_lines = points_or_lines;
|
||||
ice->state.dirty |= CROCUS_DIRTY_CLIP;
|
||||
}
|
||||
}
|
||||
|
||||
if (info->mode == PIPE_PRIM_PATCHES &&
|
||||
ice->state.vertices_per_patch != info->vertices_per_patch) {
|
||||
ice->state.vertices_per_patch = info->vertices_per_patch;
|
||||
|
||||
/* This is needed for key->input_vertices */
|
||||
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_TCS;
|
||||
|
||||
/* Flag constants dirty for gl_PatchVerticesIn if needed. */
|
||||
const struct shader_info *tcs_info =
|
||||
crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
|
||||
if (tcs_info &&
|
||||
BITSET_TEST(tcs_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
|
||||
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
|
||||
ice->state.shaders[MESA_SHADER_TESS_CTRL].sysvals_need_upload = true;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned cut_index = info->primitive_restart ? info->restart_index :
|
||||
ice->state.cut_index;
|
||||
if (ice->state.primitive_restart != info->primitive_restart ||
|
||||
ice->state.cut_index != cut_index) {
|
||||
if (screen->devinfo.is_haswell)
|
||||
ice->state.dirty |= CROCUS_DIRTY_GEN75_VF;
|
||||
ice->state.primitive_restart = info->primitive_restart;
|
||||
ice->state.cut_index = info->restart_index;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update shader draw parameters, flagging VF packets as dirty if necessary.
|
||||
*/
|
||||
static void
crocus_update_draw_parameters(struct crocus_context *ice,
                              const struct pipe_draw_info *info,
                              unsigned drawid_offset,
                              const struct pipe_draw_indirect_info *indirect,
                              const struct pipe_draw_start_count_bias *draw)
{
   /* Tracks whether any draw-parameter buffer changed, so we only flag
    * vertex buffer/element state dirty when something actually moved.
    */
   bool changed = false;

   if (ice->state.vs_uses_draw_params) {
      struct crocus_state_ref *draw_params = &ice->draw.draw_params;

      if (indirect && indirect->buffer) {
         /* Source firstvertex/baseinstance straight from the indirect
          * buffer.  The offset (12 for indexed, 8 for non-indexed draws)
          * presumably matches the layout of the indirect draw command --
          * TODO confirm against the indirect command struct.
          */
         pipe_resource_reference(&draw_params->res, indirect->buffer);
         draw_params->offset =
            indirect->offset + (info->index_size ? 12 : 8);

         changed = true;
         /* CPU-side copy is stale now; force re-upload on the next
          * direct draw.
          */
         ice->draw.params_valid = false;
      } else {
         int firstvertex = info->index_size ? draw->index_bias : draw->start;

         /* Only re-upload when the values actually changed. */
         if (!ice->draw.params_valid ||
             ice->draw.params.firstvertex != firstvertex ||
             ice->draw.params.baseinstance != info->start_instance) {

            changed = true;
            ice->draw.params.firstvertex = firstvertex;
            ice->draw.params.baseinstance = info->start_instance;
            ice->draw.params_valid = true;

            u_upload_data(ice->ctx.stream_uploader, 0,
                          sizeof(ice->draw.params), 4, &ice->draw.params,
                          &draw_params->offset, &draw_params->res);
         }
      }
   }

   if (ice->state.vs_uses_derived_draw_params) {
      struct crocus_state_ref *derived_params = &ice->draw.derived_draw_params;
      /* is_indexed_draw is encoded as -1 (all ones) or 0. */
      int is_indexed_draw = info->index_size ? -1 : 0;

      if (ice->draw.derived_params.drawid != drawid_offset ||
          ice->draw.derived_params.is_indexed_draw != is_indexed_draw) {

         changed = true;
         ice->draw.derived_params.drawid = drawid_offset;
         ice->draw.derived_params.is_indexed_draw = is_indexed_draw;

         u_upload_data(ice->ctx.stream_uploader, 0,
                       sizeof(ice->draw.derived_params), 4,
                       &ice->draw.derived_params, &derived_params->offset,
                       &derived_params->res);
      }
   }

   if (changed) {
      ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |
                          CROCUS_DIRTY_VERTEX_ELEMENTS;
   }
}
|
||||
|
||||
/* Emit an indirect (possibly multi-) draw, one upload_render_state per
 * sub-draw, advancing the indirect buffer offset by the stride each time.
 */
static void
crocus_indirect_draw_vbo(struct crocus_context *ice,
                         const struct pipe_draw_info *dinfo,
                         unsigned drawid_offset,
                         const struct pipe_draw_indirect_info *dindirect,
                         const struct pipe_draw_start_count_bias *draws)
{
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   struct crocus_screen *screen = batch->screen;
   /* Local copies: 'indirect.offset' is advanced per iteration below. */
   struct pipe_draw_info info = *dinfo;
   struct pipe_draw_indirect_info indirect = *dindirect;
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (devinfo->is_haswell && indirect.indirect_draw_count &&
       ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
      /* Upload MI_PREDICATE_RESULT to GPR15.*/
      screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
   }

   /* Remember dirty state so post-draw resolves still see it (restored
    * below); each sub-draw clears the render bits after emitting.
    */
   uint64_t orig_dirty = ice->state.dirty;
   uint64_t orig_stage_dirty = ice->state.stage_dirty;

   for (int i = 0; i < indirect.draw_count; i++) {
      crocus_batch_maybe_flush(batch, 1500);
      crocus_require_statebuffer_space(batch, 2400);

      crocus_update_draw_parameters(ice, &info, drawid_offset + i, &indirect, draws);

      screen->vtbl.upload_render_state(ice, batch, &info, drawid_offset + i, &indirect, draws);

      /* State was just emitted; only re-emit what changes next iteration. */
      ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
      ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;

      indirect.offset += indirect.stride;
   }

   if (devinfo->is_haswell && indirect.indirect_draw_count &&
       ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
      /* Restore MI_PREDICATE_RESULT. */
      screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
   }

   /* Put this back for post-draw resolves, we'll clear it again after. */
   ice->state.dirty = orig_dirty;
   ice->state.stage_dirty = orig_stage_dirty;
}
|
||||
|
||||
static void
|
||||
crocus_simple_draw_vbo(struct crocus_context *ice,
|
||||
const struct pipe_draw_info *draw,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc)
|
||||
{
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
|
||||
crocus_batch_maybe_flush(batch, 1500);
|
||||
crocus_require_statebuffer_space(batch, 2400);
|
||||
|
||||
crocus_update_draw_parameters(ice, draw, drawid_offset, indirect, sc);
|
||||
|
||||
screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_draw_vbo_get_vertex_count(struct pipe_context *ctx,
|
||||
const struct pipe_draw_info *info_in,
|
||||
unsigned drawid_offset,
|
||||
const struct pipe_draw_indirect_info *indirect)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
|
||||
struct pipe_draw_info info = *info_in;
|
||||
struct pipe_draw_start_count_bias draw;
|
||||
|
||||
uint32_t val = screen->vtbl.get_so_offset(indirect->count_from_stream_output);
|
||||
|
||||
draw.start = 0;
|
||||
draw.count = val;
|
||||
ctx->draw_vbo(ctx, &info, drawid_offset, NULL, &draw, 1);
|
||||
}
|
||||
|
||||
/**
 * The pipe->draw_vbo() driver hook.  Performs a draw on the GPU.
 *
 * Handles multi-draw splitting, empty-draw elision, conditional render,
 * primitive-restart and streamout fallbacks, pre-draw resolves, and
 * finally dispatches to the direct or indirect draw helper.
 */
void
crocus_draw_vbo(struct pipe_context *ctx,
                const struct pipe_draw_info *info,
                unsigned drawid_offset,
                const struct pipe_draw_indirect_info *indirect,
                const struct pipe_draw_start_count_bias *draws,
                unsigned num_draws)
{
   /* Split multi-draws into a sequence of single draws via the helper. */
   if (num_draws > 1) {
      util_draw_multi(ctx, info, drawid_offset, indirect, draws, num_draws);
      return;
   }

   /* Trivially empty direct draws can be dropped immediately. */
   if (!indirect && (!draws[0].count || !info->instance_count))
      return;

   struct crocus_context *ice = (struct crocus_context *) ctx;
   struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];

   /* Conditional rendering may skip this draw entirely. */
   if (!crocus_check_conditional_render(ice))
      return;

   /* Software fallback when the HW cut index can't handle this prim. */
   if (info->primitive_restart && !can_cut_index_handle_prim(ice, info)) {
      util_draw_vbo_without_prim_restart(ctx, info, drawid_offset,
                                         indirect, draws);
      return;
   }

   /* Pre-Haswell can't source the vertex count from streamout; read it
    * back on the CPU and re-issue as a direct draw.
    */
   if (indirect && indirect->count_from_stream_output &&
       !screen->devinfo.is_haswell) {
      crocus_draw_vbo_get_vertex_count(ctx, info, drawid_offset, indirect);
      return;
   }

   /**
    * The hardware is capable of removing dangling vertices on its own; however,
    * prior to Gen6, we sometimes convert quads into trifans (and quad strips
    * into tristrips), since pre-Gen6 hardware requires a GS to render quads.
    * This function manually trims dangling vertices from a draw call involving
    * quads so that those dangling vertices won't get drawn when we convert to
    * trifans/tristrips.
    */
   if (screen->devinfo.ver < 6) {
      if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP) {
         bool trim = u_trim_pipe_prim(info->mode, (unsigned *)&draws[0].count);
         if (!trim)
            return;
      }
   }

   /* We can't safely re-emit 3DSTATE_SO_BUFFERS because it may zero the
    * write offsets, changing the behavior.
    */
   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER & ~CROCUS_DIRTY_GEN7_SO_BUFFERS;
      ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
   }

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (screen->devinfo.ver == 6)
      crocus_emit_post_sync_nonzero_flush(batch);

   crocus_update_draw_info(ice, info, draws);

   if (!crocus_update_compiled_shaders(ice))
      return;

   /* Resolve aux buffers read by any bound render stage, then the
    * framebuffer itself, before drawing.
    */
   if (ice->state.dirty & CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES) {
      bool draw_aux_buffer_disabled[BRW_MAX_DRAW_BUFFERS] = { };
      for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++) {
         if (ice->shaders.prog[stage])
            crocus_predraw_resolve_inputs(ice, batch, draw_aux_buffer_disabled,
                                          stage, true);
      }
      crocus_predraw_resolve_framebuffer(ice, batch, draw_aux_buffer_disabled);
   }

   crocus_handle_always_flush_cache(batch);

   if (indirect && indirect->buffer)
      crocus_indirect_draw_vbo(ice, info, drawid_offset, indirect, draws);
   else
      crocus_simple_draw_vbo(ice, info, drawid_offset, indirect, draws);

   crocus_handle_always_flush_cache(batch);

   crocus_postdraw_update_resolve_tracking(ice, batch);

   /* Everything required for this draw has now been emitted. */
   ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
   ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
}
|
||||
|
||||
static void
|
||||
crocus_update_grid_size_resource(struct crocus_context *ice,
|
||||
const struct pipe_grid_info *grid)
|
||||
{
|
||||
struct crocus_state_ref *grid_ref = &ice->state.grid_size;
|
||||
const struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_COMPUTE];
|
||||
bool grid_needs_surface = shader->bt.used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS];
|
||||
|
||||
if (grid->indirect) {
|
||||
pipe_resource_reference(&grid_ref->res, grid->indirect);
|
||||
grid_ref->offset = grid->indirect_offset;
|
||||
|
||||
/* Zero out the grid size so that the next non-indirect grid launch will
|
||||
* re-upload it properly.
|
||||
*/
|
||||
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
|
||||
} else if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) != 0) {
|
||||
memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));
|
||||
u_upload_data(ice->ctx.const_uploader, 0, sizeof(grid->grid), 4,
|
||||
grid->grid, &grid_ref->offset, &grid_ref->res);
|
||||
}
|
||||
|
||||
/* Skip surface upload if we don't need it or we already have one */
|
||||
if (!grid_needs_surface)
|
||||
return;
|
||||
|
||||
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_CS;
|
||||
}
|
||||
|
||||
|
||||
/**
 * The pipe->launch_grid() hook: dispatch a compute grid on the compute
 * batch, doing any required resolves on the render batch first.
 */
void
crocus_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_COMPUTE];
   struct crocus_screen *screen = batch->screen;

   /* Conditional rendering may skip this dispatch entirely. */
   if (!crocus_check_conditional_render(ice))
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;
      ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
   }

   /* We can't do resolves on the compute engine, so awkwardly, we have to
    * do them on the render batch...
    */
   if (ice->state.dirty & CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES) {
      crocus_predraw_resolve_inputs(ice, &ice->batches[CROCUS_BATCH_RENDER], NULL,
                                    MESA_SHADER_COMPUTE, false);
   }

   crocus_batch_maybe_flush(batch, 1500);
   crocus_require_statebuffer_space(batch, 2500);
   crocus_update_compiled_compute_shader(ice);

   /* A changed workgroup size invalidates the CS sysval constants. */
   if (memcmp(ice->state.last_block, grid->block, sizeof(grid->block)) != 0) {
      memcpy(ice->state.last_block, grid->block, sizeof(grid->block));
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;
      ice->state.shaders[MESA_SHADER_COMPUTE].sysvals_need_upload = true;
   }

   crocus_update_grid_size_resource(ice, grid);

   /* One-shot predicate: consumed by this dispatch. */
   if (ice->state.compute_predicate) {
      screen->vtbl.emit_compute_predicate(batch);
      ice->state.compute_predicate = NULL;
   }

   crocus_handle_always_flush_cache(batch);

   screen->vtbl.upload_compute_state(ice, batch, grid);

   crocus_handle_always_flush_cache(batch);

   ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_COMPUTE;
   ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;

   /* Note: since compute shaders can't access the framebuffer, there's
    * no need to call crocus_postdraw_update_resolve_tracking.
    */
}
|
|
@ -0,0 +1,571 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_fence.c
|
||||
*
|
||||
* Fences for driver and IPC serialisation, scheduling and synchronisation.
|
||||
*/
|
||||
|
||||
#include "util/u_inlines.h"
|
||||
#include "intel/common/intel_gem.h"
|
||||
|
||||
#include "crocus_batch.h"
|
||||
#include "crocus_bufmgr.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_fence.h"
|
||||
#include "crocus_screen.h"
|
||||
|
||||
static uint32_t
|
||||
gem_syncobj_create(int fd, uint32_t flags)
|
||||
{
|
||||
struct drm_syncobj_create args = {
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &args);
|
||||
|
||||
return args.handle;
|
||||
}
|
||||
|
||||
static void
|
||||
gem_syncobj_destroy(int fd, uint32_t handle)
|
||||
{
|
||||
struct drm_syncobj_destroy args = {
|
||||
.handle = handle,
|
||||
};
|
||||
|
||||
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a new sync-point.
|
||||
*/
|
||||
struct crocus_syncobj *
|
||||
crocus_create_syncobj(struct crocus_screen *screen)
|
||||
{
|
||||
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
|
||||
|
||||
if (!syncobj)
|
||||
return NULL;
|
||||
|
||||
syncobj->handle = gem_syncobj_create(screen->fd, 0);
|
||||
assert(syncobj->handle);
|
||||
|
||||
pipe_reference_init(&syncobj->ref, 1);
|
||||
|
||||
return syncobj;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_syncobj_destroy(struct crocus_screen *screen,
|
||||
struct crocus_syncobj *syncobj)
|
||||
{
|
||||
gem_syncobj_destroy(screen->fd, syncobj->handle);
|
||||
free(syncobj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a sync-point to the batch, with the given flags.
|
||||
*
|
||||
* \p flags One of I915_EXEC_FENCE_WAIT or I915_EXEC_FENCE_SIGNAL.
|
||||
*/
|
||||
void
|
||||
crocus_batch_add_syncobj(struct crocus_batch *batch,
|
||||
struct crocus_syncobj *syncobj, unsigned flags)
|
||||
{
|
||||
struct drm_i915_gem_exec_fence *fence =
|
||||
util_dynarray_grow(&batch->exec_fences, struct drm_i915_gem_exec_fence, 1);
|
||||
|
||||
*fence = (struct drm_i915_gem_exec_fence){
|
||||
.handle = syncobj->handle,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
struct crocus_syncobj **store =
|
||||
util_dynarray_grow(&batch->syncobjs, struct crocus_syncobj *, 1);
|
||||
|
||||
*store = NULL;
|
||||
crocus_syncobj_reference(batch->screen, store, syncobj);
|
||||
}
|
||||
|
||||
/**
 * Walk through a batch's dependencies (any I915_EXEC_FENCE_WAIT syncobjs)
 * and unreference any which have already passed.
 *
 * Sometimes the compute batch is seldom used, and accumulates references
 * to stale render batches that are no longer of interest, so we can free
 * those up.
 */
static void
clear_stale_syncobjs(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   int n = util_dynarray_num_elements(&batch->syncobjs, struct crocus_syncobj *);

   /* The two arrays are kept in lockstep: exec_fences[i] pairs with
    * syncobjs[i].
    */
   assert(n == util_dynarray_num_elements(&batch->exec_fences,
                                          struct drm_i915_gem_exec_fence));

   /* Skip the first syncobj, as it's the signalling one. */
   /* NOTE(review): the loop condition `i > 1` also skips index 1; confirm
    * whether that is intentional or should be `i > 0` to only skip the
    * signalling entry at index 0.
    */
   for (int i = n - 1; i > 1; i--) {
      struct crocus_syncobj **syncobj =
         util_dynarray_element(&batch->syncobjs, struct crocus_syncobj *, i);
      struct drm_i915_gem_exec_fence *fence =
         util_dynarray_element(&batch->exec_fences,
                               struct drm_i915_gem_exec_fence, i);
      assert(fence->flags & I915_EXEC_FENCE_WAIT);

      /* Nonzero result: not signaled yet, keep holding the dependency. */
      if (crocus_wait_syncobj(&screen->base, *syncobj, 0))
         continue;

      /* This sync object has already passed, there's no need to continue
       * marking it as a dependency; we can stop holding on to the reference.
       */
      crocus_syncobj_reference(screen, syncobj, NULL);

      /* Remove it from the lists; move the last element here. */
      struct crocus_syncobj **nth_syncobj =
         util_dynarray_pop_ptr(&batch->syncobjs, struct crocus_syncobj *);
      struct drm_i915_gem_exec_fence *nth_fence =
         util_dynarray_pop_ptr(&batch->exec_fences,
                               struct drm_i915_gem_exec_fence);

      /* If the popped element IS the hole, there is nothing to move. */
      if (syncobj != nth_syncobj) {
         *syncobj = *nth_syncobj;
         memcpy(fence, nth_fence, sizeof(*fence));
      }
   }
}
|
||||
|
||||
/* ------------------------------------------------------------------- */

/* A Gallium fence: up to one fine fence per batch, plus deferred-flush
 * bookkeeping.
 */
struct pipe_fence_handle {
   struct pipe_reference ref;

   /* Context this fence was created on with PIPE_FLUSH_DEFERRED and not
    * yet flushed; NULL once flushed (or if it was never deferred).
    */
   struct pipe_context *unflushed_ctx;

   /* One fine fence per batch; NULL entries mean "already signaled or
    * never recorded" for that engine.
    */
   struct crocus_fine_fence *fine[CROCUS_BATCH_COUNT];
};
|
||||
|
||||
static void
|
||||
crocus_fence_destroy(struct pipe_screen *p_screen,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++)
|
||||
crocus_fine_fence_reference(screen, &fence->fine[i], NULL);
|
||||
|
||||
free(fence);
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_fence_reference(struct pipe_screen *p_screen,
|
||||
struct pipe_fence_handle **dst,
|
||||
struct pipe_fence_handle *src)
|
||||
{
|
||||
if (pipe_reference(&(*dst)->ref, &src->ref))
|
||||
crocus_fence_destroy(p_screen, *dst);
|
||||
|
||||
*dst = src;
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_wait_syncobj(struct pipe_screen *p_screen,
|
||||
struct crocus_syncobj *syncobj, int64_t timeout_nsec)
|
||||
{
|
||||
if (!syncobj)
|
||||
return false;
|
||||
|
||||
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
|
||||
struct drm_syncobj_wait args = {
|
||||
.handles = (uintptr_t)&syncobj->handle,
|
||||
.count_handles = 1,
|
||||
.timeout_nsec = timeout_nsec,
|
||||
};
|
||||
return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
|
||||
}
|
||||
|
||||
/**
 * The pipe_context::flush hook: flush batches and optionally hand back a
 * fence covering all work submitted so far.
 *
 * With PIPE_FLUSH_DEFERRED, batches are left open and the fence instead
 * records fine fences emitted into the still-pending batches.
 */
static void
crocus_fence_flush(struct pipe_context *ctx,
                   struct pipe_fence_handle **out_fence, unsigned flags)
{
   struct crocus_screen *screen = (void *)ctx->screen;
   struct crocus_context *ice = (struct crocus_context *)ctx;

   const bool deferred = flags & PIPE_FLUSH_DEFERRED;

   if (!deferred) {
      for (unsigned i = 0; i < ice->batch_count; i++)
         crocus_batch_flush(&ice->batches[i]);
   }

   if (!out_fence)
      return;

   struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
   if (!fence)
      return;

   pipe_reference_init(&fence->ref, 1);

   /* Remember the creating context so a later wait can detect (and
    * perform) the still-pending flush.
    */
   if (deferred)
      fence->unflushed_ctx = ctx;

   for (unsigned b = 0; b < ice->batch_count; b++) {
      struct crocus_batch *batch = &ice->batches[b];

      if (deferred && crocus_batch_bytes_used(batch) > 0) {
         /* Drop a bottom-of-pipe fine fence into the open batch. */
         struct crocus_fine_fence *fine =
            crocus_fine_fence_new(batch, CROCUS_FENCE_BOTTOM_OF_PIPE);
         crocus_fine_fence_reference(screen, &fence->fine[b], fine);
         crocus_fine_fence_reference(screen, &fine, NULL);
      } else {
         /* This batch has no commands queued up (perhaps we just flushed,
          * or all the commands are on the other batch).  Wait for the last
          * syncobj on this engine - unless it's already finished by now.
          */
         if (crocus_fine_fence_signaled(batch->last_fence))
            continue;

         crocus_fine_fence_reference(screen, &fence->fine[b],
                                     batch->last_fence);
      }
   }

   /* Release whatever *out_fence previously held, then install ours. */
   crocus_fence_reference(ctx->screen, out_fence, NULL);
   *out_fence = fence;
}
|
||||
|
||||
/**
 * pipe_context::fence_server_sync hook: make all future GPU work in this
 * context wait for the given fence's unsignaled fine fences.
 */
static void
crocus_fence_await(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;

   /* Unflushed fences from the same context are no-ops. */
   if (ctx && ctx == fence->unflushed_ctx)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      for (unsigned b = 0; b < ice->batch_count; b++) {
         struct crocus_batch *batch = &ice->batches[b];

         /* We're going to make any future work in this batch wait for our
          * fence to have gone by.  But any currently queued work doesn't
          * need to wait.  Flush the batch now, so it can happen sooner.
          */
         crocus_batch_flush(batch);

         /* Before adding a new reference, clean out any stale ones. */
         clear_stale_syncobjs(batch);

         crocus_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT);
      }
   }
}
|
||||
|
||||
/* Time-unit conversions, defined smallest-to-largest so each builds on
 * the previous.  (Expansion happens at use, so order is cosmetic.)
 */
#define MSEC_PER_SEC (1000)
#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
|
||||
|
||||
static uint64_t
|
||||
gettime_ns(void)
|
||||
{
|
||||
struct timespec current;
|
||||
clock_gettime(CLOCK_MONOTONIC, ¤t);
|
||||
return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
|
||||
}
|
||||
|
||||
/**
 * Convert a relative timeout (ns) into an absolute CLOCK_MONOTONIC
 * deadline, clamping so the addition cannot exceed INT64_MAX.
 * A zero timeout stays zero ("don't wait").
 */
static uint64_t
rel2abs(uint64_t timeout)
{
   if (timeout == 0)
      return 0;

   const uint64_t now = gettime_ns();
   const uint64_t headroom = (uint64_t)INT64_MAX - now;

   return now + MIN2(headroom, timeout);
}
|
||||
|
||||
/**
 * pipe_screen::fence_finish hook: wait up to @timeout ns (relative) for
 * all of the fence's fine fences to signal.  Returns true on success.
 */
static bool
crocus_fence_finish(struct pipe_screen *p_screen, struct pipe_context *ctx,
                    struct pipe_fence_handle *fence, uint64_t timeout)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;
   struct crocus_screen *screen = (struct crocus_screen *)p_screen;

   /* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
    * flushed yet.  Check if our syncobj is the current batch's signalling
    * syncobj - if so, we haven't flushed and need to now.
    *
    * The Gallium docs mention that a flush will occur if \p ctx matches
    * the context the fence was created with.  It may be NULL, so we check
    * that it matches first.
    */
   if (ctx && ctx == fence->unflushed_ctx) {
      for (unsigned i = 0; i < ice->batch_count; i++) {
         struct crocus_fine_fence *fine = fence->fine[i];

         if (crocus_fine_fence_signaled(fine))
            continue;

         if (fine->syncobj == crocus_batch_get_signal_syncobj(&ice->batches[i]))
            crocus_batch_flush(&ice->batches[i]);
      }

      /* The fence is no longer deferred. */
      fence->unflushed_ctx = NULL;
   }

   /* Collect the handles that still need waiting on. */
   unsigned int handle_count = 0;
   uint32_t handles[ARRAY_SIZE(fence->fine)];
   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      handles[handle_count++] = fine->syncobj->handle;
   }

   /* Everything already signaled: nothing to wait for. */
   if (handle_count == 0)
      return true;

   struct drm_syncobj_wait args = {
      .handles = (uintptr_t)handles,
      .count_handles = handle_count,
      .timeout_nsec = rel2abs(timeout),
      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
   };
   if (fence->unflushed_ctx) {
      /* This fence had a deferred flush from another context.  We can't
       * safely flush it here, because the context might be bound to a
       * different thread, and poking at its internals wouldn't be safe.
       *
       * Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
       * another thread submits the work.
       */
      args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   }
   return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
}
|
||||
|
||||
#ifndef SYNC_IOC_MAGIC
/* duplicated from linux/sync_file.h to avoid build-time dependency
 * on new (v4.7) kernel headers.  Once distro's are mostly using
 * something newer than v4.7 drop this and #include <linux/sync_file.h>
 * instead.
 *
 * NOTE: the layout and ioctl number must match the kernel uapi exactly.
 */
struct sync_merge_data {
   char name[32];   /* name of the new fence (in) */
   __s32 fd2;       /* fd of the second fence to merge (in) */
   __s32 fence;     /* fd of the merged fence (out) */
   __u32 flags;
   __u32 pad;
};

#define SYNC_IOC_MAGIC '>'
#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
#endif
|
||||
|
||||
static int
|
||||
sync_merge_fd(int sync_fd, int new_fd)
|
||||
{
|
||||
if (sync_fd == -1)
|
||||
return new_fd;
|
||||
|
||||
if (new_fd == -1)
|
||||
return sync_fd;
|
||||
|
||||
struct sync_merge_data args = {
|
||||
.name = "crocus fence",
|
||||
.fd2 = new_fd,
|
||||
.fence = -1,
|
||||
};
|
||||
|
||||
intel_ioctl(sync_fd, SYNC_IOC_MERGE, &args);
|
||||
close(new_fd);
|
||||
close(sync_fd);
|
||||
|
||||
return args.fence;
|
||||
}
|
||||
|
||||
/**
 * pipe_screen::fence_get_fd hook: export the fence as a sync-file fd,
 * merging each unsignaled per-batch syncobj into a single fd.
 * Returns -1 for deferred (unflushed) fences.
 */
static int
crocus_fence_get_fd(struct pipe_screen *p_screen,
                    struct pipe_fence_handle *fence)
{
   struct crocus_screen *screen = (struct crocus_screen *)p_screen;
   int fd = -1;

   /* Deferred fences aren't supported. */
   if (fence->unflushed_ctx)
      return -1;

   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      /* Export this syncobj as a sync-file fd... */
      struct drm_syncobj_handle args = {
         .handle = fine->syncobj->handle,
         .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
         .fd = -1,
      };

      intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
      /* ...and fold it into the accumulated fd. */
      fd = sync_merge_fd(fd, args.fd);
   }

   if (fd == -1) {
      /* Our fence has no syncobj's recorded.  This means that all of the
       * batches had already completed, their syncobj's had been signalled,
       * and so we didn't bother to record them.  But we're being asked to
       * export such a fence.  So export a dummy already-signalled syncobj.
       */
      struct drm_syncobj_handle args = {
         .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
         .fd = -1,
      };

      args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
      intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
      /* The temporary syncobj is no longer needed once exported. */
      gem_syncobj_destroy(screen->fd, args.handle);
      return args.fd;
   }

   return fd;
}
|
||||
|
||||
static void
|
||||
crocus_fence_create_fd(struct pipe_context *ctx, struct pipe_fence_handle **out,
|
||||
int fd, enum pipe_fd_type type)
|
||||
{
|
||||
assert(type == PIPE_FD_TYPE_NATIVE_SYNC || type == PIPE_FD_TYPE_SYNCOBJ);
|
||||
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
|
||||
struct drm_syncobj_handle args = {
|
||||
.fd = fd,
|
||||
};
|
||||
|
||||
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
|
||||
args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE;
|
||||
args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
|
||||
}
|
||||
|
||||
if (intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args) == -1) {
|
||||
fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
|
||||
strerror(errno));
|
||||
if (type == PIPE_FD_TYPE_NATIVE_SYNC)
|
||||
gem_syncobj_destroy(screen->fd, args.handle);
|
||||
*out = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
|
||||
if (!syncobj) {
|
||||
*out = NULL;
|
||||
return;
|
||||
}
|
||||
syncobj->handle = args.handle;
|
||||
pipe_reference_init(&syncobj->ref, 1);
|
||||
|
||||
struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
|
||||
if (!fine) {
|
||||
free(syncobj);
|
||||
*out = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
static const uint32_t zero = 0;
|
||||
|
||||
/* Fences work in terms of crocus_fine_fence, but we don't actually have a
|
||||
* seqno for an imported fence. So, create a fake one which always
|
||||
* returns as 'not signaled' so we fall back to using the sync object.
|
||||
*/
|
||||
fine->seqno = UINT32_MAX;
|
||||
fine->map = &zero;
|
||||
fine->syncobj = syncobj;
|
||||
fine->flags = CROCUS_FENCE_END;
|
||||
pipe_reference_init(&fine->reference, 1);
|
||||
|
||||
struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
|
||||
if (!fence) {
|
||||
free(fine);
|
||||
free(syncobj);
|
||||
*out = NULL;
|
||||
return;
|
||||
}
|
||||
pipe_reference_init(&fence->ref, 1);
|
||||
fence->fine[0] = fine;
|
||||
|
||||
*out = fence;
|
||||
}
|
||||
|
||||
/**
 * pipe_context::fence_server_signal hook: make this context's batches
 * signal the fence's syncobjs when they execute.
 */
static void
crocus_fence_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;

   /* Our own deferred, unflushed fence will be signaled by our flush. */
   if (ctx == fence->unflushed_ctx)
      return;

   /* NOTE(review): the nested loops add every unsignaled fine fence's
    * syncobj to every batch — confirm that duplicating the signal across
    * all engines is intended.
    */
   for (unsigned b = 0; b < ice->batch_count; b++) {
      for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
         struct crocus_fine_fence *fine = fence->fine[i];

         /* already signaled fence skipped */
         if (crocus_fine_fence_signaled(fine))
            continue;

         ice->batches[b].contains_fence_signal = true;
         crocus_batch_add_syncobj(&ice->batches[b], fine->syncobj,
                                  I915_EXEC_FENCE_SIGNAL);
      }
   }
}
|
||||
|
||||
void
|
||||
crocus_init_screen_fence_functions(struct pipe_screen *screen)
|
||||
{
|
||||
screen->fence_reference = crocus_fence_reference;
|
||||
screen->fence_finish = crocus_fence_finish;
|
||||
screen->fence_get_fd = crocus_fence_get_fd;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_init_context_fence_functions(struct pipe_context *ctx)
|
||||
{
|
||||
ctx->flush = crocus_fence_flush;
|
||||
ctx->create_fence_fd = crocus_fence_create_fd;
|
||||
ctx->fence_server_sync = crocus_fence_await;
|
||||
ctx->fence_server_signal = crocus_fence_signal;
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_FENCE_H
|
||||
#define CROCUS_FENCE_H
|
||||
|
||||
#include "util/u_inlines.h"
|
||||
|
||||
struct pipe_screen;
|
||||
struct crocus_screen;
|
||||
struct crocus_batch;
|
||||
|
||||
/* A refcounted wrapper around a kernel DRM syncobj handle. */
struct crocus_syncobj {
   struct pipe_reference ref;   /* reference count */
   uint32_t handle;             /* DRM syncobj handle */
};
|
||||
|
||||
void crocus_init_context_fence_functions(struct pipe_context *ctx);
|
||||
void crocus_init_screen_fence_functions(struct pipe_screen *screen);
|
||||
|
||||
struct crocus_syncobj *crocus_create_syncobj(struct crocus_screen *screen);
|
||||
void crocus_syncobj_destroy(struct crocus_screen *, struct crocus_syncobj *);
|
||||
void crocus_batch_add_syncobj(struct crocus_batch *batch,
|
||||
struct crocus_syncobj *syncobj,
|
||||
unsigned flags);
|
||||
bool crocus_wait_syncobj(struct pipe_screen *screen,
|
||||
struct crocus_syncobj *syncobj,
|
||||
int64_t timeout_nsec);
|
||||
static inline void
|
||||
crocus_syncobj_reference(struct crocus_screen *screen,
|
||||
struct crocus_syncobj **dst,
|
||||
struct crocus_syncobj *src)
|
||||
{
|
||||
if (pipe_reference(&(*dst)->ref, &src->ref))
|
||||
crocus_syncobj_destroy(screen, *dst);
|
||||
|
||||
*dst = src;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,85 @@
|
|||
#include "crocus_context.h"
|
||||
#include "crocus_fine_fence.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
|
||||
/* Allocate a fresh seqno slot for this batch and arm it at zero.
 * NOTE(review): batch->fine_fences.map is dereferenced unconditionally —
 * this assumes u_upload_alloc cannot fail here; confirm.
 */
static void
crocus_fine_fence_reset(struct crocus_batch *batch)
{
   u_upload_alloc(batch->fine_fences.uploader,
                  0, sizeof(uint64_t), sizeof(uint64_t),
                  &batch->fine_fences.ref.offset, &batch->fine_fences.ref.res,
                  (void **)&batch->fine_fences.map);
   /* The CPU snoops this while the GPU writes it; use WRITE_ONCE. */
   WRITE_ONCE(*batch->fine_fences.map, 0);
   batch->fine_fences.next++;
}
|
||||
|
||||
void
|
||||
crocus_fine_fence_init(struct crocus_batch *batch)
|
||||
{
|
||||
batch->fine_fences.ref.res = NULL;
|
||||
batch->fine_fences.next = 0;
|
||||
if (batch_has_fine_fence(batch))
|
||||
crocus_fine_fence_reset(batch);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
crocus_fine_fence_next(struct crocus_batch *batch)
|
||||
{
|
||||
if (!batch_has_fine_fence(batch))
|
||||
return UINT32_MAX;
|
||||
|
||||
uint32_t seqno = batch->fine_fences.next++;
|
||||
|
||||
if (batch->fine_fences.next == 0)
|
||||
crocus_fine_fence_reset(batch);
|
||||
|
||||
return seqno;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_fine_fence_destroy(struct crocus_screen *screen,
|
||||
struct crocus_fine_fence *fine)
|
||||
{
|
||||
crocus_syncobj_reference(screen, &fine->syncobj, NULL);
|
||||
pipe_resource_reference(&fine->ref.res, NULL);
|
||||
free(fine);
|
||||
}
|
||||
|
||||
/**
 * Create a new fine fence on @batch.
 *
 * Records the batch's signalling syncobj and, where the batch supports
 * fine fences, a seqno slot that a PIPE_CONTROL write will update when
 * the fenced moment passes.  @flags selects top-of-pipe (CS stall only)
 * versus bottom-of-pipe (cache-flushing) write behavior.
 * Returns NULL on allocation failure.
 */
struct crocus_fine_fence *
crocus_fine_fence_new(struct crocus_batch *batch, unsigned flags)
{
   struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
   if (!fine)
      return NULL;

   pipe_reference_init(&fine->reference, 1);

   fine->seqno = crocus_fine_fence_next(batch);

   crocus_syncobj_reference(batch->screen, &fine->syncobj,
                            crocus_batch_get_signal_syncobj(batch));

   /* Without fine-fence support we rely purely on the syncobj. */
   if (!batch_has_fine_fence(batch))
      return fine;
   pipe_resource_reference(&fine->ref.res, batch->fine_fences.ref.res);
   fine->ref.offset = batch->fine_fences.ref.offset;
   fine->map = batch->fine_fences.map;
   fine->flags = flags;

   unsigned pc;
   if (flags & CROCUS_FENCE_TOP_OF_PIPE) {
      pc = PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_CS_STALL;
   } else {
      /* Bottom of pipe: flush caches so prior rendering has landed. */
      pc = PIPE_CONTROL_WRITE_IMMEDIATE |
           PIPE_CONTROL_RENDER_TARGET_FLUSH |
           PIPE_CONTROL_TILE_CACHE_FLUSH |
           PIPE_CONTROL_DEPTH_CACHE_FLUSH |
           PIPE_CONTROL_DATA_CACHE_FLUSH;
   }
   crocus_emit_pipe_control_write(batch, "fence: fine", pc,
                                  crocus_resource_bo(fine->ref.res),
                                  fine->ref.offset,
                                  fine->seqno);

   return fine;
}
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_FINE_FENCE_DOT_H
|
||||
#define CROCUS_FINE_FENCE_DOT_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "crocus_screen.h"
|
||||
#include "crocus_resource.h"
|
||||
|
||||
/**
|
||||
* A lightweight sequence number fence.
|
||||
*
|
||||
* We emit PIPE_CONTROLs inside a batch (possibly in the middle)
|
||||
* which update a monotonically increasing, 32-bit counter. We
|
||||
* can then check if that moment has passed by either:
|
||||
*
|
||||
* 1. Checking on the CPU by snooping on the DWord via a coherent map
|
||||
*
|
||||
* 2. Blocking on the GPU with MI_SEMAPHORE_WAIT from a second batch
|
||||
* (relying on mid-batch preemption to switch GPU execution to the
|
||||
* batch that writes it).
|
||||
*/
|
||||
struct crocus_fine_fence {
   /** Reference count; freed via crocus_fine_fence_destroy() at zero. */
   struct pipe_reference reference;

   /** Buffer where the seqno lives */
   struct crocus_state_ref ref;

   /** Coherent CPU map of the buffer containing the seqno DWord.
    * NULL when the batch lacks fine-fence support, in which case the
    * fence can only be waited on via the syncobj.
    */
   const uint32_t *map;

   /**
    * A drm_syncobj pointing which will be signaled at the end of the
    * batch which writes this seqno. This can be used to block until
    * the seqno has definitely passed (but may wait longer than necessary).
    */
   struct crocus_syncobj *syncobj;

#define CROCUS_FENCE_BOTTOM_OF_PIPE 0x0 /**< Written by bottom-of-pipe flush */
#define CROCUS_FENCE_TOP_OF_PIPE    0x1 /**< Written by top-of-pipe flush */
#define CROCUS_FENCE_END            0x2 /**< Written at the end of a batch */

   /** Information about the type of flush involved (see CROCUS_FENCE_*) */
   uint32_t flags;

   /**
    * Sequence number expected to be written by the flush we inserted
    * when creating this fence. The crocus_fine_fence is 'signaled' when *@map
    * (written by the flush on the GPU) is greater-than-or-equal to @seqno.
    */
   uint32_t seqno;
};
|
||||
|
||||
void crocus_fine_fence_init(struct crocus_batch *batch);
|
||||
|
||||
struct crocus_fine_fence *crocus_fine_fence_new(struct crocus_batch *batch,
|
||||
unsigned flags);
|
||||
|
||||
void crocus_fine_fence_destroy(struct crocus_screen *screen,
|
||||
struct crocus_fine_fence *sq);
|
||||
|
||||
/* Point *dst at src, adjusting reference counts.  Follows the Gallium
 * pipe_reference() idiom: it returns true when the object previously
 * held in *dst dropped to a refcount of zero and must be destroyed.
 */
static inline void
crocus_fine_fence_reference(struct crocus_screen *screen,
                            struct crocus_fine_fence **dst,
                            struct crocus_fine_fence *src)
{
   if (pipe_reference(&(*dst)->reference, &src->reference))
      crocus_fine_fence_destroy(screen, *dst);

   *dst = src;
}
|
||||
|
||||
/**
|
||||
* Return true if this seqno has passed.
|
||||
*
|
||||
* NULL is considered signaled.
|
||||
*/
|
||||
static inline bool
|
||||
crocus_fine_fence_signaled(const struct crocus_fine_fence *sq)
|
||||
{
|
||||
if (sq && !sq->map)
|
||||
return false;
|
||||
return !sq || (READ_ONCE(*sq->map) >= sq->seqno);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,576 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_formats.c
|
||||
*
|
||||
* Converts Gallium formats (PIPE_FORMAT_*) to hardware ones (ISL_FORMAT_*).
|
||||
* Provides information about which formats support what features.
|
||||
*/
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/format/u_format.h"
|
||||
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
|
||||
/* Translation table from Gallium PIPE_FORMAT_* to hardware ISL_FORMAT_*.
 * Any entry not explicitly listed defaults to ISL_FORMAT_UNSUPPORTED via
 * the GNU range-designator initializer on the first line.
 */
static enum isl_format
crocus_isl_format_for_pipe_format(enum pipe_format pf)
{
   static const enum isl_format table[PIPE_FORMAT_COUNT] = {
      [0 ... PIPE_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,

      [PIPE_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
      [PIPE_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
      [PIPE_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
      [PIPE_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
      [PIPE_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
      [PIPE_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,

      /* Depth formats map to single-channel color formats. */
      [PIPE_FORMAT_Z16_UNORM] = ISL_FORMAT_R16_UNORM,
      [PIPE_FORMAT_Z32_UNORM] = ISL_FORMAT_R32_UNORM,
      [PIPE_FORMAT_Z32_FLOAT] = ISL_FORMAT_R32_FLOAT,

      /* We translate the combined depth/stencil formats to depth only here */
      [PIPE_FORMAT_Z24_UNORM_S8_UINT] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
      [PIPE_FORMAT_Z24X8_UNORM] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
      [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = ISL_FORMAT_R32_FLOAT,

      /* Stencil-only views sample as R8_UINT. */
      [PIPE_FORMAT_S8_UINT] = ISL_FORMAT_R8_UINT,
      [PIPE_FORMAT_X24S8_UINT] = ISL_FORMAT_R8_UINT,
      [PIPE_FORMAT_X32_S8X24_UINT] = ISL_FORMAT_R8_UINT,

      [PIPE_FORMAT_R64_FLOAT] = ISL_FORMAT_R64_FLOAT,
      [PIPE_FORMAT_R64G64_FLOAT] = ISL_FORMAT_R64G64_FLOAT,
      [PIPE_FORMAT_R64G64B64_FLOAT] = ISL_FORMAT_R64G64B64_FLOAT,
      [PIPE_FORMAT_R64G64B64A64_FLOAT] = ISL_FORMAT_R64G64B64A64_FLOAT,
      [PIPE_FORMAT_R32_FLOAT] = ISL_FORMAT_R32_FLOAT,
      [PIPE_FORMAT_R32G32_FLOAT] = ISL_FORMAT_R32G32_FLOAT,
      [PIPE_FORMAT_R32G32B32_FLOAT] = ISL_FORMAT_R32G32B32_FLOAT,
      [PIPE_FORMAT_R32G32B32A32_FLOAT] = ISL_FORMAT_R32G32B32A32_FLOAT,
      [PIPE_FORMAT_R32_UNORM] = ISL_FORMAT_R32_UNORM,
      [PIPE_FORMAT_R32G32_UNORM] = ISL_FORMAT_R32G32_UNORM,
      [PIPE_FORMAT_R32G32B32_UNORM] = ISL_FORMAT_R32G32B32_UNORM,
      [PIPE_FORMAT_R32G32B32A32_UNORM] = ISL_FORMAT_R32G32B32A32_UNORM,
      [PIPE_FORMAT_R32_USCALED] = ISL_FORMAT_R32_USCALED,
      [PIPE_FORMAT_R32G32_USCALED] = ISL_FORMAT_R32G32_USCALED,
      [PIPE_FORMAT_R32G32B32_USCALED] = ISL_FORMAT_R32G32B32_USCALED,
      [PIPE_FORMAT_R32G32B32A32_USCALED] = ISL_FORMAT_R32G32B32A32_USCALED,
      [PIPE_FORMAT_R32_SNORM] = ISL_FORMAT_R32_SNORM,
      [PIPE_FORMAT_R32G32_SNORM] = ISL_FORMAT_R32G32_SNORM,
      [PIPE_FORMAT_R32G32B32_SNORM] = ISL_FORMAT_R32G32B32_SNORM,
      [PIPE_FORMAT_R32G32B32A32_SNORM] = ISL_FORMAT_R32G32B32A32_SNORM,
      [PIPE_FORMAT_R32_SSCALED] = ISL_FORMAT_R32_SSCALED,
      [PIPE_FORMAT_R32G32_SSCALED] = ISL_FORMAT_R32G32_SSCALED,
      [PIPE_FORMAT_R32G32B32_SSCALED] = ISL_FORMAT_R32G32B32_SSCALED,
      [PIPE_FORMAT_R32G32B32A32_SSCALED] = ISL_FORMAT_R32G32B32A32_SSCALED,
      [PIPE_FORMAT_R16_UNORM] = ISL_FORMAT_R16_UNORM,
      [PIPE_FORMAT_R16G16_UNORM] = ISL_FORMAT_R16G16_UNORM,
      [PIPE_FORMAT_R16G16B16_UNORM] = ISL_FORMAT_R16G16B16_UNORM,
      [PIPE_FORMAT_R16G16B16A16_UNORM] = ISL_FORMAT_R16G16B16A16_UNORM,
      [PIPE_FORMAT_R16_USCALED] = ISL_FORMAT_R16_USCALED,
      [PIPE_FORMAT_R16G16_USCALED] = ISL_FORMAT_R16G16_USCALED,
      [PIPE_FORMAT_R16G16B16_USCALED] = ISL_FORMAT_R16G16B16_USCALED,
      [PIPE_FORMAT_R16G16B16A16_USCALED] = ISL_FORMAT_R16G16B16A16_USCALED,
      [PIPE_FORMAT_R16_SNORM] = ISL_FORMAT_R16_SNORM,
      [PIPE_FORMAT_R16G16_SNORM] = ISL_FORMAT_R16G16_SNORM,
      [PIPE_FORMAT_R16G16B16_SNORM] = ISL_FORMAT_R16G16B16_SNORM,
      [PIPE_FORMAT_R16G16B16A16_SNORM] = ISL_FORMAT_R16G16B16A16_SNORM,
      [PIPE_FORMAT_R16_SSCALED] = ISL_FORMAT_R16_SSCALED,
      [PIPE_FORMAT_R16G16_SSCALED] = ISL_FORMAT_R16G16_SSCALED,
      [PIPE_FORMAT_R16G16B16_SSCALED] = ISL_FORMAT_R16G16B16_SSCALED,
      [PIPE_FORMAT_R16G16B16A16_SSCALED] = ISL_FORMAT_R16G16B16A16_SSCALED,
      [PIPE_FORMAT_R8_UNORM] = ISL_FORMAT_R8_UNORM,
      [PIPE_FORMAT_R8G8_UNORM] = ISL_FORMAT_R8G8_UNORM,
      [PIPE_FORMAT_R8G8B8_UNORM] = ISL_FORMAT_R8G8B8_UNORM,
      [PIPE_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
      [PIPE_FORMAT_R8_USCALED] = ISL_FORMAT_R8_USCALED,
      [PIPE_FORMAT_R8G8_USCALED] = ISL_FORMAT_R8G8_USCALED,
      [PIPE_FORMAT_R8G8B8_USCALED] = ISL_FORMAT_R8G8B8_USCALED,
      [PIPE_FORMAT_R8G8B8A8_USCALED] = ISL_FORMAT_R8G8B8A8_USCALED,
      [PIPE_FORMAT_R8_SNORM] = ISL_FORMAT_R8_SNORM,
      [PIPE_FORMAT_R8G8_SNORM] = ISL_FORMAT_R8G8_SNORM,
      [PIPE_FORMAT_R8G8B8_SNORM] = ISL_FORMAT_R8G8B8_SNORM,
      [PIPE_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
      [PIPE_FORMAT_R8_SSCALED] = ISL_FORMAT_R8_SSCALED,
      [PIPE_FORMAT_R8G8_SSCALED] = ISL_FORMAT_R8G8_SSCALED,
      [PIPE_FORMAT_R8G8B8_SSCALED] = ISL_FORMAT_R8G8B8_SSCALED,
      [PIPE_FORMAT_R8G8B8A8_SSCALED] = ISL_FORMAT_R8G8B8A8_SSCALED,
      [PIPE_FORMAT_R32_FIXED] = ISL_FORMAT_R32_SFIXED,
      [PIPE_FORMAT_R32G32_FIXED] = ISL_FORMAT_R32G32_SFIXED,
      [PIPE_FORMAT_R32G32B32_FIXED] = ISL_FORMAT_R32G32B32_SFIXED,
      [PIPE_FORMAT_R32G32B32A32_FIXED] = ISL_FORMAT_R32G32B32A32_SFIXED,
      [PIPE_FORMAT_R16_FLOAT] = ISL_FORMAT_R16_FLOAT,
      [PIPE_FORMAT_R16G16_FLOAT] = ISL_FORMAT_R16G16_FLOAT,
      [PIPE_FORMAT_R16G16B16_FLOAT] = ISL_FORMAT_R16G16B16_FLOAT,
      [PIPE_FORMAT_R16G16B16A16_FLOAT] = ISL_FORMAT_R16G16B16A16_FLOAT,

      [PIPE_FORMAT_R8G8B8_SRGB] = ISL_FORMAT_R8G8B8_UNORM_SRGB,
      [PIPE_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
      [PIPE_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
      [PIPE_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,

      /* S3TC/DXT compressed formats. */
      [PIPE_FORMAT_DXT1_RGB] = ISL_FORMAT_BC1_UNORM,
      [PIPE_FORMAT_DXT1_RGBA] = ISL_FORMAT_BC1_UNORM,
      [PIPE_FORMAT_DXT3_RGBA] = ISL_FORMAT_BC2_UNORM,
      [PIPE_FORMAT_DXT5_RGBA] = ISL_FORMAT_BC3_UNORM,

      [PIPE_FORMAT_DXT1_SRGB] = ISL_FORMAT_BC1_UNORM_SRGB,
      [PIPE_FORMAT_DXT1_SRGBA] = ISL_FORMAT_BC1_UNORM_SRGB,
      [PIPE_FORMAT_DXT3_SRGBA] = ISL_FORMAT_BC2_UNORM_SRGB,
      [PIPE_FORMAT_DXT5_SRGBA] = ISL_FORMAT_BC3_UNORM_SRGB,

      [PIPE_FORMAT_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
      [PIPE_FORMAT_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
      [PIPE_FORMAT_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
      [PIPE_FORMAT_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,

      [PIPE_FORMAT_R10G10B10A2_USCALED] = ISL_FORMAT_R10G10B10A2_USCALED,
      [PIPE_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
      [PIPE_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
      [PIPE_FORMAT_R1_UNORM] = ISL_FORMAT_R1_UNORM,
      [PIPE_FORMAT_R10G10B10X2_USCALED] = ISL_FORMAT_R10G10B10X2_USCALED,
      [PIPE_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
      [PIPE_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,

      /* Intensity formats sample as single-channel red. */
      [PIPE_FORMAT_I8_UNORM] = ISL_FORMAT_R8_UNORM,
      [PIPE_FORMAT_I16_UNORM] = ISL_FORMAT_R16_UNORM,
      [PIPE_FORMAT_I8_SNORM] = ISL_FORMAT_R8_SNORM,
      [PIPE_FORMAT_I16_SNORM] = ISL_FORMAT_R16_SNORM,
      [PIPE_FORMAT_I16_FLOAT] = ISL_FORMAT_R16_FLOAT,
      [PIPE_FORMAT_I32_FLOAT] = ISL_FORMAT_R32_FLOAT,

      /* Luminance: note the SNORM variants fall back to R formats. */
      [PIPE_FORMAT_L8_UINT] = ISL_FORMAT_L8_UINT,
      [PIPE_FORMAT_L8_UNORM] = ISL_FORMAT_L8_UNORM,
      [PIPE_FORMAT_L8_SNORM] = ISL_FORMAT_R8_SNORM,
      [PIPE_FORMAT_L8_SINT] = ISL_FORMAT_L8_SINT,
      [PIPE_FORMAT_L16_UNORM] = ISL_FORMAT_L16_UNORM,
      [PIPE_FORMAT_L16_SNORM] = ISL_FORMAT_R16_SNORM,
      [PIPE_FORMAT_L16_FLOAT] = ISL_FORMAT_L16_FLOAT,
      [PIPE_FORMAT_L32_FLOAT] = ISL_FORMAT_L32_FLOAT,

      [PIPE_FORMAT_A8_UNORM] = ISL_FORMAT_A8_UNORM,
      [PIPE_FORMAT_A16_UNORM] = ISL_FORMAT_A16_UNORM,
      [PIPE_FORMAT_A16_FLOAT] = ISL_FORMAT_A16_FLOAT,
      [PIPE_FORMAT_A32_FLOAT] = ISL_FORMAT_A32_FLOAT,

      [PIPE_FORMAT_L8A8_UNORM] = ISL_FORMAT_L8A8_UNORM,
      [PIPE_FORMAT_L16A16_UNORM] = ISL_FORMAT_L16A16_UNORM,
      [PIPE_FORMAT_L16A16_FLOAT] = ISL_FORMAT_L16A16_FLOAT,
      [PIPE_FORMAT_L32A32_FLOAT] = ISL_FORMAT_L32A32_FLOAT,

      /* Sadly, we have to use luminance[-alpha] formats for sRGB decoding. */
      [PIPE_FORMAT_R8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
      [PIPE_FORMAT_L8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
      [PIPE_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB,

      [PIPE_FORMAT_R10G10B10A2_SSCALED] = ISL_FORMAT_R10G10B10A2_SSCALED,
      [PIPE_FORMAT_R10G10B10A2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,

      [PIPE_FORMAT_B10G10R10A2_USCALED] = ISL_FORMAT_B10G10R10A2_USCALED,
      [PIPE_FORMAT_B10G10R10A2_SSCALED] = ISL_FORMAT_B10G10R10A2_SSCALED,
      [PIPE_FORMAT_B10G10R10A2_SNORM] = ISL_FORMAT_B10G10R10A2_SNORM,

      [PIPE_FORMAT_R8_UINT] = ISL_FORMAT_R8_UINT,
      [PIPE_FORMAT_R8G8_UINT] = ISL_FORMAT_R8G8_UINT,
      [PIPE_FORMAT_R8G8B8_UINT] = ISL_FORMAT_R8G8B8_UINT,
      [PIPE_FORMAT_R8G8B8A8_UINT] = ISL_FORMAT_R8G8B8A8_UINT,

      [PIPE_FORMAT_R8_SINT] = ISL_FORMAT_R8_SINT,
      [PIPE_FORMAT_R8G8_SINT] = ISL_FORMAT_R8G8_SINT,
      [PIPE_FORMAT_R8G8B8_SINT] = ISL_FORMAT_R8G8B8_SINT,
      [PIPE_FORMAT_R8G8B8A8_SINT] = ISL_FORMAT_R8G8B8A8_SINT,

      [PIPE_FORMAT_R16_UINT] = ISL_FORMAT_R16_UINT,
      [PIPE_FORMAT_R16G16_UINT] = ISL_FORMAT_R16G16_UINT,
      [PIPE_FORMAT_R16G16B16_UINT] = ISL_FORMAT_R16G16B16_UINT,
      [PIPE_FORMAT_R16G16B16A16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,

      [PIPE_FORMAT_R16_SINT] = ISL_FORMAT_R16_SINT,
      [PIPE_FORMAT_R16G16_SINT] = ISL_FORMAT_R16G16_SINT,
      [PIPE_FORMAT_R16G16B16_SINT] = ISL_FORMAT_R16G16B16_SINT,
      [PIPE_FORMAT_R16G16B16A16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,

      [PIPE_FORMAT_R32_UINT] = ISL_FORMAT_R32_UINT,
      [PIPE_FORMAT_R32G32_UINT] = ISL_FORMAT_R32G32_UINT,
      [PIPE_FORMAT_R32G32B32_UINT] = ISL_FORMAT_R32G32B32_UINT,
      [PIPE_FORMAT_R32G32B32A32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,

      [PIPE_FORMAT_R32_SINT] = ISL_FORMAT_R32_SINT,
      [PIPE_FORMAT_R32G32_SINT] = ISL_FORMAT_R32G32_SINT,
      [PIPE_FORMAT_R32G32B32_SINT] = ISL_FORMAT_R32G32B32_SINT,
      [PIPE_FORMAT_R32G32B32A32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,

      [PIPE_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,

      [PIPE_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,

      [PIPE_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
      [PIPE_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
      [PIPE_FORMAT_R16G16B16X16_UNORM] = ISL_FORMAT_R16G16B16X16_UNORM,
      [PIPE_FORMAT_R16G16B16X16_FLOAT] = ISL_FORMAT_R16G16B16X16_FLOAT,
      [PIPE_FORMAT_R32G32B32X32_FLOAT] = ISL_FORMAT_R32G32B32X32_FLOAT,

      [PIPE_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,

      [PIPE_FORMAT_B5G6R5_SRGB] = ISL_FORMAT_B5G6R5_UNORM_SRGB,

      [PIPE_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
      [PIPE_FORMAT_BPTC_SRGBA] = ISL_FORMAT_BC7_UNORM_SRGB,
      [PIPE_FORMAT_BPTC_RGB_FLOAT] = ISL_FORMAT_BC6H_SF16,
      [PIPE_FORMAT_BPTC_RGB_UFLOAT] = ISL_FORMAT_BC6H_UF16,

      [PIPE_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
      [PIPE_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
      [PIPE_FORMAT_ETC2_RGB8A1] = ISL_FORMAT_ETC2_RGB8_PTA,
      [PIPE_FORMAT_ETC2_SRGB8A1] = ISL_FORMAT_ETC2_SRGB8_PTA,
      [PIPE_FORMAT_ETC2_RGBA8] = ISL_FORMAT_ETC2_EAC_RGBA8,
      [PIPE_FORMAT_ETC2_SRGBA8] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
      [PIPE_FORMAT_ETC2_R11_UNORM] = ISL_FORMAT_EAC_R11,
      [PIPE_FORMAT_ETC2_R11_SNORM] = ISL_FORMAT_EAC_SIGNED_R11,
      [PIPE_FORMAT_ETC2_RG11_UNORM] = ISL_FORMAT_EAC_RG11,
      [PIPE_FORMAT_ETC2_RG11_SNORM] = ISL_FORMAT_EAC_SIGNED_RG11,

      [PIPE_FORMAT_FXT1_RGB] = ISL_FORMAT_FXT1,
      [PIPE_FORMAT_FXT1_RGBA] = ISL_FORMAT_FXT1,

      [PIPE_FORMAT_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
      [PIPE_FORMAT_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
      [PIPE_FORMAT_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
      [PIPE_FORMAT_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
      [PIPE_FORMAT_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
      [PIPE_FORMAT_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
      [PIPE_FORMAT_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
      [PIPE_FORMAT_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
      [PIPE_FORMAT_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
      [PIPE_FORMAT_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
      [PIPE_FORMAT_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
      [PIPE_FORMAT_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
      [PIPE_FORMAT_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
      [PIPE_FORMAT_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,

      [PIPE_FORMAT_ASTC_4x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
      [PIPE_FORMAT_ASTC_5x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
      [PIPE_FORMAT_ASTC_5x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
      [PIPE_FORMAT_ASTC_6x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
      [PIPE_FORMAT_ASTC_6x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
      [PIPE_FORMAT_ASTC_8x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
      [PIPE_FORMAT_ASTC_8x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
      [PIPE_FORMAT_ASTC_8x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
      [PIPE_FORMAT_ASTC_10x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
      [PIPE_FORMAT_ASTC_10x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
      [PIPE_FORMAT_ASTC_10x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
      [PIPE_FORMAT_ASTC_10x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
      [PIPE_FORMAT_ASTC_12x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
      [PIPE_FORMAT_ASTC_12x12_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,

      [PIPE_FORMAT_A1B5G5R5_UNORM] = ISL_FORMAT_A1B5G5R5_UNORM,

      /* We support these so that we know the API expects no alpha channel.
       * Otherwise, the state tracker would just give us a format with alpha
       * and we wouldn't know to override the swizzle to 1.
       */
      [PIPE_FORMAT_R16G16B16X16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,
      [PIPE_FORMAT_R16G16B16X16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,
      [PIPE_FORMAT_R32G32B32X32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,
      [PIPE_FORMAT_R32G32B32X32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,
      [PIPE_FORMAT_R10G10B10X2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,
   };
   assert(pf < PIPE_FORMAT_COUNT);
   return table[pf];
}
|
||||
|
||||
static enum isl_format
|
||||
get_render_format(enum pipe_format pformat, enum isl_format def_format)
|
||||
{
|
||||
switch (pformat) {
|
||||
case PIPE_FORMAT_A16_UNORM: return ISL_FORMAT_R16_UNORM;
|
||||
case PIPE_FORMAT_A16_FLOAT: return ISL_FORMAT_R16_FLOAT;
|
||||
case PIPE_FORMAT_A32_FLOAT: return ISL_FORMAT_R32_FLOAT;
|
||||
|
||||
case PIPE_FORMAT_I8_UNORM: return ISL_FORMAT_R8_UNORM;
|
||||
case PIPE_FORMAT_I16_UNORM: return ISL_FORMAT_R16_UNORM;
|
||||
case PIPE_FORMAT_I16_FLOAT: return ISL_FORMAT_R16_FLOAT;
|
||||
case PIPE_FORMAT_I32_FLOAT: return ISL_FORMAT_R32_FLOAT;
|
||||
|
||||
case PIPE_FORMAT_L8_UNORM: return ISL_FORMAT_R8_UNORM;
|
||||
case PIPE_FORMAT_L8_UINT: return ISL_FORMAT_R8_UINT;
|
||||
case PIPE_FORMAT_L8_SINT: return ISL_FORMAT_R8_SINT;
|
||||
case PIPE_FORMAT_L16_UNORM: return ISL_FORMAT_R16_UNORM;
|
||||
case PIPE_FORMAT_L16_FLOAT: return ISL_FORMAT_R16_FLOAT;
|
||||
case PIPE_FORMAT_L32_FLOAT: return ISL_FORMAT_R32_FLOAT;
|
||||
|
||||
case PIPE_FORMAT_L8A8_UNORM: return ISL_FORMAT_R8G8_UNORM;
|
||||
case PIPE_FORMAT_L16A16_UNORM: return ISL_FORMAT_R16G16_UNORM;
|
||||
case PIPE_FORMAT_L16A16_FLOAT: return ISL_FORMAT_R16G16_FLOAT;
|
||||
case PIPE_FORMAT_L32A32_FLOAT: return ISL_FORMAT_R32G32_FLOAT;
|
||||
|
||||
default:
|
||||
return def_format;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pick the ISL format and channel swizzles for a pipe format used with
 * the given ISL surface usage.
 *
 * Starts from the straight table translation and then applies per-usage
 * and per-generation fixups, returning ISL_FORMAT_UNSUPPORTED unchanged
 * when the base translation fails.
 */
struct crocus_format_info
crocus_format_for_usage(const struct intel_device_info *devinfo,
                        enum pipe_format pformat,
                        isl_surf_usage_flags_t usage)
{
   /* Default to an identity swizzle. */
   struct crocus_format_info info = { crocus_isl_format_for_pipe_format(pformat),
      { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W } };

   if (info.fmt == ISL_FORMAT_UNSUPPORTED)
      return info;

   if (pformat == PIPE_FORMAT_A8_UNORM) {
      info.fmt = ISL_FORMAT_A8_UNORM;
   }

   if (usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
      info.fmt = get_render_format(pformat, info.fmt);
   /* Pre-gfx6 uses typeless formats for separate depth/stencil views. */
   if (devinfo->ver < 6) {
      if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
         info.fmt = ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
      if (pformat == PIPE_FORMAT_X32_S8X24_UINT)
         info.fmt = ISL_FORMAT_X32_TYPELESS_G8X24_UINT;
      if (pformat == PIPE_FORMAT_X24S8_UINT)
         info.fmt = ISL_FORMAT_X24_TYPELESS_G8_UINT;
   }

   const struct isl_format_layout *fmtl = isl_format_get_layout(info.fmt);

   /* SNORM intensity/luminance/alpha formats translate to plain R formats
    * in the table above, so reconstruct the I/L/A channel semantics here
    * via the swizzle.
    */
   if (util_format_is_snorm(pformat)) {
      if (util_format_is_intensity(pformat)) {
         info.swizzles[0] = PIPE_SWIZZLE_X;
         info.swizzles[1] = PIPE_SWIZZLE_X;
         info.swizzles[2] = PIPE_SWIZZLE_X;
         info.swizzles[3] = PIPE_SWIZZLE_X;
      } else if (util_format_is_luminance(pformat)) {
         info.swizzles[0] = PIPE_SWIZZLE_X;
         info.swizzles[1] = PIPE_SWIZZLE_X;
         info.swizzles[2] = PIPE_SWIZZLE_X;
         info.swizzles[3] = PIPE_SWIZZLE_1;
      } else if (util_format_is_luminance_alpha(pformat)) {
         info.swizzles[0] = PIPE_SWIZZLE_X;
         info.swizzles[1] = PIPE_SWIZZLE_X;
         info.swizzles[2] = PIPE_SWIZZLE_X;
         info.swizzles[3] = PIPE_SWIZZLE_Y;
      } else if (util_format_is_alpha(pformat)) {
         info.swizzles[0] = PIPE_SWIZZLE_0;
         info.swizzles[1] = PIPE_SWIZZLE_0;
         info.swizzles[2] = PIPE_SWIZZLE_0;
         info.swizzles[3] = PIPE_SWIZZLE_X;
      }
   }

   /* When faking RGBX pipe formats with RGBA ISL formats, override alpha. */
   if (!util_format_has_alpha(pformat) && fmtl->channels.a.type != ISL_VOID) {
      info.swizzles[0] = PIPE_SWIZZLE_X;
      info.swizzles[1] = PIPE_SWIZZLE_Y;
      info.swizzles[2] = PIPE_SWIZZLE_Z;
      info.swizzles[3] = PIPE_SWIZZLE_1;
   }

   /* We choose RGBA over RGBX for rendering because the hardware doesn't
    * support rendering to RGBX. However, when this internal override is used
    * on Gen9+, fast clears don't work correctly.
    *
    * i965 fixes this by pretending to not support RGBX formats, and the higher
    * layers of Mesa pick the RGBA format instead. Gallium doesn't work that
    * way, and might choose a different format, like BGRX instead of RGBX,
    * which will also cause problems when sampling from a surface fast cleared
    * as RGBX. So we always choose RGBA instead of RGBX explicitly
    * here.
    */
   if (isl_format_is_rgbx(info.fmt) &&
       !isl_format_supports_rendering(devinfo, info.fmt) &&
       (usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)) {
      info.fmt = isl_format_rgbx_to_rgba(info.fmt);
      info.swizzles[0] = PIPE_SWIZZLE_X;
      info.swizzles[1] = PIPE_SWIZZLE_Y;
      info.swizzles[2] = PIPE_SWIZZLE_Z;
      info.swizzles[3] = PIPE_SWIZZLE_1;
   }

   return info;
}
|
||||
|
||||
/**
|
||||
* The pscreen->is_format_supported() driver hook.
|
||||
*
|
||||
* Returns true if the given format is supported for the given usage
|
||||
* (PIPE_BIND_*) and sample count.
|
||||
*/
|
||||
bool
|
||||
crocus_is_format_supported(struct pipe_screen *pscreen,
|
||||
enum pipe_format pformat,
|
||||
enum pipe_texture_target target,
|
||||
unsigned sample_count, unsigned storage_sample_count,
|
||||
unsigned usage)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
if (!util_is_power_of_two_or_zero(sample_count))
|
||||
return false;
|
||||
if (devinfo->ver >= 7) {
|
||||
if (sample_count > 8 || sample_count == 2)
|
||||
return false;
|
||||
} else if (devinfo->ver == 6) {
|
||||
if (sample_count > 4 || sample_count == 2)
|
||||
return false;
|
||||
} else if (sample_count > 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pformat == PIPE_FORMAT_NONE)
|
||||
return true;
|
||||
|
||||
enum isl_format format = crocus_isl_format_for_pipe_format(pformat);
|
||||
|
||||
if (format == ISL_FORMAT_UNSUPPORTED)
|
||||
return false;
|
||||
|
||||
/* no stencil texturing prior to haswell */
|
||||
if (!devinfo->is_haswell) {
|
||||
if (pformat == PIPE_FORMAT_S8_UINT ||
|
||||
pformat == PIPE_FORMAT_X24S8_UINT ||
|
||||
pformat == PIPE_FORMAT_S8X24_UINT ||
|
||||
pformat == PIPE_FORMAT_X32_S8X24_UINT)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
|
||||
const bool is_integer = isl_format_has_int_channel(format);
|
||||
bool supported = true;
|
||||
|
||||
if (sample_count > 1)
|
||||
supported &= isl_format_supports_multisampling(devinfo, format);
|
||||
|
||||
if (usage & PIPE_BIND_DEPTH_STENCIL) {
|
||||
supported &= format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS ||
|
||||
format == ISL_FORMAT_R32_FLOAT ||
|
||||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
|
||||
format == ISL_FORMAT_R16_UNORM ||
|
||||
format == ISL_FORMAT_R8_UINT;
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_RENDER_TARGET) {
|
||||
/* Alpha and luminance-alpha formats other than A8_UNORM are not
|
||||
* renderable.
|
||||
*
|
||||
* For BLORP, we can apply the swizzle in the shader. But for
|
||||
* general rendering, this would mean recompiling the shader, which
|
||||
* we'd like to avoid doing. So we mark these formats non-renderable.
|
||||
*
|
||||
* We do support A8_UNORM as it's required and is renderable.
|
||||
*/
|
||||
if (pformat != PIPE_FORMAT_A8_UNORM &&
|
||||
(util_format_is_alpha(pformat) ||
|
||||
util_format_is_luminance_alpha(pformat)))
|
||||
supported = false;
|
||||
|
||||
enum isl_format rt_format = format;
|
||||
|
||||
if (isl_format_is_rgbx(format) &&
|
||||
!isl_format_supports_rendering(devinfo, format))
|
||||
rt_format = isl_format_rgbx_to_rgba(format);
|
||||
|
||||
supported &= isl_format_supports_rendering(devinfo, rt_format);
|
||||
|
||||
if (!is_integer)
|
||||
supported &= isl_format_supports_alpha_blending(devinfo, rt_format);
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_SHADER_IMAGE) {
|
||||
/* Dataport doesn't support compression, and we can't resolve an MCS
|
||||
* compressed surface. (Buffer images may have sample count of 0.)
|
||||
*/
|
||||
supported &= sample_count == 0;
|
||||
|
||||
supported &= isl_format_supports_typed_writes(devinfo, format);
|
||||
supported &= isl_has_matching_typed_storage_image_format(devinfo, format);
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_SAMPLER_VIEW) {
|
||||
supported &= isl_format_supports_sampling(devinfo, format);
|
||||
bool ignore_filtering = false;
|
||||
|
||||
if (is_integer)
|
||||
ignore_filtering = true;
|
||||
|
||||
/* I said them, but I lied them. */
|
||||
if (devinfo->ver < 5 && (format == ISL_FORMAT_R32G32B32A32_FLOAT ||
|
||||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
|
||||
format == ISL_FORMAT_R32_FLOAT ||
|
||||
format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS))
|
||||
ignore_filtering = true;
|
||||
if (!ignore_filtering)
|
||||
supported &= isl_format_supports_filtering(devinfo, format);
|
||||
|
||||
/* Don't advertise 3-component RGB formats for non-buffer textures.
|
||||
* This ensures that they are renderable from an API perspective since
|
||||
* the state tracker will fall back to RGBA or RGBX, which are
|
||||
* renderable. We want to render internally for copies and blits,
|
||||
* even if the application doesn't.
|
||||
*
|
||||
* Buffer textures don't need to be renderable, so we support real RGB.
|
||||
* This is useful for PBO upload, and 32-bit RGB support is mandatory.
|
||||
*/
|
||||
if (target != PIPE_BUFFER)
|
||||
supported &= fmtl->bpb != 24 && fmtl->bpb != 48 && fmtl->bpb != 96;
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_VERTEX_BUFFER) {
|
||||
supported &= isl_format_supports_vertex_fetch(devinfo, format);
|
||||
|
||||
if (!devinfo->is_haswell) {
|
||||
/* W/A: Pre-Haswell, the hardware doesn't really support the formats
|
||||
* we'd like to use here, so upload everything as UINT and fix it in
|
||||
* the shader
|
||||
*/
|
||||
if (format == ISL_FORMAT_R10G10B10A2_UNORM ||
|
||||
format == ISL_FORMAT_B10G10R10A2_UNORM ||
|
||||
format == ISL_FORMAT_R10G10B10A2_SNORM ||
|
||||
format == ISL_FORMAT_B10G10R10A2_SNORM ||
|
||||
format == ISL_FORMAT_R10G10B10A2_USCALED ||
|
||||
format == ISL_FORMAT_B10G10R10A2_USCALED ||
|
||||
format == ISL_FORMAT_R10G10B10A2_SSCALED ||
|
||||
format == ISL_FORMAT_B10G10R10A2_SSCALED)
|
||||
supported = true;
|
||||
|
||||
if (format == ISL_FORMAT_R8G8B8_SINT ||
|
||||
format == ISL_FORMAT_R8G8B8_UINT ||
|
||||
format == ISL_FORMAT_R16G16B16_SINT ||
|
||||
format == ISL_FORMAT_R16G16B16_UINT)
|
||||
supported = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (usage & PIPE_BIND_INDEX_BUFFER) {
|
||||
supported &= format == ISL_FORMAT_R8_UINT ||
|
||||
format == ISL_FORMAT_R16_UINT ||
|
||||
format == ISL_FORMAT_R32_UINT;
|
||||
}
|
||||
|
||||
return supported;
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Macro and function definitions needed in order to use genxml.
|
||||
*
|
||||
* This should only be included in sources compiled per-generation.
|
||||
*/
|
||||
|
||||
#include "crocus_batch.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#define __gen_address_type struct crocus_address
|
||||
#define __gen_user_data struct crocus_batch
|
||||
#define __gen_combine_address crocus_combine_address
|
||||
|
||||
static inline void *
__gen_get_batch_dwords(struct crocus_batch *batch, unsigned dwords)
{
   /* Reserve room in the command stream for genxml-packed dwords. */
   return crocus_get_command_space(batch, sizeof(uint32_t) * dwords);
}
|
||||
|
||||
static inline struct crocus_address
|
||||
__gen_address_offset(struct crocus_address addr, uint64_t offset)
|
||||
{
|
||||
addr.offset += offset;
|
||||
return addr;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
__gen_combine_address(struct crocus_batch *batch, void *location,
|
||||
struct crocus_address addr, uint32_t delta)
|
||||
{
|
||||
uint32_t offset = (char *)location - (char *)batch->command.map;
|
||||
|
||||
if (addr.bo == NULL) {
|
||||
return addr.offset + delta;
|
||||
} else {
|
||||
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
|
||||
offset = (char *) location - (char *) batch->state.map;
|
||||
return crocus_state_reloc(batch, offset, addr.bo,
|
||||
addr.offset + delta,
|
||||
addr.reloc_flags);
|
||||
}
|
||||
|
||||
assert(!crocus_ptr_in_state_buffer(batch, location));
|
||||
|
||||
offset = (char *) location - (char *) batch->command.map;
|
||||
return crocus_command_reloc(batch, offset, addr.bo,
|
||||
addr.offset + delta,
|
||||
addr.reloc_flags);
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: __gen_address_type and __gen_user_data were redefined here; they
 * are already defined above (before the genxml includes), so the
 * redundant redefinitions have been dropped.
 */

/* Token-pasting helpers mapping a genxml struct name to its generated
 * length / header / pack symbols.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
#define __genxml_reg_num(cmd) cmd ## _num

#include "genxml/genX_pack.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_bits.h"

/* CS_GPR(15) is reserved for combining conditional rendering predicates
 * with GL_ARB_indirect_parameters draw number predicates.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#include "common/mi_builder.h"

/* Declare `name`, let the caller's braced body fill it in, then pack it
 * into `dst` when the (single-iteration) for loop ends.
 */
#define _crocus_pack_command(batch, cmd, dst, name)                     \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1);       \
        ({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name);          \
           _dst = NULL;                                                 \
        }))

#define crocus_pack_command(cmd, dst, name) \
   _crocus_pack_command(NULL, cmd, dst, name)

/* Same idea for state structures, which carry no header dword. */
#define _crocus_pack_state(batch, cmd, dst, name)                       \
   for (struct cmd name = {},                                           \
        *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1);       \
        __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name),             \
        _dst = NULL)

#define crocus_pack_state(cmd, dst, name) \
   _crocus_pack_state(NULL, cmd, dst, name)
|
||||
|
||||
/* Pack a command directly into freshly reserved batch space. */
#define crocus_emit_cmd(batch, cmd, name) \
   _crocus_pack_command(batch, cmd, __gen_get_batch_dwords(batch, __genxml_cmd_length(cmd)), name)

/* OR together two pre-packed dword streams (e.g. a CSO's static state
 * with dynamic state) into the batch.
 *
 * The Valgrind client request takes a length in BYTES; the original
 * passed the dword count, under-checking the buffer by 4x.
 */
#define crocus_emit_merge(batch, dwords0, dwords1, num_dwords)          \
   do {                                                                 \
      uint32_t *dw = __gen_get_batch_dwords(batch, num_dwords);         \
      for (uint32_t i = 0; i < num_dwords; i++)                         \
         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords * 4));            \
   } while (0)
|
||||
|
||||
/* Emit MI_LOAD_REGISTER_IMM for each dword of a (possibly multi-dword)
 * register.  Consecutive dwords of a register live at consecutive MMIO
 * offsets, so the register offset must advance by 4 bytes per dword;
 * the original wrote every dword to the base offset.
 */
#define crocus_emit_reg(batch, reg, name)                               \
   for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
        ({                                                              \
           uint32_t _dw[__genxml_cmd_length(reg)];                      \
           __genxml_cmd_pack(reg)(NULL, _dw, &name);                    \
           for (unsigned i = 0; i < __genxml_cmd_length(reg); i++) {    \
              crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
                 lri.RegisterOffset = __genxml_reg_num(reg) + i * 4;    \
                 lri.DataDWord = _dw[i];                                \
              }                                                         \
           }                                                            \
           _cont = NULL;                                                \
        }))
|
||||
|
||||
|
||||
/**
|
||||
* crocus_address constructor helpers:
|
||||
*
|
||||
* When using these to construct a CSO, pass NULL for \p bo, and manually
|
||||
* pin the BO later. Otherwise, genxml's address handling will add the
|
||||
* BO to the current batch's validation list at CSO creation time, rather
|
||||
* than at draw time as desired.
|
||||
*/
|
||||
|
||||
UNUSED static struct crocus_address
|
||||
ro_bo(struct crocus_bo *bo, uint64_t offset)
|
||||
{
|
||||
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT };
|
||||
}
|
||||
|
||||
UNUSED static struct crocus_address
|
||||
rw_bo(struct crocus_bo *bo, uint64_t offset)
|
||||
{
|
||||
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT | RELOC_WRITE };
|
||||
}
|
||||
|
||||
UNUSED static struct crocus_address
|
||||
ggtt_bo(struct crocus_bo *bo, uint64_t offset)
|
||||
{
|
||||
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT };
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* GenX-specific function declarations.
|
||||
*
|
||||
* Don't include this directly, it will be included by crocus_context.h.
|
||||
*
|
||||
* NOTE: This header can be included multiple times, from the same file.
|
||||
*/
|
||||
|
||||
/* crocus_state.c */
|
||||
void genX(init_state)(struct crocus_context *ice);
|
||||
void genX(init_screen_state)(struct crocus_screen *screen);
|
||||
void genX(upload_urb)(struct crocus_batch *batch,
|
||||
unsigned vs_size,
|
||||
bool gs_present,
|
||||
unsigned gs_size);
|
||||
void genX(emit_hashing_mode)(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
unsigned width, unsigned height,
|
||||
unsigned scale);
|
||||
|
||||
/* crocus_blorp.c */
|
||||
void genX(init_blorp)(struct crocus_context *ice);
|
||||
|
||||
/* crocus_query.c */
|
||||
void genX(init_query)(struct crocus_context *ice);
|
||||
void genX(init_screen_query)(struct crocus_screen *screen);
|
||||
void genX(math_add32_gpr0)(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
uint32_t x);
|
||||
void genX(math_div32_gpr0)(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
uint32_t D);
|
||||
|
||||
/* crocus_blt.c */
|
||||
void genX(init_blt)(struct crocus_screen *screen);
|
|
@ -0,0 +1,484 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "crocus_monitor.h"
|
||||
|
||||
#include <xf86drm.h>
|
||||
|
||||
#include "crocus_screen.h"
|
||||
#include "crocus_context.h"
|
||||
|
||||
#include "perf/intel_perf.h"
|
||||
#include "perf/intel_perf_query.h"
|
||||
#include "perf/intel_perf_regs.h"
|
||||
|
||||
struct crocus_monitor_object {
|
||||
int num_active_counters;
|
||||
int *active_counters;
|
||||
|
||||
size_t result_size;
|
||||
unsigned char *result_buffer;
|
||||
|
||||
struct intel_perf_query_object *query;
|
||||
};
|
||||
|
||||
int
|
||||
crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
|
||||
struct pipe_driver_query_info *info)
|
||||
{
|
||||
const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
|
||||
assert(screen->monitor_cfg);
|
||||
if (!screen->monitor_cfg)
|
||||
return 0;
|
||||
|
||||
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
|
||||
|
||||
if (!info) {
|
||||
/* return the number of metrics */
|
||||
return monitor_cfg->num_counters;
|
||||
}
|
||||
|
||||
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
|
||||
const int group = monitor_cfg->counters[index].group;
|
||||
const int counter_index = monitor_cfg->counters[index].counter;
|
||||
struct intel_perf_query_counter *counter =
|
||||
&perf_cfg->queries[group].counters[counter_index];
|
||||
|
||||
info->group_id = group;
|
||||
info->name = counter->name;
|
||||
info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
|
||||
|
||||
if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
|
||||
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
|
||||
else
|
||||
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
|
||||
switch (counter->data_type) {
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
||||
info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
|
||||
info->max_value.u32 = 0;
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
||||
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
|
||||
info->max_value.u64 = 0;
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
|
||||
info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
|
||||
info->max_value.u64 = -1;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
/* indicates that this is an OA query, not a pipeline statistics query */
|
||||
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Function-pointer signatures for the intel_perf vtbl; the perf code
 * traffics in void pointers, so the crocus implementations are cast
 * through these typedefs below.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);

/* Trampoline matching the perf vtbl's bo_alloc signature. */
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return crocus_bo_alloc(bufmgr, name, size);
}
|
||||
|
||||
#if 0
/* Full pipeline flush around OA metric collection.  Currently compiled
 * out; kept for reference.
 */
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
                                  "OA metrics", flags);
}
#endif
|
||||
|
||||
static void
|
||||
crocus_monitor_emit_mi_report_perf_count(void *c,
|
||||
void *bo,
|
||||
uint32_t offset_in_bytes,
|
||||
uint32_t report_id)
|
||||
{
|
||||
struct crocus_context *ice = c;
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
|
||||
{
|
||||
struct crocus_context *ice = c;
|
||||
_crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
#if 0
/* Frequency-capture and 64-bit register-store hooks.  Currently
 * compiled out; kept for reference.  NOTE(review): these reference
 * ice->vtbl and GEN9_RPSTAT0, which would need updating before being
 * enabled.
 */
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
                                               void *bo,
                                               uint32_t bo_offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
                                    uint32_t reg, uint32_t offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
|
||||
|
||||
static bool
|
||||
crocus_monitor_init_metrics(struct crocus_screen *screen)
|
||||
{
|
||||
struct crocus_monitor_config *monitor_cfg =
|
||||
rzalloc(screen, struct crocus_monitor_config);
|
||||
struct intel_perf_config *perf_cfg = NULL;
|
||||
if (unlikely(!monitor_cfg))
|
||||
goto allocation_error;
|
||||
perf_cfg = intel_perf_new(monitor_cfg);
|
||||
if (unlikely(!perf_cfg))
|
||||
goto allocation_error;
|
||||
|
||||
monitor_cfg->perf_cfg = perf_cfg;
|
||||
|
||||
perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
|
||||
perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
|
||||
perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
|
||||
perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
|
||||
|
||||
perf_cfg->vtbl.emit_mi_report_perf_count =
|
||||
(emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
|
||||
perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
|
||||
perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
|
||||
perf_cfg->vtbl.bo_wait_rendering =
|
||||
(bo_wait_rendering_t)crocus_bo_wait_rendering;
|
||||
perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
|
||||
|
||||
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
|
||||
screen->monitor_cfg = monitor_cfg;
|
||||
|
||||
/* a gallium "group" is equivalent to a gen "query"
|
||||
* a gallium "query" is equivalent to a gen "query_counter"
|
||||
*
|
||||
* Each gen_query supports a specific number of query_counters. To
|
||||
* allocate the array of crocus_monitor_counter, we need an upper bound
|
||||
* (ignoring duplicate query_counters).
|
||||
*/
|
||||
int gen_query_counters_count = 0;
|
||||
for (int gen_query_id = 0;
|
||||
gen_query_id < perf_cfg->n_queries;
|
||||
++gen_query_id) {
|
||||
gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
|
||||
}
|
||||
|
||||
monitor_cfg->counters = rzalloc_size(monitor_cfg,
|
||||
sizeof(struct crocus_monitor_counter) *
|
||||
gen_query_counters_count);
|
||||
if (unlikely(!monitor_cfg->counters))
|
||||
goto allocation_error;
|
||||
|
||||
int crocus_monitor_id = 0;
|
||||
for (int group = 0; group < perf_cfg->n_queries; ++group) {
|
||||
for (int counter = 0;
|
||||
counter < perf_cfg->queries[group].n_counters;
|
||||
++counter) {
|
||||
/* Check previously identified metrics to filter out duplicates. The
|
||||
* user is not helped by having the same metric available in several
|
||||
* groups. (n^2 algorithm).
|
||||
*/
|
||||
bool duplicate = false;
|
||||
for (int existing_group = 0;
|
||||
existing_group < group && !duplicate;
|
||||
++existing_group) {
|
||||
for (int existing_counter = 0;
|
||||
existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
|
||||
++existing_counter) {
|
||||
const char *current_name =
|
||||
perf_cfg->queries[group].counters[counter].name;
|
||||
const char *existing_name =
|
||||
perf_cfg->queries[existing_group].counters[existing_counter].name;
|
||||
if (strcmp(current_name, existing_name) == 0) {
|
||||
duplicate = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (duplicate)
|
||||
continue;
|
||||
monitor_cfg->counters[crocus_monitor_id].group = group;
|
||||
monitor_cfg->counters[crocus_monitor_id].counter = counter;
|
||||
++crocus_monitor_id;
|
||||
}
|
||||
}
|
||||
monitor_cfg->num_counters = crocus_monitor_id;
|
||||
return monitor_cfg->num_counters;
|
||||
|
||||
allocation_error:
|
||||
if (monitor_cfg)
|
||||
free(monitor_cfg->counters);
|
||||
free(perf_cfg);
|
||||
free(monitor_cfg);
|
||||
return false;
|
||||
}
|
||||
|
||||
int
|
||||
crocus_get_monitor_group_info(struct pipe_screen *pscreen,
|
||||
unsigned group_index,
|
||||
struct pipe_driver_query_group_info *info)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
|
||||
if (!screen->monitor_cfg) {
|
||||
if (!crocus_monitor_init_metrics(screen))
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
|
||||
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
|
||||
|
||||
if (!info) {
|
||||
/* return the count that can be queried */
|
||||
return perf_cfg->n_queries;
|
||||
}
|
||||
|
||||
if (group_index >= perf_cfg->n_queries) {
|
||||
/* out of range */
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
|
||||
|
||||
info->name = query->name;
|
||||
info->max_active_queries = query->n_counters;
|
||||
info->num_queries = query->n_counters;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_init_monitor_ctx(struct crocus_context *ice)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
|
||||
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
|
||||
|
||||
ice->perf_ctx = intel_perf_new_context(ice);
|
||||
if (unlikely(!ice->perf_ctx))
|
||||
return;
|
||||
|
||||
struct intel_perf_context *perf_ctx = ice->perf_ctx;
|
||||
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
|
||||
intel_perf_init_context(perf_ctx,
|
||||
perf_cfg,
|
||||
ice,
|
||||
ice,
|
||||
screen->bufmgr,
|
||||
&screen->devinfo,
|
||||
ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
|
||||
screen->fd);
|
||||
}
|
||||
|
||||
/* entry point for GenPerfMonitorsAMD */
|
||||
struct crocus_monitor_object *
|
||||
crocus_create_monitor_object(struct crocus_context *ice,
|
||||
unsigned num_queries,
|
||||
unsigned *query_types)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
|
||||
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
|
||||
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
|
||||
struct intel_perf_query_object *query_obj = NULL;
|
||||
|
||||
/* initialize perf context if this has not already been done. This
|
||||
* function is the first entry point that carries the gl context.
|
||||
*/
|
||||
if (ice->perf_ctx == NULL) {
|
||||
crocus_init_monitor_ctx(ice);
|
||||
}
|
||||
struct intel_perf_context *perf_ctx = ice->perf_ctx;
|
||||
|
||||
assert(num_queries > 0);
|
||||
int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
|
||||
assert(query_index <= monitor_cfg->num_counters);
|
||||
const int group = monitor_cfg->counters[query_index].group;
|
||||
|
||||
struct crocus_monitor_object *monitor =
|
||||
calloc(1, sizeof(struct crocus_monitor_object));
|
||||
if (unlikely(!monitor))
|
||||
goto allocation_failure;
|
||||
|
||||
monitor->num_active_counters = num_queries;
|
||||
monitor->active_counters = calloc(num_queries, sizeof(int));
|
||||
if (unlikely(!monitor->active_counters))
|
||||
goto allocation_failure;
|
||||
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
unsigned current_query = query_types[i];
|
||||
unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
|
||||
|
||||
/* all queries must be in the same group */
|
||||
assert(current_query_index <= monitor_cfg->num_counters);
|
||||
assert(monitor_cfg->counters[current_query_index].group == group);
|
||||
monitor->active_counters[i] =
|
||||
monitor_cfg->counters[current_query_index].counter;
|
||||
}
|
||||
|
||||
/* create the intel_perf_query */
|
||||
query_obj = intel_perf_new_query(perf_ctx, group);
|
||||
if (unlikely(!query_obj))
|
||||
goto allocation_failure;
|
||||
|
||||
monitor->query = query_obj;
|
||||
monitor->result_size = perf_cfg->queries[group].data_size;
|
||||
monitor->result_buffer = calloc(1, monitor->result_size);
|
||||
if (unlikely(!monitor->result_buffer))
|
||||
goto allocation_failure;
|
||||
|
||||
return monitor;
|
||||
|
||||
allocation_failure:
|
||||
if (monitor) {
|
||||
free(monitor->active_counters);
|
||||
free(monitor->result_buffer);
|
||||
}
|
||||
free(query_obj);
|
||||
free(monitor);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_destroy_monitor_object(struct pipe_context *ctx,
|
||||
struct crocus_monitor_object *monitor)
|
||||
{
|
||||
struct crocus_context *ice = (struct crocus_context *)ctx;
|
||||
|
||||
intel_perf_delete_query(ice->perf_ctx, monitor->query);
|
||||
free(monitor->result_buffer);
|
||||
monitor->result_buffer = NULL;
|
||||
free(monitor->active_counters);
|
||||
monitor->active_counters = NULL;
|
||||
free(monitor);
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_begin_monitor(struct pipe_context *ctx,
|
||||
struct crocus_monitor_object *monitor)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct intel_perf_context *perf_ctx = ice->perf_ctx;
|
||||
|
||||
return intel_perf_begin_query(perf_ctx, monitor->query);
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_end_monitor(struct pipe_context *ctx,
|
||||
struct crocus_monitor_object *monitor)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct intel_perf_context *perf_ctx = ice->perf_ctx;
|
||||
|
||||
intel_perf_end_query(perf_ctx, monitor->query);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_get_monitor_result(struct pipe_context *ctx,
|
||||
struct crocus_monitor_object *monitor,
|
||||
bool wait,
|
||||
union pipe_numeric_type_union *result)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct intel_perf_context *perf_ctx = ice->perf_ctx;
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
|
||||
bool monitor_ready =
|
||||
intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
|
||||
|
||||
if (!monitor_ready) {
|
||||
if (!wait)
|
||||
return false;
|
||||
intel_perf_wait_query(perf_ctx, monitor->query, batch);
|
||||
}
|
||||
|
||||
assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
|
||||
|
||||
unsigned bytes_written;
|
||||
intel_perf_get_query_data(perf_ctx, monitor->query, batch,
|
||||
monitor->result_size,
|
||||
(unsigned*) monitor->result_buffer,
|
||||
&bytes_written);
|
||||
if (bytes_written != monitor->result_size)
|
||||
return false;
|
||||
|
||||
/* copy metrics into the batch result */
|
||||
for (int i = 0; i < monitor->num_active_counters; ++i) {
|
||||
int current_counter = monitor->active_counters[i];
|
||||
const struct intel_perf_query_info *info =
|
||||
intel_perf_query_info(monitor->query);
|
||||
const struct intel_perf_query_counter *counter =
|
||||
&info->counters[current_counter];
|
||||
assert(intel_perf_query_counter_get_size(counter));
|
||||
switch (counter->data_type) {
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
||||
result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
||||
result[i].f = *(float*)(monitor->result_buffer + counter->offset);
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||
result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
|
||||
double v = *(double*)(monitor->result_buffer + counter->offset);
|
||||
result[i].f = v;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("unexpected counter data type");
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_MONITOR_H
#define CROCUS_MONITOR_H

#include "pipe/p_screen.h"

/* One exposed metric: the gen query ("group") it belongs to and the
 * counter index within that query.
 */
struct crocus_monitor_counter {
   int group;
   int counter;
};

struct crocus_monitor_config {
   struct intel_perf_config *perf_cfg;

   /* gallium requires an index for each counter */
   int num_counters;
   struct crocus_monitor_counter *counters;
};

struct crocus_context;
struct crocus_screen;
struct pipe_query;

int crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
                            struct pipe_driver_query_info *info);
int crocus_get_monitor_group_info(struct pipe_screen *pscreen,
                                  unsigned index,
                                  struct pipe_driver_query_group_info *info);

struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context *ice,
                             unsigned num_queries,
                             unsigned *query_types);

void crocus_destroy_monitor_object(struct pipe_context *ctx,
                                   struct crocus_monitor_object *monitor);

bool crocus_begin_monitor(struct pipe_context *ctx,
                          struct crocus_monitor_object *monitor);
bool crocus_end_monitor(struct pipe_context *ctx,
                        struct crocus_monitor_object *monitor);

bool crocus_get_monitor_result(struct pipe_context *ctx,
                               struct crocus_monitor_object *monitor,
                               bool wait,
                               union pipe_numeric_type_union *result);

#endif /* CROCUS_MONITOR_H */
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CROCUS_PIPE_H
|
||||
#define CROCUS_PIPE_H
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
static inline gl_shader_stage
|
||||
stage_from_pipe(enum pipe_shader_type pstage)
|
||||
{
|
||||
static const gl_shader_stage stages[PIPE_SHADER_TYPES] = {
|
||||
[PIPE_SHADER_VERTEX] = MESA_SHADER_VERTEX,
|
||||
[PIPE_SHADER_TESS_CTRL] = MESA_SHADER_TESS_CTRL,
|
||||
[PIPE_SHADER_TESS_EVAL] = MESA_SHADER_TESS_EVAL,
|
||||
[PIPE_SHADER_GEOMETRY] = MESA_SHADER_GEOMETRY,
|
||||
[PIPE_SHADER_FRAGMENT] = MESA_SHADER_FRAGMENT,
|
||||
[PIPE_SHADER_COMPUTE] = MESA_SHADER_COMPUTE,
|
||||
};
|
||||
return stages[pstage];
|
||||
}
|
||||
|
||||
static inline enum pipe_shader_type
|
||||
stage_to_pipe(gl_shader_stage stage)
|
||||
{
|
||||
static const enum pipe_shader_type pstages[MESA_SHADER_STAGES] = {
|
||||
[MESA_SHADER_VERTEX] = PIPE_SHADER_VERTEX,
|
||||
[MESA_SHADER_TESS_CTRL] = PIPE_SHADER_TESS_CTRL,
|
||||
[MESA_SHADER_TESS_EVAL] = PIPE_SHADER_TESS_EVAL,
|
||||
[MESA_SHADER_GEOMETRY] = PIPE_SHADER_GEOMETRY,
|
||||
[MESA_SHADER_FRAGMENT] = PIPE_SHADER_FRAGMENT,
|
||||
[MESA_SHADER_COMPUTE] = PIPE_SHADER_COMPUTE,
|
||||
};
|
||||
return pstages[stage];
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the HW's
|
||||
* "Shader Channel Select" enumerations (i.e. SCS_RED). The mappings are
|
||||
*
|
||||
* SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
|
||||
* 0 1 2 3 4 5
|
||||
* 4 5 6 7 0 1
|
||||
* SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
|
||||
*
|
||||
* which is simply adding 4 then modding by 8 (or anding with 7).
|
||||
*/
|
||||
static inline enum isl_channel_select
|
||||
pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
|
||||
{
|
||||
return (swizzle + 4) & 7;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,368 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_pipe_control.c
|
||||
*
|
||||
* PIPE_CONTROL is the main flushing and synchronization primitive on Intel
|
||||
* GPUs. It can invalidate caches, stall until rendering reaches various
|
||||
* stages of completion, write to memory, and other things. In a way, it's
|
||||
* a swiss army knife command - it has all kinds of capabilities, but some
|
||||
* significant limitations as well.
|
||||
*
|
||||
* Unfortunately, it's notoriously complicated and difficult to use. Many
|
||||
* sub-commands can't be used together. Some are meant to be used at the
|
||||
* top of the pipeline (invalidating caches before drawing), while some are
|
||||
* meant to be used at the end (stalling or flushing after drawing).
|
||||
*
|
||||
* Also, there's a list of restrictions a mile long, which vary by generation.
|
||||
* Do this before doing that, or suffer the consequences (usually a GPU hang).
|
||||
*
|
||||
* This file contains helpers for emitting them safely. You can simply call
|
||||
* crocus_emit_pipe_control_flush() with the desired operations (as logical
|
||||
* PIPE_CONTROL_* bits), and it will take care of splitting it into multiple
|
||||
* PIPE_CONTROL commands as necessary. The per-generation workarounds are
|
||||
* applied in crocus_emit_raw_pipe_control() in crocus_state.c.
|
||||
*/
|
||||
|
||||
#include "crocus_context.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/set.h"
|
||||
|
||||
/**
 * Emit a PIPE_CONTROL with various flushing flags.
 *
 * The caller is responsible for deciding what flags are appropriate for the
 * given generation.  Flush and invalidate bits may not be combined on Gen6+;
 * this helper splits such requests into a safe two-step sequence.
 */
void
crocus_emit_pipe_control_flush(struct crocus_batch *batch,
                               const char *reason,
                               uint32_t flags)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (devinfo->ver >= 6 &&
       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
      /* A pipe control command with flush and invalidate bits set
       * simultaneously is an inherently racy operation on Gen6+ if the
       * contents of the flushed caches were intended to become visible from
       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
       * first one should stall the pipeline to make sure that the flushed R/W
       * caches are coherent with memory once the specified R/O caches are
       * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
       * invalidation seems to happen at the bottom of the pipeline together
       * with any write cache flush, so this shouldn't be a concern.  In order
       * to ensure a full stall, we do an end-of-pipe sync.
       */
      crocus_emit_end_of_pipe_sync(batch, reason,
                                   flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
      /* The end-of-pipe sync has already flushed and stalled, so drop those
       * bits from the remaining (invalidate-only) PIPE_CONTROL below.
       */
      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
   }

   batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0);
}
|
||||
|
||||
/**
|
||||
* Emit a PIPE_CONTROL that writes to a buffer object.
|
||||
*
|
||||
* \p flags should contain one of the following items:
|
||||
* - PIPE_CONTROL_WRITE_IMMEDIATE
|
||||
* - PIPE_CONTROL_WRITE_TIMESTAMP
|
||||
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
|
||||
*/
|
||||
void
|
||||
crocus_emit_pipe_control_write(struct crocus_batch *batch,
|
||||
const char *reason, uint32_t flags,
|
||||
struct crocus_bo *bo, uint32_t offset,
|
||||
uint64_t imm)
|
||||
{
|
||||
batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, bo, offset, imm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Restriction [DevSNB, DevIVB]:
|
||||
*
|
||||
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
|
||||
* 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
|
||||
* 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
|
||||
* (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
|
||||
* cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
|
||||
* another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
|
||||
* unless SW can otherwise guarantee that the pipeline from WM onwards is
|
||||
* already flushed (e.g., via a preceding MI_FLUSH).
|
||||
*/
|
||||
void
|
||||
crocus_emit_depth_stall_flushes(struct crocus_batch *batch)
|
||||
{
|
||||
UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
|
||||
|
||||
assert(devinfo->ver >= 6);
|
||||
|
||||
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
|
||||
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_CACHE_FLUSH);
|
||||
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
|
||||
}
|
||||
|
||||
/*
 * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
 *
 *    Write synchronization is a special case of end-of-pipe
 *    synchronization that requires that the render cache and/or depth
 *    related caches are flushed to memory, where the data will become
 *    globally visible.  This type of synchronization is required prior to
 *    SW (CPU) actually reading the result data from memory, or initiating
 *    an operation that will use as a read surface (such as a texture
 *    surface) a previous render target and/or depth/stencil buffer
 *
 * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
 *
 *    Exercising the write cache flush bits (Render Target Cache Flush
 *    Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
 *    ensures the write caches are flushed and doesn't guarantee the data
 *    is globally visible.
 *
 *    SW can track the completion of the end-of-pipe-synchronization by
 *    using "Notify Enable" and "PostSync Operation - Write Immediate
 *    Data" in the PIPE_CONTROL command.
 */
void
crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
                             const char *reason, uint32_t flags)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (devinfo->ver >= 6) {
      /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
       *
       *    "The most common action to perform upon reaching a synchronization
       *    point is to write a value out to memory.  An immediate value
       *    (included with the synchronization command) may be written."
       *
       * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
       *
       *    "In case the data flushed out by the render engine is to be read
       *    back in to the render engine in coherent manner, then the render
       *    engine has to wait for the fence completion before accessing the
       *    flushed data.  This can be achieved by following means on various
       *    products: PIPE_CONTROL command with CS Stall and the required
       *    write caches flushed with Post-Sync-Operation as Write Immediate
       *    Data.
       *
       *    Example:
       *       - Workload-1 (3D/GPGPU/MEDIA)
       *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
       *         Data, Required Write Cache Flush bits set)
       *       - Workload-2 (Can use the data produce or output by Workload-1)
       */
      crocus_emit_pipe_control_write(batch, reason,
                                     flags | PIPE_CONTROL_CS_STALL |
                                     PIPE_CONTROL_WRITE_IMMEDIATE,
                                     batch->ice->workaround_bo,
                                     batch->ice->workaround_offset, 0);

      if (batch->screen->devinfo.is_haswell) {
#define GEN7_3DPRIM_START_INSTANCE      0x243C
         /* NOTE(review): reading the just-written value back into a scratch
          * register appears intended to force the CS to wait until the
          * post-sync write has actually landed — confirm against i965's
          * Haswell end-of-pipe workaround.
          */
         batch->screen->vtbl.load_register_mem32(batch, GEN7_3DPRIM_START_INSTANCE,
                                                 batch->ice->workaround_bo,
                                                 batch->ice->workaround_offset);
      }
   } else {
      /* On gen4-5, a regular pipe control seems to suffice. */
      crocus_emit_pipe_control_flush(batch, reason, flags);
   }
}
|
||||
|
||||
/* Emit a pipelined flush to either flush render and texture cache for
|
||||
* reading from a FBO-drawn texture, or flush so that frontbuffer
|
||||
* render appears on the screen in DRI1.
|
||||
*
|
||||
* This is also used for the always_flush_cache driconf debug option.
|
||||
*/
|
||||
void
|
||||
crocus_emit_mi_flush(struct crocus_batch *batch)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &batch->screen->devinfo;
|
||||
int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
|
||||
if (devinfo->ver >= 6) {
|
||||
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
|
||||
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_DATA_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_VF_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_CS_STALL;
|
||||
}
|
||||
crocus_emit_pipe_control_flush(batch, "mi flush", flags);
|
||||
}
|
||||
|
||||
/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch)
{
   /* Step 1: CS stall qualified by stall-at-scoreboard (see above). */
   crocus_emit_pipe_control_flush(batch, "nonzero",
                                  PIPE_CONTROL_CS_STALL |
                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);

   /* Step 2: the actual non-zero post-sync op — an immediate write to the
    * scratch workaround BO; the written value itself is irrelevant.
    */
   crocus_emit_pipe_control_write(batch, "nonzero",
                                  PIPE_CONTROL_WRITE_IMMEDIATE,
                                  batch->ice->workaround_bo,
                                  batch->ice->workaround_offset, 0);
}
|
||||
|
||||
/**
|
||||
* Flush and invalidate all caches (for debugging purposes).
|
||||
*/
|
||||
void
|
||||
crocus_flush_all_caches(struct crocus_batch *batch)
|
||||
{
|
||||
crocus_emit_pipe_control_flush(batch, "debug: flush all caches",
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_DATA_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_VF_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_texture_barrier(struct pipe_context *ctx, unsigned flags)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct crocus_batch *render_batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_batch *compute_batch = &ice->batches[CROCUS_BATCH_COMPUTE];
|
||||
const struct intel_device_info *devinfo = &render_batch->screen->devinfo;
|
||||
|
||||
if (devinfo->ver < 6) {
|
||||
crocus_emit_mi_flush(render_batch);
|
||||
return;
|
||||
}
|
||||
|
||||
if (render_batch->contains_draw) {
|
||||
crocus_batch_maybe_flush(render_batch, 48);
|
||||
crocus_emit_pipe_control_flush(render_batch,
|
||||
"API: texture barrier (1/2)",
|
||||
(flags == 1 ? PIPE_CONTROL_DEPTH_CACHE_FLUSH : 0) |
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
crocus_emit_pipe_control_flush(render_batch,
|
||||
"API: texture barrier (2/2)",
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
|
||||
}
|
||||
|
||||
if (compute_batch->contains_draw) {
|
||||
crocus_batch_maybe_flush(compute_batch, 48);
|
||||
crocus_emit_pipe_control_flush(compute_batch,
|
||||
"API: texture barrier (1/2)",
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
crocus_emit_pipe_control_flush(compute_batch,
|
||||
"API: texture barrier (2/2)",
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
|
||||
const struct intel_device_info *devinfo = &ice->batches[0].screen->devinfo;
|
||||
|
||||
assert(devinfo->ver == 7);
|
||||
|
||||
if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
|
||||
PIPE_BARRIER_INDEX_BUFFER |
|
||||
PIPE_BARRIER_INDIRECT_BUFFER)) {
|
||||
bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
|
||||
}
|
||||
|
||||
if (flags & PIPE_BARRIER_CONSTANT_BUFFER) {
|
||||
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_CONST_CACHE_INVALIDATE;
|
||||
}
|
||||
|
||||
if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) {
|
||||
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH;
|
||||
}
|
||||
|
||||
/* Typed surface messages are handled by the render cache on IVB, so we
|
||||
* need to flush it too.
|
||||
*/
|
||||
if (!devinfo->is_haswell)
|
||||
bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
|
||||
|
||||
for (int i = 0; i < ice->batch_count; i++) {
|
||||
if (ice->batches[i].contains_draw) {
|
||||
crocus_batch_maybe_flush(&ice->batches[i], 24);
|
||||
crocus_emit_pipe_control_flush(&ice->batches[i], "API: memory barrier",
|
||||
bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
crocus_init_flush_functions(struct pipe_context *ctx)
|
||||
{
|
||||
ctx->memory_barrier = crocus_memory_barrier;
|
||||
ctx->texture_barrier = crocus_texture_barrier;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,347 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_program_cache.c
|
||||
*
|
||||
* The in-memory program cache. This is basically a hash table mapping
|
||||
* API-specified shaders and a state key to a compiled variant. It also
|
||||
* takes care of uploading shader assembly into a BO for use on the GPU.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "intel/compiler/brw_eu.h"
|
||||
#include "intel/compiler/brw_nir.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_resource.h"
|
||||
|
||||
struct keybox {
|
||||
uint16_t size;
|
||||
enum crocus_program_cache_id cache_id;
|
||||
uint8_t data[0];
|
||||
};
|
||||
|
||||
static struct keybox *
|
||||
make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
|
||||
const void *key, uint32_t key_size)
|
||||
{
|
||||
struct keybox *keybox =
|
||||
ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
|
||||
|
||||
keybox->cache_id = cache_id;
|
||||
keybox->size = key_size;
|
||||
memcpy(keybox->data, key, key_size);
|
||||
|
||||
return keybox;
|
||||
}
|
||||
|
||||
/* Hash a keybox: mixes the cache_id and the trailing key bytes in one pass.
 *
 * NOTE(review): this relies on data[] immediately following cache_id in
 * struct keybox with no intervening padding — the hashed span starts at
 * &key->cache_id and covers sizeof(cache_id) + size bytes.  Confirm if the
 * struct layout ever changes.
 */
static uint32_t
keybox_hash(const void *void_key)
{
   const struct keybox *key = void_key;
   return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
}
|
||||
|
||||
static bool
|
||||
keybox_equals(const void *void_a, const void *void_b)
|
||||
{
|
||||
const struct keybox *a = void_a, *b = void_b;
|
||||
if (a->size != b->size)
|
||||
return false;
|
||||
|
||||
return memcmp(a->data, b->data, a->size) == 0;
|
||||
}
|
||||
|
||||
struct crocus_compiled_shader *
|
||||
crocus_find_cached_shader(struct crocus_context *ice,
|
||||
enum crocus_program_cache_id cache_id,
|
||||
uint32_t key_size, const void *key)
|
||||
{
|
||||
struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(ice->shaders.cache, keybox);
|
||||
|
||||
ralloc_free(keybox);
|
||||
|
||||
return entry ? entry->data : NULL;
|
||||
}
|
||||
|
||||
const void *
|
||||
crocus_find_previous_compile(const struct crocus_context *ice,
|
||||
enum crocus_program_cache_id cache_id,
|
||||
unsigned program_string_id)
|
||||
{
|
||||
hash_table_foreach(ice->shaders.cache, entry) {
|
||||
const struct keybox *keybox = entry->key;
|
||||
const struct brw_base_prog_key *key = (const void *)keybox->data;
|
||||
if (keybox->cache_id == cache_id &&
|
||||
key->program_string_id == program_string_id) {
|
||||
return keybox->data;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Look for an existing entry in the cache that has identical assembly code.
|
||||
*
|
||||
* This is useful for programs generating shaders at runtime, where multiple
|
||||
* distinct shaders (from an API perspective) may compile to the same assembly
|
||||
* in our backend. This saves space in the program cache buffer.
|
||||
*/
|
||||
static const struct crocus_compiled_shader *
|
||||
find_existing_assembly(struct hash_table *cache, void *map,
|
||||
const void *assembly, unsigned assembly_size)
|
||||
{
|
||||
hash_table_foreach (cache, entry) {
|
||||
const struct crocus_compiled_shader *existing = entry->data;
|
||||
|
||||
if (existing->map_size != assembly_size)
|
||||
continue;
|
||||
|
||||
if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
|
||||
return existing;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Grow the program cache: allocate a larger BO, copy the uploaded shaders
 * over, and swap it in for the old one.
 */
static void
crocus_cache_new_bo(struct crocus_context *ice,
                    uint32_t new_size)
{
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   struct crocus_bo *new_bo;
   new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);

   /* NOTE(review): neither the allocation nor the mapping is checked for
    * failure — confirm whether crocus_bo_alloc/crocus_bo_map can return
    * NULL under memory pressure.
    */
   void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
                             MAP_ASYNC | MAP_PERSISTENT);

   /* Preserve everything uploaded so far, before dropping the old map. */
   if (ice->shaders.cache_next_offset != 0) {
      memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
   }

   crocus_bo_unmap(ice->shaders.cache_bo);
   crocus_bo_unreference(ice->shaders.cache_bo);
   ice->shaders.cache_bo = new_bo;
   ice->shaders.cache_bo_map = map;

   if (screen->devinfo.ver == 4) {
      /* reemit all shaders on GEN4 only. */
      ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
                          CROCUS_DIRTY_WM;
   }
   /* The cache BO moved; presumably the instruction base address points at
    * it, so force STATE_BASE_ADDRESS to be re-emitted on both batches.
    */
   ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
   ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
   /* unset state base address */
}
|
||||
|
||||
static uint32_t
|
||||
crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
|
||||
{
|
||||
if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
|
||||
uint32_t new_size = ice->shaders.cache_bo->size * 2;
|
||||
while (ice->shaders.cache_next_offset + size > new_size)
|
||||
new_size *= 2;
|
||||
|
||||
crocus_cache_new_bo(ice, new_size);
|
||||
}
|
||||
uint32_t offset = ice->shaders.cache_next_offset;
|
||||
|
||||
/* Programs are always 64-byte aligned, so set up the next one now */
|
||||
ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
 * Upload a compiled shader variant into the program cache.
 *
 * Copies the assembly into the cache BO (or reuses an identical existing
 * copy), records the metadata on a new crocus_compiled_shader, takes ralloc
 * ownership of the caller-provided prog_data/streamout/system_values, and
 * inserts the result into the cache keyed by (cache_id, key).
 */
struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context *ice,
                     enum crocus_program_cache_id cache_id, uint32_t key_size,
                     const void *key, const void *assembly, uint32_t asm_size,
                     struct brw_stage_prog_data *prog_data,
                     uint32_t prog_data_size, uint32_t *streamout,
                     enum brw_param_builtin *system_values,
                     unsigned num_system_values, unsigned num_cbufs,
                     const struct crocus_binding_table *bt)
{
   struct hash_table *cache = ice->shaders.cache;
   struct crocus_compiled_shader *shader =
      rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
   const struct crocus_compiled_shader *existing = find_existing_assembly(
      cache, ice->shaders.cache_bo_map, assembly, asm_size);

   /* If we can find a matching prog in the cache already, then reuse the
    * existing stuff without creating new copy into the underlying buffer
    * object.  This is notably useful for programs generating shaders at
    * runtime, where multiple shaders may compile to the same thing in our
    * backend.
    */
   if (existing) {
      shader->offset = existing->offset;
      shader->map_size = existing->map_size;
   } else {
      shader->offset = crocus_alloc_item_data(ice, asm_size);
      shader->map_size = asm_size;

      memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
   }

   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;
   shader->streamout = streamout;
   shader->system_values = system_values;
   shader->num_system_values = num_system_values;
   shader->num_cbufs = num_cbufs;
   shader->bt = *bt;

   /* Tie the lifetimes of the caller's allocations to the shader object. */
   ralloc_steal(shader, shader->prog_data);
   /* NOTE(review): the `> 16` size check presumably distinguishes a real
    * brw_stage_prog_data (which has param/pull_param arrays) from a small
    * placeholder blob — confirm the threshold's origin.
    */
   if (prog_data_size > 16) {
      ralloc_steal(shader->prog_data, prog_data->param);
      ralloc_steal(shader->prog_data, prog_data->pull_param);
   }
   ralloc_steal(shader, shader->streamout);
   ralloc_steal(shader, shader->system_values);

   /* The keybox is parented to the shader so it dies with the entry. */
   struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
   _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);

   return shader;
}
|
||||
|
||||
bool
|
||||
crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
|
||||
uint32_t key_size, uint32_t *kernel_out,
|
||||
void *prog_data_out)
|
||||
{
|
||||
struct blorp_context *blorp = blorp_batch->blorp;
|
||||
struct crocus_context *ice = blorp->driver_ctx;
|
||||
struct crocus_compiled_shader *shader =
|
||||
crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
|
||||
|
||||
if (!shader)
|
||||
return false;
|
||||
|
||||
*kernel_out = shader->offset;
|
||||
*((void **)prog_data_out) = shader->prog_data;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
|
||||
const void *key, uint32_t key_size,
|
||||
const void *kernel, uint32_t kernel_size,
|
||||
const struct brw_stage_prog_data *prog_data_templ,
|
||||
uint32_t prog_data_size, uint32_t *kernel_out,
|
||||
void *prog_data_out)
|
||||
{
|
||||
struct blorp_context *blorp = blorp_batch->blorp;
|
||||
struct crocus_context *ice = blorp->driver_ctx;
|
||||
|
||||
struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
|
||||
memcpy(prog_data, prog_data_templ, prog_data_size);
|
||||
|
||||
struct crocus_binding_table bt;
|
||||
memset(&bt, 0, sizeof(bt));
|
||||
|
||||
struct crocus_compiled_shader *shader = crocus_upload_shader(
|
||||
ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
|
||||
prog_data_size, NULL, NULL, 0, 0, &bt);
|
||||
|
||||
*kernel_out = shader->offset;
|
||||
*((void **)prog_data_out) = shader->prog_data;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
crocus_init_program_cache(struct crocus_context *ice)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
|
||||
ice->shaders.cache =
|
||||
_mesa_hash_table_create(ice, keybox_hash, keybox_equals);
|
||||
|
||||
ice->shaders.cache_bo =
|
||||
crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
|
||||
ice->shaders.cache_bo_map =
|
||||
crocus_bo_map(NULL, ice->shaders.cache_bo,
|
||||
MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
|
||||
}
|
||||
|
||||
void
|
||||
crocus_destroy_program_cache(struct crocus_context *ice)
|
||||
{
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
ice->shaders.prog[i] = NULL;
|
||||
}
|
||||
|
||||
if (ice->shaders.cache_bo) {
|
||||
crocus_bo_unmap(ice->shaders.cache_bo);
|
||||
crocus_bo_unreference(ice->shaders.cache_bo);
|
||||
ice->shaders.cache_bo_map = NULL;
|
||||
ice->shaders.cache_bo = NULL;
|
||||
}
|
||||
|
||||
ralloc_free(ice->shaders.cache);
|
||||
}
|
||||
|
||||
static const char *
|
||||
cache_name(enum crocus_program_cache_id cache_id)
|
||||
{
|
||||
if (cache_id == CROCUS_CACHE_BLORP)
|
||||
return "BLORP";
|
||||
|
||||
if (cache_id == CROCUS_CACHE_SF)
|
||||
return "SF";
|
||||
|
||||
if (cache_id == CROCUS_CACHE_CLIP)
|
||||
return "CLIP";
|
||||
|
||||
if (cache_id == CROCUS_CACHE_FF_GS)
|
||||
return "FF_GS";
|
||||
|
||||
return _mesa_shader_stage_to_string(cache_id);
|
||||
}
|
||||
|
||||
void
|
||||
crocus_print_program_cache(struct crocus_context *ice)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
hash_table_foreach(ice->shaders.cache, entry) {
|
||||
const struct keybox *keybox = entry->key;
|
||||
struct crocus_compiled_shader *shader = entry->data;
|
||||
fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
|
||||
brw_disassemble(devinfo, ice->shaders.cache_bo_map + shader->offset, 0,
|
||||
shader->prog_data->program_size, NULL, stderr);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,996 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_query.c
|
||||
*
|
||||
* ============================= GENXML CODE =============================
|
||||
* [This file is compiled once per generation.]
|
||||
* =======================================================================
|
||||
*
|
||||
* Query object support. This allows measuring various simple statistics
|
||||
* via counters on the GPU. We use GenX code for MI_MATH calculations.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "perf/intel_perf.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_defines.h"
|
||||
#include "crocus_fence.h"
|
||||
#include "crocus_monitor.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
|
||||
#include "crocus_genx_macros.h"
|
||||
|
||||
#if GFX_VER == 6
|
||||
// TODO: Add these to genxml?
|
||||
#define SO_PRIM_STORAGE_NEEDED(n) (0x2280)
|
||||
#define SO_NUM_PRIMS_WRITTEN(n) (0x2288)
|
||||
|
||||
// TODO: remove HS/DS/CS
|
||||
#define GFX6_IA_VERTICES_COUNT_num 0x2310
|
||||
#define GFX6_IA_PRIMITIVES_COUNT_num 0x2318
|
||||
#define GFX6_VS_INVOCATION_COUNT_num 0x2320
|
||||
#define GFX6_HS_INVOCATION_COUNT_num 0x2300
|
||||
#define GFX6_DS_INVOCATION_COUNT_num 0x2308
|
||||
#define GFX6_GS_INVOCATION_COUNT_num 0x2328
|
||||
#define GFX6_GS_PRIMITIVES_COUNT_num 0x2330
|
||||
#define GFX6_CL_INVOCATION_COUNT_num 0x2338
|
||||
#define GFX6_CL_PRIMITIVES_COUNT_num 0x2340
|
||||
#define GFX6_PS_INVOCATION_COUNT_num 0x2348
|
||||
#define GFX6_CS_INVOCATION_COUNT_num 0x2290
|
||||
#define GFX6_PS_DEPTH_COUNT_num 0x2350
|
||||
|
||||
#elif GFX_VER == 7
|
||||
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
|
||||
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
|
||||
#endif
|
||||
|
||||
struct crocus_query {
   enum pipe_query_type type;
   int index;                       /* query sub-index (stream, statistic) */

   /* Result is computed and stored in `result`; no more GPU work needed. */
   bool ready;

   /* A stalling command (CS stall / flush) has been emitted for this query. */
   bool stalled;

   uint64_t result;                 /* final CPU-side result once `ready` */

   /* Location of the snapshot buffer in the query uploader. */
   struct crocus_state_ref query_state_ref;
   struct crocus_query_snapshots *map;   /* CPU mapping of the snapshots */
   struct crocus_syncobj *syncobj;       /* signals when snapshot writes land */

   int batch_idx;                   /* CROCUS_BATCH_RENDER or _COMPUTE */

   struct crocus_monitor_object *monitor;   /* non-NULL for perf batch queries */

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};
|
||||
|
||||
/** GPU-visible layout of the snapshot buffer for ordinary queries. */
struct crocus_query_snapshots {
   /** crocus_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};
|
||||
|
||||
/**
 * GPU-visible snapshot layout for streamout-overflow queries.
 *
 * The first two fields deliberately mirror crocus_query_snapshots so the
 * availability/predicate plumbing works for either layout.
 */
struct crocus_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   /* Per-stream begin/end ([0]/[1]) counter snapshots. */
   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
static struct mi_value
|
||||
query_mem64(struct crocus_query *q, uint32_t offset)
|
||||
{
|
||||
return mi_mem64(rw_bo(crocus_resource_bo(q->query_state_ref.res),
|
||||
q->query_state_ref.offset + offset));
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Is this type of query written by PIPE_CONTROL?
|
||||
*/
|
||||
static bool
|
||||
crocus_is_query_pipelined(struct crocus_query *q)
|
||||
{
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write `true` to the query's snapshots_landed field once the result
 * snapshots are in memory.  Only HSW tracks availability this way; on
 * other generations this is a no-op and the syncobj is the only signal.
 */
static void
mark_available(struct crocus_context *ice, struct crocus_query *q)
{
#if GFX_VERx10 == 75
   struct crocus_batch *batch = &ice->batches[q->batch_idx];
   struct crocus_screen *screen = batch->screen;
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct crocus_query_snapshots, snapshots_landed);
   struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!crocus_is_query_pipelined(q)) {
      /* Non-pipelined snapshots were written with a stall; a plain
       * immediate store is already ordered after them.
       */
      screen->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      crocus_emit_pipe_control_write(batch, "query: mark available",
                                     flags, bo, offset, true);
   }
#endif
}
|
||||
|
||||
/**
|
||||
* Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
|
||||
*/
|
||||
static void
|
||||
crocus_pipelined_write(struct crocus_batch *batch,
|
||||
struct crocus_query *q,
|
||||
enum pipe_control_flags flags,
|
||||
unsigned offset)
|
||||
{
|
||||
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
|
||||
|
||||
crocus_emit_pipe_control_write(batch, "query: pipelined snapshot write",
|
||||
flags,
|
||||
bo, offset, 0ull);
|
||||
}
|
||||
|
||||
/**
 * Record one counter snapshot for the query at `offset` into its buffer.
 *
 * Pipelined queries (occlusion/timestamp) use PIPE_CONTROL writes;
 * statistics and streamout queries store the relevant counter register
 * to memory (gfx6+ only).  Non-pipelined queries stall first so the
 * counters reflect all previously issued work.
 */
static void
write_value(struct crocus_context *ice, struct crocus_query *q, unsigned offset)
{
   struct crocus_batch *batch = &ice->batches[q->batch_idx];
#if GFX_VER >= 6
   struct crocus_screen *screen = batch->screen;
   struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#endif

   if (!crocus_is_query_pipelined(q)) {
      crocus_emit_pipe_control_flush(batch,
                                     "query: non-pipelined snapshot write",
                                     PIPE_CONTROL_CS_STALL |
                                     PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      /* Occlusion queries snapshot PS_DEPTH_COUNT after a depth stall. */
      crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
                             PIPE_CONTROL_WRITE_DEPTH_COUNT |
                             PIPE_CONTROL_DEPTH_STALL,
                             offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
                             PIPE_CONTROL_WRITE_TIMESTAMP,
                             offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
#if GFX_VER >= 6
      /* Stream 0 uses the clipper invocation count; other streams use the
       * SO primitive-storage-needed counter for that stream.
       */
      screen->vtbl.store_register_mem64(batch,
                                        q->index == 0 ?
                                        GENX(CL_INVOCATION_COUNT_num) :
                                        SO_PRIM_STORAGE_NEEDED(q->index),
                                        bo, offset, false);
#endif
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
#if GFX_VER >= 6
      screen->vtbl.store_register_mem64(batch,
                                        SO_NUM_PRIMS_WRITTEN(q->index),
                                        bo, offset, false);
#endif
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
#if GFX_VER >= 6
      /* Maps PIPE_STAT_QUERY_* indices to the matching counter register. */
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      uint32_t reg = index_to_reg[q->index];

#if GFX_VER == 6
      /* Gfx6 GS code counts full primitives, that is, it won't count individual
       * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that.
       */
      if (q->index == PIPE_STAT_QUERY_GS_PRIMITIVES)
         reg = GENX(CL_INVOCATION_COUNT_num);
#endif

      screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
#endif
      break;
   }
   default:
      assert(false);
   }
}
|
||||
|
||||
#if GFX_VER >= 6
|
||||
static void
|
||||
write_overflow_values(struct crocus_context *ice, struct crocus_query *q, bool end)
|
||||
{
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
|
||||
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
|
||||
uint32_t offset = q->query_state_ref.offset;
|
||||
crocus_emit_pipe_control_flush(batch,
|
||||
"query: write SO overflow snapshots",
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
int s = q->index + i;
|
||||
int g_idx = offset + offsetof(struct crocus_query_so_overflow,
|
||||
stream[s].num_prims[end]);
|
||||
int w_idx = offset + offsetof(struct crocus_query_so_overflow,
|
||||
stream[s].prim_storage_needed[end]);
|
||||
screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
|
||||
bo, g_idx, false);
|
||||
screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
|
||||
bo, w_idx, false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
static uint64_t
|
||||
crocus_raw_timestamp_delta(uint64_t time0, uint64_t time1)
|
||||
{
|
||||
if (time0 > time1) {
|
||||
return (1ULL << TIMESTAMP_BITS) + time1 - time0;
|
||||
} else {
|
||||
return time1 - time0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
stream_overflowed(struct crocus_query_so_overflow *so, int s)
|
||||
{
|
||||
return (so->stream[s].prim_storage_needed[1] -
|
||||
so->stream[s].prim_storage_needed[0]) !=
|
||||
(so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
|
||||
}
|
||||
|
||||
/**
 * Compute q->result on the CPU from the landed snapshots and mark the
 * query ready.  Caller must ensure the snapshots have actually landed.
 */
static void
calculate_result_on_cpu(const struct intel_device_info *devinfo,
                        struct crocus_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = intel_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* Wrap-aware raw delta, then convert ticks to nanoseconds. */
      q->result = crocus_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = intel_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GFX_VER == 7 && devinfo->is_haswell && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
|
||||
|
||||
#if GFX_VERx10 == 75
/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 *
 * Non-zero means the stream overflowed.  Emitted with MI_MATH (HSW only).
 */
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
                         struct crocus_query *q,
                         int idx)
{
   /* Shorthand for a 64-bit memory operand on one snapshot field. */
#define C(counter, i) query_mem64(q, \
   offsetof(struct crocus_query_so_overflow, stream[idx].counter[i]))

   return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                  mi_isub(b, C(prim_storage_needed, 1),
                          C(prim_storage_needed, 0)));
#undef C
}
|
||||
|
||||
/**
 * Calculate whether any stream has overflowed, as the bitwise OR of the
 * per-stream overflow values (non-zero result means some stream overflowed).
 */
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct crocus_query *q)
{
   struct mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = mi_ior(b, result, stream_result[i]);

   return result;
}
|
||||
|
||||
|
||||
static bool
|
||||
query_is_boolean(enum pipe_query_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Calculate the query result using MI_MATH, returning an mi_value holding
 * it.  Mirrors calculate_result_on_cpu, but emitted into the batch so the
 * result can be written to a query buffer object without a CPU round trip.
 */
static struct mi_value
calculate_result_on_gpu(const struct intel_device_info *devinfo,
                        struct mi_builder *b,
                        struct crocus_query *q)
{
   struct mi_value result;
   struct mi_value start_val =
      query_mem64(q, offsetof(struct crocus_query_snapshots, start));
   struct mi_value end_val =
      query_mem64(q, offsetof(struct crocus_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_iand(b, mi_imm((1ull << 36) - 1),
                       mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      /* Plain counter queries: end - start. */
      result = mi_isub(b, end_val, start_val);
      break;
   }
   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GFX_VER == 7 && devinfo->is_haswell &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = mi_ushr32_imm(b, result, 2);

   /* Predicates collapse to exactly 0 or 1. */
   if (query_is_boolean(q->type))
      result = mi_iand(b, mi_nz(b, result), mi_imm(1));

   return result;
}
#endif
|
||||
|
||||
static struct pipe_query *
|
||||
crocus_create_query(struct pipe_context *ctx,
|
||||
unsigned query_type,
|
||||
unsigned index)
|
||||
{
|
||||
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
|
||||
|
||||
q->type = query_type;
|
||||
q->index = index;
|
||||
q->monitor = NULL;
|
||||
|
||||
if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
|
||||
q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
|
||||
q->batch_idx = CROCUS_BATCH_COMPUTE;
|
||||
else
|
||||
q->batch_idx = CROCUS_BATCH_RENDER;
|
||||
return (struct pipe_query *) q;
|
||||
}
|
||||
|
||||
static struct pipe_query *
|
||||
crocus_create_batch_query(struct pipe_context *ctx,
|
||||
unsigned num_queries,
|
||||
unsigned *query_types)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
|
||||
if (unlikely(!q))
|
||||
return NULL;
|
||||
q->type = PIPE_QUERY_DRIVER_SPECIFIC;
|
||||
q->index = -1;
|
||||
q->monitor = crocus_create_monitor_object(ice, num_queries, query_types);
|
||||
if (unlikely(!q->monitor)) {
|
||||
free(q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (struct pipe_query *) q;
|
||||
}
|
||||
|
||||
static void
|
||||
crocus_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
|
||||
{
|
||||
struct crocus_query *query = (void *) p_query;
|
||||
struct crocus_screen *screen = (void *) ctx->screen;
|
||||
if (query->monitor) {
|
||||
crocus_destroy_monitor_object(ctx, query->monitor);
|
||||
query->monitor = NULL;
|
||||
} else {
|
||||
crocus_syncobj_reference(screen, &query->syncobj, NULL);
|
||||
screen->base.fence_reference(ctx->screen, &query->fence, NULL);
|
||||
}
|
||||
free(query);
|
||||
}
|
||||
|
||||
|
||||
/**
 * The pipe->begin_query() driver hook.
 *
 * Allocates a fresh snapshot buffer from the query uploader and records
 * the starting counter snapshot.  Returns false on allocation failure.
 */
static bool
crocus_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   if (q->monitor)
      return crocus_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   /* SO overflow queries need the larger per-stream snapshot layout. */
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct crocus_query_so_overflow);
   else
      size = sizeof(struct crocus_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!crocus_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   /* Primitives-generated queries affect streamout/clip state. */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
   }

#if GFX_VER <= 5
   /* Old gens need WM statistics enabled while occlusion queries run. */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
      ice->state.stats_wm++;
      ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
   }
#endif
#if GFX_VER >= 6
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
#endif
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct crocus_query_snapshots, start));

   return true;
}
|
||||
|
||||
/**
 * The pipe->end_query() driver hook.
 *
 * Records the ending counter snapshot and arranges for availability to
 * be signalled (syncobj, plus snapshots_landed on HSW).  TIMESTAMP
 * queries have no begin, so the "start" snapshot is written here.
 */
static bool
crocus_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   if (q->monitor)
      return crocus_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      /* GPU_FINISHED is just a deferred fence on the current work. */
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct crocus_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      /* Reuse begin_query to allocate the buffer and write the snapshot. */
      crocus_begin_query(ctx, query);
      crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
      mark_available(ice, q);
      return true;
   }

#if GFX_VER <= 5
   /* Balance the stats_wm increment from begin_query. */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
      ice->state.stats_wm--;
      ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
   }
#endif
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
   }

#if GFX_VER >= 6
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
#endif
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct crocus_query_snapshots, end));

   crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
   mark_available(ice, q);

   return true;
}
|
||||
|
||||
/**
|
||||
* See if the snapshots have landed for a query, and if so, compute the
|
||||
* result and mark it ready. Does not flush (unlike crocus_get_query_result).
|
||||
*/
|
||||
static void
|
||||
crocus_check_query_no_flush(struct crocus_context *ice, struct crocus_query *q)
|
||||
{
|
||||
struct crocus_screen *screen = (void *) ice->ctx.screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
|
||||
calculate_result_on_cpu(devinfo, q);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * The pipe->get_query_result() driver hook.
 *
 * Flushes the batch if the query's snapshot writes are still queued, then
 * either waits for the result or returns false (when !wait and it isn't
 * ready yet).  On success the result is stored in result->u64.
 */
static bool
crocus_get_query_result(struct pipe_context *ctx,
                        struct pipe_query *query,
                        bool wait,
                        union pipe_query_result *result)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   if (q->monitor)
      return crocus_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct crocus_screen *screen = (void *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct crocus_batch *batch = &ice->batches[q->batch_idx];
      /* If our snapshot writes are still unsubmitted, submit them now. */
      if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
         crocus_batch_flush(batch);

#if GFX_VERx10 == 75
      /* HSW has a GPU-written availability flag; poll it, waiting on the
       * syncobj when the caller asked us to block.
       */
      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
         else
            return false;
      }
      assert(READ_ONCE(q->map->snapshots_landed));
#else
      if (wait)
         crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
#endif
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
static void
|
||||
crocus_get_query_result_resource(struct pipe_context *ctx,
|
||||
struct pipe_query *query,
|
||||
bool wait,
|
||||
enum pipe_query_value_type result_type,
|
||||
int index,
|
||||
struct pipe_resource *p_res,
|
||||
unsigned offset)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
struct crocus_query *q = (void *) query;
|
||||
struct crocus_batch *batch = &ice->batches[q->batch_idx];
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
const struct intel_device_info *devinfo = &batch->screen->devinfo;
|
||||
struct crocus_resource *res = (void *) p_res;
|
||||
struct crocus_bo *query_bo = crocus_resource_bo(q->query_state_ref.res);
|
||||
struct crocus_bo *dst_bo = crocus_resource_bo(p_res);
|
||||
unsigned snapshots_landed_offset =
|
||||
offsetof(struct crocus_query_snapshots, snapshots_landed);
|
||||
|
||||
res->bind_history |= PIPE_BIND_QUERY_BUFFER;
|
||||
|
||||
if (index == -1) {
|
||||
/* They're asking for the availability of the result. If we still
|
||||
* have commands queued up which produce the result, submit them
|
||||
* now so that progress happens. Either way, copy the snapshots
|
||||
* landed field to the destination resource.
|
||||
*/
|
||||
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
|
||||
crocus_batch_flush(batch);
|
||||
|
||||
screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
|
||||
query_bo, snapshots_landed_offset,
|
||||
result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
|
||||
/* The final snapshots happen to have landed, so let's just compute
|
||||
* the result on the CPU now...
|
||||
*/
|
||||
calculate_result_on_cpu(devinfo, q);
|
||||
}
|
||||
|
||||
if (q->ready) {
|
||||
/* We happen to have the result on the CPU, so just copy it. */
|
||||
if (result_type <= PIPE_QUERY_TYPE_U32) {
|
||||
screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
|
||||
} else {
|
||||
screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
|
||||
}
|
||||
|
||||
/* Make sure the result lands before they use bind the QBO elsewhere
|
||||
* and use the result.
|
||||
*/
|
||||
// XXX: Why? i965 doesn't do this.
|
||||
crocus_emit_pipe_control_flush(batch,
|
||||
"query: unknown QBO flushing hack",
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
return;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
bool predicated = !wait && !q->stalled;
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, &batch->screen->devinfo, batch);
|
||||
|
||||
struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
|
||||
struct mi_value dst =
|
||||
result_type <= PIPE_QUERY_TYPE_U32 ? mi_mem32(rw_bo(dst_bo, offset))
|
||||
: mi_mem64(rw_bo(dst_bo, offset));
|
||||
|
||||
if (predicated) {
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
|
||||
mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
|
||||
mi_store_if(&b, dst, result);
|
||||
} else {
|
||||
mi_store(&b, dst, result);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
crocus_set_active_query_state(struct pipe_context *ctx, bool enable)
|
||||
{
|
||||
struct crocus_context *ice = (void *) ctx;
|
||||
|
||||
if (ice->state.statistics_counters_enabled == enable)
|
||||
return;
|
||||
|
||||
// XXX: most packets aren't paying attention to this yet, because it'd
|
||||
// have to be done dynamically at draw time, which is a pain
|
||||
ice->state.statistics_counters_enabled = enable;
|
||||
ice->state.dirty |= CROCUS_DIRTY_CLIP |
|
||||
CROCUS_DIRTY_RASTER |
|
||||
CROCUS_DIRTY_STREAMOUT |
|
||||
CROCUS_DIRTY_WM;
|
||||
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |
|
||||
CROCUS_STAGE_DIRTY_TCS |
|
||||
CROCUS_STAGE_DIRTY_TES |
|
||||
CROCUS_STAGE_DIRTY_VS;
|
||||
}
|
||||
|
||||
static void
|
||||
set_predicate_enable(struct crocus_context *ice, bool value)
|
||||
{
|
||||
if (value)
|
||||
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
|
||||
else
|
||||
ice->state.predicate = CROCUS_PREDICATE_STATE_DONT_RENDER;
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
static void
|
||||
set_predicate_for_result(struct crocus_context *ice,
|
||||
struct crocus_query *q,
|
||||
bool inverted)
|
||||
{
|
||||
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
|
||||
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
|
||||
|
||||
#if GFX_VERx10 != 75
|
||||
/* IVB doesn't have enough MI for this */
|
||||
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
|
||||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
|
||||
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* The CPU doesn't have the query result yet; use hardware predication */
|
||||
ice->state.predicate = CROCUS_PREDICATE_STATE_USE_BIT;
|
||||
|
||||
/* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
|
||||
crocus_emit_pipe_control_flush(batch,
|
||||
"conditional rendering: set predicate",
|
||||
PIPE_CONTROL_FLUSH_ENABLE);
|
||||
q->stalled = true;
|
||||
|
||||
#if GFX_VERx10 != 75
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
|
||||
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, start));
|
||||
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
|
||||
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, end));
|
||||
|
||||
uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
if (inverted)
|
||||
mi_predicate |= MI_PREDICATE_LOADOP_LOAD;
|
||||
else
|
||||
mi_predicate |= MI_PREDICATE_LOADOP_LOADINV;
|
||||
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
|
||||
#else
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, &batch->screen->devinfo, batch);
|
||||
|
||||
struct mi_value result;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
result = calc_overflow_for_stream(&b, q, q->index);
|
||||
break;
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
result = calc_overflow_any_stream(&b, q);
|
||||
break;
|
||||
default: {
|
||||
/* PIPE_QUERY_OCCLUSION_* */
|
||||
struct mi_value start =
|
||||
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
|
||||
struct mi_value end =
|
||||
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
|
||||
result = mi_isub(&b, end, start);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
|
||||
result = mi_iand(&b, result, mi_imm(1));
|
||||
|
||||
/* We immediately set the predicate on the render batch, as all the
|
||||
* counters come from 3D operations. However, we may need to predicate
|
||||
* a compute dispatch, which executes in a different GEM context and has
|
||||
* a different MI_PREDICATE_RESULT register. So, we save the result to
|
||||
* memory and reload it in crocus_launch_grid.
|
||||
*/
|
||||
mi_value_ref(&b, result);
|
||||
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), result);
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
|
||||
|
||||
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
|
||||
MI_PREDICATE_COMBINEOP_SET |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
|
||||
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
|
||||
mi_store(&b, query_mem64(q, offsetof(struct crocus_query_snapshots,
|
||||
predicate_result)), result);
|
||||
#endif
|
||||
ice->state.compute_predicate = bo;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * The pipe->render_condition() driver hook.
 *
 * If the query result is already known on the CPU, set the predicate
 * directly; otherwise fall back to GPU predication (gfx7) or stalling
 * for the query (older gens).  A NULL query disables conditioning.
 */
static void
crocus_render_condition(struct pipe_context *ctx,
                        struct pipe_query *query,
                        bool condition,
                        enum pipe_render_cond_flag mode)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;
   ice->condition.mode = mode;

   if (!q) {
      ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
      return;
   }

   /* Opportunistically resolve the result on the CPU without flushing. */
   crocus_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
#if GFX_VER == 7
      set_predicate_for_result(ice, q, condition);
#else
      ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
#endif
   }
}
|
||||
|
||||
/**
 * Resolve a GPU-predicated render condition to a CPU-known verdict by
 * waiting for the query result.  No-op unless hardware predication
 * (CROCUS_PREDICATE_STATE_USE_BIT) is currently in effect.
 */
static void
crocus_resolve_conditional_render(struct crocus_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct crocus_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != CROCUS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   /* Blocking fetch; then convert to a plain render/don't-render state. */
   crocus_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}
|
||||
|
||||
#if GFX_VER >= 7
|
||||
static void
|
||||
crocus_emit_compute_predicate(struct crocus_batch *batch)
|
||||
{
|
||||
struct crocus_context *ice = batch->ice;
|
||||
struct crocus_screen *screen = batch->screen;
|
||||
screen->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
|
||||
ice->state.compute_predicate, 0);
|
||||
screen->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC1, 0);
|
||||
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
|
||||
MI_PREDICATE_COMBINEOP_SET |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
|
||||
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Install the per-generation screen vtbl hooks for query support. */
void
genX(init_screen_query)(struct crocus_screen *screen)
{
   screen->vtbl.resolve_conditional_render = crocus_resolve_conditional_render;
#if GFX_VER >= 7
   screen->vtbl.emit_compute_predicate = crocus_emit_compute_predicate;
#endif
}
|
||||
|
||||
/** Install the pipe_context query entry points for this generation. */
void
genX(init_query)(struct crocus_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = crocus_create_query;
   ctx->create_batch_query = crocus_create_batch_query;
   ctx->destroy_query = crocus_destroy_query;
   ctx->begin_query = crocus_begin_query;
   ctx->end_query = crocus_end_query;
   ctx->get_query_result = crocus_get_query_result;
#if GFX_VER == 7
   /* QBO writes need MI support only present on gfx7. */
   ctx->get_query_result_resource = crocus_get_query_result_resource;
#endif
   ctx->set_active_query_state = crocus_set_active_query_state;
   ctx->render_condition = crocus_render_condition;

}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,501 @@
|
|||
/*
|
||||
* Copyright 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CROCUS_RESOURCE_H
|
||||
#define CROCUS_RESOURCE_H
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_range.h"
|
||||
#include "intel/isl/isl.h"
|
||||
|
||||
#include "crocus_bufmgr.h"
|
||||
|
||||
struct crocus_batch;
|
||||
struct crocus_context;
|
||||
|
||||
#define CROCUS_MAX_MIPLEVELS 15
|
||||
|
||||
/**
 * A format translation result: the hardware (ISL) format chosen for a
 * pipe format, plus the channel swizzle needed to present it correctly.
 */
struct crocus_format_info {
   enum isl_format fmt;
   enum pipe_swizzle swizzles[4];
};
|
||||
|
||||
static inline enum isl_channel_select
|
||||
pipe_to_isl_swizzle(const enum pipe_swizzle pswz, bool green_to_blue)
|
||||
{
|
||||
unsigned swz = (pswz + 4) & 7;
|
||||
|
||||
return (green_to_blue && swz == ISL_CHANNEL_SELECT_GREEN) ? ISL_CHANNEL_SELECT_BLUE : swz;
|
||||
}
|
||||
|
||||
static inline struct isl_swizzle
|
||||
pipe_to_isl_swizzles(const enum pipe_swizzle pswz[4])
|
||||
{
|
||||
struct isl_swizzle swz;
|
||||
swz.r = pipe_to_isl_swizzle(pswz[0], false);
|
||||
swz.g = pipe_to_isl_swizzle(pswz[1], false);
|
||||
swz.b = pipe_to_isl_swizzle(pswz[2], false);
|
||||
swz.a = pipe_to_isl_swizzle(pswz[3], false);
|
||||
return swz;
|
||||
}
|
||||
|
||||
static inline void
|
||||
crocus_combine_swizzle(enum pipe_swizzle outswz[4],
|
||||
const enum pipe_swizzle fswz[4],
|
||||
const enum pipe_swizzle vswz[4])
|
||||
{
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
switch (vswz[i]) {
|
||||
case PIPE_SWIZZLE_X: outswz[i] = fswz[0]; break;
|
||||
case PIPE_SWIZZLE_Y: outswz[i] = fswz[1]; break;
|
||||
case PIPE_SWIZZLE_Z: outswz[i] = fswz[2]; break;
|
||||
case PIPE_SWIZZLE_W: outswz[i] = fswz[3]; break;
|
||||
case PIPE_SWIZZLE_1: outswz[i] = PIPE_SWIZZLE_1; break;
|
||||
case PIPE_SWIZZLE_0: outswz[i] = PIPE_SWIZZLE_0; break;
|
||||
default: unreachable("invalid swizzle");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resources represent a GPU buffer object or image (mipmap tree).
 *
 * They contain the storage (BO) and layout information (ISL surface).
 */
struct crocus_resource {
   struct pipe_resource base;
   /* NOTE(review): presumably the pipe format the resource's contents are
    * actually stored in, which may differ from base.format — confirm. */
   enum pipe_format internal_format;

   /**
    * The ISL surface layout information for this resource.
    *
    * This is not filled out for PIPE_BUFFER resources, but is guaranteed
    * to be zeroed.  Note that this also guarantees that res->surf.tiling
    * will be ISL_TILING_LINEAR, so it's safe to check that.
    */
   struct isl_surf surf;

   /** Backing storage for the resource */
   struct crocus_bo *bo;

   /** offset at which data starts in the BO */
   uint64_t offset;

   /**
    * A bitfield of PIPE_BIND_* indicating how this resource was bound
    * in the past.  Only meaningful for PIPE_BUFFER; used for flushing.
    */
   unsigned bind_history;

   /**
    * A bitfield of MESA_SHADER_* stages indicating where this resource
    * was bound.
    */
   unsigned bind_stages;

   /**
    * For PIPE_BUFFER resources, a range which may contain valid data.
    *
    * This is a conservative estimate of what part of the buffer contains
    * valid data that we have to preserve.  The rest of the buffer is
    * considered invalid, and we can promote writes to that region to
    * be unsynchronized writes, avoiding blit copies.
    */
   struct util_range valid_buffer_range;

   /**
    * Auxiliary buffer information (CCS, MCS, or HiZ).
    */
   struct {
      /** The surface layout for the auxiliary buffer. */
      struct isl_surf surf;

      /** The buffer object containing the auxiliary data. */
      struct crocus_bo *bo;

      /** Offset into 'bo' where the auxiliary surface starts. */
      uint32_t offset;

      struct {
         /* NOTE(review): layout of a secondary aux surface — confirm
          * which aux combinations require it. */
         struct isl_surf surf;

         /** Offset into 'bo' where the auxiliary surface starts. */
         uint32_t offset;
      } extra_aux;

      /**
       * Fast clear color for this surface.  For depth surfaces, the clear
       * value is stored as a float32 in the red component.
       */
      union isl_color_value clear_color;

      /**
       * \brief The type of auxiliary compression used by this resource.
       *
       * This describes the type of auxiliary compression that is intended to
       * be used by this resource.  An aux usage of ISL_AUX_USAGE_NONE means
       * that auxiliary compression is permanently disabled.  An aux usage
       * other than ISL_AUX_USAGE_NONE does not imply that auxiliary
       * compression will always be enabled for this surface.
       */
      enum isl_aux_usage usage;

      /**
       * \brief Maps miptree slices to their current aux state.
       *
       * This two-dimensional array is indexed as [level][layer] and stores an
       * aux state for each slice.
       */
      enum isl_aux_state **state;

      /**
       * If (1 << level) is set, HiZ is enabled for that miplevel.
       */
      uint16_t has_hiz;
   } aux;

   /**
    * \brief Shadow miptree for sampling when the main isn't supported by HW.
    *
    * To workaround various sampler bugs and limitations, we blit the main
    * texture into a new texture that can be sampled.
    *
    * This miptree may be used for:
    * - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
    */
   struct crocus_resource *shadow;
   /* NOTE(review): presumably set when 'shadow' is stale and must be
    * re-blitted before sampling — confirm against the resolve code. */
   bool shadow_needs_update;

   /**
    * For external surfaces, this is format that was used to create or import
    * the surface.  For internal surfaces, this will always be
    * PIPE_FORMAT_NONE.
    */
   enum pipe_format external_format;

   /**
    * For external surfaces, this is DRM format modifier that was used to
    * create or import the surface.  For internal surfaces, this will always
    * be DRM_FORMAT_MOD_INVALID.
    */
   const struct isl_drm_modifier_info *mod_info;

   /**
    * The screen the resource was originally created with, stored for refcounting.
    */
   struct pipe_screen *orig_screen;
};
|
||||
|
||||
/**
 * A simple <resource, offset> tuple for storing a reference to a
 * piece of state stored in a GPU buffer object.
 */
struct crocus_state_ref {
   /* Resource backing the state data. */
   struct pipe_resource *res;
   /* Byte offset of the state within 'res'. */
   uint32_t offset;
};
|
||||
|
||||
/**
 * Gallium CSO for sampler views (texture views).
 *
 * In addition to the normal pipe_resource, this adds an ISL view
 * which may reinterpret the format or restrict levels/layers.
 *
 * These can also be linear texture buffers.
 */
struct crocus_sampler_view {
   struct pipe_sampler_view base;
   struct isl_view view;
   /* NOTE(review): alternate view used for textureGather — presumably to
    * satisfy gather-specific swizzle/format restrictions; confirm. */
   struct isl_view gather_view;

   /* Per-channel swizzle applied when sampling through this view. */
   enum pipe_swizzle swizzle[4];
   union isl_color_value clear_color;

   /* A short-cut (not a reference) to the actual resource being viewed.
    * Multi-planar (or depth+stencil) images may have multiple resources
    * chained together; this skips having to traverse base->texture->*.
    */
   struct crocus_resource *res;
};
|
||||
|
||||
/**
 * Image view representation (shader image bindings).
 *
 * Pairs the gallium pipe_image_view with the ISL view used to build
 * the hardware surface state.
 */
struct crocus_image_view {
   struct pipe_image_view base;
   struct isl_view view;
};
|
||||
|
||||
/**
 * Gallium CSO for surfaces (framebuffer attachments).
 *
 * A view of a surface that can be bound to a color render target or
 * depth/stencil attachment.
 */
struct crocus_surface {
   struct pipe_surface base;
   struct isl_view view;
   /* NOTE(review): separate view for reading the surface (e.g. fbfetch
    * or blit sources) — confirm against surface creation code. */
   struct isl_view read_view;
   struct isl_surf surf;
   union isl_color_value clear_color;

   /* NOTE(review): extra resource kept alive for alignment workarounds —
    * confirm its exact purpose in crocus_resource.c. */
   struct pipe_resource *align_res;
};
|
||||
|
||||
/**
 * Transfer object - information about a buffer mapping.
 */
struct crocus_transfer {
   struct pipe_transfer base;
   /* Debug callback for reporting transfer-related performance events. */
   struct pipe_debug_callback *dbg;
   /* NOTE(review): presumably a CPU-side bounce buffer used when the map
    * cannot be done in place — confirm in the map/unmap paths. */
   void *buffer;
   /* Pointer handed back to the caller of transfer_map. */
   void *ptr;

   /** A linear staging resource for GPU-based copy_region transfers. */
   struct pipe_resource *staging;
   struct blorp_context *blorp;
   struct crocus_batch *batch;

   bool dest_had_defined_contents;
   bool has_swizzling;

   /* Called at unmap time to write back / release any staging data. */
   void (*unmap)(struct crocus_transfer *);
};
|
||||
|
||||
/**
|
||||
* Unwrap a pipe_resource to get the underlying crocus_bo (for convenience).
|
||||
*/
|
||||
static inline struct crocus_bo *
|
||||
crocus_resource_bo(struct pipe_resource *p_res)
|
||||
{
|
||||
struct crocus_resource *res = (void *) p_res;
|
||||
return res->bo;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
crocus_mocs(const struct crocus_bo *bo,
|
||||
const struct isl_device *dev)
|
||||
{
|
||||
return isl_mocs(dev, 0, bo && crocus_bo_is_external(bo));
|
||||
}
|
||||
|
||||
struct crocus_format_info crocus_format_for_usage(const struct intel_device_info *,
|
||||
enum pipe_format pf,
|
||||
isl_surf_usage_flags_t usage);
|
||||
|
||||
struct pipe_resource *crocus_resource_get_separate_stencil(struct pipe_resource *);
|
||||
|
||||
void crocus_get_depth_stencil_resources(const struct intel_device_info *devinfo,
|
||||
struct pipe_resource *res,
|
||||
struct crocus_resource **out_z,
|
||||
struct crocus_resource **out_s);
|
||||
bool crocus_resource_set_clear_color(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
union isl_color_value color);
|
||||
union isl_color_value
|
||||
crocus_resource_get_clear_color(const struct crocus_resource *res);
|
||||
|
||||
void crocus_init_screen_resource_functions(struct pipe_screen *pscreen);
|
||||
|
||||
void crocus_dirty_for_history(struct crocus_context *ice,
|
||||
struct crocus_resource *res);
|
||||
uint32_t crocus_flush_bits_for_history(struct crocus_resource *res);
|
||||
|
||||
void crocus_flush_and_dirty_for_history(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
struct crocus_resource *res,
|
||||
uint32_t extra_flags,
|
||||
const char *reason);
|
||||
|
||||
unsigned crocus_get_num_logical_layers(const struct crocus_resource *res,
|
||||
unsigned level);
|
||||
|
||||
void crocus_resource_disable_aux(struct crocus_resource *res);
|
||||
|
||||
#define INTEL_REMAINING_LAYERS UINT32_MAX
|
||||
#define INTEL_REMAINING_LEVELS UINT32_MAX
|
||||
|
||||
void
|
||||
crocus_hiz_exec(struct crocus_context *ice,
|
||||
struct crocus_batch *batch,
|
||||
struct crocus_resource *res,
|
||||
unsigned int level, unsigned int start_layer,
|
||||
unsigned int num_layers, enum isl_aux_op op,
|
||||
bool update_clear_depth);
|
||||
|
||||
/**
|
||||
* Prepare a miptree for access
|
||||
*
|
||||
* This function should be called prior to any access to miptree in order to
|
||||
* perform any needed resolves.
|
||||
*
|
||||
* \param[in] start_level The first mip level to be accessed
|
||||
*
|
||||
* \param[in] num_levels The number of miplevels to be accessed or
|
||||
* INTEL_REMAINING_LEVELS to indicate every level
|
||||
* above start_level will be accessed
|
||||
*
|
||||
* \param[in] start_layer The first array slice or 3D layer to be accessed
|
||||
*
|
||||
* \param[in] num_layers The number of array slices or 3D layers be
|
||||
* accessed or INTEL_REMAINING_LAYERS to indicate
|
||||
* every layer above start_layer will be accessed
|
||||
*
|
||||
* \param[in] aux_supported Whether or not the access will support the
|
||||
* miptree's auxiliary compression format; this
|
||||
* must be false for uncompressed miptrees
|
||||
*
|
||||
* \param[in] fast_clear_supported Whether or not the access will support
|
||||
* fast clears in the miptree's auxiliary
|
||||
* compression format
|
||||
*/
|
||||
void
|
||||
crocus_resource_prepare_access(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
uint32_t start_level, uint32_t num_levels,
|
||||
uint32_t start_layer, uint32_t num_layers,
|
||||
enum isl_aux_usage aux_usage,
|
||||
bool fast_clear_supported);
|
||||
|
||||
/**
|
||||
* Complete a write operation
|
||||
*
|
||||
* This function should be called after any operation writes to a miptree.
|
||||
* This will update the miptree's compression state so that future resolves
|
||||
* happen correctly. Technically, this function can be called before the
|
||||
* write occurs but the caller must ensure that they don't interlace
|
||||
* crocus_resource_prepare_access and crocus_resource_finish_write calls to
|
||||
* overlapping layer/level ranges.
|
||||
*
|
||||
* \param[in] level The mip level that was written
|
||||
*
|
||||
* \param[in] start_layer The first array slice or 3D layer written
|
||||
*
|
||||
* \param[in] num_layers The number of array slices or 3D layers
|
||||
* written or INTEL_REMAINING_LAYERS to indicate
|
||||
* every layer above start_layer was written
|
||||
*
|
||||
* \param[in] written_with_aux Whether or not the write was done with
|
||||
* auxiliary compression enabled
|
||||
*/
|
||||
void
|
||||
crocus_resource_finish_write(struct crocus_context *ice,
|
||||
struct crocus_resource *res, uint32_t level,
|
||||
uint32_t start_layer, uint32_t num_layers,
|
||||
enum isl_aux_usage aux_usage);
|
||||
|
||||
/** Get the auxiliary compression state of a miptree slice */
|
||||
enum isl_aux_state
|
||||
crocus_resource_get_aux_state(const struct crocus_resource *res,
|
||||
uint32_t level, uint32_t layer);
|
||||
|
||||
/**
|
||||
* Set the auxiliary compression state of a miptree slice range
|
||||
*
|
||||
* This function directly sets the auxiliary compression state of a slice
|
||||
* range of a miptree. It only modifies data structures and does not do any
|
||||
* resolves. This should only be called by code which directly performs
|
||||
* compression operations such as fast clears and resolves. Most code should
|
||||
* use crocus_resource_prepare_access or crocus_resource_finish_write.
|
||||
*/
|
||||
void
|
||||
crocus_resource_set_aux_state(struct crocus_context *ice,
|
||||
struct crocus_resource *res, uint32_t level,
|
||||
uint32_t start_layer, uint32_t num_layers,
|
||||
enum isl_aux_state aux_state);
|
||||
|
||||
/**
|
||||
* Prepare a miptree for raw access
|
||||
*
|
||||
* This helper prepares the miptree for access that knows nothing about any
|
||||
* sort of compression whatsoever. This is useful when mapping the surface or
|
||||
* using it with the blitter.
|
||||
*/
|
||||
static inline void
|
||||
crocus_resource_access_raw(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
uint32_t level, uint32_t layer,
|
||||
uint32_t num_layers,
|
||||
bool write)
|
||||
{
|
||||
crocus_resource_prepare_access(ice, res, level, 1, layer, num_layers,
|
||||
ISL_AUX_USAGE_NONE, false);
|
||||
if (write) {
|
||||
crocus_resource_finish_write(ice, res, level, layer, num_layers,
|
||||
ISL_AUX_USAGE_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
crocus_resource_get_image_offset(struct crocus_resource *res,
|
||||
uint32_t level, uint32_t z,
|
||||
uint32_t *x, uint32_t *y);
|
||||
static inline enum isl_aux_usage
|
||||
crocus_resource_texture_aux_usage(const struct crocus_resource *res)
|
||||
{
|
||||
return res->aux.usage == ISL_AUX_USAGE_MCS ? ISL_AUX_USAGE_MCS : ISL_AUX_USAGE_NONE;
|
||||
}
|
||||
|
||||
void crocus_resource_prepare_texture(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
enum isl_format view_format,
|
||||
uint32_t start_level, uint32_t num_levels,
|
||||
uint32_t start_layer, uint32_t num_layers);
|
||||
|
||||
static inline bool
|
||||
crocus_resource_unfinished_aux_import(struct crocus_resource *res)
|
||||
{
|
||||
return res->base.next != NULL && res->mod_info &&
|
||||
res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
|
||||
}
|
||||
|
||||
void crocus_resource_finish_aux_import(struct pipe_screen *pscreen,
|
||||
struct crocus_resource *res);
|
||||
|
||||
bool crocus_has_invalid_primary(const struct crocus_resource *res,
|
||||
unsigned start_level, unsigned num_levels,
|
||||
unsigned start_layer, unsigned num_layers);
|
||||
|
||||
void crocus_resource_check_level_layer(const struct crocus_resource *res,
|
||||
uint32_t level, uint32_t layer);
|
||||
|
||||
bool crocus_resource_level_has_hiz(const struct crocus_resource *res,
|
||||
uint32_t level);
|
||||
bool crocus_has_color_unresolved(const struct crocus_resource *res,
|
||||
unsigned start_level, unsigned num_levels,
|
||||
unsigned start_layer, unsigned num_layers);
|
||||
|
||||
enum isl_aux_usage crocus_resource_render_aux_usage(struct crocus_context *ice,
|
||||
struct crocus_resource *res,
|
||||
enum isl_format render_fmt,
|
||||
bool blend_enabled,
|
||||
bool draw_aux_disabled);
|
||||
void crocus_resource_prepare_render(struct crocus_context *ice,
|
||||
struct crocus_resource *res, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
enum isl_aux_usage aux_usage);
|
||||
void crocus_resource_finish_render(struct crocus_context *ice,
|
||||
struct crocus_resource *res, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
enum isl_aux_usage aux_usage);
|
||||
#endif
|
|
@ -0,0 +1,829 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file crocus_screen.c
|
||||
*
|
||||
* Screen related driver hooks and capability lists.
|
||||
*
|
||||
* A program may use multiple rendering contexts (crocus_context), but
|
||||
* they all share a common screen (crocus_screen). Global driver state
|
||||
* can be stored in the screen; it may be accessed by multiple threads.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_transfer_helper.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/xmlconfig.h"
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
#include "crocus_context.h"
|
||||
#include "crocus_defines.h"
|
||||
#include "crocus_fence.h"
|
||||
#include "crocus_pipe.h"
|
||||
#include "crocus_resource.h"
|
||||
#include "crocus_screen.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "intel/common/intel_gem.h"
|
||||
#include "intel/common/intel_l3_config.h"
|
||||
#include "crocus_monitor.h"
|
||||
|
||||
/*
 * Dispatch 'func' to its per-generation gfxN_##func implementation based
 * on devinfo->verx10 (e.g. 75 selects gfx75_##func).
 */
#define genX_call(devinfo, func, ...)             \
   switch ((devinfo)->verx10) {                   \
   case 75:                                       \
      gfx75_##func(__VA_ARGS__);                  \
      break;                                      \
   case 70:                                       \
      gfx7_##func(__VA_ARGS__);                   \
      break;                                      \
   case 60:                                       \
      gfx6_##func(__VA_ARGS__);                   \
      break;                                      \
   case 50:                                       \
      gfx5_##func(__VA_ARGS__);                   \
      break;                                      \
   case 45:                                       \
      gfx45_##func(__VA_ARGS__);                  \
      break;                                      \
   case 40:                                       \
      gfx4_##func(__VA_ARGS__);                   \
      break;                                      \
   default:                                       \
      unreachable("Unknown hardware generation"); \
   }
|
||||
|
||||
/*
 * pipe_screen::flush_frontbuffer hook.  Intentionally empty — there is
 * nothing for the driver to flush at this layer.
 */
static void
crocus_flush_frontbuffer(struct pipe_screen *_screen,
                         struct pipe_context *_pipe,
                         struct pipe_resource *resource,
                         unsigned level, unsigned layer,
                         void *context_private, struct pipe_box *box)
{
}
|
||||
|
||||
/* pipe_screen::get_vendor — the driver vendor string. */
static const char *
crocus_get_vendor(struct pipe_screen *pscreen)
{
   return "Intel";
}
|
||||
|
||||
/* pipe_screen::get_device_vendor — the hardware vendor string. */
static const char *
crocus_get_device_vendor(struct pipe_screen *pscreen)
{
   return "Intel";
}
|
||||
|
||||
static const char *
|
||||
crocus_get_name(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
|
||||
static char buf[128];
|
||||
|
||||
const char *name = intel_get_device_name(screen->pci_id);
|
||||
|
||||
if (!name)
|
||||
name = "Intel Unknown";
|
||||
|
||||
snprintf(buf, sizeof(buf), "Mesa %s", name);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_aperture_size(int fd)
|
||||
{
|
||||
struct drm_i915_gem_get_aperture aperture = {};
|
||||
intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
|
||||
return aperture.aper_size;
|
||||
}
|
||||
|
||||
/**
 * Answer pipe_screen::get_param capability queries.
 *
 * Most answers depend on the hardware generation (devinfo->ver / verx10);
 * anything not handled explicitly falls back to gallium's
 * u_pipe_screen_get_param_defaults().
 */
static int
crocus_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
   struct crocus_screen *screen = (struct crocus_screen *)pscreen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   switch (param) {
   /* Capabilities supported on every generation this driver handles. */
   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_POINT_SPRITE:
   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_TEXTURE_SWIZZLE:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
   case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
   case PIPE_CAP_VERTEX_SHADER_SATURATE:
   case PIPE_CAP_PRIMITIVE_RESTART:
   case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_TGSI_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_TEXTURE_BARRIER:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
   case PIPE_CAP_START_INSTANCE:
   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
   case PIPE_CAP_ACCELERATED:
   case PIPE_CAP_UMA:
   case PIPE_CAP_CLIP_HALFZ:
   case PIPE_CAP_TGSI_TEXCOORD:
   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
   case PIPE_CAP_TGSI_TEX_TXF_LZ:
   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
   case PIPE_CAP_CLEAR_TEXTURE:
   case PIPE_CAP_TGSI_VOTE:
   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
   case PIPE_CAP_TEXTURE_GATHER_SM5:
   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
   case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
   case PIPE_CAP_NIR_COMPACT_ARRAYS:
   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_INVALIDATE_BUFFER:
   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
   case PIPE_CAP_FENCE_SIGNAL:
   case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
      return true;
   /* Never supported on this hardware. */
   case PIPE_CAP_INT64:
   case PIPE_CAP_INT64_DIVMOD:
   case PIPE_CAP_TGSI_BALLOT:
   case PIPE_CAP_PACKED_UNIFORMS:
   case PIPE_CAP_GL_CLAMP:
      return false;
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
      return devinfo->ver <= 5;
   case PIPE_CAP_TEXTURE_QUERY_LOD:
   case PIPE_CAP_QUERY_TIME_ELAPSED:
      return devinfo->ver >= 5;
   /* Gen7+ only. */
   case PIPE_CAP_DRAW_INDIRECT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   case PIPE_CAP_TGSI_CLOCK:
   case PIPE_CAP_TGSI_TXQS:
   case PIPE_CAP_COMPUTE:
   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
   case PIPE_CAP_GL_SPIRV:
   case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS:
   case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
   case PIPE_CAP_DOUBLES:
      return devinfo->ver >= 7;
   /* Haswell only. */
   case PIPE_CAP_QUERY_BUFFER_OBJECT:
   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
      return devinfo->is_haswell;
   /* Gen6+ only. */
   case PIPE_CAP_CULL_DISTANCE:
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_CUBE_MAP_ARRAY:
   case PIPE_CAP_QUERY_SO_OVERFLOW:
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   case PIPE_CAP_QUERY_TIMESTAMP:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_TEXTURE_SHADOW_LOD:
   case PIPE_CAP_LOAD_CONSTBUF:
   case PIPE_CAP_DRAW_PARAMETERS:
   case PIPE_CAP_CLEAR_SCISSORED:
      return devinfo->ver >= 6;
   case PIPE_CAP_FBFETCH:
      return devinfo->verx10 >= 45 ? BRW_MAX_DRAW_BUFFERS : 0;
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return devinfo->ver >= 6 ? 1 : 0;
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return BRW_MAX_DRAW_BUFFERS;
   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
      if (devinfo->ver >= 7)
         return 16384;
      else
         return 8192;
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      if (devinfo->ver >= 7)
         return CROCUS_MAX_MIPLEVELS; /* 16384x16384 */
      else
         return CROCUS_MAX_MIPLEVELS - 1; /* 8192x8192 */
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
      return 12; /* 2048x2048 */
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      return (devinfo->ver >= 6) ? 4 : 0;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return devinfo->ver >= 7 ? 2048 : 512;
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
      return BRW_MAX_SOL_BINDINGS / CROCUS_MAX_SOL_BUFFERS;
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return BRW_MAX_SOL_BINDINGS;
   case PIPE_CAP_GLSL_FEATURE_LEVEL: {
      if (devinfo->is_haswell)
         return 460;
      else if (devinfo->ver >= 7)
         return 420;
      else if (devinfo->ver >= 6)
         return 330;
      return 120;
   }
   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
      return devinfo->ver < 6 ? 120 : 130;

   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      /* 3DSTATE_CONSTANT_XS requires the start of UBOs to be 32B aligned */
      return 32;
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return CROCUS_MAP_BUFFER_ALIGNMENT;
   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
      /* Choose a cacheline (64 bytes) so that we can safely have the CPU and
       * GPU writing the same SSBO on non-coherent systems (Atom CPUs).  With
       * UBOs, the GPU never writes, so there's no problem.  For an SSBO, the
       * GPU and the CPU can be updating disjoint regions of the buffer
       * simultaneously and that will break if the regions overlap the same
       * cacheline.
       */
      return devinfo->ver >= 7 ? 64 : 0;
   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
      return devinfo->ver >= 7 ? (1 << 27) : 0;
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      return 16; // XXX: u_screen says 256 is the minimum value...
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return true;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      return CROCUS_MAX_TEXTURE_BUFFER_SIZE;
   case PIPE_CAP_MAX_VIEWPORTS:
      return devinfo->ver >= 6 ? 16 : 1;
   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
      return devinfo->ver >= 6 ? 256 : 0;
   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
      return devinfo->ver >= 6 ? 1024 : 0;
   case PIPE_CAP_MAX_GS_INVOCATIONS:
      return devinfo->ver >= 7 ? 32 : 1;
   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
      if (devinfo->ver >= 7)
         return 4;
      else if (devinfo->ver == 6)
         return 1;
      else
         return 0;
   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
      if (devinfo->ver >= 7)
         return -32;
      else if (devinfo->ver == 6)
         return -8;
      else
         return 0;
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
      if (devinfo->ver >= 7)
         return 31;
      else if (devinfo->ver == 6)
         return 7;
      else
         return 0;
   case PIPE_CAP_MAX_VERTEX_STREAMS:
      return devinfo->ver >= 7 ? 4 : 1;
   case PIPE_CAP_VENDOR_ID:
      return 0x8086;
   case PIPE_CAP_DEVICE_ID:
      return screen->pci_id;
   case PIPE_CAP_VIDEO_MEMORY: {
      /* Once a batch uses more than 75% of the maximum mappable size, we
       * assume that there's some fragmentation, and we start doing extra
       * flushing, etc.  That's the big cliff apps will care about.
       */
      const unsigned gpu_mappable_megabytes =
         (screen->aperture_bytes * 3 / 4) / (1024 * 1024);

      const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
      const long system_page_size = sysconf(_SC_PAGE_SIZE);

      if (system_memory_pages <= 0 || system_page_size <= 0)
         return -1;

      const uint64_t system_memory_bytes =
         (uint64_t) system_memory_pages * (uint64_t) system_page_size;

      const unsigned system_memory_megabytes =
         (unsigned) (system_memory_bytes / (1024 * 1024));

      return MIN2(system_memory_megabytes, gpu_mappable_megabytes);
   }
   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
   case PIPE_CAP_MAX_VARYINGS:
      return (screen->devinfo.ver >= 6) ? 32 : 16;
   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
      /* AMD_pinned_memory assumes the flexibility of using client memory
       * for any buffer (incl. vertex buffers) which rules out the prospect
       * of using snooped buffers, as using snooped buffers without
       * cogniscience is likely to be detrimental to performance and require
       * extensive checking in the driver for correctness, e.g. to prevent
       * illegal snoop <-> snoop transfers.
       */
      return devinfo->has_llc;
   case PIPE_CAP_THROTTLE:
      return screen->driconf.disable_throttling ? 0 : 1;

   case PIPE_CAP_CONTEXT_PRIORITY_MASK:
      return PIPE_CONTEXT_PRIORITY_LOW |
             PIPE_CONTEXT_PRIORITY_MEDIUM |
             PIPE_CONTEXT_PRIORITY_HIGH;

   case PIPE_CAP_FRONTEND_NOOP:
      return true;
   // XXX: don't hardcode 00:00:02.0 PCI here
   case PIPE_CAP_PCI_GROUP:
      return 0;
   case PIPE_CAP_PCI_BUS:
      return 0;
   case PIPE_CAP_PCI_DEVICE:
      return 2;
   case PIPE_CAP_PCI_FUNCTION:
      return 0;

   default:
      return u_pipe_screen_get_param_defaults(pscreen, param);
   }
   return 0;
}
|
||||
|
||||
static float
|
||||
crocus_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
switch (param) {
|
||||
case PIPE_CAPF_MAX_LINE_WIDTH:
|
||||
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
|
||||
if (devinfo->ver >= 6)
|
||||
return 7.375f;
|
||||
else
|
||||
return 7.0f;
|
||||
|
||||
case PIPE_CAPF_MAX_POINT_WIDTH:
|
||||
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
|
||||
return 255.0f;
|
||||
|
||||
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
|
||||
return 16.0f;
|
||||
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
|
||||
return 15.0f;
|
||||
case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
|
||||
case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
|
||||
case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
|
||||
return 0.0f;
|
||||
default:
|
||||
unreachable("unknown param");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Implements pipe_screen::get_shader_param.
 *
 * Reports per-stage shader limits and features.  Stages the hardware
 * generation cannot run at all report 0 for every cap.
 */
static int
crocus_get_shader_param(struct pipe_screen *pscreen,
                        enum pipe_shader_type p_stage,
                        enum pipe_shader_cap param)
{
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct crocus_screen *screen = (struct crocus_screen *)pscreen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* Gen4/5: only vertex and fragment shaders are programmable. */
   if (devinfo->ver < 6 &&
       p_stage != PIPE_SHADER_VERTEX &&
       p_stage != PIPE_SHADER_FRAGMENT)
      return 0;

   /* Gen6 adds geometry shaders, but not tessellation or compute. */
   if (devinfo->ver == 6 &&
       p_stage != PIPE_SHADER_VERTEX &&
       p_stage != PIPE_SHADER_FRAGMENT &&
       p_stage != PIPE_SHADER_GEOMETRY)
      return 0;

   /* this is probably not totally correct.. but it's a start: */
   switch (param) {
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
      return stage == MESA_SHADER_FRAGMENT ? 1024 : 16384;
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return stage == MESA_SHADER_FRAGMENT ? 1024 : 0;

   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return UINT_MAX;

   case PIPE_SHADER_CAP_MAX_INPUTS:
      if (stage == MESA_SHADER_VERTEX ||
          stage == MESA_SHADER_GEOMETRY)
         return 16; /* Gen7 vec4 geom backend */
      return 32;
   case PIPE_SHADER_CAP_MAX_OUTPUTS:
      return 32;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
      /* 16k vec4 constants (64 KiB). */
      return 16 * 1024 * sizeof(float);
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
      return devinfo->ver >= 6 ? 16 : 1;
   case PIPE_SHADER_CAP_MAX_TEMPS:
      return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
      return 0;
   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      /* Lie about these to avoid st/mesa's GLSL IR lowering of indirects,
       * which we don't want. Our compiler backend will check brw_compiler's
       * options and call nir_lower_indirect_derefs appropriately anyway.
       */
      return true;
   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0;
   case PIPE_SHADER_CAP_INTEGERS:
      return 1;
   case PIPE_SHADER_CAP_INT64_ATOMICS:
   case PIPE_SHADER_CAP_FP16:
      return 0;
   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      /* Haswell raises the sampler limit; everything else gets 16. */
      return devinfo->is_haswell ? CROCUS_MAX_TEXTURE_SAMPLERS : 16;
   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
      /* Images require gen7, and only FS/CS stages support them here. */
      if (devinfo->ver >= 7 &&
          (p_stage == PIPE_SHADER_FRAGMENT ||
           p_stage == PIPE_SHADER_COMPUTE))
         return CROCUS_MAX_TEXTURE_SAMPLERS;
      return 0;
   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
      return devinfo->ver >= 7 ? (CROCUS_MAX_ABOS + CROCUS_MAX_SSBOS) : 0;
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
      return 0;
   case PIPE_SHADER_CAP_PREFERRED_IR:
      return PIPE_SHADER_IR_NIR;
   case PIPE_SHADER_CAP_SUPPORTED_IRS:
      return 1 << PIPE_SHADER_IR_NIR;
   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
      return 1;
   case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
   case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   case PIPE_SHADER_CAP_FP16_DERIVATIVES:
   case PIPE_SHADER_CAP_INT16:
   case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
   case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
      return 0;
   default:
      unreachable("unknown shader param");
   }
}
|
||||
|
||||
/**
 * Implements pipe_screen::get_compute_param.
 *
 * Compute requires gen7; earlier generations report 0 for everything.
 * Each case copies its answer into *ret (when non-NULL) and returns the
 * answer's size in bytes, per the gallium compute-param contract.
 */
static int
crocus_get_compute_param(struct pipe_screen *pscreen,
                         enum pipe_shader_ir ir_type,
                         enum pipe_compute_cap param,
                         void *ret)
{
   struct crocus_screen *screen = (struct crocus_screen *)pscreen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* NOTE(review): assumes 32 invocations per CS thread — confirm against
    * the SIMD width actually compiled.
    */
   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
   const uint32_t max_invocations = 32 * max_threads;

   if (devinfo->ver < 7)
      return 0;
/* Copy x into *ret (if requested) and return its size. */
#define RET(x) do {                  \
   if (ret)                          \
      memcpy(ret, x, sizeof(x));     \
   return sizeof(x);                 \
} while (0)

   switch (param) {
   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
      RET((uint32_t []){ 32 });

   case PIPE_COMPUTE_CAP_IR_TARGET:
      /* Returns strlen("gen") + 1 for the copied string. */
      if (ret)
         strcpy(ret, "gen");
      return 4;

   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t []) { 3 });

   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t []) { 65535, 65535, 65535 }));

   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      /* MaxComputeWorkGroupSize[0..2] */
      RET(((uint64_t []) {max_invocations, max_invocations, max_invocations}));

   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      /* MaxComputeWorkGroupInvocations */
      RET((uint64_t []) { max_invocations });

   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
      /* MaxComputeSharedMemorySize */
      RET((uint64_t []) { 64 * 1024 });

   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t []) { 1 });

   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
      RET((uint32_t []) { BRW_SUBGROUP_SIZE });

   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
      RET((uint64_t []) { max_invocations });

   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:

      // XXX: I think these are for Clover...
      return 0;

   default:
      unreachable("unknown compute param");
   }
}
|
||||
|
||||
static uint64_t
|
||||
crocus_get_timestamp(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
|
||||
const unsigned TIMESTAMP = 0x2358;
|
||||
uint64_t result;
|
||||
|
||||
crocus_reg_read(screen->bufmgr, TIMESTAMP | 1, &result);
|
||||
|
||||
result = intel_device_info_timebase_scale(&screen->devinfo, result);
|
||||
result &= (1ull << TIMESTAMP_BITS) - 1;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Tear down a crocus_screen once its last reference is dropped
 * (see crocus_pscreen_unref).
 *
 * Releases the transfer helper, drops the bufmgr reference, destroys the
 * shader disk cache, closes the window-system fd (owned by the screen),
 * and finally frees the screen allocation itself.
 */
void
crocus_screen_destroy(struct crocus_screen *screen)
{
   u_transfer_helper_destroy(screen->base.transfer_helper);
   crocus_bufmgr_unref(screen->bufmgr);
   disk_cache_destroy(screen->disk_cache);
   close(screen->winsys_fd);
   ralloc_free(screen);
}
|
||||
|
||||
/**
 * pipe_screen::destroy hook.
 *
 * Drops one reference; the screen is actually destroyed only when the
 * refcount reaches zero (crocus_pscreen_unref -> crocus_screen_destroy).
 */
static void
crocus_screen_unref(struct pipe_screen *pscreen)
{
   crocus_pscreen_unref(pscreen);
}
|
||||
|
||||
/**
 * pipe_screen::query_memory_info hook.
 *
 * Stub: *info is left untouched.  NOTE(review): callers may expect the
 * structure to be zero-filled — confirm against the state tracker's use.
 */
static void
crocus_query_memory_info(struct pipe_screen *pscreen,
                         struct pipe_memory_info *info)
{
}
|
||||
|
||||
static const void *
|
||||
crocus_get_compiler_options(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir,
|
||||
enum pipe_shader_type pstage)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
|
||||
gl_shader_stage stage = stage_from_pipe(pstage);
|
||||
assert(ir == PIPE_SHADER_IR_NIR);
|
||||
|
||||
return screen->compiler->glsl_compiler_options[stage].NirOptions;
|
||||
}
|
||||
|
||||
static struct disk_cache *
|
||||
crocus_get_disk_shader_cache(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
|
||||
return screen->disk_cache;
|
||||
}
|
||||
|
||||
static const struct intel_l3_config *
|
||||
crocus_get_default_l3_config(const struct intel_device_info *devinfo,
|
||||
bool compute)
|
||||
{
|
||||
bool wants_dc_cache = true;
|
||||
bool has_slm = compute;
|
||||
const struct intel_l3_weights w =
|
||||
intel_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
|
||||
return intel_get_l3_config(devinfo, w);
|
||||
}
|
||||
|
||||
/**
 * brw_compiler shader_debug_log callback.
 *
 * Forwards compiler debug messages to the gallium debug-message callback.
 * @data is the context's struct pipe_debug_callback.
 */
static void
crocus_shader_debug_log(void *data, const char *fmt, ...)
{
   struct pipe_debug_callback *dbg = data;
   unsigned id = 0;
   va_list args;

   /* No listener registered — nothing to do. */
   if (!dbg->debug_message)
      return;

   va_start(args, fmt);
   dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_SHADER_INFO, fmt, args);
   va_end(args);
}
|
||||
|
||||
/**
 * brw_compiler shader_perf_log callback.
 *
 * Writes performance warnings to stderr when INTEL_DEBUG=perf is set, and
 * also forwards them to the gallium debug-message callback if one is
 * registered.  @data is the context's struct pipe_debug_callback.
 */
static void
crocus_shader_perf_log(void *data, const char *fmt, ...)
{
   struct pipe_debug_callback *dbg = data;
   unsigned id = 0;
   va_list args;
   va_start(args, fmt);

   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
      /* vfprintf consumes the list, so copy it — the original is still
       * needed for the debug_message call below.
       */
      va_list args_copy;
      va_copy(args_copy, args);
      vfprintf(stderr, fmt, args_copy);
      va_end(args_copy);
   }

   if (dbg->debug_message) {
      dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_PERF_INFO, fmt, args);
   }

   va_end(args);
}
|
||||
|
||||
/**
 * Detect whether the kernel applies bit-6 address swizzling to tiled
 * buffers, by allocating a small X-tiled BO and asking what swizzle mode
 * the kernel assigned to it.
 *
 * Returns false if the probe BO cannot be allocated (treated as
 * "no swizzling").
 */
static bool
crocus_detect_swizzling(struct crocus_screen *screen)
{
   /* Broadwell PRM says:
    *
    *   "Before Gen8, there was a historical configuration control field to
    *    swizzle address bit[6] for in X/Y tiling modes. This was set in three
    *    different places: TILECTL[1:0], ARB_MODE[5:4], and
    *    DISP_ARB_CTL[14:13].
    *
    *    For Gen8 and subsequent generations, the swizzle fields are all
    *    reserved, and the CPU's memory controller performs all address
    *    swizzling modifications."
    */
   uint32_t tiling = I915_TILING_X;
   uint32_t swizzle_mode = 0;
   /* 32 KiB probe BO with a 512-byte tiled pitch. */
   struct crocus_bo *buffer =
      crocus_bo_alloc_tiled(screen->bufmgr, "swizzle test", 32768,
                            0, tiling, 512, 0);
   if (buffer == NULL)
      return false;

   crocus_bo_get_tiling(buffer, &tiling, &swizzle_mode);
   crocus_bo_unreference(buffer);

   return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
}
|
||||
|
||||
struct pipe_screen *
|
||||
crocus_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
{
|
||||
struct crocus_screen *screen = rzalloc(NULL, struct crocus_screen);
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
if (!intel_get_device_info_from_fd(fd, &screen->devinfo))
|
||||
return NULL;
|
||||
screen->pci_id = screen->devinfo.chipset_id;
|
||||
screen->no_hw = screen->devinfo.no_hw;
|
||||
|
||||
if (screen->devinfo.ver >= 8)
|
||||
return NULL;
|
||||
|
||||
p_atomic_set(&screen->refcount, 1);
|
||||
|
||||
screen->aperture_bytes = get_aperture_size(fd);
|
||||
|
||||
if (getenv("INTEL_NO_HW") != NULL)
|
||||
screen->no_hw = true;
|
||||
|
||||
bool bo_reuse = false;
|
||||
int bo_reuse_mode = driQueryOptioni(config->options, "bo_reuse");
|
||||
switch (bo_reuse_mode) {
|
||||
case DRI_CONF_BO_REUSE_DISABLED:
|
||||
break;
|
||||
case DRI_CONF_BO_REUSE_ALL:
|
||||
bo_reuse = true;
|
||||
break;
|
||||
}
|
||||
|
||||
screen->bufmgr = crocus_bufmgr_get_for_fd(&screen->devinfo, fd, bo_reuse);
|
||||
if (!screen->bufmgr)
|
||||
return NULL;
|
||||
screen->fd = crocus_bufmgr_get_fd(screen->bufmgr);
|
||||
screen->winsys_fd = fd;
|
||||
|
||||
screen->has_swizzling = crocus_detect_swizzling(screen);
|
||||
brw_process_intel_debug_variable();
|
||||
|
||||
screen->driconf.dual_color_blend_by_location =
|
||||
driQueryOptionb(config->options, "dual_color_blend_by_location");
|
||||
screen->driconf.disable_throttling =
|
||||
driQueryOptionb(config->options, "disable_throttling");
|
||||
screen->driconf.always_flush_cache =
|
||||
driQueryOptionb(config->options, "always_flush_cache");
|
||||
|
||||
screen->precompile = env_var_as_boolean("shader_precompile", true);
|
||||
|
||||
isl_device_init(&screen->isl_dev, &screen->devinfo,
|
||||
screen->has_swizzling);
|
||||
|
||||
screen->compiler = brw_compiler_create(screen, &screen->devinfo);
|
||||
screen->compiler->shader_debug_log = crocus_shader_debug_log;
|
||||
screen->compiler->shader_perf_log = crocus_shader_perf_log;
|
||||
screen->compiler->supports_pull_constants = false;
|
||||
screen->compiler->supports_shader_constants = false;
|
||||
screen->compiler->compact_params = false;
|
||||
screen->compiler->constant_buffer_0_is_relative = true;
|
||||
|
||||
if (screen->devinfo.ver == 7) {
|
||||
screen->l3_config_3d = crocus_get_default_l3_config(&screen->devinfo, false);
|
||||
screen->l3_config_cs = crocus_get_default_l3_config(&screen->devinfo, true);
|
||||
}
|
||||
|
||||
crocus_disk_cache_init(screen);
|
||||
|
||||
slab_create_parent(&screen->transfer_pool,
|
||||
sizeof(struct crocus_transfer), 64);
|
||||
|
||||
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
|
||||
assert(screen->subslice_total >= 1);
|
||||
|
||||
struct pipe_screen *pscreen = &screen->base;
|
||||
|
||||
crocus_init_screen_fence_functions(pscreen);
|
||||
crocus_init_screen_resource_functions(pscreen);
|
||||
|
||||
pscreen->destroy = crocus_screen_unref;
|
||||
pscreen->get_name = crocus_get_name;
|
||||
pscreen->get_vendor = crocus_get_vendor;
|
||||
pscreen->get_device_vendor = crocus_get_device_vendor;
|
||||
pscreen->get_param = crocus_get_param;
|
||||
pscreen->get_shader_param = crocus_get_shader_param;
|
||||
pscreen->get_compute_param = crocus_get_compute_param;
|
||||
pscreen->get_paramf = crocus_get_paramf;
|
||||
pscreen->get_compiler_options = crocus_get_compiler_options;
|
||||
pscreen->get_disk_shader_cache = crocus_get_disk_shader_cache;
|
||||
pscreen->is_format_supported = crocus_is_format_supported;
|
||||
pscreen->context_create = crocus_create_context;
|
||||
pscreen->flush_frontbuffer = crocus_flush_frontbuffer;
|
||||
pscreen->get_timestamp = crocus_get_timestamp;
|
||||
pscreen->query_memory_info = crocus_query_memory_info;
|
||||
pscreen->get_driver_query_group_info = crocus_get_monitor_group_info;
|
||||
pscreen->get_driver_query_info = crocus_get_monitor_info;
|
||||
|
||||
genX_call(&screen->devinfo, init_screen_state, screen);
|
||||
genX_call(&screen->devinfo, init_screen_query, screen);
|
||||
return pscreen;
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CROCUS_SCREEN_H
|
||||
#define CROCUS_SCREEN_H
|
||||
|
||||
#include "pipe/p_screen.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "frontend/drm_driver.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/slab.h"
|
||||
#include "util/u_screen.h"
|
||||
#include "intel/dev/intel_device_info.h"
|
||||
#include "intel/isl/isl.h"
|
||||
#include "crocus_bufmgr.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
struct crocus_monitor_config;
|
||||
struct crocus_resource;
|
||||
struct crocus_context;
|
||||
struct crocus_sampler_state;
|
||||
struct brw_vue_map;
|
||||
struct brw_tcs_prog_key;
|
||||
struct brw_tes_prog_key;
|
||||
struct brw_cs_prog_key;
|
||||
struct brw_wm_prog_key;
|
||||
struct brw_vs_prog_key;
|
||||
struct brw_gs_prog_key;
|
||||
struct shader_info;
|
||||
|
||||
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
|
||||
#define WRITE_ONCE(x, v) *(volatile __typeof__(x) *)&(x) = (v)
|
||||
|
||||
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
|
||||
#define CROCUS_MAX_SOL_BUFFERS 4
|
||||
#define CROCUS_MAP_BUFFER_ALIGNMENT 64
|
||||
|
||||
|
||||
/**
 * Virtual table for generation-specific (genxml) function calls.
 *
 * Filled in by genX_call(init_screen_state) at screen creation, so each
 * entry dispatches to the implementation for the detected hardware gen.
 */
struct crocus_vtable {
   /* Context/batch lifecycle and state upload. */
   void (*destroy_state)(struct crocus_context *ice);
   void (*init_render_context)(struct crocus_batch *batch);
   void (*init_compute_context)(struct crocus_batch *batch);
   void (*upload_render_state)(struct crocus_context *ice,
                               struct crocus_batch *batch,
                               const struct pipe_draw_info *draw,
                               unsigned drawid_offset,
                               const struct pipe_draw_indirect_info *indirect,
                               const struct pipe_draw_start_count_bias *sc);
   void (*update_surface_base_address)(struct crocus_batch *batch);

   void (*upload_compute_state)(struct crocus_context *ice,
                                struct crocus_batch *batch,
                                const struct pipe_grid_info *grid);
   void (*rebind_buffer)(struct crocus_context *ice,
                         struct crocus_resource *res);
   void (*resolve_conditional_render)(struct crocus_context *ice);
   void (*emit_compute_predicate)(struct crocus_batch *batch);
   /* MI_* register/memory command emission helpers. */
   void (*load_register_reg32)(struct crocus_batch *batch, uint32_t dst,
                               uint32_t src);
   void (*load_register_reg64)(struct crocus_batch *batch, uint32_t dst,
                               uint32_t src);
   void (*load_register_imm32)(struct crocus_batch *batch, uint32_t reg,
                               uint32_t val);
   void (*load_register_imm64)(struct crocus_batch *batch, uint32_t reg,
                               uint64_t val);
   void (*load_register_mem32)(struct crocus_batch *batch, uint32_t reg,
                               struct crocus_bo *bo, uint32_t offset);
   void (*load_register_mem64)(struct crocus_batch *batch, uint32_t reg,
                               struct crocus_bo *bo, uint32_t offset);
   void (*store_register_mem32)(struct crocus_batch *batch, uint32_t reg,
                                struct crocus_bo *bo, uint32_t offset,
                                bool predicated);
   void (*store_register_mem64)(struct crocus_batch *batch, uint32_t reg,
                                struct crocus_bo *bo, uint32_t offset,
                                bool predicated);
   void (*store_data_imm32)(struct crocus_batch *batch,
                            struct crocus_bo *bo, uint32_t offset,
                            uint32_t value);
   void (*store_data_imm64)(struct crocus_batch *batch,
                            struct crocus_bo *bo, uint32_t offset,
                            uint64_t value);
   void (*copy_mem_mem)(struct crocus_batch *batch,
                        struct crocus_bo *dst_bo, uint32_t dst_offset,
                        struct crocus_bo *src_bo, uint32_t src_offset,
                        unsigned bytes);
   void (*emit_raw_pipe_control)(struct crocus_batch *batch,
                                 const char *reason, uint32_t flags,
                                 struct crocus_bo *bo, uint32_t offset,
                                 uint64_t imm);

   void (*emit_mi_report_perf_count)(struct crocus_batch *batch,
                                     struct crocus_bo *bo,
                                     uint32_t offset_in_bytes,
                                     uint32_t report_id);

   /* Streamout and shader-key population. */
   uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
                                    const struct brw_vue_map *vue_map);
   void (*populate_vs_key)(const struct crocus_context *ice,
                           const struct shader_info *info,
                           gl_shader_stage last_stage,
                           struct brw_vs_prog_key *key);
   void (*populate_tcs_key)(const struct crocus_context *ice,
                            struct brw_tcs_prog_key *key);
   void (*populate_tes_key)(const struct crocus_context *ice,
                            const struct shader_info *info,
                            gl_shader_stage last_stage,
                            struct brw_tes_prog_key *key);
   void (*populate_gs_key)(const struct crocus_context *ice,
                           const struct shader_info *info,
                           gl_shader_stage last_stage,
                           struct brw_gs_prog_key *key);
   void (*populate_fs_key)(const struct crocus_context *ice,
                           const struct shader_info *info,
                           struct brw_wm_prog_key *key);
   void (*populate_cs_key)(const struct crocus_context *ice,
                           struct brw_cs_prog_key *key);
   void (*lost_genx_state)(struct crocus_context *ice, struct crocus_batch *batch);

   void (*finish_batch)(struct crocus_batch *batch); /* haswell only */

   void (*upload_urb_fence)(struct crocus_batch *batch); /* gen4/5 only */

   /* Blitter (BLT engine) paths. */
   bool (*blit_blt)(struct crocus_batch *batch,
                    const struct pipe_blit_info *info);
   bool (*copy_region_blt)(struct crocus_batch *batch,
                           struct crocus_resource *dst,
                           unsigned dst_level,
                           unsigned dstx, unsigned dsty, unsigned dstz,
                           struct crocus_resource *src,
                           unsigned src_level,
                           const struct pipe_box *src_box);
   bool (*calculate_urb_fence)(struct crocus_batch *batch, unsigned csize,
                               unsigned vsize, unsigned sfsize);
   void (*batch_reset_dirty)(struct crocus_batch *batch);
   unsigned (*translate_prim_type)(enum pipe_prim_type prim, uint8_t verts_per_patch);

   void (*update_so_strides)(struct crocus_context *ice,
                             uint16_t *strides);

   uint32_t (*get_so_offset)(struct pipe_stream_output_target *tgt);
};
|
||||
|
||||
struct crocus_screen {
   struct pipe_screen base;

   /** Reference count; see crocus_pscreen_ref()/crocus_pscreen_unref(). */
   uint32_t refcount;

   /** Global slab allocator for crocus_transfer_map objects */
   struct slab_parent_pool transfer_pool;

   /** drm device file descriptor, shared with bufmgr, do not close. */
   int fd;

   /**
    * drm device file descriptor used for window system integration, owned
    * by crocus_screen (closed in crocus_screen_destroy), can be a
    * different DRM instance than fd.
    */
   int winsys_fd;

   /** PCI ID for our GPU device */
   int pci_id;

   /** Skip actual hardware execution (INTEL_NO_HW or devinfo->no_hw). */
   bool no_hw;

   /** Generation-specific (genxml) function pointers. */
   struct crocus_vtable vtbl;

   /** Global program_string_id counter (see get_program_string_id()) */
   unsigned program_id;

   /** Precompile shaders at link time?  (Can be disabled for debugging.) */
   bool precompile;

   /** driconf options and application workarounds */
   struct {
      /** Dual color blend by location instead of index (for broken apps) */
      bool dual_color_blend_by_location;
      bool disable_throttling;
      bool always_flush_cache;
   } driconf;

   /** Total subslice count, from intel_device_info_subslice_total(). */
   unsigned subslice_total;

   /** Mappable aperture size in bytes, queried at screen creation. */
   uint64_t aperture_bytes;

   struct intel_device_info devinfo;
   struct isl_device isl_dev;
   struct crocus_bufmgr *bufmgr;
   struct brw_compiler *compiler;
   struct crocus_monitor_config *monitor_cfg;

   /** Does the kernel report bit-6 address swizzling for tiled BOs? */
   bool has_swizzling;

   /** Default L3 configurations for render and compute (gen7 only). */
   const struct intel_l3_config *l3_config_3d;
   const struct intel_l3_config *l3_config_cs;

   struct disk_cache *disk_cache;
};
|
||||
|
||||
struct pipe_screen *
|
||||
crocus_screen_create(int fd, const struct pipe_screen_config *config);
|
||||
|
||||
void crocus_screen_destroy(struct crocus_screen *screen);
|
||||
|
||||
/**
 * Take a reference on a crocus screen, returning the same pipe_screen.
 */
UNUSED static inline struct pipe_screen *
crocus_pscreen_ref(struct pipe_screen *pscreen)
{
   struct crocus_screen *screen = (struct crocus_screen *) pscreen;

   p_atomic_inc(&screen->refcount);
   return pscreen;
}
|
||||
|
||||
/**
 * Drop a reference on a crocus screen, destroying it when the count
 * reaches zero.
 */
UNUSED static inline void
crocus_pscreen_unref(struct pipe_screen *pscreen)
{
   struct crocus_screen *screen = (struct crocus_screen *) pscreen;

   if (p_atomic_dec_zero(&screen->refcount))
      crocus_screen_destroy(screen);
}
|
||||
|
||||
bool
|
||||
crocus_is_format_supported(struct pipe_screen *pscreen,
|
||||
enum pipe_format format,
|
||||
enum pipe_texture_target target,
|
||||
unsigned sample_count,
|
||||
unsigned storage_sample_count,
|
||||
unsigned usage);
|
||||
|
||||
void crocus_disk_cache_init(struct crocus_screen *screen);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,16 @@
|
|||
Quick TODO list from what I can see:
|
||||
|
||||
General:
|
||||
Re-emit SURFACE_STATE_BASE_ADDRESS at the top of every batch
|
||||
|
||||
Gen4:
|
||||
rgb32 issue
|
||||
|
||||
Gen5:
|
||||
rgb32 issue
|
||||
|
||||
Gen6:
|
||||
vec4 push constants
|
||||
|
||||
Gen7:
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
// crocus specific driconf options
|
||||
|
||||
DRI_CONF_SECTION_DEBUG
|
||||
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
|
||||
DRI_CONF_DISABLE_THROTTLING(false)
|
||||
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_PERFORMANCE
|
||||
DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",)
|
||||
DRI_CONF_SECTION_END
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
static inline struct blorp_address
|
||||
dynamic_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
|
||||
{
|
||||
struct crocus_batch *batch = blorp_batch->driver_batch;
|
||||
|
||||
return (struct blorp_address) {
|
||||
.buffer = batch->state.bo,
|
||||
.offset = offset,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
static inline struct blorp_address
|
||||
instruction_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
|
||||
{
|
||||
struct crocus_batch *batch = blorp_batch->driver_batch;
|
||||
|
||||
return (struct blorp_address) {
|
||||
.buffer = batch->ice->shaders.cache_bo,
|
||||
.offset = offset,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Emit a disabled VS_STATE packet into dynamic state for BLORP operations
 * (BLORP bypasses the vertex shader), returning its address.  The URB
 * allocation fields must still match the current URB configuration.
 */
static struct blorp_address
blorp_emit_vs_state(struct blorp_batch *blorp_batch)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;

   uint32_t offset;
   blorp_emit_dynamic(blorp_batch, GENX(VS_STATE), vs, 64, &offset) {
      vs.Enable = false;
      vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
#if GFX_VER == 5
      /* NOTE(review): gen5 appears to program the entry count in units
       * of 4 (hence >> 2) — confirm against the ILK PRM.
       */
      vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> 2;
#else
      vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries;
#endif
   }

   return dynamic_state_address(blorp_batch, offset);
}
|
||||
|
||||
/**
 * Emit an SF_STATE packet into dynamic state for BLORP, returning its
 * address.  Configures the strips-and-fans unit to run BLORP's SF program
 * with viewport transform and culling disabled.
 */
static struct blorp_address
blorp_emit_sf_state(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;
   const struct brw_sf_prog_data *prog_data = params->sf_prog_data;

   uint32_t offset;
   blorp_emit_dynamic(blorp_batch, GENX(SF_STATE), sf, 64, &offset) {
#if GFX_VER == 4
      /* Gen4 kernel pointers are relative to the instruction cache BO. */
      sf.KernelStartPointer =
         instruction_state_address(blorp_batch, params->sf_prog_kernel);
#else
      sf.KernelStartPointer = params->sf_prog_kernel;
#endif
      sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
      sf.VertexURBEntryReadLength = prog_data->urb_read_length;
      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
      sf.DispatchGRFStartRegisterForURBData = 3;
      sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;
      sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;

      /* Thread limit differs per generation; also bounded by URB entries. */
#if GFX_VER == 5
      sf.MaximumNumberofThreads = MIN2(48, batch->ice->urb.nr_sf_entries) - 1;
#else
      sf.MaximumNumberofThreads = MIN2(24, batch->ice->urb.nr_sf_entries) - 1;
#endif
      sf.ViewportTransformEnable = false;

      sf.CullMode = CULLMODE_NONE;
   }

   return dynamic_state_address(blorp_batch, offset);
}
|
||||
|
||||
/* Emit a legacy (gfx4-5) WM_STATE structure into the dynamic state buffer
 * for a blorp operation and return its address.
 *
 * Sampler state is only set up when the operation reads a source surface
 * (params->src.enabled); the pixel-shader dispatch fields are only set up
 * when a WM program was provided (params->wm_prog_data may be NULL for
 * e.g. depth-only operations).
 */
static struct blorp_address
blorp_emit_wm_state(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
   const struct brw_wm_prog_data *prog_data = params->wm_prog_data;

   uint32_t offset;
   blorp_emit_dynamic(blorp_batch, GENX(WM_STATE), wm, 64, &offset) {
      if (params->src.enabled) {
         /* Iron Lake can't do sampler prefetch */
         wm.SamplerCount = (GFX_VER != 5);
         wm.BindingTableEntryCount = 2;
         uint32_t sampler = blorp_emit_sampler_state(blorp_batch);
         wm.SamplerStatePointer = dynamic_state_address(blorp_batch, sampler);
      }

      if (prog_data) {
         wm.DispatchGRFStartRegisterForConstantSetupData0 =
            prog_data->base.dispatch_grf_start_reg;
         /* Two URB slots (8 dwords) per varying input. */
         wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
         wm.SetupURBEntryReadOffset = 0;

         wm.DepthCoefficientURBReadOffset = 1;
         wm.PixelShaderKillsPixel = prog_data->uses_kill;
         wm.ThreadDispatchEnable = true;
         wm.EarlyDepthTestEnable = true;

         wm._8PixelDispatchEnable = prog_data->dispatch_8;
         wm._16PixelDispatchEnable = prog_data->dispatch_16;
         wm._32PixelDispatchEnable = prog_data->dispatch_32;

#if GFX_VER == 4
         /* gfx4: single kernel pointer, relocated into the instruction
          * buffer.
          */
         wm.KernelStartPointer0 =
            instruction_state_address(blorp_batch, params->wm_prog_kernel);
         wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
#else
         /* gfx5: one kernel pointer per SIMD dispatch mode, each offset
          * by the per-mode program offset within the compiled kernel.
          */
         wm.KernelStartPointer0 = params->wm_prog_kernel +
                                  brw_wm_prog_data_prog_offset(prog_data, wm, 0);
         wm.KernelStartPointer1 = params->wm_prog_kernel +
                                  brw_wm_prog_data_prog_offset(prog_data, wm, 1);
         wm.KernelStartPointer2 = params->wm_prog_kernel +
                                  brw_wm_prog_data_prog_offset(prog_data, wm, 2);
         wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
         wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
         wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
#endif
      }

      /* Field is encoded as threads minus one. */
      wm.MaximumNumberofThreads =
         blorp_batch->blorp->compiler->devinfo->max_wm_threads - 1;
   }

   return dynamic_state_address(blorp_batch, offset);
}
|
||||
|
||||
/* Emit a legacy (gfx4-5) COLOR_CALC_STATE into the dynamic state buffer
 * and return its address.  Only the CC viewport pointer is filled in;
 * every other field is left at its zero-initialized default.
 */
static struct blorp_address
blorp_emit_color_calc_state(struct blorp_batch *blorp_batch)
{
   uint32_t cc_viewport = blorp_emit_cc_viewport(blorp_batch);

   uint32_t offset;
   blorp_emit_dynamic(blorp_batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
      cc.CCViewportStatePointer = dynamic_state_address(blorp_batch, cc_viewport);
   }

   return dynamic_state_address(blorp_batch, offset);
}
|
||||
|
||||
/* Emit the full legacy (gfx4-5) pipeline setup for a blorp operation:
 * URB configuration, the pipelined state pointers (VS/SF/WM/CC, with GS
 * and CLIP disabled), the URB fence, and empty constant-buffer state.
 *
 * NOTE(review): emission order here is assumed to be hardware-mandated
 * (URB config before state pointers before URB fence) — do not reorder.
 */
static void
blorp_emit_pipeline(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
   struct crocus_batch *batch = blorp_batch->driver_batch;

   emit_urb_config(blorp_batch, params, NULL);

   blorp_emit(blorp_batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
      pp.PointertoVSState = blorp_emit_vs_state(blorp_batch);
      /* Blorp draws screen-space rects; GS and CLIP stages are bypassed. */
      pp.GSEnable = false;
      pp.ClipEnable = false;
      pp.PointertoSFState = blorp_emit_sf_state(blorp_batch, params);
      pp.PointertoWMState = blorp_emit_wm_state(blorp_batch, params);
      pp.PointertoColorCalcState = blorp_emit_color_calc_state(blorp_batch);
   }

   /* The URB fence must be (re)emitted through the driver vtable after
    * the pipelined pointers change.
    */
   batch->screen->vtbl.upload_urb_fence(batch);

   /* Emit zeroed CS URB / constant-buffer packets: blorp uses no curbe. */
   blorp_emit(blorp_batch, GENX(CS_URB_STATE), curb);
   blorp_emit(blorp_batch, GENX(CONSTANT_BUFFER), curb);
}
|
|
@ -0,0 +1,90 @@
|
|||
# Copyright © 2017-2019 Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Gen-independent source files for the crocus gallium driver.
# Gen-specific sources (blorp/query/state/blt) are listed separately in
# the per-hardware-version static libraries below.
files_libcrocus = files(
  'gen4_blorp_exec.h',
  'driinfo_crocus.h',
  'crocus_batch.c',
  'crocus_batch.h',
  'crocus_blit.c',
  'crocus_bufmgr.c',
  'crocus_bufmgr.h',
  'crocus_clear.c',
  'crocus_context.c',
  'crocus_context.h',
  'crocus_draw.c',
  'crocus_fence.c',
  'crocus_fence.h',
  'crocus_fine_fence.c',
  'crocus_fine_fence.h',
  'crocus_formats.c',
  'crocus_genx_macros.h',
  'crocus_genx_protos.h',
  'crocus_monitor.c',
  'crocus_pipe.h',
  'crocus_pipe_control.c',
  'crocus_program.c',
  'crocus_program_cache.c',
  'crocus_resolve.c',
  'crocus_resource.c',
  'crocus_resource.h',
  'crocus_screen.c',
  'crocus_screen.h',
  'crocus_disk_cache.c',
)
|
||||
|
||||
# Build the gen-specific sources once per supported hardware version
# (gfx 4.0, 4.5, 5.0, 6.0, 7.0, 7.5), selecting the generation at compile
# time via -DGFX_VERx10.
crocus_per_hw_ver_libs = []
foreach v : ['40', '45', '50', '60', '70', '75']
  crocus_per_hw_ver_libs += static_library(
    'crocus_per_hw_ver@0@'.format(v),
    ['crocus_blorp.c', 'crocus_query.c', 'crocus_state.c', 'crocus_blt.c', gen_xml_pack],
    include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_intel],
    c_args : [
      no_override_init_args, c_sse2_args,
      '-DGFX_VERx10=@0@'.format(v),
    ],
    gnu_symbol_visibility : 'hidden',
    dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
  )
endforeach
|
||||
|
||||
# Main crocus driver library: gen-independent sources linked together with
# the per-hardware-version libraries and the shared Intel helper libraries.
libcrocus = static_library(
  'crocus',
  [files_libcrocus, gen_xml_pack],
  include_directories : [
    inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_intel,
    inc_gallium_drivers,
    # these should not be necessary, but main/macros.h...
    inc_mesa, inc_mapi
  ],
  c_args : [c_sse2_args],
  cpp_args : [c_sse2_args],
  gnu_symbol_visibility : 'hidden',
  dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_libintel_common, idep_nir_headers],
  link_with : [
    crocus_per_hw_ver_libs, libintel_compiler, libintel_dev, libisl,
    libblorp, libintel_perf
  ],
)
|
||||
|
||||
# Dependency consumed by the gallium targets (dri, nine, ...); GALLIUM_CROCUS
# gates the crocus entrypoint in target-helpers/drm_helper.h.
driver_crocus = declare_dependency(
  compile_args : '-DGALLIUM_CROCUS',
  link_with : [libcrocus, libcrocuswinsys],
)
|
|
@ -129,6 +129,12 @@ if with_gallium_tegra
|
|||
else
|
||||
driver_tegra = declare_dependency()
|
||||
endif
|
||||
if with_gallium_crocus
|
||||
subdir('winsys/crocus/drm')
|
||||
subdir('drivers/crocus')
|
||||
else
|
||||
driver_crocus = declare_dependency()
|
||||
endif
|
||||
if with_gallium_iris
|
||||
subdir('winsys/iris/drm')
|
||||
subdir('drivers/iris')
|
||||
|
|
|
@ -64,7 +64,7 @@ libgallium_nine = shared_library(
|
|||
dep_selinux, dep_libdrm, dep_llvm, dep_thread,
|
||||
idep_xmlconfig, idep_mesautil, idep_nir,
|
||||
driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
|
||||
driver_i915, driver_svga, driver_iris
|
||||
driver_i915, driver_svga, driver_iris, driver_crocus
|
||||
],
|
||||
name_prefix : '',
|
||||
version : '.'.join(nine_version),
|
||||
|
|
|
@ -58,7 +58,7 @@ libgallium_dri = shared_library(
|
|||
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
||||
driver_tegra, driver_i915, driver_svga, driver_virgl,
|
||||
driver_swr, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
|
||||
driver_asahi
|
||||
driver_asahi, driver_crocus
|
||||
],
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
|
@ -98,6 +98,7 @@ foreach d : [[with_gallium_kmsro, [
|
|||
[with_gallium_panfrost, 'panfrost_dri.so'],
|
||||
[with_gallium_etnaviv, 'etnaviv_dri.so'],
|
||||
[with_gallium_tegra, 'tegra_dri.so'],
|
||||
[with_gallium_crocus, 'crocus_dri.so'],
|
||||
[with_gallium_iris, 'iris_dri.so'],
|
||||
[with_gallium_i915, 'i915_dri.so'],
|
||||
[with_gallium_r300, 'r300_dri.so'],
|
||||
|
|
|
@ -42,6 +42,10 @@ DEFINE_LOADER_DRM_ENTRYPOINT(i915)
|
|||
DEFINE_LOADER_DRM_ENTRYPOINT(iris)
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_CROCUS)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(crocus)
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_NOUVEAU)
|
||||
DEFINE_LOADER_DRM_ENTRYPOINT(nouveau)
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CROCUS_DRM_PUBLIC_H
#define CROCUS_DRM_PUBLIC_H

struct pipe_screen;
struct pipe_screen_config;

/**
 * Create a crocus pipe_screen for the given DRM device fd.
 *
 * The implementation duplicates the fd, so the caller retains ownership
 * of \p drm_fd.  Returns NULL on failure.
 */
struct pipe_screen *
crocus_drm_screen_create(int drm_fd, const struct pipe_screen_config *config);

#endif /* CROCUS_DRM_PUBLIC_H */
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "util/os_file.h"
|
||||
|
||||
#include "crocus_drm_public.h"
|
||||
#include "crocus/crocus_screen.h"
|
||||
|
||||
struct pipe_screen *
|
||||
crocus_drm_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
{
|
||||
int newfd = os_dupfd_cloexec(fd);
|
||||
if (newfd < 0)
|
||||
return NULL;
|
||||
return crocus_screen_create(newfd, config);
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright © 2017 Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# DRM winsys glue for crocus: wraps the device fd and hands off to
# crocus_screen_create() (see crocus_drm_winsys.c).
libcrocuswinsys = static_library(
  'crocuswinsys',
  files('crocus_drm_winsys.c'),
  include_directories : [
    inc_src, inc_include,
    inc_gallium, inc_gallium_aux, inc_gallium_drivers,
  ],
  gnu_symbol_visibility : 'hidden',
)
|
|
@ -829,7 +829,7 @@ decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
|
|||
struct intel_field_iterator iter;
|
||||
intel_field_iterator_init(&iter, inst, p, 0, false);
|
||||
while (intel_field_iterator_next(&iter)) {
|
||||
if (str_ends_with(iter.name, "Pointer")) {
|
||||
if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
|
||||
state_offset = iter.raw_value;
|
||||
break;
|
||||
}
|
||||
|
@ -900,6 +900,13 @@ decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
|
|||
decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
|
||||
}
|
||||
|
||||
/* Batch-decoder hook for 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: decode the
 * single DEPTH_STENCIL_STATE structure the packet points at.
 */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
|
||||
|
||||
static void
|
||||
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
|
||||
const uint32_t *p)
|
||||
|
@ -1208,6 +1215,7 @@ struct custom_decoder {
|
|||
{ "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
|
||||
{ "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
|
||||
{ "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
|
||||
{ "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
|
||||
{ "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
|
||||
{ "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
|
||||
{ "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
|
||||
|
|
|
@ -76,6 +76,7 @@ static const struct {
|
|||
{ 0x8086, "i915", i915_chip_ids, ARRAY_SIZE(i915_chip_ids) },
|
||||
{ 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
|
||||
{ 0x8086, "iris", NULL, -1, is_kernel_i915 },
|
||||
{ 0x8086, "crocus", NULL, -1, is_kernel_i915 },
|
||||
{ 0x1002, "radeon", r100_chip_ids, ARRAY_SIZE(r100_chip_ids) },
|
||||
{ 0x1002, "r200", r200_chip_ids, ARRAY_SIZE(r200_chip_ids) },
|
||||
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },
|
||||
|
|
Loading…
Reference in New Issue