crocus: initial gallium driver for Intel gfx 4-7

This is a gallium driver for the Intel gfx 4-7 GPUs.

It was initially cloned from the iris driver by Ilia Mirkin,
then I ported over large reams of code from i965 until it worked.

Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11146>
Dave Airlie 2021-06-01 13:14:51 +10:00
parent 8da92b5c0a
commit f3630548f1
51 changed files with 28508 additions and 6 deletions


@@ -231,6 +231,7 @@ with_gallium_v3d = gallium_drivers.contains('v3d')
with_gallium_panfrost = gallium_drivers.contains('panfrost')
with_gallium_etnaviv = gallium_drivers.contains('etnaviv')
with_gallium_tegra = gallium_drivers.contains('tegra')
with_gallium_crocus = gallium_drivers.contains('crocus')
with_gallium_iris = gallium_drivers.contains('iris')
with_gallium_i915 = gallium_drivers.contains('i915')
with_gallium_svga = gallium_drivers.contains('svga')
@@ -284,7 +285,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
if with_swrast_vk and not with_gallium_softpipe
error('swrast vulkan requires gallium swrast')
@@ -795,7 +796,7 @@ if with_gallium_st_nine
error('The nine state tracker requires gallium softpipe/llvmpipe.')
elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
or with_gallium_r300 or with_gallium_svga or with_gallium_i915
or with_gallium_iris)
or with_gallium_iris or with_gallium_crocus)
error('The nine state tracker requires at least one non-swrast gallium driver.')
endif
if not with_dri3


@@ -67,7 +67,7 @@ option(
choices : [
'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi'
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi', 'crocus'
],
description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)


@@ -70,6 +70,7 @@ static const struct pipe_loader_ops pipe_loader_drm_ops;
static const struct drm_driver_descriptor *driver_descriptors[] = {
&i915_driver_descriptor,
&iris_driver_descriptor,
&crocus_driver_descriptor,
&nouveau_driver_descriptor,
&r300_driver_descriptor,
&r600_driver_descriptor,


@@ -112,6 +112,26 @@ DRM_DRIVER_DESCRIPTOR(iris, iris_driconf, ARRAY_SIZE(iris_driconf))
DRM_DRIVER_DESCRIPTOR_STUB(iris)
#endif
#ifdef GALLIUM_CROCUS
#include "crocus/drm/crocus_drm_public.h"
static struct pipe_screen *
pipe_crocus_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
screen = crocus_drm_screen_create(fd, config);
return screen ? debug_screen_wrap(screen) : NULL;
}
const driOptionDescription crocus_driconf[] = {
#include "crocus/driinfo_crocus.h"
};
DRM_DRIVER_DESCRIPTOR(crocus, crocus_driconf, ARRAY_SIZE(crocus_driconf))
#else
DRM_DRIVER_DESCRIPTOR_STUB(crocus)
#endif
#ifdef GALLIUM_NOUVEAU
#include "nouveau/drm/nouveau_drm_public.h"


@@ -6,6 +6,7 @@ struct pipe_screen_config;
extern const struct drm_driver_descriptor i915_driver_descriptor;
extern const struct drm_driver_descriptor iris_driver_descriptor;
extern const struct drm_driver_descriptor crocus_driver_descriptor;
extern const struct drm_driver_descriptor nouveau_driver_descriptor;
extern const struct drm_driver_descriptor r300_driver_descriptor;
extern const struct drm_driver_descriptor r600_driver_descriptor;

File diff suppressed because it is too large


@@ -0,0 +1,325 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_BATCH_DOT_H
#define CROCUS_BATCH_DOT_H
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "util/u_dynarray.h"
#include "common/intel_decoder.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_fence.h"
#include "crocus_fine_fence.h"
#include "crocus_bufmgr.h"
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
* Address, which means that we can't put binding tables beyond 64kB. This
* effectively limits the maximum statebuffer size to 64kB.
*/
#define MAX_STATE_SIZE (64 * 1024)
/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)
enum crocus_batch_name {
CROCUS_BATCH_RENDER,
CROCUS_BATCH_COMPUTE,
};
#define CROCUS_BATCH_COUNT 2
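/**
 * A GPU address for relocation-based batches: a BO plus an offset into it,
 * along with the RELOC_* flags to apply when the relocation is emitted.
 */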
struct crocus_address {
struct crocus_bo *bo;
int32_t offset;
uint32_t reloc_flags;
};
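/** A growable array of drm_i915_gem_relocation_entry for one buffer. */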
struct crocus_reloc_list {
struct drm_i915_gem_relocation_entry *relocs;
int reloc_count;
int reloc_array_size;
};
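/**
 * A command or state buffer that can be grown at runtime: the backing BO,
 * its CPU mapping and write cursor, the relocation list emitted against it,
 * and bookkeeping for the old BO while a grow is in progress.
 */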
struct crocus_growing_bo {
struct crocus_bo *bo;
void *map;
void *map_next;
struct crocus_bo *partial_bo;
void *partial_bo_map;
unsigned partial_bytes;
struct crocus_reloc_list relocs;
unsigned used;
};
struct crocus_batch {
struct crocus_context *ice;
struct crocus_screen *screen;
struct pipe_debug_callback *dbg;
struct pipe_device_reset_callback *reset;
/** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
enum crocus_batch_name name;
/** buffers: command, state */
struct crocus_growing_bo command, state;
/** Size of the primary batch if we've moved on to a secondary. */
unsigned primary_batch_size;
bool state_base_address_emitted;
uint8_t pipe_controls_since_last_cs_stall;
uint32_t hw_ctx_id;
uint32_t valid_reloc_flags;
bool use_shadow_copy;
bool no_wrap;
/** The validation list */
struct drm_i915_gem_exec_object2 *validation_list;
struct crocus_bo **exec_bos;
int exec_count;
int exec_array_size;
/** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
* instruction is an MI_BATCH_BUFFER_END).
*/
bool noop_enabled;
/**
* A list of crocus_syncobjs associated with this batch.
*
* The first list entry will always be a signalling sync-point, indicating
* that this batch has completed. The others are likely to be sync-points
* to wait on before executing the batch.
*/
struct util_dynarray syncobjs;
/** A list of drm_i915_exec_fences to have execbuf signal or wait on */
struct util_dynarray exec_fences;
/** The amount of aperture space (in bytes) used by all exec_bos */
int aperture_space;
struct {
/** Uploader to use for sequence numbers */
struct u_upload_mgr *uploader;
/** GPU buffer and CPU map where our seqno's will be written. */
struct crocus_state_ref ref;
uint32_t *map;
/** The sequence number to write the next time we add a fence. */
uint32_t next;
} fine_fences;
/** A seqno (and syncobj) for the last batch that was submitted. */
struct crocus_fine_fence *last_fence;
/** List of other batches which we might need to flush to use a BO */
struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];
struct {
/**
* Set of struct crocus_bo * that have been rendered to within this
* batchbuffer and would need flushing before being used from another
* cache domain that isn't coherent with it (i.e. the sampler).
*/
struct hash_table *render;
/**
* Set of struct crocus_bo * that have been used as a depth buffer within
* this batchbuffer and would need flushing before being used from
* another cache domain that isn't coherent with it (i.e. the sampler).
*/
struct set *depth;
} cache;
struct intel_batch_decode_ctx decoder;
struct hash_table_u64 *state_sizes;
/** Have we emitted any draw calls to this batch? */
bool contains_draw;
/** Batch contains fence signal operation. */
bool contains_fence_signal;
};
static inline bool
batch_has_fine_fence(struct crocus_batch *batch)
{
return !!batch->fine_fences.uploader;
}
#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)
void crocus_init_batch(struct crocus_context *ctx,
enum crocus_batch_name name,
int priority);
void crocus_batch_free(struct crocus_batch *batch);
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);
void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)
bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);
bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);
#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
bool writable);
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
struct crocus_bo *target, uint32_t target_offset,
unsigned int reloc_flags);
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
struct crocus_bo *target, uint32_t target_offset,
unsigned int reloc_flags);
enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);
void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
unsigned used, unsigned new_size);
static inline unsigned
crocus_batch_bytes_used(struct crocus_batch *batch)
{
return batch->command.map_next - batch->command.map;
}
/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this flushes the batch once it has reached its
 * target size, or grows the command buffer (up to MAX_BATCH_SIZE) when
 * wrapping is not allowed.
 *
 * Most callers want crocus_get_command_space() instead.
 */
static inline void
crocus_require_command_space(struct crocus_batch *batch, unsigned size)
{
const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
unsigned used = crocus_batch_bytes_used(batch);
if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
} else if (used + size >= batch->command.bo->size) {
const unsigned new_size =
MIN2(batch->command.bo->size + batch->command.bo->size / 2,
MAX_BATCH_SIZE);
crocus_grow_buffer(batch, false, used, new_size);
batch->command.map_next = (void *)batch->command.map + used;
assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
}
}
/**
* Allocate space in the current command buffer, and return a pointer
* to the mapped area so the caller can write commands there.
*
* This should be called whenever emitting commands.
*/
static inline void *
crocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
{
crocus_require_command_space(batch, bytes);
void *map = batch->command.map_next;
batch->command.map_next += bytes;
return map;
}
/**
* Helper to emit GPU commands - allocates space, copies them there.
*/
static inline void
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
{
void *map = crocus_get_command_space(batch, size);
memcpy(map, data, size);
}
/**
* Get a pointer to the batch's signalling syncobj. Does not refcount.
*/
static inline struct crocus_syncobj *
crocus_batch_get_signal_syncobj(struct crocus_batch *batch)
{
/* The signalling syncobj is the first one in the list. */
struct crocus_syncobj *syncobj =
((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
return syncobj;
}
/**
* Take a reference to the batch's signalling syncobj.
*
* Callers can use this to wait for the current batch under construction
* to complete (after flushing it).
*/
static inline void
crocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
struct crocus_syncobj **out_syncobj)
{
struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
}
/**
* Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
*/
static inline void
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
uint32_t size)
{
if (ht) {
_mesa_hash_table_u64_insert(ht, offset_from_base,
(void *)(uintptr_t)size);
}
}
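/** Return true if a CPU pointer falls within the batch's mapped state buffer. */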
static inline bool
crocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
{
return (char *)p >= (char *)batch->state.map &&
(char *)p < (char *)batch->state.map + batch->state.bo->size;
}
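/**
 * Flush the batch if adding \param size more bytes of state would push the
 * state buffer past its target size (STATE_SZ).
 */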
static inline void
crocus_require_statebuffer_space(struct crocus_batch *batch, int size)
{
if (batch->state.used + size >= STATE_SZ)
crocus_batch_flush(batch);
}
#endif


@@ -0,0 +1,836 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/ralloc.h"
#include "intel/blorp/blorp.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond)
{
util_blitter_save_vertex_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_VERTEX]);
util_blitter_save_tessctrl_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]);
util_blitter_save_tesseval_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]);
util_blitter_save_geometry_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]);
util_blitter_save_so_targets(ice->blitter, ice->state.so_targets,
(struct pipe_stream_output_target**)ice->state.so_target);
util_blitter_save_vertex_buffer_slot(ice->blitter, ice->state.vertex_buffers);
util_blitter_save_vertex_elements(ice->blitter, (void *)ice->state.cso_vertex_elements);
if (op & CROCUS_SAVE_FRAGMENT_STATE) {
util_blitter_save_blend(ice->blitter, ice->state.cso_blend);
util_blitter_save_depth_stencil_alpha(ice->blitter, ice->state.cso_zsa);
util_blitter_save_stencil_ref(ice->blitter, &ice->state.stencil_ref);
util_blitter_save_fragment_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]);
util_blitter_save_sample_mask(ice->blitter, ice->state.sample_mask);
util_blitter_save_rasterizer(ice->blitter, ice->state.cso_rast);
util_blitter_save_scissor(ice->blitter, &ice->state.scissors[0]);
util_blitter_save_viewport(ice->blitter, &ice->state.viewports[0]);
util_blitter_save_fragment_constant_buffer_slot(ice->blitter, &ice->state.shaders[MESA_SHADER_FRAGMENT].constbufs[0]);
}
if (!render_cond)
util_blitter_save_render_condition(ice->blitter,
(struct pipe_query *)ice->condition.query,
ice->condition.condition,
ice->condition.mode);
// util_blitter_save_scissor(ice->blitter, &ice->scissors[0]);
if (op & CROCUS_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(ice->blitter, &ice->state.framebuffer);
if (op & CROCUS_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(ice->blitter, 1, (void **)ice->state.shaders[MESA_SHADER_FRAGMENT].samplers);
util_blitter_save_fragment_sampler_views(ice->blitter, 1, (struct pipe_sampler_view **)ice->state.shaders[MESA_SHADER_FRAGMENT].textures);
}
}
/**
* Helper function for handling mirror image blits.
*
* If coord0 > coord1, swap them and return "true" (mirrored).
*/
static bool
apply_mirror(float *coord0, float *coord1)
{
if (*coord0 > *coord1) {
float tmp = *coord0;
*coord0 = *coord1;
*coord1 = tmp;
return true;
}
return false;
}
/**
* Compute the number of pixels to clip for each side of a rect
*
* \param x0 The rect's left coordinate
* \param y0 The rect's bottom coordinate
* \param x1 The rect's right coordinate
* \param y1 The rect's top coordinate
* \param min_x The clipping region's left coordinate
* \param min_y The clipping region's bottom coordinate
* \param max_x The clipping region's right coordinate
* \param max_y The clipping region's top coordinate
* \param clipped_x0 The number of pixels to clip from the left side
* \param clipped_y0 The number of pixels to clip from the bottom side
* \param clipped_x1 The number of pixels to clip from the right side
* \param clipped_y1 The number of pixels to clip from the top side
*
* \return false if we clip everything away, true otherwise
*/
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
float min_x, float min_y, float max_x, float max_y,
float *clipped_x0, float *clipped_y0,
float *clipped_x1, float *clipped_y1)
{
/* If we are going to clip everything away, stop. */
if (!(min_x <= max_x &&
min_y <= max_y &&
x0 <= max_x &&
y0 <= max_y &&
min_x <= x1 &&
min_y <= y1 &&
x0 <= x1 &&
y0 <= y1)) {
return false;
}
if (x0 < min_x)
*clipped_x0 = min_x - x0;
else
*clipped_x0 = 0;
if (max_x < x1)
*clipped_x1 = x1 - max_x;
else
*clipped_x1 = 0;
if (y0 < min_y)
*clipped_y0 = min_y - y0;
else
*clipped_y0 = 0;
if (max_y < y1)
*clipped_y1 = y1 - max_y;
else
*clipped_y1 = 0;
return true;
}
/**
* Clips a coordinate (left, right, top or bottom) for the src or dst rect
* (whichever requires the largest clip) and adjusts the coordinate
* for the other rect accordingly.
*
* \param mirror true if mirroring is required
* \param src the source rect coordinate (for example src_x0)
* \param dst0 the dst rect coordinate (for example dst_x0)
* \param dst1 the opposite dst rect coordinate (for example dst_x1)
* \param clipped_dst0 number of pixels to clip from the dst coordinate
* \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
* \param scale the src vs dst scale involved for that coordinate
* \param is_left_or_bottom true if we are clipping the left or bottom sides
* of the rect.
*/
static void
clip_coordinates(bool mirror,
float *src, float *dst0, float *dst1,
float clipped_dst0,
float clipped_dst1,
float scale,
bool is_left_or_bottom)
{
/* When clipping we need to add or subtract pixels from the original
* coordinates depending on whether we are acting on the left/bottom
* or right/top sides of the rect respectively. We assume we have to
* add them in the code below, and multiply by -1 when we should
* subtract.
*/
int mult = is_left_or_bottom ? 1 : -1;
if (!mirror) {
*dst0 += clipped_dst0 * mult;
*src += clipped_dst0 * scale * mult;
} else {
*dst1 -= clipped_dst1 * mult;
*src += clipped_dst1 * scale * mult;
}
}
/**
* Apply a scissor rectangle to blit coordinates.
*
* Returns true if the blit was entirely scissored away.
*/
static bool
apply_blit_scissor(const struct pipe_scissor_state *scissor,
float *src_x0, float *src_y0,
float *src_x1, float *src_y1,
float *dst_x0, float *dst_y0,
float *dst_x1, float *dst_y1,
bool mirror_x, bool mirror_y)
{
float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1;
/* Compute number of pixels to scissor away. */
if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1,
scissor->minx, scissor->miny,
scissor->maxx, scissor->maxy,
&clip_dst_x0, &clip_dst_y0,
&clip_dst_x1, &clip_dst_y1))
return true;
// XXX: comments assume source clipping, which we don't do
/* When clipping any of the two rects we need to adjust the coordinates
* in the other rect considering the scaling factor involved. To obtain
* the best precision we want to make sure that we only clip once per
* side to avoid accumulating errors due to the scaling adjustment.
*
* For example, if src_x0 and dst_x0 need both to be clipped we want to
* avoid the situation where we clip src_x0 first, then adjust dst_x0
* accordingly but then we realize that the resulting dst_x0 still needs
* to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are
* applying scaling factors to adjust the coordinates in each clipping
* pass we lose some precision and that can affect the results of the
* blorp blit operation slightly. What we want to do here is detect the
* rect that we should clip first for each side so that when we adjust
* the other rect we ensure the resulting coordinate does not need to be
* clipped again.
*
* The code below implements this by comparing the number of pixels that
* we need to clip for each side of both rects considering the scales
* involved. For example, clip_src_x0 represents the number of pixels
* to be clipped for the src rect's left side, so if clip_src_x0 = 5,
* clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more
* from the dst rect so we should clip dst_x0 only and adjust src_x0.
* This is because clipping 4 pixels in the dst is equivalent to
* clipping 4 * 2 = 8 > 5 in the src.
*/
if (*src_x0 == *src_x1 || *src_y0 == *src_y1
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1)
return true;
float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);
float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0);
/* Clip left side */
clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1,
clip_dst_x0, clip_dst_x1, scale_x, true);
/* Clip right side */
clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0,
clip_dst_x1, clip_dst_x0, scale_x, false);
/* Clip bottom side */
clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1,
clip_dst_y0, clip_dst_y1, scale_y, true);
/* Clip top side */
clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0,
clip_dst_y1, clip_dst_y0, scale_y, false);
/* Check for invalid bounds
* Can't blit for 0-dimensions
*/
return *src_x0 == *src_x1 || *src_y0 == *src_y1
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1;
}
void
crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
struct isl_device *isl_dev,
struct blorp_surf *surf,
struct pipe_resource *p_res,
enum isl_aux_usage aux_usage,
unsigned level,
bool is_render_target)
{
struct crocus_resource *res = (void *) p_res;
assert(!crocus_resource_unfinished_aux_import(res));
if (isl_aux_usage_has_hiz(aux_usage) &&
!crocus_resource_level_has_hiz(res, level))
aux_usage = ISL_AUX_USAGE_NONE;
*surf = (struct blorp_surf) {
.surf = &res->surf,
.addr = (struct blorp_address) {
.buffer = res->bo,
.offset = res->offset,
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
.mocs = crocus_mocs(res->bo, isl_dev),
},
.aux_usage = aux_usage,
};
if (aux_usage != ISL_AUX_USAGE_NONE) {
surf->aux_surf = &res->aux.surf;
surf->aux_addr = (struct blorp_address) {
.buffer = res->aux.bo,
.offset = res->aux.offset,
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
.mocs = crocus_mocs(res->bo, isl_dev),
};
surf->clear_color =
crocus_resource_get_clear_color(res);
}
}
static void
tex_cache_flush_hack(struct crocus_batch *batch,
enum isl_format view_format,
enum isl_format surf_format)
{
/* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says:
*
* "Currently Sampler assumes that a surface would not have two
* different format associate with it. It will not properly cache
* the different views in the MT cache, causing a data corruption."
*
* We may need to handle this for texture views in general someday, but
* for now we handle it here, as it hurts copies and blits particularly
* badly because they often reinterpret formats.
*
* If the BO hasn't been referenced yet this batch, we assume that the
* texture cache doesn't contain any relevant data nor need flushing.
*
* Icelake (Gen11+) claims to fix this issue, but seems to still have
* issues with ASTC formats.
*/
bool need_flush = view_format != surf_format;
if (!need_flush)
return;
const char *reason =
"workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads";
crocus_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(batch, reason,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
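/**
 * Return the crocus_resource backing the requested aspect: the separate
 * stencil resource for PIPE_MASK_S, otherwise the resource itself.
 */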
static struct crocus_resource *
crocus_resource_for_aspect(const struct intel_device_info *devinfo,
struct pipe_resource *p_res, unsigned pipe_mask)
{
if (pipe_mask == PIPE_MASK_S) {
struct crocus_resource *junk, *s_res;
crocus_get_depth_stencil_resources(devinfo, p_res, &junk, &s_res);
return s_res;
} else {
return (struct crocus_resource *)p_res;
}
}
static enum pipe_format
pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask)
{
if (pipe_mask == PIPE_MASK_S) {
return util_format_stencil_only(format);
} else if (pipe_mask == PIPE_MASK_Z) {
return util_format_get_depth_only(format);
} else {
return format;
}
}
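/**
 * Fallback blit path using util_blitter.  Drops the alpha channel from the
 * mask when the destination format has no alpha.
 */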
static void
crocus_u_blitter(struct crocus_context *ice,
const struct pipe_blit_info *info)
{
struct pipe_blit_info dinfo = *info;
if (!util_format_has_alpha(dinfo.dst.resource->format))
dinfo.mask &= ~PIPE_MASK_A;
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_blit(ice->blitter, &dinfo);
}
/**
* The pipe->blit() driver hook.
*
* This performs a blit between two surfaces, which copies data but may
* also perform format conversion, scaling, flipping, and so on.
*/
static void
crocus_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
enum blorp_batch_flags blorp_flags = 0;
/* We don't support color masking. */
assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA ||
(info->mask & PIPE_MASK_RGBA) == 0);
if (info->render_condition_enable)
if (!crocus_check_conditional_render(ice))
return;
if (devinfo->ver <= 5) {
if (!screen->vtbl.blit_blt(batch, info)) {
if (!util_format_is_depth_or_stencil(info->src.resource->format) &&
info->dst.resource->target != PIPE_TEXTURE_3D)
goto use_blorp;
if (!util_blitter_is_blit_supported(ice->blitter, info)) {
if (util_format_is_depth_or_stencil(info->src.resource->format)) {
struct pipe_blit_info depth_blit = *info;
depth_blit.mask = PIPE_MASK_Z;
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_blit(ice->blitter, &depth_blit);
struct pipe_surface *dst_view, dst_templ;
util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z);
dst_view = ctx->create_surface(ctx, info->dst.resource, &dst_templ);
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_clear_depth_stencil(ice->blitter, dst_view, PIPE_CLEAR_STENCIL,
0, 0, info->dst.box.x, info->dst.box.y,
info->dst.box.width, info->dst.box.height);
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_stencil_fallback(ice->blitter,
info->dst.resource,
info->dst.level,
&info->dst.box,
info->src.resource,
info->src.level,
&info->src.box, NULL);
}
return;
}
crocus_u_blitter(ice, info);
}
return;
}
if (devinfo->ver == 6) {
if (info->src.resource->target == PIPE_TEXTURE_3D &&
info->dst.resource->target == PIPE_TEXTURE_3D) {
crocus_u_blitter(ice, info);
return;
}
}
use_blorp:
if (info->render_condition_enable) {
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
float src_x0 = info->src.box.x;
float src_x1 = info->src.box.x + info->src.box.width;
float src_y0 = info->src.box.y;
float src_y1 = info->src.box.y + info->src.box.height;
float dst_x0 = info->dst.box.x;
float dst_x1 = info->dst.box.x + info->dst.box.width;
float dst_y0 = info->dst.box.y;
float dst_y1 = info->dst.box.y + info->dst.box.height;
bool mirror_x = apply_mirror(&src_x0, &src_x1);
bool mirror_y = apply_mirror(&src_y0, &src_y1);
enum blorp_filter filter;
if (info->scissor_enable) {
bool noop = apply_blit_scissor(&info->scissor,
&src_x0, &src_y0, &src_x1, &src_y1,
&dst_x0, &dst_y0, &dst_x1, &dst_y1,
mirror_x, mirror_y);
if (noop)
return;
}
if (abs(info->dst.box.width) == abs(info->src.box.width) &&
abs(info->dst.box.height) == abs(info->src.box.height)) {
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1) {
/* The OpenGL ES 3.2 specification, section 16.2.1, says:
*
* "If the read framebuffer is multisampled (its effective
* value of SAMPLE_BUFFERS is one) and the draw framebuffer
* is not (its value of SAMPLE_BUFFERS is zero), the samples
* corresponding to each pixel location in the source are
* converted to a single sample before being written to the
* destination. The filter parameter is ignored. If the
* source formats are integer types or stencil values, a
* single samples value is selected for each pixel. If the
* source formats are floating-point or normalized types,
* the sample values for each pixel are resolved in an
* implementation-dependent manner. If the source formats
* are depth values, sample values are resolved in an
* implementation-dependent manner where the result will be
* between the minimum and maximum depth values in the pixel."
*
* When selecting a single sample, we always choose sample 0.
*/
if (util_format_is_depth_or_stencil(info->src.format) ||
util_format_is_pure_integer(info->src.format)) {
filter = BLORP_FILTER_SAMPLE_0;
} else {
filter = BLORP_FILTER_AVERAGE;
}
} else {
/* The OpenGL 4.6 specification, section 18.3.1, says:
*
* "If the source and destination dimensions are identical,
* no filtering is applied."
*
* Using BLORP_FILTER_NONE will also handle the upsample case by
* replicating the one value in the source to all values in the
* destination.
*/
filter = BLORP_FILTER_NONE;
}
} else if (info->filter == PIPE_TEX_FILTER_LINEAR) {
filter = BLORP_FILTER_BILINEAR;
} else {
filter = BLORP_FILTER_NEAREST;
}
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth;
/* There is no interpolation to the pixel center during rendering, so
* add the 0.5 offset ourselves here.
*/
float depth_center_offset = 0;
if (info->src.resource->target == PIPE_TEXTURE_3D)
depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth;
/* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is
* used to represent the color aspect. */
unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS);
while (aspect_mask) {
unsigned aspect = 1 << u_bit_scan(&aspect_mask);
struct crocus_resource *src_res =
crocus_resource_for_aspect(devinfo, info->src.resource, aspect);
struct crocus_resource *dst_res =
crocus_resource_for_aspect(devinfo, info->dst.resource, aspect);
enum pipe_format src_pfmt =
pipe_format_for_aspect(info->src.format, aspect);
enum pipe_format dst_pfmt =
pipe_format_for_aspect(info->dst.format, aspect);
if (crocus_resource_unfinished_aux_import(src_res))
crocus_resource_finish_aux_import(ctx->screen, src_res);
if (crocus_resource_unfinished_aux_import(dst_res))
crocus_resource_finish_aux_import(ctx->screen, dst_res);
struct crocus_format_info src_fmt =
crocus_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT);
enum isl_aux_usage src_aux_usage =
crocus_resource_texture_aux_usage(src_res);
crocus_resource_prepare_texture(ice, src_res, src_fmt.fmt,
info->src.level, 1, info->src.box.z,
info->src.box.depth);
// crocus_emit_buffer_barrier_for(batch, src_res->bo,
// CROCUS_DOMAIN_OTHER_READ);
struct crocus_format_info dst_fmt =
crocus_format_for_usage(devinfo, dst_pfmt,
ISL_SURF_USAGE_RENDER_TARGET_BIT);
enum isl_aux_usage dst_aux_usage =
crocus_resource_render_aux_usage(ice, dst_res, info->dst.level,
dst_fmt.fmt, false);
struct blorp_surf src_surf, dst_surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
&src_res->base, src_aux_usage,
info->src.level, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
&dst_res->base, dst_aux_usage,
info->dst.level, true);
crocus_resource_prepare_render(ice, dst_res, info->dst.level,
info->dst.box.z, info->dst.box.depth,
dst_aux_usage);
// crocus_emit_buffer_barrier_for(batch, dst_res->bo,
// CROCUS_DOMAIN_RENDER_WRITE);
if (crocus_batch_references(batch, src_res->bo))
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
if (dst_res->base.target == PIPE_BUFFER) {
util_range_add(&dst_res->base, &dst_res->valid_buffer_range,
dst_x0, dst_x1);
}
struct isl_swizzle src_swiz = pipe_to_isl_swizzles(src_fmt.swizzles);
struct isl_swizzle dst_swiz = pipe_to_isl_swizzles(dst_fmt.swizzles);
for (int slice = 0; slice < info->dst.box.depth; slice++) {
unsigned dst_z = info->dst.box.z + slice;
float src_z = info->src.box.z + slice * src_z_step +
depth_center_offset;
crocus_batch_maybe_flush(batch, 1500);
blorp_blit(&blorp_batch,
&src_surf, info->src.level, src_z,
src_fmt.fmt, src_swiz,
&dst_surf, info->dst.level, dst_z,
dst_fmt.fmt, dst_swiz,
src_x0, src_y0, src_x1, src_y1,
dst_x0, dst_y0, dst_x1, dst_y1,
filter, mirror_x, mirror_y);
}
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
crocus_resource_finish_render(ice, dst_res, info->dst.level,
info->dst.box.z, info->dst.box.depth,
dst_aux_usage);
}
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, (struct crocus_resource *)
info->dst.resource,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post-blit");
}
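/**
 * Choose the auxiliary surface usage for a raw copy: keep MCS (with the
 * stencil exception noted below), and disable aux for everything else.
 */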
static void
get_copy_region_aux_settings(struct crocus_resource *res,
enum isl_aux_usage *out_aux_usage,
bool is_render_target)
{
switch (res->aux.usage) {
case ISL_AUX_USAGE_MCS:
/* A stencil resolve operation must be performed prior to doing resource
 * copies or CPU access.
 * (see HSD 1209978162)
 */
if (is_render_target && isl_surf_usage_is_stencil(res->surf.usage)) {
*out_aux_usage = ISL_AUX_USAGE_NONE;
} else {
*out_aux_usage = res->aux.usage;
}
break;
default:
*out_aux_usage = ISL_AUX_USAGE_NONE;
break;
}
}
/**
* Perform a GPU-based raw memory copy between compatible view classes.
*
* Does not perform any flushing - the new data may still be left in the
* render cache, and old data may remain in other caches.
*
* Wraps blorp_copy() and blorp_buffer_copy().
*/
void
crocus_copy_region(struct blorp_context *blorp,
struct crocus_batch *batch,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct blorp_batch blorp_batch;
struct crocus_context *ice = blorp->driver_ctx;
struct crocus_screen *screen = (void *) ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *src_res = (void *) src;
struct crocus_resource *dst_res = (void *) dst;
if (devinfo->ver <= 5) {
if (screen->vtbl.copy_region_blt(batch, dst_res,
dst_level, dstx, dsty, dstz,
src_res, src_level, src_box))
return;
}
enum isl_aux_usage src_aux_usage, dst_aux_usage;
get_copy_region_aux_settings(src_res, &src_aux_usage,
false);
get_copy_region_aux_settings(dst_res, &dst_aux_usage,
true);
if (crocus_batch_references(batch, src_res->bo))
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
if (dst->target == PIPE_BUFFER)
util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
struct blorp_address src_addr = {
.buffer = crocus_resource_bo(src), .offset = src_box->x,
};
struct blorp_address dst_addr = {
.buffer = crocus_resource_bo(dst), .offset = dstx,
.reloc_flags = EXEC_OBJECT_WRITE,
};
crocus_batch_maybe_flush(batch, 1500);
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);
blorp_batch_finish(&blorp_batch);
} else {
// XXX: what about one surface being a buffer and not the other?
struct blorp_surf src_surf, dst_surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
src, src_aux_usage, src_level, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
dst, dst_aux_usage, dst_level, true);
crocus_resource_prepare_access(ice, src_res, src_level, 1,
src_box->z, src_box->depth,
src_aux_usage, false);
crocus_resource_prepare_access(ice, dst_res, dst_level, 1,
dstz, src_box->depth,
dst_aux_usage, false);
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
for (int slice = 0; slice < src_box->depth; slice++) {
crocus_batch_maybe_flush(batch, 1500);
blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,
&dst_surf, dst_level, dstz + slice,
src_box->x, src_box->y, dstx, dsty,
src_box->width, src_box->height);
}
blorp_batch_finish(&blorp_batch);
crocus_resource_finish_write(ice, dst_res, dst_level, dstz,
src_box->depth, dst_aux_usage);
}
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
}
static struct crocus_batch *
get_preferred_batch(struct crocus_context *ice, struct crocus_bo *bo)
{
/* If the compute batch is already using this buffer, we'd prefer to
* continue queueing in the compute batch.
*/
if (crocus_batch_references(&ice->batches[CROCUS_BATCH_COMPUTE], bo))
return &ice->batches[CROCUS_BATCH_COMPUTE];
/* Otherwise default to the render batch. */
return &ice->batches[CROCUS_BATCH_RENDER];
}
/**
* The pipe->resource_copy_region() driver hook.
*
* This implements ARB_copy_image semantics - a raw memory copy between
* compatible view classes.
*/
static void
crocus_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *p_dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *p_src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *src = (void *) p_src;
struct crocus_resource *dst = (void *) p_dst;
if (crocus_resource_unfinished_aux_import(src))
crocus_resource_finish_aux_import(ctx->screen, src);
if (crocus_resource_unfinished_aux_import(dst))
crocus_resource_finish_aux_import(ctx->screen, dst);
/* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */
if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER &&
(src_box->width % 4 == 0) && src_box->width <= 16 &&
screen->vtbl.copy_mem_mem) {
struct crocus_bo *dst_bo = crocus_resource_bo(p_dst);
batch = get_preferred_batch(ice, dst_bo);
crocus_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));
crocus_emit_pipe_control_flush(batch,
"stall for MI_COPY_MEM_MEM copy_region",
PIPE_CONTROL_CS_STALL);
screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, crocus_resource_bo(p_src),
src_box->x, src_box->width);
return;
}
if (devinfo->ver < 6 && util_format_is_depth_or_stencil(p_dst->format)) {
util_resource_copy_region(ctx, p_dst, dst_level, dstx, dsty, dstz,
p_src, src_level, src_box);
return;
}
crocus_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz,
p_src, src_level, src_box);
if (util_format_is_depth_and_stencil(p_dst->format) &&
util_format_has_stencil(util_format_description(p_src->format)) &&
devinfo->ver >= 6) {
struct crocus_resource *junk, *s_src_res, *s_dst_res;
crocus_get_depth_stencil_resources(devinfo, p_src, &junk, &s_src_res);
crocus_get_depth_stencil_resources(devinfo, p_dst, &junk, &s_dst_res);
crocus_copy_region(&ice->blorp, batch, &s_dst_res->base, dst_level, dstx,
dsty, dstz, &s_src_res->base, src_level, src_box);
}
crocus_flush_and_dirty_for_history(ice, batch, dst,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post copy_region");
}
void
crocus_init_blit_functions(struct pipe_context *ctx)
{
ctx->blit = crocus_blit;
ctx->resource_copy_region = crocus_resource_copy_region;
}


@@ -0,0 +1,399 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_blorp.c
*
* ============================= GENXML CODE =============================
* [This file is compiled once per generation.]
* =======================================================================
*
* GenX specific code for working with BLORP (blitting, resolves, clears
* on the 3D engine). This provides the driver-specific hooks needed to
* implement the BLORP API.
*
* See crocus_blit.c, crocus_clear.c, and so on.
*/
#include <assert.h>
#include "crocus_batch.h"
#include "crocus_resource.h"
#include "crocus_context.h"
#include "util/u_upload_mgr.h"
#include "intel/common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"
#if GFX_VER <= 5
#include "gen4_blorp_exec.h"
#endif
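/**
 * Allocate \param size bytes (with the given alignment) in the batch's
 * dynamic state buffer, flushing or growing it as needed.  Returns a CPU
 * pointer to the space and stores its offset (and, optionally, the backing
 * BO).
 */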
static uint32_t *
stream_state(struct crocus_batch *batch,
unsigned size,
unsigned alignment,
uint32_t *out_offset,
struct crocus_bo **out_bo)
{
uint32_t offset = ALIGN(batch->state.used, alignment);
if (offset + size >= STATE_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
offset = ALIGN(batch->state.used, alignment);
} else if (offset + size >= batch->state.bo->size) {
const unsigned new_size =
MIN2(batch->state.bo->size + batch->state.bo->size / 2,
MAX_STATE_SIZE);
crocus_grow_buffer(batch, true, batch->state.used, new_size);
assert(offset + size < batch->state.bo->size);
}
crocus_record_state_size(batch->state_sizes, offset, size);
batch->state.used = offset + size;
*out_offset = offset;
/* If the caller has asked for a BO, we leave them the responsibility of
* adding bo->gtt_offset (say, by handing an address to genxml). If not,
* we assume they want the offset from a base address.
*/
if (out_bo)
*out_bo = batch->state.bo;
return (uint32_t *)batch->state.map + (offset >> 2);
}
static void *
blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return crocus_get_command_space(batch, n * sizeof(uint32_t));
}
static uint64_t
blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location,
struct blorp_address addr, uint32_t delta)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t offset;
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
offset = (char *)location - (char *)batch->state.map;
return crocus_state_reloc(batch, offset,
addr.buffer, addr.offset + delta,
addr.reloc_flags);
}
assert(!crocus_ptr_in_state_buffer(batch, location));
offset = (char *)location - (char *)batch->command.map;
return crocus_command_reloc(batch, offset,
addr.buffer, addr.offset + delta,
addr.reloc_flags);
}
static void
blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset,
struct blorp_address addr, uint32_t delta)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
struct crocus_bo *bo = addr.buffer;
uint64_t reloc_val =
crocus_state_reloc(batch, ss_offset, bo, addr.offset + delta,
addr.reloc_flags);
void *reloc_ptr = (void *)batch->state.map + ss_offset;
*(uint32_t *)reloc_ptr = reloc_val;
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address addr)
{
/* We'll let blorp_surface_reloc write the address. */
return 0ull;
}
#if GFX_VER >= 7
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->state.bo,
.offset = 0
};
}
#endif
static void *
blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return stream_state(batch, size, alignment, offset, NULL);
}
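/**
 * Allocate a binding table plus \param num_entries surface states from the
 * dynamic state buffer, filling the table with the surface state offsets.
 */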
static void
blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
unsigned num_entries,
unsigned state_size,
unsigned state_alignment,
uint32_t *bt_offset,
uint32_t *surface_offsets,
void **surface_maps)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t *bt_map = stream_state(batch, num_entries * sizeof(uint32_t), 32,
bt_offset, NULL);
for (unsigned i = 0; i < num_entries; i++) {
surface_maps[i] = stream_state(batch,
state_size, state_alignment,
&(surface_offsets)[i], NULL);
bt_map[i] = surface_offsets[i];
}
}
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,
uint32_t size,
struct blorp_address *addr)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
struct crocus_bo *bo;
uint32_t offset;
void *map = stream_state(batch, size, 64,
&offset, &bo);
*addr = (struct blorp_address) {
.buffer = bo,
.offset = offset,
.reloc_flags = RELOC_32BIT,
#if GFX_VER >= 7
.mocs = crocus_mocs(bo, &batch->screen->isl_dev),
#endif
};
return map;
}
/**
 * No-op on gfx4-7: there is nothing to invalidate for 48-bit vertex buffer
 * address transitions here.
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
const struct blorp_address *addrs,
UNUSED uint32_t *sizes,
unsigned num_vbs)
{
}
static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->ice->workaround_bo,
.offset = batch->ice->workaround_offset,
};
}
static void
blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
UNUSED void *start,
UNUSED size_t size)
{
/* All allocated states come from the batch which we will flush before we
* submit it. There's nothing for us to do here.
*/
}
#if GFX_VER >= 7
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return batch->screen->l3_config_3d;
}
#else /* GFX_VER < 7 */
static void
blorp_emit_urb_config(struct blorp_batch *blorp_batch,
unsigned vs_entry_size,
UNUSED unsigned sf_entry_size)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
#if GFX_VER <= 5
batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size);
#else
genX(upload_urb)(batch, vs_entry_size, false, vs_entry_size);
#endif
}
#endif
static void
crocus_blorp_exec(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_context *ice = blorp_batch->blorp->driver_ctx;
struct crocus_batch *batch = blorp_batch->driver_batch;
/* Flush the sampler and render caches. We definitely need to flush the
* sampler cache so that we get updated contents from the render cache for
* the glBlitFramebuffer() source. Also, we are sometimes warned in the
* docs to flush the cache between reinterpretations of the same surface
* data with different formats, which blorp does for stencil and depth
* data.
*/
if (params->src.enabled)
crocus_cache_flush_for_read(batch, params->src.addr.buffer);
if (params->dst.enabled) {
crocus_cache_flush_for_render(batch, params->dst.addr.buffer,
params->dst.view.format,
params->dst.aux_usage);
}
if (params->depth.enabled)
crocus_cache_flush_for_depth(batch, params->depth.addr.buffer);
if (params->stencil.enabled)
crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer);
crocus_require_command_space(batch, 1400);
crocus_require_statebuffer_space(batch, 600);
batch->no_wrap = true;
#if GFX_VER == 6
/* Emit workaround flushes when we switch from drawing to blorping. */
crocus_emit_post_sync_nonzero_flush(batch);
#endif
#if GFX_VER >= 6
crocus_emit_depth_stall_flushes(batch);
#endif
blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
}
batch->screen->vtbl.update_surface_base_address(batch);
crocus_handle_always_flush_cache(batch);
batch->contains_draw = true;
blorp_exec(blorp_batch, params);
batch->no_wrap = false;
crocus_handle_always_flush_cache(batch);
/* We've smashed all state compared to what the normal 3D pipeline
* rendering tracks for GL.
*/
uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE |
CROCUS_DIRTY_GEN7_SO_BUFFERS |
CROCUS_DIRTY_SO_DECL_LIST |
CROCUS_DIRTY_LINE_STIPPLE |
CROCUS_ALL_DIRTY_FOR_COMPUTE |
CROCUS_DIRTY_GEN6_SCISSOR_RECT |
CROCUS_DIRTY_GEN75_VF |
CROCUS_DIRTY_SF_CL_VIEWPORT);
uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE |
CROCUS_STAGE_DIRTY_UNCOMPILED_VS |
CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |
CROCUS_STAGE_DIRTY_UNCOMPILED_TES |
CROCUS_STAGE_DIRTY_UNCOMPILED_GS |
CROCUS_STAGE_DIRTY_UNCOMPILED_FS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS);
if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) {
/* BLORP disabled tessellation, that's fine for the next draw */
skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS |
CROCUS_STAGE_DIRTY_TES |
CROCUS_STAGE_DIRTY_CONSTANTS_TCS |
CROCUS_STAGE_DIRTY_CONSTANTS_TES |
CROCUS_STAGE_DIRTY_BINDINGS_TCS |
CROCUS_STAGE_DIRTY_BINDINGS_TES;
}
if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) {
/* BLORP disabled geometry shaders, that's fine for the next draw */
skip_stage_bits |= CROCUS_STAGE_DIRTY_GS |
CROCUS_STAGE_DIRTY_CONSTANTS_GS |
CROCUS_STAGE_DIRTY_BINDINGS_GS;
}
/* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if
* BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set.
*/
if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)
skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER;
if (!params->wm_prog_data)
skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE;
ice->state.dirty |= ~skip_bits;
ice->state.stage_dirty |= ~skip_stage_bits;
ice->urb.vsize = 0;
ice->urb.gs_present = false;
ice->urb.gsize = 0;
ice->urb.tess_present = false;
ice->urb.hsize = 0;
ice->urb.dsize = 0;
if (params->dst.enabled) {
crocus_render_cache_add_bo(batch, params->dst.addr.buffer,
params->dst.view.format,
params->dst.aux_usage);
}
if (params->depth.enabled)
crocus_depth_cache_add_bo(batch, params->depth.addr.buffer);
if (params->stencil.enabled)
crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer);
}
static void
blorp_measure_start(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
}
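/**
 * Initialize BLORP for this context and plug in the crocus shader cache
 * lookup/upload hooks and the batch execution callback.
 */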
void
genX(init_blorp)(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
blorp_init(&ice->blorp, ice, &screen->isl_dev);
ice->blorp.compiler = screen->compiler;
ice->blorp.lookup_shader = crocus_blorp_lookup_shader;
ice->blorp.upload_shader = crocus_blorp_upload_shader;
ice->blorp.exec = crocus_blorp_exec;
}


@@ -0,0 +1,337 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* blt command encoding for gen4/5 */
#include "crocus_context.h"
#include "crocus_genx_macros.h"
#include "crocus_genx_protos.h"
#include "crocus_resource.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
#if GFX_VER <= 5
static bool validate_blit_for_blt(struct crocus_batch *batch,
const struct pipe_blit_info *info)
{
/* If the source and destination are the same size with no mirroring,
* the rectangles are within the size of the texture and there is no
* scissor, then we can probably use the blit engine.
*/
if (info->dst.box.width != info->src.box.width ||
info->dst.box.height != info->src.box.height)
return false;
if (info->scissor_enable)
return false;
if (info->dst.box.height < 0 || info->src.box.height < 0)
return false;
if (info->dst.box.depth > 1 || info->src.box.depth > 1)
return false;
return true;
}
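/**
 * Return the surface pitch in the units XY_SRC_COPY_BLT expects: bytes for
 * linear surfaces, dwords for tiled ones.
 */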
static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
{
int pitch = res->surf.row_pitch_B;
if (res->surf.tiling != ISL_TILING_LINEAR)
pitch /= 4;
return pitch;
}
static uint32_t
color_depth_for_cpp(int cpp)
{
switch (cpp) {
case 4: return COLOR_DEPTH__32bit;
case 2: return COLOR_DEPTH__565;
case 1: return COLOR_DEPTH__8bit;
default:
unreachable("not reached");
}
}
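/**
 * Emit a single XY_SRC_COPY_BLT for one rectangle, adjusting coordinates
 * and pitches for tiling and for formats wider than 32bpp.
 */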
static bool emit_copy_blt(struct crocus_batch *batch,
struct crocus_resource *src,
struct crocus_resource *dst,
unsigned cpp,
int32_t src_pitch,
unsigned src_offset,
int32_t dst_pitch,
unsigned dst_offset,
uint16_t src_x, uint16_t src_y,
uint16_t dst_x, uint16_t dst_y,
uint16_t w, uint16_t h)
{
uint32_t src_tile_w, src_tile_h;
uint32_t dst_tile_w, dst_tile_h;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src, src_pitch, src_offset, src_x, src_y,
dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
* (X direction width of the Tile). This is ensured while allocating the
* buffer object.
*/
assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
/* For big formats (such as floating point), do the copy using 16 or
* 32bpp and multiply the coordinates.
*/
if (cpp > 4) {
if (cpp % 4 == 2) {
dst_x *= cpp / 2;
dst_x2 *= cpp / 2;
src_x *= cpp / 2;
cpp = 2;
} else {
assert(cpp % 4 == 0);
dst_x *= cpp / 4;
dst_x2 *= cpp / 4;
src_x *= cpp / 4;
cpp = 4;
}
}
/* For tiled source and destination, pitch value should be specified
* as a number of Dwords.
*/
if (dst->surf.tiling != ISL_TILING_LINEAR)
dst_pitch /= 4;
if (src->surf.tiling != ISL_TILING_LINEAR)
src_pitch /= 4;
assert(cpp <= 4);
crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
xyblt.RasterOperation = 0xCC;
xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
xyblt.ColorDepth = color_depth_for_cpp(cpp);
xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
xyblt.DestinationX1Coordinate = dst_x;
xyblt.DestinationY1Coordinate = dst_y;
xyblt.DestinationX2Coordinate = dst_x2;
xyblt.DestinationY2Coordinate = dst_y2;
xyblt.DestinationPitch = dst_pitch;
xyblt.SourceX1Coordinate = src_x;
xyblt.SourceY1Coordinate = src_y;
xyblt.SourcePitch = src_pitch;
};
crocus_emit_mi_flush(batch);
return true;
}
static bool crocus_emit_blt(struct crocus_batch *batch,
struct crocus_resource *src,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dst_x, unsigned dst_y,
unsigned dst_z,
unsigned src_level,
const struct pipe_box *src_box)
{
const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
unsigned src_cpp = src_fmtl->bpb / 8;
const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
const unsigned dst_cpp = dst_fmtl->bpb / 8;
uint16_t src_x, src_y;
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
uint32_t src_width = src_box->width, src_height = src_box->height;
/* gen4/5 can't handle Y tiled blits. */
if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
return false;
if (src->surf.format != dst->surf.format)
return false;
if (src_cpp != dst_cpp)
return false;
src_x = src_box->x;
src_y = src_box->y;
assert(src_cpp == dst_cpp);
crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
&src_image_y);
if (util_format_is_compressed(src->base.format)) {
int bw = util_format_get_blockwidth(src->base.format);
int bh = util_format_get_blockheight(src->base.format);
assert(src_x % bw == 0);
assert(src_y % bh == 0);
src_x /= (int)bw;
src_y /= (int)bh;
src_width = DIV_ROUND_UP(src_width, (int)bw);
src_height = DIV_ROUND_UP(src_height, (int)bh);
}
crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
&dst_image_y);
if (util_format_is_compressed(dst->base.format)) {
int bw = util_format_get_blockwidth(dst->base.format);
int bh = util_format_get_blockheight(dst->base.format);
assert(dst_x % bw == 0);
assert(dst_y % bh == 0);
dst_x /= (int)bw;
dst_y /= (int)bh;
}
src_x += src_image_x;
src_y += src_image_y;
dst_x += dst_image_x;
dst_y += dst_image_y;
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
* Data Size Limitations):
*
* The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
* at the destination. The maximum number of pixels that may be
* represented per scan lines worth of graphics data depends on the
* color depth.
*
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
* for linear surfaces and DWords for tiled surfaces. So the maximum
* pitch is 32k linear and 128k tiled.
*/
if (crocus_resource_blt_pitch(src) >= 32768 ||
crocus_resource_blt_pitch(dst) >= 32768) {
return false;
}
/* We need to split the blit into chunks that each fit within the blitter's
* restrictions. We can't use a chunk size of 32768 because we need to
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
* a nice round power of two, big enough that performance won't suffer, and
* small enough to guarantee everything fits.
*/
const uint32_t max_chunk_size = 16384;
for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
ASSERTED uint32_t z_offset_el, array_offset;
uint32_t src_offset, src_tile_x, src_tile_y;
isl_tiling_get_intratile_offset_el(src->surf.tiling,
src_cpp * 8, src->surf.row_pitch_B,
src->surf.array_pitch_el_rows,
src_x + chunk_x, src_y + chunk_y, 0, 0,
&src_offset,
&src_tile_x, &src_tile_y,
&z_offset_el, &array_offset);
assert(z_offset_el == 0);
assert(array_offset == 0);
uint32_t dst_offset, dst_tile_x, dst_tile_y;
isl_tiling_get_intratile_offset_el(dst->surf.tiling,
dst_cpp * 8, dst->surf.row_pitch_B,
dst->surf.array_pitch_el_rows,
dst_x + chunk_x, dst_y + chunk_y, 0, 0,
&dst_offset,
&dst_tile_x, &dst_tile_y,
&z_offset_el, &array_offset);
assert(z_offset_el == 0);
assert(array_offset == 0);
if (!emit_copy_blt(batch, src, dst,
src_cpp, src->surf.row_pitch_B,
src_offset,
dst->surf.row_pitch_B, dst_offset,
src_tile_x, src_tile_y,
dst_tile_x, dst_tile_y,
chunk_w, chunk_h)) {
return false;
}
}
}
return true;
}
static bool crocus_blit_blt(struct crocus_batch *batch,
const struct pipe_blit_info *info)
{
if (!validate_blit_for_blt(batch, info))
return false;
return crocus_emit_blt(batch,
(struct crocus_resource *)info->src.resource,
(struct crocus_resource *)info->dst.resource,
info->dst.level,
info->dst.box.x,
info->dst.box.y,
info->dst.box.z,
info->src.level,
&info->src.box);
}
static bool crocus_copy_region_blt(struct crocus_batch *batch,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct crocus_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
if (dst->base.target == PIPE_BUFFER || src->base.target == PIPE_BUFFER)
return false;
return crocus_emit_blt(batch,
src,
dst,
dst_level,
dstx, dsty, dstz,
src_level,
src_box);
}
#endif
void
genX(init_blt)(struct crocus_screen *screen)
{
#if GFX_VER <= 5
screen->vtbl.blit_blt = crocus_blit_blt;
screen->vtbl.copy_region_blt = crocus_copy_region_blt;
#else
screen->vtbl.blit_blt = NULL;
screen->vtbl.copy_region_blt = NULL;
#endif
}

File diff suppressed because it is too large

View File

@ -0,0 +1,331 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_BUFMGR_H
#define CROCUS_BUFMGR_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/list.h"
#include "pipe/p_defines.h"
struct crocus_batch;
struct intel_device_info;
struct pipe_debug_callback;
#define CROCUS_BINDER_SIZE (64 * 1024)
#define CROCUS_MAX_BINDERS 100
struct crocus_bo {
/**
* Size in bytes of the buffer object.
*
* The size may be larger than the size originally requested for the
* allocation, such as being aligned to page size.
*/
uint64_t size;
/** Buffer manager context associated with this buffer object */
struct crocus_bufmgr *bufmgr;
/** The GEM handle for this buffer object. */
uint32_t gem_handle;
/**
* Virtual address of the buffer inside the PPGTT (Per-Process Graphics
* Translation Table).
*
* Although each hardware context has its own VMA, we assign BO's to the
* same address in all contexts, for simplicity.
*/
uint64_t gtt_offset;
/**
* The validation list index for this buffer, or -1 when not in a batch.
* Note that a single buffer may be in multiple batches (contexts), and
* this is a global field, which refers to the last batch using the BO.
* It should not be considered authoritative, but can be used to avoid a
* linear walk of the validation list in the common case by guessing that
* exec_bos[bo->index] == bo and confirming whether that's the case.
*
* XXX: this is not ideal now that we have more than one batch per context,
* XXX: as the index will flop back and forth between the render index and
* XXX: compute index...
*/
unsigned index;
/**
* Boolean of whether the GPU is definitely not accessing the buffer.
*
* This is only valid when reusable, since non-reusable
* buffers are those that have been shared with other
* processes, so we don't know their state.
*/
bool idle;
int refcount;
const char *name;
uint64_t kflags;
/**
* Kernel-assigned global name for this object
*
* List contains both flink named and prime fd'd objects
*/
unsigned global_name;
/**
* Current tiling mode
*/
uint32_t tiling_mode;
uint32_t swizzle_mode;
uint32_t stride;
time_t free_time;
/** Mapped address for the buffer, saved across map/unmap cycles */
void *map_cpu;
/** GTT virtual address for the buffer, saved across map/unmap cycles */
void *map_gtt;
/** WC CPU address for the buffer, saved across map/unmap cycles */
void *map_wc;
/** BO cache list */
struct list_head head;
/** List of GEM handle exports of this buffer (bo_export) */
struct list_head exports;
/**
* Boolean of whether this buffer can be re-used
*/
bool reusable;
/**
* Boolean of whether this buffer has been shared with an external client.
*/
bool external;
/**
* Boolean of whether this buffer is cache coherent
*/
bool cache_coherent;
/**
* Boolean of whether this buffer points into user memory
*/
bool userptr;
/** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
uint32_t hash;
};
#define BO_ALLOC_ZEROED (1 << 0)
#define BO_ALLOC_COHERENT (1 << 1)
/**
* Allocate a buffer object.
*
* Buffer objects are not necessarily initially mapped into CPU virtual
* address space or graphics device aperture. They must be mapped
* using crocus_bo_map() to be used by the CPU.
*/
struct crocus_bo *crocus_bo_alloc(struct crocus_bufmgr *bufmgr,
const char *name, uint64_t size);
/**
* Allocate a tiled buffer object.
*
* Alignment for tiled objects is set automatically; the 'flags'
* argument provides a hint about how the object will be used initially.
*
* Valid tiling formats are:
* I915_TILING_NONE
* I915_TILING_X
* I915_TILING_Y
*/
struct crocus_bo *crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr,
const char *name, uint64_t size,
uint32_t alignment,
uint32_t tiling_mode, uint32_t pitch,
unsigned flags);
struct crocus_bo *crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr,
const char *name, void *ptr,
size_t size);
/** Takes a reference on a buffer object */
static inline void
crocus_bo_reference(struct crocus_bo *bo)
{
p_atomic_inc(&bo->refcount);
}
/**
* Releases a reference on a buffer object, freeing the data if
* no references remain.
*/
void crocus_bo_unreference(struct crocus_bo *bo);
#define MAP_READ PIPE_MAP_READ
#define MAP_WRITE PIPE_MAP_WRITE
#define MAP_ASYNC PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT PIPE_MAP_PERSISTENT
#define MAP_COHERENT PIPE_MAP_COHERENT
/* internal */
#define MAP_INTERNAL_MASK (0xff << 24)
#define MAP_RAW (0x01 << 24)
#define MAP_FLAGS (MAP_READ | MAP_WRITE | MAP_ASYNC | \
MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
/**
* Maps the buffer into userspace.
*
* This function will block waiting for any existing execution on the
* buffer to complete, first. The resulting mapping is returned.
*/
MUST_CHECK void *crocus_bo_map(struct pipe_debug_callback *dbg,
struct crocus_bo *bo, unsigned flags);
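/* A minimal usage sketch (hypothetical caller, error handling omitted):
 *
 *    struct crocus_bo *bo = crocus_bo_alloc(bufmgr, "scratch", 4096);
 *    uint32_t *map = crocus_bo_map(NULL, bo, MAP_WRITE);
 *    if (map) {
 *       map[0] = 0xdeadbeef;
 *       crocus_bo_unmap(bo);
 *    }
 *    crocus_bo_unreference(bo);
 */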
/**
* Reduces the refcount on the userspace mapping of the buffer
* object.
*/
static inline int crocus_bo_unmap(struct crocus_bo *bo) { return 0; }
/**
* Waits for rendering to an object by the GPU to have completed.
*
* This is not required for any access to the BO by bo_map,
* bo_subdata, etc. It is merely a way for the driver to implement
* glFinish.
*/
void crocus_bo_wait_rendering(struct crocus_bo *bo);
/**
* Unref a buffer manager instance.
*/
void crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr);
/**
* Get the current tiling (and resulting swizzling) mode for the bo.
*
* \param buf Buffer to get tiling mode for
* \param tiling_mode returned tiling mode
* \param swizzle_mode returned swizzling mode
*/
int crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode,
uint32_t *swizzle_mode);
/**
* Create a visible name for a buffer which can be used by other apps
*
* \param buf Buffer to create a name for
* \param name Returned name
*/
int crocus_bo_flink(struct crocus_bo *bo, uint32_t *name);
/**
* Is this buffer shared with external clients (exported)?
*/
static inline bool
crocus_bo_is_external(const struct crocus_bo *bo)
{
return bo->external;
}
/**
* Returns 1 if mapping the buffer for write could cause the process
* to block, due to the object being active in the GPU.
*/
int crocus_bo_busy(struct crocus_bo *bo);
/**
* Specify the volatility of the buffer.
* \param bo Buffer to create a name for
* \param madv The purgeable status
*
* Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
* reclaimed under memory pressure. If you subsequently require the buffer,
* then you must pass I915_MADV_WILLNEED to mark the buffer as required.
*
* Returns 1 if the buffer was retained, or 0 if it was discarded whilst
* marked as I915_MADV_DONTNEED.
*/
int crocus_bo_madvise(struct crocus_bo *bo, int madv);
/* drm_bacon_bufmgr_gem.c */
struct crocus_bufmgr *
crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd,
bool bo_reuse);
int crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr);
struct crocus_bo *crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr,
const char *name,
unsigned handle);
int crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns);
uint32_t crocus_create_hw_context(struct crocus_bufmgr *bufmgr);
uint32_t crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
#define CROCUS_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY - 1) / 2)
#define CROCUS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define CROCUS_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY + 1) / 2)
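/* Assuming the stock i915 UAPI values (I915_CONTEXT_MIN_USER_PRIORITY = -1023,
 * I915_CONTEXT_DEFAULT_PRIORITY = 0, I915_CONTEXT_MAX_USER_PRIORITY = 1023),
 * these evaluate to -512, 0 and 512 respectively.
 */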
int crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
uint32_t ctx_id, int priority);
void crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
int crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd);
struct crocus_bo *crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr,
int prime_fd, uint32_t tiling,
uint32_t stride);
/**
* Exports a bo as a GEM handle into a given DRM file descriptor
* \param bo Buffer to export
* \param drm_fd File descriptor where the new handle is created
* \param out_handle Pointer to store the new handle
*
* Returns 0 if the buffer was successfully exported, a non zero error code
* otherwise.
*/
int crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd,
uint32_t *out_handle);
uint32_t crocus_bo_export_gem_handle(struct crocus_bo *bo);
int crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset,
uint64_t *out);
int drm_ioctl(int fd, unsigned long request, void *arg);
#endif /* CROCUS_BUFMGR_H */

View File

@ -0,0 +1,859 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "intel/compiler/brw_compiler.h"
#include "util/format_srgb.h"
static bool
crocus_is_color_fast_clear_compatible(struct crocus_context *ice,
enum isl_format format,
const union isl_color_value color)
{
if (isl_format_has_int_channel(format)) {
perf_debug(&ice->dbg, "Integer fast clear not enabled for %s",
isl_format_get_name(format));
return false;
}
for (int i = 0; i < 4; i++) {
if (!isl_format_has_color_component(format, i)) {
continue;
}
if (color.f32[i] != 0.0f && color.f32[i] != 1.0f) {
return false;
}
}
return true;
}
static bool
can_fast_clear_color(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
enum isl_format format,
enum isl_format render_format,
union isl_color_value color)
{
struct crocus_resource *res = (void *) p_res;
if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
return false;
if (!isl_aux_usage_has_fast_clears(res->aux.usage))
return false;
/* Check for partial clear */
if (box->x > 0 || box->y > 0 ||
box->width < minify(p_res->width0, level) ||
box->height < minify(p_res->height0, level)) {
return false;
}
/* Avoid conditional fast clears to maintain correct tracking of the aux
* state (see crocus_resource_finish_write for more info). Note that partial
* fast clears (if they existed) would not pose a problem with conditional
* rendering.
*/
if (render_condition_enabled &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
return false;
}
/* We store clear colors as floats or uints as needed. If there are
* texture views in play, the formats will not properly be respected
* during resolves because the resolve operations only know about the
* resource and not the renderbuffer.
*/
if (isl_format_srgb_to_linear(render_format) !=
isl_format_srgb_to_linear(format)) {
return false;
}
/* XXX: if (irb->mt->supports_fast_clear)
* see intel_miptree_create_for_dri_image()
*/
if (!crocus_is_color_fast_clear_compatible(ice, format, color))
return false;
return true;
}
static union isl_color_value
convert_fast_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format render_format,
const union isl_color_value color)
{
union isl_color_value override_color = color;
struct pipe_resource *p_res = (void *) res;
const enum pipe_format format = p_res->format;
const struct util_format_description *desc =
util_format_description(format);
unsigned colormask = util_format_colormask(desc);
if (util_format_is_intensity(format) ||
util_format_is_luminance(format) ||
util_format_is_luminance_alpha(format)) {
override_color.u32[1] = override_color.u32[0];
override_color.u32[2] = override_color.u32[0];
if (util_format_is_intensity(format))
override_color.u32[3] = override_color.u32[0];
} else {
for (int chan = 0; chan < 3; chan++) {
if (!(colormask & (1 << chan)))
override_color.u32[chan] = 0;
}
}
if (util_format_is_unorm(format)) {
for (int i = 0; i < 4; i++)
override_color.f32[i] = CLAMP(override_color.f32[i], 0.0f, 1.0f);
} else if (util_format_is_snorm(format)) {
for (int i = 0; i < 4; i++)
override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
} else if (util_format_is_pure_uint(format)) {
for (int i = 0; i < 4; i++) {
unsigned bits = util_format_get_component_bits(
format, UTIL_FORMAT_COLORSPACE_RGB, i);
if (bits < 32) {
uint32_t max = (1u << bits) - 1;
override_color.u32[i] = MIN2(override_color.u32[i], max);
}
}
} else if (util_format_is_pure_sint(format)) {
for (int i = 0; i < 4; i++) {
unsigned bits = util_format_get_component_bits(
format, UTIL_FORMAT_COLORSPACE_RGB, i);
if (bits < 32) {
int32_t max = (1 << (bits - 1)) - 1;
int32_t min = -(1 << (bits - 1));
override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
}
}
} else if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
/* these packed float formats only store unsigned values */
for (int i = 0; i < 4; i++)
override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
}
if (!(colormask & 1 << 3)) {
if (util_format_is_pure_integer(format))
override_color.u32[3] = 1;
else
override_color.f32[3] = 1.0f;
}
/* Handle linear to SRGB conversion */
if (isl_format_is_srgb(render_format)) {
for (int i = 0; i < 3; i++) {
override_color.f32[i] =
util_format_linear_to_srgb_float(override_color.f32[i]);
}
}
return override_color;
}
static void
fast_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
enum isl_format format,
union isl_color_value color,
enum blorp_batch_flags blorp_flags)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
struct pipe_resource *p_res = (void *) res;
color = convert_fast_clear_color(ice, res, format, color);
bool color_changed = !!memcmp(&res->aux.clear_color, &color,
sizeof(color));
if (color_changed) {
/* If we are clearing to a new clear value, we need to resolve fast
* clears from other levels/layers first, since we can't have different
* levels/layers with different fast clear colors.
*/
for (unsigned res_lvl = 0; res_lvl < res->surf.levels; res_lvl++) {
const unsigned level_layers =
crocus_get_num_logical_layers(res, res_lvl);
for (unsigned layer = 0; layer < level_layers; layer++) {
if (res_lvl == level &&
layer >= box->z &&
layer < box->z + box->depth) {
/* We're going to clear this layer anyway. Leave it alone. */
continue;
}
enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, res_lvl, layer);
if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_PARTIAL_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
/* This slice doesn't have any fast-cleared bits. */
continue;
}
/* If we got here, then the level may have fast-clear bits that use
* the old clear value. We need to do a color resolve to get rid
* of their use of the clear color before we can change it.
* Fortunately, few applications ever change their clear color at
* different levels/layers, so this shouldn't happen often.
*/
crocus_resource_prepare_access(ice, res,
res_lvl, 1, layer, 1,
res->aux.usage,
false);
perf_debug(&ice->dbg,
"Resolving resource (%p) level %d, layer %d: color changing from "
"(%0.2f, %0.2f, %0.2f, %0.2f) to "
"(%0.2f, %0.2f, %0.2f, %0.2f)\n",
res, res_lvl, layer,
res->aux.clear_color.f32[0],
res->aux.clear_color.f32[1],
res->aux.clear_color.f32[2],
res->aux.clear_color.f32[3],
color.f32[0], color.f32[1], color.f32[2], color.f32[3]);
}
}
}
crocus_resource_set_clear_color(ice, res, color);
/* If the buffer is already in ISL_AUX_STATE_CLEAR, and the color hasn't
* changed, the clear is redundant and can be skipped.
*/
const enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, level, box->z);
if (!color_changed && box->depth == 1 && aux_state == ISL_AUX_STATE_CLEAR)
return;
/* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
*
* "Any transition from any value in {Clear, Render, Resolve} to a
* different value in {Clear, Render, Resolve} requires end of pipe
* synchronization."
*
* In other words, fast clear ops are not properly synchronized with
* other drawing. We need to use a PIPE_CONTROL to ensure that the
* contents of the previous draw hit the render target before we resolve
* and again afterwards to ensure that the resolve is complete before we
* do any more regular drawing.
*/
crocus_emit_end_of_pipe_sync(batch,
"fast clear: pre-flush",
PIPE_CONTROL_RENDER_TARGET_FLUSH);
/* If we reach this point, we need to fast clear to change the state to
* ISL_AUX_STATE_CLEAR, or to update the fast clear color (or both).
*/
blorp_flags |= color_changed ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
struct blorp_surf surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
p_res, res->aux.usage, level, true);
/* In newer gens (> 9), the hardware will do a linear -> sRGB conversion of
* the clear color during the fast clear, if the surface format is of sRGB
* type. We use the linear version of the surface format here to prevent
* that from happening, since we already do our own linear -> sRGB
* conversion in convert_fast_clear_color().
*/
blorp_fast_clear(&blorp_batch, &surf, isl_format_srgb_to_linear(format),
ISL_SWIZZLE_IDENTITY,
level, box->z, box->depth,
box->x, box->y, box->x + box->width,
box->y + box->height);
blorp_batch_finish(&blorp_batch);
crocus_emit_end_of_pipe_sync(batch,
"fast clear: post flush",
PIPE_CONTROL_RENDER_TARGET_FLUSH);
crocus_resource_set_aux_state(ice, res, level, box->z,
box->depth, ISL_AUX_STATE_CLEAR);
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
return;
}
static void
clear_color(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
enum isl_format format,
struct isl_swizzle swizzle,
union isl_color_value color)
{
struct crocus_resource *res = (void *) p_res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
enum blorp_batch_flags blorp_flags = 0;
if (render_condition_enabled) {
if (!crocus_check_conditional_render(ice))
return;
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
if (p_res->target == PIPE_BUFFER)
util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
crocus_batch_maybe_flush(batch, 1500);
bool can_fast_clear = can_fast_clear_color(ice, p_res, level, box,
render_condition_enabled,
res->surf.format, format, color);
if (can_fast_clear) {
fast_clear_color(ice, res, level, box, format, color,
blorp_flags);
return;
}
bool color_write_disable[4] = { false, false, false, false };
enum isl_aux_usage aux_usage =
crocus_resource_render_aux_usage(ice, res, format,
false, false);
crocus_resource_prepare_render(ice, res, level,
box->z, box->depth, aux_usage);
struct blorp_surf surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
p_res, aux_usage, level, true);
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
if (!isl_format_supports_rendering(devinfo, format) &&
isl_format_is_rgbx(format))
format = isl_format_rgbx_to_rgba(format);
blorp_clear(&blorp_batch, &surf, format, swizzle,
level, box->z, box->depth, box->x, box->y,
box->x + box->width, box->y + box->height,
color, color_write_disable);
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, res,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post color clear");
crocus_resource_finish_render(ice, res, level,
box->z, box->depth, aux_usage);
}
static bool
can_fast_clear_depth(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
float depth)
{
struct pipe_resource *p_res = (void *) res;
struct pipe_context *ctx = (void *) ice;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (devinfo->ver < 6)
return false;
if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
return false;
/* Check for partial clears */
if (box->x > 0 || box->y > 0 ||
box->width < u_minify(p_res->width0, level) ||
box->height < u_minify(p_res->height0, level)) {
return false;
}
/* Avoid conditional fast clears to maintain correct tracking of the aux
* state (see crocus_resource_finish_write for more info). Note that partial
* fast clears would not pose a problem with conditional rendering.
*/
if (render_condition_enabled &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
return false;
}
if (!crocus_resource_level_has_hiz(res, level))
return false;
if (res->base.format == PIPE_FORMAT_Z16_UNORM) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
* width of the map (LOD0) is not multiple of 16, fast clear
* optimization must be disabled.
*/
if (devinfo->ver == 6 &&
(minify(res->surf.phys_level0_sa.width,
level) % 16) != 0)
return false;
}
return true;
}
static void
fast_clear_depth(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
float depth)
{
struct pipe_resource *p_res = (void *) res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
/* Quantize the clear value to what can be stored in the actual depth
* buffer. This makes the following check more accurate because it now
* checks if the actual depth bits will match. It also prevents us from
* getting a too-accurate depth value during depth testing or when sampling
* with HiZ enabled.
*/
const unsigned nbits = p_res->format == PIPE_FORMAT_Z16_UNORM ? 16 : 24;
const uint32_t depth_max = (1 << nbits) - 1;
depth = p_res->format == PIPE_FORMAT_Z32_FLOAT ? depth :
(unsigned)(depth * depth_max) / (float)depth_max;
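/* For example, clearing a Z24 buffer to 0.5 yields
 * (unsigned)(0.5 * 0xffffff) / (float)0xffffff ~= 0.49999997, i.e. a value
 * the 24-bit buffer can represent exactly.
 */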
bool update_clear_depth = false;
/* If we're clearing to a new clear value, then we need to resolve any clear
* flags out of the HiZ buffer into the real depth buffer.
*/
if (res->aux.clear_color.f32[0] != depth) {
for (unsigned res_level = 0; res_level < res->surf.levels; res_level++) {
if (!crocus_resource_level_has_hiz(res, res_level))
continue;
const unsigned level_layers =
crocus_get_num_logical_layers(res, res_level);
for (unsigned layer = 0; layer < level_layers; layer++) {
if (res_level == level &&
layer >= box->z &&
layer < box->z + box->depth) {
/* We're going to clear this layer anyway. Leave it alone. */
continue;
}
enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, res_level, layer);
if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
/* This slice doesn't have any fast-cleared bits. */
continue;
}
/* If we got here, then the level may have fast-clear bits that
* use the old clear value. We need to do a depth resolve to get
* rid of their use of the clear value before we can change it.
* Fortunately, few applications ever change their depth clear
* value so this shouldn't happen often.
*/
crocus_hiz_exec(ice, batch, res, res_level, layer, 1,
ISL_AUX_OP_FULL_RESOLVE, false);
crocus_resource_set_aux_state(ice, res, res_level, layer, 1,
ISL_AUX_STATE_RESOLVED);
}
}
const union isl_color_value clear_value = { .f32 = {depth, } };
crocus_resource_set_clear_color(ice, res, clear_value);
update_clear_depth = true;
}
for (unsigned l = 0; l < box->depth; l++) {
enum isl_aux_state aux_state =
crocus_resource_level_has_hiz(res, level) ?
crocus_resource_get_aux_state(res, level, box->z + l) :
ISL_AUX_STATE_AUX_INVALID;
if (update_clear_depth || aux_state != ISL_AUX_STATE_CLEAR) {
if (aux_state == ISL_AUX_STATE_CLEAR) {
perf_debug(&ice->dbg, "Performing HiZ clear just to update the "
"depth clear value\n");
}
crocus_hiz_exec(ice, batch, res, level,
box->z + l, 1, ISL_AUX_OP_FAST_CLEAR,
update_clear_depth);
}
}
crocus_resource_set_aux_state(ice, res, level, box->z, box->depth,
ISL_AUX_STATE_CLEAR);
ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;
}
static void
clear_depth_stencil(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
bool clear_depth,
bool clear_stencil,
float depth,
uint8_t stencil)
{
struct crocus_resource *res = (void *) p_res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
enum blorp_batch_flags blorp_flags = 0;
if (render_condition_enabled) {
if (!crocus_check_conditional_render(ice))
return;
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
crocus_batch_maybe_flush(batch, 1500);
struct crocus_resource *z_res;
struct crocus_resource *stencil_res;
struct blorp_surf z_surf;
struct blorp_surf stencil_surf;
crocus_get_depth_stencil_resources(&batch->screen->devinfo, p_res, &z_res, &stencil_res);
if (z_res && clear_depth &&
can_fast_clear_depth(ice, z_res, level, box, render_condition_enabled,
depth)) {
fast_clear_depth(ice, z_res, level, box, depth);
crocus_flush_and_dirty_for_history(ice, batch, res, 0,
"cache history: post fast Z clear");
clear_depth = false;
z_res = NULL;
}
/* At this point, we might have fast cleared the depth buffer. So if there's
* no stencil clear pending, return early.
*/
if (!(clear_depth || (clear_stencil && stencil_res))) {
return;
}
if (clear_depth && z_res) {
const enum isl_aux_usage aux_usage =
crocus_resource_render_aux_usage(ice, z_res, level, z_res->surf.format,
false);
crocus_resource_prepare_render(ice, z_res, level, box->z, box->depth,
aux_usage);
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
&z_surf, &z_res->base, aux_usage,
level, true);
}
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
uint8_t stencil_mask = clear_stencil && stencil_res ? 0xff : 0;
if (stencil_mask) {
crocus_resource_prepare_access(ice, stencil_res, level, 1, box->z,
box->depth, stencil_res->aux.usage, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
&stencil_surf, &stencil_res->base,
stencil_res->aux.usage, level, true);
}
blorp_clear_depth_stencil(&blorp_batch, &z_surf, &stencil_surf,
level, box->z, box->depth,
box->x, box->y,
box->x + box->width,
box->y + box->height,
clear_depth && z_res, depth,
stencil_mask, stencil);
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, res, 0,
"cache history: post slow ZS clear");
if (clear_depth && z_res) {
crocus_resource_finish_render(ice, z_res, level,
box->z, box->depth, z_surf.aux_usage);
}
if (stencil_mask) {
crocus_resource_finish_write(ice, stencil_res, level, box->z, box->depth,
stencil_res->aux.usage);
}
}
/**
* The pipe->clear() driver hook.
*
* This clears buffers attached to the current draw framebuffer.
*/
static void
crocus_clear(struct pipe_context *ctx,
unsigned buffers,
const struct pipe_scissor_state *scissor_state,
const union pipe_color_union *p_color,
double depth,
unsigned stencil)
{
struct crocus_context *ice = (void *) ctx;
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
assert(buffers != 0);
struct pipe_box box = {
.width = cso_fb->width,
.height = cso_fb->height,
};
if (scissor_state) {
box.x = scissor_state->minx;
box.y = scissor_state->miny;
box.width = MIN2(box.width, scissor_state->maxx - scissor_state->minx);
box.height = MIN2(box.height, scissor_state->maxy - scissor_state->miny);
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
if (devinfo->ver < 6) {
crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE, true);
util_blitter_clear(ice->blitter, cso_fb->width, cso_fb->height,
util_framebuffer_get_num_layers(cso_fb),
buffers & PIPE_CLEAR_DEPTHSTENCIL, p_color, depth, stencil, false);
} else {
struct pipe_surface *psurf = cso_fb->zsbuf;
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
box.z = psurf->u.tex.first_layer;
clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box, true,
buffers & PIPE_CLEAR_DEPTH,
buffers & PIPE_CLEAR_STENCIL,
depth, stencil);
}
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
if (buffers & PIPE_CLEAR_COLOR) {
/* pipe_color_union and isl_color_value are interchangeable */
union isl_color_value *color = (void *) p_color;
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
if (buffers & (PIPE_CLEAR_COLOR0 << i)) {
struct pipe_surface *psurf = cso_fb->cbufs[i];
struct crocus_surface *isurf = (void *) psurf;
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
box.z = psurf->u.tex.first_layer;
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
true, isurf->view.format, isurf->view.swizzle,
*color);
}
}
}
}
/**
* The pipe->clear_texture() driver hook.
*
* This clears the given texture resource.
*/
static void
crocus_clear_texture(struct pipe_context *ctx,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
const void *data)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *res = (void *) p_res;
if (devinfo->ver < 6) {
util_clear_texture(ctx, p_res,
level, box, data);
return;
}
if (crocus_resource_unfinished_aux_import(res))
crocus_resource_finish_aux_import(ctx->screen, res);
if (util_format_is_depth_or_stencil(p_res->format)) {
const struct util_format_unpack_description *fmt_unpack =
util_format_unpack_description(p_res->format);
float depth = 0.0;
uint8_t stencil = 0;
if (fmt_unpack->unpack_z_float)
fmt_unpack->unpack_z_float(&depth, 0, data, 0, 1, 1);
if (fmt_unpack->unpack_s_8uint)
fmt_unpack->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
clear_depth_stencil(ice, p_res, level, box, true, true, true,
depth, stencil);
} else {
union isl_color_value color;
struct crocus_resource *res = (void *) p_res;
enum isl_format format = res->surf.format;
if (!isl_format_supports_rendering(devinfo, format)) {
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
// XXX: actually just get_copy_format_for_bpb from BLORP
// XXX: don't cut and paste this
switch (fmtl->bpb) {
case 8: format = ISL_FORMAT_R8_UINT; break;
case 16: format = ISL_FORMAT_R8G8_UINT; break;
case 24: format = ISL_FORMAT_R8G8B8_UINT; break;
case 32: format = ISL_FORMAT_R8G8B8A8_UINT; break;
case 48: format = ISL_FORMAT_R16G16B16_UINT; break;
case 64: format = ISL_FORMAT_R16G16B16A16_UINT; break;
case 96: format = ISL_FORMAT_R32G32B32_UINT; break;
case 128: format = ISL_FORMAT_R32G32B32A32_UINT; break;
default:
unreachable("Unknown format bpb");
}
/* No aux surfaces for non-renderable surfaces */
assert(res->aux.usage == ISL_AUX_USAGE_NONE);
}
isl_color_value_unpack(&color, format, data);
clear_color(ice, p_res, level, box, true, format,
ISL_SWIZZLE_IDENTITY, color);
}
}
/**
* The pipe->clear_render_target() driver hook.
*
* This clears the given render target surface.
*/
static void
crocus_clear_render_target(struct pipe_context *ctx,
struct pipe_surface *psurf,
const union pipe_color_union *p_color,
unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height,
bool render_condition_enabled)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_surface *isurf = (void *) psurf;
struct pipe_box box = {
.x = dst_x,
.y = dst_y,
.z = psurf->u.tex.first_layer,
.width = width,
.height = height,
.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
};
/* pipe_color_union and isl_color_value are interchangeable */
union isl_color_value *color = (void *) p_color;
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
render_condition_enabled,
isurf->view.format, isurf->view.swizzle, *color);
}
/**
* The pipe->clear_depth_stencil() driver hook.
*
* This clears the given depth/stencil surface.
*/
static void
crocus_clear_depth_stencil(struct pipe_context *ctx,
struct pipe_surface *psurf,
unsigned flags,
double depth,
unsigned stencil,
unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height,
bool render_condition_enabled)
{
return;
#if 0
struct crocus_context *ice = (void *) ctx;
struct pipe_box box = {
.x = dst_x,
.y = dst_y,
.z = psurf->u.tex.first_layer,
.width = width,
.height = height,
.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
};
uint32_t blit_flags = 0;
assert(util_format_is_depth_or_stencil(psurf->texture->format));
crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE);
util_blitter_clear(ice->blitter, width, height,
1, flags, NULL, depth, stencil, render_condition_enabled);
#if 0
clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box,
render_condition_enabled,
flags & PIPE_CLEAR_DEPTH, flags & PIPE_CLEAR_STENCIL,
depth, stencil);
#endif
#endif
}
void
crocus_init_clear_functions(struct pipe_context *ctx)
{
ctx->clear = crocus_clear;
ctx->clear_texture = crocus_clear_texture;
ctx->clear_render_target = crocus_clear_render_target;
ctx->clear_depth_stencil = crocus_clear_depth_stencil;
}

View File

@ -0,0 +1,336 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <time.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/ralloc.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "common/intel_defines.h"
#include "common/intel_sample_positions.h"
/**
* The pipe->set_debug_callback() driver hook.
*/
static void
crocus_set_debug_callback(struct pipe_context *ctx,
const struct pipe_debug_callback *cb)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (cb)
ice->dbg = *cb;
else
memset(&ice->dbg, 0, sizeof(ice->dbg));
}
static bool
crocus_init_identifier_bo(struct crocus_context *ice)
{
void *bo_map;
bo_map = crocus_bo_map(NULL, ice->workaround_bo, MAP_READ | MAP_WRITE);
if (!bo_map)
return false;
ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
ice->workaround_offset = ALIGN(
intel_debug_write_identifiers(bo_map, 4096, "Crocus") + 8, 8);
crocus_bo_unmap(ice->workaround_bo);
return true;
}
/**
* Called from the batch module when it detects a GPU hang.
*
* In this case, we've lost our GEM context, and can't rely on any existing
* state on the GPU. We must mark everything dirty and wipe away any saved
* assumptions about the last known state of the GPU.
*/
void
crocus_lost_context_state(struct crocus_batch *batch)
{
/* The batch module doesn't have a crocus_context, because we want to
* avoid introducing lots of layering violations. Unfortunately, here
* we do need to inform the context of batch catastrophe. We know the
* batch is one of our context's, so hackily claw our way back.
*/
struct crocus_context *ice = batch->ice;
struct crocus_screen *screen = batch->screen;
if (batch->name == CROCUS_BATCH_RENDER) {
screen->vtbl.init_render_context(batch);
} else if (batch->name == CROCUS_BATCH_COMPUTE) {
screen->vtbl.init_compute_context(batch);
} else {
unreachable("unhandled batch reset");
}
ice->state.dirty = ~0ull;
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
batch->state_base_address_emitted = false;
screen->vtbl.lost_genx_state(ice, batch);
}
static enum pipe_reset_status
crocus_get_device_reset_status(struct pipe_context *ctx)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
enum pipe_reset_status worst_reset = PIPE_NO_RESET;
/* Check the reset status of each batch's hardware context, and take the
* worst status (if one was guilty, proclaim guilt).
*/
for (int i = 0; i < ice->batch_count; i++) {
/* This will also recreate the hardware contexts as necessary, so any
* future queries will show no resets. We only want to report once.
*/
enum pipe_reset_status batch_reset =
crocus_batch_check_for_reset(&ice->batches[i]);
if (batch_reset == PIPE_NO_RESET)
continue;
if (worst_reset == PIPE_NO_RESET) {
worst_reset = batch_reset;
} else {
/* GUILTY < INNOCENT < UNKNOWN */
worst_reset = MIN2(worst_reset, batch_reset);
}
}
if (worst_reset != PIPE_NO_RESET && ice->reset.reset)
ice->reset.reset(ice->reset.data, worst_reset);
return worst_reset;
}
static void
crocus_set_device_reset_callback(struct pipe_context *ctx,
const struct pipe_device_reset_callback *cb)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (cb)
ice->reset = *cb;
else
memset(&ice->reset, 0, sizeof(ice->reset));
}
static void
crocus_get_sample_position(struct pipe_context *ctx,
unsigned sample_count,
unsigned sample_index,
float *out_value)
{
union {
struct {
float x[16];
float y[16];
} a;
struct {
float _0XOffset, _1XOffset, _2XOffset, _3XOffset,
_4XOffset, _5XOffset, _6XOffset, _7XOffset,
_8XOffset, _9XOffset, _10XOffset, _11XOffset,
_12XOffset, _13XOffset, _14XOffset, _15XOffset;
float _0YOffset, _1YOffset, _2YOffset, _3YOffset,
_4YOffset, _5YOffset, _6YOffset, _7YOffset,
_8YOffset, _9YOffset, _10YOffset, _11YOffset,
_12YOffset, _13YOffset, _14YOffset, _15YOffset;
} v;
} u;
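/* The INTEL_SAMPLE_POS_*X macros initialize the named _<N>XOffset/_<N>YOffset
 * fields of 'v'; those fields alias the x[]/y[] arrays of 'a', so the
 * requested sample can then be looked up by index.
 */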
switch (sample_count) {
case 1: INTEL_SAMPLE_POS_1X(u.v._); break;
case 2: INTEL_SAMPLE_POS_2X(u.v._); break;
case 4: INTEL_SAMPLE_POS_4X(u.v._); break;
case 8: INTEL_SAMPLE_POS_8X(u.v._); break;
case 16: INTEL_SAMPLE_POS_16X(u.v._); break;
default: unreachable("invalid sample count");
}
out_value[0] = u.a.x[sample_index];
out_value[1] = u.a.y[sample_index];
}
/**
* Destroy a context, freeing any associated memory.
*/
static void
crocus_destroy_context(struct pipe_context *ctx)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
if (ctx->stream_uploader)
u_upload_destroy(ctx->stream_uploader);
if (ice->blitter)
util_blitter_destroy(ice->blitter);
screen->vtbl.destroy_state(ice);
crocus_destroy_program_cache(ice);
u_upload_destroy(ice->query_buffer_uploader);
crocus_bo_unreference(ice->workaround_bo);
slab_destroy_child(&ice->transfer_pool);
crocus_batch_free(&ice->batches[CROCUS_BATCH_RENDER]);
if (ice->batches[CROCUS_BATCH_COMPUTE].ice)
crocus_batch_free(&ice->batches[CROCUS_BATCH_COMPUTE]);
ralloc_free(ice);
}
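/* Invoke the generation-specific variant of 'func' (gfx4_func, gfx45_func,
 * ... gfx75_func) selected by the device's verx10 value.
 */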
#define genX_call(devinfo, func, ...) \
switch ((devinfo)->verx10) { \
case 75: \
gfx75_##func(__VA_ARGS__); \
break; \
case 70: \
gfx7_##func(__VA_ARGS__); \
break; \
case 60: \
gfx6_##func(__VA_ARGS__); \
break; \
case 50: \
gfx5_##func(__VA_ARGS__); \
break; \
case 45: \
gfx45_##func(__VA_ARGS__); \
break; \
case 40: \
gfx4_##func(__VA_ARGS__); \
break; \
default: \
unreachable("Unknown hardware generation"); \
}
/**
* Create a context.
*
* This is where each context begins.
*/
struct pipe_context *
crocus_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct crocus_screen *screen = (struct crocus_screen*)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_context *ice = rzalloc(NULL, struct crocus_context);
if (!ice)
return NULL;
struct pipe_context *ctx = &ice->ctx;
ctx->screen = pscreen;
ctx->priv = priv;
ctx->stream_uploader = u_upload_create_default(ctx);
if (!ctx->stream_uploader) {
free(ctx);
return NULL;
}
ctx->const_uploader = ctx->stream_uploader;
ctx->destroy = crocus_destroy_context;
ctx->set_debug_callback = crocus_set_debug_callback;
ctx->set_device_reset_callback = crocus_set_device_reset_callback;
ctx->get_device_reset_status = crocus_get_device_reset_status;
ctx->get_sample_position = crocus_get_sample_position;
ice->shaders.urb_size = devinfo->urb.size;
crocus_init_context_fence_functions(ctx);
crocus_init_blit_functions(ctx);
crocus_init_clear_functions(ctx);
crocus_init_program_functions(ctx);
crocus_init_resource_functions(ctx);
crocus_init_flush_functions(ctx);
crocus_init_program_cache(ice);
slab_create_child(&ice->transfer_pool, &screen->transfer_pool);
ice->query_buffer_uploader =
u_upload_create(ctx, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING,
0);
ice->workaround_bo =
crocus_bo_alloc(screen->bufmgr, "workaround", 4096);
if (!ice->workaround_bo)
return NULL;
if (!crocus_init_identifier_bo(ice))
return NULL;
genX_call(devinfo, init_state, ice);
genX_call(devinfo, init_blorp, ice);
genX_call(devinfo, init_query, ice);
ice->blitter = util_blitter_create(&ice->ctx);
if (ice->blitter == NULL)
return NULL;
int priority = 0;
if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
priority = INTEL_CONTEXT_HIGH_PRIORITY;
if (flags & PIPE_CONTEXT_LOW_PRIORITY)
priority = INTEL_CONTEXT_LOW_PRIORITY;
ice->batch_count = devinfo->ver >= 7 ? CROCUS_BATCH_COUNT : 1;
for (int i = 0; i < ice->batch_count; i++) {
crocus_init_batch(ice, (enum crocus_batch_name) i,
priority);
}
ice->urb.size = devinfo->urb.size;
screen->vtbl.init_render_context(&ice->batches[CROCUS_BATCH_RENDER]);
if (ice->batch_count > 1)
screen->vtbl.init_compute_context(&ice->batches[CROCUS_BATCH_COMPUTE]);
return ctx;
}
bool
crocus_sw_check_cond_render(struct crocus_context *ice)
{
struct crocus_query *q = ice->condition.query;
union pipe_query_result result;
bool wait = ice->condition.mode == PIPE_RENDER_COND_WAIT ||
ice->condition.mode == PIPE_RENDER_COND_BY_REGION_WAIT;
if (!q)
return true;
bool ret = ice->ctx.get_query_result(&ice->ctx, (void *)q, wait, &result);
if (!ret)
return true;
return ice->condition.condition ? result.u64 == 0 : result.u64 != 0;
}

View File

@ -0,0 +1,955 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_CONTEXT_H
#define CROCUS_CONTEXT_H
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "intel/blorp/blorp.h"
#include "intel/dev/intel_debug.h"
#include "intel/compiler/brw_compiler.h"
#include "crocus_batch.h"
#include "crocus_fence.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "util/u_blitter.h"
struct crocus_bo;
struct crocus_context;
struct blorp_batch;
struct blorp_params;
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
/* CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same. */
#define CROCUS_MAX_ABOS 16
#define CROCUS_MAX_SSBOS 16
#define CROCUS_MAX_VIEWPORTS 16
#define CROCUS_MAX_CLIP_PLANES 8
enum crocus_param_domain {
BRW_PARAM_DOMAIN_BUILTIN = 0,
BRW_PARAM_DOMAIN_IMAGE,
};
enum {
DRI_CONF_BO_REUSE_DISABLED,
DRI_CONF_BO_REUSE_ALL
};
#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value)(BRW_PARAM_VALUE(value) & 0xf)
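/* For example, BRW_PARAM_IMAGE(2, 3) packs to 0x01000203 (domain IMAGE in
 * the top byte), so BRW_PARAM_IMAGE_IDX() recovers 2 and
 * BRW_PARAM_IMAGE_OFFSET() recovers 3.
 */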
/**
* Dirty flags. When state changes, we flag some combination of these
* to indicate that particular GPU commands need to be re-emitted.
*
* Each bit typically corresponds to a single 3DSTATE_* command packet, but
* in rare cases they map to a group of related packets that need to be
* emitted together.
*
* See crocus_upload_render_state().
*/
#define CROCUS_DIRTY_COLOR_CALC_STATE (1ull << 0)
#define CROCUS_DIRTY_POLYGON_STIPPLE (1ull << 1)
#define CROCUS_DIRTY_CC_VIEWPORT (1ull << 2)
#define CROCUS_DIRTY_SF_CL_VIEWPORT (1ull << 3)
#define CROCUS_DIRTY_RASTER (1ull << 4)
#define CROCUS_DIRTY_CLIP (1ull << 5)
#define CROCUS_DIRTY_LINE_STIPPLE (1ull << 6)
#define CROCUS_DIRTY_VERTEX_ELEMENTS (1ull << 7)
#define CROCUS_DIRTY_VERTEX_BUFFERS (1ull << 8)
#define CROCUS_DIRTY_DRAWING_RECTANGLE (1ull << 9)
#define CROCUS_DIRTY_GEN6_URB (1ull << 10)
#define CROCUS_DIRTY_DEPTH_BUFFER (1ull << 11)
#define CROCUS_DIRTY_WM (1ull << 12)
#define CROCUS_DIRTY_SO_DECL_LIST (1ull << 13)
#define CROCUS_DIRTY_STREAMOUT (1ull << 14)
#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR (1ull << 15)
#define CROCUS_DIRTY_GEN4_CURBE (1ull << 16)
#define CROCUS_DIRTY_GEN4_URB_FENCE (1ull << 17)
#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS (1ull << 18)
#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS (1ull << 19)
#define CROCUS_DIRTY_GEN6_BLEND_STATE (1ull << 20)
#define CROCUS_DIRTY_GEN6_SCISSOR_RECT (1ull << 21)
#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL (1ull << 22)
#define CROCUS_DIRTY_GEN6_MULTISAMPLE (1ull << 23)
#define CROCUS_DIRTY_GEN6_SAMPLE_MASK (1ull << 24)
#define CROCUS_DIRTY_GEN7_SBE (1ull << 25)
#define CROCUS_DIRTY_GEN7_L3_CONFIG (1ull << 26)
#define CROCUS_DIRTY_GEN7_SO_BUFFERS (1ull << 27)
#define CROCUS_DIRTY_GEN75_VF (1ull << 28)
#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES (1ull << 29)
#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30)
#define CROCUS_DIRTY_VF_STATISTICS (1ull << 31)
#define CROCUS_DIRTY_GEN4_CLIP_PROG (1ull << 32)
#define CROCUS_DIRTY_GEN4_SF_PROG (1ull << 33)
#define CROCUS_DIRTY_GEN4_FF_GS_PROG (1ull << 34)
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS (1ull << 35)
#define CROCUS_DIRTY_GEN6_SVBI (1ull << 36)
#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
/**
* Per-stage dirty flags. When state changes, we flag some combination of
* these to indicate that particular GPU commands need to be re-emitted.
* Unlike the CROCUS_DIRTY_* flags, these are shader stage-specific and can be
* indexed by shifting the mask by the shader stage index.
*
* See crocus_upload_render_state().
*/
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS (1ull << 0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS (1ull << 1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES (1ull << 2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS (1ull << 3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS (1ull << 4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS (1ull << 5)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS (1ull << 6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS (1ull << 7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES (1ull << 8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS (1ull << 9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS (1ull << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS (1ull << 11)
#define CROCUS_STAGE_DIRTY_VS (1ull << 12)
#define CROCUS_STAGE_DIRTY_TCS (1ull << 13)
#define CROCUS_STAGE_DIRTY_TES (1ull << 14)
#define CROCUS_STAGE_DIRTY_GS (1ull << 15)
#define CROCUS_STAGE_DIRTY_FS (1ull << 16)
#define CROCUS_STAGE_DIRTY_CS (1ull << 17)
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS 18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS (1ull << 18)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS (1ull << 19)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES (1ull << 20)
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS (1ull << 21)
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS (1ull << 22)
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS (1ull << 23)
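/* Per the stage-indexing convention described above, e.g.
 * (CROCUS_STAGE_DIRTY_CONSTANTS_VS << MESA_SHADER_FRAGMENT) ==
 * CROCUS_STAGE_DIRTY_CONSTANTS_FS, and the constants flag for stage 's' is
 * (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + s)).
 */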
#define CROCUS_STAGE_DIRTY_BINDINGS_VS (1ull << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS (1ull << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES (1ull << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS (1ull << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS (1ull << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS (1ull << 29)
#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE (CROCUS_STAGE_DIRTY_CS | \
CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS | \
CROCUS_STAGE_DIRTY_UNCOMPILED_CS | \
CROCUS_STAGE_DIRTY_CONSTANTS_CS | \
CROCUS_STAGE_DIRTY_BINDINGS_CS)
#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)
#define CROCUS_ALL_STAGE_DIRTY_BINDINGS (CROCUS_STAGE_DIRTY_BINDINGS_VS | \
CROCUS_STAGE_DIRTY_BINDINGS_TCS | \
CROCUS_STAGE_DIRTY_BINDINGS_TES | \
CROCUS_STAGE_DIRTY_BINDINGS_GS | \
CROCUS_STAGE_DIRTY_BINDINGS_FS | \
CROCUS_STAGE_DIRTY_BINDINGS_CS)
#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS (CROCUS_STAGE_DIRTY_CONSTANTS_VS | \
CROCUS_STAGE_DIRTY_CONSTANTS_TCS | \
CROCUS_STAGE_DIRTY_CONSTANTS_TES | \
CROCUS_STAGE_DIRTY_CONSTANTS_GS | \
CROCUS_STAGE_DIRTY_CONSTANTS_FS)
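/* Illustrative sketch (not part of the driver): because these flags are laid
* out in gl_shader_stage order, a per-stage flag can be computed by adding
* the stage index to the shift base, e.g.
*
*    uint64_t flag = 1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + stage);
*
* which for stage == MESA_SHADER_FRAGMENT yields
* CROCUS_STAGE_DIRTY_CONSTANTS_FS.
*/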
/**
* Non-orthogonal state (NOS) dependency flags.
*
* Shader programs may depend on non-orthogonal state. These flags are
* used to indicate that a shader's key depends on the state provided by
* a certain Gallium CSO. Changing any CSOs marked as a dependency will
* cause the driver to re-compute the shader key, possibly triggering a
* shader recompile.
*/
enum crocus_nos_dep {
CROCUS_NOS_FRAMEBUFFER,
CROCUS_NOS_DEPTH_STENCIL_ALPHA,
CROCUS_NOS_RASTERIZER,
CROCUS_NOS_BLEND,
CROCUS_NOS_LAST_VUE_MAP,
CROCUS_NOS_TEXTURES,
CROCUS_NOS_VERTEX_ELEMENTS,
CROCUS_NOS_COUNT,
};
struct crocus_depth_stencil_alpha_state;
/**
* Cache IDs for the in-memory program cache (ice->shaders.cache).
*/
enum crocus_program_cache_id {
CROCUS_CACHE_VS = MESA_SHADER_VERTEX,
CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
CROCUS_CACHE_GS = MESA_SHADER_GEOMETRY,
CROCUS_CACHE_FS = MESA_SHADER_FRAGMENT,
CROCUS_CACHE_CS = MESA_SHADER_COMPUTE,
CROCUS_CACHE_BLORP,
CROCUS_CACHE_SF,
CROCUS_CACHE_CLIP,
CROCUS_CACHE_FF_GS,
};
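/* Illustrative sketch (not part of the driver): legacy fixed-function
* programs use the non-stage cache IDs, so a gen4/5 SF program might be
* looked up as
*
*    shader = crocus_find_cached_shader(ice, CROCUS_CACHE_SF,
*                                       sizeof(key), &key);
*
* where `shader` and `key` are hypothetical names used only for this
* example; the real call sites live in the state upload code.
*/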
/** @{
*
* Defines for PIPE_CONTROL operations, which trigger cache flushes,
* synchronization, pipelined memory writes, and so on.
*
* The bits here are not the actual hardware values. The actual fields
* move between various generations, so we just have flags for each
* potential operation, and use genxml to encode the actual packet.
*/
enum pipe_control_flags
{
PIPE_CONTROL_FLUSH_LLC = (1 << 1),
PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
PIPE_CONTROL_CS_STALL = (1 << 4),
PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
PIPE_CONTROL_SYNC_GFDT = (1 << 6),
PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
PIPE_CONTROL_DEPTH_STALL = (1 << 12),
PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
PIPE_CONTROL_TILE_CACHE_FLUSH = (1 << 25),
};
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
(PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
PIPE_CONTROL_DATA_CACHE_FLUSH | \
PIPE_CONTROL_RENDER_TARGET_FLUSH)
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
(PIPE_CONTROL_STATE_CACHE_INVALIDATE | \
PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
PIPE_CONTROL_VF_CACHE_INVALIDATE | \
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
PIPE_CONTROL_INSTRUCTION_INVALIDATE)
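/* Illustrative sketch (not part of the driver): callers usually OR these
* bits together when emitting a flush, e.g. an end-of-pipe style flush
* could look like
*
*    crocus_emit_pipe_control_flush(batch, "example reason",
*                                   PIPE_CONTROL_CS_STALL |
*                                   PIPE_CONTROL_CACHE_FLUSH_BITS);
*
* The exact combination is generation- and situation-specific.
*/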
enum crocus_predicate_state {
/* The first two states are used if we can determine whether to draw
* without having to look at the values in the query object buffer. This
* will happen if there is no conditional render in progress, if the query
* object is already completed or if something else has already added
* samples to the preliminary result.
*/
CROCUS_PREDICATE_STATE_RENDER,
CROCUS_PREDICATE_STATE_DONT_RENDER,
/* In this case whether to draw or not depends on the result of an
* MI_PREDICATE command so the predicate enable bit needs to be checked.
*/
CROCUS_PREDICATE_STATE_USE_BIT,
/* In this case, either MI_PREDICATE doesn't exist or we lack the
* necessary kernel features to use it. Stall for the query result.
*/
CROCUS_PREDICATE_STATE_STALL_FOR_QUERY,
};
/** @} */
/**
* An uncompiled, API-facing shader. This is the Gallium CSO for shaders.
* It primarily contains the NIR for the shader.
*
* Each API-facing shader can be compiled into multiple shader variants,
* based on non-orthogonal state dependencies, recorded in the shader key.
*
* See crocus_compiled_shader, which represents a compiled shader variant.
*/
struct crocus_uncompiled_shader {
struct nir_shader *nir;
struct pipe_stream_output_info stream_output;
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
unsigned program_id;
/** Bitfield of (1 << CROCUS_NOS_*) flags. */
unsigned nos;
/** Have any shader variants been compiled yet? */
bool compiled_once;
/** Should we use ALT mode for math? Useful for ARB programs. */
bool use_alt_mode;
bool needs_edge_flag;
/** Constant data scraped from the shader by nir_opt_large_constants */
struct pipe_resource *const_data;
/** Surface state for const_data */
struct crocus_state_ref const_data_state;
};
enum crocus_surface_group {
CROCUS_SURFACE_GROUP_RENDER_TARGET,
CROCUS_SURFACE_GROUP_RENDER_TARGET_READ,
CROCUS_SURFACE_GROUP_SOL,
CROCUS_SURFACE_GROUP_CS_WORK_GROUPS,
CROCUS_SURFACE_GROUP_TEXTURE,
CROCUS_SURFACE_GROUP_TEXTURE_GATHER,
CROCUS_SURFACE_GROUP_IMAGE,
CROCUS_SURFACE_GROUP_UBO,
CROCUS_SURFACE_GROUP_SSBO,
CROCUS_SURFACE_GROUP_COUNT,
};
enum {
/* Invalid value for a binding table index. */
CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
struct crocus_binding_table {
uint32_t size_bytes;
/** Number of surfaces in each group, before compacting. */
uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];
/** Initial offset of each group. */
uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];
/** Mask of surfaces used in each group. */
uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
};
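/* Illustrative sketch (not part of the driver): state upload typically maps
* a group-local index to a binding table index (BTI) with the helpers
* declared later in this header, e.g.
*
*    uint32_t bti = crocus_group_index_to_bti(&shader->bt,
*                                             CROCUS_SURFACE_GROUP_TEXTURE,
*                                             tex_index);
*
* A result of CROCUS_SURFACE_NOT_USED means the shader never accesses that
* surface.  `shader` and `tex_index` are hypothetical names used only for
* this example.
*/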
/**
* A compiled shader variant, containing a pointer to the GPU assembly,
* as well as program data and other packets needed by state upload.
*
* There can be several crocus_compiled_shader variants per API-level shader
* (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
*/
struct crocus_compiled_shader {
/** Reference to the uploaded assembly. */
uint32_t offset;
/* asm size in map */
uint32_t map_size;
/** The program data (owned by the program cache hash table) */
struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
/** A list of system values to be uploaded as uniforms. */
enum brw_param_builtin *system_values;
unsigned num_system_values;
/** Number of constbufs expected by the shader. */
unsigned num_cbufs;
/**
* Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
* (the VUE-based information for transform feedback outputs).
*/
uint32_t *streamout;
struct crocus_binding_table bt;
uint32_t bind_bo_offset;
uint32_t surf_offset[128]; /* TODO */
};
/**
* API context state that is replicated per shader stage.
*/
struct crocus_shader_state {
/** Uniform Buffers */
struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];
bool sysvals_need_upload;
/** Shader Storage Buffers */
struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];
/** Shader Storage Images (image load store) */
struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];
struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];
/** Bitfield of which constant buffers are bound (non-null). */
uint32_t bound_cbufs;
/** Bitfield of which image views are bound (non-null). */
uint32_t bound_image_views;
/** Bitfield of which sampler views are bound (non-null). */
uint32_t bound_sampler_views;
/** Bitfield of which shader storage buffers are bound (non-null). */
uint32_t bound_ssbos;
/** Bitfield of which shader storage buffers are writable. */
uint32_t writable_ssbos;
uint32_t sampler_offset;
};
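/* Illustrative sketch (not part of the driver): the bound_* fields are plain
* bitfields indexed by slot, so upload code can walk only the live slots,
* e.g.
*
*    uint32_t bound = shs->bound_cbufs;
*    while (bound) {
*       const int i = u_bit_scan(&bound);
*       // ... process shs->constbufs[i] ...
*    }
*
* `shs` is a hypothetical pointer to a struct crocus_shader_state.
*/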
/**
* The API context (derived from pipe_context).
*
* Most driver state is tracked here.
*/
struct crocus_context {
struct pipe_context ctx;
/** A debug callback for KHR_debug output. */
struct pipe_debug_callback dbg;
/** A device reset status callback for notifying that the GPU is hosed. */
struct pipe_device_reset_callback reset;
/** Slab allocator for crocus_transfer_map objects. */
struct slab_child_pool transfer_pool;
struct blorp_context blorp;
int batch_count;
struct crocus_batch batches[CROCUS_BATCH_COUNT];
struct u_upload_mgr *query_buffer_uploader;
struct blitter_context *blitter;
struct {
struct {
/**
* Either the value of BaseVertex for indexed draw calls or the value
* of the argument <first> for non-indexed draw calls.
*/
int firstvertex;
int baseinstance;
} params;
/**
* Are the above values the ones stored in the draw_params buffer?
* If so, we can compare them against new values to see if anything
* changed. If not, we need to assume they changed.
*/
bool params_valid;
/**
* Resource and offset that store the draw parameters, either from the
* indirect buffer or from the buffer that stores the previous values
* for non-indirect draws.
*/
struct crocus_state_ref draw_params;
struct {
/**
* The value of DrawID.  This always comes in from its own vertex
* buffer since it's not part of the indirect draw parameters.
*/
int drawid;
/**
* Stores whether this is an indexed or non-indexed draw (~0/0).  Useful
* to calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
*/
int is_indexed_draw;
} derived_params;
/**
* Resource and offset used for GL_ARB_shader_draw_parameters which
* contains parameters that are not present in the indirect buffer,
* such as drawid and is_indexed_draw.  They will go in their own
* vertex element.
*/
struct crocus_state_ref derived_draw_params;
} draw;
struct {
struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
struct brw_vue_map *last_vue_map;
struct crocus_bo *cache_bo;
uint32_t cache_next_offset;
void *cache_bo_map;
struct hash_table *cache;
unsigned urb_size;
/* gen 4/5 clip/sf progs */
struct crocus_compiled_shader *clip_prog;
struct crocus_compiled_shader *sf_prog;
/* gen4/5 prims, gen6 streamout */
struct crocus_compiled_shader *ff_gs_prog;
uint32_t clip_offset;
uint32_t sf_offset;
uint32_t wm_offset;
uint32_t vs_offset;
uint32_t gs_offset;
uint32_t cc_offset;
/** Is a GS or TES outputting points or lines? */
bool output_topology_is_points_or_lines;
/* Track last VS URB entry size */
unsigned last_vs_entry_size;
/**
* Scratch buffers for various sizes and stages.
*
* Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
* and shader stage.
*/
struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
} shaders;
struct {
struct crocus_query *query;
bool condition;
enum pipe_render_cond_flag mode;
} condition;
struct intel_perf_context *perf_ctx;
struct {
uint64_t dirty;
uint64_t stage_dirty;
uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];
unsigned num_viewports;
unsigned sample_mask;
struct crocus_blend_state *cso_blend;
struct crocus_rasterizer_state *cso_rast;
struct crocus_depth_stencil_alpha_state *cso_zsa;
struct crocus_vertex_element_state *cso_vertex_elements;
struct pipe_blend_color blend_color;
struct pipe_poly_stipple poly_stipple;
struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
struct pipe_stencil_ref stencil_ref;
struct pipe_framebuffer_state framebuffer;
struct pipe_clip_state clip_planes;
float default_outer_level[4];
float default_inner_level[2];
/** Bitfield of which vertex buffers are bound (non-null). */
uint32_t bound_vertex_buffers;
struct pipe_vertex_buffer vertex_buffers[16];
uint32_t vb_end[16];
bool primitive_restart;
unsigned cut_index;
enum pipe_prim_type prim_mode:8;
bool prim_is_points_or_lines;
uint8_t vertices_per_patch;
bool window_space_position;
/** The last compute group size */
uint32_t last_block[3];
/** The last compute grid size */
uint32_t last_grid[3];
/** Reference to the BO containing the compute grid size */
struct crocus_state_ref grid_size;
/**
* Array of aux usages for drawing, altered to account for any
* self-dependencies from resources bound for sampling and rendering.
*/
enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];
/** Aux usage of the fb's depth buffer (which may or may not exist). */
enum isl_aux_usage hiz_usage;
/** Bitfield of whether color blending is enabled for RT[i] */
uint8_t blend_enables;
/** Are depth writes enabled? (Depth buffer may or may not exist.) */
bool depth_writes_enabled;
/** Are stencil writes enabled? (Stencil buffer may or may not exist.) */
bool stencil_writes_enabled;
/** GenX-specific current state */
struct crocus_genx_state *genx;
struct crocus_shader_state shaders[MESA_SHADER_STAGES];
/** Does the vertex shader use shader draw parameters? */
bool vs_uses_draw_params;
bool vs_uses_derived_draw_params;
bool vs_needs_sgvs_element;
bool vs_uses_vertexid;
bool vs_uses_instanceid;
/** Does the vertex shader use the edge flag? */
bool vs_needs_edge_flag;
struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
bool streamout_active;
int so_targets;
bool statistics_counters_enabled;
/** Current conditional rendering mode */
enum crocus_predicate_state predicate;
bool predicate_supported;
/**
* Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
* render context that needs to be uploaded to the compute context.
*/
struct crocus_bo *compute_predicate;
/** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
bool prims_generated_query_active;
/** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
uint32_t *streamout;
/**
* Resources containing streamed state which our render context
* currently points to. Used to re-add these to the validation
* list when we start a new batch and haven't resubmitted commands.
*/
struct {
struct pipe_resource *res;
uint32_t offset;
uint32_t size;
uint32_t index_size;
bool prim_restart;
} index_buffer;
uint32_t sf_vp_address;
uint32_t clip_vp_address;
uint32_t cc_vp_address;
uint32_t stats_wm;
float global_depth_offset_clamp;
uint32_t last_xfb_verts_per_prim;
uint64_t svbi;
} state;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
uint32_t vsize; /* vertex size plus header in urb registers */
uint32_t gsize; /* GS output size in urb registers */
uint32_t hsize; /* Tessellation control output size in urb registers */
uint32_t dsize; /* Tessellation evaluation output size in urb registers */
uint32_t csize; /* constant buffer size in urb registers */
uint32_t sfsize; /* setup data size in urb registers */
bool constrained;
uint32_t nr_vs_entries;
uint32_t nr_hs_entries;
uint32_t nr_ds_entries;
uint32_t nr_gs_entries;
uint32_t nr_clip_entries;
uint32_t nr_sf_entries;
uint32_t nr_cs_entries;
uint32_t vs_start;
uint32_t hs_start;
uint32_t ds_start;
uint32_t gs_start;
uint32_t clip_start;
uint32_t sf_start;
uint32_t cs_start;
/**
* URB size in the current configuration. The units this is expressed
* in are somewhat inconsistent, see intel_device_info::urb::size.
*
* FINISHME: Represent the URB size consistently in KB on all platforms.
*/
uint32_t size;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the GS.
*/
bool gs_present;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the HS and DS.
*/
bool tess_present;
} urb;
/* GEN4/5 curbe */
struct {
unsigned wm_start;
unsigned wm_size;
unsigned clip_start;
unsigned clip_size;
unsigned vs_start;
unsigned vs_size;
unsigned total_size;
struct crocus_resource *curbe_res;
unsigned curbe_offset;
} curbe;
/**
* A buffer containing a marker + description of the driver. This buffer is
* added to all execbuf syscalls so that we can identify the driver that
* generated a hang by looking at the content of the buffer in the error
* state. It is also used for hardware workarounds that require scratch
* writes or reads from some unimportant memory. To avoid overwriting the
* debug data, use the workaround_offset field for workarounds.
*/
struct crocus_bo *workaround_bo;
unsigned workaround_offset;
};
#define perf_debug(dbg, ...) do { \
if (INTEL_DEBUG & DEBUG_PERF) \
dbg_printf(__VA_ARGS__); \
if (unlikely(dbg)) \
pipe_debug_message(dbg, PERF_INFO, __VA_ARGS__); \
} while(0)
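/* Illustrative usage sketch (not part of the driver): performance warnings
* are typically reported against the context's KHR_debug callback, e.g.
*
*    perf_debug(&ice->dbg, "Stalling on query result.\n");
*
* The message text here is an assumption for illustration.
*/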
struct pipe_context *
crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
void crocus_lost_context_state(struct crocus_batch *batch);
void crocus_init_blit_functions(struct pipe_context *ctx);
void crocus_init_clear_functions(struct pipe_context *ctx);
void crocus_init_program_functions(struct pipe_context *ctx);
void crocus_init_resource_functions(struct pipe_context *ctx);
bool crocus_update_compiled_shaders(struct crocus_context *ice);
void crocus_update_compiled_compute_shader(struct crocus_context *ice);
void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
unsigned threads, uint32_t *dst);
/* crocus_blit.c */
enum crocus_blitter_op
{
CROCUS_SAVE_TEXTURES = 1,
CROCUS_SAVE_FRAMEBUFFER = 2,
CROCUS_SAVE_FRAGMENT_STATE = 4,
CROCUS_DISABLE_RENDER_COND = 8,
};
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond);
void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
struct isl_device *isl_dev,
struct blorp_surf *surf,
struct pipe_resource *p_res,
enum isl_aux_usage aux_usage,
unsigned level,
bool is_render_target);
void crocus_copy_region(struct blorp_context *blorp,
struct crocus_batch *batch,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
/* crocus_draw.c */
void crocus_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws);
void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
/* crocus_pipe_control.c */
void crocus_emit_pipe_control_flush(struct crocus_batch *batch,
const char *reason, uint32_t flags);
void crocus_emit_pipe_control_write(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm);
void crocus_emit_mi_flush(struct crocus_batch *batch);
void crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);
void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
const char *reason, uint32_t flags);
void crocus_flush_all_caches(struct crocus_batch *batch);
#define crocus_handle_always_flush_cache(batch) \
do { \
if (unlikely(batch->screen->driconf.always_flush_cache)) \
crocus_flush_all_caches(batch); \
} while (0)
void crocus_init_flush_functions(struct pipe_context *ctx);
/* crocus_program.c */
const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
gl_shader_stage stage);
struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
unsigned per_thread_scratch,
gl_shader_stage stage);
uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
enum crocus_surface_group group,
uint32_t index);
uint32_t crocus_bti_to_group_index(const struct crocus_binding_table *bt,
enum crocus_surface_group group,
uint32_t bti);
/* crocus_disk_cache.c */
void crocus_disk_cache_store(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const struct crocus_compiled_shader *shader,
void *map,
const void *prog_key,
uint32_t prog_key_size);
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
const struct crocus_uncompiled_shader *ish,
const void *prog_key,
uint32_t prog_key_size);
/* crocus_program_cache.c */
void crocus_init_program_cache(struct crocus_context *ice);
void crocus_destroy_program_cache(struct crocus_context *ice);
void crocus_print_program_cache(struct crocus_context *ice);
struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice,
enum crocus_program_cache_id,
uint32_t key_size,
const void *key);
struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice,
enum crocus_program_cache_id,
uint32_t key_size,
const void *key,
const void *assembly,
uint32_t asm_size,
struct brw_stage_prog_data *,
uint32_t prog_data_size,
uint32_t *streamout,
enum brw_param_builtin *sysv,
unsigned num_system_values,
unsigned num_cbufs,
const struct crocus_binding_table *bt);
const void *crocus_find_previous_compile(const struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
unsigned program_string_id);
bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
const void *key,
uint32_t key_size,
uint32_t *kernel_out,
void *prog_data_out);
bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
uint32_t stage,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
uint32_t *kernel_out,
void *prog_data_out);
/* crocus_resolve.c */
void crocus_predraw_resolve_inputs(struct crocus_context *ice,
struct crocus_batch *batch,
bool *draw_aux_buffer_disabled,
gl_shader_stage stage,
bool consider_framebuffer);
void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
struct crocus_batch *batch,
bool *draw_aux_buffer_disabled);
void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
struct crocus_batch *batch);
void crocus_cache_sets_clear(struct crocus_batch *batch);
void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_cache_flush_for_render(struct crocus_batch *batch,
struct crocus_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void crocus_render_cache_add_bo(struct crocus_batch *batch,
struct crocus_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
unsigned index,
struct pipe_driver_query_group_info *info);
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);
bool crocus_sw_check_cond_render(struct crocus_context *ice);
static inline bool crocus_check_conditional_render(struct crocus_context *ice)
{
if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
return crocus_sw_check_cond_render(ice);
return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
}
#ifdef genX
# include "crocus_genx_protos.h"
#else
# define genX(x) gfx4_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx45_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx5_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx6_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx7_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx75_##x
# include "crocus_genx_protos.h"
# undef genX
#endif
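/* Illustrative note (not part of the driver): in per-generation compilation
* units, genX(foo) expands to that unit's prefix (e.g. gfx7_foo), while
* generic code hits the #else branch above and gets the prototypes declared
* once for every supported generation.  `foo` is a placeholder name used
* only for this example.
*/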
#endif

View File

@ -0,0 +1,58 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_DEFINES_H
#define CROCUS_DEFINES_H
/**
* @file crocus_defines.h
*
* Random hardware #defines that we're not using GENXML for.
*/
#define MI_PREDICATE (0xC << 23)
# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
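/* Illustrative sketch (not part of this header): conditional rendering
* typically loads the query results into MI_PREDICATE_SRC0/SRC1 and then
* emits a single command dword such as
*
*    MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
*    MI_PREDICATE_COMBINEOP_SET | MI_PREDICATE_COMPAREOP_SRCS_EQUAL
*
* so that MI_PREDICATE_RESULT ends up nonzero when the two sources differ.
*/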
/* Predicate registers */
#define MI_PREDICATE_SRC0 0x2400
#define MI_PREDICATE_SRC1 0x2408
#define MI_PREDICATE_DATA 0x2410
#define MI_PREDICATE_RESULT 0x2418
#define MI_PREDICATE_RESULT_1 0x241C
#define MI_PREDICATE_RESULT_2 0x2214
#define CS_GPR(n) (0x2600 + (n) * 8)
/* The number of bits in our TIMESTAMP queries. */
#define TIMESTAMP_BITS 36
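/* Illustrative sketch (not part of this header): since raw timestamps wrap
* at 2^TIMESTAMP_BITS ticks, query code typically masks deltas to that
* width, e.g.
*
*    uint64_t delta = (end - start) & ((1ull << TIMESTAMP_BITS) - 1);
*
* `end` and `start` are hypothetical raw counter reads.
*/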
#endif

View File

@ -0,0 +1,263 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_disk_cache.c
*
* Functions for interacting with the on-disk shader cache.
*/
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "compiler/nir/nir.h"
#include "util/blob.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "crocus_context.h"
static bool debug = false;
/**
* Compute a disk cache key for the given uncompiled shader and NOS key.
*/
static void
crocus_disk_cache_compute_key(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const void *orig_prog_key,
uint32_t prog_key_size,
cache_key cache_key)
{
/* Create a copy of the program key with program_string_id zeroed out.
* It's essentially random data which we don't want to include in our
* hashing and comparisons. We'll set a proper value on a cache hit.
*/
union brw_any_prog_key prog_key;
memcpy(&prog_key, orig_prog_key, prog_key_size);
prog_key.base.program_string_id = 0;
uint8_t data[sizeof(prog_key) + sizeof(ish->nir_sha1)];
uint32_t data_size = prog_key_size + sizeof(ish->nir_sha1);
memcpy(data, ish->nir_sha1, sizeof(ish->nir_sha1));
memcpy(data + sizeof(ish->nir_sha1), &prog_key, prog_key_size);
disk_cache_compute_key(cache, data, data_size, cache_key);
}
/**
* Store the given compiled shader in the disk cache.
*
* This should only be called on newly compiled shaders. No checking is
* done to prevent repeated stores of the same shader.
*/
void
crocus_disk_cache_store(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const struct crocus_compiled_shader *shader,
void *map,
const void *prog_key,
uint32_t prog_key_size)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return;
gl_shader_stage stage = ish->nir->info.stage;
const struct brw_stage_prog_data *prog_data = shader->prog_data;
cache_key cache_key;
crocus_disk_cache_compute_key(cache, ish, prog_key, prog_key_size, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
/* We write the following data to the cache blob:
*
* 1. Prog data (must come first because it has the assembly size)
* 2. Assembly code
* 3. Number of entries in the system value array
* 4. System value array
* 5. Legacy param array (only used for compute workgroup ID)
* 6. Binding table
*/
blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
blob_write_bytes(&blob, map + shader->offset, shader->prog_data->program_size);
blob_write_bytes(&blob, &shader->num_system_values, sizeof(unsigned));
blob_write_bytes(&blob, shader->system_values,
shader->num_system_values * sizeof(enum brw_param_builtin));
blob_write_bytes(&blob, prog_data->param,
prog_data->nr_params * sizeof(uint32_t));
blob_write_bytes(&blob, &shader->bt, sizeof(shader->bt));
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
#endif
}
/**
* Search for a compiled shader in the disk cache. If found, upload it
* to the in-memory program cache so we can use it.
*/
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
const struct crocus_uncompiled_shader *ish,
const void *prog_key,
uint32_t key_size)
{
#ifdef ENABLE_SHADER_CACHE
struct crocus_screen *screen = (void *) ice->ctx.screen;
struct disk_cache *cache = screen->disk_cache;
gl_shader_stage stage = ish->nir->info.stage;
if (!cache)
return NULL;
cache_key cache_key;
crocus_disk_cache_compute_key(cache, ish, prog_key, key_size, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(screen->disk_cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return NULL;
const uint32_t prog_data_size = brw_prog_data_size(stage);
struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
const void *assembly;
uint32_t num_system_values;
uint32_t *system_values = NULL;
uint32_t *so_decls = NULL;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
blob_copy_bytes(&blob, prog_data, prog_data_size);
assembly = blob_read_bytes(&blob, prog_data->program_size);
num_system_values = blob_read_uint32(&blob);
if (num_system_values) {
system_values =
ralloc_array(NULL, enum brw_param_builtin, num_system_values);
blob_copy_bytes(&blob, system_values,
num_system_values * sizeof(enum brw_param_builtin));
}
prog_data->param = NULL;
prog_data->pull_param = NULL;
assert(prog_data->nr_pull_params == 0);
if (prog_data->nr_params) {
prog_data->param = ralloc_array(NULL, uint32_t, prog_data->nr_params);
blob_copy_bytes(&blob, prog_data->param,
prog_data->nr_params * sizeof(uint32_t));
}
struct crocus_binding_table bt;
blob_copy_bytes(&blob, &bt, sizeof(bt));
if ((stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL ||
stage == MESA_SHADER_GEOMETRY) && screen->devinfo.ver > 6) {
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
&vue_prog_data->vue_map);
}
/* System values and uniforms are stored in constant buffer 0, so the
* user-facing UBOs start at index one. If any constant buffer is needed,
* constant buffer 0 will also be needed, so account for it.
*/
unsigned num_cbufs = ish->nir->info.num_ubos;
if (num_cbufs || ish->nir->num_uniforms)
num_cbufs++;
if (num_system_values)
num_cbufs++;
/* Upload our newly read shader to the in-memory program cache and
* return it to the caller.
*/
struct crocus_compiled_shader *shader =
crocus_upload_shader(ice, stage, key_size, prog_key, assembly,
prog_data->program_size,
prog_data, prog_data_size, so_decls, system_values,
num_system_values, num_cbufs, &bt);
free(buffer);
return shader;
#else
return NULL;
#endif
}
/**
* Initialize the on-disk shader cache.
*/
void
crocus_disk_cache_init(struct crocus_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
if (INTEL_DEBUG & DEBUG_DISK_CACHE_DISABLE_MASK)
return;
/* array length = print length + nul char + 1 extra to verify it's unused */
char renderer[13];
UNUSED int len =
snprintf(renderer, sizeof(renderer), "crocus_%04x", screen->pci_id);
assert(len == sizeof(renderer) - 2);
const struct build_id_note *note =
build_id_find_nhdr_for_addr(crocus_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
const uint64_t driver_flags =
brw_get_compiler_config_value(screen->compiler);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

View File

@ -0,0 +1,511 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_draw.c
*
* The main driver hooks for drawing and launching compute shaders.
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_draw.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_upload_mgr.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_eu_defines.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "util/u_prim_restart.h"
#include "indices/u_primconvert.h"
#include "util/u_prim.h"
static bool
prim_is_points_or_lines(enum pipe_prim_type mode)
{
/* We don't need to worry about adjacency - it can only be used with
* geometry shaders, and we don't care about this info when GS is on.
*/
return mode == PIPE_PRIM_POINTS ||
mode == PIPE_PRIM_LINES ||
mode == PIPE_PRIM_LINE_LOOP ||
mode == PIPE_PRIM_LINE_STRIP;
}
static bool
can_cut_index_handle_restart_index(struct crocus_context *ice,
const struct pipe_draw_info *draw)
{
switch (draw->index_size) {
case 1:
return draw->restart_index == 0xff;
case 2:
return draw->restart_index == 0xffff;
case 4:
return draw->restart_index == 0xffffffff;
default:
unreachable("illegal index size\n");
}
return false;
}
static bool
can_cut_index_handle_prim(struct crocus_context *ice,
const struct pipe_draw_info *draw)
{
struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
/* Haswell can do it all. */
if (devinfo->is_haswell)
return true;
if (!can_cut_index_handle_restart_index(ice, draw))
return false;
switch (draw->mode) {
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_LINE_STRIP:
case PIPE_PRIM_TRIANGLES:
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_LINES_ADJACENCY:
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
case PIPE_PRIM_TRIANGLES_ADJACENCY:
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
return true;
default:
break;
}
return false;
}
/**
* Record the current primitive mode and restart information, flagging
* related packets as dirty if necessary.
*
* This must be called before updating compiled shaders, because the patch
* information informs the TCS key.
*/
static void
crocus_update_draw_info(struct crocus_context *ice,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
enum pipe_prim_type mode = info->mode;
if (screen->devinfo.ver < 6) {
/* Slight optimization to avoid the GS program when not needed:
*/
struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
if (mode == PIPE_PRIM_QUAD_STRIP && !rs_state->flatshade &&
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
mode = PIPE_PRIM_TRIANGLE_STRIP;
if (mode == PIPE_PRIM_QUADS &&
draw->count == 4 &&
!rs_state->flatshade &&
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
mode = PIPE_PRIM_TRIANGLE_FAN;
}
if (ice->state.prim_mode != mode) {
ice->state.prim_mode = mode;
if (screen->devinfo.ver < 6)
ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
if (screen->devinfo.ver <= 6)
ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
if (screen->devinfo.ver >= 7)
ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
/* For XY Clip enables */
bool points_or_lines = prim_is_points_or_lines(mode);
if (points_or_lines != ice->state.prim_is_points_or_lines) {
ice->state.prim_is_points_or_lines = points_or_lines;
ice->state.dirty |= CROCUS_DIRTY_CLIP;
}
}
if (info->mode == PIPE_PRIM_PATCHES &&
ice->state.vertices_per_patch != info->vertices_per_patch) {
ice->state.vertices_per_patch = info->vertices_per_patch;
/* This is needed for key->input_vertices */
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_TCS;
/* Flag constants dirty for gl_PatchVerticesIn if needed. */
const struct shader_info *tcs_info =
crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
if (tcs_info &&
BITSET_TEST(tcs_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
ice->state.shaders[MESA_SHADER_TESS_CTRL].sysvals_need_upload = true;
}
}
const unsigned cut_index = info->primitive_restart ? info->restart_index :
ice->state.cut_index;
if (ice->state.primitive_restart != info->primitive_restart ||
ice->state.cut_index != cut_index) {
if (screen->devinfo.is_haswell)
ice->state.dirty |= CROCUS_DIRTY_GEN75_VF;
ice->state.primitive_restart = info->primitive_restart;
ice->state.cut_index = cut_index;
}
}
/**
* Update shader draw parameters, flagging VF packets as dirty if necessary.
*/
static void
crocus_update_draw_parameters(struct crocus_context *ice,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draw)
{
bool changed = false;
if (ice->state.vs_uses_draw_params) {
struct crocus_state_ref *draw_params = &ice->draw.draw_params;
if (indirect && indirect->buffer) {
pipe_resource_reference(&draw_params->res, indirect->buffer);
draw_params->offset =
indirect->offset + (info->index_size ? 12 : 8);
changed = true;
ice->draw.params_valid = false;
} else {
int firstvertex = info->index_size ? draw->index_bias : draw->start;
if (!ice->draw.params_valid ||
ice->draw.params.firstvertex != firstvertex ||
ice->draw.params.baseinstance != info->start_instance) {
changed = true;
ice->draw.params.firstvertex = firstvertex;
ice->draw.params.baseinstance = info->start_instance;
ice->draw.params_valid = true;
u_upload_data(ice->ctx.stream_uploader, 0,
sizeof(ice->draw.params), 4, &ice->draw.params,
&draw_params->offset, &draw_params->res);
}
}
}
if (ice->state.vs_uses_derived_draw_params) {
struct crocus_state_ref *derived_params = &ice->draw.derived_draw_params;
int is_indexed_draw = info->index_size ? -1 : 0;
if (ice->draw.derived_params.drawid != drawid_offset ||
ice->draw.derived_params.is_indexed_draw != is_indexed_draw) {
changed = true;
ice->draw.derived_params.drawid = drawid_offset;
ice->draw.derived_params.is_indexed_draw = is_indexed_draw;
u_upload_data(ice->ctx.stream_uploader, 0,
sizeof(ice->draw.derived_params), 4,
&ice->draw.derived_params, &derived_params->offset,
&derived_params->res);
}
}
if (changed) {
ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |
CROCUS_DIRTY_VERTEX_ELEMENTS;
}
}
static void
crocus_indirect_draw_vbo(struct crocus_context *ice,
const struct pipe_draw_info *dinfo,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *dindirect,
const struct pipe_draw_start_count_bias *draws)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
struct pipe_draw_info info = *dinfo;
struct pipe_draw_indirect_info indirect = *dindirect;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->is_haswell && indirect.indirect_draw_count &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
/* Upload MI_PREDICATE_RESULT to GPR15. */
screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
}
uint64_t orig_dirty = ice->state.dirty;
uint64_t orig_stage_dirty = ice->state.stage_dirty;
for (int i = 0; i < indirect.draw_count; i++) {
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2400);
crocus_update_draw_parameters(ice, &info, drawid_offset + i, &indirect, draws);
screen->vtbl.upload_render_state(ice, batch, &info, drawid_offset + i, &indirect, draws);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
indirect.offset += indirect.stride;
}
if (devinfo->is_haswell && indirect.indirect_draw_count &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
/* Restore MI_PREDICATE_RESULT. */
screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
}
/* Put this back for post-draw resolves; we'll clear it again after. */
ice->state.dirty = orig_dirty;
ice->state.stage_dirty = orig_stage_dirty;
}
static void
crocus_simple_draw_vbo(struct crocus_context *ice,
const struct pipe_draw_info *draw,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2400);
crocus_update_draw_parameters(ice, draw, drawid_offset, indirect, sc);
screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
}
static void
crocus_draw_vbo_get_vertex_count(struct pipe_context *ctx,
const struct pipe_draw_info *info_in,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect)
{
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
struct pipe_draw_info info = *info_in;
struct pipe_draw_start_count_bias draw;
uint32_t val = screen->vtbl.get_so_offset(indirect->count_from_stream_output);
draw.start = 0;
draw.count = val;
ctx->draw_vbo(ctx, &info, drawid_offset, NULL, &draw, 1);
}
/**
* The pipe->draw_vbo() driver hook. Performs a draw on the GPU.
*/
void
crocus_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
if (num_draws > 1) {
util_draw_multi(ctx, info, drawid_offset, indirect, draws, num_draws);
return;
}
if (!indirect && (!draws[0].count || !info->instance_count))
return;
struct crocus_context *ice = (struct crocus_context *) ctx;
struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
if (!crocus_check_conditional_render(ice))
return;
if (info->primitive_restart && !can_cut_index_handle_prim(ice, info)) {
util_draw_vbo_without_prim_restart(ctx, info, drawid_offset,
indirect, draws);
return;
}
if (indirect && indirect->count_from_stream_output &&
!screen->devinfo.is_haswell) {
crocus_draw_vbo_get_vertex_count(ctx, info, drawid_offset, indirect);
return;
}
/* The hardware is capable of removing dangling vertices on its own; however,
* prior to Gen6, we sometimes convert quads into trifans (and quad strips
* into tristrips), since pre-Gen6 hardware requires a GS to render quads.
* Manually trim dangling vertices from such draw calls here so that those
* vertices won't get drawn when we convert to trifans/tristrips.
*/
if (screen->devinfo.ver < 6) {
if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP) {
bool trim = u_trim_pipe_prim(info->mode, (unsigned *)&draws[0].count);
if (!trim)
return;
}
}
/* We can't safely re-emit 3DSTATE_SO_BUFFERS because it may zero the
* write offsets, changing the behavior.
*/
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER & ~CROCUS_DIRTY_GEN7_SO_BUFFERS;
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
}
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
if (screen->devinfo.ver == 6)
crocus_emit_post_sync_nonzero_flush(batch);
crocus_update_draw_info(ice, info, draws);
if (!crocus_update_compiled_shaders(ice))
return;
if (ice->state.dirty & CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES) {
bool draw_aux_buffer_disabled[BRW_MAX_DRAW_BUFFERS] = { };
for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++) {
if (ice->shaders.prog[stage])
crocus_predraw_resolve_inputs(ice, batch, draw_aux_buffer_disabled,
stage, true);
}
crocus_predraw_resolve_framebuffer(ice, batch, draw_aux_buffer_disabled);
}
crocus_handle_always_flush_cache(batch);
if (indirect && indirect->buffer)
crocus_indirect_draw_vbo(ice, info, drawid_offset, indirect, draws);
else
crocus_simple_draw_vbo(ice, info, drawid_offset, indirect, draws);
crocus_handle_always_flush_cache(batch);
crocus_postdraw_update_resolve_tracking(ice, batch);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
}
static void
crocus_update_grid_size_resource(struct crocus_context *ice,
const struct pipe_grid_info *grid)
{
struct crocus_state_ref *grid_ref = &ice->state.grid_size;
const struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_COMPUTE];
bool grid_needs_surface = shader->bt.used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS];
if (grid->indirect) {
pipe_resource_reference(&grid_ref->res, grid->indirect);
grid_ref->offset = grid->indirect_offset;
/* Zero out the grid size so that the next non-indirect grid launch will
* re-upload it properly.
*/
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
} else if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) != 0) {
memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));
u_upload_data(ice->ctx.const_uploader, 0, sizeof(grid->grid), 4,
grid->grid, &grid_ref->offset, &grid_ref->res);
}
/* Skip flagging the CS bindings dirty if the shader doesn't use the grid size surface. */
if (!grid_needs_surface)
return;
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_CS;
}
void
crocus_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
{
struct crocus_context *ice = (struct crocus_context *) ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_COMPUTE];
struct crocus_screen *screen = batch->screen;
if (!crocus_check_conditional_render(ice))
return;
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
}
/* We can't do resolves on the compute engine, so awkwardly, we have to
* do them on the render batch...
*/
if (ice->state.dirty & CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES) {
crocus_predraw_resolve_inputs(ice, &ice->batches[CROCUS_BATCH_RENDER], NULL,
MESA_SHADER_COMPUTE, false);
}
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2500);
crocus_update_compiled_compute_shader(ice);
if (memcmp(ice->state.last_block, grid->block, sizeof(grid->block)) != 0) {
memcpy(ice->state.last_block, grid->block, sizeof(grid->block));
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;
ice->state.shaders[MESA_SHADER_COMPUTE].sysvals_need_upload = true;
}
crocus_update_grid_size_resource(ice, grid);
if (ice->state.compute_predicate) {
screen->vtbl.emit_compute_predicate(batch);
ice->state.compute_predicate = NULL;
}
crocus_handle_always_flush_cache(batch);
screen->vtbl.upload_compute_state(ice, batch, grid);
crocus_handle_always_flush_cache(batch);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_COMPUTE;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
/* Note: since compute shaders can't access the framebuffer, there's
* no need to call crocus_postdraw_update_resolve_tracking.
*/
}

View File

@ -0,0 +1,571 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_fence.c
*
* Fences for driver and IPC serialisation, scheduling and synchronisation.
*/
#include "util/u_inlines.h"
#include "intel/common/intel_gem.h"
#include "crocus_batch.h"
#include "crocus_bufmgr.h"
#include "crocus_context.h"
#include "crocus_fence.h"
#include "crocus_screen.h"
static uint32_t
gem_syncobj_create(int fd, uint32_t flags)
{
struct drm_syncobj_create args = {
.flags = flags,
};
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &args);
return args.handle;
}
static void
gem_syncobj_destroy(int fd, uint32_t handle)
{
struct drm_syncobj_destroy args = {
.handle = handle,
};
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
}
/**
* Make a new sync-point.
*/
struct crocus_syncobj *
crocus_create_syncobj(struct crocus_screen *screen)
{
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
if (!syncobj)
return NULL;
syncobj->handle = gem_syncobj_create(screen->fd, 0);
assert(syncobj->handle);
pipe_reference_init(&syncobj->ref, 1);
return syncobj;
}
void
crocus_syncobj_destroy(struct crocus_screen *screen,
struct crocus_syncobj *syncobj)
{
gem_syncobj_destroy(screen->fd, syncobj->handle);
free(syncobj);
}
/**
* Add a sync-point to the batch, with the given flags.
*
* \p flags One of I915_EXEC_FENCE_WAIT or I915_EXEC_FENCE_SIGNAL.
*/
void
crocus_batch_add_syncobj(struct crocus_batch *batch,
struct crocus_syncobj *syncobj, unsigned flags)
{
struct drm_i915_gem_exec_fence *fence =
util_dynarray_grow(&batch->exec_fences, struct drm_i915_gem_exec_fence, 1);
*fence = (struct drm_i915_gem_exec_fence){
.handle = syncobj->handle,
.flags = flags,
};
struct crocus_syncobj **store =
util_dynarray_grow(&batch->syncobjs, struct crocus_syncobj *, 1);
*store = NULL;
crocus_syncobj_reference(batch->screen, store, syncobj);
}
/**
* Walk through a batch's dependencies (any I915_EXEC_FENCE_WAIT syncobjs)
* and unreference any which have already passed.
*
* A rarely-used batch (typically the compute batch) accumulates references
* to stale render batches that are no longer of interest, so we can free
* those up.
*/
static void
clear_stale_syncobjs(struct crocus_batch *batch)
{
struct crocus_screen *screen = batch->screen;
int n = util_dynarray_num_elements(&batch->syncobjs, struct crocus_syncobj *);
assert(n == util_dynarray_num_elements(&batch->exec_fences,
struct drm_i915_gem_exec_fence));
/* Skip the first syncobj, as it's the signalling one. */
for (int i = n - 1; i > 1; i--) {
struct crocus_syncobj **syncobj =
util_dynarray_element(&batch->syncobjs, struct crocus_syncobj *, i);
struct drm_i915_gem_exec_fence *fence =
util_dynarray_element(&batch->exec_fences,
struct drm_i915_gem_exec_fence, i);
assert(fence->flags & I915_EXEC_FENCE_WAIT);
if (crocus_wait_syncobj(&screen->base, *syncobj, 0))
continue;
/* This sync object has already passed, there's no need to continue
* marking it as a dependency; we can stop holding on to the reference.
*/
crocus_syncobj_reference(screen, syncobj, NULL);
/* Remove it from the lists; move the last element here. */
struct crocus_syncobj **nth_syncobj =
util_dynarray_pop_ptr(&batch->syncobjs, struct crocus_syncobj *);
struct drm_i915_gem_exec_fence *nth_fence =
util_dynarray_pop_ptr(&batch->exec_fences,
struct drm_i915_gem_exec_fence);
if (syncobj != nth_syncobj) {
*syncobj = *nth_syncobj;
memcpy(fence, nth_fence, sizeof(*fence));
}
}
}
/* ------------------------------------------------------------------- */
struct pipe_fence_handle {
struct pipe_reference ref;
struct pipe_context *unflushed_ctx;
struct crocus_fine_fence *fine[CROCUS_BATCH_COUNT];
};
static void
crocus_fence_destroy(struct pipe_screen *p_screen,
struct pipe_fence_handle *fence)
{
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++)
crocus_fine_fence_reference(screen, &fence->fine[i], NULL);
free(fence);
}
static void
crocus_fence_reference(struct pipe_screen *p_screen,
struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
{
if (pipe_reference(&(*dst)->ref, &src->ref))
crocus_fence_destroy(p_screen, *dst);
*dst = src;
}
bool
crocus_wait_syncobj(struct pipe_screen *p_screen,
struct crocus_syncobj *syncobj, int64_t timeout_nsec)
{
if (!syncobj)
return false;
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
struct drm_syncobj_wait args = {
.handles = (uintptr_t)&syncobj->handle,
.count_handles = 1,
.timeout_nsec = timeout_nsec,
};
return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}
static void
crocus_fence_flush(struct pipe_context *ctx,
struct pipe_fence_handle **out_fence, unsigned flags)
{
struct crocus_screen *screen = (void *)ctx->screen;
struct crocus_context *ice = (struct crocus_context *)ctx;
const bool deferred = flags & PIPE_FLUSH_DEFERRED;
if (!deferred) {
for (unsigned i = 0; i < ice->batch_count; i++)
crocus_batch_flush(&ice->batches[i]);
}
if (!out_fence)
return;
struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
if (!fence)
return;
pipe_reference_init(&fence->ref, 1);
if (deferred)
fence->unflushed_ctx = ctx;
for (unsigned b = 0; b < ice->batch_count; b++) {
struct crocus_batch *batch = &ice->batches[b];
if (deferred && crocus_batch_bytes_used(batch) > 0) {
struct crocus_fine_fence *fine =
crocus_fine_fence_new(batch, CROCUS_FENCE_BOTTOM_OF_PIPE);
crocus_fine_fence_reference(screen, &fence->fine[b], fine);
crocus_fine_fence_reference(screen, &fine, NULL);
} else {
/* This batch has no commands queued up (perhaps we just flushed,
* or all the commands are on the other batch). Wait for the last
* syncobj on this engine - unless it's already finished by now.
*/
if (crocus_fine_fence_signaled(batch->last_fence))
continue;
crocus_fine_fence_reference(screen, &fence->fine[b],
batch->last_fence);
}
}
crocus_fence_reference(ctx->screen, out_fence, NULL);
*out_fence = fence;
}
static void
crocus_fence_await(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
/* Unflushed fences from the same context are no-ops. */
if (ctx && ctx == fence->unflushed_ctx)
return;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
for (unsigned b = 0; b < ice->batch_count; b++) {
struct crocus_batch *batch = &ice->batches[b];
/* We're going to make any future work in this batch wait for our
* fence to have gone by. But any currently queued work doesn't
* need to wait. Flush the batch now, so it can happen sooner.
*/
crocus_batch_flush(batch);
/* Before adding a new reference, clean out any stale ones. */
clear_stale_syncobjs(batch);
crocus_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT);
}
}
}
#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
#define MSEC_PER_SEC (1000)
static uint64_t
gettime_ns(void)
{
struct timespec current;
clock_gettime(CLOCK_MONOTONIC, &current);
return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}
static uint64_t
rel2abs(uint64_t timeout)
{
if (timeout == 0)
return 0;
uint64_t current_time = gettime_ns();
uint64_t max_timeout = (uint64_t)INT64_MAX - current_time;
timeout = MIN2(max_timeout, timeout);
return current_time + timeout;
}
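/* Worked example (numbers are hypothetical): with the monotonic clock at
 * 1,000,000,000 ns, a relative timeout of 5 seconds becomes an absolute
 * deadline of 6,000,000,000 ns for DRM_IOCTL_SYNCOBJ_WAIT; an oversized
 * relative timeout is clamped so the sum never exceeds INT64_MAX.
 */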
static bool
crocus_fence_finish(struct pipe_screen *p_screen, struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
/* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
* flushed yet. Check if our syncobj is the current batch's signalling
* syncobj - if so, we haven't flushed and need to now.
*
* The Gallium docs mention that a flush will occur if \p ctx matches
* the context the fence was created with. It may be NULL, so we check
* that it matches first.
*/
if (ctx && ctx == fence->unflushed_ctx) {
for (unsigned i = 0; i < ice->batch_count; i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
if (fine->syncobj == crocus_batch_get_signal_syncobj(&ice->batches[i]))
crocus_batch_flush(&ice->batches[i]);
}
/* The fence is no longer deferred. */
fence->unflushed_ctx = NULL;
}
unsigned int handle_count = 0;
uint32_t handles[ARRAY_SIZE(fence->fine)];
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
handles[handle_count++] = fine->syncobj->handle;
}
if (handle_count == 0)
return true;
struct drm_syncobj_wait args = {
.handles = (uintptr_t)handles,
.count_handles = handle_count,
.timeout_nsec = rel2abs(timeout),
.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
};
if (fence->unflushed_ctx) {
/* This fence had a deferred flush from another context. We can't
* safely flush it here, because the context might be bound to a
* different thread, and poking at its internals wouldn't be safe.
*
* Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
* another thread submits the work.
*/
args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
}
return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
}
#ifndef SYNC_IOC_MAGIC
/* duplicated from linux/sync_file.h to avoid build-time dependency
* on new (v4.7) kernel headers. Once distros are mostly using
* something newer than v4.7, drop this and #include <linux/sync_file.h>
* instead.
*/
struct sync_merge_data {
char name[32];
__s32 fd2;
__s32 fence;
__u32 flags;
__u32 pad;
};
#define SYNC_IOC_MAGIC '>'
#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
#endif
static int
sync_merge_fd(int sync_fd, int new_fd)
{
if (sync_fd == -1)
return new_fd;
if (new_fd == -1)
return sync_fd;
struct sync_merge_data args = {
.name = "crocus fence",
.fd2 = new_fd,
.fence = -1,
};
intel_ioctl(sync_fd, SYNC_IOC_MERGE, &args);
close(new_fd);
close(sync_fd);
return args.fence;
}
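/* Example (illustrative): crocus_fence_get_fd() below calls this once per
 * still-pending batch, so a fence spanning e.g. the render batch and a
 * compute batch collapses into a single sync_file fd that only signals once
 * both have completed.
 */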
static int
crocus_fence_get_fd(struct pipe_screen *p_screen,
struct pipe_fence_handle *fence)
{
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
int fd = -1;
/* Deferred fences aren't supported. */
if (fence->unflushed_ctx)
return -1;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
struct drm_syncobj_handle args = {
.handle = fine->syncobj->handle,
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
fd = sync_merge_fd(fd, args.fd);
}
if (fd == -1) {
/* Our fence has no syncobj's recorded. This means that all of the
* batches had already completed, their syncobj's had been signalled,
* and so we didn't bother to record them. But we're being asked to
* export such a fence. So export a dummy already-signalled syncobj.
*/
struct drm_syncobj_handle args = {
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
gem_syncobj_destroy(screen->fd, args.handle);
return args.fd;
}
return fd;
}
static void
crocus_fence_create_fd(struct pipe_context *ctx, struct pipe_fence_handle **out,
int fd, enum pipe_fd_type type)
{
assert(type == PIPE_FD_TYPE_NATIVE_SYNC || type == PIPE_FD_TYPE_SYNCOBJ);
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
struct drm_syncobj_handle args = {
.fd = fd,
};
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE;
args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
}
if (intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args) == -1) {
fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
strerror(errno));
if (type == PIPE_FD_TYPE_NATIVE_SYNC)
gem_syncobj_destroy(screen->fd, args.handle);
*out = NULL;
return;
}
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
if (!syncobj) {
*out = NULL;
return;
}
syncobj->handle = args.handle;
pipe_reference_init(&syncobj->ref, 1);
struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
if (!fine) {
free(syncobj);
*out = NULL;
return;
}
static const uint32_t zero = 0;
/* Fences work in terms of crocus_fine_fence, but we don't actually have a
* seqno for an imported fence. So, create a fake one which always
* returns as 'not signaled' so we fall back to using the sync object.
*/
fine->seqno = UINT32_MAX;
fine->map = &zero;
fine->syncobj = syncobj;
fine->flags = CROCUS_FENCE_END;
pipe_reference_init(&fine->reference, 1);
struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
if (!fence) {
free(fine);
free(syncobj);
*out = NULL;
return;
}
pipe_reference_init(&fence->ref, 1);
fence->fine[0] = fine;
*out = fence;
}
static void
crocus_fence_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (ctx == fence->unflushed_ctx)
return;
for (unsigned b = 0; b < ice->batch_count; b++) {
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
/* already signaled fence skipped */
if (crocus_fine_fence_signaled(fine))
continue;
ice->batches[b].contains_fence_signal = true;
crocus_batch_add_syncobj(&ice->batches[b], fine->syncobj,
I915_EXEC_FENCE_SIGNAL);
}
}
}
void
crocus_init_screen_fence_functions(struct pipe_screen *screen)
{
screen->fence_reference = crocus_fence_reference;
screen->fence_finish = crocus_fence_finish;
screen->fence_get_fd = crocus_fence_get_fd;
}
void
crocus_init_context_fence_functions(struct pipe_context *ctx)
{
ctx->flush = crocus_fence_flush;
ctx->create_fence_fd = crocus_fence_create_fd;
ctx->fence_server_sync = crocus_fence_await;
ctx->fence_server_signal = crocus_fence_signal;
}

View File

@ -0,0 +1,60 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_FENCE_H
#define CROCUS_FENCE_H
#include "util/u_inlines.h"
struct pipe_screen;
struct crocus_screen;
struct crocus_batch;
struct crocus_syncobj {
struct pipe_reference ref;
uint32_t handle;
};
void crocus_init_context_fence_functions(struct pipe_context *ctx);
void crocus_init_screen_fence_functions(struct pipe_screen *screen);
struct crocus_syncobj *crocus_create_syncobj(struct crocus_screen *screen);
void crocus_syncobj_destroy(struct crocus_screen *, struct crocus_syncobj *);
void crocus_batch_add_syncobj(struct crocus_batch *batch,
struct crocus_syncobj *syncobj,
unsigned flags);
bool crocus_wait_syncobj(struct pipe_screen *screen,
struct crocus_syncobj *syncobj,
int64_t timeout_nsec);
static inline void
crocus_syncobj_reference(struct crocus_screen *screen,
struct crocus_syncobj **dst,
struct crocus_syncobj *src)
{
if (pipe_reference(&(*dst)->ref, &src->ref))
crocus_syncobj_destroy(screen, *dst);
*dst = src;
}
#endif

View File

@ -0,0 +1,85 @@
#include "crocus_context.h"
#include "crocus_fine_fence.h"
#include "util/u_upload_mgr.h"
static void
crocus_fine_fence_reset(struct crocus_batch *batch)
{
u_upload_alloc(batch->fine_fences.uploader,
0, sizeof(uint64_t), sizeof(uint64_t),
&batch->fine_fences.ref.offset, &batch->fine_fences.ref.res,
(void **)&batch->fine_fences.map);
WRITE_ONCE(*batch->fine_fences.map, 0);
batch->fine_fences.next++;
}
void
crocus_fine_fence_init(struct crocus_batch *batch)
{
batch->fine_fences.ref.res = NULL;
batch->fine_fences.next = 0;
if (batch_has_fine_fence(batch))
crocus_fine_fence_reset(batch);
}
static uint32_t
crocus_fine_fence_next(struct crocus_batch *batch)
{
if (!batch_has_fine_fence(batch))
return UINT32_MAX;
uint32_t seqno = batch->fine_fences.next++;
if (batch->fine_fences.next == 0)
crocus_fine_fence_reset(batch);
return seqno;
}
void
crocus_fine_fence_destroy(struct crocus_screen *screen,
struct crocus_fine_fence *fine)
{
crocus_syncobj_reference(screen, &fine->syncobj, NULL);
pipe_resource_reference(&fine->ref.res, NULL);
free(fine);
}
struct crocus_fine_fence *
crocus_fine_fence_new(struct crocus_batch *batch, unsigned flags)
{
struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
if (!fine)
return NULL;
pipe_reference_init(&fine->reference, 1);
fine->seqno = crocus_fine_fence_next(batch);
crocus_syncobj_reference(batch->screen, &fine->syncobj,
crocus_batch_get_signal_syncobj(batch));
if (!batch_has_fine_fence(batch))
return fine;
pipe_resource_reference(&fine->ref.res, batch->fine_fences.ref.res);
fine->ref.offset = batch->fine_fences.ref.offset;
fine->map = batch->fine_fences.map;
fine->flags = flags;
unsigned pc;
if (flags & CROCUS_FENCE_TOP_OF_PIPE) {
pc = PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_CS_STALL;
} else {
pc = PIPE_CONTROL_WRITE_IMMEDIATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DATA_CACHE_FLUSH;
}
crocus_emit_pipe_control_write(batch, "fence: fine", pc,
crocus_resource_bo(fine->ref.res),
fine->ref.offset,
fine->seqno);
return fine;
}

View File

@ -0,0 +1,109 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_FINE_FENCE_DOT_H
#define CROCUS_FINE_FENCE_DOT_H
#include <stdbool.h>
#include <stdint.h>
#include "crocus_screen.h"
#include "crocus_resource.h"
/**
* A lightweight sequence number fence.
*
* We emit PIPE_CONTROLs inside a batch (possibly in the middle)
* which update a monotonically increasing, 32-bit counter. We
* can then check if that moment has passed by either:
*
* 1. Checking on the CPU by snooping on the DWord via a coherent map
*
* 2. Blocking on the GPU with MI_SEMAPHORE_WAIT from a second batch
* (relying on mid-batch preemption to switch GPU execution to the
* batch that writes it).
*/
struct crocus_fine_fence {
struct pipe_reference reference;
/** Buffer where the seqno lives */
struct crocus_state_ref ref;
/** Coherent CPU map of the buffer containing the seqno DWord. */
const uint32_t *map;
/**
* A drm_syncobj which will be signaled at the end of the
* batch that writes this seqno. This can be used to block until
* the seqno has definitely passed (but may wait longer than necessary).
*/
struct crocus_syncobj *syncobj;
#define CROCUS_FENCE_BOTTOM_OF_PIPE 0x0 /**< Written by bottom-of-pipe flush */
#define CROCUS_FENCE_TOP_OF_PIPE 0x1 /**< Written by top-of-pipe flush */
#define CROCUS_FENCE_END 0x2 /**< Written at the end of a batch */
/** Information about the type of flush involved (see CROCUS_FENCE_*) */
uint32_t flags;
/**
* Sequence number expected to be written by the flush we inserted
* when creating this fence. The crocus_fine_fence is 'signaled' when *@map
* (written by the flush on the GPU) is greater-than-or-equal to @seqno.
*/
uint32_t seqno;
};
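/* Illustrative example (numbers are hypothetical): if the fence's
 * PIPE_CONTROL wrote seqno 40 and the GPU has since advanced *@map to 42,
 * the fence reads back as signaled; if *@map still reads 38, callers fall
 * back to waiting on @syncobj, which only signals once the whole batch has
 * executed.
 */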
void crocus_fine_fence_init(struct crocus_batch *batch);
struct crocus_fine_fence *crocus_fine_fence_new(struct crocus_batch *batch,
unsigned flags);
void crocus_fine_fence_destroy(struct crocus_screen *screen,
struct crocus_fine_fence *sq);
static inline void
crocus_fine_fence_reference(struct crocus_screen *screen,
struct crocus_fine_fence **dst,
struct crocus_fine_fence *src)
{
if (pipe_reference(&(*dst)->reference, &src->reference))
crocus_fine_fence_destroy(screen, *dst);
*dst = src;
}
/**
* Return true if this seqno has passed.
*
* NULL is considered signaled.
*/
static inline bool
crocus_fine_fence_signaled(const struct crocus_fine_fence *sq)
{
if (sq && !sq->map)
return false;
return !sq || (READ_ONCE(*sq->map) >= sq->seqno);
}
#endif

View File

@ -0,0 +1,576 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_formats.c
*
* Converts Gallium formats (PIPE_FORMAT_*) to hardware ones (ISL_FORMAT_*).
* Provides information about which formats support what features.
*/
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
static enum isl_format
crocus_isl_format_for_pipe_format(enum pipe_format pf)
{
static const enum isl_format table[PIPE_FORMAT_COUNT] = {
[0 ... PIPE_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
[PIPE_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
[PIPE_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
[PIPE_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
[PIPE_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
[PIPE_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
[PIPE_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
[PIPE_FORMAT_Z16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_Z32_UNORM] = ISL_FORMAT_R32_UNORM,
[PIPE_FORMAT_Z32_FLOAT] = ISL_FORMAT_R32_FLOAT,
/* We translate the combined depth/stencil formats to depth only here */
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
[PIPE_FORMAT_Z24X8_UNORM] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_S8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_X24S8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_X32_S8X24_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_R64_FLOAT] = ISL_FORMAT_R64_FLOAT,
[PIPE_FORMAT_R64G64_FLOAT] = ISL_FORMAT_R64G64_FLOAT,
[PIPE_FORMAT_R64G64B64_FLOAT] = ISL_FORMAT_R64G64B64_FLOAT,
[PIPE_FORMAT_R64G64B64A64_FLOAT] = ISL_FORMAT_R64G64B64A64_FLOAT,
[PIPE_FORMAT_R32_FLOAT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_R32G32_FLOAT] = ISL_FORMAT_R32G32_FLOAT,
[PIPE_FORMAT_R32G32B32_FLOAT] = ISL_FORMAT_R32G32B32_FLOAT,
[PIPE_FORMAT_R32G32B32A32_FLOAT] = ISL_FORMAT_R32G32B32A32_FLOAT,
[PIPE_FORMAT_R32_UNORM] = ISL_FORMAT_R32_UNORM,
[PIPE_FORMAT_R32G32_UNORM] = ISL_FORMAT_R32G32_UNORM,
[PIPE_FORMAT_R32G32B32_UNORM] = ISL_FORMAT_R32G32B32_UNORM,
[PIPE_FORMAT_R32G32B32A32_UNORM] = ISL_FORMAT_R32G32B32A32_UNORM,
[PIPE_FORMAT_R32_USCALED] = ISL_FORMAT_R32_USCALED,
[PIPE_FORMAT_R32G32_USCALED] = ISL_FORMAT_R32G32_USCALED,
[PIPE_FORMAT_R32G32B32_USCALED] = ISL_FORMAT_R32G32B32_USCALED,
[PIPE_FORMAT_R32G32B32A32_USCALED] = ISL_FORMAT_R32G32B32A32_USCALED,
[PIPE_FORMAT_R32_SNORM] = ISL_FORMAT_R32_SNORM,
[PIPE_FORMAT_R32G32_SNORM] = ISL_FORMAT_R32G32_SNORM,
[PIPE_FORMAT_R32G32B32_SNORM] = ISL_FORMAT_R32G32B32_SNORM,
[PIPE_FORMAT_R32G32B32A32_SNORM] = ISL_FORMAT_R32G32B32A32_SNORM,
[PIPE_FORMAT_R32_SSCALED] = ISL_FORMAT_R32_SSCALED,
[PIPE_FORMAT_R32G32_SSCALED] = ISL_FORMAT_R32G32_SSCALED,
[PIPE_FORMAT_R32G32B32_SSCALED] = ISL_FORMAT_R32G32B32_SSCALED,
[PIPE_FORMAT_R32G32B32A32_SSCALED] = ISL_FORMAT_R32G32B32A32_SSCALED,
[PIPE_FORMAT_R16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_R16G16_UNORM] = ISL_FORMAT_R16G16_UNORM,
[PIPE_FORMAT_R16G16B16_UNORM] = ISL_FORMAT_R16G16B16_UNORM,
[PIPE_FORMAT_R16G16B16A16_UNORM] = ISL_FORMAT_R16G16B16A16_UNORM,
[PIPE_FORMAT_R16_USCALED] = ISL_FORMAT_R16_USCALED,
[PIPE_FORMAT_R16G16_USCALED] = ISL_FORMAT_R16G16_USCALED,
[PIPE_FORMAT_R16G16B16_USCALED] = ISL_FORMAT_R16G16B16_USCALED,
[PIPE_FORMAT_R16G16B16A16_USCALED] = ISL_FORMAT_R16G16B16A16_USCALED,
[PIPE_FORMAT_R16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_R16G16_SNORM] = ISL_FORMAT_R16G16_SNORM,
[PIPE_FORMAT_R16G16B16_SNORM] = ISL_FORMAT_R16G16B16_SNORM,
[PIPE_FORMAT_R16G16B16A16_SNORM] = ISL_FORMAT_R16G16B16A16_SNORM,
[PIPE_FORMAT_R16_SSCALED] = ISL_FORMAT_R16_SSCALED,
[PIPE_FORMAT_R16G16_SSCALED] = ISL_FORMAT_R16G16_SSCALED,
[PIPE_FORMAT_R16G16B16_SSCALED] = ISL_FORMAT_R16G16B16_SSCALED,
[PIPE_FORMAT_R16G16B16A16_SSCALED] = ISL_FORMAT_R16G16B16A16_SSCALED,
[PIPE_FORMAT_R8_UNORM] = ISL_FORMAT_R8_UNORM,
[PIPE_FORMAT_R8G8_UNORM] = ISL_FORMAT_R8G8_UNORM,
[PIPE_FORMAT_R8G8B8_UNORM] = ISL_FORMAT_R8G8B8_UNORM,
[PIPE_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
[PIPE_FORMAT_R8_USCALED] = ISL_FORMAT_R8_USCALED,
[PIPE_FORMAT_R8G8_USCALED] = ISL_FORMAT_R8G8_USCALED,
[PIPE_FORMAT_R8G8B8_USCALED] = ISL_FORMAT_R8G8B8_USCALED,
[PIPE_FORMAT_R8G8B8A8_USCALED] = ISL_FORMAT_R8G8B8A8_USCALED,
[PIPE_FORMAT_R8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_R8G8_SNORM] = ISL_FORMAT_R8G8_SNORM,
[PIPE_FORMAT_R8G8B8_SNORM] = ISL_FORMAT_R8G8B8_SNORM,
[PIPE_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
[PIPE_FORMAT_R8_SSCALED] = ISL_FORMAT_R8_SSCALED,
[PIPE_FORMAT_R8G8_SSCALED] = ISL_FORMAT_R8G8_SSCALED,
[PIPE_FORMAT_R8G8B8_SSCALED] = ISL_FORMAT_R8G8B8_SSCALED,
[PIPE_FORMAT_R8G8B8A8_SSCALED] = ISL_FORMAT_R8G8B8A8_SSCALED,
[PIPE_FORMAT_R32_FIXED] = ISL_FORMAT_R32_SFIXED,
[PIPE_FORMAT_R32G32_FIXED] = ISL_FORMAT_R32G32_SFIXED,
[PIPE_FORMAT_R32G32B32_FIXED] = ISL_FORMAT_R32G32B32_SFIXED,
[PIPE_FORMAT_R32G32B32A32_FIXED] = ISL_FORMAT_R32G32B32A32_SFIXED,
[PIPE_FORMAT_R16_FLOAT] = ISL_FORMAT_R16_FLOAT,
[PIPE_FORMAT_R16G16_FLOAT] = ISL_FORMAT_R16G16_FLOAT,
[PIPE_FORMAT_R16G16B16_FLOAT] = ISL_FORMAT_R16G16B16_FLOAT,
[PIPE_FORMAT_R16G16B16A16_FLOAT] = ISL_FORMAT_R16G16B16A16_FLOAT,
[PIPE_FORMAT_R8G8B8_SRGB] = ISL_FORMAT_R8G8B8_UNORM_SRGB,
[PIPE_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
[PIPE_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
[PIPE_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
[PIPE_FORMAT_DXT1_RGB] = ISL_FORMAT_BC1_UNORM,
[PIPE_FORMAT_DXT1_RGBA] = ISL_FORMAT_BC1_UNORM,
[PIPE_FORMAT_DXT3_RGBA] = ISL_FORMAT_BC2_UNORM,
[PIPE_FORMAT_DXT5_RGBA] = ISL_FORMAT_BC3_UNORM,
[PIPE_FORMAT_DXT1_SRGB] = ISL_FORMAT_BC1_UNORM_SRGB,
[PIPE_FORMAT_DXT1_SRGBA] = ISL_FORMAT_BC1_UNORM_SRGB,
[PIPE_FORMAT_DXT3_SRGBA] = ISL_FORMAT_BC2_UNORM_SRGB,
[PIPE_FORMAT_DXT5_SRGBA] = ISL_FORMAT_BC3_UNORM_SRGB,
[PIPE_FORMAT_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
[PIPE_FORMAT_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
[PIPE_FORMAT_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
[PIPE_FORMAT_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,
[PIPE_FORMAT_R10G10B10A2_USCALED] = ISL_FORMAT_R10G10B10A2_USCALED,
[PIPE_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
[PIPE_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
[PIPE_FORMAT_R1_UNORM] = ISL_FORMAT_R1_UNORM,
[PIPE_FORMAT_R10G10B10X2_USCALED] = ISL_FORMAT_R10G10B10X2_USCALED,
[PIPE_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
[PIPE_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
[PIPE_FORMAT_I8_UNORM] = ISL_FORMAT_R8_UNORM,
[PIPE_FORMAT_I16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_I8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_I16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_I16_FLOAT] = ISL_FORMAT_R16_FLOAT,
[PIPE_FORMAT_I32_FLOAT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_L8_UINT] = ISL_FORMAT_L8_UINT,
[PIPE_FORMAT_L8_UNORM] = ISL_FORMAT_L8_UNORM,
[PIPE_FORMAT_L8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_L8_SINT] = ISL_FORMAT_L8_SINT,
[PIPE_FORMAT_L16_UNORM] = ISL_FORMAT_L16_UNORM,
[PIPE_FORMAT_L16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_L16_FLOAT] = ISL_FORMAT_L16_FLOAT,
[PIPE_FORMAT_L32_FLOAT] = ISL_FORMAT_L32_FLOAT,
[PIPE_FORMAT_A8_UNORM] = ISL_FORMAT_A8_UNORM,
[PIPE_FORMAT_A16_UNORM] = ISL_FORMAT_A16_UNORM,
[PIPE_FORMAT_A16_FLOAT] = ISL_FORMAT_A16_FLOAT,
[PIPE_FORMAT_A32_FLOAT] = ISL_FORMAT_A32_FLOAT,
[PIPE_FORMAT_L8A8_UNORM] = ISL_FORMAT_L8A8_UNORM,
[PIPE_FORMAT_L16A16_UNORM] = ISL_FORMAT_L16A16_UNORM,
[PIPE_FORMAT_L16A16_FLOAT] = ISL_FORMAT_L16A16_FLOAT,
[PIPE_FORMAT_L32A32_FLOAT] = ISL_FORMAT_L32A32_FLOAT,
/* Sadly, we have to use luminance[-alpha] formats for sRGB decoding. */
[PIPE_FORMAT_R8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
[PIPE_FORMAT_L8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
[PIPE_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB,
[PIPE_FORMAT_R10G10B10A2_SSCALED] = ISL_FORMAT_R10G10B10A2_SSCALED,
[PIPE_FORMAT_R10G10B10A2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,
[PIPE_FORMAT_B10G10R10A2_USCALED] = ISL_FORMAT_B10G10R10A2_USCALED,
[PIPE_FORMAT_B10G10R10A2_SSCALED] = ISL_FORMAT_B10G10R10A2_SSCALED,
[PIPE_FORMAT_B10G10R10A2_SNORM] = ISL_FORMAT_B10G10R10A2_SNORM,
[PIPE_FORMAT_R8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_R8G8_UINT] = ISL_FORMAT_R8G8_UINT,
[PIPE_FORMAT_R8G8B8_UINT] = ISL_FORMAT_R8G8B8_UINT,
[PIPE_FORMAT_R8G8B8A8_UINT] = ISL_FORMAT_R8G8B8A8_UINT,
[PIPE_FORMAT_R8_SINT] = ISL_FORMAT_R8_SINT,
[PIPE_FORMAT_R8G8_SINT] = ISL_FORMAT_R8G8_SINT,
[PIPE_FORMAT_R8G8B8_SINT] = ISL_FORMAT_R8G8B8_SINT,
[PIPE_FORMAT_R8G8B8A8_SINT] = ISL_FORMAT_R8G8B8A8_SINT,
[PIPE_FORMAT_R16_UINT] = ISL_FORMAT_R16_UINT,
[PIPE_FORMAT_R16G16_UINT] = ISL_FORMAT_R16G16_UINT,
[PIPE_FORMAT_R16G16B16_UINT] = ISL_FORMAT_R16G16B16_UINT,
[PIPE_FORMAT_R16G16B16A16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,
[PIPE_FORMAT_R16_SINT] = ISL_FORMAT_R16_SINT,
[PIPE_FORMAT_R16G16_SINT] = ISL_FORMAT_R16G16_SINT,
[PIPE_FORMAT_R16G16B16_SINT] = ISL_FORMAT_R16G16B16_SINT,
[PIPE_FORMAT_R16G16B16A16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,
[PIPE_FORMAT_R32_UINT] = ISL_FORMAT_R32_UINT,
[PIPE_FORMAT_R32G32_UINT] = ISL_FORMAT_R32G32_UINT,
[PIPE_FORMAT_R32G32B32_UINT] = ISL_FORMAT_R32G32B32_UINT,
[PIPE_FORMAT_R32G32B32A32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,
[PIPE_FORMAT_R32_SINT] = ISL_FORMAT_R32_SINT,
[PIPE_FORMAT_R32G32_SINT] = ISL_FORMAT_R32G32_SINT,
[PIPE_FORMAT_R32G32B32_SINT] = ISL_FORMAT_R32G32B32_SINT,
[PIPE_FORMAT_R32G32B32A32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,
[PIPE_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
[PIPE_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,
[PIPE_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
[PIPE_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
[PIPE_FORMAT_R16G16B16X16_UNORM] = ISL_FORMAT_R16G16B16X16_UNORM,
[PIPE_FORMAT_R16G16B16X16_FLOAT] = ISL_FORMAT_R16G16B16X16_FLOAT,
[PIPE_FORMAT_R32G32B32X32_FLOAT] = ISL_FORMAT_R32G32B32X32_FLOAT,
[PIPE_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
[PIPE_FORMAT_B5G6R5_SRGB] = ISL_FORMAT_B5G6R5_UNORM_SRGB,
[PIPE_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
[PIPE_FORMAT_BPTC_SRGBA] = ISL_FORMAT_BC7_UNORM_SRGB,
[PIPE_FORMAT_BPTC_RGB_FLOAT] = ISL_FORMAT_BC6H_SF16,
[PIPE_FORMAT_BPTC_RGB_UFLOAT] = ISL_FORMAT_BC6H_UF16,
[PIPE_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
[PIPE_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
[PIPE_FORMAT_ETC2_RGB8A1] = ISL_FORMAT_ETC2_RGB8_PTA,
[PIPE_FORMAT_ETC2_SRGB8A1] = ISL_FORMAT_ETC2_SRGB8_PTA,
[PIPE_FORMAT_ETC2_RGBA8] = ISL_FORMAT_ETC2_EAC_RGBA8,
[PIPE_FORMAT_ETC2_SRGBA8] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
[PIPE_FORMAT_ETC2_R11_UNORM] = ISL_FORMAT_EAC_R11,
[PIPE_FORMAT_ETC2_R11_SNORM] = ISL_FORMAT_EAC_SIGNED_R11,
[PIPE_FORMAT_ETC2_RG11_UNORM] = ISL_FORMAT_EAC_RG11,
[PIPE_FORMAT_ETC2_RG11_SNORM] = ISL_FORMAT_EAC_SIGNED_RG11,
[PIPE_FORMAT_FXT1_RGB] = ISL_FORMAT_FXT1,
[PIPE_FORMAT_FXT1_RGBA] = ISL_FORMAT_FXT1,
[PIPE_FORMAT_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
[PIPE_FORMAT_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
[PIPE_FORMAT_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
[PIPE_FORMAT_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
[PIPE_FORMAT_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
[PIPE_FORMAT_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
[PIPE_FORMAT_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
[PIPE_FORMAT_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
[PIPE_FORMAT_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
[PIPE_FORMAT_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
[PIPE_FORMAT_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
[PIPE_FORMAT_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
[PIPE_FORMAT_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
[PIPE_FORMAT_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,
[PIPE_FORMAT_ASTC_4x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
[PIPE_FORMAT_ASTC_5x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
[PIPE_FORMAT_ASTC_5x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
[PIPE_FORMAT_ASTC_6x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
[PIPE_FORMAT_ASTC_6x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
[PIPE_FORMAT_ASTC_8x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
[PIPE_FORMAT_ASTC_8x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
[PIPE_FORMAT_ASTC_8x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
[PIPE_FORMAT_ASTC_10x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
[PIPE_FORMAT_ASTC_10x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
[PIPE_FORMAT_ASTC_10x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
[PIPE_FORMAT_ASTC_10x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
[PIPE_FORMAT_ASTC_12x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
[PIPE_FORMAT_ASTC_12x12_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
[PIPE_FORMAT_A1B5G5R5_UNORM] = ISL_FORMAT_A1B5G5R5_UNORM,
/* We support these so that we know the API expects no alpha channel.
* Otherwise, the state tracker would just give us a format with alpha
* and we wouldn't know to override the swizzle to 1.
*/
[PIPE_FORMAT_R16G16B16X16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,
[PIPE_FORMAT_R16G16B16X16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,
[PIPE_FORMAT_R32G32B32X32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,
[PIPE_FORMAT_R32G32B32X32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,
[PIPE_FORMAT_R10G10B10X2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,
};
assert(pf < PIPE_FORMAT_COUNT);
return table[pf];
}
static enum isl_format
get_render_format(enum pipe_format pformat, enum isl_format def_format)
{
switch (pformat) {
case PIPE_FORMAT_A16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_A16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_A32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_I8_UNORM: return ISL_FORMAT_R8_UNORM;
case PIPE_FORMAT_I16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_I16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_I32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_L8_UNORM: return ISL_FORMAT_R8_UNORM;
case PIPE_FORMAT_L8_UINT: return ISL_FORMAT_R8_UINT;
case PIPE_FORMAT_L8_SINT: return ISL_FORMAT_R8_SINT;
case PIPE_FORMAT_L16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_L16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_L32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_L8A8_UNORM: return ISL_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_L16A16_UNORM: return ISL_FORMAT_R16G16_UNORM;
case PIPE_FORMAT_L16A16_FLOAT: return ISL_FORMAT_R16G16_FLOAT;
case PIPE_FORMAT_L32A32_FLOAT: return ISL_FORMAT_R32G32_FLOAT;
default:
return def_format;
}
}
struct crocus_format_info
crocus_format_for_usage(const struct intel_device_info *devinfo,
enum pipe_format pformat,
isl_surf_usage_flags_t usage)
{
struct crocus_format_info info = { crocus_isl_format_for_pipe_format(pformat),
{ PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W } };
if (info.fmt == ISL_FORMAT_UNSUPPORTED)
return info;
if (pformat == PIPE_FORMAT_A8_UNORM) {
info.fmt = ISL_FORMAT_A8_UNORM;
}
if (usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
info.fmt = get_render_format(pformat, info.fmt);
if (devinfo->ver < 6) {
if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
info.fmt = ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
if (pformat == PIPE_FORMAT_X32_S8X24_UINT)
info.fmt = ISL_FORMAT_X32_TYPELESS_G8X24_UINT;
if (pformat == PIPE_FORMAT_X24S8_UINT)
info.fmt = ISL_FORMAT_X24_TYPELESS_G8_UINT;
}
const struct isl_format_layout *fmtl = isl_format_get_layout(info.fmt);
if (util_format_is_snorm(pformat)) {
if (util_format_is_intensity(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_X;
} else if (util_format_is_luminance(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_1;
} else if (util_format_is_luminance_alpha(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_Y;
} else if (util_format_is_alpha(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_0;
info.swizzles[1] = PIPE_SWIZZLE_0;
info.swizzles[2] = PIPE_SWIZZLE_0;
info.swizzles[3] = PIPE_SWIZZLE_X;
}
}
/* When faking RGBX pipe formats with RGBA ISL formats, override alpha. */
if (!util_format_has_alpha(pformat) && fmtl->channels.a.type != ISL_VOID) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_Y;
info.swizzles[2] = PIPE_SWIZZLE_Z;
info.swizzles[3] = PIPE_SWIZZLE_1;
}
/* We choose RGBA over RGBX for rendering, since the hardware doesn't support
* rendering to RGBX. However, when this internal override is used on Gen9+,
* fast clears don't work correctly.
*
* i965 fixes this by pretending to not support RGBX formats, and the higher
* layers of Mesa pick the RGBA format instead. Gallium doesn't work that
* way, and might choose a different format, like BGRX instead of RGBX,
* which will also cause problems when sampling from a surface fast cleared
* as RGBX. So we always choose RGBA instead of RGBX explicitly
* here.
*/
if (isl_format_is_rgbx(info.fmt) &&
!isl_format_supports_rendering(devinfo, info.fmt) &&
(usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)) {
info.fmt = isl_format_rgbx_to_rgba(info.fmt);
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_Y;
info.swizzles[2] = PIPE_SWIZZLE_Z;
info.swizzles[3] = PIPE_SWIZZLE_1;
}
return info;
}
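/* Worked example (illustrative, not an exhaustive contract): requesting
 * PIPE_FORMAT_I8_SNORM maps to ISL_FORMAT_R8_SNORM via the table above, and
 * because it is a snorm intensity format the swizzle becomes XXXX, so all
 * four channels read back the single stored component.
 */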
/**
* The pscreen->is_format_supported() driver hook.
*
* Returns true if the given format is supported for the given usage
* (PIPE_BIND_*) and sample count.
*/
bool
crocus_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format pformat,
enum pipe_texture_target target,
unsigned sample_count, unsigned storage_sample_count,
unsigned usage)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (!util_is_power_of_two_or_zero(sample_count))
return false;
if (devinfo->ver >= 7) {
if (sample_count > 8 || sample_count == 2)
return false;
} else if (devinfo->ver == 6) {
if (sample_count > 4 || sample_count == 2)
return false;
} else if (sample_count > 1) {
return false;
}
if (pformat == PIPE_FORMAT_NONE)
return true;
enum isl_format format = crocus_isl_format_for_pipe_format(pformat);
if (format == ISL_FORMAT_UNSUPPORTED)
return false;
/* no stencil texturing prior to haswell */
if (!devinfo->is_haswell) {
if (pformat == PIPE_FORMAT_S8_UINT ||
pformat == PIPE_FORMAT_X24S8_UINT ||
pformat == PIPE_FORMAT_S8X24_UINT ||
pformat == PIPE_FORMAT_X32_S8X24_UINT)
return false;
}
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
const bool is_integer = isl_format_has_int_channel(format);
bool supported = true;
if (sample_count > 1)
supported &= isl_format_supports_multisampling(devinfo, format);
if (usage & PIPE_BIND_DEPTH_STENCIL) {
supported &= format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS ||
format == ISL_FORMAT_R32_FLOAT ||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
format == ISL_FORMAT_R16_UNORM ||
format == ISL_FORMAT_R8_UINT;
}
if (usage & PIPE_BIND_RENDER_TARGET) {
/* Alpha and luminance-alpha formats other than A8_UNORM are not
* renderable.
*
* For BLORP, we can apply the swizzle in the shader. But for
* general rendering, this would mean recompiling the shader, which
* we'd like to avoid doing. So we mark these formats non-renderable.
*
* We do support A8_UNORM as it's required and is renderable.
*/
if (pformat != PIPE_FORMAT_A8_UNORM &&
(util_format_is_alpha(pformat) ||
util_format_is_luminance_alpha(pformat)))
supported = false;
enum isl_format rt_format = format;
if (isl_format_is_rgbx(format) &&
!isl_format_supports_rendering(devinfo, format))
rt_format = isl_format_rgbx_to_rgba(format);
supported &= isl_format_supports_rendering(devinfo, rt_format);
if (!is_integer)
supported &= isl_format_supports_alpha_blending(devinfo, rt_format);
}
if (usage & PIPE_BIND_SHADER_IMAGE) {
/* Dataport doesn't support compression, and we can't resolve an MCS
* compressed surface. (Buffer images may have sample count of 0.)
*/
supported &= sample_count == 0;
supported &= isl_format_supports_typed_writes(devinfo, format);
supported &= isl_has_matching_typed_storage_image_format(devinfo, format);
}
if (usage & PIPE_BIND_SAMPLER_VIEW) {
supported &= isl_format_supports_sampling(devinfo, format);
bool ignore_filtering = false;
if (is_integer)
ignore_filtering = true;
/* The format tables say pre-gen5 can't filter these, but we still want to
* expose them for sampling, so skip the filtering check.
*/
if (devinfo->ver < 5 && (format == ISL_FORMAT_R32G32B32A32_FLOAT ||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
format == ISL_FORMAT_R32_FLOAT ||
format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS))
ignore_filtering = true;
if (!ignore_filtering)
supported &= isl_format_supports_filtering(devinfo, format);
/* Don't advertise 3-component RGB formats for non-buffer textures.
* This ensures that they are renderable from an API perspective since
* the state tracker will fall back to RGBA or RGBX, which are
* renderable. We want to render internally for copies and blits,
* even if the application doesn't.
*
* Buffer textures don't need to be renderable, so we support real RGB.
* This is useful for PBO upload, and 32-bit RGB support is mandatory.
*/
if (target != PIPE_BUFFER)
supported &= fmtl->bpb != 24 && fmtl->bpb != 48 && fmtl->bpb != 96;
}
if (usage & PIPE_BIND_VERTEX_BUFFER) {
supported &= isl_format_supports_vertex_fetch(devinfo, format);
if (!devinfo->is_haswell) {
/* W/A: Pre-Haswell, the hardware doesn't really support the formats
* we'd like to use here, so upload everything as UINT and fix it in
* the shader
*/
if (format == ISL_FORMAT_R10G10B10A2_UNORM ||
format == ISL_FORMAT_B10G10R10A2_UNORM ||
format == ISL_FORMAT_R10G10B10A2_SNORM ||
format == ISL_FORMAT_B10G10R10A2_SNORM ||
format == ISL_FORMAT_R10G10B10A2_USCALED ||
format == ISL_FORMAT_B10G10R10A2_USCALED ||
format == ISL_FORMAT_R10G10B10A2_SSCALED ||
format == ISL_FORMAT_B10G10R10A2_SSCALED)
supported = true;
if (format == ISL_FORMAT_R8G8B8_SINT ||
format == ISL_FORMAT_R8G8B8_UINT ||
format == ISL_FORMAT_R16G16B16_SINT ||
format == ISL_FORMAT_R16G16B16_UINT)
supported = true;
}
}
if (usage & PIPE_BIND_INDEX_BUFFER) {
supported &= format == ISL_FORMAT_R8_UINT ||
format == ISL_FORMAT_R16_UINT ||
format == ISL_FORMAT_R32_UINT;
}
return supported;
}
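/* Example query (hypothetical call, showing how the state tracker might reach
 * this hook): asking whether RGBA8 works as a 4x MSAA render target,
 *
 *    crocus_is_format_supported(pscreen, PIPE_FORMAT_R8G8B8A8_UNORM,
 *                               PIPE_TEXTURE_2D, 4, 4,
 *                               PIPE_BIND_RENDER_TARGET);
 *
 * returns true only when the device generation allows 4x MSAA and ISL reports
 * multisampling, rendering, and alpha-blending support for the format.
 */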

View File

@ -0,0 +1,164 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* Macro and function definitions needed in order to use genxml.
*
* This should only be included in sources compiled per-generation.
*/
#include "crocus_batch.h"
#include "genxml/gen_macros.h"
#define __gen_address_type struct crocus_address
#define __gen_user_data struct crocus_batch
#define __gen_combine_address crocus_combine_address
static inline void *
__gen_get_batch_dwords(struct crocus_batch *batch, unsigned dwords)
{
return crocus_get_command_space(batch, dwords * sizeof(uint32_t));
}
static inline struct crocus_address
__gen_address_offset(struct crocus_address addr, uint64_t offset)
{
addr.offset += offset;
return addr;
}
static uint64_t
__gen_combine_address(struct crocus_batch *batch, void *location,
struct crocus_address addr, uint32_t delta)
{
uint32_t offset = (char *)location - (char *)batch->command.map;
if (addr.bo == NULL) {
return addr.offset + delta;
} else {
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
offset = (char *) location - (char *) batch->state.map;
return crocus_state_reloc(batch, offset, addr.bo,
addr.offset + delta,
addr.reloc_flags);
}
assert(!crocus_ptr_in_state_buffer(batch, location));
offset = (char *) location - (char *) batch->command.map;
return crocus_command_reloc(batch, offset, addr.bo,
addr.offset + delta,
addr.reloc_flags);
}
}
#define __gen_address_type struct crocus_address
#define __gen_user_data struct crocus_batch
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
#define __genxml_reg_num(cmd) cmd ## _num
#include "genxml/genX_pack.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_bits.h"
/* CS_GPR(15) is reserved for combining conditional rendering predicates
* with GL_ARB_indirect_parameters draw number predicates.
*/
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#include "common/mi_builder.h"
#define _crocus_pack_command(batch, cmd, dst, name) \
for (struct cmd name = { __genxml_cmd_header(cmd) }, \
*_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \
_dst = NULL; \
}))
#define crocus_pack_command(cmd, dst, name) \
_crocus_pack_command(NULL, cmd, dst, name)
#define _crocus_pack_state(batch, cmd, dst, name) \
for (struct cmd name = {}, \
*_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
__genxml_cmd_pack(cmd)(batch, (void *)_dst, &name), \
_dst = NULL)
#define crocus_pack_state(cmd, dst, name) \
_crocus_pack_state(NULL, cmd, dst, name)
#define crocus_emit_cmd(batch, cmd, name) \
_crocus_pack_command(batch, cmd, __gen_get_batch_dwords(batch, __genxml_cmd_length(cmd)), name)
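/* Illustrative use of the emit helpers (the field name comes from genxml and
 * is only an example): the block body fills in the template struct, and the
 * dwords are packed straight into the batch when the block closes:
 *
 *    crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */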
#define crocus_emit_merge(batch, dwords0, dwords1, num_dwords) \
do { \
uint32_t *dw = __gen_get_batch_dwords(batch, num_dwords); \
for (uint32_t i = 0; i < num_dwords; i++) \
dw[i] = (dwords0)[i] | (dwords1)[i]; \
VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \
} while (0)
#define crocus_emit_reg(batch, reg, name) \
for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
({ \
uint32_t _dw[__genxml_cmd_length(reg)]; \
__genxml_cmd_pack(reg)(NULL, _dw, &name); \
for (unsigned i = 0; i < __genxml_cmd_length(reg); i++) { \
crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
lri.RegisterOffset = __genxml_reg_num(reg); \
lri.DataDWord = _dw[i]; \
} \
} \
_cont = NULL; \
}))
/**
* crocus_address constructor helpers:
*
* When using these to construct a CSO, pass NULL for \p bo, and manually
* pin the BO later. Otherwise, genxml's address handling will add the
* BO to the current batch's validation list at CSO creation time, rather
* than at draw time as desired.
*/
UNUSED static struct crocus_address
ro_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT };
}
UNUSED static struct crocus_address
rw_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT | RELOC_WRITE };
}
UNUSED static struct crocus_address
ggtt_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT };
}
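/* Illustrative usage (variable names are hypothetical): use ro_bo() for data
 * the GPU only reads and rw_bo() for buffers it will write through the
 * relocated address:
 *
 *    struct crocus_address push = ro_bo(const_bo, const_offset);
 *    struct crocus_address dst  = rw_bo(query_bo, snapshot_offset);
 *
 * As noted above, pass NULL for the BO when baking an address into a CSO and
 * pin the BO at draw time instead.
 */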

View File

@ -0,0 +1,56 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* GenX-specific function declarations.
*
* Don't include this directly, it will be included by crocus_context.h.
*
* NOTE: This header can be included multiple times, from the same file.
*/
/* crocus_state.c */
void genX(init_state)(struct crocus_context *ice);
void genX(init_screen_state)(struct crocus_screen *screen);
void genX(upload_urb)(struct crocus_batch *batch,
unsigned vs_size,
bool gs_present,
unsigned gs_size);
void genX(emit_hashing_mode)(struct crocus_context *ice,
struct crocus_batch *batch,
unsigned width, unsigned height,
unsigned scale);
/* crocus_blorp.c */
void genX(init_blorp)(struct crocus_context *ice);
/* crocus_query.c */
void genX(init_query)(struct crocus_context *ice);
void genX(init_screen_query)(struct crocus_screen *screen);
void genX(math_add32_gpr0)(struct crocus_context *ice,
struct crocus_batch *batch,
uint32_t x);
void genX(math_div32_gpr0)(struct crocus_context *ice,
struct crocus_batch *batch,
uint32_t D);
/* crocus_blt.c */
void genX(init_blt)(struct crocus_screen *screen);

View File

@ -0,0 +1,484 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "crocus_monitor.h"
#include <xf86drm.h>
#include "crocus_screen.h"
#include "crocus_context.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_query.h"
#include "perf/intel_perf_regs.h"
struct crocus_monitor_object {
int num_active_counters;
int *active_counters;
size_t result_size;
unsigned char *result_buffer;
struct intel_perf_query_object *query;
};
int
crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info)
{
const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
assert(screen->monitor_cfg);
if (!screen->monitor_cfg)
return 0;
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
if (!info) {
/* return the number of metrics */
return monitor_cfg->num_counters;
}
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
const int group = monitor_cfg->counters[index].group;
const int counter_index = monitor_cfg->counters[index].counter;
struct intel_perf_query_counter *counter =
&perf_cfg->queries[group].counters[counter_index];
info->group_id = group;
info->name = counter->name;
info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
else
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
switch (counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
info->max_value.u32 = 0;
break;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->max_value.u64 = 0;
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
info->max_value.u64 = -1;
break;
default:
assert(false);
break;
}
/* indicates that this is an OA query, not a pipeline statistics query */
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
return 1;
}
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
return crocus_bo_alloc(bufmgr, name, size);
}
#if 0
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CS_STALL;
crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
"OA metrics", flags);
}
#endif
static void
crocus_monitor_emit_mi_report_perf_count(void *c,
void *bo,
uint32_t offset_in_bytes,
uint32_t report_id)
{
struct crocus_context *ice = c;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
}
static void
crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
{
struct crocus_context *ice = c;
_crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
}
#if 0
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
void *bo,
uint32_t bo_offset)
{
struct crocus_context *ice = ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}
static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
uint32_t reg, uint32_t offset)
{
struct crocus_context *ice = ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
static bool
crocus_monitor_init_metrics(struct crocus_screen *screen)
{
struct crocus_monitor_config *monitor_cfg =
rzalloc(screen, struct crocus_monitor_config);
struct intel_perf_config *perf_cfg = NULL;
if (unlikely(!monitor_cfg))
goto allocation_error;
perf_cfg = intel_perf_new(monitor_cfg);
if (unlikely(!perf_cfg))
goto allocation_error;
monitor_cfg->perf_cfg = perf_cfg;
perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
perf_cfg->vtbl.emit_mi_report_perf_count =
(emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
perf_cfg->vtbl.bo_wait_rendering =
(bo_wait_rendering_t)crocus_bo_wait_rendering;
perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
screen->monitor_cfg = monitor_cfg;
/* a gallium "group" is equivalent to a gen "query"
* a gallium "query" is equivalent to a gen "query_counter"
*
* Each gen_query supports a specific number of query_counters. To
* allocate the array of crocus_monitor_counter, we need an upper bound
* (ignoring duplicate query_counters).
*/
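/* Illustrative numbers (hypothetical): three gen queries exposing 10, 12 and
 * 7 counters give an upper bound of 29 slots; duplicate counter names are
 * filtered below, so num_counters may end up smaller.
 */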
int gen_query_counters_count = 0;
for (int gen_query_id = 0;
gen_query_id < perf_cfg->n_queries;
++gen_query_id) {
gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
}
monitor_cfg->counters = rzalloc_size(monitor_cfg,
sizeof(struct crocus_monitor_counter) *
gen_query_counters_count);
if (unlikely(!monitor_cfg->counters))
goto allocation_error;
int crocus_monitor_id = 0;
for (int group = 0; group < perf_cfg->n_queries; ++group) {
for (int counter = 0;
counter < perf_cfg->queries[group].n_counters;
++counter) {
/* Check previously identified metrics to filter out duplicates. The
* user is not helped by having the same metric available in several
* groups. (n^2 algorithm).
*/
bool duplicate = false;
for (int existing_group = 0;
existing_group < group && !duplicate;
++existing_group) {
for (int existing_counter = 0;
existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
++existing_counter) {
const char *current_name =
perf_cfg->queries[group].counters[counter].name;
const char *existing_name =
perf_cfg->queries[existing_group].counters[existing_counter].name;
if (strcmp(current_name, existing_name) == 0) {
duplicate = true;
}
}
}
if (duplicate)
continue;
monitor_cfg->counters[crocus_monitor_id].group = group;
monitor_cfg->counters[crocus_monitor_id].counter = counter;
++crocus_monitor_id;
}
}
monitor_cfg->num_counters = crocus_monitor_id;
return monitor_cfg->num_counters;
 allocation_error:
/* monitor_cfg was allocated with ralloc; perf_cfg and the counters array
 * are parented to it, so a single ralloc_free releases everything.
 */
ralloc_free(monitor_cfg);
return false;
}
int
crocus_get_monitor_group_info(struct pipe_screen *pscreen,
unsigned group_index,
struct pipe_driver_query_group_info *info)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
if (!screen->monitor_cfg) {
if (!crocus_monitor_init_metrics(screen))
return 0;
}
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
if (!info) {
/* return the count that can be queried */
return perf_cfg->n_queries;
}
if (group_index >= perf_cfg->n_queries) {
/* out of range */
return 0;
}
struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
info->name = query->name;
info->max_active_queries = query->n_counters;
info->num_queries = query->n_counters;
return 1;
}
static void
crocus_init_monitor_ctx(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
ice->perf_ctx = intel_perf_new_context(ice);
if (unlikely(!ice->perf_ctx))
return;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
intel_perf_init_context(perf_ctx,
perf_cfg,
ice,
ice,
screen->bufmgr,
&screen->devinfo,
ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
screen->fd);
}
/* entry point for GenPerfMonitorsAMD */
struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context *ice,
unsigned num_queries,
unsigned *query_types)
{
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
struct intel_perf_query_object *query_obj = NULL;
/* initialize perf context if this has not already been done. This
* function is the first entry point that carries the gl context.
*/
if (ice->perf_ctx == NULL) {
crocus_init_monitor_ctx(ice);
}
struct intel_perf_context *perf_ctx = ice->perf_ctx;
assert(num_queries > 0);
int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
assert(query_index < monitor_cfg->num_counters);
const int group = monitor_cfg->counters[query_index].group;
struct crocus_monitor_object *monitor =
calloc(1, sizeof(struct crocus_monitor_object));
if (unlikely(!monitor))
goto allocation_failure;
monitor->num_active_counters = num_queries;
monitor->active_counters = calloc(num_queries, sizeof(int));
if (unlikely(!monitor->active_counters))
goto allocation_failure;
for (int i = 0; i < num_queries; ++i) {
unsigned current_query = query_types[i];
unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
/* all queries must be in the same group */
assert(current_query_index < monitor_cfg->num_counters);
assert(monitor_cfg->counters[current_query_index].group == group);
monitor->active_counters[i] =
monitor_cfg->counters[current_query_index].counter;
}
/* create the intel_perf_query */
query_obj = intel_perf_new_query(perf_ctx, group);
if (unlikely(!query_obj))
goto allocation_failure;
monitor->query = query_obj;
monitor->result_size = perf_cfg->queries[group].data_size;
monitor->result_buffer = calloc(1, monitor->result_size);
if (unlikely(!monitor->result_buffer))
goto allocation_failure;
return monitor;
allocation_failure:
if (monitor) {
free(monitor->active_counters);
free(monitor->result_buffer);
}
free(query_obj);
free(monitor);
return NULL;
}
void
crocus_destroy_monitor_object(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
intel_perf_delete_query(ice->perf_ctx, monitor->query);
free(monitor->result_buffer);
monitor->result_buffer = NULL;
free(monitor->active_counters);
monitor->active_counters = NULL;
free(monitor);
}
bool
crocus_begin_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
return intel_perf_begin_query(perf_ctx, monitor->query);
}
bool
crocus_end_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
intel_perf_end_query(perf_ctx, monitor->query);
return true;
}
bool
crocus_get_monitor_result(struct pipe_context *ctx,
struct crocus_monitor_object *monitor,
bool wait,
union pipe_numeric_type_union *result)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
bool monitor_ready =
intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
if (!monitor_ready) {
if (!wait)
return false;
intel_perf_wait_query(perf_ctx, monitor->query, batch);
}
assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
unsigned bytes_written;
intel_perf_get_query_data(perf_ctx, monitor->query, batch,
monitor->result_size,
(unsigned*) monitor->result_buffer,
&bytes_written);
if (bytes_written != monitor->result_size)
return false;
/* copy metrics into the batch result */
for (int i = 0; i < monitor->num_active_counters; ++i) {
int current_counter = monitor->active_counters[i];
const struct intel_perf_query_info *info =
intel_perf_query_info(monitor->query);
const struct intel_perf_query_counter *counter =
&info->counters[current_counter];
assert(intel_perf_query_counter_get_size(counter));
switch (counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
result[i].f = *(float*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
double v = *(double*)(monitor->result_buffer + counter->offset);
result[i].f = v;
break;
}
default:
unreachable("unexpected counter data type");
}
}
return true;
}

View File

@ -0,0 +1,72 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_MONITOR_H
#define CROCUS_MONITOR_H
#include "pipe/p_screen.h"
struct crocus_monitor_counter {
int group;
int counter;
};
struct crocus_monitor_config {
struct intel_perf_config *perf_cfg;
/* gallium requires an index for each counter */
int num_counters;
struct crocus_monitor_counter *counters;
};
int crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int crocus_get_monitor_group_info(struct pipe_screen *pscreen,
unsigned index,
struct pipe_driver_query_group_info *info);
struct crocus_context;
struct crocus_screen;
struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context *ice,
unsigned num_queries,
unsigned *query_types);
struct pipe_query;
void crocus_destroy_monitor_object(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_begin_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_end_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_get_monitor_result(struct pipe_context *ctx,
struct crocus_monitor_object *monitor,
bool wait,
union pipe_numeric_type_union *result);
#endif

View File

@ -0,0 +1,74 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_PIPE_H
#define CROCUS_PIPE_H
#include "pipe/p_defines.h"
#include "compiler/shader_enums.h"
static inline gl_shader_stage
stage_from_pipe(enum pipe_shader_type pstage)
{
static const gl_shader_stage stages[PIPE_SHADER_TYPES] = {
[PIPE_SHADER_VERTEX] = MESA_SHADER_VERTEX,
[PIPE_SHADER_TESS_CTRL] = MESA_SHADER_TESS_CTRL,
[PIPE_SHADER_TESS_EVAL] = MESA_SHADER_TESS_EVAL,
[PIPE_SHADER_GEOMETRY] = MESA_SHADER_GEOMETRY,
[PIPE_SHADER_FRAGMENT] = MESA_SHADER_FRAGMENT,
[PIPE_SHADER_COMPUTE] = MESA_SHADER_COMPUTE,
};
return stages[pstage];
}
static inline enum pipe_shader_type
stage_to_pipe(gl_shader_stage stage)
{
static const enum pipe_shader_type pstages[MESA_SHADER_STAGES] = {
[MESA_SHADER_VERTEX] = PIPE_SHADER_VERTEX,
[MESA_SHADER_TESS_CTRL] = PIPE_SHADER_TESS_CTRL,
[MESA_SHADER_TESS_EVAL] = PIPE_SHADER_TESS_EVAL,
[MESA_SHADER_GEOMETRY] = PIPE_SHADER_GEOMETRY,
[MESA_SHADER_FRAGMENT] = PIPE_SHADER_FRAGMENT,
[MESA_SHADER_COMPUTE] = PIPE_SHADER_COMPUTE,
};
return pstages[stage];
}
/**
* Convert an swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the HW's
* "Shader Channel Select" enumerations (i.e. SCS_RED). The mappings are
*
* SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
* 0 1 2 3 4 5
* 4 5 6 7 0 1
* SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
*
* which is simply adding 4 then modding by 8 (or anding with 7).
*/
static inline enum isl_channel_select
pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
{
return (swizzle + 4) & 7;
}
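/* A couple of worked examples of the mapping above (illustrative, using the
 * numeric values from the table): PIPE_SWIZZLE_X (0) maps to (0 + 4) & 7 = 4,
 * i.e. SCS_RED, while PIPE_SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0,
 * i.e. SCS_ZERO.
 */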
#endif

View File

@ -0,0 +1,368 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_pipe_control.c
*
* PIPE_CONTROL is the main flushing and synchronization primitive on Intel
* GPUs. It can invalidate caches, stall until rendering reaches various
* stages of completion, write to memory, and other things. In a way, it's
* a swiss army knife command - it has all kinds of capabilities, but some
* significant limitations as well.
*
* Unfortunately, it's notoriously complicated and difficult to use. Many
* sub-commands can't be used together. Some are meant to be used at the
* top of the pipeline (invalidating caches before drawing), while some are
* meant to be used at the end (stalling or flushing after drawing).
*
* Also, there's a list of restrictions a mile long, which vary by generation.
* Do this before doing that, or suffer the consequences (usually a GPU hang).
*
* This file contains helpers for emitting them safely. You can simply call
* crocus_emit_pipe_control_flush() with the desired operations (as logical
* PIPE_CONTROL_* bits), and it will take care of splitting it into multiple
* PIPE_CONTROL commands as necessary. The per-generation workarounds are
* applied in crocus_emit_raw_pipe_control() in crocus_state.c.
*/
#include "crocus_context.h"
#include "util/hash_table.h"
#include "util/set.h"
/**
* Emit a PIPE_CONTROL with various flushing flags.
*
* The caller is responsible for deciding what flags are appropriate for the
* given generation.
*/
void
crocus_emit_pipe_control_flush(struct crocus_batch *batch,
const char *reason,
uint32_t flags)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->ver >= 6 &&
(flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
/* A pipe control command with flush and invalidate bits set
* simultaneously is an inherently racy operation on Gen6+ if the
* contents of the flushed caches were intended to become visible from
* any of the invalidated caches. Split it in two PIPE_CONTROLs, the
* first one should stall the pipeline to make sure that the flushed R/W
* caches are coherent with memory once the specified R/O caches are
* invalidated. On pre-Gen6 hardware the (implicit) R/O cache
* invalidation seems to happen at the bottom of the pipeline together
* with any write cache flush, so this shouldn't be a concern. In order
* to ensure a full stall, we do an end-of-pipe sync.
*/
crocus_emit_end_of_pipe_sync(batch, reason,
flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
}
batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0);
}
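/* A minimal usage sketch (illustrative, not prescriptive): a caller that has
 * rendered to a texture and now wants to sample from it on Gen6+ could simply
 * combine the flush and invalidate bits in a single call and let the splitting
 * logic above handle the ordering, e.g.:
 *
 *    crocus_emit_pipe_control_flush(batch, "example: render-to-texture",
 *                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
 *                                   PIPE_CONTROL_CS_STALL |
 *                                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 *
 * The exact flag choice is still the caller's responsibility and varies by
 * generation.
 */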
/**
* Emit a PIPE_CONTROL that writes to a buffer object.
*
* \p flags should contain one of the following items:
* - PIPE_CONTROL_WRITE_IMMEDIATE
* - PIPE_CONTROL_WRITE_TIMESTAMP
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
*/
void
crocus_emit_pipe_control_write(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm)
{
batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, bo, offset, imm);
}
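/* For instance, a hedged sketch of a fence-style immediate write (fence_bo
 * and the offset here are placeholders for illustration, not names from this
 * file):
 *
 *    crocus_emit_pipe_control_write(batch, "example: fence",
 *                                   PIPE_CONTROL_WRITE_IMMEDIATE |
 *                                   PIPE_CONTROL_CS_STALL,
 *                                   fence_bo, 0, 1);
 */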
/**
* Restriction [DevSNB, DevIVB]:
*
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
* 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
* 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
* (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
* cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
* another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
* unless SW can otherwise guarantee that the pipeline from WM onwards is
* already flushed (e.g., via a preceding MI_FLUSH).
*/
void
crocus_emit_depth_stall_flushes(struct crocus_batch *batch)
{
UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
assert(devinfo->ver >= 6);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_CACHE_FLUSH);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
}
/*
* From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
*
* Write synchronization is a special case of end-of-pipe
* synchronization that requires that the render cache and/or depth
* related caches are flushed to memory, where the data will become
* globally visible. This type of synchronization is required prior to
* SW (CPU) actually reading the result data from memory, or initiating
* an operation that will use as a read surface (such as a texture
* surface) a previous render target and/or depth/stencil buffer
*
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
*
* Exercising the write cache flush bits (Render Target Cache Flush
* Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
* ensures the write caches are flushed and doesn't guarantee the data
* is globally visible.
*
* SW can track the completion of the end-of-pipe-synchronization by
* using "Notify Enable" and "PostSync Operation - Write Immediate
* Data" in the PIPE_CONTROL command.
*/
void
crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
const char *reason, uint32_t flags)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->ver >= 6) {
/* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
*
* "The most common action to perform upon reaching a synchronization
* point is to write a value out to memory. An immediate value
* (included with the synchronization command) may be written."
*
* From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
*
* "In case the data flushed out by the render engine is to be read
* back in to the render engine in coherent manner, then the render
* engine has to wait for the fence completion before accessing the
* flushed data. This can be achieved by following means on various
* products: PIPE_CONTROL command with CS Stall and the required
* write caches flushed with Post-Sync-Operation as Write Immediate
* Data.
*
* Example:
* - Workload-1 (3D/GPGPU/MEDIA)
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
* Data, Required Write Cache Flush bits set)
* - Workload-2 (Can use the data produce or output by Workload-1)
*/
crocus_emit_pipe_control_write(batch, reason,
flags | PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_WRITE_IMMEDIATE,
batch->ice->workaround_bo,
batch->ice->workaround_offset, 0);
if (batch->screen->devinfo.is_haswell) {
#define GEN7_3DPRIM_START_INSTANCE 0x243C
batch->screen->vtbl.load_register_mem32(batch, GEN7_3DPRIM_START_INSTANCE,
batch->ice->workaround_bo,
batch->ice->workaround_offset);
}
} else {
/* On gen4-5, a regular pipe control seems to suffice. */
crocus_emit_pipe_control_flush(batch, reason, flags);
}
}
/* Emit a pipelined flush to either flush render and texture cache for
* reading from an FBO-drawn texture, or flush so that frontbuffer
* render appears on the screen in DRI1.
*
* This is also used for the always_flush_cache driconf debug option.
*/
void
crocus_emit_mi_flush(struct crocus_batch *batch)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (devinfo->ver >= 6) {
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CS_STALL;
}
crocus_emit_pipe_control_flush(batch, "mi flush", flags);
}
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
*
* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
* And the workaround for these two requires this workaround first:
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*
* And this last workaround is tricky because of the requirements on
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
* volume 2 part 1:
*
* "1 of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Notify Enable ([8] of DW1)"
*
* The cache flushes require the workaround flush that triggered this
* one, so we can't use it. Depth stall would trigger the same.
* Post-sync nonzero is what triggered this second workaround, so we
* can't use that one either. Notify enable is IRQs, which aren't
* really our business. That leaves only stall at scoreboard.
*/
void
crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch)
{
crocus_emit_pipe_control_flush(batch, "nonzero",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
crocus_emit_pipe_control_write(batch, "nonzero",
PIPE_CONTROL_WRITE_IMMEDIATE,
batch->ice->workaround_bo,
batch->ice->workaround_offset, 0);
}
/**
* Flush and invalidate all caches (for debugging purposes).
*/
void
crocus_flush_all_caches(struct crocus_batch *batch)
{
crocus_emit_pipe_control_flush(batch, "debug: flush all caches",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}
static void
crocus_texture_barrier(struct pipe_context *ctx, unsigned flags)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_batch *render_batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_batch *compute_batch = &ice->batches[CROCUS_BATCH_COMPUTE];
const struct intel_device_info *devinfo = &render_batch->screen->devinfo;
if (devinfo->ver < 6) {
crocus_emit_mi_flush(render_batch);
return;
}
if (render_batch->contains_draw) {
crocus_batch_maybe_flush(render_batch, 48);
crocus_emit_pipe_control_flush(render_batch,
"API: texture barrier (1/2)",
(flags == 1 ? PIPE_CONTROL_DEPTH_CACHE_FLUSH : 0) |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(render_batch,
"API: texture barrier (2/2)",
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
if (compute_batch->contains_draw) {
crocus_batch_maybe_flush(compute_batch, 48);
crocus_emit_pipe_control_flush(compute_batch,
"API: texture barrier (1/2)",
PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(compute_batch,
"API: texture barrier (2/2)",
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
}
static void
crocus_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
struct crocus_context *ice = (void *) ctx;
unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
const struct intel_device_info *devinfo = &ice->batches[0].screen->devinfo;
assert(devinfo->ver == 7);
if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
PIPE_BARRIER_INDEX_BUFFER |
PIPE_BARRIER_INDIRECT_BUFFER)) {
bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
}
if (flags & PIPE_BARRIER_CONSTANT_BUFFER) {
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE;
}
if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) {
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH;
}
/* Typed surface messages are handled by the render cache on IVB, so we
* need to flush it too.
*/
if (!devinfo->is_haswell)
bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
for (int i = 0; i < ice->batch_count; i++) {
if (ice->batches[i].contains_draw) {
crocus_batch_maybe_flush(&ice->batches[i], 24);
crocus_emit_pipe_control_flush(&ice->batches[i], "API: memory barrier",
bits);
}
}
}
void
crocus_init_flush_functions(struct pipe_context *ctx)
{
ctx->memory_barrier = crocus_memory_barrier;
ctx->texture_barrier = crocus_texture_barrier;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,347 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_program_cache.c
*
* The in-memory program cache. This is basically a hash table mapping
* API-specified shaders and a state key to a compiled variant. It also
* takes care of uploading shader assembly into a BO for use on the GPU.
*/
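/* A rough sketch of the typical flow (hypothetical caller; the cache_id, key,
 * and compile outputs here are illustrative placeholders, since the real
 * shader-variant code lives elsewhere in the driver):
 *
 *    struct crocus_compiled_shader *shader =
 *       crocus_find_cached_shader(ice, cache_id, sizeof(key), &key);
 *    if (!shader)
 *       shader = crocus_upload_shader(ice, cache_id, sizeof(key), &key,
 *                                     assembly, asm_size, prog_data,
 *                                     prog_data_size, streamout, system_values,
 *                                     num_system_values, num_cbufs, &bt);
 *
 * On a hit the existing variant (and its offset into the cache BO) is reused;
 * on a miss the assembly is copied into the cache BO and a new entry is added.
 */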
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_eu.h"
#include "intel/compiler/brw_nir.h"
#include "crocus_context.h"
#include "crocus_resource.h"
struct keybox {
uint16_t size;
enum crocus_program_cache_id cache_id;
uint8_t data[0];
};
static struct keybox *
make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
const void *key, uint32_t key_size)
{
struct keybox *keybox =
ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
keybox->cache_id = cache_id;
keybox->size = key_size;
memcpy(keybox->data, key, key_size);
return keybox;
}
static uint32_t
keybox_hash(const void *void_key)
{
const struct keybox *key = void_key;
return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
}
static bool
keybox_equals(const void *void_a, const void *void_b)
{
const struct keybox *a = void_a, *b = void_b;
if (a->size != b->size)
return false;
return memcmp(a->data, b->data, a->size) == 0;
}
struct crocus_compiled_shader *
crocus_find_cached_shader(struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
uint32_t key_size, const void *key)
{
struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
struct hash_entry *entry =
_mesa_hash_table_search(ice->shaders.cache, keybox);
ralloc_free(keybox);
return entry ? entry->data : NULL;
}
const void *
crocus_find_previous_compile(const struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
unsigned program_string_id)
{
hash_table_foreach(ice->shaders.cache, entry) {
const struct keybox *keybox = entry->key;
const struct brw_base_prog_key *key = (const void *)keybox->data;
if (keybox->cache_id == cache_id &&
key->program_string_id == program_string_id) {
return keybox->data;
}
}
return NULL;
}
/**
* Look for an existing entry in the cache that has identical assembly code.
*
* This is useful for programs generating shaders at runtime, where multiple
* distinct shaders (from an API perspective) may compile to the same assembly
* in our backend. This saves space in the program cache buffer.
*/
static const struct crocus_compiled_shader *
find_existing_assembly(struct hash_table *cache, void *map,
const void *assembly, unsigned assembly_size)
{
hash_table_foreach (cache, entry) {
const struct crocus_compiled_shader *existing = entry->data;
if (existing->map_size != assembly_size)
continue;
if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
return existing;
}
return NULL;
}
static void
crocus_cache_new_bo(struct crocus_context *ice,
uint32_t new_size)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
struct crocus_bo *new_bo;
new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);
void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
MAP_ASYNC | MAP_PERSISTENT);
if (ice->shaders.cache_next_offset != 0) {
memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
}
crocus_bo_unmap(ice->shaders.cache_bo);
crocus_bo_unreference(ice->shaders.cache_bo);
ice->shaders.cache_bo = new_bo;
ice->shaders.cache_bo_map = map;
if (screen->devinfo.ver == 4) {
/* reemit all shaders on GEN4 only. */
ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
CROCUS_DIRTY_WM;
}
ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
/* unset state base address */
}
static uint32_t
crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
{
if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
uint32_t new_size = ice->shaders.cache_bo->size * 2;
while (ice->shaders.cache_next_offset + size > new_size)
new_size *= 2;
crocus_cache_new_bo(ice, new_size);
}
uint32_t offset = ice->shaders.cache_next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
return offset;
}
struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context *ice,
enum crocus_program_cache_id cache_id, uint32_t key_size,
const void *key, const void *assembly, uint32_t asm_size,
struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size, uint32_t *streamout,
enum brw_param_builtin *system_values,
unsigned num_system_values, unsigned num_cbufs,
const struct crocus_binding_table *bt)
{
struct hash_table *cache = ice->shaders.cache;
struct crocus_compiled_shader *shader =
rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
const struct crocus_compiled_shader *existing = find_existing_assembly(
cache, ice->shaders.cache_bo_map, assembly, asm_size);
/* If we can find a matching prog in the cache already, then reuse the
* existing stuff without creating a new copy in the underlying buffer
* object. This is notably useful for programs generating shaders at
* runtime, where multiple shaders may compile to the same thing in our
* backend.
*/
if (existing) {
shader->offset = existing->offset;
shader->map_size = existing->map_size;
} else {
shader->offset = crocus_alloc_item_data(ice, asm_size);
shader->map_size = asm_size;
memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
}
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;
shader->streamout = streamout;
shader->system_values = system_values;
shader->num_system_values = num_system_values;
shader->num_cbufs = num_cbufs;
shader->bt = *bt;
ralloc_steal(shader, shader->prog_data);
if (prog_data_size > 16) {
ralloc_steal(shader->prog_data, prog_data->param);
ralloc_steal(shader->prog_data, prog_data->pull_param);
}
ralloc_steal(shader, shader->streamout);
ralloc_steal(shader, shader->system_values);
struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
_mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
return shader;
}
bool
crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
uint32_t key_size, uint32_t *kernel_out,
void *prog_data_out)
{
struct blorp_context *blorp = blorp_batch->blorp;
struct crocus_context *ice = blorp->driver_ctx;
struct crocus_compiled_shader *shader =
crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
if (!shader)
return false;
*kernel_out = shader->offset;
*((void **)prog_data_out) = shader->prog_data;
return true;
}
bool
crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_templ,
uint32_t prog_data_size, uint32_t *kernel_out,
void *prog_data_out)
{
struct blorp_context *blorp = blorp_batch->blorp;
struct crocus_context *ice = blorp->driver_ctx;
struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
memcpy(prog_data, prog_data_templ, prog_data_size);
struct crocus_binding_table bt;
memset(&bt, 0, sizeof(bt));
struct crocus_compiled_shader *shader = crocus_upload_shader(
ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
prog_data_size, NULL, NULL, 0, 0, &bt);
*kernel_out = shader->offset;
*((void **)prog_data_out) = shader->prog_data;
return true;
}
void
crocus_init_program_cache(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
ice->shaders.cache =
_mesa_hash_table_create(ice, keybox_hash, keybox_equals);
ice->shaders.cache_bo =
crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
ice->shaders.cache_bo_map =
crocus_bo_map(NULL, ice->shaders.cache_bo,
MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
}
void
crocus_destroy_program_cache(struct crocus_context *ice)
{
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
ice->shaders.prog[i] = NULL;
}
if (ice->shaders.cache_bo) {
crocus_bo_unmap(ice->shaders.cache_bo);
crocus_bo_unreference(ice->shaders.cache_bo);
ice->shaders.cache_bo_map = NULL;
ice->shaders.cache_bo = NULL;
}
ralloc_free(ice->shaders.cache);
}
static const char *
cache_name(enum crocus_program_cache_id cache_id)
{
if (cache_id == CROCUS_CACHE_BLORP)
return "BLORP";
if (cache_id == CROCUS_CACHE_SF)
return "SF";
if (cache_id == CROCUS_CACHE_CLIP)
return "CLIP";
if (cache_id == CROCUS_CACHE_FF_GS)
return "FF_GS";
return _mesa_shader_stage_to_string(cache_id);
}
void
crocus_print_program_cache(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
hash_table_foreach(ice->shaders.cache, entry) {
const struct keybox *keybox = entry->key;
struct crocus_compiled_shader *shader = entry->data;
fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
brw_disassemble(devinfo, ice->shaders.cache_bo_map + shader->offset, 0,
shader->prog_data->program_size, NULL, stderr);
}
}

View File

@ -0,0 +1,996 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_query.c
*
* ============================= GENXML CODE =============================
* [This file is compiled once per generation.]
* =======================================================================
*
* Query object support. This allows measuring various simple statistics
* via counters on the GPU. We use GenX code for MI_MATH calculations.
*/
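/* From the state tracker's point of view this is driven through the standard
 * gallium query hooks wired up at the bottom of this file; a minimal sketch
 * of a caller (illustrative only):
 *
 *    struct pipe_query *q = ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
 *    ctx->begin_query(ctx, q);
 *    ... draw ...
 *    ctx->end_query(ctx, q);
 *    union pipe_query_result result;
 *    ctx->get_query_result(ctx, q, true, &result);
 *    ctx->destroy_query(ctx, q);
 */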
#include <stdio.h>
#include <errno.h>
#include "perf/intel_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "crocus_fence.h"
#include "crocus_monitor.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "crocus_genx_macros.h"
#if GFX_VER == 6
// TODO: Add these to genxml?
#define SO_PRIM_STORAGE_NEEDED(n) (0x2280)
#define SO_NUM_PRIMS_WRITTEN(n) (0x2288)
// TODO: remove HS/DS/CS
#define GFX6_IA_VERTICES_COUNT_num 0x2310
#define GFX6_IA_PRIMITIVES_COUNT_num 0x2318
#define GFX6_VS_INVOCATION_COUNT_num 0x2320
#define GFX6_HS_INVOCATION_COUNT_num 0x2300
#define GFX6_DS_INVOCATION_COUNT_num 0x2308
#define GFX6_GS_INVOCATION_COUNT_num 0x2328
#define GFX6_GS_PRIMITIVES_COUNT_num 0x2330
#define GFX6_CL_INVOCATION_COUNT_num 0x2338
#define GFX6_CL_PRIMITIVES_COUNT_num 0x2340
#define GFX6_PS_INVOCATION_COUNT_num 0x2348
#define GFX6_CS_INVOCATION_COUNT_num 0x2290
#define GFX6_PS_DEPTH_COUNT_num 0x2350
#elif GFX_VER == 7
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
#endif
struct crocus_query {
enum pipe_query_type type;
int index;
bool ready;
bool stalled;
uint64_t result;
struct crocus_state_ref query_state_ref;
struct crocus_query_snapshots *map;
struct crocus_syncobj *syncobj;
int batch_idx;
struct crocus_monitor_object *monitor;
/* Fence for PIPE_QUERY_GPU_FINISHED. */
struct pipe_fence_handle *fence;
};
struct crocus_query_snapshots {
/** crocus_render_condition's saved MI_PREDICATE_RESULT value. */
uint64_t predicate_result;
/** Have the start/end snapshots landed? */
uint64_t snapshots_landed;
/** Starting and ending counter snapshots */
uint64_t start;
uint64_t end;
};
struct crocus_query_so_overflow {
uint64_t predicate_result;
uint64_t snapshots_landed;
struct {
uint64_t prim_storage_needed[2];
uint64_t num_prims[2];
} stream[4];
};
#if GFX_VERx10 == 75
static struct mi_value
query_mem64(struct crocus_query *q, uint32_t offset)
{
return mi_mem64(rw_bo(crocus_resource_bo(q->query_state_ref.res),
q->query_state_ref.offset + offset));
}
#endif
/**
* Is this type of query written by PIPE_CONTROL?
*/
static bool
crocus_is_query_pipelined(struct crocus_query *q)
{
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
return true;
default:
return false;
}
}
static void
mark_available(struct crocus_context *ice, struct crocus_query *q)
{
#if GFX_VERx10 == 75
struct crocus_batch *batch = &ice->batches[q->batch_idx];
struct crocus_screen *screen = batch->screen;
unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
unsigned offset = offsetof(struct crocus_query_snapshots, snapshots_landed);
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
offset += q->query_state_ref.offset;
if (!crocus_is_query_pipelined(q)) {
screen->vtbl.store_data_imm64(batch, bo, offset, true);
} else {
/* Order available *after* the query results. */
flags |= PIPE_CONTROL_FLUSH_ENABLE;
crocus_emit_pipe_control_write(batch, "query: mark available",
flags, bo, offset, true);
}
#endif
}
/**
* Write a pipelined snapshot (such as PS_DEPTH_COUNT or a timestamp) into the
* query object's buffer at the given offset via a PIPE_CONTROL.
*/
static void
crocus_pipelined_write(struct crocus_batch *batch,
struct crocus_query *q,
enum pipe_control_flags flags,
unsigned offset)
{
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
crocus_emit_pipe_control_write(batch, "query: pipelined snapshot write",
flags,
bo, offset, 0ull);
}
static void
write_value(struct crocus_context *ice, struct crocus_query *q, unsigned offset)
{
struct crocus_batch *batch = &ice->batches[q->batch_idx];
#if GFX_VER >= 6
struct crocus_screen *screen = batch->screen;
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#endif
if (!crocus_is_query_pipelined(q)) {
crocus_emit_pipe_control_flush(batch,
"query: non-pipelined snapshot write",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
q->stalled = true;
}
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
PIPE_CONTROL_WRITE_DEPTH_COUNT |
PIPE_CONTROL_DEPTH_STALL,
offset);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
PIPE_CONTROL_WRITE_TIMESTAMP,
offset);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
#if GFX_VER >= 6
screen->vtbl.store_register_mem64(batch,
q->index == 0 ?
GENX(CL_INVOCATION_COUNT_num) :
SO_PRIM_STORAGE_NEEDED(q->index),
bo, offset, false);
#endif
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
#if GFX_VER >= 6
screen->vtbl.store_register_mem64(batch,
SO_NUM_PRIMS_WRITTEN(q->index),
bo, offset, false);
#endif
break;
case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
#if GFX_VER >= 6
static const uint32_t index_to_reg[] = {
GENX(IA_VERTICES_COUNT_num),
GENX(IA_PRIMITIVES_COUNT_num),
GENX(VS_INVOCATION_COUNT_num),
GENX(GS_INVOCATION_COUNT_num),
GENX(GS_PRIMITIVES_COUNT_num),
GENX(CL_INVOCATION_COUNT_num),
GENX(CL_PRIMITIVES_COUNT_num),
GENX(PS_INVOCATION_COUNT_num),
GENX(HS_INVOCATION_COUNT_num),
GENX(DS_INVOCATION_COUNT_num),
GENX(CS_INVOCATION_COUNT_num),
};
uint32_t reg = index_to_reg[q->index];
#if GFX_VER == 6
/* Gfx6 GS code counts full primitives, that is, it won't count individual
* triangles in a triangle strip. Use CL_INVOCATION_COUNT for that.
*/
if (q->index == PIPE_STAT_QUERY_GS_PRIMITIVES)
reg = GENX(CL_INVOCATION_COUNT_num);
#endif
screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
#endif
break;
}
default:
assert(false);
}
}
#if GFX_VER >= 6
static void
write_overflow_values(struct crocus_context *ice, struct crocus_query *q, bool end)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
uint32_t offset = q->query_state_ref.offset;
crocus_emit_pipe_control_flush(batch,
"query: write SO overflow snapshots",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
for (uint32_t i = 0; i < count; i++) {
int s = q->index + i;
int g_idx = offset + offsetof(struct crocus_query_so_overflow,
stream[s].num_prims[end]);
int w_idx = offset + offsetof(struct crocus_query_so_overflow,
stream[s].prim_storage_needed[end]);
screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
bo, g_idx, false);
screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
bo, w_idx, false);
}
}
#endif
static uint64_t
crocus_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
if (time0 > time1) {
return (1ULL << TIMESTAMP_BITS) + time1 - time0;
} else {
return time1 - time0;
}
}
static bool
stream_overflowed(struct crocus_query_so_overflow *so, int s)
{
return (so->stream[s].prim_storage_needed[1] -
so->stream[s].prim_storage_needed[0]) !=
(so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}
static void
calculate_result_on_cpu(const struct intel_device_info *devinfo,
struct crocus_query *q)
{
switch (q->type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
q->result = q->map->end != q->map->start;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* The timestamp is the single starting snapshot. */
q->result = intel_device_info_timebase_scale(devinfo, q->map->start);
q->result &= (1ull << TIMESTAMP_BITS) - 1;
break;
case PIPE_QUERY_TIME_ELAPSED:
q->result = crocus_raw_timestamp_delta(q->map->start, q->map->end);
q->result = intel_device_info_timebase_scale(devinfo, q->result);
q->result &= (1ull << TIMESTAMP_BITS) - 1;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->result = stream_overflowed((void *) q->map, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
q->result = false;
for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
q->result |= stream_overflowed((void *) q->map, i);
break;
case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
q->result = q->map->end - q->map->start;
/* WaDividePSInvocationCountBy4:HSW,BDW */
if (GFX_VER == 7 && devinfo->is_haswell && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
q->result /= 4;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
default:
q->result = q->map->end - q->map->start;
break;
}
q->ready = true;
}
#if GFX_VERx10 == 75
/**
* Calculate the streamout overflow for stream \p idx:
*
* (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
*/
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
struct crocus_query *q,
int idx)
{
#define C(counter, i) query_mem64(q, \
offsetof(struct crocus_query_so_overflow, stream[idx].counter[i]))
return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
mi_isub(b, C(prim_storage_needed, 1),
C(prim_storage_needed, 0)));
#undef C
}
/**
* Calculate whether any stream has overflowed.
*/
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct crocus_query *q)
{
struct mi_value stream_result[MAX_VERTEX_STREAMS];
for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
stream_result[i] = calc_overflow_for_stream(b, q, i);
struct mi_value result = stream_result[0];
for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
result = mi_ior(b, result, stream_result[i]);
return result;
}
static bool
query_is_boolean(enum pipe_query_type type)
{
switch (type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
return true;
default:
return false;
}
}
/**
* Calculate the result using MI_MATH.
*/
static struct mi_value
calculate_result_on_gpu(const struct intel_device_info *devinfo,
struct mi_builder *b,
struct crocus_query *q)
{
struct mi_value result;
struct mi_value start_val =
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
struct mi_value end_val =
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
result = calc_overflow_for_stream(b, q, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result = calc_overflow_any_stream(b, q);
break;
case PIPE_QUERY_TIMESTAMP: {
/* TODO: This discards any fractional bits of the timebase scale.
* We would need to do a bit of fixed point math on the CS ALU, or
* launch an actual shader to calculate this with full precision.
*/
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
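/* For example, assuming the ~12.5 MHz timestamp clock typical of these
 * generations, scale works out to 80 (nanoseconds per GPU tick).
 */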
result = mi_iand(b, mi_imm((1ull << 36) - 1),
mi_imul_imm(b, start_val, scale));
break;
}
case PIPE_QUERY_TIME_ELAPSED: {
/* TODO: This discards fractional bits (see above). */
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
break;
}
default:
result = mi_isub(b, end_val, start_val);
break;
}
/* WaDividePSInvocationCountBy4:HSW,BDW */
if (GFX_VER == 7 && devinfo->is_haswell &&
q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
result = mi_ushr32_imm(b, result, 2);
if (query_is_boolean(q->type))
result = mi_iand(b, mi_nz(b, result), mi_imm(1));
return result;
}
#endif
static struct pipe_query *
crocus_create_query(struct pipe_context *ctx,
unsigned query_type,
unsigned index)
{
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
q->type = query_type;
q->index = index;
q->monitor = NULL;
if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
q->batch_idx = CROCUS_BATCH_COMPUTE;
else
q->batch_idx = CROCUS_BATCH_RENDER;
return (struct pipe_query *) q;
}
static struct pipe_query *
crocus_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
if (unlikely(!q))
return NULL;
q->type = PIPE_QUERY_DRIVER_SPECIFIC;
q->index = -1;
q->monitor = crocus_create_monitor_object(ice, num_queries, query_types);
if (unlikely(!q->monitor)) {
free(q);
return NULL;
}
return (struct pipe_query *) q;
}
static void
crocus_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
struct crocus_query *query = (void *) p_query;
struct crocus_screen *screen = (void *) ctx->screen;
if (query->monitor) {
crocus_destroy_monitor_object(ctx, query->monitor);
query->monitor = NULL;
} else {
crocus_syncobj_reference(screen, &query->syncobj, NULL);
screen->base.fence_reference(ctx->screen, &query->fence, NULL);
}
free(query);
}
static bool
crocus_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_begin_monitor(ctx, q->monitor);
void *ptr = NULL;
uint32_t size;
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
size = sizeof(struct crocus_query_so_overflow);
else
size = sizeof(struct crocus_query_snapshots);
u_upload_alloc(ice->query_buffer_uploader, 0,
size, size, &q->query_state_ref.offset,
&q->query_state_ref.res, &ptr);
if (!crocus_resource_bo(q->query_state_ref.res))
return false;
q->map = ptr;
if (!q->map)
return false;
q->result = 0ull;
q->ready = false;
WRITE_ONCE(q->map->snapshots_landed, false);
if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
ice->state.prims_generated_query_active = true;
ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
}
#if GFX_VER <= 5
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
ice->state.stats_wm++;
ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
}
#endif
#if GFX_VER >= 6
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
write_overflow_values(ice, q, false);
else
#endif
write_value(ice, q,
q->query_state_ref.offset +
offsetof(struct crocus_query_snapshots, start));
return true;
}
static bool
crocus_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_end_monitor(ctx, q->monitor);
if (q->type == PIPE_QUERY_GPU_FINISHED) {
ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
return true;
}
struct crocus_batch *batch = &ice->batches[q->batch_idx];
if (q->type == PIPE_QUERY_TIMESTAMP) {
crocus_begin_query(ctx, query);
crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
mark_available(ice, q);
return true;
}
#if GFX_VER <= 5
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
ice->state.stats_wm--;
ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
}
#endif
if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
ice->state.prims_generated_query_active = false;
ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
}
#if GFX_VER >= 6
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
write_overflow_values(ice, q, true);
else
#endif
write_value(ice, q,
q->query_state_ref.offset +
offsetof(struct crocus_query_snapshots, end));
crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
mark_available(ice, q);
return true;
}
/**
* See if the snapshots have landed for a query, and if so, compute the
* result and mark it ready. Does not flush (unlike crocus_get_query_result).
*/
static void
crocus_check_query_no_flush(struct crocus_context *ice, struct crocus_query *q)
{
struct crocus_screen *screen = (void *) ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
calculate_result_on_cpu(devinfo, q);
}
}
static bool
crocus_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
bool wait,
union pipe_query_result *result)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_get_monitor_result(ctx, q->monitor, wait, result->batch);
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (unlikely(screen->no_hw)) {
result->u64 = 0;
return true;
}
if (!q->ready) {
struct crocus_batch *batch = &ice->batches[q->batch_idx];
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
crocus_batch_flush(batch);
#if GFX_VERx10 == 75
while (!READ_ONCE(q->map->snapshots_landed)) {
if (wait)
crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
else
return false;
}
assert(READ_ONCE(q->map->snapshots_landed));
#else
if (wait)
crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
#endif
calculate_result_on_cpu(devinfo, q);
}
assert(q->ready);
result->u64 = q->result;
return true;
}
#if GFX_VER == 7
static void
crocus_get_query_result_resource(struct pipe_context *ctx,
struct pipe_query *query,
bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *p_res,
unsigned offset)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
struct crocus_batch *batch = &ice->batches[q->batch_idx];
struct crocus_screen *screen = batch->screen;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
struct crocus_resource *res = (void *) p_res;
struct crocus_bo *query_bo = crocus_resource_bo(q->query_state_ref.res);
struct crocus_bo *dst_bo = crocus_resource_bo(p_res);
unsigned snapshots_landed_offset =
offsetof(struct crocus_query_snapshots, snapshots_landed);
res->bind_history |= PIPE_BIND_QUERY_BUFFER;
if (index == -1) {
/* They're asking for the availability of the result. If we still
* have commands queued up which produce the result, submit them
* now so that progress happens. Either way, copy the snapshots
* landed field to the destination resource.
*/
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
crocus_batch_flush(batch);
screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
query_bo, snapshots_landed_offset,
result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
return;
}
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
/* The final snapshots happen to have landed, so let's just compute
* the result on the CPU now...
*/
calculate_result_on_cpu(devinfo, q);
}
if (q->ready) {
/* We happen to have the result on the CPU, so just copy it. */
if (result_type <= PIPE_QUERY_TYPE_U32) {
screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
} else {
screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
}
/* Make sure the result lands before they bind the QBO elsewhere
 * and use the result.
*/
// XXX: Why? i965 doesn't do this.
crocus_emit_pipe_control_flush(batch,
"query: unknown QBO flushing hack",
PIPE_CONTROL_CS_STALL);
return;
}
#if GFX_VERx10 == 75
bool predicated = !wait && !q->stalled;
struct mi_builder b;
mi_builder_init(&b, &batch->screen->devinfo, batch);
struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
struct mi_value dst =
result_type <= PIPE_QUERY_TYPE_U32 ? mi_mem32(rw_bo(dst_bo, offset))
: mi_mem64(rw_bo(dst_bo, offset));
if (predicated) {
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
mi_store_if(&b, dst, result);
} else {
mi_store(&b, dst, result);
}
#endif
}
#endif
static void
crocus_set_active_query_state(struct pipe_context *ctx, bool enable)
{
struct crocus_context *ice = (void *) ctx;
if (ice->state.statistics_counters_enabled == enable)
return;
// XXX: most packets aren't paying attention to this yet, because it'd
// have to be done dynamically at draw time, which is a pain
ice->state.statistics_counters_enabled = enable;
ice->state.dirty |= CROCUS_DIRTY_CLIP |
CROCUS_DIRTY_RASTER |
CROCUS_DIRTY_STREAMOUT |
CROCUS_DIRTY_WM;
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |
CROCUS_STAGE_DIRTY_TCS |
CROCUS_STAGE_DIRTY_TES |
CROCUS_STAGE_DIRTY_VS;
}
static void
set_predicate_enable(struct crocus_context *ice, bool value)
{
if (value)
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
else
ice->state.predicate = CROCUS_PREDICATE_STATE_DONT_RENDER;
}
#if GFX_VER == 7
static void
set_predicate_for_result(struct crocus_context *ice,
struct crocus_query *q,
bool inverted)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#if GFX_VERx10 != 75
/* IVB doesn't have enough MI for this */
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
return;
}
#endif
/* The CPU doesn't have the query result yet; use hardware predication */
ice->state.predicate = CROCUS_PREDICATE_STATE_USE_BIT;
/* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
crocus_emit_pipe_control_flush(batch,
"conditional rendering: set predicate",
PIPE_CONTROL_FLUSH_ENABLE);
q->stalled = true;
#if GFX_VERx10 != 75
struct crocus_screen *screen = batch->screen;
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, start));
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, end));
uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
if (inverted)
mi_predicate |= MI_PREDICATE_LOADOP_LOAD;
else
mi_predicate |= MI_PREDICATE_LOADOP_LOADINV;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
#else
struct mi_builder b;
mi_builder_init(&b, &batch->screen->devinfo, batch);
struct mi_value result;
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
result = calc_overflow_for_stream(&b, q, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result = calc_overflow_any_stream(&b, q);
break;
default: {
/* PIPE_QUERY_OCCLUSION_* */
struct mi_value start =
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
struct mi_value end =
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
result = mi_isub(&b, end, start);
break;
}
}
result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
result = mi_iand(&b, result, mi_imm(1));
/* We immediately set the predicate on the render batch, as all the
* counters come from 3D operations. However, we may need to predicate
* a compute dispatch, which executes in a different GEM context and has
* a different MI_PREDICATE_RESULT register. So, we save the result to
* memory and reload it in crocus_launch_grid.
*/
mi_value_ref(&b, result);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), result);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
mi_store(&b, query_mem64(q, offsetof(struct crocus_query_snapshots,
predicate_result)), result);
#endif
ice->state.compute_predicate = bo;
}
#endif
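/* Hook for pipe->render_condition(): if the query result is already known,
 * rendering is statically enabled or disabled; otherwise gfx7 parts program
 * MI_PREDICATE from the query BO, and older parts fall back to stalling on
 * the query result.
 */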
static void
crocus_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
bool condition,
enum pipe_render_cond_flag mode)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
/* The old condition isn't relevant; we'll update it if necessary */
ice->state.compute_predicate = NULL;
ice->condition.query = q;
ice->condition.condition = condition;
ice->condition.mode = mode;
if (!q) {
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
return;
}
crocus_check_query_no_flush(ice, q);
if (q->result || q->ready) {
set_predicate_enable(ice, (q->result != 0) ^ condition);
} else {
if (mode == PIPE_RENDER_COND_NO_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
perf_debug(&ice->dbg, "Conditional rendering demoted from "
"\"no wait\" to \"wait\".");
}
#if GFX_VER == 7
set_predicate_for_result(ice, q, condition);
#else
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
#endif
}
}
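/* Called when a hardware-predicated condition has to be resolved on the CPU
 * (e.g. by paths that can't use MI_PREDICATE): wait for the query result and
 * turn the condition into a static render/don't-render decision.
 */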
static void
crocus_resolve_conditional_render(struct crocus_context *ice)
{
struct pipe_context *ctx = (void *) ice;
struct crocus_query *q = ice->condition.query;
struct pipe_query *query = (void *) q;
union pipe_query_result result;
if (ice->state.predicate != CROCUS_PREDICATE_STATE_USE_BIT)
return;
assert(q);
crocus_get_query_result(ctx, query, true, &result);
set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}
#if GFX_VER >= 7
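/* Reload the predicate that set_predicate_for_result() saved to memory: the
 * compute batch runs in a different GEM context with its own
 * MI_PREDICATE_RESULT register, so it is re-derived from the BO recorded in
 * ice->state.compute_predicate before a predicated dispatch.
 */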
static void
crocus_emit_compute_predicate(struct crocus_batch *batch)
{
struct crocus_context *ice = batch->ice;
struct crocus_screen *screen = batch->screen;
screen->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
ice->state.compute_predicate, 0);
screen->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC1, 0);
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
}
#endif
void
genX(init_screen_query)(struct crocus_screen *screen)
{
screen->vtbl.resolve_conditional_render = crocus_resolve_conditional_render;
#if GFX_VER >= 7
screen->vtbl.emit_compute_predicate = crocus_emit_compute_predicate;
#endif
}
void
genX(init_query)(struct crocus_context *ice)
{
struct pipe_context *ctx = &ice->ctx;
ctx->create_query = crocus_create_query;
ctx->create_batch_query = crocus_create_batch_query;
ctx->destroy_query = crocus_destroy_query;
ctx->begin_query = crocus_begin_query;
ctx->end_query = crocus_end_query;
ctx->get_query_result = crocus_get_query_result;
#if GFX_VER == 7
ctx->get_query_result_resource = crocus_get_query_result_resource;
#endif
ctx->set_active_query_state = crocus_set_active_query_state;
ctx->render_condition = crocus_render_condition;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,501 @@
/*
* Copyright 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_RESOURCE_H
#define CROCUS_RESOURCE_H
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_range.h"
#include "intel/isl/isl.h"
#include "crocus_bufmgr.h"
struct crocus_batch;
struct crocus_context;
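/* Enough mip levels for the largest surface we expose (16384x16384 on gfx7,
 * i.e. levels 0..14).
 */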
#define CROCUS_MAX_MIPLEVELS 15
struct crocus_format_info {
enum isl_format fmt;
enum pipe_swizzle swizzles[4];
};
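/* PIPE_SWIZZLE_X..W are 0..3 and PIPE_SWIZZLE_0/1 are 4/5, while
 * ISL_CHANNEL_SELECT_RED..ALPHA are 4..7 and ZERO/ONE are 0/1, so adding 4
 * and masking with 7 converts one enum into the other.
 */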
static inline enum isl_channel_select
pipe_to_isl_swizzle(const enum pipe_swizzle pswz, bool green_to_blue)
{
unsigned swz = (pswz + 4) & 7;
return (green_to_blue && swz == ISL_CHANNEL_SELECT_GREEN) ? ISL_CHANNEL_SELECT_BLUE : swz;
}
static inline struct isl_swizzle
pipe_to_isl_swizzles(const enum pipe_swizzle pswz[4])
{
struct isl_swizzle swz;
swz.r = pipe_to_isl_swizzle(pswz[0], false);
swz.g = pipe_to_isl_swizzle(pswz[1], false);
swz.b = pipe_to_isl_swizzle(pswz[2], false);
swz.a = pipe_to_isl_swizzle(pswz[3], false);
return swz;
}
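/* Compose a view swizzle (vswz) on top of a format swizzle (fswz): each
 * output channel picks the format channel the view requests, with 0/1
 * passed through unchanged.
 */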
static inline void
crocus_combine_swizzle(enum pipe_swizzle outswz[4],
const enum pipe_swizzle fswz[4],
const enum pipe_swizzle vswz[4])
{
for (unsigned i = 0; i < 4; i++) {
switch (vswz[i]) {
case PIPE_SWIZZLE_X: outswz[i] = fswz[0]; break;
case PIPE_SWIZZLE_Y: outswz[i] = fswz[1]; break;
case PIPE_SWIZZLE_Z: outswz[i] = fswz[2]; break;
case PIPE_SWIZZLE_W: outswz[i] = fswz[3]; break;
case PIPE_SWIZZLE_1: outswz[i] = PIPE_SWIZZLE_1; break;
case PIPE_SWIZZLE_0: outswz[i] = PIPE_SWIZZLE_0; break;
default: unreachable("invalid swizzle");
}
}
}
/**
* Resources represent a GPU buffer object or image (mipmap tree).
*
* They contain the storage (BO) and layout information (ISL surface).
*/
struct crocus_resource {
struct pipe_resource base;
enum pipe_format internal_format;
/**
* The ISL surface layout information for this resource.
*
* This is not filled out for PIPE_BUFFER resources, but is guaranteed
* to be zeroed. Note that this also guarantees that res->surf.tiling
* will be ISL_TILING_LINEAR, so it's safe to check that.
*/
struct isl_surf surf;
/** Backing storage for the resource */
struct crocus_bo *bo;
/** offset at which data starts in the BO */
uint64_t offset;
/**
* A bitfield of PIPE_BIND_* indicating how this resource was bound
* in the past. Only meaningful for PIPE_BUFFER; used for flushing.
*/
unsigned bind_history;
/**
* A bitfield of MESA_SHADER_* stages indicating where this resource
* was bound.
*/
unsigned bind_stages;
/**
* For PIPE_BUFFER resources, a range which may contain valid data.
*
* This is a conservative estimate of what part of the buffer contains
* valid data that we have to preserve. The rest of the buffer is
* considered invalid, and we can promote writes to that region to
* be unsynchronized writes, avoiding blit copies.
*/
struct util_range valid_buffer_range;
/**
* Auxiliary buffer information (CCS, MCS, or HiZ).
*/
struct {
/** The surface layout for the auxiliary buffer. */
struct isl_surf surf;
/** The buffer object containing the auxiliary data. */
struct crocus_bo *bo;
/** Offset into 'bo' where the auxiliary surface starts. */
uint32_t offset;
struct {
struct isl_surf surf;
/** Offset into 'bo' where the auxiliary surface starts. */
uint32_t offset;
} extra_aux;
/**
* Fast clear color for this surface. For depth surfaces, the clear
* value is stored as a float32 in the red component.
*/
union isl_color_value clear_color;
/**
* \brief The type of auxiliary compression used by this resource.
*
* This describes the type of auxiliary compression that is intended to
* be used by this resource. An aux usage of ISL_AUX_USAGE_NONE means
* that auxiliary compression is permanently disabled. An aux usage
* other than ISL_AUX_USAGE_NONE does not imply that auxiliary
* compression will always be enabled for this surface.
*/
enum isl_aux_usage usage;
/**
* \brief Maps miptree slices to their current aux state.
*
* This two-dimensional array is indexed as [level][layer] and stores an
* aux state for each slice.
*/
enum isl_aux_state **state;
/**
* If (1 << level) is set, HiZ is enabled for that miplevel.
*/
uint16_t has_hiz;
} aux;
/**
* \brief Shadow miptree for sampling when the main surface isn't supported by the sampler HW.
*
* To workaround various sampler bugs and limitations, we blit the main
* texture into a new texture that can be sampled.
*
* This miptree may be used for:
* - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
*/
struct crocus_resource *shadow;
bool shadow_needs_update;
/**
* For external surfaces, this is format that was used to create or import
* the surface. For internal surfaces, this will always be
* PIPE_FORMAT_NONE.
*/
enum pipe_format external_format;
/**
* For external surfaces, this is DRM format modifier that was used to
* create or import the surface. For internal surfaces, this will always
* be DRM_FORMAT_MOD_INVALID.
*/
const struct isl_drm_modifier_info *mod_info;
/**
* The screen the resource was originally created with, stored for refcounting.
*/
struct pipe_screen *orig_screen;
};
/**
* A simple <resource, offset> tuple for storing a reference to a
* piece of state stored in a GPU buffer object.
*/
struct crocus_state_ref {
struct pipe_resource *res;
uint32_t offset;
};
/**
* Gallium CSO for sampler views (texture views).
*
* In addition to the normal pipe_resource, this adds an ISL view
* which may reinterpret the format or restrict levels/layers.
*
* These can also be linear texture buffers.
*/
struct crocus_sampler_view {
struct pipe_sampler_view base;
struct isl_view view;
struct isl_view gather_view;
enum pipe_swizzle swizzle[4];
union isl_color_value clear_color;
/* A short-cut (not a reference) to the actual resource being viewed.
* Multi-planar (or depth+stencil) images may have multiple resources
* chained together; this skips having to traverse base->texture->*.
*/
struct crocus_resource *res;
};
/**
* Image view representation.
*/
struct crocus_image_view {
struct pipe_image_view base;
struct isl_view view;
};
/**
* Gallium CSO for surfaces (framebuffer attachments).
*
* A view of a surface that can be bound to a color render target or
* depth/stencil attachment.
*/
struct crocus_surface {
struct pipe_surface base;
struct isl_view view;
struct isl_view read_view;
struct isl_surf surf;
union isl_color_value clear_color;
struct pipe_resource *align_res;
};
/**
* Transfer object - information about a buffer mapping.
*/
struct crocus_transfer {
struct pipe_transfer base;
struct pipe_debug_callback *dbg;
void *buffer;
void *ptr;
/** A linear staging resource for GPU-based copy_region transfers. */
struct pipe_resource *staging;
struct blorp_context *blorp;
struct crocus_batch *batch;
bool dest_had_defined_contents;
bool has_swizzling;
void (*unmap)(struct crocus_transfer *);
};
/**
* Unwrap a pipe_resource to get the underlying crocus_bo (for convenience).
*/
static inline struct crocus_bo *
crocus_resource_bo(struct pipe_resource *p_res)
{
struct crocus_resource *res = (void *) p_res;
return res->bo;
}
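/* Memory object control state for a BO: external (imported/exported)
 * buffers get the external MOCS entry from ISL, everything else the default.
 */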
static inline uint32_t
crocus_mocs(const struct crocus_bo *bo,
const struct isl_device *dev)
{
return isl_mocs(dev, 0, bo && crocus_bo_is_external(bo));
}
struct crocus_format_info crocus_format_for_usage(const struct intel_device_info *,
enum pipe_format pf,
isl_surf_usage_flags_t usage);
struct pipe_resource *crocus_resource_get_separate_stencil(struct pipe_resource *);
void crocus_get_depth_stencil_resources(const struct intel_device_info *devinfo,
struct pipe_resource *res,
struct crocus_resource **out_z,
struct crocus_resource **out_s);
bool crocus_resource_set_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
union isl_color_value color);
union isl_color_value
crocus_resource_get_clear_color(const struct crocus_resource *res);
void crocus_init_screen_resource_functions(struct pipe_screen *pscreen);
void crocus_dirty_for_history(struct crocus_context *ice,
struct crocus_resource *res);
uint32_t crocus_flush_bits_for_history(struct crocus_resource *res);
void crocus_flush_and_dirty_for_history(struct crocus_context *ice,
struct crocus_batch *batch,
struct crocus_resource *res,
uint32_t extra_flags,
const char *reason);
unsigned crocus_get_num_logical_layers(const struct crocus_resource *res,
unsigned level);
void crocus_resource_disable_aux(struct crocus_resource *res);
#define INTEL_REMAINING_LAYERS UINT32_MAX
#define INTEL_REMAINING_LEVELS UINT32_MAX
void
crocus_hiz_exec(struct crocus_context *ice,
struct crocus_batch *batch,
struct crocus_resource *res,
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum isl_aux_op op,
bool update_clear_depth);
/**
* Prepare a miptree for access
*
* This function should be called prior to any access to miptree in order to
* perform any needed resolves.
*
* \param[in] start_level The first mip level to be accessed
*
* \param[in] num_levels The number of miplevels to be accessed or
* INTEL_REMAINING_LEVELS to indicate every level
* above start_level will be accessed
*
* \param[in] start_layer The first array slice or 3D layer to be accessed
*
* \param[in] num_layers The number of array slices or 3D layers be
* accessed or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer will be accessed
*
* \param[in]  aux_usage       The auxiliary compression usage with which the
*                             access will be done; this must be
*                             ISL_AUX_USAGE_NONE for uncompressed miptrees
*
* \param[in] fast_clear_supported Whether or not the access will support
* fast clears in the miptree's auxiliary
* compression format
*/
void
crocus_resource_prepare_access(struct crocus_context *ice,
struct crocus_resource *res,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage,
bool fast_clear_supported);
/**
* Complete a write operation
*
* This function should be called after any operation writes to a miptree.
* This will update the miptree's compression state so that future resolves
* happen correctly. Technically, this function can be called before the
* write occurs but the caller must ensure that they don't interlace
* crocus_resource_prepare_access and crocus_resource_finish_write calls to
* overlapping layer/level ranges.
*
* \param[in] level The mip level that was written
*
* \param[in] start_layer The first array slice or 3D layer written
*
* \param[in] num_layers The number of array slices or 3D layers
* written or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer was written
*
* \param[in] written_with_aux Whether or not the write was done with
* auxiliary compression enabled
*/
void
crocus_resource_finish_write(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage);
/** Get the auxiliary compression state of a miptree slice */
enum isl_aux_state
crocus_resource_get_aux_state(const struct crocus_resource *res,
uint32_t level, uint32_t layer);
/**
* Set the auxiliary compression state of a miptree slice range
*
* This function directly sets the auxiliary compression state of a slice
* range of a miptree. It only modifies data structures and does not do any
* resolves. This should only be called by code which directly performs
* compression operations such as fast clears and resolves. Most code should
* use crocus_resource_prepare_access or crocus_resource_finish_write.
*/
void
crocus_resource_set_aux_state(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_state aux_state);
/**
* Prepare a miptree for raw access
*
* This helper prepares the miptree for access that knows nothing about any
* sort of compression whatsoever. This is useful when mapping the surface or
* using it with the blitter.
*/
static inline void
crocus_resource_access_raw(struct crocus_context *ice,
struct crocus_resource *res,
uint32_t level, uint32_t layer,
uint32_t num_layers,
bool write)
{
crocus_resource_prepare_access(ice, res, level, 1, layer, num_layers,
ISL_AUX_USAGE_NONE, false);
if (write) {
crocus_resource_finish_write(ice, res, level, layer, num_layers,
ISL_AUX_USAGE_NONE);
}
}
void
crocus_resource_get_image_offset(struct crocus_resource *res,
uint32_t level, uint32_t z,
uint32_t *x, uint32_t *y);
static inline enum isl_aux_usage
crocus_resource_texture_aux_usage(const struct crocus_resource *res)
{
return res->aux.usage == ISL_AUX_USAGE_MCS ? ISL_AUX_USAGE_MCS : ISL_AUX_USAGE_NONE;
}
void crocus_resource_prepare_texture(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format view_format,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers);
static inline bool
crocus_resource_unfinished_aux_import(struct crocus_resource *res)
{
return res->base.next != NULL && res->mod_info &&
res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
}
void crocus_resource_finish_aux_import(struct pipe_screen *pscreen,
struct crocus_resource *res);
bool crocus_has_invalid_primary(const struct crocus_resource *res,
unsigned start_level, unsigned num_levels,
unsigned start_layer, unsigned num_layers);
void crocus_resource_check_level_layer(const struct crocus_resource *res,
uint32_t level, uint32_t layer);
bool crocus_resource_level_has_hiz(const struct crocus_resource *res,
uint32_t level);
bool crocus_has_color_unresolved(const struct crocus_resource *res,
unsigned start_level, unsigned num_levels,
unsigned start_layer, unsigned num_layers);
enum isl_aux_usage crocus_resource_render_aux_usage(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format render_fmt,
bool blend_enabled,
bool draw_aux_disabled);
void crocus_resource_prepare_render(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
void crocus_resource_finish_render(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
#endif

View File

@ -0,0 +1,829 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_screen.c
*
* Screen related driver hooks and capability lists.
*
* A program may use multiple rendering contexts (crocus_context), but
* they all share a common screen (crocus_screen). Global driver state
* can be stored in the screen; it may be accessed by multiple threads.
*/
#include <stdio.h>
#include <errno.h>
#include <sys/ioctl.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/debug.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "util/xmlconfig.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "crocus_fence.h"
#include "crocus_pipe.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_gem.h"
#include "intel/common/intel_l3_config.h"
#include "crocus_monitor.h"
#define genX_call(devinfo, func, ...) \
switch ((devinfo)->verx10) { \
case 75: \
gfx75_##func(__VA_ARGS__); \
break; \
case 70: \
gfx7_##func(__VA_ARGS__); \
break; \
case 60: \
gfx6_##func(__VA_ARGS__); \
break; \
case 50: \
gfx5_##func(__VA_ARGS__); \
break; \
case 45: \
gfx45_##func(__VA_ARGS__); \
break; \
case 40: \
gfx4_##func(__VA_ARGS__); \
break; \
default: \
unreachable("Unknown hardware generation"); \
}
static void
crocus_flush_frontbuffer(struct pipe_screen *_screen,
struct pipe_context *_pipe,
struct pipe_resource *resource,
unsigned level, unsigned layer,
void *context_private, struct pipe_box *box)
{
}
static const char *
crocus_get_vendor(struct pipe_screen *pscreen)
{
return "Intel";
}
static const char *
crocus_get_device_vendor(struct pipe_screen *pscreen)
{
return "Intel";
}
static const char *
crocus_get_name(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
static char buf[128];
const char *name = intel_get_device_name(screen->pci_id);
if (!name)
name = "Intel Unknown";
snprintf(buf, sizeof(buf), "Mesa %s", name);
return buf;
}
static uint64_t
get_aperture_size(int fd)
{
struct drm_i915_gem_get_aperture aperture = {};
intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
return aperture.aper_size;
}
static int
crocus_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
switch (param) {
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
case PIPE_CAP_VERTEX_SHADER_SATURATE:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_ACCELERATED:
case PIPE_CAP_UMA:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
case PIPE_CAP_NIR_COMPACT_ARRAYS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
case PIPE_CAP_FENCE_SIGNAL:
case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
return true;
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_BALLOT:
case PIPE_CAP_PACKED_UNIFORMS:
case PIPE_CAP_GL_CLAMP:
return false;
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
return devinfo->ver <= 5;
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_QUERY_TIME_ELAPSED:
return devinfo->ver >= 5;
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_GL_SPIRV:
case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS:
case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
case PIPE_CAP_DOUBLES:
return devinfo->ver >= 7;
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return devinfo->is_haswell;
case PIPE_CAP_CULL_DISTANCE:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_SHADOW_LOD:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_CLEAR_SCISSORED:
return devinfo->ver >= 6;
case PIPE_CAP_FBFETCH:
return devinfo->verx10 >= 45 ? BRW_MAX_DRAW_BUFFERS : 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
return devinfo->ver >= 6 ? 1 : 0;
case PIPE_CAP_MAX_RENDER_TARGETS:
return BRW_MAX_DRAW_BUFFERS;
case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
if (devinfo->ver >= 7)
return 16384;
else
return 8192;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
if (devinfo->ver >= 7)
return CROCUS_MAX_MIPLEVELS; /* 16384x16384 */
else
return CROCUS_MAX_MIPLEVELS - 1; /* 8192x8192 */
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 12; /* 2048x2048 */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return (devinfo->ver >= 6) ? 4 : 0;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return devinfo->ver >= 7 ? 2048 : 512;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
return BRW_MAX_SOL_BINDINGS / CROCUS_MAX_SOL_BUFFERS;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return BRW_MAX_SOL_BINDINGS;
case PIPE_CAP_GLSL_FEATURE_LEVEL: {
if (devinfo->is_haswell)
return 460;
else if (devinfo->ver >= 7)
return 420;
else if (devinfo->ver >= 6)
return 330;
return 120;
}
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
return devinfo->ver < 6 ? 120 : 130;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
/* 3DSTATE_CONSTANT_XS requires the start of UBOs to be 32B aligned */
return 32;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return CROCUS_MAP_BUFFER_ALIGNMENT;
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
/* Choose a cacheline (64 bytes) so that we can safely have the CPU and
* GPU writing the same SSBO on non-coherent systems (Atom CPUs). With
* UBOs, the GPU never writes, so there's no problem. For an SSBO, the
* GPU and the CPU can be updating disjoint regions of the buffer
* simultaneously and that will break if the regions overlap the same
* cacheline.
*/
return devinfo->ver >= 7 ? 64 : 0;
case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
return devinfo->ver >= 7 ? (1 << 27) : 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 16; // XXX: u_screen says 256 is the minimum value...
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return true;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return CROCUS_MAX_TEXTURE_BUFFER_SIZE;
case PIPE_CAP_MAX_VIEWPORTS:
return devinfo->ver >= 6 ? 16 : 1;
case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
return devinfo->ver >= 6 ? 256 : 0;
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return devinfo->ver >= 6 ? 1024 : 0;
case PIPE_CAP_MAX_GS_INVOCATIONS:
return devinfo->ver >= 7 ? 32 : 1;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
if (devinfo->ver >= 7)
return 4;
else if (devinfo->ver == 6)
return 1;
else
return 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
if (devinfo->ver >= 7)
return -32;
else if (devinfo->ver == 6)
return -8;
else
return 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
if (devinfo->ver >= 7)
return 31;
else if (devinfo->ver == 6)
return 7;
else
return 0;
case PIPE_CAP_MAX_VERTEX_STREAMS:
return devinfo->ver >= 7 ? 4 : 1;
case PIPE_CAP_VENDOR_ID:
return 0x8086;
case PIPE_CAP_DEVICE_ID:
return screen->pci_id;
case PIPE_CAP_VIDEO_MEMORY: {
/* Once a batch uses more than 75% of the maximum mappable size, we
* assume that there's some fragmentation, and we start doing extra
* flushing, etc. That's the big cliff apps will care about.
*/
const unsigned gpu_mappable_megabytes =
(screen->aperture_bytes * 3 / 4) / (1024 * 1024);
const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
const long system_page_size = sysconf(_SC_PAGE_SIZE);
if (system_memory_pages <= 0 || system_page_size <= 0)
return -1;
const uint64_t system_memory_bytes =
(uint64_t) system_memory_pages * (uint64_t) system_page_size;
const unsigned system_memory_megabytes =
(unsigned) (system_memory_bytes / (1024 * 1024));
return MIN2(system_memory_megabytes, gpu_mappable_megabytes);
}
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_MAX_VARYINGS:
return (screen->devinfo.ver >= 6) ? 32 : 16;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
/* AMD_pinned_memory assumes the flexibility of using client memory
* for any buffer (incl. vertex buffers) which rules out the prospect
* of using snooped buffers, as using snooped buffers without
* cognizance is likely to be detrimental to performance and require
* extensive checking in the driver for correctness, e.g. to prevent
* illegal snoop <-> snoop transfers.
*/
return devinfo->has_llc;
case PIPE_CAP_THROTTLE:
return screen->driconf.disable_throttling ? 0 : 1;
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
return PIPE_CONTEXT_PRIORITY_LOW |
PIPE_CONTEXT_PRIORITY_MEDIUM |
PIPE_CONTEXT_PRIORITY_HIGH;
case PIPE_CAP_FRONTEND_NOOP:
return true;
// XXX: don't hardcode 00:00:02.0 PCI here
case PIPE_CAP_PCI_GROUP:
return 0;
case PIPE_CAP_PCI_BUS:
return 0;
case PIPE_CAP_PCI_DEVICE:
return 2;
case PIPE_CAP_PCI_FUNCTION:
return 0;
default:
return u_pipe_screen_get_param_defaults(pscreen, param);
}
return 0;
}
static float
crocus_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
if (devinfo->ver >= 6)
return 7.375f;
else
return 7.0f;
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
return 255.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 15.0f;
case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
return 0.0f;
default:
unreachable("unknown param");
}
}
static int
crocus_get_shader_param(struct pipe_screen *pscreen,
enum pipe_shader_type p_stage,
enum pipe_shader_cap param)
{
gl_shader_stage stage = stage_from_pipe(p_stage);
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (devinfo->ver < 6 &&
p_stage != PIPE_SHADER_VERTEX &&
p_stage != PIPE_SHADER_FRAGMENT)
return 0;
if (devinfo->ver == 6 &&
p_stage != PIPE_SHADER_VERTEX &&
p_stage != PIPE_SHADER_FRAGMENT &&
p_stage != PIPE_SHADER_GEOMETRY)
return 0;
/* this is probably not totally correct.. but it's a start: */
switch (param) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
return stage == MESA_SHADER_FRAGMENT ? 1024 : 16384;
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
return stage == MESA_SHADER_FRAGMENT ? 1024 : 0;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return UINT_MAX;
case PIPE_SHADER_CAP_MAX_INPUTS:
if (stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_GEOMETRY)
return 16; /* Gen7 vec4 geom backend */
return 32;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return 32;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return 16 * 1024 * sizeof(float);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return devinfo->ver >= 6 ? 16 : 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
/* Lie about these to avoid st/mesa's GLSL IR lowering of indirects,
* which we don't want. Our compiler backend will check brw_compiler's
* options and call nir_lower_indirect_derefs appropriately anyway.
*/
return true;
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return devinfo->is_haswell ? CROCUS_MAX_TEXTURE_SAMPLERS : 16;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
if (devinfo->ver >= 7 &&
(p_stage == PIPE_SHADER_FRAGMENT ||
p_stage == PIPE_SHADER_COMPUTE))
return CROCUS_MAX_TEXTURE_SAMPLERS;
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return devinfo->ver >= 7 ? (CROCUS_MAX_ABOS + CROCUS_MAX_SSBOS) : 0;
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
return 0;
default:
unreachable("unknown shader param");
}
}
static int
crocus_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
if (devinfo->ver < 7)
return 0;
#define RET(x) do { \
if (ret) \
memcpy(ret, x, sizeof(x)); \
return sizeof(x); \
} while (0)
switch (param) {
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
RET((uint32_t []){ 32 });
case PIPE_COMPUTE_CAP_IR_TARGET:
if (ret)
strcpy(ret, "gen");
return 4;
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
RET((uint64_t []) { 3 });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
RET(((uint64_t []) { 65535, 65535, 65535 }));
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
/* MaxComputeWorkGroupSize[0..2] */
RET(((uint64_t []) {max_invocations, max_invocations, max_invocations}));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
/* MaxComputeWorkGroupInvocations */
RET((uint64_t []) { max_invocations });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
/* MaxComputeSharedMemorySize */
RET((uint64_t []) { 64 * 1024 });
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
RET((uint64_t []) { max_invocations });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
// XXX: I think these are for Clover...
return 0;
default:
unreachable("unknown compute param");
}
}
static uint64_t
crocus_get_timestamp(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
const unsigned TIMESTAMP = 0x2358;
uint64_t result;
crocus_reg_read(screen->bufmgr, TIMESTAMP | 1, &result);
result = intel_device_info_timebase_scale(&screen->devinfo, result);
result &= (1ull << TIMESTAMP_BITS) - 1;
return result;
}
void
crocus_screen_destroy(struct crocus_screen *screen)
{
u_transfer_helper_destroy(screen->base.transfer_helper);
crocus_bufmgr_unref(screen->bufmgr);
disk_cache_destroy(screen->disk_cache);
close(screen->winsys_fd);
ralloc_free(screen);
}
static void
crocus_screen_unref(struct pipe_screen *pscreen)
{
crocus_pscreen_unref(pscreen);
}
static void
crocus_query_memory_info(struct pipe_screen *pscreen,
struct pipe_memory_info *info)
{
}
static const void *
crocus_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir,
enum pipe_shader_type pstage)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
gl_shader_stage stage = stage_from_pipe(pstage);
assert(ir == PIPE_SHADER_IR_NIR);
return screen->compiler->glsl_compiler_options[stage].NirOptions;
}
static struct disk_cache *
crocus_get_disk_shader_cache(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
return screen->disk_cache;
}
static const struct intel_l3_config *
crocus_get_default_l3_config(const struct intel_device_info *devinfo,
bool compute)
{
bool wants_dc_cache = true;
bool has_slm = compute;
const struct intel_l3_weights w =
intel_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
return intel_get_l3_config(devinfo, w);
}
static void
crocus_shader_debug_log(void *data, const char *fmt, ...)
{
struct pipe_debug_callback *dbg = data;
unsigned id = 0;
va_list args;
if (!dbg->debug_message)
return;
va_start(args, fmt);
dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_SHADER_INFO, fmt, args);
va_end(args);
}
static void
crocus_shader_perf_log(void *data, const char *fmt, ...)
{
struct pipe_debug_callback *dbg = data;
unsigned id = 0;
va_list args;
va_start(args, fmt);
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
va_list args_copy;
va_copy(args_copy, args);
vfprintf(stderr, fmt, args_copy);
va_end(args_copy);
}
if (dbg->debug_message) {
dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_PERF_INFO, fmt, args);
}
va_end(args);
}
static bool
crocus_detect_swizzling(struct crocus_screen *screen)
{
/* Broadwell PRM says:
*
* "Before Gen8, there was a historical configuration control field to
* swizzle address bit[6] for in X/Y tiling modes. This was set in three
* different places: TILECTL[1:0], ARB_MODE[5:4], and
* DISP_ARB_CTL[14:13].
*
* For Gen8 and subsequent generations, the swizzle fields are all
* reserved, and the CPU's memory controller performs all address
* swizzling modifications."
*/
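/* The answer is stored in screen->has_swizzling and handed to
 * isl_device_init(), so CPU mapping of X/Y-tiled surfaces can undo the
 * bit-6 address swizzle.
 */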
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0;
struct crocus_bo *buffer =
crocus_bo_alloc_tiled(screen->bufmgr, "swizzle test", 32768,
0, tiling, 512, 0);
if (buffer == NULL)
return false;
crocus_bo_get_tiling(buffer, &tiling, &swizzle_mode);
crocus_bo_unreference(buffer);
return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
}
struct pipe_screen *
crocus_screen_create(int fd, const struct pipe_screen_config *config)
{
struct crocus_screen *screen = rzalloc(NULL, struct crocus_screen);
if (!screen)
return NULL;
if (!intel_get_device_info_from_fd(fd, &screen->devinfo)) {
ralloc_free(screen);
return NULL;
}
screen->pci_id = screen->devinfo.chipset_id;
screen->no_hw = screen->devinfo.no_hw;
/* crocus only handles gfx4-7; newer parts are handled by iris */
if (screen->devinfo.ver >= 8) {
ralloc_free(screen);
return NULL;
}
p_atomic_set(&screen->refcount, 1);
screen->aperture_bytes = get_aperture_size(fd);
if (getenv("INTEL_NO_HW") != NULL)
screen->no_hw = true;
bool bo_reuse = false;
int bo_reuse_mode = driQueryOptioni(config->options, "bo_reuse");
switch (bo_reuse_mode) {
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
bo_reuse = true;
break;
}
screen->bufmgr = crocus_bufmgr_get_for_fd(&screen->devinfo, fd, bo_reuse);
if (!screen->bufmgr)
return NULL;
screen->fd = crocus_bufmgr_get_fd(screen->bufmgr);
screen->winsys_fd = fd;
screen->has_swizzling = crocus_detect_swizzling(screen);
brw_process_intel_debug_variable();
screen->driconf.dual_color_blend_by_location =
driQueryOptionb(config->options, "dual_color_blend_by_location");
screen->driconf.disable_throttling =
driQueryOptionb(config->options, "disable_throttling");
screen->driconf.always_flush_cache =
driQueryOptionb(config->options, "always_flush_cache");
screen->precompile = env_var_as_boolean("shader_precompile", true);
isl_device_init(&screen->isl_dev, &screen->devinfo,
screen->has_swizzling);
screen->compiler = brw_compiler_create(screen, &screen->devinfo);
screen->compiler->shader_debug_log = crocus_shader_debug_log;
screen->compiler->shader_perf_log = crocus_shader_perf_log;
screen->compiler->supports_pull_constants = false;
screen->compiler->supports_shader_constants = false;
screen->compiler->compact_params = false;
screen->compiler->constant_buffer_0_is_relative = true;
if (screen->devinfo.ver == 7) {
screen->l3_config_3d = crocus_get_default_l3_config(&screen->devinfo, false);
screen->l3_config_cs = crocus_get_default_l3_config(&screen->devinfo, true);
}
crocus_disk_cache_init(screen);
slab_create_parent(&screen->transfer_pool,
sizeof(struct crocus_transfer), 64);
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
assert(screen->subslice_total >= 1);
struct pipe_screen *pscreen = &screen->base;
crocus_init_screen_fence_functions(pscreen);
crocus_init_screen_resource_functions(pscreen);
pscreen->destroy = crocus_screen_unref;
pscreen->get_name = crocus_get_name;
pscreen->get_vendor = crocus_get_vendor;
pscreen->get_device_vendor = crocus_get_device_vendor;
pscreen->get_param = crocus_get_param;
pscreen->get_shader_param = crocus_get_shader_param;
pscreen->get_compute_param = crocus_get_compute_param;
pscreen->get_paramf = crocus_get_paramf;
pscreen->get_compiler_options = crocus_get_compiler_options;
pscreen->get_disk_shader_cache = crocus_get_disk_shader_cache;
pscreen->is_format_supported = crocus_is_format_supported;
pscreen->context_create = crocus_create_context;
pscreen->flush_frontbuffer = crocus_flush_frontbuffer;
pscreen->get_timestamp = crocus_get_timestamp;
pscreen->query_memory_info = crocus_query_memory_info;
pscreen->get_driver_query_group_info = crocus_get_monitor_group_info;
pscreen->get_driver_query_info = crocus_get_monitor_info;
genX_call(&screen->devinfo, init_screen_state, screen);
genX_call(&screen->devinfo, init_screen_query, screen);
return pscreen;
}

View File

@ -0,0 +1,253 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_SCREEN_H
#define CROCUS_SCREEN_H
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "frontend/drm_driver.h"
#include "util/disk_cache.h"
#include "util/slab.h"
#include "util/u_screen.h"
#include "intel/dev/intel_device_info.h"
#include "intel/isl/isl.h"
#include "crocus_bufmgr.h"
#include "compiler/shader_enums.h"
struct crocus_monitor_config;
struct crocus_resource;
struct crocus_context;
struct crocus_sampler_state;
struct brw_vue_map;
struct brw_tcs_prog_key;
struct brw_tes_prog_key;
struct brw_cs_prog_key;
struct brw_wm_prog_key;
struct brw_vs_prog_key;
struct brw_gs_prog_key;
struct shader_info;
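/* Force single, untorn reads/writes through a volatile cast, for fields
 * shared between threads without a lock.
 */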
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) *(volatile __typeof__(x) *)&(x) = (v)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
#define CROCUS_MAX_SOL_BUFFERS 4
#define CROCUS_MAP_BUFFER_ALIGNMENT 64
/**
* Virtual table for generation-specific (genxml) function calls.
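*
* Entries are filled in by the per-generation genX(init_*) functions,
* reached through the genX_call() dispatch in crocus_screen.c.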
*/
struct crocus_vtable {
void (*destroy_state)(struct crocus_context *ice);
void (*init_render_context)(struct crocus_batch *batch);
void (*init_compute_context)(struct crocus_batch *batch);
void (*upload_render_state)(struct crocus_context *ice,
struct crocus_batch *batch,
const struct pipe_draw_info *draw,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc);
void (*update_surface_base_address)(struct crocus_batch *batch);
void (*upload_compute_state)(struct crocus_context *ice,
struct crocus_batch *batch,
const struct pipe_grid_info *grid);
void (*rebind_buffer)(struct crocus_context *ice,
struct crocus_resource *res);
void (*resolve_conditional_render)(struct crocus_context *ice);
void (*emit_compute_predicate)(struct crocus_batch *batch);
void (*load_register_reg32)(struct crocus_batch *batch, uint32_t dst,
uint32_t src);
void (*load_register_reg64)(struct crocus_batch *batch, uint32_t dst,
uint32_t src);
void (*load_register_imm32)(struct crocus_batch *batch, uint32_t reg,
uint32_t val);
void (*load_register_imm64)(struct crocus_batch *batch, uint32_t reg,
uint64_t val);
void (*load_register_mem32)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset);
void (*load_register_mem64)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset);
void (*store_register_mem32)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset,
bool predicated);
void (*store_register_mem64)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset,
bool predicated);
void (*store_data_imm32)(struct crocus_batch *batch,
struct crocus_bo *bo, uint32_t offset,
uint32_t value);
void (*store_data_imm64)(struct crocus_batch *batch,
struct crocus_bo *bo, uint32_t offset,
uint64_t value);
void (*copy_mem_mem)(struct crocus_batch *batch,
struct crocus_bo *dst_bo, uint32_t dst_offset,
struct crocus_bo *src_bo, uint32_t src_offset,
unsigned bytes);
void (*emit_raw_pipe_control)(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm);
void (*emit_mi_report_perf_count)(struct crocus_batch *batch,
struct crocus_bo *bo,
uint32_t offset_in_bytes,
uint32_t report_id);
uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
const struct brw_vue_map *vue_map);
void (*populate_vs_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_vs_prog_key *key);
void (*populate_tcs_key)(const struct crocus_context *ice,
struct brw_tcs_prog_key *key);
void (*populate_tes_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_tes_prog_key *key);
void (*populate_gs_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_gs_prog_key *key);
void (*populate_fs_key)(const struct crocus_context *ice,
const struct shader_info *info,
struct brw_wm_prog_key *key);
void (*populate_cs_key)(const struct crocus_context *ice,
struct brw_cs_prog_key *key);
void (*lost_genx_state)(struct crocus_context *ice, struct crocus_batch *batch);
void (*finish_batch)(struct crocus_batch *batch); /* haswell only */
void (*upload_urb_fence)(struct crocus_batch *batch); /* gen4/5 only */
bool (*blit_blt)(struct crocus_batch *batch,
const struct pipe_blit_info *info);
bool (*copy_region_blt)(struct crocus_batch *batch,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct crocus_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
bool (*calculate_urb_fence)(struct crocus_batch *batch, unsigned csize,
unsigned vsize, unsigned sfsize);
void (*batch_reset_dirty)(struct crocus_batch *batch);
unsigned (*translate_prim_type)(enum pipe_prim_type prim, uint8_t verts_per_patch);
void (*update_so_strides)(struct crocus_context *ice,
uint16_t *strides);
uint32_t (*get_so_offset)(struct pipe_stream_output_target *tgt);
};
struct crocus_screen {
struct pipe_screen base;
uint32_t refcount;
/** Global slab allocator for crocus_transfer_map objects */
struct slab_parent_pool transfer_pool;
/** drm device file descriptor, shared with bufmgr, do not close. */
int fd;
/**
* drm device file descriptor used for window system integration, owned
* by crocus_screen; it can be a different DRM instance than fd.
*/
int winsys_fd;
/** PCI ID for our GPU device */
int pci_id;
bool no_hw;
struct crocus_vtable vtbl;
/** Global program_string_id counter (see get_program_string_id()) */
unsigned program_id;
/** Precompile shaders at link time? (Can be disabled for debugging.) */
bool precompile;
/** driconf options and application workarounds */
struct {
/** Dual color blend by location instead of index (for broken apps) */
bool dual_color_blend_by_location;
bool disable_throttling;
bool always_flush_cache;
} driconf;
unsigned subslice_total;
uint64_t aperture_bytes;
struct intel_device_info devinfo;
struct isl_device isl_dev;
struct crocus_bufmgr *bufmgr;
struct brw_compiler *compiler;
struct crocus_monitor_config *monitor_cfg;
bool has_swizzling;
const struct intel_l3_config *l3_config_3d;
const struct intel_l3_config *l3_config_cs;
struct disk_cache *disk_cache;
};
struct pipe_screen *
crocus_screen_create(int fd, const struct pipe_screen_config *config);
void crocus_screen_destroy(struct crocus_screen *screen);
UNUSED static inline struct pipe_screen *
crocus_pscreen_ref(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
p_atomic_inc(&screen->refcount);
return pscreen;
}
UNUSED static inline void
crocus_pscreen_unref(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
if (p_atomic_dec_zero(&screen->refcount))
crocus_screen_destroy(screen);
}
bool
crocus_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count,
unsigned usage);
void crocus_disk_cache_init(struct crocus_screen *screen);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
Quick TODO list from what I can see:
General:
Re-emit SURFACE_STATE_BASE_ADDRESS at the top of every batch
Gen4:
rgb32 issue
Gen5:
rgb32 issue
Gen6:
vec4 push constants
Gen7:

View File

@ -0,0 +1,11 @@
// crocus specific driconf options
DRI_CONF_SECTION_DEBUG
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
DRI_CONF_DISABLE_THROTTLING(false)
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",)
DRI_CONF_SECTION_END

View File

@ -0,0 +1,190 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
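/* Helpers for the gfx4/5 BLORP path: pipelined state structures are emitted
 * into the batch's dynamic state buffer, and shader kernels live in the
 * context's program cache BO, so both are addressed as BO + offset.
 */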
static inline struct blorp_address
dynamic_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->state.bo,
.offset = offset,
};
}
static inline struct blorp_address
instruction_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->ice->shaders.cache_bo,
.offset = offset,
};
}
static struct blorp_address
blorp_emit_vs_state(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(VS_STATE), vs, 64, &offset) {
vs.Enable = false;
vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
#if GFX_VER == 5
vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> 2;
#else
vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries;
#endif
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_sf_state(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
const struct brw_sf_prog_data *prog_data = params->sf_prog_data;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(SF_STATE), sf, 64, &offset) {
#if GFX_VER == 4
sf.KernelStartPointer =
instruction_state_address(blorp_batch, params->sf_prog_kernel);
#else
sf.KernelStartPointer = params->sf_prog_kernel;
#endif
sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
sf.VertexURBEntryReadLength = prog_data->urb_read_length;
sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
sf.DispatchGRFStartRegisterForURBData = 3;
sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;
sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;
#if GFX_VER == 5
sf.MaximumNumberofThreads = MIN2(48, batch->ice->urb.nr_sf_entries) - 1;
#else
sf.MaximumNumberofThreads = MIN2(24, batch->ice->urb.nr_sf_entries) - 1;
#endif
sf.ViewportTransformEnable = false;
sf.CullMode = CULLMODE_NONE;
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_wm_state(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(WM_STATE), wm, 64, &offset) {
if (params->src.enabled) {
/* Iron Lake can't do sampler prefetch */
wm.SamplerCount = (GFX_VER != 5);
wm.BindingTableEntryCount = 2;
uint32_t sampler = blorp_emit_sampler_state(blorp_batch);
wm.SamplerStatePointer = dynamic_state_address(blorp_batch, sampler);
}
if (prog_data) {
wm.DispatchGRFStartRegisterForConstantSetupData0 =
prog_data->base.dispatch_grf_start_reg;
wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
wm.SetupURBEntryReadOffset = 0;
wm.DepthCoefficientURBReadOffset = 1;
wm.PixelShaderKillsPixel = prog_data->uses_kill;
wm.ThreadDispatchEnable = true;
wm.EarlyDepthTestEnable = true;
wm._8PixelDispatchEnable = prog_data->dispatch_8;
wm._16PixelDispatchEnable = prog_data->dispatch_16;
wm._32PixelDispatchEnable = prog_data->dispatch_32;
#if GFX_VER == 4
wm.KernelStartPointer0 =
instruction_state_address(blorp_batch, params->wm_prog_kernel);
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
#else
wm.KernelStartPointer0 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 0);
wm.KernelStartPointer1 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 1);
wm.KernelStartPointer2 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 2);
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
#endif
}
wm.MaximumNumberofThreads =
blorp_batch->blorp->compiler->devinfo->max_wm_threads - 1;
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_color_calc_state(struct blorp_batch *blorp_batch)
{
uint32_t cc_viewport = blorp_emit_cc_viewport(blorp_batch);
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
cc.CCViewportStatePointer = dynamic_state_address(blorp_batch, cc_viewport);
}
return dynamic_state_address(blorp_batch, offset);
}
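/* gfx4/5 have no per-stage inline 3DSTATE packets; the fixed-function
 * pipeline is described by indirect state structures referenced from
 * 3DSTATE_PIPELINED_POINTERS, followed by the URB fence and CURB setup.
 */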
static void
blorp_emit_pipeline(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
/* Size the URB for this blit, then bind the whole gfx4-5 fixed-function
 * pipeline in one shot via the pipelined state pointers. */
emit_urb_config(blorp_batch, params, NULL);
blorp_emit(blorp_batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
pp.PointertoVSState = blorp_emit_vs_state(blorp_batch);
pp.GSEnable = false;
pp.ClipEnable = false;
pp.PointertoSFState = blorp_emit_sf_state(blorp_batch, params);
pp.PointertoWMState = blorp_emit_wm_state(blorp_batch, params);
pp.PointertoColorCalcState = blorp_emit_color_calc_state(blorp_batch);
}
/* The URB fence must be re-issued for the new allocation; the zeroed
 * CS_URB_STATE and CONSTANT_BUFFER packets leave the push constant
 * buffer disabled. */
batch->screen->vtbl.upload_urb_fence(batch);
blorp_emit(blorp_batch, GENX(CS_URB_STATE), curb);
blorp_emit(blorp_batch, GENX(CONSTANT_BUFFER), curb);
}

View File

@ -0,0 +1,90 @@
# Copyright © 2017-2019 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
files_libcrocus = files(
'gen4_blorp_exec.h',
'driinfo_crocus.h',
'crocus_batch.c',
'crocus_batch.h',
'crocus_blit.c',
'crocus_bufmgr.c',
'crocus_bufmgr.h',
'crocus_clear.c',
'crocus_context.c',
'crocus_context.h',
'crocus_draw.c',
'crocus_fence.c',
'crocus_fence.h',
'crocus_fine_fence.c',
'crocus_fine_fence.h',
'crocus_formats.c',
'crocus_genx_macros.h',
'crocus_genx_protos.h',
'crocus_monitor.c',
'crocus_pipe.h',
'crocus_pipe_control.c',
'crocus_program.c',
'crocus_program_cache.c',
'crocus_resolve.c',
'crocus_resource.c',
'crocus_resource.h',
'crocus_screen.c',
'crocus_screen.h',
'crocus_disk_cache.c',
)
crocus_per_hw_ver_libs = []
# One static library per hardware generation (gfx 4.0 through 7.5); each is
# compiled with the matching GFX_VERx10 so the genxml state packing is
# specialized per generation.
foreach v : ['40', '45', '50', '60', '70', '75']
crocus_per_hw_ver_libs += static_library(
'crocus_per_hw_ver@0@'.format(v),
['crocus_blorp.c', 'crocus_query.c', 'crocus_state.c', 'crocus_blt.c', gen_xml_pack],
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_intel],
c_args : [
no_override_init_args, c_sse2_args,
'-DGFX_VERx10=@0@'.format(v),
],
gnu_symbol_visibility : 'hidden',
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
)
endforeach
libcrocus = static_library(
'crocus',
[files_libcrocus, gen_xml_pack],
include_directories : [
inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_intel,
inc_gallium_drivers,
# these should not be necessary, but main/macros.h...
inc_mesa, inc_mapi
],
c_args : [c_sse2_args],
cpp_args : [c_sse2_args],
gnu_symbol_visibility : 'hidden',
dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_libintel_common, idep_nir_headers],
link_with : [
crocus_per_hw_ver_libs, libintel_compiler, libintel_dev, libisl,
libblorp, libintel_perf
],
)
driver_crocus = declare_dependency(
compile_args : '-DGALLIUM_CROCUS',
link_with : [libcrocus, libcrocuswinsys],
)

View File

@ -129,6 +129,12 @@ if with_gallium_tegra
else
driver_tegra = declare_dependency()
endif
if with_gallium_crocus
subdir('winsys/crocus/drm')
subdir('drivers/crocus')
else
driver_crocus = declare_dependency()
endif
if with_gallium_iris
subdir('winsys/iris/drm')
subdir('drivers/iris')

View File

@ -64,7 +64,7 @@ libgallium_nine = shared_library(
dep_selinux, dep_libdrm, dep_llvm, dep_thread,
idep_xmlconfig, idep_mesautil, idep_nir,
driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
driver_i915, driver_svga, driver_iris
driver_i915, driver_svga, driver_iris, driver_crocus
],
name_prefix : '',
version : '.'.join(nine_version),

View File

@ -58,7 +58,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_swr, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi
driver_asahi, driver_crocus
],
# Will be deleted during installation, see install_megadrivers.py
install : true,
@ -98,6 +98,7 @@ foreach d : [[with_gallium_kmsro, [
[with_gallium_panfrost, 'panfrost_dri.so'],
[with_gallium_etnaviv, 'etnaviv_dri.so'],
[with_gallium_tegra, 'tegra_dri.so'],
[with_gallium_crocus, 'crocus_dri.so'],
[with_gallium_iris, 'iris_dri.so'],
[with_gallium_i915, 'i915_dri.so'],
[with_gallium_r300, 'r300_dri.so'],

View File

@ -42,6 +42,10 @@ DEFINE_LOADER_DRM_ENTRYPOINT(i915)
DEFINE_LOADER_DRM_ENTRYPOINT(iris)
#endif
#if defined(GALLIUM_CROCUS)
DEFINE_LOADER_DRM_ENTRYPOINT(crocus)
#endif
#if defined(GALLIUM_NOUVEAU)
DEFINE_LOADER_DRM_ENTRYPOINT(nouveau)
#endif

View File

@ -0,0 +1,33 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_DRM_PUBLIC_H
#define CROCUS_DRM_PUBLIC_H
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
crocus_drm_screen_create(int drm_fd, const struct pipe_screen_config *config);
#endif /* CROCUS_DRM_PUBLIC_H */

View File

@ -0,0 +1,39 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <unistd.h>
#include <fcntl.h>
#include "util/os_file.h"
#include "crocus_drm_public.h"
#include "crocus/crocus_screen.h"
struct pipe_screen *
crocus_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
int newfd = os_dupfd_cloexec(fd);
if (newfd < 0)
return NULL;
return crocus_screen_create(newfd, config);
}
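#if 0
/* Hypothetical caller sketch, not part of this patch: because the screen
 * takes a CLOEXEC duplicate of the fd, the caller keeps ownership of its
 * own descriptor and may close it once the screen has been created.
 * (Assumes a filled-in pipe_screen_config named 'config'.) */
int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
struct pipe_screen *screen = crocus_drm_screen_create(fd, &config);
close(fd);
#endif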

View File

@ -0,0 +1,29 @@
# Copyright © 2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
libcrocuswinsys = static_library(
'crocuswinsys',
files('crocus_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
)

View File

@ -829,7 +829,7 @@ decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
struct intel_field_iterator iter;
intel_field_iterator_init(&iter, inst, p, 0, false);
while (intel_field_iterator_next(&iter)) {
if (str_ends_with(iter.name, "Pointer")) {
if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
state_offset = iter.raw_value;
break;
}
@ -900,6 +900,13 @@ decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
}
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
const uint32_t *p)
{
decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
const uint32_t *p)
@ -1208,6 +1215,7 @@ struct custom_decoder {
{ "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
{ "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
{ "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
{ "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
{ "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
{ "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
{ "MI_LOAD_REGISTER_IMM", decode_load_register_imm },

View File

@ -76,6 +76,7 @@ static const struct {
{ 0x8086, "i915", i915_chip_ids, ARRAY_SIZE(i915_chip_ids) },
{ 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
{ 0x8086, "iris", NULL, -1, is_kernel_i915 },
{ 0x8086, "crocus", NULL, -1, is_kernel_i915 },
{ 0x1002, "radeon", r100_chip_ids, ARRAY_SIZE(r100_chip_ids) },
{ 0x1002, "r200", r200_chip_ids, ARRAY_SIZE(r200_chip_ids) },
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },