2016-08-18 17:36:58 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2016 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2016-10-11 18:26:21 +01:00
|
|
|
#ifndef BLORP_GENX_EXEC_H
|
|
|
|
#define BLORP_GENX_EXEC_H
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
#include "blorp_priv.h"
|
2021-04-05 19:47:31 +01:00
|
|
|
#include "dev/intel_device_info.h"
|
2021-03-03 21:20:06 +00:00
|
|
|
#include "common/intel_sample_positions.h"
|
|
|
|
#include "common/intel_l3_config.h"
|
2017-06-05 22:19:28 +01:00
|
|
|
#include "genxml/gen_macros.h"
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This file provides the blorp pipeline setup and execution functionality.
|
|
|
|
* It defines the following function:
|
|
|
|
*
|
|
|
|
* static void
|
|
|
|
* blorp_exec(struct blorp_context *blorp, void *batch_data,
|
2016-08-19 13:43:29 +01:00
|
|
|
* const struct blorp_params *params);
|
2016-08-18 17:36:58 +01:00
|
|
|
*
|
|
|
|
* It is the job of whoever includes this header to wrap this in something
|
|
|
|
* to get an externally visible symbol.
|
|
|
|
*
|
|
|
|
* In order for the blorp_exec function to work, the driver must provide
|
|
|
|
* implementations of the following static helper functions.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void *
|
|
|
|
blorp_emit_dwords(struct blorp_batch *batch, unsigned n);
|
|
|
|
|
|
|
|
static uint64_t
|
|
|
|
blorp_emit_reloc(struct blorp_batch *batch,
|
|
|
|
void *location, struct blorp_address address, uint32_t delta);
|
|
|
|
|
2020-06-30 23:00:13 +01:00
|
|
|
static void
|
|
|
|
blorp_measure_start(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params);
|
|
|
|
|
2021-11-26 16:22:40 +00:00
|
|
|
static void
|
|
|
|
blorp_measure_end(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void *
|
|
|
|
blorp_alloc_dynamic_state(struct blorp_batch *batch,
|
|
|
|
uint32_t size,
|
|
|
|
uint32_t alignment,
|
|
|
|
uint32_t *offset);
|
2021-06-21 20:45:24 +01:00
|
|
|
|
|
|
|
UNUSED static void *
|
|
|
|
blorp_alloc_general_state(struct blorp_batch *batch,
|
|
|
|
uint32_t size,
|
|
|
|
uint32_t alignment,
|
|
|
|
uint32_t *offset);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void *
|
|
|
|
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
|
|
|
|
struct blorp_address *addr);
|
2018-06-01 00:39:16 +01:00
|
|
|
static void
|
|
|
|
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
|
|
|
|
const struct blorp_address *addrs,
|
2019-11-25 18:42:42 +00:00
|
|
|
uint32_t *sizes,
|
2018-06-01 00:39:16 +01:00
|
|
|
unsigned num_vbs);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2020-01-17 18:09:13 +00:00
|
|
|
UNUSED static struct blorp_address
|
2020-02-23 12:34:49 +00:00
|
|
|
blorp_get_workaround_address(struct blorp_batch *batch);
|
2017-06-05 22:19:28 +01:00
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void
|
|
|
|
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
|
|
|
|
unsigned state_size, unsigned state_alignment,
|
2016-08-24 04:51:26 +01:00
|
|
|
uint32_t *bt_offset, uint32_t *surface_offsets,
|
2016-08-18 17:36:58 +01:00
|
|
|
void **surface_maps);
|
2017-02-20 19:03:04 +00:00
|
|
|
|
2020-05-11 19:49:55 +01:00
|
|
|
static uint32_t
|
|
|
|
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
|
|
|
|
uint32_t offset);
|
|
|
|
|
2017-02-20 19:03:04 +00:00
|
|
|
static void
|
|
|
|
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void
|
|
|
|
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
|
|
|
|
struct blorp_address address, uint32_t delta);
|
|
|
|
|
blorp: Add blorp_get_surface_address to the driver interface.
Currently, BLORP expects drivers to provide two functions for dealing
with buffers: blorp_emit_reloc and blorp_surface_reloc. Both record a
relocation and combine the BO address and offset into a full 64-bit
address. Traditionally, blorp_surface_reloc has written that combined
address to an implicitly-known buffer where surface states are stored.
(In contrast, blorp_emit_reloc returns the value.)
The upcoming Iris driver stores surface states in multiple buffers,
which makes it impossible for blorp_surface_reloc to write the combined
address - it only takes an offset, not the actual buffer to write to.
This commit adds a third function, blorp_get_surface_address, which
combines and returns an address, which is then passed to ISL's surface
state fill functions. Softpin-only drivers can return a real address
here and skip writing it in blorp_surface_reloc. Relocation-based
drivers have options. They can simply return 0 from the new
function, and continue writing the address from blorp_surface_reloc.
Or, they can return a presumed address from blorp_get_surface_address,
and have other relocation processing write the real value later.
For now, i965 and anv simply return 0.
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-01-09 21:31:18 +00:00
|
|
|
/*
 * Driver hooks (second group); several only exist for some hardware
 * generations.
 */

/* Combines and returns the address that is handed to ISL's surface state
 * fill functions.  Relocation-based drivers may return 0 here and keep
 * writing the real address from blorp_surface_reloc().
 */
static uint64_t blorp_get_surface_address(struct blorp_batch *batch,
                                          struct blorp_address address);

#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch);
#endif

#if GFX_VER >= 7
/* L3 configuration that emit_urb_config() forwards to
 * intel_get_urb_config().
 */
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch);
#else
/* Before Gfx7 the whole URB setup is delegated to the driver;
 * emit_urb_config() only computes the two entry sizes.
 */
static void blorp_emit_urb_config(struct blorp_batch *batch,
                                  unsigned vs_entry_size,
                                  unsigned sf_entry_size);
#endif

static void blorp_emit_pipeline(struct blorp_batch *batch,
                                const struct blorp_params *params);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
/***** BEGIN blorp_exec implementation ******/
|
|
|
|
|
|
|
|
static uint64_t
|
2016-08-24 01:20:34 +01:00
|
|
|
_blorp_combine_address(struct blorp_batch *batch, void *location,
|
|
|
|
struct blorp_address address, uint32_t delta)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
|
|
|
if (address.buffer == NULL) {
|
|
|
|
return address.offset + delta;
|
|
|
|
} else {
|
|
|
|
return blorp_emit_reloc(batch, location, address, delta);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-24 01:20:34 +01:00
|
|
|
/* Customisation points defined ahead of the genxml/genX_pack.h include
 * below (presumably consumed by it): the address type, the user-data type
 * and the address-combining callback.
 */
#define __gen_address_type    struct blorp_address
#define __gen_user_data       struct blorp_batch
#define __gen_combine_address _blorp_combine_address
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
#include "genxml/genX_pack.h"
|
|
|
|
|
|
|
|
/* Token-pasting helpers: given a command/state name, produce the names of
 * its associated _length, _length_bias, _header and _pack symbols.
 */
#define _blorp_cmd_length(cmd)      cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd)      cmd ## _header
#define _blorp_cmd_pack(cmd)        cmd ## _pack
|
|
|
|
|
|
|
|
/**
 * Emit a single fixed-length command.
 *
 * Written as a one-iteration for loop so it can prefix a block:
 *
 *    blorp_emit(batch, GENX(SOME_CMD), c) {
 *       c.Field = value;
 *    }
 *
 * `name` is a struct initialised from the command's header, and space for
 * the command is requested through blorp_emit_dwords().  After the block
 * has run, the struct is packed into that space.  If no space was obtained
 * (NULL), the block is skipped and nothing is packed.
 */
#define blorp_emit(batch, cmd, name)                               \
   for (struct cmd name = { _blorp_cmd_header(cmd) },              \
        *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd));  \
        __builtin_expect(_dst != NULL, 1);                         \
        _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name),          \
        _dst = NULL)
|
|
|
|
|
2019-04-03 01:08:52 +01:00
|
|
|
/**
 * Emit the header of a variable-length command that occupies n dwords.
 *
 * Space for n dwords is requested through blorp_emit_dwords().  On success
 * a template struct is built from the command header, DWordLength
 * (n minus the command's length bias) and any extra designated
 * initialisers given as trailing arguments, and is packed at the start of
 * that space.
 *
 * The expression evaluates to a pointer to the dword following the header
 * (where the caller writes the array payload), or NULL when no space was
 * obtained.
 *
 * n is parenthesised wherever it is used so that an expression argument
 * cannot be mis-parsed; note that it is still evaluated more than once.
 */
#define blorp_emitn(batch, cmd, n, ...) ({                     \
   uint32_t *_dw = blorp_emit_dwords(batch, (n));              \
   if (_dw) {                                                  \
      struct cmd template = {                                  \
         _blorp_cmd_header(cmd),                               \
         .DWordLength = (n) - _blorp_cmd_length_bias(cmd),     \
         __VA_ARGS__                                           \
      };                                                       \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);             \
   }                                                           \
   _dw ? _dw + 1 : NULL; /* Array starts at dw[1] */           \
})
|
|
|
|
|
2016-09-10 22:15:51 +01:00
|
|
|
/* Expression yielding an all-zero `struct S` (zeroed with memset). */
#define STRUCT_ZERO(S) ({                                      \
   struct S zeroed_;                                           \
   memset(&zeroed_, 0, sizeof(zeroed_));                       \
   zeroed_;                                                    \
})

/**
 * Emit one piece of dynamic state; used as a block prefix like
 * blorp_emit().
 *
 * `name` starts out zeroed.  Storage of _blorp_cmd_length(state) * 4 bytes
 * is requested from blorp_alloc_dynamic_state() with the given alignment,
 * and `offset` is passed through to that hook.  After the block has run,
 * the struct is packed into the storage and blorp_flush_range() is called
 * on it.  If the allocation returns NULL, the block is skipped and nothing
 * is packed or flushed.
 */
#define blorp_emit_dynamic(batch, state, name, align, offset)            \
   for (struct state name = STRUCT_ZERO(state),                          \
        *_dst = blorp_alloc_dynamic_state(batch,                         \
                                          _blorp_cmd_length(state) * 4,  \
                                          align, offset);                \
        __builtin_expect(_dst != NULL, 1);                               \
        _blorp_cmd_pack(state)(batch, (void *)_dst, &name),              \
        blorp_flush_range(batch, _dst, _blorp_cmd_length(state) * 4),    \
        _dst = NULL)
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
/* 3DSTATE_URB
|
|
|
|
* 3DSTATE_URB_VS
|
|
|
|
* 3DSTATE_URB_HS
|
|
|
|
* 3DSTATE_URB_DS
|
|
|
|
* 3DSTATE_URB_GS
|
|
|
|
*
|
|
|
|
* Assign the entire URB to the VS. Even though the VS is disabled, URB space
|
|
|
|
* is still needed because the clipper loads the VUE's from the URB. From
|
|
|
|
* the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
|
|
|
|
* Dword 1.15:0 "VS Number of URB Entries":
|
|
|
|
* This field is always used (even if VS Function Enable is DISABLED).
|
|
|
|
*
|
|
|
|
* The warning below appears in the PRM (Section 3DSTATE_URB), but we can
|
|
|
|
* safely ignore it because this batch contains only one draw call.
|
|
|
|
* Because of URB corruption caused by allocating a previous GS unit
|
|
|
|
* URB entry to the VS unit, software is required to send a “GS NULL
|
|
|
|
* Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
|
|
|
|
* plus a dummy DRAW call before any case where VS will be taking over
|
|
|
|
* GS URB space.
|
|
|
|
*
|
|
|
|
* If the 3DSTATE_URB_VS is emitted, then the others must be also.
|
|
|
|
* From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
|
|
|
|
*
|
|
|
|
* 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
|
|
|
|
* programmed in order for the programming of this state to be
|
|
|
|
* valid.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
emit_urb_config(struct blorp_batch *batch,
|
2020-01-17 20:13:28 +00:00
|
|
|
const struct blorp_params *params,
|
2021-03-03 21:49:18 +00:00
|
|
|
UNUSED enum intel_urb_deref_block_size *deref_block_size)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2016-08-29 16:51:10 +01:00
|
|
|
/* Once vertex fetcher has written full VUE entries with complete
|
|
|
|
* header the space requirement is as follows per vertex (in bytes):
|
|
|
|
*
|
|
|
|
* Header Position Program constants
|
|
|
|
* +--------+------------+-------------------+
|
|
|
|
* | 16 | 16 | n x 16 |
|
|
|
|
* +--------+------------+-------------------+
|
|
|
|
*
|
|
|
|
* where 'n' stands for number of varying inputs expressed as vec4s.
|
|
|
|
*/
|
|
|
|
const unsigned num_varyings =
|
|
|
|
params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
|
|
|
|
const unsigned total_needed = 16 + 16 + num_varyings * 16;
|
|
|
|
|
|
|
|
/* The URB size is expressed in units of 64 bytes (512 bits) */
|
|
|
|
const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);
|
|
|
|
|
2020-07-09 19:07:42 +01:00
|
|
|
ASSERTED const unsigned sf_entry_size =
|
2017-05-13 04:24:46 +01:00
|
|
|
params->sf_prog_data ? params->sf_prog_data->urb_entry_size : 0;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2020-01-17 18:09:13 +00:00
|
|
|
assert(sf_entry_size == 0);
|
|
|
|
const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
|
|
|
|
|
|
|
|
unsigned entries[4], start[4];
|
2021-01-26 05:41:48 +00:00
|
|
|
bool constrained;
|
2021-03-03 21:49:18 +00:00
|
|
|
intel_get_urb_config(batch->blorp->compiler->devinfo,
|
2021-03-09 17:44:02 +00:00
|
|
|
blorp_get_l3_config(batch),
|
|
|
|
false, false, entry_size,
|
|
|
|
entries, start, deref_block_size, &constrained);
|
2020-01-17 18:09:13 +00:00
|
|
|
|
2021-03-16 17:09:00 +00:00
|
|
|
#if GFX_VERx10 == 70
|
2020-01-17 18:09:13 +00:00
|
|
|
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
|
|
|
|
*
|
|
|
|
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
|
|
|
|
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
|
|
|
|
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
|
|
|
|
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
|
|
|
|
* needs to be sent before any combination of VS associated 3DSTATE."
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
|
|
|
pc.DepthStallEnable = true;
|
|
|
|
pc.PostSyncOperation = WriteImmediateData;
|
2020-02-23 12:34:49 +00:00
|
|
|
pc.Address = blorp_get_workaround_address(batch);
|
2020-01-17 18:09:13 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {
|
|
|
|
urb._3DCommandSubOpcode += i;
|
|
|
|
urb.VSURBStartingAddress = start[i];
|
|
|
|
urb.VSURBEntryAllocationSize = entry_size[i] - 1;
|
|
|
|
urb.VSNumberofURBEntries = entries[i];
|
|
|
|
}
|
|
|
|
}
|
2021-03-16 17:14:30 +00:00
|
|
|
#else /* GFX_VER < 7 */
|
2017-05-13 04:24:46 +01:00
|
|
|
blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
|
2020-01-17 18:09:13 +00:00
|
|
|
#endif
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
/* Forward declaration (Gfx7+ only).  blorp_emit_input_varying_data() uses
 * it to have the GPU copy the clear color into the vertex buffer.
 */
static void blorp_emit_memcpy(struct blorp_batch *batch,
                              struct blorp_address dst,
                              struct blorp_address src,
                              uint32_t size);
#endif
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_vertex_data(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params,
|
2016-08-18 17:36:58 +01:00
|
|
|
struct blorp_address *addr,
|
|
|
|
uint32_t *size)
|
|
|
|
{
|
|
|
|
const float vertices[] = {
|
2016-10-07 06:03:12 +01:00
|
|
|
/* v0 */ (float)params->x1, (float)params->y1, params->z,
|
|
|
|
/* v1 */ (float)params->x0, (float)params->y1, params->z,
|
|
|
|
/* v2 */ (float)params->x0, (float)params->y0, params->z,
|
2016-08-18 17:36:58 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
|
|
|
|
memcpy(data, vertices, sizeof(vertices));
|
|
|
|
*size = sizeof(vertices);
|
2017-02-20 19:03:04 +00:00
|
|
|
blorp_flush_range(batch, data, *size);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_input_varying_data(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params,
|
2016-08-18 17:36:58 +01:00
|
|
|
struct blorp_address *addr,
|
|
|
|
uint32_t *size)
|
|
|
|
{
|
|
|
|
const unsigned vec4_size_in_bytes = 4 * sizeof(float);
|
|
|
|
const unsigned max_num_varyings =
|
|
|
|
DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
|
2016-10-21 22:32:03 +01:00
|
|
|
const unsigned num_varyings =
|
|
|
|
params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 22:32:03 +01:00
|
|
|
*size = 16 + num_varyings * vec4_size_in_bytes;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 22:22:10 +01:00
|
|
|
const uint32_t *const inputs_src = (const uint32_t *)¶ms->wm_inputs;
|
2017-02-20 19:03:04 +00:00
|
|
|
void *data = blorp_alloc_vertex_buffer(batch, *size, addr);
|
|
|
|
uint32_t *inputs = data;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 21:10:52 +01:00
|
|
|
/* Copy in the VS inputs */
|
|
|
|
assert(sizeof(params->vs_inputs) == 16);
|
|
|
|
memcpy(inputs, ¶ms->vs_inputs, sizeof(params->vs_inputs));
|
2016-10-21 22:32:03 +01:00
|
|
|
inputs += 4;
|
|
|
|
|
|
|
|
if (params->wm_prog_data) {
|
|
|
|
/* Walk over the attribute slots, determine if the attribute is used by
|
|
|
|
* the program and when necessary copy the values from the input storage
|
|
|
|
* to the vertex data buffer.
|
|
|
|
*/
|
|
|
|
for (unsigned i = 0; i < max_num_varyings; i++) {
|
|
|
|
const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 22:32:03 +01:00
|
|
|
const int input_index = params->wm_prog_data->urb_setup[attr];
|
|
|
|
if (input_index < 0)
|
|
|
|
continue;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 22:32:03 +01:00
|
|
|
memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 22:32:03 +01:00
|
|
|
inputs += 4;
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
2017-02-20 19:03:04 +00:00
|
|
|
|
|
|
|
blorp_flush_range(batch, data, *size);
|
2018-06-05 01:27:53 +01:00
|
|
|
|
|
|
|
if (params->dst_clear_color_as_input) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2018-06-05 01:27:53 +01:00
|
|
|
/* In this case, the clear color isn't known statically and instead
|
|
|
|
* comes in through an indirect which we have to copy into the vertex
|
|
|
|
* buffer before we execute the 3DPRIMITIVE. We already copied the
|
|
|
|
* value of params->wm_inputs.clear_color into the vertex buffer in the
|
|
|
|
* loop above. Now we emit code to stomp it from the GPU with the
|
|
|
|
* actual clear color value.
|
|
|
|
*/
|
|
|
|
assert(num_varyings == 1);
|
|
|
|
|
|
|
|
/* The clear color is the first thing after the header */
|
|
|
|
struct blorp_address clear_color_input_addr = *addr;
|
|
|
|
clear_color_input_addr.offset += 16;
|
|
|
|
|
|
|
|
const unsigned clear_color_size =
|
2021-03-16 17:14:30 +00:00
|
|
|
GFX_VER < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;
|
2018-06-05 01:27:53 +01:00
|
|
|
blorp_emit_memcpy(batch, clear_color_input_addr,
|
|
|
|
params->dst.clear_color_addr,
|
|
|
|
clear_color_size);
|
|
|
|
#else
|
|
|
|
unreachable("MCS partial resolve is not a thing on SNB and earlier");
|
|
|
|
#endif
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2020-09-24 20:19:39 +01:00
|
|
|
blorp_fill_vertex_buffer_state(struct GENX(VERTEX_BUFFER_STATE) *vb,
|
2017-11-11 21:40:03 +00:00
|
|
|
unsigned idx,
|
|
|
|
struct blorp_address addr, uint32_t size,
|
|
|
|
uint32_t stride)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2017-11-11 21:40:03 +00:00
|
|
|
vb[idx].VertexBufferIndex = idx;
|
|
|
|
vb[idx].BufferStartingAddress = addr;
|
|
|
|
vb[idx].BufferPitch = stride;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 6
|
genxml: Consistently use a numeric "MOCS" field
When we first started using genxml, we decided to represent MOCS as an
actual structure, and pack values. However, in many places, it was more
convenient to use a numeric value rather than treating it as a struct,
so we added secondary setters in a bunch of places as well.
We were not entirely consistent, either. Some places only had one.
Gen6 had both kinds of setters for STATE_BASE_ADDRESS, but newer gens
only had the struct-based setters. The names were sometimes "Constant
Buffer Object Control State" instead of "Memory", making it harder to
find. Many had prefixes like "Vertex Buffer MOCS"...in a vertex buffer
packet...which is a bit redundant.
On modern hardware, MOCS is simply an index into a table, but we were
still carrying around the structure with an "Index to MOCS Table" field,
in addition to the direct numeric setters. This is clunky - we really
just want a number on new hardware.
This patch eliminates the struct-based setters, and makes the numeric
setters be consistently called "MOCS". We leave the struct definition
around on Gen7-8 for reference purposes, but it is unused.
v2: Drop bonus "Depth Buffer MOCS" fields on Gen7.5 and Gen9
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
2018-12-11 08:34:11 +00:00
|
|
|
vb[idx].MOCS = addr.mocs;
|
2016-08-18 17:36:58 +01:00
|
|
|
#endif
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2017-11-11 21:40:03 +00:00
|
|
|
vb[idx].AddressModifyEnable = true;
|
2016-08-18 17:36:58 +01:00
|
|
|
#endif
|
2017-11-11 21:40:03 +00:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2017-11-11 21:40:03 +00:00
|
|
|
vb[idx].BufferSize = size;
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 5
|
2017-11-11 21:40:03 +00:00
|
|
|
vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;
|
|
|
|
vb[idx].EndAddress = vb[idx].BufferStartingAddress;
|
|
|
|
vb[idx].EndAddress.offset += size - 1;
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER == 4
|
2017-11-11 21:40:03 +00:00
|
|
|
vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;
|
|
|
|
vb[idx].MaxIndex = stride > 0 ? size / stride : 0;
|
2016-08-18 17:36:58 +01:00
|
|
|
#endif
|
2021-03-24 16:08:58 +00:00
|
|
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
vb[idx].L3BypassDisable = true;
|
|
|
|
#endif
|
2017-11-11 21:40:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_vertex_buffers(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
2017-11-11 22:28:17 +00:00
|
|
|
struct GENX(VERTEX_BUFFER_STATE) vb[3];
|
2018-06-05 01:27:53 +01:00
|
|
|
uint32_t num_vbs = 2;
|
2017-11-11 21:40:03 +00:00
|
|
|
memset(vb, 0, sizeof(vb));
|
|
|
|
|
2018-06-01 00:39:16 +01:00
|
|
|
struct blorp_address addrs[2] = {};
|
2019-11-25 18:42:42 +00:00
|
|
|
uint32_t sizes[2];
|
|
|
|
blorp_emit_vertex_data(batch, params, &addrs[0], &sizes[0]);
|
2020-09-24 20:19:39 +01:00
|
|
|
blorp_fill_vertex_buffer_state(vb, 0, addrs[0], sizes[0],
|
2018-06-01 00:39:16 +01:00
|
|
|
3 * sizeof(float));
|
2017-11-11 21:40:03 +00:00
|
|
|
|
2019-11-25 18:42:42 +00:00
|
|
|
blorp_emit_input_varying_data(batch, params, &addrs[1], &sizes[1]);
|
2020-09-24 20:19:39 +01:00
|
|
|
blorp_fill_vertex_buffer_state(vb, 1, addrs[1], sizes[1], 0);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2019-11-25 18:42:42 +00:00
|
|
|
blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, sizes, num_vbs);
|
2019-01-03 16:14:50 +00:00
|
|
|
|
2017-11-11 22:28:17 +00:00
|
|
|
const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
|
2016-08-18 17:36:58 +01:00
|
|
|
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
|
2017-03-09 13:22:25 +00:00
|
|
|
if (!dw)
|
|
|
|
return;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2017-11-11 22:28:17 +00:00
|
|
|
for (unsigned i = 0; i < num_vbs; i++) {
|
2016-08-18 17:36:58 +01:00
|
|
|
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
|
|
|
|
dw += GENX(VERTEX_BUFFER_STATE_length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_vertex_elements(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
|
|
|
const unsigned num_varyings =
|
|
|
|
params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
|
2021-03-29 22:41:58 +01:00
|
|
|
bool need_ndc = batch->blorp->compiler->devinfo->ver <= 5;
|
2016-09-10 00:30:24 +01:00
|
|
|
const unsigned num_elements = 2 + need_ndc + num_varyings;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
|
|
|
|
memset(ve, 0, num_elements * sizeof(*ve));
|
|
|
|
|
|
|
|
/* Setup VBO for the rectangle primitive..
|
|
|
|
*
|
|
|
|
* A rectangle primitive (3DPRIM_RECTLIST) consists of only three
|
|
|
|
* vertices. The vertices reside in screen space with DirectX
|
|
|
|
* coordinates (that is, (0, 0) is the upper left corner).
|
|
|
|
*
|
|
|
|
* v2 ------ implied
|
|
|
|
* | |
|
|
|
|
* | |
|
2016-10-05 17:32:52 +01:00
|
|
|
* v1 ----- v0
|
2016-08-18 17:36:58 +01:00
|
|
|
*
|
|
|
|
* Since the VS is disabled, the clipper loads each VUE directly from
|
|
|
|
* the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
|
|
|
|
* 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
|
|
|
|
* dw0: Reserved, MBZ.
|
2016-08-26 15:37:44 +01:00
|
|
|
* dw1: Render Target Array Index. Below vertex fetcher gets programmed
|
|
|
|
* to assign this with primitive instance identifier which will be
|
|
|
|
* used for layered clears. All other renders have only one instance
|
|
|
|
* and therefore the value will be effectively zero.
|
2016-08-18 17:36:58 +01:00
|
|
|
* dw2: Viewport Index. The HiZ op disables viewport mapping and
|
|
|
|
* scissoring, so set the dword to 0.
|
|
|
|
* dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
|
|
|
|
* so set the dword to 0.
|
|
|
|
* dw4: Vertex Position X.
|
|
|
|
* dw5: Vertex Position Y.
|
|
|
|
* dw6: Vertex Position Z.
|
|
|
|
* dw7: Vertex Position W.
|
|
|
|
*
|
|
|
|
* dw8: Flat vertex input 0
|
|
|
|
* dw9: Flat vertex input 1
|
|
|
|
* ...
|
|
|
|
* dwn: Flat vertex input n - 8
|
|
|
|
*
|
|
|
|
* For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
|
|
|
|
* "Vertex URB Entry (VUE) Formats".
|
|
|
|
*
|
|
|
|
* Only vertex position X and Y are going to be variable, Z is fixed to
|
2016-08-26 15:37:44 +01:00
|
|
|
* zero and W to one. Header words dw0,2,3 are zero. There is no need to
|
2016-08-18 17:36:58 +01:00
|
|
|
* include the fixed values in the vertex buffer. Vertex fetcher can be
|
|
|
|
* instructed to fill vertex elements with constant values of one and zero
|
|
|
|
* instead of reading them from the buffer.
|
|
|
|
* Flat inputs are program constants that are not interpolated. Moreover
|
|
|
|
* their values will be the same between vertices.
|
|
|
|
*
|
|
|
|
* See the vertex element setup below.
|
|
|
|
*/
|
2017-05-13 00:22:58 +01:00
|
|
|
unsigned slot = 0;
|
|
|
|
|
|
|
|
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
|
|
|
.VertexBufferIndex = 1,
|
|
|
|
.Valid = true,
|
2018-02-14 02:13:51 +00:00
|
|
|
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
|
2017-05-13 00:22:58 +01:00
|
|
|
.SourceElementOffset = 0,
|
|
|
|
.Component0Control = VFCOMP_STORE_SRC,
|
|
|
|
|
2021-03-29 23:46:12 +01:00
|
|
|
/* From Gfx8 onwards hardware is no more instructed to overwrite
|
2017-05-13 00:22:58 +01:00
|
|
|
* components using an element specifier. Instead one has separate
|
|
|
|
* 3DSTATE_VF_SGVS (System Generated Value Setup) state packet for it.
|
|
|
|
*/
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2017-05-13 00:22:58 +01:00
|
|
|
.Component1Control = VFCOMP_STORE_0,
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 5
|
2017-05-13 00:22:58 +01:00
|
|
|
.Component1Control = VFCOMP_STORE_IID,
|
2017-03-18 17:33:25 +00:00
|
|
|
#else
|
2017-05-13 00:22:58 +01:00
|
|
|
.Component1Control = VFCOMP_STORE_0,
|
2016-08-26 15:37:44 +01:00
|
|
|
#endif
|
2017-08-23 22:13:05 +01:00
|
|
|
.Component2Control = VFCOMP_STORE_0,
|
|
|
|
.Component3Control = VFCOMP_STORE_0,
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER <= 5
|
2016-09-10 00:30:24 +01:00
|
|
|
.DestinationElementOffset = slot * 4,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
slot++;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER <= 5
|
2016-09-10 00:30:24 +01:00
|
|
|
/* On Iron Lake and earlier, a native device coordinates version of the
|
|
|
|
* position goes right after the normal VUE header and before position.
|
|
|
|
* Since w == 1 for all of our coordinates, this is just a copy of the
|
|
|
|
* position.
|
|
|
|
*/
|
|
|
|
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
|
|
|
.VertexBufferIndex = 0,
|
|
|
|
.Valid = true,
|
2018-02-14 02:13:51 +00:00
|
|
|
.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
|
2016-09-10 00:30:24 +01:00
|
|
|
.SourceElementOffset = 0,
|
|
|
|
.Component0Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component1Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component2Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component3Control = VFCOMP_STORE_1_FP,
|
|
|
|
.DestinationElementOffset = slot * 4,
|
2017-05-13 00:22:58 +01:00
|
|
|
};
|
|
|
|
slot++;
|
2016-09-10 00:30:24 +01:00
|
|
|
#endif
|
2017-05-13 00:22:58 +01:00
|
|
|
|
|
|
|
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
|
|
|
.VertexBufferIndex = 0,
|
|
|
|
.Valid = true,
|
2018-02-14 02:13:51 +00:00
|
|
|
.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
|
2017-05-13 00:22:58 +01:00
|
|
|
.SourceElementOffset = 0,
|
|
|
|
.Component0Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component1Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component2Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component3Control = VFCOMP_STORE_1_FP,
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER <= 5
|
2016-09-10 00:30:24 +01:00
|
|
|
.DestinationElementOffset = slot * 4,
|
|
|
|
#endif
|
2017-05-13 00:22:58 +01:00
|
|
|
};
|
|
|
|
slot++;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2018-06-05 01:27:53 +01:00
|
|
|
for (unsigned i = 0; i < num_varyings; ++i) {
|
2017-05-13 00:22:58 +01:00
|
|
|
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
2018-06-05 01:27:53 +01:00
|
|
|
.VertexBufferIndex = 1,
|
2017-05-13 00:22:58 +01:00
|
|
|
.Valid = true,
|
2018-02-14 02:13:51 +00:00
|
|
|
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
|
2018-06-05 01:27:53 +01:00
|
|
|
.SourceElementOffset = 16 + i * 4 * sizeof(float),
|
|
|
|
.Component0Control = VFCOMP_STORE_SRC,
|
2017-05-13 00:22:58 +01:00
|
|
|
.Component1Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component2Control = VFCOMP_STORE_SRC,
|
|
|
|
.Component3Control = VFCOMP_STORE_SRC,
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER <= 5
|
2018-06-05 01:27:53 +01:00
|
|
|
.DestinationElementOffset = slot * 4,
|
2016-09-10 00:30:24 +01:00
|
|
|
#endif
|
2017-05-13 00:22:58 +01:00
|
|
|
};
|
|
|
|
slot++;
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned num_dwords =
|
|
|
|
1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
|
|
|
|
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
|
2017-03-09 13:22:25 +00:00
|
|
|
if (!dw)
|
|
|
|
return;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
for (unsigned i = 0; i < num_elements; i++) {
|
|
|
|
GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);
|
|
|
|
dw += GENX(VERTEX_ELEMENT_STATE_length);
|
|
|
|
}
|
|
|
|
|
2019-04-12 19:55:38 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
|
|
|
|
vf.StatisticsEnable = false;
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-26 15:37:44 +01:00
|
|
|
/* Overwrite Render Target Array Index (2nd dword) in the VUE header with
|
|
|
|
* primitive instance identifier. This is used for layered clears.
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
|
|
|
|
sgvs.InstanceIDEnable = true;
|
|
|
|
sgvs.InstanceIDComponentNumber = COMP_1;
|
|
|
|
sgvs.InstanceIDElementOffset = 0;
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
for (unsigned i = 0; i < num_elements; i++) {
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
|
|
|
|
vf.VertexElementIndex = i;
|
|
|
|
vf.InstancingEnable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
|
|
|
|
topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
|
2017-03-18 17:33:25 +00:00
|
|
|
static uint32_t
|
2018-02-17 03:00:21 +00:00
|
|
|
blorp_emit_cc_viewport(struct blorp_batch *batch)
|
2017-05-12 05:05:49 +01:00
|
|
|
{
|
|
|
|
uint32_t cc_vp_offset;
|
|
|
|
blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) {
|
|
|
|
vp.MinimumDepth = 0.0;
|
|
|
|
vp.MaximumDepth = 1.0;
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2017-05-12 05:05:49 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
|
|
|
|
vsp.CCViewportPointer = cc_vp_offset;
|
|
|
|
}
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER == 6
|
2017-05-12 05:05:49 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
|
|
|
|
vsp.CCViewportStateChange = true;
|
|
|
|
vsp.PointertoCC_VIEWPORT = cc_vp_offset;
|
|
|
|
}
|
|
|
|
#endif
|
2017-03-18 17:33:25 +00:00
|
|
|
|
|
|
|
return cc_vp_offset;
|
2017-05-12 05:05:49 +01:00
|
|
|
}
|
|
|
|
|
2017-03-18 17:33:25 +00:00
|
|
|
static uint32_t
|
2018-02-17 03:00:21 +00:00
|
|
|
blorp_emit_sampler_state(struct blorp_batch *batch)
|
2017-05-12 05:05:49 +01:00
|
|
|
{
|
|
|
|
uint32_t offset;
|
|
|
|
blorp_emit_dynamic(batch, GENX(SAMPLER_STATE), sampler, 32, &offset) {
|
|
|
|
sampler.MipModeFilter = MIPFILTER_NONE;
|
|
|
|
sampler.MagModeFilter = MAPFILTER_LINEAR;
|
|
|
|
sampler.MinModeFilter = MAPFILTER_LINEAR;
|
|
|
|
sampler.MinLOD = 0;
|
|
|
|
sampler.MaxLOD = 0;
|
|
|
|
sampler.TCXAddressControlMode = TCM_CLAMP;
|
|
|
|
sampler.TCYAddressControlMode = TCM_CLAMP;
|
|
|
|
sampler.TCZAddressControlMode = TCM_CLAMP;
|
|
|
|
sampler.MaximumAnisotropy = RATIO21;
|
|
|
|
sampler.RAddressMinFilterRoundingEnable = true;
|
|
|
|
sampler.RAddressMagFilterRoundingEnable = true;
|
|
|
|
sampler.VAddressMinFilterRoundingEnable = true;
|
|
|
|
sampler.VAddressMagFilterRoundingEnable = true;
|
|
|
|
sampler.UAddressMinFilterRoundingEnable = true;
|
|
|
|
sampler.UAddressMagFilterRoundingEnable = true;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER > 6
|
2017-05-12 05:05:49 +01:00
|
|
|
sampler.NonnormalizedCoordinateEnable = true;
|
2017-03-18 17:33:25 +00:00
|
|
|
#endif
|
2017-05-12 05:05:49 +01:00
|
|
|
}
|
|
|
|
|
2018-10-26 20:45:44 +01:00
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Emit the blorp sampler state and point the pixel shader stage at it.
 *
 * gfx7+ uses the PS-specific 3DSTATE_SAMPLER_STATE_POINTERS_PS packet.
 * gfx6 uses the combined 3DSTATE_SAMPLER_STATE_POINTERS packet, flagging the
 * VS, GS and PS sampler state as changed while only supplying the PS
 * pointer.  For older generations no pointer packet is emitted here.
 *
 * Returns the offset returned by blorp_emit_sampler_state().
 */
UNUSED static uint32_t
blorp_emit_sampler_state_ps(struct blorp_batch *batch)
{
   const uint32_t sampler_offset = blorp_emit_sampler_state(batch);

#if GFX_VER >= 7
   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ptrs) {
      ptrs.PointertoPSSamplerState = sampler_offset;
   }
#elif GFX_VER == 6
   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ptrs) {
      ptrs.PointertoPSSamplerState = sampler_offset;
      ptrs.PSSamplerStateChange = true;
      ptrs.GSSamplerStateChange = true;
      ptrs.VSSamplerStateChange = true;
   }
#endif

   return sampler_offset;
}
|
|
|
|
|
2017-03-18 17:33:25 +00:00
|
|
|
/* What follows is the code for setting up a "pipeline" on Sandy Bridge and
|
2021-03-29 23:40:04 +01:00
|
|
|
* later hardware. This file will be included by i965 for gfx4-5 as well, so
|
2021-03-16 17:14:30 +00:00
|
|
|
* this code is guarded by GFX_VER >= 6.
|
2017-03-18 17:33:25 +00:00
|
|
|
*/
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 6
|
2017-03-18 17:33:25 +00:00
|
|
|
|
2016-10-21 21:10:52 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_vs_config(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
struct brw_vs_prog_data *vs_prog_data = params->vs_prog_data;
|
2021-03-16 17:14:30 +00:00
|
|
|
assert(!vs_prog_data || GFX_VER < 11 ||
|
2018-02-07 00:47:04 +00:00
|
|
|
vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8);
|
2016-10-21 21:10:52 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_VS), vs) {
|
|
|
|
if (vs_prog_data) {
|
2017-03-22 23:26:07 +00:00
|
|
|
vs.Enable = true;
|
2016-10-21 21:10:52 +01:00
|
|
|
|
|
|
|
vs.KernelStartPointer = params->vs_prog_kernel;
|
|
|
|
|
|
|
|
vs.DispatchGRFStartRegisterForURBData =
|
|
|
|
vs_prog_data->base.base.dispatch_grf_start_reg;
|
|
|
|
vs.VertexURBEntryReadLength =
|
|
|
|
vs_prog_data->base.urb_read_length;
|
|
|
|
vs.VertexURBEntryReadOffset = 0;
|
|
|
|
|
|
|
|
vs.MaximumNumberofThreads =
|
|
|
|
batch->blorp->isl_dev->info->max_vs_threads - 1;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-10-21 21:10:52 +01:00
|
|
|
vs.SIMD8DispatchEnable =
|
|
|
|
vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_sf_config(struct blorp_batch *batch,
|
2020-01-17 20:13:28 +00:00
|
|
|
const struct blorp_params *params,
|
2021-03-03 21:49:18 +00:00
|
|
|
UNUSED enum intel_urb_deref_block_size urb_deref_block_size)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2016-11-01 21:03:43 +00:00
|
|
|
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
/* 3DSTATE_SF
|
|
|
|
*
|
|
|
|
* Disable ViewportTransformEnable (dw2.1)
|
|
|
|
*
|
|
|
|
* From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
|
|
|
|
* Primitives Overview":
|
|
|
|
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
|
|
|
|
* use of screen- space coordinates).
|
|
|
|
*
|
|
|
|
* A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
|
|
|
|
* and BackFaceFillMode (dw2.5:6) to SOLID(0).
|
|
|
|
*
|
|
|
|
* From the Sandy Bridge PRM, Volume 2, Part 1, Section
|
|
|
|
* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
|
|
|
|
* SOLID: Any triangle or rectangle object found to be front-facing
|
|
|
|
* is rendered as a solid object. This setting is required when
|
|
|
|
* (rendering rectangle (RECTLIST) objects.
|
|
|
|
*/
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2020-01-16 23:59:43 +00:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
2020-01-17 20:13:28 +00:00
|
|
|
sf.DerefBlockSize = urb_deref_block_size;
|
2020-01-16 23:59:43 +00:00
|
|
|
#endif
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
|
|
|
|
raster.CullMode = CULLMODE_NONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
|
2016-08-24 00:49:08 +01:00
|
|
|
sbe.VertexURBEntryReadOffset = 1;
|
2016-10-07 07:44:57 +01:00
|
|
|
if (prog_data) {
|
|
|
|
sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
|
|
|
sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
|
|
|
|
sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
|
|
|
|
} else {
|
|
|
|
sbe.NumberofSFOutputAttributes = 0;
|
|
|
|
sbe.VertexURBEntryReadLength = 1;
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
sbe.ForceVertexURBEntryReadLength = true;
|
|
|
|
sbe.ForceVertexURBEntryReadOffset = true;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 9
|
2016-08-18 17:36:58 +01:00
|
|
|
for (unsigned i = 0; i < 32; i++)
|
|
|
|
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 7
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
|
|
|
|
sf.FrontFaceFillMode = FILL_MODE_SOLID;
|
|
|
|
sf.BackFaceFillMode = FILL_MODE_SOLID;
|
|
|
|
|
2016-10-21 18:40:58 +01:00
|
|
|
sf.MultisampleRasterizationMode = params->num_samples > 1 ?
|
2016-08-18 17:36:58 +01:00
|
|
|
MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 7
|
2016-08-18 17:36:58 +01:00
|
|
|
sf.DepthBufferSurfaceFormat = params->depth_format;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
|
2016-08-24 00:49:08 +01:00
|
|
|
sbe.VertexURBEntryReadOffset = 1;
|
2016-08-18 17:36:58 +01:00
|
|
|
if (prog_data) {
|
|
|
|
sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
|
|
|
sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
|
|
|
|
sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
|
|
|
|
} else {
|
|
|
|
sbe.NumberofSFOutputAttributes = 0;
|
|
|
|
sbe.VertexURBEntryReadLength = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#else /* GFX_VER <= 6 */
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
|
|
|
|
sf.FrontFaceFillMode = FILL_MODE_SOLID;
|
|
|
|
sf.BackFaceFillMode = FILL_MODE_SOLID;
|
|
|
|
|
2016-10-21 18:40:58 +01:00
|
|
|
sf.MultisampleRasterizationMode = params->num_samples > 1 ?
|
2016-08-18 17:36:58 +01:00
|
|
|
MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
|
|
|
|
|
2016-08-24 00:49:08 +01:00
|
|
|
sf.VertexURBEntryReadOffset = 1;
|
2016-08-18 17:36:58 +01:00
|
|
|
if (prog_data) {
|
|
|
|
sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
|
|
|
sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
|
|
|
|
sf.ConstantInterpolationEnable = prog_data->flat_inputs;
|
|
|
|
} else {
|
|
|
|
sf.NumberofSFOutputAttributes = 0;
|
|
|
|
sf.VertexURBEntryReadLength = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#endif /* GFX_VER */
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_ps_config(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2016-11-01 21:03:43 +00:00
|
|
|
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
|
|
|
|
* nonzero to prevent the GPU from hanging. While the documentation doesn't
|
|
|
|
* mention this explicitly, it notes that the valid range for the field is
|
|
|
|
* [1,39] = [2,40] threads, which excludes zero.
|
|
|
|
*
|
|
|
|
* To be safe (and to minimize extraneous code) we go ahead and fully
|
|
|
|
* configure the WM state whether or not there is a WM program.
|
|
|
|
*/
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM), wm);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_PS), ps) {
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->src.enabled) {
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.SamplerCount = 1; /* Up to 4 samplers */
|
|
|
|
ps.BindingTableEntryCount = 2;
|
|
|
|
} else {
|
|
|
|
ps.BindingTableEntryCount = 1;
|
|
|
|
}
|
|
|
|
|
2021-03-30 01:24:46 +01:00
|
|
|
/* SAMPLER_STATE prefetching is broken on Gfx11 - Wa_1606682166 */
|
2021-03-16 17:14:30 +00:00
|
|
|
if (GFX_VER == 11)
|
2019-06-24 23:09:51 +01:00
|
|
|
ps.SamplerCount = 0;
|
|
|
|
|
2016-10-07 07:44:57 +01:00
|
|
|
if (prog_data) {
|
|
|
|
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
|
|
|
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
2018-05-18 07:26:02 +01:00
|
|
|
ps._32PixelDispatchEnable = prog_data->dispatch_32;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2018-05-19 00:39:21 +01:00
|
|
|
/* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
|
|
|
|
*
|
|
|
|
* "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
|
|
|
|
* Dispatch must not be enabled for PER_PIXEL dispatch mode."
|
|
|
|
*
|
|
|
|
* Since 16x MSAA is first introduced on SKL, we don't need to apply
|
|
|
|
* the workaround on any older hardware.
|
|
|
|
*/
|
2021-03-16 17:14:30 +00:00
|
|
|
if (GFX_VER >= 9 && !prog_data->persample_dispatch &&
|
2018-05-19 00:39:21 +01:00
|
|
|
params->num_samples == 16) {
|
|
|
|
assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
|
|
|
|
ps._32PixelDispatchEnable = false;
|
|
|
|
}
|
|
|
|
|
2018-05-18 07:17:17 +01:00
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
|
|
|
|
|
|
|
ps.KernelStartPointer0 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
|
|
|
ps.KernelStartPointer1 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
|
|
|
ps.KernelStartPointer2 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
2016-10-07 07:44:57 +01:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2017-05-05 21:55:10 +01:00
|
|
|
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
|
2021-03-29 23:46:12 +01:00
|
|
|
* for pre Gfx11 and 128 for gfx11+; On gfx11+ If a programmed value is
|
2017-05-05 21:55:10 +01:00
|
|
|
* k, it implies 2(k+1) threads. It implicitly scales for different GT
|
|
|
|
* levels (which have some # of PSDs).
|
2016-08-18 17:36:58 +01:00
|
|
|
*
|
2021-03-29 23:46:12 +01:00
|
|
|
* In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
|
2016-08-18 17:36:58 +01:00
|
|
|
*/
|
2021-11-02 00:22:51 +00:00
|
|
|
const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo;
|
|
|
|
ps.MaximumNumberofThreadsPerPSD =
|
|
|
|
devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
switch (params->fast_clear_op) {
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_NONE:
|
2016-08-19 10:23:04 +01:00
|
|
|
break;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 10
|
2018-05-15 23:28:05 +01:00
|
|
|
case ISL_AUX_OP_AMBIGUATE:
|
|
|
|
ps.RenderTargetFastClearEnable = true;
|
|
|
|
ps.RenderTargetResolveType = FAST_CLEAR_0;
|
|
|
|
break;
|
|
|
|
#endif
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 9
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_PARTIAL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetResolveType = RESOLVE_PARTIAL;
|
|
|
|
break;
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetResolveType = RESOLVE_FULL;
|
|
|
|
break;
|
|
|
|
#else
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetResolveEnable = true;
|
|
|
|
break;
|
|
|
|
#endif
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetFastClearEnable = true;
|
|
|
|
break;
|
2016-08-19 10:23:04 +01:00
|
|
|
default:
|
|
|
|
unreachable("Invalid fast clear op");
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
|
2016-10-07 07:44:57 +01:00
|
|
|
if (prog_data) {
|
|
|
|
psx.PixelShaderValid = true;
|
|
|
|
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
|
2016-11-01 21:03:43 +00:00
|
|
|
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
|
2020-02-05 09:59:01 +00:00
|
|
|
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 9
|
2020-02-05 09:59:01 +00:00
|
|
|
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
|
|
|
|
#endif
|
2016-10-07 07:44:57 +01:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->src.enabled)
|
2016-08-18 17:36:58 +01:00
|
|
|
psx.PixelShaderKillsPixel = true;
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 7
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM), wm) {
|
|
|
|
switch (params->hiz_op) {
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.DepthBufferClear = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.DepthBufferResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_AMBIGUATE:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.HierarchicalDepthBufferResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_NONE:
|
2016-08-18 17:36:58 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
unreachable("not reached");
|
|
|
|
}
|
|
|
|
|
2020-02-05 09:59:01 +00:00
|
|
|
if (prog_data) {
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.ThreadDispatchEnable = true;
|
2020-02-05 09:59:01 +00:00
|
|
|
wm.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
|
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->src.enabled)
|
2016-11-12 18:46:02 +00:00
|
|
|
wm.PixelShaderKillsPixel = true;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-21 18:40:58 +01:00
|
|
|
if (params->num_samples > 1) {
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
|
|
|
|
wm.MultisampleDispatchMode =
|
2016-11-01 21:03:43 +00:00
|
|
|
(prog_data && prog_data->persample_dispatch) ?
|
2016-08-18 17:36:58 +01:00
|
|
|
MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
|
|
|
|
} else {
|
|
|
|
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
|
|
|
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_PS), ps) {
|
|
|
|
ps.MaximumNumberofThreads =
|
|
|
|
batch->blorp->isl_dev->info->max_wm_threads - 1;
|
|
|
|
|
2021-03-16 17:09:00 +00:00
|
|
|
#if GFX_VERx10 == 75
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.SampleMask = 1;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (prog_data) {
|
2018-05-18 07:17:17 +01:00
|
|
|
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
|
|
|
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
2018-05-18 07:26:02 +01:00
|
|
|
ps._32PixelDispatchEnable = prog_data->dispatch_32;
|
2018-05-18 07:17:17 +01:00
|
|
|
|
2016-11-12 17:35:37 +00:00
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
2018-05-18 07:17:17 +01:00
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
2016-11-12 17:35:37 +00:00
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
2018-05-18 07:17:17 +01:00
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2018-05-18 07:17:17 +01:00
|
|
|
ps.KernelStartPointer0 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
|
|
|
ps.KernelStartPointer1 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
|
|
|
ps.KernelStartPointer2 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
|
|
|
|
} else {
|
2021-03-29 23:46:12 +01:00
|
|
|
/* Gfx7 hardware gets angry if we don't enable at least one dispatch
|
2016-08-18 17:36:58 +01:00
|
|
|
* mode, so just enable 16-pixel dispatch if we don't have a program.
|
|
|
|
*/
|
|
|
|
ps._16PixelDispatchEnable = true;
|
|
|
|
}
|
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->src.enabled)
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.SamplerCount = 1; /* Up to 4 samplers */
|
|
|
|
|
|
|
|
switch (params->fast_clear_op) {
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_NONE:
|
2016-08-19 10:23:04 +01:00
|
|
|
break;
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:02:07 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
2016-08-18 17:36:58 +01:00
|
|
|
ps.RenderTargetFastClearEnable = true;
|
|
|
|
break;
|
2016-08-19 10:23:04 +01:00
|
|
|
default:
|
|
|
|
unreachable("Invalid fast clear op");
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#else /* GFX_VER <= 6 */
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM), wm) {
|
|
|
|
wm.MaximumNumberofThreads =
|
|
|
|
batch->blorp->isl_dev->info->max_wm_threads - 1;
|
|
|
|
|
|
|
|
switch (params->hiz_op) {
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.DepthBufferClear = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.DepthBufferResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_AMBIGUATE:
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.HierarchicalDepthBufferResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_NONE:
|
2016-08-18 17:36:58 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
unreachable("not reached");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prog_data) {
|
|
|
|
wm.ThreadDispatchEnable = true;
|
|
|
|
|
|
|
|
wm._8PixelDispatchEnable = prog_data->dispatch_8;
|
|
|
|
wm._16PixelDispatchEnable = prog_data->dispatch_16;
|
2018-05-18 07:26:02 +01:00
|
|
|
wm._32PixelDispatchEnable = prog_data->dispatch_32;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2018-05-18 07:17:17 +01:00
|
|
|
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0);
|
|
|
|
wm.DispatchGRFStartRegisterForConstantSetupData1 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1);
|
|
|
|
wm.DispatchGRFStartRegisterForConstantSetupData2 =
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 2);
|
|
|
|
|
|
|
|
wm.KernelStartPointer0 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, wm, 0);
|
|
|
|
wm.KernelStartPointer1 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, wm, 1);
|
|
|
|
wm.KernelStartPointer2 = params->wm_prog_kernel +
|
|
|
|
brw_wm_prog_data_prog_offset(prog_data, wm, 2);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
|
|
|
}
|
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->src.enabled) {
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.SamplerCount = 1; /* Up to 4 samplers */
|
2016-11-12 18:46:02 +00:00
|
|
|
wm.PixelShaderKillsPixel = true; /* TODO: temporarily smash on */
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
2016-10-21 18:40:58 +01:00
|
|
|
if (params->num_samples > 1) {
|
2016-08-18 17:36:58 +01:00
|
|
|
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
|
|
|
|
wm.MultisampleDispatchMode =
|
2016-11-01 21:03:43 +00:00
|
|
|
(prog_data && prog_data->persample_dispatch) ?
|
2016-08-18 17:36:58 +01:00
|
|
|
MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
|
|
|
|
} else {
|
|
|
|
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
|
|
|
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#endif /* GFX_VER */
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
blorp_emit_blend_state(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2021-06-24 16:01:43 +01:00
|
|
|
struct GENX(BLEND_STATE) blend = { };
|
2017-03-30 19:33:05 +01:00
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
uint32_t offset;
|
2017-03-30 19:33:05 +01:00
|
|
|
int size = GENX(BLEND_STATE_length) * 4;
|
|
|
|
size += GENX(BLEND_STATE_ENTRY_length) * 4 * params->num_draw_buffers;
|
|
|
|
uint32_t *state = blorp_alloc_dynamic_state(batch, size, 64, &offset);
|
|
|
|
uint32_t *pos = state;
|
|
|
|
|
|
|
|
GENX(BLEND_STATE_pack)(NULL, pos, &blend);
|
|
|
|
pos += GENX(BLEND_STATE_length);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
|
|
|
|
struct GENX(BLEND_STATE_ENTRY) entry = {
|
|
|
|
.PreBlendColorClampEnable = true,
|
|
|
|
.PostBlendColorClampEnable = true,
|
|
|
|
.ColorClampRange = COLORCLAMP_RTFORMAT,
|
|
|
|
|
2021-09-16 22:25:42 +01:00
|
|
|
.WriteDisableRed = params->color_write_disable & 1,
|
|
|
|
.WriteDisableGreen = params->color_write_disable & 2,
|
|
|
|
.WriteDisableBlue = params->color_write_disable & 4,
|
|
|
|
.WriteDisableAlpha = params->color_write_disable & 8,
|
2017-03-30 19:33:05 +01:00
|
|
|
};
|
|
|
|
GENX(BLEND_STATE_ENTRY_pack)(NULL, pos, &entry);
|
|
|
|
pos += GENX(BLEND_STATE_ENTRY_length);
|
2016-09-10 22:15:51 +01:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2017-03-30 19:33:05 +01:00
|
|
|
blorp_flush_range(batch, state, size);
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
|
|
|
|
sp.BlendStatePointer = offset;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-18 17:36:58 +01:00
|
|
|
sp.BlendStatePointerValid = true;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
|
|
|
|
ps_blend.HasWriteableRT = true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
blorp_emit_color_calc_state(struct blorp_batch *batch,
|
2019-06-22 13:31:53 +01:00
|
|
|
UNUSED const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2016-09-10 22:15:51 +01:00
|
|
|
uint32_t offset;
|
|
|
|
blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER <= 8
|
2016-09-10 22:15:51 +01:00
|
|
|
cc.StencilReferenceValue = params->stencil_ref;
|
2016-10-07 07:08:08 +01:00
|
|
|
#endif
|
2016-09-10 22:15:51 +01:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
|
|
|
|
sp.ColorCalcStatePointer = offset;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-08-18 17:36:58 +01:00
|
|
|
sp.ColorCalcStatePointerValid = true;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
blorp_emit_depth_stencil_state(struct blorp_batch *batch,
|
2016-08-19 13:43:29 +01:00
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-10-07 06:33:32 +01:00
|
|
|
struct GENX(3DSTATE_WM_DEPTH_STENCIL) ds = {
|
|
|
|
GENX(3DSTATE_WM_DEPTH_STENCIL_header),
|
|
|
|
};
|
|
|
|
#else
|
|
|
|
struct GENX(DEPTH_STENCIL_STATE) ds = { 0 };
|
|
|
|
#endif
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->depth.enabled) {
|
2016-10-07 07:08:08 +01:00
|
|
|
ds.DepthBufferWriteEnable = true;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-07 07:08:08 +01:00
|
|
|
switch (params->hiz_op) {
|
|
|
|
/* See the following sections of the Sandy Bridge PRM, Volume 2, Part1:
|
|
|
|
* - 7.5.3.1 Depth Buffer Clear
|
|
|
|
* - 7.5.3.2 Depth Buffer Resolve
|
|
|
|
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
|
|
|
|
*/
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2016-10-07 07:08:08 +01:00
|
|
|
ds.DepthTestEnable = true;
|
|
|
|
ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
|
|
|
|
break;
|
|
|
|
|
2019-08-15 18:17:11 +01:00
|
|
|
case ISL_AUX_OP_NONE:
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
|
|
|
case ISL_AUX_OP_AMBIGUATE:
|
2016-10-07 07:08:08 +01:00
|
|
|
ds.DepthTestEnable = false;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_PARTIAL_RESOLVE:
|
|
|
|
unreachable("Invalid HIZ op");
|
2016-10-07 07:08:08 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-08 01:17:05 +01:00
|
|
|
if (params->stencil.enabled) {
|
2016-10-07 07:08:08 +01:00
|
|
|
ds.StencilBufferWriteEnable = true;
|
|
|
|
ds.StencilTestEnable = true;
|
|
|
|
ds.DoubleSidedStencilEnable = false;
|
|
|
|
|
|
|
|
ds.StencilTestFunction = COMPAREFUNCTION_ALWAYS;
|
|
|
|
ds.StencilPassDepthPassOp = STENCILOP_REPLACE;
|
|
|
|
|
|
|
|
ds.StencilWriteMask = params->stencil_mask;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 9
|
2016-10-07 07:08:08 +01:00
|
|
|
ds.StencilReferenceValue = params->stencil_ref;
|
|
|
|
#endif
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2016-10-07 06:33:32 +01:00
|
|
|
uint32_t offset = 0;
|
|
|
|
uint32_t *dw = blorp_emit_dwords(batch,
|
|
|
|
GENX(3DSTATE_WM_DEPTH_STENCIL_length));
|
2017-03-09 13:22:25 +00:00
|
|
|
if (!dw)
|
|
|
|
return 0;
|
|
|
|
|
2016-10-07 06:33:32 +01:00
|
|
|
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &ds);
|
|
|
|
#else
|
2016-08-18 17:36:58 +01:00
|
|
|
uint32_t offset;
|
2017-03-20 09:21:41 +00:00
|
|
|
void *state = blorp_alloc_dynamic_state(batch,
|
2016-08-18 17:36:58 +01:00
|
|
|
GENX(DEPTH_STENCIL_STATE_length) * 4,
|
|
|
|
64, &offset);
|
|
|
|
GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
|
2017-02-20 19:03:04 +00:00
|
|
|
blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4);
|
2016-10-07 06:33:32 +01:00
|
|
|
#endif
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 7
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
|
|
|
|
sp.PointertoDEPTH_STENCIL_STATE = offset;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2022-04-06 09:50:13 +01:00
|
|
|
#if GFX_VER >= 12
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
|
|
|
|
db.DepthBoundsTestEnable = false;
|
|
|
|
db.DepthBoundsTestMinValue = 0.0;
|
|
|
|
db.DepthBoundsTestMaxValue = 1.0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_3dstate_multisample(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
|
|
|
|
ms.NumberofMultisamples = __builtin_ffs(params->num_samples) - 1;
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2017-05-12 05:05:49 +01:00
|
|
|
/* The PRM says that this bit is valid only for DX9:
|
|
|
|
*
|
|
|
|
* SW can choose to set this bit only for DX9 API. DX10/OGL API's
|
|
|
|
* should not have any effect by setting or not setting this bit.
|
|
|
|
*/
|
|
|
|
ms.PixelPositionOffsetEnable = false;
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 7
|
2017-05-12 05:05:49 +01:00
|
|
|
|
|
|
|
switch (params->num_samples) {
|
|
|
|
case 1:
|
2021-03-03 21:58:15 +00:00
|
|
|
INTEL_SAMPLE_POS_1X(ms.Sample);
|
2017-05-12 05:05:49 +01:00
|
|
|
break;
|
|
|
|
case 2:
|
2021-03-03 21:58:15 +00:00
|
|
|
INTEL_SAMPLE_POS_2X(ms.Sample);
|
2017-05-12 05:05:49 +01:00
|
|
|
break;
|
|
|
|
case 4:
|
2021-03-03 21:58:15 +00:00
|
|
|
INTEL_SAMPLE_POS_4X(ms.Sample);
|
2017-05-12 05:05:49 +01:00
|
|
|
break;
|
|
|
|
case 8:
|
2021-03-03 21:58:15 +00:00
|
|
|
INTEL_SAMPLE_POS_8X(ms.Sample);
|
2017-05-12 05:05:49 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#else
|
2021-03-03 21:58:15 +00:00
|
|
|
INTEL_SAMPLE_POS_4X(ms.Sample);
|
2017-05-12 05:05:49 +01:00
|
|
|
#endif
|
|
|
|
ms.PixelLocation = CENTER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_pipeline(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
uint32_t blend_state_offset = 0;
|
|
|
|
uint32_t color_calc_state_offset;
|
|
|
|
uint32_t depth_stencil_state_offset;
|
|
|
|
|
2021-03-03 21:49:18 +00:00
|
|
|
enum intel_urb_deref_block_size urb_deref_block_size;
|
2020-01-17 20:13:28 +00:00
|
|
|
emit_urb_config(batch, params, &urb_deref_block_size);
|
2017-05-12 05:05:49 +01:00
|
|
|
|
|
|
|
if (params->wm_prog_data) {
|
|
|
|
blend_state_offset = blorp_emit_blend_state(batch, params);
|
|
|
|
}
|
|
|
|
color_calc_state_offset = blorp_emit_color_calc_state(batch, params);
|
|
|
|
depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 6
|
2017-05-12 05:05:49 +01:00
|
|
|
/* 3DSTATE_CC_STATE_POINTERS
|
|
|
|
*
|
|
|
|
* The pointer offsets are relative to
|
|
|
|
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
|
|
|
|
*
|
|
|
|
* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
|
|
|
|
*
|
|
|
|
* The dynamic state emit helpers emit their own STATE_POINTERS packets on
|
2021-03-29 23:40:04 +01:00
|
|
|
* gfx7+. However, on gfx6 and earlier, they're all lumpped together in
|
2017-05-12 05:05:49 +01:00
|
|
|
* one CC_STATE_POINTERS packet so we have to emit that here.
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
|
2022-06-20 10:47:25 +01:00
|
|
|
cc.BLEND_STATEChange = params->wm_prog_data ? true : false;
|
2017-05-12 05:05:49 +01:00
|
|
|
cc.ColorCalcStatePointerValid = true;
|
|
|
|
cc.DEPTH_STENCIL_STATEChange = true;
|
|
|
|
cc.PointertoBLEND_STATE = blend_state_offset;
|
|
|
|
cc.ColorCalcStatePointer = color_calc_state_offset;
|
|
|
|
cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
(void)blend_state_offset;
|
|
|
|
(void)color_calc_state_offset;
|
|
|
|
(void)depth_stencil_state_offset;
|
|
|
|
#endif
|
|
|
|
|
2021-10-19 07:32:46 +01:00
|
|
|
UNUSED uint32_t mocs = isl_mocs(batch->blorp->isl_dev, 0, false);
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
2018-06-11 19:29:14 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_ALL), pc) {
|
|
|
|
/* Update empty push constants for all stages (bitmask = 11111b) */
|
|
|
|
pc.ShaderUpdateEnable = 0x1f;
|
2021-10-19 07:32:46 +01:00
|
|
|
pc.MOCS = mocs;
|
2018-06-11 19:29:14 +01:00
|
|
|
}
|
|
|
|
#else
|
2021-10-19 07:32:46 +01:00
|
|
|
#if GFX_VER >= 9
|
|
|
|
#define CONSTANT_MOCS xs.MOCS = mocs
|
|
|
|
#elif GFX_VER == 7
|
|
|
|
#define CONSTANT_MOCS xs.ConstantBody.MOCS = mocs
|
|
|
|
#else
|
|
|
|
#define CONSTANT_MOCS
|
|
|
|
#endif
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), xs) { CONSTANT_MOCS; }
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2021-10-19 07:32:46 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), xs) { CONSTANT_MOCS; }
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), xs) { CONSTANT_MOCS; }
|
2017-05-12 05:05:49 +01:00
|
|
|
#endif
|
2021-10-19 07:32:46 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), xs) { CONSTANT_MOCS; }
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), xs) { CONSTANT_MOCS; }
|
2018-06-11 19:29:14 +01:00
|
|
|
#endif
|
2021-10-19 07:32:46 +01:00
|
|
|
#undef CONSTANT_MOCS
|
2017-05-12 05:05:49 +01:00
|
|
|
|
|
|
|
if (params->src.enabled)
|
2018-10-26 20:45:44 +01:00
|
|
|
blorp_emit_sampler_state_ps(batch);
|
2017-05-12 05:05:49 +01:00
|
|
|
|
|
|
|
blorp_emit_3dstate_multisample(batch, params);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
|
|
|
|
mask.SampleMask = (1 << params->num_samples) - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
|
|
|
|
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
|
|
|
|
*
|
|
|
|
* [DevSNB] A pipeline flush must be programmed prior to a
|
|
|
|
* 3DSTATE_VS command that causes the VS Function Enable to
|
|
|
|
* toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
|
|
|
|
* command with CS stall bit set and a post sync operation.
|
|
|
|
*
|
|
|
|
* We've already done one at the start of the BLORP operation.
|
|
|
|
*/
|
|
|
|
blorp_emit_vs_config(batch, params);
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2017-05-12 05:05:49 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_HS), hs);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_TE), te);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_DS), DS);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
|
|
|
|
#endif
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_GS), gs);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
|
|
|
|
clip.PerspectiveDivideDisable = true;
|
|
|
|
}
|
|
|
|
|
2020-01-17 20:13:28 +00:00
|
|
|
blorp_emit_sf_config(batch, params, urb_deref_block_size);
|
2017-05-12 05:05:49 +01:00
|
|
|
blorp_emit_ps_config(batch, params);
|
|
|
|
|
2018-02-17 03:00:21 +00:00
|
|
|
blorp_emit_cc_viewport(batch);
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 18:10:34 +01:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 18:10:34 +01:00
|
|
|
/* Disable Primitive Replication. */
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
|
|
|
#endif
|
2021-04-29 23:10:57 +01:00
|
|
|
|
|
|
|
if (batch->blorp->config.use_mesh_shading) {
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_MESH_SHADER), zero);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_TASK_SHADER), zero);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_MESH_CONTROL), zero);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
|
|
|
#endif
|
|
|
|
}
|
2017-05-12 05:05:49 +01:00
|
|
|
}
|
|
|
|
|
2017-03-18 17:33:25 +00:00
|
|
|
/******** This is the end of the pipeline setup code ********/
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#endif /* GFX_VER >= 6 */
|
2017-03-18 17:33:25 +00:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2017-11-11 19:10:59 +00:00
|
|
|
static void
|
|
|
|
blorp_emit_memcpy(struct blorp_batch *batch,
|
|
|
|
struct blorp_address dst,
|
|
|
|
struct blorp_address src,
|
|
|
|
uint32_t size)
|
|
|
|
{
|
|
|
|
assert(size % 4 == 0);
|
|
|
|
|
|
|
|
for (unsigned dw = 0; dw < size; dw += 4) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2017-11-11 19:10:59 +00:00
|
|
|
blorp_emit(batch, GENX(MI_COPY_MEM_MEM), cp) {
|
|
|
|
cp.DestinationMemoryAddress = dst;
|
|
|
|
cp.SourceMemoryAddress = src;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* IVB does not have a general purpose register for command streamer
|
|
|
|
* commands. Therefore, we use an alternate temporary register.
|
|
|
|
*/
|
2021-03-29 23:16:59 +01:00
|
|
|
#define BLORP_TEMP_REG 0x2440 /* GFX7_3DPRIM_BASE_VERTEX */
|
2017-11-11 19:10:59 +00:00
|
|
|
blorp_emit(batch, GENX(MI_LOAD_REGISTER_MEM), load) {
|
|
|
|
load.RegisterAddress = BLORP_TEMP_REG;
|
|
|
|
load.MemoryAddress = src;
|
|
|
|
}
|
|
|
|
blorp_emit(batch, GENX(MI_STORE_REGISTER_MEM), store) {
|
|
|
|
store.RegisterAddress = BLORP_TEMP_REG;
|
|
|
|
store.MemoryAddress = dst;
|
|
|
|
}
|
|
|
|
#undef BLORP_TEMP_REG
|
|
|
|
#endif
|
|
|
|
dst.offset += 4;
|
|
|
|
src.offset += 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_surface_state(struct blorp_batch *batch,
|
|
|
|
const struct brw_blorp_surface_info *surface,
|
2020-09-24 20:19:39 +01:00
|
|
|
UNUSED enum isl_aux_op aux_op,
|
2016-09-20 06:04:40 +01:00
|
|
|
void *state, uint32_t state_offset,
|
2021-09-16 22:25:42 +01:00
|
|
|
uint8_t color_write_disable,
|
2016-08-18 17:36:58 +01:00
|
|
|
bool is_render_target)
|
|
|
|
{
|
2016-09-20 06:04:40 +01:00
|
|
|
const struct isl_device *isl_dev = batch->blorp->isl_dev;
|
2016-08-18 17:36:58 +01:00
|
|
|
struct isl_surf surf = surface->surf;
|
|
|
|
|
|
|
|
if (surf.dim == ISL_SURF_DIM_1D &&
|
2021-03-30 00:02:30 +01:00
|
|
|
surf.dim_layout == ISL_DIM_LAYOUT_GFX4_2D) {
|
2016-08-18 17:36:58 +01:00
|
|
|
assert(surf.logical_level0_px.height == 1);
|
|
|
|
surf.dim = ISL_SURF_DIM_2D;
|
|
|
|
}
|
|
|
|
|
2019-12-04 02:29:15 +00:00
|
|
|
if (isl_aux_usage_has_hiz(surface->aux_usage)) {
|
|
|
|
/* BLORP doesn't render with depth so we can't use HiZ */
|
|
|
|
assert(!is_render_target);
|
|
|
|
/* We can't reinterpret HiZ */
|
|
|
|
assert(surface->surf.format == surface->view.format);
|
|
|
|
}
|
2021-03-10 03:11:37 +00:00
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
enum isl_aux_usage aux_usage = surface->aux_usage;
|
|
|
|
|
2021-03-29 23:40:04 +01:00
|
|
|
/* On gfx12, implicit CCS has no aux buffer */
|
2021-03-10 03:11:37 +00:00
|
|
|
bool use_aux_address = (aux_usage != ISL_AUX_USAGE_NONE) &&
|
|
|
|
(surface->aux_addr.buffer != NULL);
|
|
|
|
|
2016-09-10 00:30:24 +01:00
|
|
|
isl_channel_mask_t write_disable_mask = 0;
|
2021-03-16 17:14:30 +00:00
|
|
|
if (is_render_target && GFX_VER <= 5) {
|
2021-09-16 22:25:42 +01:00
|
|
|
if (color_write_disable & BITFIELD_BIT(0))
|
2016-09-10 00:30:24 +01:00
|
|
|
write_disable_mask |= ISL_CHANNEL_RED_BIT;
|
2021-09-16 22:25:42 +01:00
|
|
|
if (color_write_disable & BITFIELD_BIT(1))
|
2016-09-10 00:30:24 +01:00
|
|
|
write_disable_mask |= ISL_CHANNEL_GREEN_BIT;
|
2021-09-16 22:25:42 +01:00
|
|
|
if (color_write_disable & BITFIELD_BIT(2))
|
2016-09-10 00:30:24 +01:00
|
|
|
write_disable_mask |= ISL_CHANNEL_BLUE_BIT;
|
2021-09-16 22:25:42 +01:00
|
|
|
if (color_write_disable & BITFIELD_BIT(3))
|
2016-09-10 00:30:24 +01:00
|
|
|
write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
|
|
|
|
}
|
|
|
|
|
2017-12-07 16:47:38 +00:00
|
|
|
const bool use_clear_address =
|
2021-03-16 17:14:30 +00:00
|
|
|
GFX_VER >= 10 && (surface->clear_color_addr.buffer != NULL);
|
2017-12-07 16:47:38 +00:00
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
isl_surf_fill_state(batch->blorp->isl_dev, state,
|
|
|
|
.surf = &surf, .view = &surface->view,
|
|
|
|
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
|
blorp: Add blorp_get_surface_address to the driver interface.
Currently, BLORP expects drivers to provide two functions for dealing
with buffers: blorp_emit_reloc and blorp_surface_reloc. Both record a
relocation and combine the BO address and offset into a full 64-bit
address. Traditionally, blorp_surface_reloc has written that combined
address to an implicitly-known buffer where surface states are stored.
(In contrast, blorp_emit_reloc returns the value.)
The upcoming Iris driver stores surface states in multiple buffers,
which makes it impossible for blorp_surface_reloc to write the combined
address - it only takes an offset, not the actual buffer to write to.
This commit adds a third function, blorp_get_surface_address, which
combines and returns an address, which is then passed to ISL's surface
state fill functions. Softpin-only drivers can return a real address
here and skip writing it in blorp_surface_reloc. Relocation-based
drivers are have options. They can simply return 0 from the new
function, and continue writing the address from blorp_surface_reloc.
Or, they can return a presumed address from blorp_get_surface_address,
and have other relocation processing write the real value later.
For now, i965 and anv simply return 0.
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-01-09 21:31:18 +00:00
|
|
|
.address =
|
|
|
|
blorp_get_surface_address(batch, surface->addr),
|
2021-03-10 02:18:54 +00:00
|
|
|
.aux_address = !use_aux_address ? 0 :
|
blorp: Add blorp_get_surface_address to the driver interface.
Currently, BLORP expects drivers to provide two functions for dealing
with buffers: blorp_emit_reloc and blorp_surface_reloc. Both record a
relocation and combine the BO address and offset into a full 64-bit
address. Traditionally, blorp_surface_reloc has written that combined
address to an implicitly-known buffer where surface states are stored.
(In contrast, blorp_emit_reloc returns the value.)
The upcoming Iris driver stores surface states in multiple buffers,
which makes it impossible for blorp_surface_reloc to write the combined
address - it only takes an offset, not the actual buffer to write to.
This commit adds a third function, blorp_get_surface_address, which
combines and returns an address, which is then passed to ISL's surface
state fill functions. Softpin-only drivers can return a real address
here and skip writing it in blorp_surface_reloc. Relocation-based
drivers are have options. They can simply return 0 from the new
function, and continue writing the address from blorp_surface_reloc.
Or, they can return a presumed address from blorp_get_surface_address,
and have other relocation processing write the real value later.
For now, i965 and anv simply return 0.
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-01-09 21:31:18 +00:00
|
|
|
blorp_get_surface_address(batch, surface->aux_addr),
|
|
|
|
.clear_address = !use_clear_address ? 0 :
|
|
|
|
blorp_get_surface_address(batch,
|
|
|
|
surface->clear_color_addr),
|
2017-11-03 22:20:08 +00:00
|
|
|
.mocs = surface->addr.mocs,
|
|
|
|
.clear_color = surface->clear_color,
|
2017-12-07 16:47:38 +00:00
|
|
|
.use_clear_address = use_clear_address,
|
2016-09-10 00:30:24 +01:00
|
|
|
.write_disables = write_disable_mask);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-09-20 06:04:40 +01:00
|
|
|
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
|
2016-08-18 17:36:58 +01:00
|
|
|
surface->addr, 0);
|
|
|
|
|
2021-03-10 03:11:37 +00:00
|
|
|
if (use_aux_address) {
|
2021-03-29 23:40:04 +01:00
|
|
|
/* On gfx7 and prior, the bottom 12 bits of the MCS base address are
|
2016-08-18 17:36:58 +01:00
|
|
|
* used to store other information. This should be ok, however, because
|
|
|
|
* surface buffer addresses are always 4K page alinged.
|
|
|
|
*/
|
|
|
|
assert((surface->aux_addr.offset & 0xfff) == 0);
|
2016-09-20 06:04:40 +01:00
|
|
|
uint32_t *aux_addr = state + isl_dev->ss.aux_addr_offset;
|
|
|
|
blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset,
|
|
|
|
surface->aux_addr, *aux_addr);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
2017-02-20 19:03:04 +00:00
|
|
|
|
2019-01-04 17:32:40 +00:00
|
|
|
if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 10
|
2017-12-07 16:47:38 +00:00
|
|
|
assert((surface->clear_color_addr.offset & 0x3f) == 0);
|
|
|
|
uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
|
|
|
|
blorp_surface_reloc(batch, state_offset +
|
|
|
|
isl_dev->ss.clear_color_state_offset,
|
|
|
|
surface->clear_color_addr, *clear_addr);
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 7
|
2019-01-04 17:32:40 +00:00
|
|
|
/* Fast clears just whack the AUX surface and don't actually use the
|
|
|
|
* clear color for anything. We can avoid the MI memcpy on that case.
|
|
|
|
*/
|
|
|
|
if (aux_op != ISL_AUX_OP_FAST_CLEAR) {
|
2018-03-07 18:49:03 +00:00
|
|
|
struct blorp_address dst_addr = blorp_get_surface_base_address(batch);
|
|
|
|
dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;
|
|
|
|
blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,
|
|
|
|
isl_dev->ss.clear_value_size);
|
|
|
|
}
|
2017-11-11 19:10:59 +00:00
|
|
|
#else
|
2021-03-29 23:40:04 +01:00
|
|
|
unreachable("Fast clears are only supported on gfx7+");
|
2017-11-11 19:10:59 +00:00
|
|
|
#endif
|
|
|
|
}
|
2017-12-07 16:47:38 +00:00
|
|
|
|
|
|
|
blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
2016-10-10 17:30:29 +01:00
|
|
|
static void
|
|
|
|
blorp_emit_null_surface_state(struct blorp_batch *batch,
|
|
|
|
const struct brw_blorp_surface_info *surface,
|
|
|
|
uint32_t *state)
|
|
|
|
{
|
|
|
|
struct GENX(RENDER_SURFACE_STATE) ss = {
|
|
|
|
.SurfaceType = SURFTYPE_NULL,
|
2018-02-14 02:13:51 +00:00
|
|
|
.SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
|
2016-10-10 17:30:29 +01:00
|
|
|
.Width = surface->surf.logical_level0_px.width - 1,
|
|
|
|
.Height = surface->surf.logical_level0_px.height - 1,
|
|
|
|
.MIPCountLOD = surface->view.base_level,
|
|
|
|
.MinimumArrayElement = surface->view.base_array_layer,
|
|
|
|
.Depth = surface->view.array_len - 1,
|
|
|
|
.RenderTargetViewExtent = surface->view.array_len - 1,
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 6
|
2016-10-10 17:30:29 +01:00
|
|
|
.NumberofMultisamples = ffs(surface->surf.samples) - 1,
|
2021-10-20 00:34:42 +01:00
|
|
|
.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false),
|
2017-03-18 17:33:25 +00:00
|
|
|
#endif
|
2016-10-10 17:30:29 +01:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2016-10-10 17:30:29 +01:00
|
|
|
.SurfaceArray = surface->surf.dim != ISL_SURF_DIM_3D,
|
|
|
|
#endif
|
|
|
|
|
2018-11-12 21:59:06 +00:00
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
.TileMode = TILE4,
|
|
|
|
#elif GFX_VER >= 8
|
2016-10-10 17:30:29 +01:00
|
|
|
.TileMode = YMAJOR,
|
|
|
|
#else
|
|
|
|
.TiledSurface = true,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &ss);
|
2017-02-20 19:03:04 +00:00
|
|
|
|
|
|
|
blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
|
2016-10-10 17:30:29 +01:00
|
|
|
}
|
|
|
|
|
2018-10-26 20:48:40 +01:00
|
|
|
static uint32_t
|
|
|
|
blorp_setup_binding_table(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2016-09-20 06:04:40 +01:00
|
|
|
const struct isl_device *isl_dev = batch->blorp->isl_dev;
|
2021-10-05 11:16:51 +01:00
|
|
|
uint32_t surface_offsets[2], bind_offset = 0;
|
2016-08-18 17:36:58 +01:00
|
|
|
void *surface_maps[2];
|
|
|
|
|
2019-06-22 13:31:53 +01:00
|
|
|
UNUSED bool has_indirect_clear_color = false;
|
2016-10-22 01:15:49 +01:00
|
|
|
if (params->use_pre_baked_binding_table) {
|
|
|
|
bind_offset = params->pre_baked_binding_table_offset;
|
2016-10-10 17:30:29 +01:00
|
|
|
} else {
|
2016-10-22 01:15:49 +01:00
|
|
|
unsigned num_surfaces = 1 + params->src.enabled;
|
|
|
|
blorp_alloc_binding_table(batch, num_surfaces,
|
|
|
|
isl_dev->ss.size, isl_dev->ss.align,
|
|
|
|
&bind_offset, surface_offsets, surface_maps);
|
|
|
|
|
|
|
|
if (params->dst.enabled) {
|
|
|
|
blorp_emit_surface_state(batch, ¶ms->dst,
|
2018-03-07 18:49:03 +00:00
|
|
|
params->fast_clear_op,
|
2016-10-22 01:15:49 +01:00
|
|
|
surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
|
|
|
|
surface_offsets[BLORP_RENDERBUFFER_BT_INDEX],
|
2016-09-10 00:30:24 +01:00
|
|
|
params->color_write_disable, true);
|
2017-11-11 19:10:59 +00:00
|
|
|
if (params->dst.clear_color_addr.buffer != NULL)
|
|
|
|
has_indirect_clear_color = true;
|
2016-10-22 01:15:49 +01:00
|
|
|
} else {
|
|
|
|
assert(params->depth.enabled || params->stencil.enabled);
|
|
|
|
const struct brw_blorp_surface_info *surface =
|
|
|
|
params->depth.enabled ? ¶ms->depth : ¶ms->stencil;
|
|
|
|
blorp_emit_null_surface_state(batch, surface,
|
|
|
|
surface_maps[BLORP_RENDERBUFFER_BT_INDEX]);
|
|
|
|
}
|
2016-10-10 17:30:29 +01:00
|
|
|
|
2016-10-22 01:15:49 +01:00
|
|
|
if (params->src.enabled) {
|
|
|
|
blorp_emit_surface_state(batch, ¶ms->src,
|
2018-03-07 18:49:03 +00:00
|
|
|
params->fast_clear_op,
|
2016-10-22 01:15:49 +01:00
|
|
|
surface_maps[BLORP_TEXTURE_BT_INDEX],
|
2016-09-10 00:30:24 +01:00
|
|
|
surface_offsets[BLORP_TEXTURE_BT_INDEX],
|
2021-09-16 22:25:42 +01:00
|
|
|
0, false);
|
2017-11-11 19:10:59 +00:00
|
|
|
if (params->src.clear_color_addr.buffer != NULL)
|
|
|
|
has_indirect_clear_color = true;
|
2016-10-22 01:15:49 +01:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
|
|
|
|
2021-10-27 00:51:19 +01:00
|
|
|
#if GFX_VER >= 7 && GFX_VER < 12
|
2017-11-11 19:10:59 +00:00
|
|
|
if (has_indirect_clear_color) {
|
|
|
|
/* Updating a surface state object may require that the state cache be
|
|
|
|
* invalidated. From the SKL PRM, Shared Functions -> State -> State
|
|
|
|
* Caching:
|
|
|
|
*
|
|
|
|
* Whenever the RENDER_SURFACE_STATE object in memory pointed to by
|
|
|
|
* the Binding Table Pointer (BTP) and Binding Table Index (BTI) is
|
|
|
|
* modified [...], the L1 state cache must be invalidated to ensure
|
|
|
|
* the new surface or sampler state is fetched from system memory.
|
2021-10-27 00:51:19 +01:00
|
|
|
*
|
|
|
|
* XXX - Investigate why exactly this invalidation is necessary to
|
|
|
|
* avoid Vulkan regressions on ICL. It's possible that the
|
|
|
|
* MI_ATOMIC used to update the clear color isn't correctly
|
|
|
|
* ordered with the pre-existing invalidation in
|
|
|
|
* blorp_update_clear_color().
|
2017-11-11 19:10:59 +00:00
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
|
|
|
|
pipe.StateCacheInvalidationEnable = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-10-26 20:48:40 +01:00
|
|
|
return bind_offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_emit_btp(struct blorp_batch *batch, uint32_t bind_offset)
|
|
|
|
{
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2016-10-23 06:27:23 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_DS), bt);
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_GS), bt);
|
|
|
|
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
|
2020-05-11 19:49:55 +01:00
|
|
|
bt.PointertoPSBindingTable =
|
|
|
|
blorp_binding_table_offset_to_pointer(batch, bind_offset);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 6
|
2016-08-18 17:36:58 +01:00
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
|
|
|
|
bt.PSBindingTableChange = true;
|
2020-05-11 19:49:55 +01:00
|
|
|
bt.PointertoPSBindingTable =
|
|
|
|
blorp_binding_table_offset_to_pointer(batch, bind_offset);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
2017-03-18 17:33:25 +00:00
|
|
|
#else
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
|
2020-05-11 19:49:55 +01:00
|
|
|
bt.PointertoPSBindingTable =
|
|
|
|
blorp_binding_table_offset_to_pointer(batch, bind_offset);
|
2017-03-18 17:33:25 +00:00
|
|
|
}
|
2016-08-18 17:36:58 +01:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-05-12 05:05:49 +01:00
|
|
|
blorp_emit_depth_stencil_config(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
2016-08-18 17:36:58 +01:00
|
|
|
{
|
2017-05-12 05:05:49 +01:00
|
|
|
const struct isl_device *isl_dev = batch->blorp->isl_dev;
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
uint32_t *dw = blorp_emit_dwords(batch, isl_dev->ds.size / 4);
|
|
|
|
if (dw == NULL)
|
|
|
|
return;
|
|
|
|
|
2017-11-03 22:20:08 +00:00
|
|
|
struct isl_depth_stencil_hiz_emit_info info = { };
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
if (params->depth.enabled) {
|
|
|
|
info.view = ¶ms->depth.view;
|
2017-11-03 22:20:08 +00:00
|
|
|
info.mocs = params->depth.addr.mocs;
|
2017-05-12 05:05:49 +01:00
|
|
|
} else if (params->stencil.enabled) {
|
|
|
|
info.view = ¶ms->stencil.view;
|
2017-11-03 22:20:08 +00:00
|
|
|
info.mocs = params->stencil.addr.mocs;
|
2021-10-19 13:31:06 +01:00
|
|
|
} else {
|
|
|
|
info.mocs = isl_mocs(isl_dev, 0, false);
|
2017-05-12 05:05:49 +01:00
|
|
|
}
|
2016-09-12 23:50:04 +01:00
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
if (params->depth.enabled) {
|
|
|
|
info.depth_surf = ¶ms->depth.surf;
|
2016-09-12 23:50:04 +01:00
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
info.depth_address =
|
|
|
|
blorp_emit_reloc(batch, dw + isl_dev->ds.depth_offset / 4,
|
|
|
|
params->depth.addr, 0);
|
|
|
|
|
|
|
|
info.hiz_usage = params->depth.aux_usage;
|
2019-08-09 18:02:50 +01:00
|
|
|
if (isl_aux_usage_has_hiz(info.hiz_usage)) {
|
2017-05-12 05:05:49 +01:00
|
|
|
info.hiz_surf = ¶ms->depth.aux_surf;
|
|
|
|
|
2017-05-30 17:53:43 +01:00
|
|
|
struct blorp_address hiz_address = params->depth.aux_addr;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 6
|
2017-05-30 17:53:43 +01:00
|
|
|
/* Sandy bridge hardware does not technically support mipmapped HiZ.
|
|
|
|
* However, we have a special layout that allows us to make it work
|
|
|
|
* anyway by manually offsetting to the specified miplevel.
|
|
|
|
*/
|
2021-03-30 00:02:30 +01:00
|
|
|
assert(info.hiz_surf->dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
|
2021-07-07 17:28:05 +01:00
|
|
|
uint64_t offset_B;
|
2017-05-30 17:53:43 +01:00
|
|
|
isl_surf_get_image_offset_B_tile_sa(info.hiz_surf,
|
|
|
|
info.view->base_level, 0, 0,
|
|
|
|
&offset_B, NULL, NULL);
|
|
|
|
hiz_address.offset += offset_B;
|
|
|
|
#endif
|
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
info.hiz_address =
|
|
|
|
blorp_emit_reloc(batch, dw + isl_dev->ds.hiz_offset / 4,
|
2017-05-30 17:53:43 +01:00
|
|
|
hiz_address, 0);
|
2017-05-12 05:05:49 +01:00
|
|
|
|
2017-05-20 23:00:42 +01:00
|
|
|
info.depth_clear_value = params->depth.clear_color.f32[0];
|
2016-09-12 23:50:04 +01:00
|
|
|
}
|
|
|
|
}
|
2017-05-12 05:05:49 +01:00
|
|
|
|
|
|
|
if (params->stencil.enabled) {
|
|
|
|
info.stencil_surf = ¶ms->stencil.surf;
|
|
|
|
|
2019-10-24 00:24:46 +01:00
|
|
|
info.stencil_aux_usage = params->stencil.aux_usage;
|
2017-05-30 17:53:43 +01:00
|
|
|
struct blorp_address stencil_address = params->stencil.addr;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 6
|
2017-05-30 17:53:43 +01:00
|
|
|
/* Sandy bridge hardware does not technically support mipmapped stencil.
|
|
|
|
* However, we have a special layout that allows us to make it work
|
|
|
|
* anyway by manually offsetting to the specified miplevel.
|
|
|
|
*/
|
2021-03-30 00:02:30 +01:00
|
|
|
assert(info.stencil_surf->dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
|
2021-07-07 17:28:05 +01:00
|
|
|
uint64_t offset_B;
|
2017-05-30 17:53:43 +01:00
|
|
|
isl_surf_get_image_offset_B_tile_sa(info.stencil_surf,
|
|
|
|
info.view->base_level, 0, 0,
|
|
|
|
&offset_B, NULL, NULL);
|
|
|
|
stencil_address.offset += offset_B;
|
|
|
|
#endif
|
|
|
|
|
2017-05-12 05:05:49 +01:00
|
|
|
info.stencil_address =
|
|
|
|
blorp_emit_reloc(batch, dw + isl_dev->ds.stencil_offset / 4,
|
2017-05-30 17:53:43 +01:00
|
|
|
stencil_address, 0);
|
2017-05-12 05:05:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
isl_emit_depth_stencil_hiz_s(isl_dev, dw, &info);
|
2019-11-13 22:30:57 +00:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
2021-03-30 01:15:41 +01:00
|
|
|
/* Wa_1408224581
|
2019-11-13 22:30:57 +00:00
|
|
|
*
|
2021-03-29 23:46:12 +01:00
|
|
|
* Workaround: Gfx12LP Astep only An additional pipe control with
|
2019-11-13 22:30:57 +00:00
|
|
|
* post-sync = store dword operation would be required.( w/a is to
|
|
|
|
* have an additional pipe control after the stencil state whenever
|
|
|
|
* the surface state bits of this state is changing).
|
2021-06-19 03:40:10 +01:00
|
|
|
*
|
|
|
|
* This also seems sufficient to handle Wa_14014148106.
|
2019-11-13 22:30:57 +00:00
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
|
|
|
pc.PostSyncOperation = WriteImmediateData;
|
2020-02-23 12:34:49 +00:00
|
|
|
pc.Address = blorp_get_workaround_address(batch);
|
2019-11-13 22:30:57 +00:00
|
|
|
}
|
|
|
|
#endif
|
2016-09-12 23:50:04 +01:00
|
|
|
}
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2017-01-10 20:06:42 +00:00
|
|
|
/* Emits the Optimized HiZ sequence specified in the BDW+ PRMs. The
|
|
|
|
* depth/stencil buffer extents are ignored to handle APIs which perform
|
|
|
|
* clearing operations without such information.
|
|
|
|
* */
|
|
|
|
static void
|
2021-03-29 23:40:04 +01:00
|
|
|
blorp_emit_gfx8_hiz_op(struct blorp_batch *batch,
|
2017-01-10 20:06:42 +00:00
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
/* We should be performing an operation on a depth or stencil buffer.
|
|
|
|
*/
|
|
|
|
assert(params->depth.enabled || params->stencil.enabled);
|
|
|
|
|
2021-11-26 16:22:40 +00:00
|
|
|
blorp_measure_start(batch, params);
|
|
|
|
|
2020-12-09 23:11:45 +00:00
|
|
|
/* The stencil buffer should only be enabled if a fast clear operation is
|
|
|
|
* requested.
|
2017-01-10 20:06:42 +00:00
|
|
|
*/
|
2020-12-09 23:11:45 +00:00
|
|
|
if (params->stencil.enabled)
|
2018-01-19 23:14:37 +00:00
|
|
|
assert(params->hiz_op == ISL_AUX_OP_FAST_CLEAR);
|
2017-01-10 20:06:42 +00:00
|
|
|
|
2017-09-11 12:12:15 +01:00
|
|
|
/* From the BDW PRM Volume 2, 3DSTATE_WM_HZ_OP:
|
|
|
|
*
|
|
|
|
* 3DSTATE_MULTISAMPLE packet must be used prior to this packet to change
|
|
|
|
* the Number of Multisamples. This packet must not be used to change
|
|
|
|
* Number of Multisamples in a rendering sequence.
|
|
|
|
*
|
|
|
|
* Since HIZ may be the first thing in a batch buffer, play safe and always
|
|
|
|
* emit 3DSTATE_MULTISAMPLE.
|
|
|
|
*/
|
|
|
|
blorp_emit_3dstate_multisample(batch, params);
|
|
|
|
|
2018-10-25 22:08:52 +01:00
|
|
|
/* From the BDW PRM Volume 7, Depth Buffer Clear:
|
|
|
|
*
|
|
|
|
* The clear value must be between the min and max depth values
|
|
|
|
* (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is
|
|
|
|
* D32_FLOAT, then +/-DENORM values are also allowed.
|
|
|
|
*
|
|
|
|
* Set the bounds to match our hardware limits, [0.0, 1.0].
|
|
|
|
*/
|
|
|
|
if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) {
|
|
|
|
assert(params->depth.clear_color.f32[0] >= 0.0f);
|
|
|
|
assert(params->depth.clear_color.f32[0] <= 1.0f);
|
|
|
|
blorp_emit_cc_viewport(batch);
|
|
|
|
}
|
|
|
|
|
2020-12-09 23:11:45 +00:00
|
|
|
/* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
|
|
|
|
* 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
|
|
|
|
* even when WM_HZ_OP is active. However, WM thread dispatch is normally
|
|
|
|
* disabled for HiZ ops and it appears that force-enabling it can lead to
|
|
|
|
* GPU hangs on at least Skylake. Since we don't know the current state of
|
|
|
|
* the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM), wm);
|
2018-10-16 20:58:18 +01:00
|
|
|
|
2017-01-10 20:06:42 +00:00
|
|
|
/* If we can't alter the depth stencil config and multiple layers are
|
|
|
|
* involved, the HiZ op will fail. This is because the op requires that a
|
|
|
|
* new config is emitted for each additional layer.
|
|
|
|
*/
|
|
|
|
if (batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) {
|
|
|
|
assert(params->num_layers <= 1);
|
|
|
|
} else {
|
|
|
|
blorp_emit_depth_stencil_config(batch, params);
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
|
|
|
|
switch (params->hiz_op) {
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FAST_CLEAR:
|
2017-01-10 20:06:42 +00:00
|
|
|
hzp.StencilBufferClearEnable = params->stencil.enabled;
|
|
|
|
hzp.DepthBufferClearEnable = params->depth.enabled;
|
|
|
|
hzp.StencilClearValue = params->stencil_ref;
|
2017-06-06 01:18:48 +01:00
|
|
|
hzp.FullSurfaceDepthandStencilClear = params->full_surface_hiz_op;
|
2017-01-10 20:06:42 +00:00
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_FULL_RESOLVE:
|
2017-06-06 01:18:48 +01:00
|
|
|
assert(params->full_surface_hiz_op);
|
2020-12-09 23:11:45 +00:00
|
|
|
hzp.DepthBufferResolveEnable = true;
|
2017-01-10 20:06:42 +00:00
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_AMBIGUATE:
|
2017-06-06 01:18:48 +01:00
|
|
|
assert(params->full_surface_hiz_op);
|
2017-01-10 20:06:42 +00:00
|
|
|
hzp.HierarchicalDepthBufferResolveEnable = true;
|
|
|
|
break;
|
2018-01-19 23:14:37 +00:00
|
|
|
case ISL_AUX_OP_PARTIAL_RESOLVE:
|
|
|
|
case ISL_AUX_OP_NONE:
|
2017-01-10 20:06:42 +00:00
|
|
|
unreachable("Invalid HIZ op");
|
|
|
|
}
|
|
|
|
|
|
|
|
hzp.NumberofMultisamples = ffs(params->num_samples) - 1;
|
|
|
|
hzp.SampleMask = 0xFFFF;
|
|
|
|
|
|
|
|
/* Due to a hardware issue, this bit MBZ */
|
|
|
|
assert(hzp.ScissorRectangleEnable == false);
|
|
|
|
|
|
|
|
/* Contrary to the HW docs both fields are inclusive */
|
|
|
|
hzp.ClearRectangleXMin = params->x0;
|
|
|
|
hzp.ClearRectangleYMin = params->y0;
|
|
|
|
|
|
|
|
/* Contrary to the HW docs both fields are exclusive */
|
|
|
|
hzp.ClearRectangleXMax = params->x1;
|
|
|
|
hzp.ClearRectangleYMax = params->y1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* PIPE_CONTROL w/ all bits clear except for “Post-Sync Operation” must set
|
|
|
|
* to “Write Immediate Data” enabled.
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
|
|
|
pc.PostSyncOperation = WriteImmediateData;
|
2020-02-23 12:34:49 +00:00
|
|
|
pc.Address = blorp_get_workaround_address(batch);
|
2017-01-10 20:06:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp);
|
2021-11-26 16:22:40 +00:00
|
|
|
|
|
|
|
blorp_measure_end(batch, params);
|
2017-01-10 20:06:42 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-03-05 16:52:35 +00:00
|
|
|
static void
|
2020-09-24 20:19:39 +01:00
|
|
|
blorp_update_clear_color(UNUSED struct blorp_batch *batch,
|
2018-03-05 16:52:35 +00:00
|
|
|
const struct brw_blorp_surface_info *info,
|
|
|
|
enum isl_aux_op op)
|
|
|
|
{
|
|
|
|
if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) {
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER == 11
|
2019-04-03 01:08:52 +01:00
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
|
|
|
|
pipe.CommandStreamerStallEnable = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 2 QWORDS */
|
|
|
|
const unsigned inlinedata_dw = 2 * 2;
|
|
|
|
const unsigned num_dwords = GENX(MI_ATOMIC_length) + inlinedata_dw;
|
|
|
|
|
|
|
|
struct blorp_address clear_addr = info->clear_color_addr;
|
|
|
|
uint32_t *dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,
|
|
|
|
.DataSize = MI_ATOMIC_QWORD,
|
|
|
|
.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,
|
|
|
|
.InlineData = true,
|
|
|
|
.MemoryAddress = clear_addr);
|
|
|
|
/* dw starts at dword 1, but we need to fill dwords 3 and 5 */
|
|
|
|
dw[2] = info->clear_color.u32[0];
|
2020-01-10 21:30:02 +00:00
|
|
|
dw[3] = 0;
|
2019-04-03 01:08:52 +01:00
|
|
|
dw[4] = info->clear_color.u32[1];
|
2020-01-10 21:30:02 +00:00
|
|
|
dw[5] = 0;
|
2019-04-03 01:08:52 +01:00
|
|
|
|
|
|
|
clear_addr.offset += 8;
|
|
|
|
dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,
|
|
|
|
.DataSize = MI_ATOMIC_QWORD,
|
|
|
|
.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,
|
|
|
|
.CSSTALL = true,
|
|
|
|
.ReturnDataControl = true,
|
|
|
|
.InlineData = true,
|
|
|
|
.MemoryAddress = clear_addr);
|
|
|
|
/* dw starts at dword 1, but we need to fill dwords 3 and 5 */
|
|
|
|
dw[2] = info->clear_color.u32[2];
|
2020-01-10 21:30:02 +00:00
|
|
|
dw[3] = 0;
|
2019-04-03 01:08:52 +01:00
|
|
|
dw[4] = info->clear_color.u32[3];
|
2020-01-10 21:30:02 +00:00
|
|
|
dw[5] = 0;
|
2019-04-03 01:08:52 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
|
|
|
|
pipe.StateCacheInvalidationEnable = true;
|
|
|
|
pipe.TextureCacheInvalidationEnable = true;
|
|
|
|
}
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 9
|
2019-09-18 17:44:02 +01:00
|
|
|
|
2021-03-30 01:15:41 +01:00
|
|
|
/* According to Wa_2201730850, in the Clear Color Programming Note
|
2019-09-18 17:44:02 +01:00
|
|
|
* under the Red channel, "Software shall write the converted Depth
|
|
|
|
* Clear to this dword." The only depth formats listed under the red
|
|
|
|
* channel are IEEE_FP and UNORM24_X8. These two requirements are
|
|
|
|
* incompatible with the UNORM16 depth format, so just ignore that case
|
|
|
|
* and simply perform the conversion for all depth formats.
|
|
|
|
*/
|
|
|
|
union isl_color_value fixed_color = info->clear_color;
|
2021-03-16 17:14:30 +00:00
|
|
|
if (GFX_VER == 12 && isl_surf_usage_is_depth(info->surf.usage)) {
|
2019-09-18 17:44:02 +01:00
|
|
|
isl_color_value_pack(&info->clear_color, info->surf.format,
|
|
|
|
fixed_color.u32);
|
|
|
|
}
|
|
|
|
|
2018-03-05 16:52:35 +00:00
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
|
|
sdi.Address = info->clear_color_addr;
|
|
|
|
sdi.Address.offset += i * 4;
|
2019-09-18 17:44:02 +01:00
|
|
|
sdi.ImmediateData = fixed_color.u32[i];
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
2019-04-24 21:05:20 +01:00
|
|
|
if (i == 3)
|
|
|
|
sdi.ForceWriteCompletionCheck = true;
|
|
|
|
#endif
|
2018-03-05 16:52:35 +00:00
|
|
|
}
|
|
|
|
}
|
2019-09-18 17:44:02 +01:00
|
|
|
|
|
|
|
/* The RENDER_SURFACE_STATE::ClearColor field states that software should
|
|
|
|
* write the converted depth value 16B after the clear address:
|
|
|
|
*
|
|
|
|
* 3D Sampler will always fetch clear depth from the location 16-bytes
|
|
|
|
* above this address, where the clear depth, converted to native
|
|
|
|
* surface format by software, will be stored.
|
|
|
|
*
|
|
|
|
*/
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 12
|
2019-09-18 17:44:02 +01:00
|
|
|
if (isl_surf_usage_is_depth(info->surf.usage)) {
|
|
|
|
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
|
|
sdi.Address = info->clear_color_addr;
|
|
|
|
sdi.Address.offset += 4 * 4;
|
|
|
|
sdi.ImmediateData = fixed_color.u32[0];
|
|
|
|
sdi.ForceWriteCompletionCheck = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#elif GFX_VER >= 7
|
2018-03-05 16:52:35 +00:00
|
|
|
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
|
|
sdi.Address = info->clear_color_addr;
|
|
|
|
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
|
|
|
|
ISL_CHANNEL_SELECT_GREEN << 22 |
|
|
|
|
ISL_CHANNEL_SELECT_BLUE << 19 |
|
|
|
|
ISL_CHANNEL_SELECT_ALPHA << 16;
|
|
|
|
if (isl_format_has_int_channel(info->view.format)) {
|
|
|
|
for (unsigned i = 0; i < 4; i++) {
|
|
|
|
assert(info->clear_color.u32[i] == 0 ||
|
|
|
|
info->clear_color.u32[i] == 1);
|
|
|
|
}
|
|
|
|
sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28;
|
|
|
|
} else {
|
|
|
|
for (unsigned i = 0; i < 4; i++) {
|
|
|
|
assert(info->clear_color.f32[i] == 0.0f ||
|
|
|
|
info->clear_color.f32[i] == 1.0f);
|
|
|
|
}
|
|
|
|
sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29;
|
|
|
|
sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-12 03:12:46 +01:00
|
|
|
static void
|
2018-10-26 20:52:44 +01:00
|
|
|
blorp_exec_3d(struct blorp_batch *batch, const struct blorp_params *params)
|
2017-05-12 03:12:46 +01:00
|
|
|
{
|
2018-04-27 01:09:29 +01:00
|
|
|
if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) {
|
|
|
|
blorp_update_clear_color(batch, ¶ms->dst, params->fast_clear_op);
|
|
|
|
blorp_update_clear_color(batch, ¶ms->depth, params->hiz_op);
|
|
|
|
}
|
2018-03-05 16:52:35 +00:00
|
|
|
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 8
|
2018-01-19 23:14:37 +00:00
|
|
|
if (params->hiz_op != ISL_AUX_OP_NONE) {
|
2021-03-29 23:40:04 +01:00
|
|
|
blorp_emit_gfx8_hiz_op(batch, params);
|
2017-05-12 03:12:46 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2021-11-26 16:22:40 +00:00
|
|
|
blorp_measure_start(batch, params);
|
|
|
|
|
2017-05-12 03:12:46 +01:00
|
|
|
blorp_emit_vertex_buffers(batch, params);
|
|
|
|
blorp_emit_vertex_elements(batch, params);
|
|
|
|
|
|
|
|
blorp_emit_pipeline(batch, params);
|
|
|
|
|
2018-10-26 20:48:40 +01:00
|
|
|
blorp_emit_btp(batch, blorp_setup_binding_table(batch, params));
|
2016-08-18 17:36:58 +01:00
|
|
|
|
2016-10-08 01:20:00 +01:00
|
|
|
if (!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
|
|
|
|
blorp_emit_depth_stencil_config(batch, params);
|
2016-08-18 17:36:58 +01:00
|
|
|
|
|
|
|
blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
|
|
|
|
prim.VertexAccessType = SEQUENTIAL;
|
|
|
|
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
2021-03-16 17:14:30 +00:00
|
|
|
#if GFX_VER >= 7
|
2017-04-25 21:32:34 +01:00
|
|
|
prim.PredicateEnable = batch->flags & BLORP_BATCH_PREDICATE_ENABLE;
|
|
|
|
#endif
|
2016-08-18 17:36:58 +01:00
|
|
|
prim.VertexCountPerInstance = 3;
|
|
|
|
prim.InstanceCount = params->num_layers;
|
|
|
|
}
|
2021-11-26 16:22:40 +00:00
|
|
|
|
|
|
|
blorp_measure_end(batch, params);
|
2016-08-18 17:36:58 +01:00
|
|
|
}
|
2016-10-11 18:26:21 +01:00
|
|
|
|
2018-10-26 20:52:44 +01:00
|
|
|
#if GFX_VER >= 7
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_get_compute_push_const(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params,
|
|
|
|
uint32_t threads,
|
|
|
|
uint32_t *state_offset,
|
|
|
|
unsigned *state_size)
|
|
|
|
{
|
|
|
|
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
|
|
|
const unsigned push_const_size =
|
|
|
|
ALIGN(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
|
|
|
|
assert(cs_prog_data->push.cross_thread.size +
|
|
|
|
cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));
|
|
|
|
|
|
|
|
if (push_const_size == 0) {
|
|
|
|
*state_offset = 0;
|
|
|
|
*state_size = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t push_const_offset;
|
|
|
|
uint32_t *push_const =
|
|
|
|
GFX_VERx10 >= 125 ?
|
|
|
|
blorp_alloc_general_state(batch, push_const_size, 64,
|
|
|
|
&push_const_offset) :
|
|
|
|
blorp_alloc_dynamic_state(batch, push_const_size, 64,
|
|
|
|
&push_const_offset);
|
|
|
|
memset(push_const, 0x0, push_const_size);
|
|
|
|
|
|
|
|
void *dst = push_const;
|
|
|
|
const void *src = (char *)¶ms->wm_inputs;
|
|
|
|
|
|
|
|
if (cs_prog_data->push.cross_thread.size > 0) {
|
|
|
|
memcpy(dst, src, cs_prog_data->push.cross_thread.size);
|
|
|
|
dst += cs_prog_data->push.cross_thread.size;
|
|
|
|
src += cs_prog_data->push.cross_thread.size;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(GFX_VERx10 < 125 || cs_prog_data->push.per_thread.size == 0);
|
|
|
|
#if GFX_VERx10 < 125
|
|
|
|
if (cs_prog_data->push.per_thread.size > 0) {
|
|
|
|
for (unsigned t = 0; t < threads; t++) {
|
|
|
|
memcpy(dst, src, (cs_prog_data->push.per_thread.dwords - 1) * 4);
|
|
|
|
|
|
|
|
uint32_t *subgroup_id = dst + cs_prog_data->push.per_thread.size - 4;
|
|
|
|
*subgroup_id = t;
|
|
|
|
|
|
|
|
dst += cs_prog_data->push.per_thread.size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
*state_offset = push_const_offset;
|
|
|
|
*state_size = push_const_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* GFX_VER >= 7 */
|
|
|
|
|
|
|
|
static void
|
|
|
|
blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
|
2021-10-05 00:06:05 +01:00
|
|
|
assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
|
2018-10-26 20:52:44 +01:00
|
|
|
assert(params->hiz_op == ISL_AUX_OP_NONE);
|
|
|
|
|
2021-11-26 16:22:40 +00:00
|
|
|
blorp_measure_start(batch, params);
|
|
|
|
|
2018-10-26 20:52:44 +01:00
|
|
|
#if GFX_VER >= 7
|
|
|
|
|
|
|
|
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
|
|
|
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
|
|
|
const struct brw_cs_dispatch_info dispatch =
|
|
|
|
brw_cs_get_dispatch_info(batch->blorp->compiler->devinfo, cs_prog_data,
|
|
|
|
NULL);
|
|
|
|
const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo;
|
|
|
|
|
|
|
|
uint32_t group_x0 = params->x0 / cs_prog_data->local_size[0];
|
|
|
|
uint32_t group_y0 = params->y0 / cs_prog_data->local_size[1];
|
|
|
|
uint32_t group_z0 = params->dst.z_offset;
|
|
|
|
uint32_t group_x1 = DIV_ROUND_UP(params->x1, cs_prog_data->local_size[0]);
|
|
|
|
uint32_t group_y1 = DIV_ROUND_UP(params->y1, cs_prog_data->local_size[1]);
|
|
|
|
assert(params->num_layers >= 1);
|
|
|
|
uint32_t group_z1 = params->dst.z_offset + params->num_layers;
|
|
|
|
assert(cs_prog_data->local_size[2] == 1);
|
|
|
|
|
|
|
|
#endif /* GFX_VER >= 7 */
|
|
|
|
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(CFE_STATE), cfe) {
|
|
|
|
cfe.MaximumNumberofThreads =
|
2022-06-24 03:19:03 +01:00
|
|
|
devinfo->max_cs_threads * devinfo->subslice_total;
|
2018-10-26 20:52:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(cs_prog_data->push.per_thread.regs == 0);
|
|
|
|
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
|
|
|
|
cw.SIMDSize = dispatch.simd_size / 16;
|
|
|
|
cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
|
|
|
|
cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
|
|
|
|
cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
|
|
|
|
cw.ThreadGroupIDStartingX = group_x0;
|
|
|
|
cw.ThreadGroupIDStartingY = group_y0;
|
|
|
|
cw.ThreadGroupIDStartingZ = group_z0;
|
|
|
|
cw.ThreadGroupIDXDimension = group_x1;
|
|
|
|
cw.ThreadGroupIDYDimension = group_y1;
|
|
|
|
cw.ThreadGroupIDZDimension = group_z1;
|
|
|
|
cw.ExecutionMask = 0xffffffff;
|
2021-11-01 06:22:42 +00:00
|
|
|
cw.PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false);
|
2018-10-26 20:52:44 +01:00
|
|
|
|
|
|
|
uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
|
|
|
|
|
|
|
|
uint32_t samplers_offset =
|
|
|
|
params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
|
|
|
|
|
|
|
|
uint32_t push_const_offset;
|
|
|
|
unsigned push_const_size;
|
|
|
|
blorp_get_compute_push_const(batch, params, dispatch.threads,
|
|
|
|
&push_const_offset, &push_const_size);
|
|
|
|
cw.IndirectDataStartAddress = push_const_offset;
|
|
|
|
cw.IndirectDataLength = push_const_size;
|
|
|
|
|
|
|
|
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
|
|
|
.KernelStartPointer = params->cs_prog_kernel,
|
|
|
|
.SamplerStatePointer = samplers_offset,
|
|
|
|
.SamplerCount = params->src.enabled ? 1 : 0,
|
|
|
|
.BindingTableEntryCount = params->src.enabled ? 2 : 1,
|
|
|
|
.BindingTablePointer = surfaces_offset,
|
|
|
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
|
|
|
.SharedLocalMemorySize =
|
|
|
|
encode_slm_size(GFX_VER, prog_data->total_shared),
|
|
|
|
.NumberOfBarriers = cs_prog_data->uses_barrier,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
#elif GFX_VER >= 7
|
|
|
|
|
|
|
|
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:
|
|
|
|
*
|
|
|
|
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
|
|
|
|
* the only bits that are changed are scoreboard related: Scoreboard
|
|
|
|
* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
|
|
|
|
* these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
|
|
|
|
*
|
|
|
|
* Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
|
|
|
|
* but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
|
|
|
|
*/
|
|
|
|
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
|
|
|
pc.CommandStreamerStallEnable = true;
|
|
|
|
pc.StallAtPixelScoreboard = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(MEDIA_VFE_STATE), vfe) {
|
|
|
|
assert(prog_data->total_scratch == 0);
|
|
|
|
vfe.MaximumNumberofThreads =
|
|
|
|
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
|
|
|
vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0;
|
|
|
|
#if GFX_VER < 11
|
|
|
|
vfe.ResetGatewayTimer =
|
|
|
|
Resettingrelativetimerandlatchingtheglobaltimestamp;
|
|
|
|
#endif
|
|
|
|
#if GFX_VER < 9
|
|
|
|
vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
|
|
|
|
#endif
|
|
|
|
#if GFX_VER == 7
|
2021-06-29 09:52:16 +01:00
|
|
|
vfe.GPGPUMode = true;
|
2018-10-26 20:52:44 +01:00
|
|
|
#endif
|
|
|
|
vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0;
|
|
|
|
|
|
|
|
const uint32_t vfe_curbe_allocation =
|
|
|
|
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
|
|
|
|
cs_prog_data->push.cross_thread.regs, 2);
|
|
|
|
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t push_const_offset;
|
|
|
|
unsigned push_const_size;
|
|
|
|
blorp_get_compute_push_const(batch, params, dispatch.threads,
|
|
|
|
&push_const_offset, &push_const_size);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
|
|
|
|
curbe.CURBETotalDataLength = push_const_size;
|
|
|
|
curbe.CURBEDataStartAddress = push_const_offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
|
|
|
|
|
|
|
|
uint32_t samplers_offset =
|
|
|
|
params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
|
|
|
|
|
|
|
|
struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
|
|
|
|
.KernelStartPointer = params->cs_prog_kernel,
|
|
|
|
.SamplerStatePointer = samplers_offset,
|
|
|
|
.SamplerCount = params->src.enabled ? 1 : 0,
|
|
|
|
.BindingTableEntryCount = params->src.enabled ? 2 : 1,
|
|
|
|
.BindingTablePointer = surfaces_offset,
|
|
|
|
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
|
|
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
|
|
|
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
|
|
|
prog_data->total_shared),
|
|
|
|
.BarrierEnable = cs_prog_data->uses_barrier,
|
|
|
|
#if GFX_VER >= 8 || GEN_IS_HASWELL
|
|
|
|
.CrossThreadConstantDataReadLength =
|
|
|
|
cs_prog_data->push.cross_thread.regs,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
uint32_t idd_offset;
|
|
|
|
uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
|
|
|
|
void *state = blorp_alloc_dynamic_state(batch, size, 64, &idd_offset);
|
|
|
|
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, state, &idd);
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {
|
|
|
|
mid.InterfaceDescriptorTotalLength = size;
|
|
|
|
mid.InterfaceDescriptorDataStartAddress = idd_offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
blorp_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
|
|
|
ggw.SIMDSize = dispatch.simd_size / 16;
|
|
|
|
ggw.ThreadDepthCounterMaximum = 0;
|
|
|
|
ggw.ThreadHeightCounterMaximum = 0;
|
|
|
|
ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
|
|
|
|
ggw.ThreadGroupIDStartingX = group_x0;
|
|
|
|
ggw.ThreadGroupIDStartingY = group_y0;
|
|
|
|
#if GFX_VER >= 8
|
|
|
|
ggw.ThreadGroupIDStartingResumeZ = group_z0;
|
|
|
|
#else
|
|
|
|
ggw.ThreadGroupIDStartingZ = group_z0;
|
|
|
|
#endif
|
|
|
|
ggw.ThreadGroupIDXDimension = group_x1;
|
|
|
|
ggw.ThreadGroupIDYDimension = group_y1;
|
|
|
|
ggw.ThreadGroupIDZDimension = group_z1;
|
|
|
|
ggw.RightExecutionMask = dispatch.right_mask;
|
|
|
|
ggw.BottomExecutionMask = 0xffffffff;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* GFX_VER >= 7 */
|
|
|
|
|
|
|
|
unreachable("Compute blorp is not supported on SNB and earlier");
|
|
|
|
|
|
|
|
#endif /* GFX_VER >= 7 */
|
2021-11-26 16:22:40 +00:00
|
|
|
|
|
|
|
blorp_measure_end(batch, params);
|
2018-10-26 20:52:44 +01:00
|
|
|
}
|
|
|
|
|
2021-10-26 03:02:42 +01:00
|
|
|
/* -----------------------------------------------------------------------
|
|
|
|
* -- BLORP on blitter
|
|
|
|
* -----------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "isl/isl_genX_helpers.h"
|
|
|
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
static uint32_t
|
|
|
|
xy_bcb_tiling(const struct isl_surf *surf)
|
|
|
|
{
|
|
|
|
switch (surf->tiling) {
|
|
|
|
case ISL_TILING_LINEAR:
|
|
|
|
return XY_TILE_LINEAR;
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
case ISL_TILING_X:
|
|
|
|
return XY_TILE_X;
|
|
|
|
case ISL_TILING_4:
|
|
|
|
return XY_TILE_4;
|
|
|
|
case ISL_TILING_64:
|
|
|
|
return XY_TILE_64;
|
|
|
|
#else
|
|
|
|
case ISL_TILING_Y0:
|
|
|
|
return XY_TILE_Y;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
unreachable("Invalid tiling for XY_BLOCK_COPY_BLT");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
xy_color_depth(const struct isl_format_layout *fmtl)
|
|
|
|
{
|
|
|
|
switch (fmtl->bpb) {
|
|
|
|
case 128: return XY_BPP_128_BIT;
|
|
|
|
case 96: return XY_BPP_96_BIT;
|
|
|
|
case 64: return XY_BPP_64_BIT;
|
|
|
|
case 32: return XY_BPP_32_BIT;
|
|
|
|
case 16: return XY_BPP_16_BIT;
|
|
|
|
case 8: return XY_BPP_8_BIT;
|
|
|
|
default:
|
|
|
|
unreachable("Invalid bpp");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
static uint32_t
|
|
|
|
xy_bcb_surf_dim(const struct isl_surf *surf)
|
|
|
|
{
|
|
|
|
switch (surf->dim) {
|
|
|
|
case ISL_SURF_DIM_1D:
|
|
|
|
return XY_SURFTYPE_1D;
|
|
|
|
case ISL_SURF_DIM_2D:
|
|
|
|
return XY_SURFTYPE_2D;
|
|
|
|
case ISL_SURF_DIM_3D:
|
|
|
|
return XY_SURFTYPE_3D;
|
|
|
|
default:
|
|
|
|
unreachable("Invalid dimensionality for XY_BLOCK_COPY_BLT");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
xy_bcb_surf_depth(const struct isl_surf *surf)
|
|
|
|
{
|
|
|
|
return surf->dim == ISL_SURF_DIM_3D ? surf->logical_level0_px.depth
|
|
|
|
: surf->logical_level0_px.array_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
xy_aux_mode(const struct brw_blorp_surface_info *info)
|
|
|
|
{
|
|
|
|
switch (info->aux_usage) {
|
|
|
|
case ISL_AUX_USAGE_CCS_E:
|
|
|
|
case ISL_AUX_USAGE_GFX12_CCS_E:
|
|
|
|
return XY_CCS_E;
|
|
|
|
case ISL_AUX_USAGE_NONE:
|
|
|
|
return XY_NONE;
|
|
|
|
default:
|
|
|
|
unreachable("Unsupported aux mode");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Emit a single XY_BLOCK_COPY_BLT copying the rectangle
 * (x0, y0)-(x1, y1) of the destination from the source.
 *
 * The source rectangle is the destination rectangle shifted by the
 * coord_transform offsets in params->wm_inputs; both rectangles must have
 * the same size.  Only single-layer, single-level, single-sampled copies
 * are accepted, and 96-bit formats require both surfaces to be linear.
 * Before Gfx12.5 the destination must also be at array layer 0 with a zero
 * Z offset.
 *
 * The batch must be a blitter batch that allows clear-color updates and
 * does not request predication, and params must not carry a HiZ op.
 * Before Gfx12 the function hits unreachable().
 */
UNUSED static void
blorp_xy_block_copy_blt(struct blorp_batch *batch,
                        const struct blorp_params *params)
{
#if GFX_VER < 12
   unreachable("Blitter is only supported on Gfx12+");
#else
   UNUSED const struct isl_device *isl_dev = batch->blorp->isl_dev;

   assert(batch->flags & BLORP_BATCH_USE_BLITTER);
   assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
   assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
   assert(params->hiz_op == ISL_AUX_OP_NONE);

   assert(params->num_layers == 1);
   assert(params->dst.view.levels == 1);
   assert(params->src.view.levels == 1);

#if GFX_VERx10 < 125
   assert(params->dst.view.base_array_layer == 0);
   assert(params->dst.z_offset == 0);
#endif

   /* The source rectangle is the destination rectangle minus the
    * per-axis coordinate transform offset.
    */
   unsigned dst_x0 = params->x0;
   unsigned dst_x1 = params->x1;
   unsigned src_x0 =
      dst_x0 - params->wm_inputs.coord_transform[0].offset;
   ASSERTED unsigned src_x1 =
      dst_x1 - params->wm_inputs.coord_transform[0].offset;
   unsigned dst_y0 = params->y0;
   unsigned dst_y1 = params->y1;
   unsigned src_y0 =
      dst_y0 - params->wm_inputs.coord_transform[1].offset;
   ASSERTED unsigned src_y1 =
      dst_y1 - params->wm_inputs.coord_transform[1].offset;

   assert(src_x1 - src_x0 == dst_x1 - dst_x0);
   assert(src_y1 - src_y0 == dst_y1 - dst_y0);

   const struct isl_surf *src_surf = &params->src.surf;
   const struct isl_surf *dst_surf = &params->dst.surf;

   const struct isl_format_layout *fmtl =
      isl_format_get_layout(params->dst.view.format);

   if (fmtl->bpb == 96) {
      assert(src_surf->tiling == ISL_TILING_LINEAR &&
             dst_surf->tiling == ISL_TILING_LINEAR);
   }

   assert(src_surf->samples == 1);
   assert(dst_surf->samples == 1);

   /* Pitch is programmed in bytes for linear surfaces and in units of four
    * bytes for tiled ones.
    */
   unsigned dst_pitch_unit = dst_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
   unsigned src_pitch_unit = src_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;

#if GFX_VERx10 >= 125
   struct isl_extent3d src_align = isl_get_image_alignment(src_surf);
   struct isl_extent3d dst_align = isl_get_image_alignment(dst_surf);
#endif

   blorp_emit(batch, GENX(XY_BLOCK_COPY_BLT), blt) {
      blt.ColorDepth = xy_color_depth(fmtl);

      blt.DestinationPitch = (dst_surf->row_pitch_B / dst_pitch_unit) - 1;
      blt.DestinationMOCS = params->dst.addr.mocs;
      blt.DestinationTiling = xy_bcb_tiling(dst_surf);
      blt.DestinationX1 = dst_x0;
      blt.DestinationY1 = dst_y0;
      blt.DestinationX2 = dst_x1;
      blt.DestinationY2 = dst_y1;
      blt.DestinationBaseAddress = params->dst.addr;
      blt.DestinationXOffset = params->dst.tile_x_sa;
      blt.DestinationYOffset = params->dst.tile_y_sa;

#if GFX_VERx10 >= 125
      blt.DestinationSurfaceType = xy_bcb_surf_dim(dst_surf);
      blt.DestinationSurfaceWidth = dst_surf->logical_level0_px.w - 1;
      blt.DestinationSurfaceHeight = dst_surf->logical_level0_px.h - 1;
      blt.DestinationSurfaceDepth = xy_bcb_surf_depth(dst_surf) - 1;
      blt.DestinationArrayIndex =
         params->dst.view.base_array_layer + params->dst.z_offset;
      blt.DestinationSurfaceQPitch = isl_get_qpitch(dst_surf) >> 2;
      blt.DestinationLOD = params->dst.view.base_level;
      blt.DestinationMipTailStartLOD = 15;
      blt.DestinationHorizontalAlign = isl_encode_halign(dst_align.width);
      blt.DestinationVerticalAlign = isl_encode_valign(dst_align.height);
      blt.DestinationDepthStencilResource = false;
      blt.DestinationTargetMemory =
         params->dst.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;

      if (params->dst.aux_usage != ISL_AUX_USAGE_NONE) {
         blt.DestinationAuxiliarySurfaceMode = xy_aux_mode(&params->dst);
         blt.DestinationCompressionEnable = true;
         blt.DestinationCompressionFormat =
            isl_get_render_compression_format(dst_surf->format);
         blt.DestinationClearValueEnable = !!params->dst.clear_color_addr.buffer;
         blt.DestinationClearAddress = params->dst.clear_color_addr;
      }
#endif

      blt.SourceX1 = src_x0;
      blt.SourceY1 = src_y0;
      blt.SourcePitch = (src_surf->row_pitch_B / src_pitch_unit) - 1;
      blt.SourceMOCS = params->src.addr.mocs;
      blt.SourceTiling = xy_bcb_tiling(src_surf);
      blt.SourceBaseAddress = params->src.addr;
      blt.SourceXOffset = params->src.tile_x_sa;
      blt.SourceYOffset = params->src.tile_y_sa;

#if GFX_VERx10 >= 125
      blt.SourceSurfaceType = xy_bcb_surf_dim(src_surf);
      blt.SourceSurfaceWidth = src_surf->logical_level0_px.w - 1;
      blt.SourceSurfaceHeight = src_surf->logical_level0_px.h - 1;
      blt.SourceSurfaceDepth = xy_bcb_surf_depth(src_surf) - 1;
      blt.SourceArrayIndex =
         params->src.view.base_array_layer + params->src.z_offset;
      blt.SourceSurfaceQPitch = isl_get_qpitch(src_surf) >> 2;
      blt.SourceLOD = params->src.view.base_level;
      blt.SourceMipTailStartLOD = 15;
      blt.SourceHorizontalAlign = isl_encode_halign(src_align.width);
      blt.SourceVerticalAlign = isl_encode_valign(src_align.height);
      blt.SourceDepthStencilResource = false;
      blt.SourceTargetMemory =
         params->src.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;

      if (params->src.aux_usage != ISL_AUX_USAGE_NONE) {
         blt.SourceAuxiliarySurfaceMode = xy_aux_mode(&params->src);
         blt.SourceCompressionEnable = true;
         blt.SourceCompressionFormat =
            isl_get_render_compression_format(src_surf->format);
         blt.SourceClearValueEnable = !!params->src.clear_color_addr.buffer;
         blt.SourceClearAddress = params->src.clear_color_addr;
      }

      /* XeHP needs special MOCS values for the blitter; these replace the
       * per-address MOCS values assigned above.
       */
      blt.DestinationMOCS = isl_dev->mocs.blitter_dst;
      blt.SourceMOCS = isl_dev->mocs.blitter_src;
#endif
   }
#endif
}
|
2018-10-26 20:52:44 +01:00
|
|
|
|
2022-02-24 18:16:27 +00:00
|
|
|
static void
|
|
|
|
blorp_exec_blitter(struct blorp_batch *batch,
|
|
|
|
const struct blorp_params *params)
|
|
|
|
{
|
|
|
|
blorp_measure_start(batch, params);
|
|
|
|
|
|
|
|
/* Someday, if we implement clears on the blit enginer, we can
|
|
|
|
* use params->src.enabled to determine which case we're in.
|
|
|
|
*/
|
|
|
|
assert(params->src.enabled);
|
|
|
|
blorp_xy_block_copy_blt(batch, params);
|
|
|
|
|
|
|
|
blorp_measure_end(batch, params);
|
|
|
|
}
|
|
|
|
|
2018-10-26 20:52:44 +01:00
|
|
|
/**
|
|
|
|
* \brief Execute a blit or render pass operation.
|
|
|
|
*
|
|
|
|
* To execute the operation, this function manually constructs and emits a
|
|
|
|
* batch to draw a rectangle primitive. The batchbuffer is flushed before
|
|
|
|
* constructing and after emitting the batch.
|
|
|
|
*
|
|
|
|
* This function alters no GL state.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
|
|
|
|
{
|
2021-10-26 03:02:42 +01:00
|
|
|
if (batch->flags & BLORP_BATCH_USE_BLITTER) {
|
2022-02-24 18:16:27 +00:00
|
|
|
blorp_exec_blitter(batch, params);
|
2021-10-26 03:02:42 +01:00
|
|
|
} else if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
|
2018-10-26 20:52:44 +01:00
|
|
|
blorp_exec_compute(batch, params);
|
2021-10-26 03:02:42 +01:00
|
|
|
} else {
|
2018-10-26 20:52:44 +01:00
|
|
|
blorp_exec_3d(batch, params);
|
2021-10-26 03:02:42 +01:00
|
|
|
}
|
2018-10-26 20:52:44 +01:00
|
|
|
}
|
|
|
|
|
2016-10-11 18:26:21 +01:00
|
|
|
#endif /* BLORP_GENX_EXEC_H */
|