mirror of https://gitlab.freedesktop.org/mesa/mesa
3701 lines
134 KiB
C
3701 lines
134 KiB
C
/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
|
||
#include "nvk_buffer.h"
|
||
#include "nvk_entrypoints.h"
|
||
#include "nvk_cmd_buffer.h"
|
||
#include "nvk_device.h"
|
||
#include "nvk_format.h"
|
||
#include "nvk_image.h"
|
||
#include "nvk_image_view.h"
|
||
#include "nvk_mme.h"
|
||
#include "nvk_physical_device.h"
|
||
#include "nvk_shader.h"
|
||
|
||
#include "util/bitpack_helpers.h"
|
||
#include "vk_format.h"
|
||
#include "vk_render_pass.h"
|
||
#include "vk_standard_sample_locations.h"
|
||
|
||
#include "nouveau_context.h"
|
||
|
||
#include "nv_push_cl902d.h"
|
||
#include "nv_push_cl9097.h"
|
||
#include "nv_push_cl90b5.h"
|
||
#include "nv_push_cl90c0.h"
|
||
#include "nv_push_cla097.h"
|
||
#include "nv_push_clb097.h"
|
||
#include "nv_push_clb197.h"
|
||
#include "nv_push_clc397.h"
|
||
#include "nv_push_clc597.h"
|
||
#include "drf.h"
|
||
|
||
static inline uint16_t
|
||
nvk_cmd_buffer_3d_cls(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||
return pdev->info.cls_eng3d;
|
||
}
|
||
|
||
static void
|
||
mme_set_priv_reg(struct mme_builder *b,
|
||
struct mme_value value,
|
||
struct mme_value mask,
|
||
struct mme_value reg)
|
||
{
|
||
mme_mthd(b, NV9097_WAIT_FOR_IDLE);
|
||
mme_emit(b, mme_zero());
|
||
|
||
mme_mthd(b, NV9097_SET_MME_SHADOW_SCRATCH(0));
|
||
mme_emit(b, mme_zero());
|
||
mme_emit(b, value);
|
||
mme_emit(b, mask);
|
||
|
||
mme_mthd(b, NV9097_SET_FALCON04);
|
||
mme_emit(b, reg);
|
||
|
||
struct mme_value loop_cond = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, loop_cond, mme_imm(1)) {
|
||
mme_state_to(b, loop_cond, NV9097_SET_MME_SHADOW_SCRATCH(0));
|
||
mme_mthd(b, NV9097_NO_OPERATION);
|
||
mme_emit(b, mme_zero());
|
||
};
|
||
}
|
||
|
||
void
|
||
nvk_mme_set_priv_reg(struct mme_builder *b)
|
||
{
|
||
struct mme_value value = mme_load(b);
|
||
struct mme_value mask = mme_load(b);
|
||
struct mme_value reg = mme_load(b);
|
||
|
||
mme_set_priv_reg(b, value, mask, reg);
|
||
}
|
||
|
||
void
|
||
nvk_mme_set_conservative_raster_state(struct mme_builder *b)
|
||
{
|
||
struct mme_value new_state = mme_load(b);
|
||
struct mme_value old_state =
|
||
nvk_mme_load_scratch(b, CONSERVATIVE_RASTER_STATE);
|
||
|
||
mme_if(b, ine, new_state, old_state) {
|
||
nvk_mme_store_scratch(b, CONSERVATIVE_RASTER_STATE, new_state);
|
||
mme_set_priv_reg(b, new_state, mme_imm(BITFIELD_RANGE(23, 2)),
|
||
mme_imm(0x418800));
|
||
}
|
||
}
|
||
|
||
VkResult
|
||
nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
|
||
{
|
||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||
|
||
/* 3D state */
|
||
P_MTHD(p, NV9097, SET_OBJECT);
|
||
P_NV9097_SET_OBJECT(p, {
|
||
.class_id = pdev->info.cls_eng3d,
|
||
.engine_id = 0,
|
||
});
|
||
|
||
for (uint32_t mme = 0, mme_pos = 0; mme < NVK_MME_COUNT; mme++) {
|
||
size_t size;
|
||
uint32_t *dw = nvk_build_mme(&pdev->info, mme, &size);
|
||
if (dw == NULL)
|
||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||
|
||
assert(size % sizeof(uint32_t) == 0);
|
||
const uint32_t num_dw = size / sizeof(uint32_t);
|
||
|
||
P_MTHD(p, NV9097, LOAD_MME_START_ADDRESS_RAM_POINTER);
|
||
P_NV9097_LOAD_MME_START_ADDRESS_RAM_POINTER(p, mme);
|
||
P_NV9097_LOAD_MME_START_ADDRESS_RAM(p, mme_pos);
|
||
|
||
P_1INC(p, NV9097, LOAD_MME_INSTRUCTION_RAM_POINTER);
|
||
P_NV9097_LOAD_MME_INSTRUCTION_RAM_POINTER(p, mme_pos);
|
||
P_INLINE_ARRAY(p, dw, num_dw);
|
||
|
||
mme_pos += num_dw;
|
||
|
||
free(dw);
|
||
}
|
||
|
||
if (pdev->info.cls_eng3d >= TURING_A)
|
||
P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);
|
||
|
||
/* Enable FP helper invocation memory loads
|
||
*
|
||
* For generations with firmware support for our `SET_PRIV_REG` mme method
|
||
* we simply use that. On older generations we'll let the kernel do it.
|
||
* Starting with GSP we have to do it via the firmware anyway.
|
||
*
|
||
* This clears bit 3 of gr_gpcs_tpcs_sm_disp_ctrl
|
||
*
|
||
* Without it,
|
||
* dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec2_fragment will
|
||
* occasionally fail.
|
||
*/
|
||
if (pdev->info.cls_eng3d >= MAXWELL_B) {
|
||
unsigned reg = pdev->info.cls_eng3d >= VOLTA_A ? 0x419ba4 : 0x419f78;
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_PRIV_REG));
|
||
P_INLINE_DATA(p, 0);
|
||
P_INLINE_DATA(p, BITFIELD_BIT(3));
|
||
P_INLINE_DATA(p, reg);
|
||
}
|
||
|
||
/* Disable Out Of Range Address exceptions
|
||
*
|
||
* From the SPH documentation:
|
||
*
|
||
* "The SPH fields StoreReqStart and StoreReqEnd set a range of
|
||
* attributes whose corresponding Odmap values of ST or ST_LAST are
|
||
* treated as ST_REQ. Normally, for an attribute whose Omap bit is TRUE
|
||
* and Odmap value is ST, when the shader writes data to this output, it
|
||
* can not count on being able to read it back, since the next
|
||
* downstream shader might have its Imap bit FALSE, thereby causing the
|
||
* Bmap bit to be FALSE. By including a ST type of attribute in the
|
||
* range of StoreReqStart and StoreReqEnd, the attribute’s Odmap value
|
||
* is treated as ST_REQ, so an Omap bit being TRUE causes the Bmap bit
|
||
* to be TRUE. This guarantees the shader program can output the value
|
||
* and then read it back later. This will save register space."
|
||
*
|
||
* It's unclear exactly what's going on but this seems to imply that the
|
||
* hardware actually ANDs the output mask of one shader stage together with
|
||
* the input mask of the subsequent shader stage to determine which values
|
||
* are actually used.
|
||
*
|
||
* In the case when we have an empty fragment shader, it seems the hardware
|
||
* doesn't allocate any output memory for final geometry stage at all and
|
||
* so any writes to outputs from the final shader stage generates an Out Of
|
||
* Range Address exception. We could fix this by eliminating unused
|
||
* outputs via cross-stage linking but that won't work in the case of
|
||
* VK_EXT_shader_object and VK_EXT_graphics_pipeline_library fast-link.
|
||
* Instead, the easiest solution is to just disable the exception.
|
||
*
|
||
* NOTE (Faith):
|
||
*
|
||
* This above analysis is 100% conjecture on my part based on a creative
|
||
* reading of the SPH docs and what I saw when trying to run certain
|
||
* OpenGL CTS tests on NVK + Zink. Without access to NVIDIA HW
|
||
* engineers, have no way of verifying this analysis.
|
||
*
|
||
* The CTS test in question is:
|
||
*
|
||
* KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_tessLevel
|
||
*
|
||
* This should also prevent any issues with array overruns on I/O arrays.
|
||
* Before, they would get an exception and kill the context whereas now
|
||
* they should gently get ignored.
|
||
*
|
||
* This clears bit 14 of gr_gpcs_tpcs_sms_hww_warp_esr_report_mask
|
||
*/
|
||
if (pdev->info.cls_eng3d >= MAXWELL_B) {
|
||
unsigned reg = pdev->info.cls_eng3d >= VOLTA_A ? 0x419ea8 : 0x419e44;
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_PRIV_REG));
|
||
P_INLINE_DATA(p, 0);
|
||
P_INLINE_DATA(p, BITFIELD_BIT(14));
|
||
P_INLINE_DATA(p, reg);
|
||
}
|
||
|
||
/* Set CONSERVATIVE_RASTER_STATE to an invalid value, to ensure the
|
||
* hardware reg is always set the first time conservative rasterization
|
||
* is enabled */
|
||
P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CONSERVATIVE_RASTER_STATE),
|
||
~0);
|
||
|
||
P_IMMD(p, NV9097, SET_RENDER_ENABLE_C, MODE_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_Z_COMPRESSION, ENABLE_TRUE);
|
||
P_MTHD(p, NV9097, SET_COLOR_COMPRESSION(0));
|
||
for (unsigned i = 0; i < 8; i++)
|
||
P_NV9097_SET_COLOR_COMPRESSION(p, i, ENABLE_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_CT_SELECT, { .target_count = 1 });
|
||
|
||
// P_MTHD(cmd->push, NVC0_3D, CSAA_ENABLE);
|
||
// P_INLINE_DATA(cmd->push, 0);
|
||
|
||
P_IMMD(p, NV9097, SET_ALIASED_LINE_WIDTH_ENABLE, V_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART_VERTEX_ARRAY, ENABLE_FALSE);
|
||
|
||
P_IMMD(p, NV9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_SINGLE_CT_WRITE_CONTROL, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_SINGLE_ROP_CONTROL, ENABLE_FALSE);
|
||
P_IMMD(p, NV9097, SET_TWO_SIDED_STENCIL_TEST, ENABLE_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_SHADE_MODE, V_OGL_SMOOTH);
|
||
|
||
P_IMMD(p, NV9097, SET_API_VISIBLE_CALL_LIMIT, V__128);
|
||
|
||
P_IMMD(p, NV9097, SET_ZCULL_STATS, ENABLE_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_L1_CONFIGURATION,
|
||
DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
|
||
|
||
P_IMMD(p, NV9097, SET_REDUCE_COLOR_THRESHOLDS_ENABLE, V_FALSE);
|
||
P_IMMD(p, NV9097, SET_REDUCE_COLOR_THRESHOLDS_UNORM8, {
|
||
.all_covered_all_hit_once = 0xff,
|
||
});
|
||
P_MTHD(p, NV9097, SET_REDUCE_COLOR_THRESHOLDS_UNORM10);
|
||
P_NV9097_SET_REDUCE_COLOR_THRESHOLDS_UNORM10(p, {
|
||
.all_covered_all_hit_once = 0xff,
|
||
});
|
||
P_NV9097_SET_REDUCE_COLOR_THRESHOLDS_UNORM16(p, {
|
||
.all_covered_all_hit_once = 0xff,
|
||
});
|
||
P_NV9097_SET_REDUCE_COLOR_THRESHOLDS_FP11(p, {
|
||
.all_covered_all_hit_once = 0x3f,
|
||
});
|
||
P_NV9097_SET_REDUCE_COLOR_THRESHOLDS_FP16(p, {
|
||
.all_covered_all_hit_once = 0xff,
|
||
});
|
||
P_NV9097_SET_REDUCE_COLOR_THRESHOLDS_SRGB8(p, {
|
||
.all_covered_all_hit_once = 0xff,
|
||
});
|
||
|
||
if (pdev->info.cls_eng3d < VOLTA_A)
|
||
P_IMMD(p, NV9097, SET_ALPHA_FRACTION, 0x3f);
|
||
|
||
P_IMMD(p, NV9097, CHECK_SPH_VERSION, {
|
||
.current = 3,
|
||
.oldest_supported = 3,
|
||
});
|
||
P_IMMD(p, NV9097, CHECK_AAM_VERSION, {
|
||
.current = 2,
|
||
.oldest_supported = 2,
|
||
});
|
||
|
||
if (pdev->info.cls_eng3d < MAXWELL_A)
|
||
P_IMMD(p, NV9097, SET_SHADER_SCHEDULING, MODE_OLDEST_THREAD_FIRST);
|
||
|
||
P_IMMD(p, NV9097, SET_L2_CACHE_CONTROL_FOR_ROP_PREFETCH_READ_REQUESTS,
|
||
POLICY_EVICT_NORMAL);
|
||
P_IMMD(p, NV9097, SET_L2_CACHE_CONTROL_FOR_ROP_NONINTERLOCKED_READ_REQUESTS,
|
||
POLICY_EVICT_NORMAL);
|
||
P_IMMD(p, NV9097, SET_L2_CACHE_CONTROL_FOR_ROP_INTERLOCKED_READ_REQUESTS,
|
||
POLICY_EVICT_NORMAL);
|
||
P_IMMD(p, NV9097, SET_L2_CACHE_CONTROL_FOR_ROP_NONINTERLOCKED_WRITE_REQUESTS,
|
||
POLICY_EVICT_NORMAL);
|
||
P_IMMD(p, NV9097, SET_L2_CACHE_CONTROL_FOR_ROP_INTERLOCKED_WRITE_REQUESTS,
|
||
POLICY_EVICT_NORMAL);
|
||
|
||
P_IMMD(p, NV9097, SET_BLEND_PER_FORMAT_ENABLE, SNORM8_UNORM16_SNORM16_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_ATTRIBUTE_DEFAULT, {
|
||
.color_front_diffuse = COLOR_FRONT_DIFFUSE_VECTOR_0001,
|
||
.color_front_specular = COLOR_FRONT_SPECULAR_VECTOR_0001,
|
||
.generic_vector = GENERIC_VECTOR_VECTOR_0001,
|
||
.fixed_fnc_texture = FIXED_FNC_TEXTURE_VECTOR_0001,
|
||
.dx9_color0 = DX9_COLOR0_VECTOR_0001,
|
||
.dx9_color1_to_color15 = DX9_COLOR1_TO_COLOR15_VECTOR_0000,
|
||
});
|
||
|
||
P_IMMD(p, NV9097, SET_DA_OUTPUT, VERTEX_ID_USES_ARRAY_START_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_RENDER_ENABLE_CONTROL,
|
||
CONDITIONAL_LOAD_CONSTANT_BUFFER_FALSE);
|
||
|
||
P_IMMD(p, NV9097, SET_PS_OUTPUT_SAMPLE_MASK_USAGE, {
|
||
.enable = ENABLE_TRUE,
|
||
.qualify_by_anti_alias_enable = QUALIFY_BY_ANTI_ALIAS_ENABLE_ENABLE,
|
||
});
|
||
|
||
if (pdev->info.cls_eng3d < VOLTA_A)
|
||
P_IMMD(p, NV9097, SET_PRIM_CIRCULAR_BUFFER_THROTTLE, 0x3fffff);
|
||
|
||
P_IMMD(p, NV9097, SET_BLEND_OPT_CONTROL, ALLOW_FLOAT_PIXEL_KILLS_TRUE);
|
||
P_IMMD(p, NV9097, SET_BLEND_FLOAT_OPTION, ZERO_TIMES_ANYTHING_IS_ZERO_TRUE);
|
||
P_IMMD(p, NV9097, SET_BLEND_STATE_PER_TARGET, ENABLE_TRUE);
|
||
|
||
if (pdev->info.cls_eng3d < MAXWELL_A)
|
||
P_IMMD(p, NV9097, SET_MAX_TI_WARPS_PER_BATCH, 3);
|
||
|
||
if (pdev->info.cls_eng3d >= KEPLER_A &&
|
||
pdev->info.cls_eng3d < MAXWELL_A) {
|
||
P_IMMD(p, NVA097, SET_TEXTURE_INSTRUCTION_OPERAND,
|
||
ORDERING_KEPLER_ORDER);
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_ALPHA_TEST, ENABLE_FALSE);
|
||
P_IMMD(p, NV9097, SET_TWO_SIDED_LIGHT, ENABLE_FALSE);
|
||
P_IMMD(p, NV9097, SET_COLOR_CLAMP, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_PS_SATURATE, {
|
||
.output0 = OUTPUT0_FALSE,
|
||
.output1 = OUTPUT1_FALSE,
|
||
.output2 = OUTPUT2_FALSE,
|
||
.output3 = OUTPUT3_FALSE,
|
||
.output4 = OUTPUT4_FALSE,
|
||
.output5 = OUTPUT5_FALSE,
|
||
.output6 = OUTPUT6_FALSE,
|
||
.output7 = OUTPUT7_FALSE,
|
||
});
|
||
|
||
P_IMMD(p, NV9097, SET_POINT_SIZE, fui(1.0));
|
||
P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, { .enable = ENABLE_TRUE });
|
||
|
||
/* From vulkan spec's point rasterization:
|
||
* "Point rasterization produces a fragment for each fragment area group of
|
||
* framebuffer pixels with one or more sample points that intersect a region
|
||
* centered at the point’s (xf,yf).
|
||
* This region is a square with side equal to the current point size.
|
||
* ... (xf,yf) is the exact, unrounded framebuffer coordinate of the vertex
|
||
* for the point"
|
||
*
|
||
* So it seems we always need square points with PointCoords like OpenGL
|
||
* point sprites.
|
||
*
|
||
* From OpenGL compatibility spec:
|
||
* Basic point rasterization:
|
||
* "If point sprites are enabled, then point rasterization produces a
|
||
* fragment for each framebuffer pixel whose center lies inside a square
|
||
* centered at the point’s (xw, yw), with side length equal to the current
|
||
* point size.
|
||
* ... and xw and yw are the exact, unrounded window coordinates of the
|
||
* vertex for the point"
|
||
*
|
||
* And Point multisample rasterization:
|
||
* "This region is a circle having diameter equal to the current point width
|
||
* if POINT_SPRITE is disabled, or a square with side equal to the current
|
||
* point width if POINT_SPRITE is enabled."
|
||
*/
|
||
P_IMMD(p, NV9097, SET_POINT_SPRITE, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_POINT_SPRITE_SELECT, {
|
||
.rmode = RMODE_ZERO,
|
||
.origin = ORIGIN_TOP,
|
||
.texture0 = TEXTURE0_PASSTHROUGH,
|
||
.texture1 = TEXTURE1_PASSTHROUGH,
|
||
.texture2 = TEXTURE2_PASSTHROUGH,
|
||
.texture3 = TEXTURE3_PASSTHROUGH,
|
||
.texture4 = TEXTURE4_PASSTHROUGH,
|
||
.texture5 = TEXTURE5_PASSTHROUGH,
|
||
.texture6 = TEXTURE6_PASSTHROUGH,
|
||
.texture7 = TEXTURE7_PASSTHROUGH,
|
||
.texture8 = TEXTURE8_PASSTHROUGH,
|
||
.texture9 = TEXTURE9_PASSTHROUGH,
|
||
});
|
||
|
||
/* OpenGL's GL_POINT_SMOOTH */
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIASED_POINT, ENABLE_FALSE);
|
||
|
||
if (pdev->info.cls_eng3d >= MAXWELL_B)
|
||
P_IMMD(p, NVB197, SET_FILL_VIA_TRIANGLE, MODE_DISABLED);
|
||
|
||
P_IMMD(p, NV9097, SET_POLY_SMOOTH, ENABLE_FALSE);
|
||
|
||
P_IMMD(p, NV9097, SET_VIEWPORT_PIXEL, CENTER_AT_HALF_INTEGERS);
|
||
|
||
P_IMMD(p, NV9097, SET_HYBRID_ANTI_ALIAS_CONTROL, {
|
||
.passes = 1,
|
||
.centroid = CENTROID_PER_FRAGMENT,
|
||
});
|
||
|
||
/* Enable multisample rasterization even for one sample rasterization,
|
||
* this way we get strict lines and rectangular line support.
|
||
* More info at: DirectX rasterization rules
|
||
*/
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIAS_ENABLE, V_TRUE);
|
||
|
||
if (pdev->info.cls_eng3d >= MAXWELL_B) {
|
||
P_IMMD(p, NVB197, SET_OFFSET_RENDER_TARGET_INDEX,
|
||
BY_VIEWPORT_INDEX_FALSE);
|
||
}
|
||
|
||
/* TODO: Vertex runout */
|
||
|
||
P_IMMD(p, NV9097, SET_WINDOW_ORIGIN, {
|
||
.mode = MODE_UPPER_LEFT,
|
||
.flip_y = FLIP_Y_FALSE,
|
||
});
|
||
|
||
P_MTHD(p, NV9097, SET_WINDOW_OFFSET_X);
|
||
P_NV9097_SET_WINDOW_OFFSET_X(p, 0);
|
||
P_NV9097_SET_WINDOW_OFFSET_Y(p, 0);
|
||
|
||
P_IMMD(p, NV9097, SET_ACTIVE_ZCULL_REGION, 0x3f);
|
||
P_IMMD(p, NV9097, SET_WINDOW_CLIP_ENABLE, V_FALSE);
|
||
P_IMMD(p, NV9097, SET_CLIP_ID_TEST, ENABLE_FALSE);
|
||
|
||
// P_IMMD(p, NV9097, X_X_X_SET_CLEAR_CONTROL, {
|
||
// .respect_stencil_mask = RESPECT_STENCIL_MASK_FALSE,
|
||
// .use_clear_rect = USE_CLEAR_RECT_FALSE,
|
||
// });
|
||
|
||
P_IMMD(p, NV9097, SET_VIEWPORT_SCALE_OFFSET, ENABLE_TRUE);
|
||
|
||
P_IMMD(p, NV9097, SET_VIEWPORT_CLIP_CONTROL, {
|
||
.min_z_zero_max_z_one = MIN_Z_ZERO_MAX_Z_ONE_FALSE,
|
||
.pixel_min_z = PIXEL_MIN_Z_CLAMP,
|
||
.pixel_max_z = PIXEL_MAX_Z_CLAMP,
|
||
.geometry_guardband = GEOMETRY_GUARDBAND_SCALE_256,
|
||
.line_point_cull_guardband = LINE_POINT_CULL_GUARDBAND_SCALE_256,
|
||
.geometry_clip = GEOMETRY_CLIP_WZERO_CLIP,
|
||
.geometry_guardband_z = GEOMETRY_GUARDBAND_Z_SAME_AS_XY_GUARDBAND,
|
||
});
|
||
|
||
for (unsigned i = 0; i < 16; i++)
|
||
P_IMMD(p, NV9097, SET_SCISSOR_ENABLE(i), V_FALSE);
|
||
|
||
P_IMMD(p, NV9097, SET_CT_MRT_ENABLE, V_TRUE);
|
||
|
||
if (pdev->info.cls_eng3d < VOLTA_A) {
|
||
uint64_t shader_base_addr =
|
||
nvk_heap_contiguous_base_address(&dev->shader_heap);
|
||
|
||
P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
|
||
P_NV9097_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
|
||
P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
|
||
}
|
||
|
||
for (uint32_t group = 0; group < 5; group++) {
|
||
for (uint32_t slot = 0; slot < 16; slot++) {
|
||
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
|
||
.valid = VALID_FALSE,
|
||
.shader_slot = slot,
|
||
});
|
||
}
|
||
}
|
||
|
||
// P_MTHD(cmd->push, NVC0_3D, MACRO_GP_SELECT);
|
||
// P_INLINE_DATA(cmd->push, 0x40);
|
||
P_IMMD(p, NV9097, SET_RT_LAYER, {
|
||
.v = 0,
|
||
.control = CONTROL_V_SELECTS_LAYER,
|
||
});
|
||
// P_MTHD(cmd->push, NVC0_3D, MACRO_TEP_SELECT;
|
||
// P_INLINE_DATA(cmd->push, 0x30);
|
||
|
||
P_IMMD(p, NV9097, SET_POINT_CENTER_MODE, V_OGL);
|
||
P_IMMD(p, NV9097, SET_EDGE_FLAG, V_TRUE);
|
||
P_IMMD(p, NV9097, SET_SAMPLER_BINDING, V_INDEPENDENTLY);
|
||
|
||
uint64_t zero_addr = dev->zero_page->offset;
|
||
P_MTHD(p, NV9097, SET_VERTEX_STREAM_SUBSTITUTE_A);
|
||
P_NV9097_SET_VERTEX_STREAM_SUBSTITUTE_A(p, zero_addr >> 32);
|
||
P_NV9097_SET_VERTEX_STREAM_SUBSTITUTE_B(p, zero_addr);
|
||
|
||
if (pdev->info.cls_eng3d >= FERMI_A &&
|
||
pdev->info.cls_eng3d < MAXWELL_A) {
|
||
assert(dev->vab_memory);
|
||
uint64_t vab_addr = dev->vab_memory->offset;
|
||
P_MTHD(p, NV9097, SET_VAB_MEMORY_AREA_A);
|
||
P_NV9097_SET_VAB_MEMORY_AREA_A(p, vab_addr >> 32);
|
||
P_NV9097_SET_VAB_MEMORY_AREA_B(p, vab_addr);
|
||
P_NV9097_SET_VAB_MEMORY_AREA_C(p, SIZE_BYTES_256K);
|
||
}
|
||
|
||
if (pdev->info.cls_eng3d == MAXWELL_A)
|
||
P_IMMD(p, NVB097, SET_SELECT_MAXWELL_TEXTURE_HEADERS, V_TRUE);
|
||
|
||
return VK_SUCCESS;
|
||
}
|
||
|
||
static void
|
||
nvk_cmd_buffer_dirty_render_pass(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||
|
||
/* These depend on color attachment count */
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS);
|
||
|
||
/* These depend on the depth/stencil format */
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE);
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE);
|
||
|
||
/* This may depend on render targets for ESO */
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
|
||
}
|
||
|
||
void
|
||
nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
|
||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||
{
|
||
if (cmd->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
|
||
P_MTHD(p, NV9097, INVALIDATE_SAMPLER_CACHE_NO_WFI);
|
||
P_NV9097_INVALIDATE_SAMPLER_CACHE_NO_WFI(p, {
|
||
.lines = LINES_ALL,
|
||
});
|
||
P_NV9097_INVALIDATE_TEXTURE_HEADER_CACHE_NO_WFI(p, {
|
||
.lines = LINES_ALL,
|
||
});
|
||
|
||
P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
|
||
.constant = CONSTANT_TRUE,
|
||
});
|
||
}
|
||
|
||
if (cmd->vk.level != VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
|
||
(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
|
||
char gcbiar_data[VK_GCBIARR_DATA_SIZE(NVK_MAX_RTS)];
|
||
const VkRenderingInfo *resume_info =
|
||
vk_get_command_buffer_inheritance_as_rendering_resume(cmd->vk.level,
|
||
pBeginInfo,
|
||
gcbiar_data);
|
||
if (resume_info) {
|
||
nvk_CmdBeginRendering(nvk_cmd_buffer_to_handle(cmd), resume_info);
|
||
} else {
|
||
const VkCommandBufferInheritanceRenderingInfo *inheritance_info =
|
||
vk_get_command_buffer_inheritance_rendering_info(cmd->vk.level,
|
||
pBeginInfo);
|
||
assert(inheritance_info);
|
||
|
||
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
render->flags = inheritance_info->flags;
|
||
render->area = (VkRect2D) { };
|
||
render->layer_count = 0;
|
||
render->view_mask = inheritance_info->viewMask;
|
||
render->samples = inheritance_info->rasterizationSamples;
|
||
|
||
render->color_att_count = inheritance_info->colorAttachmentCount;
|
||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||
render->color_att[i].vk_format =
|
||
inheritance_info->pColorAttachmentFormats[i];
|
||
}
|
||
render->depth_att.vk_format =
|
||
inheritance_info->depthAttachmentFormat;
|
||
render->stencil_att.vk_format =
|
||
inheritance_info->stencilAttachmentFormat;
|
||
|
||
nvk_cmd_buffer_dirty_render_pass(cmd);
|
||
}
|
||
}
|
||
|
||
cmd->state.gfx.shaders_dirty = ~0;
|
||
}
|
||
|
||
void
|
||
nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
vk_dynamic_graphics_state_dirty_all(&cmd->vk.dynamic_graphics_state);
|
||
|
||
/* From the Vulkan 1.3.275 spec:
|
||
*
|
||
* "...There is one exception to this rule - if the primary command
|
||
* buffer is inside a render pass instance, then the render pass and
|
||
* subpass state is not disturbed by executing secondary command
|
||
* buffers."
|
||
*
|
||
* We need to reset everything EXCEPT the render pass state.
|
||
*/
|
||
struct nvk_rendering_state render_save = cmd->state.gfx.render;
|
||
memset(&cmd->state.gfx, 0, sizeof(cmd->state.gfx));
|
||
cmd->state.gfx.render = render_save;
|
||
|
||
cmd->state.gfx.shaders_dirty = ~0;
|
||
}
|
||
|
||
static void
|
||
nvk_attachment_init(struct nvk_attachment *att,
|
||
const VkRenderingAttachmentInfo *info)
|
||
{
|
||
if (info == NULL || info->imageView == VK_NULL_HANDLE) {
|
||
*att = (struct nvk_attachment) { .iview = NULL, };
|
||
return;
|
||
}
|
||
|
||
VK_FROM_HANDLE(nvk_image_view, iview, info->imageView);
|
||
*att = (struct nvk_attachment) {
|
||
.vk_format = iview->vk.format,
|
||
.iview = iview,
|
||
};
|
||
|
||
if (info->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||
VK_FROM_HANDLE(nvk_image_view, res_iview, info->resolveImageView);
|
||
att->resolve_mode = info->resolveMode;
|
||
att->resolve_iview = res_iview;
|
||
}
|
||
|
||
att->store_op = info->storeOp;
|
||
}
|
||
|
||
static uint32_t
|
||
nil_to_nv9097_samples_mode(enum nil_sample_layout sample_layout)
|
||
{
|
||
#define MODE(S) [NIL_SAMPLE_LAYOUT_##S] = NV9097_SET_ANTI_ALIAS_SAMPLES_MODE_##S
|
||
uint16_t nil_to_nv9097[] = {
|
||
MODE(1X1),
|
||
MODE(2X1),
|
||
MODE(2X2),
|
||
MODE(4X2),
|
||
MODE(4X4),
|
||
};
|
||
#undef MODE
|
||
assert(sample_layout < ARRAY_SIZE(nil_to_nv9097));
|
||
|
||
return nil_to_nv9097[sample_layout];
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_GetRenderingAreaGranularityKHR(
|
||
VkDevice device,
|
||
const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
|
||
VkExtent2D *pGranularity)
|
||
{
|
||
*pGranularity = (VkExtent2D) { .width = 1, .height = 1 };
|
||
}
|
||
|
||
static bool
|
||
nvk_rendering_all_linear(const struct nvk_rendering_state *render)
|
||
{
|
||
/* Depth and stencil are never linear */
|
||
if (render->depth_att.iview || render->stencil_att.iview)
|
||
return false;
|
||
|
||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||
const struct nvk_image_view *iview = render->color_att[i].iview;
|
||
if (iview == NULL)
|
||
continue;
|
||
|
||
const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
|
||
const uint8_t ip = iview->planes[0].image_plane;
|
||
const struct nil_image_level *level =
|
||
&image->planes[ip].nil.levels[iview->vk.base_mip_level];
|
||
|
||
if (level->tiling.is_tiled)
|
||
return false;
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||
const VkRenderingInfo *pRenderingInfo)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
|
||
memset(render, 0, sizeof(*render));
|
||
|
||
render->flags = pRenderingInfo->flags;
|
||
render->area = pRenderingInfo->renderArea;
|
||
render->view_mask = pRenderingInfo->viewMask;
|
||
render->layer_count = pRenderingInfo->layerCount;
|
||
render->samples = 0;
|
||
|
||
const uint32_t layer_count =
|
||
render->view_mask ? util_last_bit(render->view_mask) :
|
||
render->layer_count;
|
||
|
||
render->color_att_count = pRenderingInfo->colorAttachmentCount;
|
||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||
nvk_attachment_init(&render->color_att[i],
|
||
&pRenderingInfo->pColorAttachments[i]);
|
||
}
|
||
|
||
nvk_attachment_init(&render->depth_att,
|
||
pRenderingInfo->pDepthAttachment);
|
||
nvk_attachment_init(&render->stencil_att,
|
||
pRenderingInfo->pStencilAttachment);
|
||
|
||
nvk_cmd_buffer_dirty_render_pass(cmd);
|
||
|
||
/* Always emit at least one color attachment, even if it's just a dummy. */
|
||
uint32_t color_att_count = MAX2(1, render->color_att_count);
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, color_att_count * 12 + 29);
|
||
|
||
P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_VIEW_MASK),
|
||
render->view_mask);
|
||
|
||
P_MTHD(p, NV9097, SET_SURFACE_CLIP_HORIZONTAL);
|
||
P_NV9097_SET_SURFACE_CLIP_HORIZONTAL(p, {
|
||
.x = render->area.offset.x,
|
||
.width = render->area.extent.width,
|
||
});
|
||
P_NV9097_SET_SURFACE_CLIP_VERTICAL(p, {
|
||
.y = render->area.offset.y,
|
||
.height = render->area.extent.height,
|
||
});
|
||
|
||
const bool all_linear = nvk_rendering_all_linear(render);
|
||
|
||
enum nil_sample_layout sample_layout = NIL_SAMPLE_LAYOUT_INVALID;
|
||
for (uint32_t i = 0; i < color_att_count; i++) {
|
||
if (render->color_att[i].iview) {
|
||
const struct nvk_image_view *iview = render->color_att[i].iview;
|
||
const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
|
||
/* Rendering to multi-planar images is valid for a specific single
|
||
* plane only, so assert that what we have is a single-plane, obtain
|
||
* its index, and begin rendering
|
||
*/
|
||
assert(iview->plane_count == 1);
|
||
const uint8_t ip = iview->planes[0].image_plane;
|
||
const struct nvk_image_plane *plane = &image->planes[ip];
|
||
|
||
const VkAttachmentLoadOp load_op =
|
||
pRenderingInfo->pColorAttachments[i].loadOp;
|
||
if (!all_linear && !plane->nil.levels[0].tiling.is_tiled) {
|
||
if (load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||
nvk_linear_render_copy(cmd, iview, render->area, true);
|
||
|
||
plane = &image->linear_tiled_shadow;
|
||
}
|
||
|
||
const struct nil_image *nil_image = &plane->nil;
|
||
const struct nil_image_level *level =
|
||
&nil_image->levels[iview->vk.base_mip_level];
|
||
struct nil_Extent4D_Samples level_extent_sa =
|
||
nil_image_level_extent_sa(nil_image, iview->vk.base_mip_level);
|
||
|
||
assert(sample_layout == NIL_SAMPLE_LAYOUT_INVALID ||
|
||
sample_layout == nil_image->sample_layout);
|
||
sample_layout = nil_image->sample_layout;
|
||
render->samples = image->vk.samples;
|
||
|
||
uint64_t addr = nvk_image_plane_base_address(plane) + level->offset_B;
|
||
|
||
if (nil_image->dim == NIL_IMAGE_DIM_3D) {
|
||
addr += nil_image_level_z_offset_B(nil_image,
|
||
iview->vk.base_mip_level,
|
||
iview->vk.base_array_layer);
|
||
} else {
|
||
addr += iview->vk.base_array_layer *
|
||
(uint64_t)nil_image->array_stride_B;
|
||
}
|
||
|
||
P_MTHD(p, NV9097, SET_COLOR_TARGET_A(i));
|
||
P_NV9097_SET_COLOR_TARGET_A(p, i, addr >> 32);
|
||
P_NV9097_SET_COLOR_TARGET_B(p, i, addr);
|
||
|
||
if (level->tiling.is_tiled) {
|
||
const enum pipe_format p_format =
|
||
vk_format_to_pipe_format(iview->vk.format);
|
||
|
||
/* We use the stride for depth/stencil targets because the Z/S
|
||
* hardware has no concept of a tile width. Instead, we just set
|
||
* the width to the stride divided by bpp.
|
||
*/
|
||
const uint32_t row_stride_el =
|
||
level->row_stride_B / util_format_get_blocksize(p_format);
|
||
P_NV9097_SET_COLOR_TARGET_WIDTH(p, i, row_stride_el);
|
||
P_NV9097_SET_COLOR_TARGET_HEIGHT(p, i, level_extent_sa.height);
|
||
const uint8_t ct_format = nil_format_to_color_target(p_format);
|
||
P_NV9097_SET_COLOR_TARGET_FORMAT(p, i, ct_format);
|
||
|
||
P_NV9097_SET_COLOR_TARGET_MEMORY(p, i, {
|
||
.block_width = BLOCK_WIDTH_ONE_GOB,
|
||
.block_height = level->tiling.y_log2,
|
||
.block_depth = level->tiling.z_log2,
|
||
.layout = LAYOUT_BLOCKLINEAR,
|
||
.third_dimension_control = (nil_image->dim == NIL_IMAGE_DIM_3D) ?
|
||
THIRD_DIMENSION_CONTROL_THIRD_DIMENSION_DEFINES_DEPTH_SIZE :
|
||
THIRD_DIMENSION_CONTROL_THIRD_DIMENSION_DEFINES_ARRAY_SIZE,
|
||
});
|
||
|
||
P_NV9097_SET_COLOR_TARGET_THIRD_DIMENSION(p, i, layer_count);
|
||
P_NV9097_SET_COLOR_TARGET_ARRAY_PITCH(p, i,
|
||
nil_image->array_stride_B >> 2);
|
||
P_NV9097_SET_COLOR_TARGET_LAYER(p, i, 0);
|
||
} else {
|
||
/* NVIDIA can only render to 2D linear images */
|
||
assert(nil_image->dim == NIL_IMAGE_DIM_2D);
|
||
/* NVIDIA can only render to non-multisampled images */
|
||
assert(sample_layout == NIL_SAMPLE_LAYOUT_1X1);
|
||
/* NVIDIA doesn't support linear array images */
|
||
assert(iview->vk.base_array_layer == 0 && layer_count == 1);
|
||
|
||
uint32_t pitch = level->row_stride_B;
|
||
const enum pipe_format p_format =
|
||
vk_format_to_pipe_format(iview->vk.format);
|
||
/* When memory layout is set to LAYOUT_PITCH, the WIDTH field
|
||
* takes row pitch
|
||
*/
|
||
P_NV9097_SET_COLOR_TARGET_WIDTH(p, i, pitch);
|
||
P_NV9097_SET_COLOR_TARGET_HEIGHT(p, i, level_extent_sa.height);
|
||
|
||
const uint8_t ct_format = nil_format_to_color_target(p_format);
|
||
P_NV9097_SET_COLOR_TARGET_FORMAT(p, i, ct_format);
|
||
|
||
P_NV9097_SET_COLOR_TARGET_MEMORY(p, i, {
|
||
.layout = LAYOUT_PITCH,
|
||
.third_dimension_control =
|
||
THIRD_DIMENSION_CONTROL_THIRD_DIMENSION_DEFINES_ARRAY_SIZE,
|
||
});
|
||
|
||
P_NV9097_SET_COLOR_TARGET_THIRD_DIMENSION(p, i, 1);
|
||
P_NV9097_SET_COLOR_TARGET_ARRAY_PITCH(p, i, 0);
|
||
P_NV9097_SET_COLOR_TARGET_LAYER(p, i, 0);
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_COLOR_COMPRESSION(i), nil_image->compressed);
|
||
} else {
|
||
P_MTHD(p, NV9097, SET_COLOR_TARGET_A(i));
|
||
P_NV9097_SET_COLOR_TARGET_A(p, i, 0);
|
||
P_NV9097_SET_COLOR_TARGET_B(p, i, 0);
|
||
P_NV9097_SET_COLOR_TARGET_WIDTH(p, i, 64);
|
||
P_NV9097_SET_COLOR_TARGET_HEIGHT(p, i, 0);
|
||
P_NV9097_SET_COLOR_TARGET_FORMAT(p, i, V_DISABLED);
|
||
P_NV9097_SET_COLOR_TARGET_MEMORY(p, i, {
|
||
.layout = LAYOUT_BLOCKLINEAR,
|
||
});
|
||
P_NV9097_SET_COLOR_TARGET_THIRD_DIMENSION(p, i, layer_count);
|
||
P_NV9097_SET_COLOR_TARGET_ARRAY_PITCH(p, i, 0);
|
||
P_NV9097_SET_COLOR_TARGET_LAYER(p, i, 0);
|
||
|
||
P_IMMD(p, NV9097, SET_COLOR_COMPRESSION(i), ENABLE_TRUE);
|
||
}
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_CT_SELECT, {
|
||
.target_count = color_att_count,
|
||
.target0 = 0,
|
||
.target1 = 1,
|
||
.target2 = 2,
|
||
.target3 = 3,
|
||
.target4 = 4,
|
||
.target5 = 5,
|
||
.target6 = 6,
|
||
.target7 = 7,
|
||
});
|
||
|
||
if (render->depth_att.iview || render->stencil_att.iview) {
|
||
struct nvk_image_view *iview = render->depth_att.iview ?
|
||
render->depth_att.iview :
|
||
render->stencil_att.iview;
|
||
const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
|
||
/* Depth/stencil are always single-plane */
|
||
assert(iview->plane_count == 1);
|
||
const uint8_t ip = iview->planes[0].image_plane;
|
||
struct nil_image nil_image = image->planes[ip].nil;
|
||
|
||
uint64_t addr = nvk_image_base_address(image, ip);
|
||
uint32_t mip_level = iview->vk.base_mip_level;
|
||
uint32_t base_array_layer = iview->vk.base_array_layer;
|
||
uint32_t layer_count = iview->vk.layer_count;
|
||
|
||
if (nil_image.dim == NIL_IMAGE_DIM_3D) {
|
||
uint64_t level_offset_B;
|
||
nil_image = nil_image_3d_level_as_2d_array(&nil_image, mip_level,
|
||
&level_offset_B);
|
||
addr += level_offset_B;
|
||
mip_level = 0;
|
||
base_array_layer = 0;
|
||
layer_count = iview->vk.extent.depth;
|
||
}
|
||
|
||
const struct nil_image_level *level = &nil_image.levels[mip_level];
|
||
addr += level->offset_B;
|
||
|
||
assert(sample_layout == NIL_SAMPLE_LAYOUT_INVALID ||
|
||
sample_layout == nil_image.sample_layout);
|
||
sample_layout = nil_image.sample_layout;
|
||
render->samples = image->vk.samples;
|
||
|
||
P_MTHD(p, NV9097, SET_ZT_A);
|
||
P_NV9097_SET_ZT_A(p, addr >> 32);
|
||
P_NV9097_SET_ZT_B(p, addr);
|
||
const enum pipe_format p_format =
|
||
vk_format_to_pipe_format(iview->vk.format);
|
||
const uint8_t zs_format = nil_format_to_depth_stencil(p_format);
|
||
P_NV9097_SET_ZT_FORMAT(p, zs_format);
|
||
assert(level->tiling.is_tiled);
|
||
assert(level->tiling.z_log2 == 0);
|
||
P_NV9097_SET_ZT_BLOCK_SIZE(p, {
|
||
.width = WIDTH_ONE_GOB,
|
||
.height = level->tiling.y_log2,
|
||
.depth = DEPTH_ONE_GOB,
|
||
});
|
||
P_NV9097_SET_ZT_ARRAY_PITCH(p, nil_image.array_stride_B >> 2);
|
||
|
||
P_IMMD(p, NV9097, SET_ZT_SELECT, 1 /* target_count */);
|
||
|
||
struct nil_Extent4D_Samples level_extent_sa =
|
||
nil_image_level_extent_sa(&nil_image, mip_level);
|
||
|
||
/* We use the stride for depth/stencil targets because the Z/S hardware
|
||
* has no concept of a tile width. Instead, we just set the width to
|
||
* the stride divided by bpp.
|
||
*/
|
||
const uint32_t row_stride_el =
|
||
level->row_stride_B / util_format_get_blocksize(p_format);
|
||
|
||
P_MTHD(p, NV9097, SET_ZT_SIZE_A);
|
||
P_NV9097_SET_ZT_SIZE_A(p, row_stride_el);
|
||
P_NV9097_SET_ZT_SIZE_B(p, level_extent_sa.height);
|
||
P_NV9097_SET_ZT_SIZE_C(p, {
|
||
.third_dimension = base_array_layer + layer_count,
|
||
.control = CONTROL_THIRD_DIMENSION_DEFINES_ARRAY_SIZE,
|
||
});
|
||
|
||
P_IMMD(p, NV9097, SET_ZT_LAYER, base_array_layer);
|
||
|
||
P_IMMD(p, NV9097, SET_Z_COMPRESSION, nil_image.compressed);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
|
||
P_IMMD(p, NVC597, SET_ZT_SPARSE, {
|
||
.enable = ENABLE_FALSE,
|
||
});
|
||
}
|
||
} else {
|
||
P_IMMD(p, NV9097, SET_ZT_SELECT, 0 /* target_count */);
|
||
}
|
||
|
||
/* From the Vulkan 1.3.275 spec:
|
||
*
|
||
* "It is legal for a subpass to use no color or depth/stencil
|
||
* attachments, either because it has no attachment references or
|
||
* because all of them are VK_ATTACHMENT_UNUSED. This kind of subpass
|
||
* can use shader side effects such as image stores and atomics to
|
||
* produce an output. In this case, the subpass continues to use the
|
||
* width, height, and layers of the framebuffer to define the dimensions
|
||
* of the rendering area, and the rasterizationSamples from each
|
||
* pipeline’s VkPipelineMultisampleStateCreateInfo to define the number
|
||
* of samples used in rasterization;"
|
||
*
|
||
* In the case where we have attachments, we emit SET_ANTI_ALIAS here
|
||
* because SET_COLOR_TARGET_* and SET_ZT_* don't have any other way of
|
||
* specifying the sample layout and we want to ensure it matches. When
|
||
* we don't have any attachments, we defer SET_ANTI_ALIAS to draw time
|
||
* where we base it on dynamic rasterizationSamples.
|
||
*/
|
||
if (sample_layout != NIL_SAMPLE_LAYOUT_INVALID) {
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIAS,
|
||
nil_to_nv9097_samples_mode(sample_layout));
|
||
}
|
||
|
||
if (render->flags & VK_RENDERING_RESUMING_BIT)
|
||
return;
|
||
|
||
uint32_t clear_count = 0;
|
||
VkClearAttachment clear_att[NVK_MAX_RTS + 1];
|
||
for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
|
||
const VkRenderingAttachmentInfo *att_info =
|
||
&pRenderingInfo->pColorAttachments[i];
|
||
if (att_info->imageView == VK_NULL_HANDLE ||
|
||
att_info->loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||
continue;
|
||
|
||
clear_att[clear_count++] = (VkClearAttachment) {
|
||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||
.colorAttachment = i,
|
||
.clearValue = att_info->clearValue,
|
||
};
|
||
}
|
||
|
||
clear_att[clear_count] = (VkClearAttachment) { .aspectMask = 0, };
|
||
if (pRenderingInfo->pDepthAttachment != NULL &&
|
||
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
|
||
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
|
||
clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
||
clear_att[clear_count].clearValue.depthStencil.depth =
|
||
pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
|
||
}
|
||
if (pRenderingInfo->pStencilAttachment != NULL &&
|
||
pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
|
||
pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
|
||
clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||
clear_att[clear_count].clearValue.depthStencil.stencil =
|
||
pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
|
||
}
|
||
if (clear_att[clear_count].aspectMask != 0)
|
||
clear_count++;
|
||
|
||
if (clear_count > 0) {
|
||
const VkClearRect clear_rect = {
|
||
.rect = render->area,
|
||
.baseArrayLayer = 0,
|
||
.layerCount = render->view_mask ? 1 : render->layer_count,
|
||
};
|
||
|
||
P_MTHD(p, NV9097, SET_RENDER_ENABLE_OVERRIDE);
|
||
P_NV9097_SET_RENDER_ENABLE_OVERRIDE(p, MODE_ALWAYS_RENDER);
|
||
|
||
nvk_CmdClearAttachments(nvk_cmd_buffer_to_handle(cmd),
|
||
clear_count, clear_att, 1, &clear_rect);
|
||
p = nvk_cmd_buffer_push(cmd, 2);
|
||
P_MTHD(p, NV9097, SET_RENDER_ENABLE_OVERRIDE);
|
||
P_NV9097_SET_RENDER_ENABLE_OVERRIDE(p, MODE_USE_RENDER_ENABLE);
|
||
}
|
||
|
||
/* TODO: Attachment clears */
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdEndRendering(VkCommandBuffer commandBuffer)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
|
||
const bool all_linear = nvk_rendering_all_linear(render);
|
||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||
struct nvk_image_view *iview = render->color_att[i].iview;
|
||
if (iview == NULL)
|
||
continue;
|
||
|
||
struct nvk_image *image = (struct nvk_image *)iview->vk.image;
|
||
const uint8_t ip = iview->planes[0].image_plane;
|
||
const struct nvk_image_plane *plane = &image->planes[ip];
|
||
if (!all_linear && !plane->nil.levels[0].tiling.is_tiled &&
|
||
render->color_att[i].store_op == VK_ATTACHMENT_STORE_OP_STORE)
|
||
nvk_linear_render_copy(cmd, iview, render->area, false);
|
||
}
|
||
|
||
bool need_resolve = false;
|
||
|
||
/* Translate render state back to VK for meta */
|
||
VkRenderingAttachmentInfo vk_color_att[NVK_MAX_RTS];
|
||
for (uint32_t i = 0; i < render->color_att_count; i++) {
|
||
if (render->color_att[i].resolve_mode != VK_RESOLVE_MODE_NONE)
|
||
need_resolve = true;
|
||
|
||
vk_color_att[i] = (VkRenderingAttachmentInfo) {
|
||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||
.imageView = nvk_image_view_to_handle(render->color_att[i].iview),
|
||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
.resolveMode = render->color_att[i].resolve_mode,
|
||
.resolveImageView =
|
||
nvk_image_view_to_handle(render->color_att[i].resolve_iview),
|
||
.resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
}
|
||
|
||
const VkRenderingAttachmentInfo vk_depth_att = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||
.imageView = nvk_image_view_to_handle(render->depth_att.iview),
|
||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
.resolveMode = render->depth_att.resolve_mode,
|
||
.resolveImageView =
|
||
nvk_image_view_to_handle(render->depth_att.resolve_iview),
|
||
.resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
if (render->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE)
|
||
need_resolve = true;
|
||
|
||
const VkRenderingAttachmentInfo vk_stencil_att = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||
.imageView = nvk_image_view_to_handle(render->stencil_att.iview),
|
||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
.resolveMode = render->stencil_att.resolve_mode,
|
||
.resolveImageView =
|
||
nvk_image_view_to_handle(render->stencil_att.resolve_iview),
|
||
.resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
if (render->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE)
|
||
need_resolve = true;
|
||
|
||
const VkRenderingInfo vk_render = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
|
||
.renderArea = render->area,
|
||
.layerCount = render->layer_count,
|
||
.viewMask = render->view_mask,
|
||
.colorAttachmentCount = render->color_att_count,
|
||
.pColorAttachments = vk_color_att,
|
||
.pDepthAttachment = &vk_depth_att,
|
||
.pStencilAttachment = &vk_stencil_att,
|
||
};
|
||
|
||
if (render->flags & VK_RENDERING_SUSPENDING_BIT)
|
||
need_resolve = false;
|
||
|
||
memset(render, 0, sizeof(*render));
|
||
|
||
if (need_resolve) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||
P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE, {
|
||
.lines = LINES_ALL,
|
||
});
|
||
|
||
nvk_meta_resolve_rendering(cmd, &vk_render);
|
||
}
|
||
}
|
||
|
||
void
|
||
nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
|
||
const gl_shader_stage stage,
|
||
struct nvk_shader *shader)
|
||
{
|
||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||
|
||
assert(stage < ARRAY_SIZE(cmd->state.gfx.shaders));
|
||
if (cmd->state.gfx.shaders[stage] == shader)
|
||
return;
|
||
|
||
cmd->state.gfx.shaders[stage] = shader;
|
||
cmd->state.gfx.shaders_dirty |= BITFIELD_BIT(stage);
|
||
|
||
/* When a pipeline with tess shaders is bound we need to re-upload the
|
||
* tessellation parameters at flush_ts_state, as the domain origin can be
|
||
* dynamic.
|
||
*/
|
||
if (stage == MESA_SHADER_TESS_EVAL)
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
|
||
|
||
/* Emitting SET_HYBRID_ANTI_ALIAS_CONTROL requires the fragment shader */
|
||
if (stage == MESA_SHADER_FRAGMENT)
|
||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
|
||
}
|
||
|
||
static uint32_t
|
||
mesa_to_nv9097_shader_type(gl_shader_stage stage)
|
||
{
|
||
static const uint32_t mesa_to_nv9097[] = {
|
||
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
|
||
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
|
||
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
|
||
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
|
||
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
|
||
};
|
||
assert(stage < ARRAY_SIZE(mesa_to_nv9097));
|
||
return mesa_to_nv9097[stage];
|
||
}
|
||
|
||
static uint32_t
|
||
nvk_pipeline_bind_group(gl_shader_stage stage)
|
||
{
|
||
return stage;
|
||
}
|
||
|
||
static void
|
||
nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
if (cmd->state.gfx.shaders_dirty == 0)
|
||
return;
|
||
|
||
/* Map shader types to shaders */
|
||
struct nvk_shader *type_shader[6] = { NULL, };
|
||
uint32_t types_dirty = 0;
|
||
|
||
const uint32_t gfx_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
|
||
BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
|
||
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
|
||
BITFIELD_BIT(MESA_SHADER_GEOMETRY) |
|
||
BITFIELD_BIT(MESA_SHADER_FRAGMENT);
|
||
|
||
u_foreach_bit(stage, cmd->state.gfx.shaders_dirty & gfx_stages) {
|
||
uint32_t type = mesa_to_nv9097_shader_type(stage);
|
||
types_dirty |= BITFIELD_BIT(type);
|
||
|
||
/* Only copy non-NULL shaders because mesh/task alias with vertex and
|
||
* tessellation stages.
|
||
*/
|
||
if (cmd->state.gfx.shaders[stage] != NULL) {
|
||
assert(type < ARRAY_SIZE(type_shader));
|
||
assert(type_shader[type] == NULL);
|
||
type_shader[type] = cmd->state.gfx.shaders[stage];
|
||
}
|
||
}
|
||
|
||
u_foreach_bit(type, types_dirty) {
|
||
struct nvk_shader *shader = type_shader[type];
|
||
|
||
/* We always map index == type */
|
||
const uint32_t idx = type;
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
|
||
.enable = shader != NULL,
|
||
.type = type,
|
||
});
|
||
|
||
if (shader == NULL)
|
||
continue;
|
||
|
||
uint64_t addr = shader->hdr_addr;
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= VOLTA_A) {
|
||
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
|
||
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
|
||
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
|
||
} else {
|
||
assert(addr < 0xffffffff);
|
||
P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
|
||
}
|
||
|
||
P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
|
||
P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
|
||
P_NVC397_SET_PIPELINE_BINDING(p, idx,
|
||
nvk_pipeline_bind_group(shader->info.stage));
|
||
|
||
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||
p = nvk_cmd_buffer_push(cmd, 9);
|
||
|
||
P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);
|
||
P_NV9097_SET_SUBTILING_PERF_KNOB_A(p, {
|
||
.fraction_of_spm_register_file_per_subtile = 0x10,
|
||
.fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
|
||
.fraction_of_spm_triangle_ram_per_subtile = 0x16,
|
||
.fraction_of_max_quads_per_subtile = 0x20,
|
||
});
|
||
P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
|
||
|
||
P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
|
||
shader->info.fs.early_fragment_tests);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
|
||
P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
|
||
shader->info.fs.post_depth_coverage);
|
||
} else {
|
||
assert(!shader->info.fs.post_depth_coverage);
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
|
||
.z_min_unbounded_enable = shader->info.fs.writes_depth,
|
||
.z_max_unbounded_enable = shader->info.fs.writes_depth,
|
||
});
|
||
}
|
||
}
|
||
|
||
const uint32_t vtg_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
|
||
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
|
||
BITFIELD_BIT(MESA_SHADER_GEOMETRY);
|
||
const uint32_t vtgm_stages = vtg_stages | BITFIELD_BIT(MESA_SHADER_MESH);
|
||
|
||
if (cmd->state.gfx.shaders_dirty & vtg_stages) {
|
||
struct nak_xfb_info *xfb = NULL;
|
||
u_foreach_bit(stage, vtg_stages) {
|
||
if (cmd->state.gfx.shaders[stage] != NULL)
|
||
xfb = &cmd->state.gfx.shaders[stage]->info.vtg.xfb;
|
||
}
|
||
|
||
if (xfb == NULL) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
for (uint8_t b = 0; b < 4; b++)
|
||
P_IMMD(p, NV9097, SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(b), 0);
|
||
} else {
|
||
for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
|
||
const uint8_t attr_count = xfb->attr_count[b];
|
||
/* upload packed varying indices in multiples of 4 bytes */
|
||
const uint32_t n = DIV_ROUND_UP(attr_count, 4);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5 + n);
|
||
|
||
P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
|
||
P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
|
||
P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
|
||
P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
|
||
|
||
if (n > 0) {
|
||
P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
|
||
P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if (cmd->state.gfx.shaders_dirty & vtgm_stages) {
|
||
struct nvk_shader *last_vtgm = NULL;
|
||
u_foreach_bit(stage, vtgm_stages) {
|
||
if (cmd->state.gfx.shaders[stage] != NULL)
|
||
last_vtgm = cmd->state.gfx.shaders[stage];
|
||
}
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
|
||
|
||
P_IMMD(p, NV9097, SET_RT_LAYER, {
|
||
.v = 0,
|
||
.control = last_vtgm->info.vtg.writes_layer ?
|
||
CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
|
||
CONTROL_V_SELECTS_LAYER,
|
||
});
|
||
|
||
const uint8_t clip_enable = last_vtgm->info.vtg.clip_enable;
|
||
const uint8_t cull_enable = last_vtgm->info.vtg.cull_enable;
|
||
P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
|
||
.plane0 = ((clip_enable | cull_enable) >> 0) & 1,
|
||
.plane1 = ((clip_enable | cull_enable) >> 1) & 1,
|
||
.plane2 = ((clip_enable | cull_enable) >> 2) & 1,
|
||
.plane3 = ((clip_enable | cull_enable) >> 3) & 1,
|
||
.plane4 = ((clip_enable | cull_enable) >> 4) & 1,
|
||
.plane5 = ((clip_enable | cull_enable) >> 5) & 1,
|
||
.plane6 = ((clip_enable | cull_enable) >> 6) & 1,
|
||
.plane7 = ((clip_enable | cull_enable) >> 7) & 1,
|
||
});
|
||
P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
|
||
.plane0 = (cull_enable >> 0) & 1,
|
||
.plane1 = (cull_enable >> 1) & 1,
|
||
.plane2 = (cull_enable >> 2) & 1,
|
||
.plane3 = (cull_enable >> 3) & 1,
|
||
.plane4 = (cull_enable >> 4) & 1,
|
||
.plane5 = (cull_enable >> 5) & 1,
|
||
.plane6 = (cull_enable >> 6) & 1,
|
||
.plane7 = (cull_enable >> 7) & 1,
|
||
});
|
||
}
|
||
|
||
cmd->state.gfx.shaders_dirty = 0;
|
||
}
|
||
|
||
static void
|
||
nvk_flush_vi_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 256);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID)) {
|
||
u_foreach_bit(a, dyn->vi->attributes_valid) {
|
||
const struct nvk_va_format *fmt =
|
||
nvk_get_va_format(pdev, dyn->vi->attributes[a].format);
|
||
|
||
P_IMMD(p, NV9097, SET_VERTEX_ATTRIBUTE_A(a), {
|
||
.stream = dyn->vi->attributes[a].binding,
|
||
.offset = dyn->vi->attributes[a].offset,
|
||
.component_bit_widths = fmt->bit_widths,
|
||
.numerical_type = fmt->type,
|
||
.swap_r_and_b = fmt->swap_rb,
|
||
});
|
||
}
|
||
|
||
u_foreach_bit(b, dyn->vi->bindings_valid) {
|
||
const bool instanced = dyn->vi->bindings[b].input_rate ==
|
||
VK_VERTEX_INPUT_RATE_INSTANCE;
|
||
P_IMMD(p, NV9097, SET_VERTEX_STREAM_INSTANCE_A(b), instanced);
|
||
P_IMMD(p, NV9097, SET_VERTEX_STREAM_A_FREQUENCY(b),
|
||
dyn->vi->bindings[b].divisor);
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) {
|
||
for (uint32_t b = 0; b < 32; b++) {
|
||
P_IMMD(p, NV9097, SET_VERTEX_STREAM_A_FORMAT(b), {
|
||
.stride = dyn->vi_binding_strides[b],
|
||
.enable = (dyn->vi->bindings_valid & BITFIELD_BIT(b)) != 0,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
static void
|
||
nvk_flush_ia_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
/** Nothing to do for MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY */
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||
P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART,
|
||
dyn->ia.primitive_restart_enable);
|
||
}
|
||
}
|
||
|
||
static void
|
||
nvk_flush_ts_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)) {
|
||
/* The hardware gets grumpy if we set this to 0 so make sure we set it
|
||
* to at least 1 in case it's dirty but uninitialized.
|
||
*/
|
||
P_IMMD(p, NV9097, SET_PATCH, MAX2(1, dyn->ts.patch_control_points));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
|
||
const struct nvk_shader *shader =
|
||
cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
|
||
|
||
if (shader != NULL) {
|
||
enum nak_ts_prims prims = shader->info.ts.prims;
|
||
/* When the origin is lower-left, we have to flip the winding order */
|
||
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
|
||
if (prims == NAK_TS_PRIMS_TRIANGLES_CW)
|
||
prims = NAK_TS_PRIMS_TRIANGLES_CCW;
|
||
else if (prims == NAK_TS_PRIMS_TRIANGLES_CCW)
|
||
prims = NAK_TS_PRIMS_TRIANGLES_CW;
|
||
}
|
||
P_MTHD(p, NV9097, SET_TESSELLATION_PARAMETERS);
|
||
P_NV9097_SET_TESSELLATION_PARAMETERS(p, {
|
||
shader->info.ts.domain,
|
||
shader->info.ts.spacing,
|
||
prims
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
static void
|
||
nvk_flush_vp_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
struct nv_push *p =
|
||
nvk_cmd_buffer_push(cmd, 16 * dyn->vp.viewport_count + 4 * NVK_MAX_VIEWPORTS);
|
||
|
||
/* Nothing to do for MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT */
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
|
||
for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
|
||
const VkViewport *vp = &dyn->vp.viewports[i];
|
||
|
||
/* These exactly match the spec values. Nvidia hardware oddities
|
||
* are accounted for later.
|
||
*/
|
||
const float o_x = vp->x + 0.5f * vp->width;
|
||
const float o_y = vp->y + 0.5f * vp->height;
|
||
const float o_z = !dyn->vp.depth_clip_negative_one_to_one ?
|
||
vp->minDepth :
|
||
(vp->maxDepth + vp->minDepth) * 0.5f;
|
||
|
||
const float p_x = vp->width;
|
||
const float p_y = vp->height;
|
||
const float p_z = !dyn->vp.depth_clip_negative_one_to_one ?
|
||
vp->maxDepth - vp->minDepth :
|
||
(vp->maxDepth - vp->minDepth) * 0.5f;
|
||
|
||
P_MTHD(p, NV9097, SET_VIEWPORT_SCALE_X(i));
|
||
P_NV9097_SET_VIEWPORT_SCALE_X(p, i, fui(0.5f * p_x));
|
||
P_NV9097_SET_VIEWPORT_SCALE_Y(p, i, fui(0.5f * p_y));
|
||
P_NV9097_SET_VIEWPORT_SCALE_Z(p, i, fui(p_z));
|
||
|
||
P_NV9097_SET_VIEWPORT_OFFSET_X(p, i, fui(o_x));
|
||
P_NV9097_SET_VIEWPORT_OFFSET_Y(p, i, fui(o_y));
|
||
P_NV9097_SET_VIEWPORT_OFFSET_Z(p, i, fui(o_z));
|
||
|
||
float xmin = vp->x;
|
||
float xmax = vp->x + vp->width;
|
||
float ymin = MIN2(vp->y, vp->y + vp->height);
|
||
float ymax = MAX2(vp->y, vp->y + vp->height);
|
||
float zmin = MIN2(vp->minDepth, vp->maxDepth);
|
||
float zmax = MAX2(vp->minDepth, vp->maxDepth);
|
||
assert(xmin <= xmax && ymin <= ymax);
|
||
|
||
const float max_dim = (float)0xffff;
|
||
xmin = CLAMP(xmin, 0, max_dim);
|
||
xmax = CLAMP(xmax, 0, max_dim);
|
||
ymin = CLAMP(ymin, 0, max_dim);
|
||
ymax = CLAMP(ymax, 0, max_dim);
|
||
|
||
P_MTHD(p, NV9097, SET_VIEWPORT_CLIP_HORIZONTAL(i));
|
||
P_NV9097_SET_VIEWPORT_CLIP_HORIZONTAL(p, i, {
|
||
.x0 = xmin,
|
||
.width = xmax - xmin,
|
||
});
|
||
P_NV9097_SET_VIEWPORT_CLIP_VERTICAL(p, i, {
|
||
.y0 = ymin,
|
||
.height = ymax - ymin,
|
||
});
|
||
P_NV9097_SET_VIEWPORT_CLIP_MIN_Z(p, i, fui(zmin));
|
||
P_NV9097_SET_VIEWPORT_CLIP_MAX_Z(p, i, fui(zmax));
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
|
||
P_IMMD(p, NVB197, SET_VIEWPORT_COORDINATE_SWIZZLE(i), {
|
||
.x = X_POS_X,
|
||
.y = Y_POS_Y,
|
||
.z = Z_POS_Z,
|
||
.w = W_POS_W,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
|
||
P_IMMD(p, NV9097, SET_VIEWPORT_Z_CLIP,
|
||
dyn->vp.depth_clip_negative_one_to_one ?
|
||
RANGE_NEGATIVE_W_TO_POSITIVE_W :
|
||
RANGE_ZERO_TO_POSITIVE_W);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT)) {
|
||
for (unsigned i = dyn->vp.scissor_count; i < NVK_MAX_VIEWPORTS; i++)
|
||
P_IMMD(p, NV9097, SET_SCISSOR_ENABLE(i), V_FALSE);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS)) {
|
||
for (unsigned i = 0; i < dyn->vp.scissor_count; i++) {
|
||
const VkRect2D *s = &dyn->vp.scissors[i];
|
||
|
||
const uint32_t xmin = MIN2(16384, s->offset.x);
|
||
const uint32_t xmax = MIN2(16384, s->offset.x + s->extent.width);
|
||
const uint32_t ymin = MIN2(16384, s->offset.y);
|
||
const uint32_t ymax = MIN2(16384, s->offset.y + s->extent.height);
|
||
|
||
P_MTHD(p, NV9097, SET_SCISSOR_ENABLE(i));
|
||
P_NV9097_SET_SCISSOR_ENABLE(p, i, V_TRUE);
|
||
P_NV9097_SET_SCISSOR_HORIZONTAL(p, i, {
|
||
.xmin = xmin,
|
||
.xmax = xmax,
|
||
});
|
||
P_NV9097_SET_SCISSOR_VERTICAL(p, i, {
|
||
.ymin = ymin,
|
||
.ymax = ymax,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_polygon_mode(VkPolygonMode vk_mode)
|
||
{
|
||
ASSERTED uint16_t vk_to_nv9097[] = {
|
||
[VK_POLYGON_MODE_FILL] = NV9097_SET_FRONT_POLYGON_MODE_V_FILL,
|
||
[VK_POLYGON_MODE_LINE] = NV9097_SET_FRONT_POLYGON_MODE_V_LINE,
|
||
[VK_POLYGON_MODE_POINT] = NV9097_SET_FRONT_POLYGON_MODE_V_POINT,
|
||
};
|
||
assert(vk_mode < ARRAY_SIZE(vk_to_nv9097));
|
||
|
||
uint32_t nv9097_mode = 0x1b00 | (2 - vk_mode);
|
||
assert(nv9097_mode == vk_to_nv9097[vk_mode]);
|
||
return nv9097_mode;
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_cull_mode(VkCullModeFlags vk_cull_mode)
|
||
{
|
||
static const uint16_t vk_to_nv9097[] = {
|
||
[VK_CULL_MODE_FRONT_BIT] = NV9097_OGL_SET_CULL_FACE_V_FRONT,
|
||
[VK_CULL_MODE_BACK_BIT] = NV9097_OGL_SET_CULL_FACE_V_BACK,
|
||
[VK_CULL_MODE_FRONT_AND_BACK] = NV9097_OGL_SET_CULL_FACE_V_FRONT_AND_BACK,
|
||
};
|
||
assert(vk_cull_mode < ARRAY_SIZE(vk_to_nv9097));
|
||
return vk_to_nv9097[vk_cull_mode];
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_front_face(VkFrontFace vk_face)
|
||
{
|
||
/* Vulkan and OpenGL are backwards here because Vulkan assumes the D3D
|
||
* convention in which framebuffer coordinates always start in the upper
|
||
* left while OpenGL has framebuffer coordinates starting in the lower
|
||
* left. Therefore, we want the reverse of the hardware enum name.
|
||
*/
|
||
ASSERTED static const uint16_t vk_to_nv9097[] = {
|
||
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = NV9097_OGL_SET_FRONT_FACE_V_CCW,
|
||
[VK_FRONT_FACE_CLOCKWISE] = NV9097_OGL_SET_FRONT_FACE_V_CW,
|
||
};
|
||
assert(vk_face < ARRAY_SIZE(vk_to_nv9097));
|
||
|
||
uint32_t nv9097_face = 0x900 | (1 - vk_face);
|
||
assert(nv9097_face == vk_to_nv9097[vk_face]);
|
||
return nv9097_face;
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_provoking_vertex(VkProvokingVertexModeEXT vk_mode)
|
||
{
|
||
STATIC_ASSERT(VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT ==
|
||
NV9097_SET_PROVOKING_VERTEX_V_FIRST);
|
||
STATIC_ASSERT(VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT ==
|
||
NV9097_SET_PROVOKING_VERTEX_V_LAST);
|
||
return vk_mode;
|
||
}
|
||
|
||
static void
|
||
nvk_flush_rs_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 44);
|
||
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE))
|
||
P_IMMD(p, NV9097, SET_RASTER_ENABLE, !dyn->rs.rasterizer_discard_enable);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE)) {
|
||
const bool z_clamp = dyn->rs.depth_clamp_enable;
|
||
const bool z_clip = vk_rasterization_state_depth_clip_enable(&dyn->rs);
|
||
P_IMMD(p, NVC397, SET_VIEWPORT_CLIP_CONTROL, {
|
||
/* We only set Z clip range if clamp is requested. Otherwise, we
|
||
* leave it set to -/+INF and clamp using the guardband below.
|
||
*
|
||
* TODO: Fix pre-Volta
|
||
*
|
||
* This probably involves a few macros, one which stases viewport
|
||
* min/maxDepth in scratch states and one which goes here and
|
||
* emits either min/maxDepth or -/+INF as needed.
|
||
*/
|
||
.min_z_zero_max_z_one = MIN_Z_ZERO_MAX_Z_ONE_FALSE,
|
||
.z_clip_range = nvk_cmd_buffer_3d_cls(cmd) >= VOLTA_A
|
||
? (z_clamp ? Z_CLIP_RANGE_MIN_Z_MAX_Z
|
||
: Z_CLIP_RANGE_MINUS_INF_PLUS_INF)
|
||
: Z_CLIP_RANGE_USE_FIELD_MIN_Z_ZERO_MAX_Z_ONE,
|
||
|
||
.pixel_min_z = PIXEL_MIN_Z_CLAMP,
|
||
.pixel_max_z = PIXEL_MAX_Z_CLAMP,
|
||
|
||
.geometry_guardband = GEOMETRY_GUARDBAND_SCALE_256,
|
||
.line_point_cull_guardband = LINE_POINT_CULL_GUARDBAND_SCALE_256,
|
||
.geometry_clip = z_clip ? GEOMETRY_CLIP_FRUSTUM_XYZ_CLIP
|
||
: GEOMETRY_CLIP_FRUSTUM_XY_CLIP,
|
||
|
||
/* We clip depth with the geometry clipper to ensure that it gets
|
||
* clipped before depth bias is applied. If we leave it up to the
|
||
* raserizer clipper (pixel_min/max_z = CLIP), it will clip too late
|
||
* in the pipeline. This can be seen in two different ways:
|
||
*
|
||
* - When depth bias is enabled, the bias is applied post-clipping.
|
||
* If we clip in the rasterizer, it will clip according to the
|
||
* post-bias depth which is wrong.
|
||
*
|
||
* - If the fragment shader overrides the depth by writing to
|
||
* gl_FragDepth, it should be clipped according to the original
|
||
* geometry, not accoring to gl_FragDepth.
|
||
*
|
||
* In order to always get the geometry clipper, we need to set a
|
||
* tight guardband (geometry_guardband_z = SCALE_1).
|
||
*/
|
||
.geometry_guardband_z = z_clip ? GEOMETRY_GUARDBAND_Z_SCALE_1
|
||
: GEOMETRY_GUARDBAND_Z_SCALE_256,
|
||
});
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE)) {
|
||
uint32_t polygon_mode = vk_to_nv9097_polygon_mode(dyn->rs.polygon_mode);
|
||
P_MTHD(p, NV9097, SET_FRONT_POLYGON_MODE);
|
||
P_NV9097_SET_FRONT_POLYGON_MODE(p, polygon_mode);
|
||
P_NV9097_SET_BACK_POLYGON_MODE(p, polygon_mode);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE)) {
|
||
P_IMMD(p, NV9097, OGL_SET_CULL, dyn->rs.cull_mode != VK_CULL_MODE_NONE);
|
||
|
||
if (dyn->rs.cull_mode != VK_CULL_MODE_NONE) {
|
||
uint32_t face = vk_to_nv9097_cull_mode(dyn->rs.cull_mode);
|
||
P_IMMD(p, NV9097, OGL_SET_CULL_FACE, face);
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE)) {
|
||
P_IMMD(p, NV9097, OGL_SET_FRONT_FACE,
|
||
vk_to_nv9097_front_face(dyn->rs.front_face));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
|
||
P_IMMD(p, NV9097, SET_PROVOKING_VERTEX,
|
||
vk_to_nv9097_provoking_vertex(dyn->rs.provoking_vertex));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE)) {
|
||
P_MTHD(p, NV9097, SET_POLY_OFFSET_POINT);
|
||
P_NV9097_SET_POLY_OFFSET_POINT(p, dyn->rs.depth_bias.enable);
|
||
P_NV9097_SET_POLY_OFFSET_LINE(p, dyn->rs.depth_bias.enable);
|
||
P_NV9097_SET_POLY_OFFSET_FILL(p, dyn->rs.depth_bias.enable);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
|
||
switch (dyn->rs.depth_bias.representation) {
|
||
case VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORMAT_EXT:
|
||
P_IMMD(p, NV9097, SET_DEPTH_BIAS_CONTROL,
|
||
DEPTH_FORMAT_DEPENDENT_TRUE);
|
||
break;
|
||
case VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT:
|
||
P_IMMD(p, NV9097, SET_DEPTH_BIAS_CONTROL,
|
||
DEPTH_FORMAT_DEPENDENT_FALSE);
|
||
break;
|
||
case VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT:
|
||
default:
|
||
unreachable("Unsupported depth bias representation");
|
||
}
|
||
/* TODO: The blob multiplies by 2 for some reason. We don't. */
|
||
P_IMMD(p, NV9097, SET_DEPTH_BIAS, fui(dyn->rs.depth_bias.constant));
|
||
P_IMMD(p, NV9097, SET_SLOPE_SCALE_DEPTH_BIAS, fui(dyn->rs.depth_bias.slope));
|
||
P_IMMD(p, NV9097, SET_DEPTH_BIAS_CLAMP, fui(dyn->rs.depth_bias.clamp));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
|
||
P_MTHD(p, NV9097, SET_LINE_WIDTH_FLOAT);
|
||
P_NV9097_SET_LINE_WIDTH_FLOAT(p, fui(dyn->rs.line.width));
|
||
P_NV9097_SET_ALIASED_LINE_WIDTH_FLOAT(p, fui(dyn->rs.line.width));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE)) {
|
||
switch (dyn->rs.line.mode) {
|
||
case VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR:
|
||
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR:
|
||
P_IMMD(p, NV9097, SET_LINE_MULTISAMPLE_OVERRIDE, ENABLE_FALSE);
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIASED_LINE, ENABLE_FALSE);
|
||
break;
|
||
|
||
case VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR:
|
||
P_IMMD(p, NV9097, SET_LINE_MULTISAMPLE_OVERRIDE, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIASED_LINE, ENABLE_FALSE);
|
||
break;
|
||
|
||
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR:
|
||
P_IMMD(p, NV9097, SET_LINE_MULTISAMPLE_OVERRIDE, ENABLE_TRUE);
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIASED_LINE, ENABLE_TRUE);
|
||
break;
|
||
|
||
default:
|
||
unreachable("Invalid line rasterization mode");
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
|
||
P_IMMD(p, NV9097, SET_LINE_STIPPLE, dyn->rs.line.stipple.enable);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
|
||
/* map factor from [1,256] to [0, 255] */
|
||
uint32_t stipple_factor = CLAMP(dyn->rs.line.stipple.factor, 1, 256) - 1;
|
||
P_IMMD(p, NV9097, SET_LINE_STIPPLE_PARAMETERS, {
|
||
.factor = stipple_factor,
|
||
.pattern = dyn->rs.line.stipple.pattern,
|
||
});
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
|
||
P_IMMD(p, NV9097, SET_RASTER_INPUT, dyn->rs.rasterization_stream);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE)) {
|
||
if (dyn->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
|
||
P_IMMD(p, NVB197, SET_CONSERVATIVE_RASTER, ENABLE_FALSE);
|
||
} else {
|
||
uint32_t extra_overestimate =
|
||
MIN2(3, dyn->rs.extra_primitive_overestimation_size * 4);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) < VOLTA_A) {
|
||
P_1INC(p, NVB197, CALL_MME_MACRO(NVK_MME_SET_CONSERVATIVE_RASTER_STATE));
|
||
P_INLINE_DATA(p, extra_overestimate << 23);
|
||
} else {
|
||
P_IMMD(p, NVC397, SET_CONSERVATIVE_RASTER_CONTROL, {
|
||
.extra_prim_bloat = extra_overestimate,
|
||
.copy_inner_to_outer =
|
||
(dyn->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT),
|
||
.triangle_snap_mode = TRIANGLE_SNAP_MODE_MODE_PRE_SNAP,
|
||
.line_and_point_snap_mode = LINE_AND_POINT_SNAP_MODE_MODE_PRE_SNAP,
|
||
.uncertainty_region_size = UNCERTAINTY_REGION_SIZE_SIZE_512,
|
||
});
|
||
}
|
||
P_IMMD(p, NVB197, SET_CONSERVATIVE_RASTER, ENABLE_TRUE);
|
||
}
|
||
}
|
||
}
|
||
|
||
static VkSampleLocationEXT
|
||
vk_sample_location(const struct vk_sample_locations_state *sl,
|
||
uint32_t x, uint32_t y, uint32_t s)
|
||
{
|
||
x = x % sl->grid_size.width;
|
||
y = y % sl->grid_size.height;
|
||
|
||
return sl->locations[(x + y * sl->grid_size.width) * sl->per_pixel + s];
|
||
}
|
||
|
||
static struct nvk_sample_location
|
||
vk_to_nvk_sample_location(VkSampleLocationEXT loc)
|
||
{
|
||
return (struct nvk_sample_location) {
|
||
.x_u4 = util_bitpack_ufixed_clamp(loc.x, 0, 3, 4),
|
||
.y_u4 = util_bitpack_ufixed_clamp(loc.y, 0, 3, 4),
|
||
};
|
||
}
|
||
|
||
static void
|
||
nvk_flush_ms_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
|
||
const struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES)) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
|
||
/* When we don't have any attachments, we can't know the sample count
|
||
* from the render pass so we need to emit SET_ANTI_ALIAS here. See the
|
||
* comment in nvk_BeginRendering() for more details.
|
||
*/
|
||
if (render->samples == 0) {
|
||
/* Multisample information MAY be missing (rasterizationSamples == 0)
|
||
* if rasterizer discard is enabled. However, this isn't valid in
|
||
* the hardware so always use at least one sample.
|
||
*/
|
||
const uint32_t samples = MAX2(1, dyn->ms.rasterization_samples);
|
||
enum nil_sample_layout layout = nil_choose_sample_layout(samples);
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIAS, nil_to_nv9097_samples_mode(layout));
|
||
} else {
|
||
/* Multisample information MAY be missing (rasterizationSamples == 0)
|
||
* if rasterizer discard is enabled.
|
||
*/
|
||
assert(dyn->ms.rasterization_samples == 0 ||
|
||
dyn->ms.rasterization_samples == render->samples);
|
||
}
|
||
|
||
struct nvk_shader *fs = cmd->state.gfx.shaders[MESA_SHADER_FRAGMENT];
|
||
const float min_sample_shading = fs != NULL ? fs->min_sample_shading : 0;
|
||
uint32_t min_samples = ceilf(dyn->ms.rasterization_samples *
|
||
min_sample_shading);
|
||
min_samples = util_next_power_of_two(MAX2(1, min_samples));
|
||
|
||
P_IMMD(p, NV9097, SET_HYBRID_ANTI_ALIAS_CONTROL, {
|
||
.passes = min_samples,
|
||
.centroid = min_samples > 1 ? CENTROID_PER_PASS
|
||
: CENTROID_PER_FRAGMENT,
|
||
});
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE)) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||
P_IMMD(p, NV9097, SET_ANTI_ALIAS_ALPHA_CONTROL, {
|
||
.alpha_to_coverage = dyn->ms.alpha_to_coverage_enable,
|
||
.alpha_to_one = dyn->ms.alpha_to_one_enable,
|
||
});
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)) {
|
||
const struct vk_sample_locations_state *sl;
|
||
if (dyn->ms.sample_locations_enable) {
|
||
sl = dyn->ms.sample_locations;
|
||
} else {
|
||
const uint32_t samples = MAX2(1, dyn->ms.rasterization_samples);
|
||
sl = vk_standard_sample_locations_state(samples);
|
||
}
|
||
|
||
for (uint32_t i = 0; i < sl->per_pixel; i++) {
|
||
desc->root.draw.sample_locations[i] =
|
||
vk_to_nvk_sample_location(sl->locations[i]);
|
||
}
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
|
||
struct nvk_sample_location loc[16];
|
||
for (uint32_t n = 0; n < ARRAY_SIZE(loc); n++) {
|
||
const uint32_t s = n % sl->per_pixel;
|
||
const uint32_t px = n / sl->per_pixel;
|
||
const uint32_t x = px % 2;
|
||
const uint32_t y = px / 2;
|
||
|
||
loc[n] = vk_to_nvk_sample_location(vk_sample_location(sl, x, y, s));
|
||
}
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
|
||
|
||
P_MTHD(p, NVB197, SET_ANTI_ALIAS_SAMPLE_POSITIONS(0));
|
||
for (uint32_t i = 0; i < 4; i++) {
|
||
P_NVB197_SET_ANTI_ALIAS_SAMPLE_POSITIONS(p, i, {
|
||
.x0 = loc[i * 4 + 0].x_u4,
|
||
.y0 = loc[i * 4 + 0].y_u4,
|
||
.x1 = loc[i * 4 + 1].x_u4,
|
||
.y1 = loc[i * 4 + 1].y_u4,
|
||
.x2 = loc[i * 4 + 2].x_u4,
|
||
.y2 = loc[i * 4 + 2].y_u4,
|
||
.x3 = loc[i * 4 + 3].x_u4,
|
||
.y3 = loc[i * 4 + 3].y_u4,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
|
||
P_MTHD(p, NV9097, SET_SAMPLE_MASK_X0_Y0);
|
||
P_NV9097_SET_SAMPLE_MASK_X0_Y0(p, dyn->ms.sample_mask & 0xffff);
|
||
P_NV9097_SET_SAMPLE_MASK_X1_Y0(p, dyn->ms.sample_mask & 0xffff);
|
||
P_NV9097_SET_SAMPLE_MASK_X0_Y1(p, dyn->ms.sample_mask & 0xffff);
|
||
P_NV9097_SET_SAMPLE_MASK_X1_Y1(p, dyn->ms.sample_mask & 0xffff);
|
||
}
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_compare_op(VkCompareOp vk_op)
|
||
{
|
||
ASSERTED static const uint16_t vk_to_nv9097[] = {
|
||
[VK_COMPARE_OP_NEVER] = NV9097_SET_DEPTH_FUNC_V_OGL_NEVER,
|
||
[VK_COMPARE_OP_LESS] = NV9097_SET_DEPTH_FUNC_V_OGL_LESS,
|
||
[VK_COMPARE_OP_EQUAL] = NV9097_SET_DEPTH_FUNC_V_OGL_EQUAL,
|
||
[VK_COMPARE_OP_LESS_OR_EQUAL] = NV9097_SET_DEPTH_FUNC_V_OGL_LEQUAL,
|
||
[VK_COMPARE_OP_GREATER] = NV9097_SET_DEPTH_FUNC_V_OGL_GREATER,
|
||
[VK_COMPARE_OP_NOT_EQUAL] = NV9097_SET_DEPTH_FUNC_V_OGL_NOTEQUAL,
|
||
[VK_COMPARE_OP_GREATER_OR_EQUAL] = NV9097_SET_DEPTH_FUNC_V_OGL_GEQUAL,
|
||
[VK_COMPARE_OP_ALWAYS] = NV9097_SET_DEPTH_FUNC_V_OGL_ALWAYS,
|
||
};
|
||
assert(vk_op < ARRAY_SIZE(vk_to_nv9097));
|
||
|
||
uint32_t nv9097_op = 0x200 | vk_op;
|
||
assert(nv9097_op == vk_to_nv9097[vk_op]);
|
||
return nv9097_op;
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_stencil_op(VkStencilOp vk_op)
|
||
{
|
||
#define OP(vk, nv) [VK_STENCIL_OP_##vk] = NV9097_SET_STENCIL_OP_FAIL_V_##nv
|
||
ASSERTED static const uint16_t vk_to_nv9097[] = {
|
||
OP(KEEP, D3D_KEEP),
|
||
OP(ZERO, D3D_ZERO),
|
||
OP(REPLACE, D3D_REPLACE),
|
||
OP(INCREMENT_AND_CLAMP, D3D_INCRSAT),
|
||
OP(DECREMENT_AND_CLAMP, D3D_DECRSAT),
|
||
OP(INVERT, D3D_INVERT),
|
||
OP(INCREMENT_AND_WRAP, D3D_INCR),
|
||
OP(DECREMENT_AND_WRAP, D3D_DECR),
|
||
};
|
||
assert(vk_op < ARRAY_SIZE(vk_to_nv9097));
|
||
#undef OP
|
||
|
||
uint32_t nv9097_op = vk_op + 1;
|
||
assert(nv9097_op == vk_to_nv9097[vk_op]);
|
||
return nv9097_op;
|
||
}
|
||
|
||
static void
|
||
nvk_flush_ds_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 35);
|
||
|
||
const struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE)) {
|
||
bool enable = dyn->ds.depth.test_enable &&
|
||
render->depth_att.vk_format != VK_FORMAT_UNDEFINED;
|
||
P_IMMD(p, NV9097, SET_DEPTH_TEST, enable);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE)) {
|
||
bool enable = dyn->ds.depth.write_enable &&
|
||
render->depth_att.vk_format != VK_FORMAT_UNDEFINED;
|
||
P_IMMD(p, NV9097, SET_DEPTH_WRITE, enable);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP)) {
|
||
const uint32_t func = vk_to_nv9097_compare_op(dyn->ds.depth.compare_op);
|
||
P_IMMD(p, NV9097, SET_DEPTH_FUNC, func);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE)) {
|
||
bool enable = dyn->ds.depth.bounds_test.enable &&
|
||
render->depth_att.vk_format != VK_FORMAT_UNDEFINED;
|
||
P_IMMD(p, NV9097, SET_DEPTH_BOUNDS_TEST, enable);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
|
||
P_MTHD(p, NV9097, SET_DEPTH_BOUNDS_MIN);
|
||
P_NV9097_SET_DEPTH_BOUNDS_MIN(p, fui(dyn->ds.depth.bounds_test.min));
|
||
P_NV9097_SET_DEPTH_BOUNDS_MAX(p, fui(dyn->ds.depth.bounds_test.max));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE)) {
|
||
bool enable = dyn->ds.stencil.test_enable &&
|
||
render->stencil_att.vk_format != VK_FORMAT_UNDEFINED;
|
||
P_IMMD(p, NV9097, SET_STENCIL_TEST, enable);
|
||
}
|
||
|
||
const struct vk_stencil_test_face_state *front = &dyn->ds.stencil.front;
|
||
const struct vk_stencil_test_face_state *back = &dyn->ds.stencil.back;
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP)) {
|
||
P_MTHD(p, NV9097, SET_STENCIL_OP_FAIL);
|
||
P_NV9097_SET_STENCIL_OP_FAIL(p, vk_to_nv9097_stencil_op(front->op.fail));
|
||
P_NV9097_SET_STENCIL_OP_ZFAIL(p, vk_to_nv9097_stencil_op(front->op.depth_fail));
|
||
P_NV9097_SET_STENCIL_OP_ZPASS(p, vk_to_nv9097_stencil_op(front->op.pass));
|
||
P_NV9097_SET_STENCIL_FUNC(p, vk_to_nv9097_compare_op(front->op.compare));
|
||
|
||
P_MTHD(p, NV9097, SET_BACK_STENCIL_OP_FAIL);
|
||
P_NV9097_SET_BACK_STENCIL_OP_FAIL(p, vk_to_nv9097_stencil_op(back->op.fail));
|
||
P_NV9097_SET_BACK_STENCIL_OP_ZFAIL(p, vk_to_nv9097_stencil_op(back->op.depth_fail));
|
||
P_NV9097_SET_BACK_STENCIL_OP_ZPASS(p, vk_to_nv9097_stencil_op(back->op.pass));
|
||
P_NV9097_SET_BACK_STENCIL_FUNC(p, vk_to_nv9097_compare_op(back->op.compare));
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK)) {
|
||
P_IMMD(p, NV9097, SET_STENCIL_FUNC_MASK, front->compare_mask);
|
||
P_IMMD(p, NV9097, SET_BACK_STENCIL_FUNC_MASK, back->compare_mask);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
|
||
P_IMMD(p, NV9097, SET_STENCIL_MASK, front->write_mask);
|
||
P_IMMD(p, NV9097, SET_BACK_STENCIL_MASK, back->write_mask);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
|
||
P_IMMD(p, NV9097, SET_STENCIL_FUNC_REF, front->reference);
|
||
P_IMMD(p, NV9097, SET_BACK_STENCIL_FUNC_REF, back->reference);
|
||
}
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_logic_op(VkLogicOp vk_op)
|
||
{
|
||
ASSERTED uint16_t vk_to_nv9097[] = {
|
||
[VK_LOGIC_OP_CLEAR] = NV9097_SET_LOGIC_OP_FUNC_V_CLEAR,
|
||
[VK_LOGIC_OP_AND] = NV9097_SET_LOGIC_OP_FUNC_V_AND,
|
||
[VK_LOGIC_OP_AND_REVERSE] = NV9097_SET_LOGIC_OP_FUNC_V_AND_REVERSE,
|
||
[VK_LOGIC_OP_COPY] = NV9097_SET_LOGIC_OP_FUNC_V_COPY,
|
||
[VK_LOGIC_OP_AND_INVERTED] = NV9097_SET_LOGIC_OP_FUNC_V_AND_INVERTED,
|
||
[VK_LOGIC_OP_NO_OP] = NV9097_SET_LOGIC_OP_FUNC_V_NOOP,
|
||
[VK_LOGIC_OP_XOR] = NV9097_SET_LOGIC_OP_FUNC_V_XOR,
|
||
[VK_LOGIC_OP_OR] = NV9097_SET_LOGIC_OP_FUNC_V_OR,
|
||
[VK_LOGIC_OP_NOR] = NV9097_SET_LOGIC_OP_FUNC_V_NOR,
|
||
[VK_LOGIC_OP_EQUIVALENT] = NV9097_SET_LOGIC_OP_FUNC_V_EQUIV,
|
||
[VK_LOGIC_OP_INVERT] = NV9097_SET_LOGIC_OP_FUNC_V_INVERT,
|
||
[VK_LOGIC_OP_OR_REVERSE] = NV9097_SET_LOGIC_OP_FUNC_V_OR_REVERSE,
|
||
[VK_LOGIC_OP_COPY_INVERTED] = NV9097_SET_LOGIC_OP_FUNC_V_COPY_INVERTED,
|
||
[VK_LOGIC_OP_OR_INVERTED] = NV9097_SET_LOGIC_OP_FUNC_V_OR_INVERTED,
|
||
[VK_LOGIC_OP_NAND] = NV9097_SET_LOGIC_OP_FUNC_V_NAND,
|
||
[VK_LOGIC_OP_SET] = NV9097_SET_LOGIC_OP_FUNC_V_SET,
|
||
};
|
||
assert(vk_op < ARRAY_SIZE(vk_to_nv9097));
|
||
|
||
uint32_t nv9097_op = 0x1500 | vk_op;
|
||
assert(nv9097_op == vk_to_nv9097[vk_op]);
|
||
return nv9097_op;
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_blend_op(VkBlendOp vk_op)
|
||
{
|
||
#define OP(vk, nv) [VK_BLEND_OP_##vk] = NV9097_SET_BLEND_COLOR_OP_V_OGL_##nv
|
||
ASSERTED uint16_t vk_to_nv9097[] = {
|
||
OP(ADD, FUNC_ADD),
|
||
OP(SUBTRACT, FUNC_SUBTRACT),
|
||
OP(REVERSE_SUBTRACT, FUNC_REVERSE_SUBTRACT),
|
||
OP(MIN, MIN),
|
||
OP(MAX, MAX),
|
||
};
|
||
assert(vk_op < ARRAY_SIZE(vk_to_nv9097));
|
||
#undef OP
|
||
|
||
return vk_to_nv9097[vk_op];
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_blend_factor(VkBlendFactor vk_factor)
|
||
{
|
||
#define FACTOR(vk, nv) [VK_BLEND_FACTOR_##vk] = \
|
||
NV9097_SET_BLEND_COLOR_SOURCE_COEFF_V_##nv
|
||
ASSERTED uint16_t vk_to_nv9097[] = {
|
||
FACTOR(ZERO, OGL_ZERO),
|
||
FACTOR(ONE, OGL_ONE),
|
||
FACTOR(SRC_COLOR, OGL_SRC_COLOR),
|
||
FACTOR(ONE_MINUS_SRC_COLOR, OGL_ONE_MINUS_SRC_COLOR),
|
||
FACTOR(DST_COLOR, OGL_DST_COLOR),
|
||
FACTOR(ONE_MINUS_DST_COLOR, OGL_ONE_MINUS_DST_COLOR),
|
||
FACTOR(SRC_ALPHA, OGL_SRC_ALPHA),
|
||
FACTOR(ONE_MINUS_SRC_ALPHA, OGL_ONE_MINUS_SRC_ALPHA),
|
||
FACTOR(DST_ALPHA, OGL_DST_ALPHA),
|
||
FACTOR(ONE_MINUS_DST_ALPHA, OGL_ONE_MINUS_DST_ALPHA),
|
||
FACTOR(CONSTANT_COLOR, OGL_CONSTANT_COLOR),
|
||
FACTOR(ONE_MINUS_CONSTANT_COLOR, OGL_ONE_MINUS_CONSTANT_COLOR),
|
||
FACTOR(CONSTANT_ALPHA, OGL_CONSTANT_ALPHA),
|
||
FACTOR(ONE_MINUS_CONSTANT_ALPHA, OGL_ONE_MINUS_CONSTANT_ALPHA),
|
||
FACTOR(SRC_ALPHA_SATURATE, OGL_SRC_ALPHA_SATURATE),
|
||
FACTOR(SRC1_COLOR, OGL_SRC1COLOR),
|
||
FACTOR(ONE_MINUS_SRC1_COLOR, OGL_INVSRC1COLOR),
|
||
FACTOR(SRC1_ALPHA, OGL_SRC1ALPHA),
|
||
FACTOR(ONE_MINUS_SRC1_ALPHA, OGL_INVSRC1ALPHA),
|
||
};
|
||
assert(vk_factor < ARRAY_SIZE(vk_to_nv9097));
|
||
#undef FACTOR
|
||
|
||
return vk_to_nv9097[vk_factor];
|
||
}
|
||
|
||
void
|
||
nvk_mme_set_write_mask(struct mme_builder *b)
|
||
{
|
||
struct mme_value count = mme_load(b);
|
||
struct mme_value mask = mme_load(b);
|
||
|
||
/*
|
||
* mask is a bit field
|
||
*
|
||
* attachment index 88887777666655554444333322221111
|
||
* component abgrabgrabgrabgrabgrabgrabgrabgr
|
||
*/
|
||
|
||
struct mme_value common_mask = mme_mov(b, mme_imm(1));
|
||
struct mme_value first = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
|
||
struct mme_value i = mme_mov(b, mme_zero());
|
||
|
||
mme_while(b, ine, i, count) {
|
||
/*
|
||
We call NV9097_SET_CT_WRITE per attachment. It needs a value as:
|
||
0x0000 0000 0000 0000 000a 000b 000g 000r
|
||
|
||
So for i=0 a mask of
|
||
0x0000 0000 0000 0000 0000 0000 0000 1111
|
||
becomes
|
||
0x0000 0000 0000 0000 0001 0001 0001 0001
|
||
*/
|
||
|
||
struct mme_value val = mme_merge(b, mme_zero(), mask, 0, 1, 0);
|
||
mme_merge_to(b, val, val, mask, 4, 1, 1);
|
||
mme_merge_to(b, val, val, mask, 8, 1, 2);
|
||
mme_merge_to(b, val, val, mask, 12, 1, 3);
|
||
|
||
mme_mthd_arr(b, NV9097_SET_CT_WRITE(0), i);
|
||
mme_emit(b, val);
|
||
mme_free_reg(b, val);
|
||
|
||
/* Check if all masks are common */
|
||
struct mme_value temp = mme_add(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
|
||
mme_if(b, ine, first, temp) {
|
||
mme_mov_to(b, common_mask, mme_zero());
|
||
}
|
||
mme_free_reg(b, temp);
|
||
|
||
mme_srl_to(b, mask, mask, mme_imm(4));
|
||
|
||
mme_add_to(b, i, i, mme_imm(1));
|
||
}
|
||
|
||
mme_mthd(b, NV9097_SET_SINGLE_CT_WRITE_CONTROL);
|
||
mme_emit(b, common_mask);
|
||
}
|
||
|
||
static void
|
||
nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nvk_rendering_state *render = &cmd->state.gfx.render;
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
struct nv_push *p =
|
||
nvk_cmd_buffer_push(cmd, 13 + 10 * render->color_att_count);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE))
|
||
P_IMMD(p, NV9097, SET_LOGIC_OP, dyn->cb.logic_op_enable);
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP)) {
|
||
const uint32_t func = vk_to_nv9097_logic_op(dyn->cb.logic_op);
|
||
P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES)) {
|
||
for (uint8_t a = 0; a < render->color_att_count; a++) {
|
||
P_IMMD(p, NV9097, SET_BLEND(a), dyn->cb.attachments[a].blend_enable);
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
|
||
for (uint8_t a = 0; a < render->color_att_count; a++) {
|
||
const struct vk_color_blend_attachment_state *att =
|
||
&dyn->cb.attachments[a];
|
||
P_MTHD(p, NV9097, SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(a));
|
||
P_NV9097_SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(p, a, ENABLE_TRUE);
|
||
P_NV9097_SET_BLEND_PER_TARGET_COLOR_OP(p, a,
|
||
vk_to_nv9097_blend_op(att->color_blend_op));
|
||
P_NV9097_SET_BLEND_PER_TARGET_COLOR_SOURCE_COEFF(p, a,
|
||
vk_to_nv9097_blend_factor(att->src_color_blend_factor));
|
||
P_NV9097_SET_BLEND_PER_TARGET_COLOR_DEST_COEFF(p, a,
|
||
vk_to_nv9097_blend_factor(att->dst_color_blend_factor));
|
||
P_NV9097_SET_BLEND_PER_TARGET_ALPHA_OP(p, a,
|
||
vk_to_nv9097_blend_op(att->alpha_blend_op));
|
||
P_NV9097_SET_BLEND_PER_TARGET_ALPHA_SOURCE_COEFF(p, a,
|
||
vk_to_nv9097_blend_factor(att->src_alpha_blend_factor));
|
||
P_NV9097_SET_BLEND_PER_TARGET_ALPHA_DEST_COEFF(p, a,
|
||
vk_to_nv9097_blend_factor(att->dst_alpha_blend_factor));
|
||
}
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
|
||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_ATTACHMENTS)) {
|
||
uint32_t color_write_enables = 0x0;
|
||
for (uint8_t a = 0; a < render->color_att_count; a++) {
|
||
if (dyn->cb.color_write_enables & BITFIELD_BIT(a))
|
||
color_write_enables |= 0xf << (4 * a);
|
||
}
|
||
|
||
uint32_t cb_att_write_mask = 0x0;
|
||
for (uint8_t a = 0; a < render->color_att_count; a++)
|
||
cb_att_write_mask |= dyn->cb.attachments[a].write_mask << (a * 4);
|
||
|
||
uint32_t rp_att_write_mask = 0x0;
|
||
for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) {
|
||
if (dyn->rp.attachments & (MESA_VK_RP_ATTACHMENT_COLOR_0_BIT << a))
|
||
rp_att_write_mask |= 0xf << (4 * a);
|
||
}
|
||
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
|
||
P_INLINE_DATA(p, render->color_att_count);
|
||
P_INLINE_DATA(p, color_write_enables &
|
||
cb_att_write_mask &
|
||
rp_att_write_mask);
|
||
}
|
||
|
||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||
P_MTHD(p, NV9097, SET_BLEND_CONST_RED);
|
||
P_NV9097_SET_BLEND_CONST_RED(p, fui(dyn->cb.blend_constants[0]));
|
||
P_NV9097_SET_BLEND_CONST_GREEN(p, fui(dyn->cb.blend_constants[1]));
|
||
P_NV9097_SET_BLEND_CONST_BLUE(p, fui(dyn->cb.blend_constants[2]));
|
||
P_NV9097_SET_BLEND_CONST_ALPHA(p, fui(dyn->cb.blend_constants[3]));
|
||
}
|
||
}
|
||
|
||
static void
|
||
nvk_flush_dynamic_state(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
if (!vk_dynamic_graphics_state_any_dirty(dyn))
|
||
return;
|
||
|
||
nvk_flush_vi_state(cmd);
|
||
nvk_flush_ia_state(cmd);
|
||
nvk_flush_ts_state(cmd);
|
||
nvk_flush_vp_state(cmd);
|
||
nvk_flush_rs_state(cmd);
|
||
|
||
/* MESA_VK_DYNAMIC_FSR */
|
||
|
||
nvk_flush_ms_state(cmd);
|
||
nvk_flush_ds_state(cmd);
|
||
nvk_flush_cb_state(cmd);
|
||
|
||
vk_dynamic_graphics_state_clear_dirty(dyn);
|
||
}
|
||
|
||
void
|
||
nvk_mme_bind_cbuf_desc(struct mme_builder *b)
|
||
{
|
||
/* First 4 bits are group, later bits are slot */
|
||
struct mme_value group_slot = mme_load(b);
|
||
|
||
if (b->devinfo->cls_eng3d >= TURING_A) {
|
||
struct mme_value64 addr = mme_load_addr64(b);
|
||
mme_tu104_read_fifoed(b, addr, mme_imm(3));
|
||
}
|
||
|
||
/* Load the descriptor */
|
||
struct mme_value addr_lo = mme_load(b);
|
||
struct mme_value addr_hi = mme_load(b);
|
||
struct mme_value size = mme_load(b);
|
||
|
||
struct mme_value cb = mme_alloc_reg(b);
|
||
mme_if(b, ieq, size, mme_zero()) {
|
||
/* Bottim bit is the valid bit, 8:4 are shader slot */
|
||
mme_merge_to(b, cb, mme_zero(), group_slot, 4, 5, 4);
|
||
}
|
||
|
||
mme_if(b, ine, size, mme_zero()) {
|
||
uint32_t alignment = nvk_min_cbuf_alignment(b->devinfo);
|
||
mme_add_to(b, size, size, mme_imm(alignment - 1));
|
||
mme_and_to(b, size, size, mme_imm(~(alignment - 1)));
|
||
|
||
/* size = max(size, NVK_MAX_CBUF_SIZE) */
|
||
assert(util_is_power_of_two_nonzero(NVK_MAX_CBUF_SIZE));
|
||
struct mme_value is_large =
|
||
mme_and(b, size, mme_imm(~(NVK_MAX_CBUF_SIZE - 1)));
|
||
mme_if(b, ine, is_large, mme_zero()) {
|
||
mme_mov_to(b, size, mme_imm(NVK_MAX_CBUF_SIZE));
|
||
}
|
||
|
||
mme_mthd(b, NV9097_SET_CONSTANT_BUFFER_SELECTOR_A);
|
||
mme_emit(b, size);
|
||
mme_emit(b, addr_hi);
|
||
mme_emit(b, addr_lo);
|
||
|
||
/* Bottim bit is the valid bit, 8:4 are shader slot */
|
||
mme_merge_to(b, cb, mme_imm(1), group_slot, 4, 5, 4);
|
||
}
|
||
|
||
mme_free_reg(b, addr_hi);
|
||
mme_free_reg(b, addr_lo);
|
||
mme_free_reg(b, size);
|
||
|
||
/* The group comes in the bottom 4 bits in group_slot and we need to
|
||
* combine it with the method. However, unlike most array methods with a
|
||
* stride if 1 dword, BIND_GROUP_CONSTANT_BUFFER has a stride of 32B or 8
|
||
* dwords. This means we need to also shift by 3.
|
||
*/
|
||
struct mme_value group = mme_merge(b, mme_imm(0), group_slot, 3, 4, 0);
|
||
mme_mthd_arr(b, NV9097_BIND_GROUP_CONSTANT_BUFFER(0), group);
|
||
mme_emit(b, cb);
|
||
}
|
||
|
||
static void
|
||
nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
|
||
{
|
||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
|
||
struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
|
||
VkResult result;
|
||
|
||
nvk_cmd_buffer_flush_push_descriptors(cmd, desc);
|
||
|
||
/* pre Pascal the constant buffer sizes need to be 0x100 aligned. As we
|
||
* simply allocated a buffer and upload data to it, make sure its size is
|
||
* 0x100 aligned.
|
||
*/
|
||
STATIC_ASSERT((sizeof(desc->root) & 0xff) == 0);
|
||
assert(sizeof(desc->root) % min_cbuf_alignment == 0);
|
||
|
||
void *root_desc_map;
|
||
uint64_t root_desc_addr;
|
||
result = nvk_cmd_buffer_upload_alloc(cmd, sizeof(desc->root),
|
||
min_cbuf_alignment,
|
||
&root_desc_addr, &root_desc_map);
|
||
if (unlikely(result != VK_SUCCESS)) {
|
||
vk_command_buffer_set_error(&cmd->vk, result);
|
||
return;
|
||
}
|
||
|
||
desc->root.root_desc_addr = root_desc_addr;
|
||
memcpy(root_desc_map, &desc->root, sizeof(desc->root));
|
||
|
||
/* Find cbuf maps for the 5 cbuf groups */
|
||
const struct nvk_shader *cbuf_shaders[5] = { NULL, };
|
||
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
|
||
const struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
|
||
if (shader == NULL)
|
||
continue;
|
||
|
||
uint32_t group = nvk_cbuf_binding_for_stage(stage);
|
||
assert(group < ARRAY_SIZE(cbuf_shaders));
|
||
cbuf_shaders[group] = shader;
|
||
}
|
||
|
||
uint32_t root_cbuf_count = 0;
|
||
for (uint32_t group = 0; group < ARRAY_SIZE(cbuf_shaders); group++) {
|
||
if (cbuf_shaders[group] == NULL)
|
||
continue;
|
||
|
||
const struct nvk_shader *shader = cbuf_shaders[group];
|
||
const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map;
|
||
|
||
for (uint32_t c = 0; c < cbuf_map->cbuf_count; c++) {
|
||
const struct nvk_cbuf *cbuf = &cbuf_map->cbufs[c];
|
||
|
||
/* We bind these at the very end */
|
||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||
root_cbuf_count++;
|
||
continue;
|
||
}
|
||
|
||
struct nvk_buffer_address ba;
|
||
if (nvk_cmd_buffer_get_cbuf_descriptor(cmd, desc, shader, cbuf, &ba)) {
|
||
assert(ba.base_addr % min_cbuf_alignment == 0);
|
||
ba.size = align(ba.size, min_cbuf_alignment);
|
||
ba.size = MIN2(ba.size, NVK_MAX_CBUF_SIZE);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
|
||
|
||
if (ba.size > 0) {
|
||
P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A);
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, ba.size);
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, ba.base_addr >> 32);
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, ba.base_addr);
|
||
}
|
||
|
||
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
|
||
.valid = ba.size > 0,
|
||
.shader_slot = c,
|
||
});
|
||
} else {
|
||
uint64_t desc_addr =
|
||
nvk_cmd_buffer_get_cbuf_descriptor_addr(cmd, desc, cbuf);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
|
||
P_INLINE_DATA(p, group | (c << 4));
|
||
P_INLINE_DATA(p, desc_addr >> 32);
|
||
P_INLINE_DATA(p, desc_addr);
|
||
} else {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
|
||
P_INLINE_DATA(p, group | (c << 4));
|
||
|
||
nv_push_update_count(p, 3);
|
||
nvk_cmd_buffer_push_indirect(cmd, desc_addr, 3);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* We bind all root descriptors last so that CONSTANT_BUFFER_SELECTOR is
|
||
* always left pointing at the root descriptor table. This way draw
|
||
* parameters and similar MME root table updates always hit the root
|
||
* descriptor table and not some random UBO.
|
||
*/
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4 + 2 * root_cbuf_count);
|
||
P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A);
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, sizeof(desc->root));
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, root_desc_addr >> 32);
|
||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, root_desc_addr);
|
||
|
||
for (uint32_t group = 0; group < ARRAY_SIZE(cbuf_shaders); group++) {
|
||
if (cbuf_shaders[group] == NULL)
|
||
continue;
|
||
|
||
const struct nvk_cbuf_map *cbuf_map = &cbuf_shaders[group]->cbuf_map;
|
||
|
||
for (uint32_t c = 0; c < cbuf_map->cbuf_count; c++) {
|
||
const struct nvk_cbuf *cbuf = &cbuf_map->cbufs[c];
|
||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
|
||
.valid = VALID_TRUE,
|
||
.shader_slot = c,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Flushes all pending graphics state, in order: shaders, dynamic state, and
 * finally descriptors.
 */
static void
nvk_flush_gfx_state(struct nvk_cmd_buffer *cmd)
{
   /* Shaders first, then dynamic state, then descriptors */
   nvk_flush_shaders(cmd);
   nvk_flush_dynamic_state(cmd);
   nvk_flush_descriptors(cmd);
}
|
||
|
||
static uint32_t
|
||
vk_to_nv_index_format(VkIndexType type)
|
||
{
|
||
switch (type) {
|
||
case VK_INDEX_TYPE_UINT16:
|
||
return NVC597_SET_INDEX_BUFFER_E_INDEX_SIZE_TWO_BYTES;
|
||
case VK_INDEX_TYPE_UINT32:
|
||
return NVC597_SET_INDEX_BUFFER_E_INDEX_SIZE_FOUR_BYTES;
|
||
case VK_INDEX_TYPE_UINT8_KHR:
|
||
return NVC597_SET_INDEX_BUFFER_E_INDEX_SIZE_ONE_BYTE;
|
||
default:
|
||
unreachable("Invalid index type");
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBindIndexBuffer2KHR(VkCommandBuffer commandBuffer,
|
||
VkBuffer _buffer,
|
||
VkDeviceSize offset,
|
||
VkDeviceSize size,
|
||
VkIndexType indexType)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
|
||
|
||
uint64_t addr, range;
|
||
if (buffer != NULL && size > 0) {
|
||
addr = nvk_buffer_address(buffer, offset);
|
||
range = vk_buffer_range(&buffer->vk, offset, size);
|
||
} else {
|
||
range = addr = 0;
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART_INDEX,
|
||
vk_index_to_restart(indexType));
|
||
|
||
P_MTHD(p, NV9097, SET_INDEX_BUFFER_A);
|
||
P_NV9097_SET_INDEX_BUFFER_A(p, addr >> 32);
|
||
P_NV9097_SET_INDEX_BUFFER_B(p, addr);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
P_MTHD(p, NVC597, SET_INDEX_BUFFER_SIZE_A);
|
||
P_NVC597_SET_INDEX_BUFFER_SIZE_A(p, range >> 32);
|
||
P_NVC597_SET_INDEX_BUFFER_SIZE_B(p, range);
|
||
} else {
|
||
/* TODO: What about robust zero-size buffers? */
|
||
const uint64_t limit = range > 0 ? addr + range - 1 : 0;
|
||
P_MTHD(p, NV9097, SET_INDEX_BUFFER_C);
|
||
P_NV9097_SET_INDEX_BUFFER_C(p, limit >> 32);
|
||
P_NV9097_SET_INDEX_BUFFER_D(p, limit);
|
||
}
|
||
|
||
P_IMMD(p, NV9097, SET_INDEX_BUFFER_E, vk_to_nv_index_format(indexType));
|
||
}
|
||
|
||
void
|
||
nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
|
||
struct nvk_addr_range addr_range)
|
||
{
|
||
/* Used for meta save/restore */
|
||
if (vb_idx == 0)
|
||
cmd->state.gfx.vb0 = addr_range;
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
|
||
|
||
P_MTHD(p, NV9097, SET_VERTEX_STREAM_A_LOCATION_A(vb_idx));
|
||
P_NV9097_SET_VERTEX_STREAM_A_LOCATION_A(p, vb_idx, addr_range.addr >> 32);
|
||
P_NV9097_SET_VERTEX_STREAM_A_LOCATION_B(p, vb_idx, addr_range.addr);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
P_MTHD(p, NVC597, SET_VERTEX_STREAM_SIZE_A(vb_idx));
|
||
P_NVC597_SET_VERTEX_STREAM_SIZE_A(p, vb_idx, addr_range.range >> 32);
|
||
P_NVC597_SET_VERTEX_STREAM_SIZE_B(p, vb_idx, addr_range.range);
|
||
} else {
|
||
/* TODO: What about robust zero-size buffers? */
|
||
const uint64_t limit = addr_range.range > 0 ?
|
||
addr_range.addr + addr_range.range - 1 : 0;
|
||
P_MTHD(p, NV9097, SET_VERTEX_STREAM_LIMIT_A_A(vb_idx));
|
||
P_NV9097_SET_VERTEX_STREAM_LIMIT_A_A(p, vb_idx, limit >> 32);
|
||
P_NV9097_SET_VERTEX_STREAM_LIMIT_A_B(p, vb_idx, limit);
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,
|
||
uint32_t firstBinding,
|
||
uint32_t bindingCount,
|
||
const VkBuffer *pBuffers,
|
||
const VkDeviceSize *pOffsets,
|
||
const VkDeviceSize *pSizes,
|
||
const VkDeviceSize *pStrides)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
|
||
if (pStrides) {
|
||
vk_cmd_set_vertex_binding_strides(&cmd->vk, firstBinding,
|
||
bindingCount, pStrides);
|
||
}
|
||
|
||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, pBuffers[i]);
|
||
uint32_t idx = firstBinding + i;
|
||
|
||
uint64_t size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
|
||
const struct nvk_addr_range addr_range =
|
||
nvk_buffer_addr_range(buffer, pOffsets[i], size);
|
||
|
||
nvk_cmd_bind_vertex_buffer(cmd, idx, addr_range);
|
||
}
|
||
}
|
||
|
||
static uint32_t
|
||
vk_to_nv9097_primitive_topology(VkPrimitiveTopology prim)
|
||
{
|
||
switch (prim) {
|
||
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||
return NV9097_BEGIN_OP_POINTS;
|
||
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
|
||
return NV9097_BEGIN_OP_LINES;
|
||
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||
return NV9097_BEGIN_OP_LINE_STRIP;
|
||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
|
||
#pragma GCC diagnostic push
|
||
#pragma GCC diagnostic ignored "-Wswitch"
|
||
case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA:
|
||
#pragma GCC diagnostic pop
|
||
return NV9097_BEGIN_OP_TRIANGLES;
|
||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
||
return NV9097_BEGIN_OP_TRIANGLE_STRIP;
|
||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||
return NV9097_BEGIN_OP_TRIANGLE_FAN;
|
||
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||
return NV9097_BEGIN_OP_LINELIST_ADJCY;
|
||
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||
return NV9097_BEGIN_OP_LINESTRIP_ADJCY;
|
||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||
return NV9097_BEGIN_OP_TRIANGLELIST_ADJCY;
|
||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||
return NV9097_BEGIN_OP_TRIANGLESTRIP_ADJCY;
|
||
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
|
||
return NV9097_BEGIN_OP_PATCH;
|
||
default:
|
||
unreachable("Invalid primitive topology");
|
||
}
|
||
}
|
||
|
||
struct mme_draw_params {
|
||
struct mme_value base_vertex;
|
||
struct mme_value first_vertex;
|
||
struct mme_value first_instance;
|
||
struct mme_value draw_idx;
|
||
};
|
||
|
||
static void
|
||
nvk_mme_build_set_draw_params(struct mme_builder *b,
|
||
const struct mme_draw_params *p)
|
||
{
|
||
const uint32_t draw_params_offset = nvk_root_descriptor_offset(draw);
|
||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||
mme_emit(b, mme_imm(draw_params_offset));
|
||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||
mme_emit(b, p->first_vertex);
|
||
mme_emit(b, p->first_instance);
|
||
mme_emit(b, p->draw_idx);
|
||
mme_emit(b, mme_zero() /* view_index */);
|
||
|
||
mme_mthd(b, NV9097_SET_GLOBAL_BASE_VERTEX_INDEX);
|
||
mme_emit(b, p->base_vertex);
|
||
mme_mthd(b, NV9097_SET_VERTEX_ID_BASE);
|
||
mme_emit(b, p->base_vertex);
|
||
|
||
mme_mthd(b, NV9097_SET_GLOBAL_BASE_INSTANCE_INDEX);
|
||
mme_emit(b, p->first_instance);
|
||
}
|
||
|
||
static void
|
||
nvk_mme_emit_view_index(struct mme_builder *b, struct mme_value view_index)
|
||
{
|
||
/* Set the push constant */
|
||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||
mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.view_index)));
|
||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||
mme_emit(b, view_index);
|
||
|
||
/* Set the layer to the view index */
|
||
STATIC_ASSERT(DRF_LO(NV9097_SET_RT_LAYER_V) == 0);
|
||
STATIC_ASSERT(NV9097_SET_RT_LAYER_CONTROL_V_SELECTS_LAYER == 0);
|
||
mme_mthd(b, NV9097_SET_RT_LAYER);
|
||
mme_emit(b, view_index);
|
||
}
|
||
|
||
static void
|
||
nvk_mme_build_draw_loop(struct mme_builder *b,
|
||
struct mme_value instance_count,
|
||
struct mme_value first_vertex,
|
||
struct mme_value vertex_count)
|
||
{
|
||
struct mme_value begin = nvk_mme_load_scratch(b, DRAW_BEGIN);
|
||
|
||
mme_loop(b, instance_count) {
|
||
mme_mthd(b, NV9097_BEGIN);
|
||
mme_emit(b, begin);
|
||
|
||
mme_mthd(b, NV9097_SET_VERTEX_ARRAY_START);
|
||
mme_emit(b, first_vertex);
|
||
mme_emit(b, vertex_count);
|
||
|
||
mme_mthd(b, NV9097_END);
|
||
mme_emit(b, mme_zero());
|
||
|
||
mme_set_field_enum(b, begin, NV9097_BEGIN_INSTANCE_ID, SUBSEQUENT);
|
||
}
|
||
|
||
mme_free_reg(b, begin);
|
||
}
|
||
|
||
static void
|
||
nvk_mme_build_draw(struct mme_builder *b,
|
||
struct mme_value draw_idx)
|
||
{
|
||
/* These are in VkDrawIndirectCommand order */
|
||
struct mme_value vertex_count = mme_load(b);
|
||
struct mme_value instance_count = mme_load(b);
|
||
struct mme_value first_vertex = mme_load(b);
|
||
struct mme_value first_instance = mme_load(b);
|
||
|
||
struct mme_draw_params params = {
|
||
.first_vertex = first_vertex,
|
||
.first_instance = first_instance,
|
||
.draw_idx = draw_idx,
|
||
};
|
||
nvk_mme_build_set_draw_params(b, ¶ms);
|
||
|
||
mme_free_reg(b, first_instance);
|
||
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
nvk_mme_spill(b, DRAW_IDX, draw_idx);
|
||
|
||
struct mme_value view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ieq, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
nvk_mme_build_draw_loop(b, instance_count,
|
||
first_vertex, vertex_count);
|
||
}
|
||
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ine, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
struct mme_value view = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, view, mme_imm(32)) {
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
struct mme_value has_view = mme_bfe(b, view_mask, view, 1);
|
||
mme_free_reg(b, view_mask);
|
||
mme_if(b, ine, has_view, mme_zero()) {
|
||
mme_free_reg(b, has_view);
|
||
nvk_mme_emit_view_index(b, view);
|
||
nvk_mme_build_draw_loop(b, instance_count,
|
||
first_vertex, vertex_count);
|
||
}
|
||
|
||
mme_add_to(b, view, view, mme_imm(1));
|
||
}
|
||
mme_free_reg(b, view);
|
||
}
|
||
|
||
mme_free_reg(b, instance_count);
|
||
mme_free_reg(b, first_vertex);
|
||
mme_free_reg(b, vertex_count);
|
||
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
nvk_mme_unspill(b, DRAW_IDX, draw_idx);
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw(struct mme_builder *b)
|
||
{
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
struct mme_value draw_idx = mme_load(b);
|
||
|
||
nvk_mme_build_draw(b, draw_idx);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDraw(VkCommandBuffer commandBuffer,
|
||
uint32_t vertexCount,
|
||
uint32_t instanceCount,
|
||
uint32_t firstVertex,
|
||
uint32_t firstInstance)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, 0 /* draw_idx */);
|
||
P_INLINE_DATA(p, vertexCount);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, firstVertex);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t drawCount,
|
||
const VkMultiDrawInfoEXT *pVertexInfo,
|
||
uint32_t instanceCount,
|
||
uint32_t firstInstance,
|
||
uint32_t stride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
for (uint32_t draw_idx = 0; draw_idx < drawCount; draw_idx++) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, draw_idx);
|
||
P_INLINE_DATA(p, pVertexInfo->vertexCount);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, pVertexInfo->firstVertex);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
|
||
pVertexInfo = ((void *)pVertexInfo) + stride;
|
||
}
|
||
}
|
||
|
||
static void
|
||
nvk_mme_build_draw_indexed_loop(struct mme_builder *b,
|
||
struct mme_value instance_count,
|
||
struct mme_value first_index,
|
||
struct mme_value index_count)
|
||
{
|
||
struct mme_value begin = nvk_mme_load_scratch(b, DRAW_BEGIN);
|
||
|
||
mme_loop(b, instance_count) {
|
||
mme_mthd(b, NV9097_BEGIN);
|
||
mme_emit(b, begin);
|
||
|
||
mme_mthd(b, NV9097_SET_INDEX_BUFFER_F);
|
||
mme_emit(b, first_index);
|
||
mme_emit(b, index_count);
|
||
|
||
mme_mthd(b, NV9097_END);
|
||
mme_emit(b, mme_zero());
|
||
|
||
mme_set_field_enum(b, begin, NV9097_BEGIN_INSTANCE_ID, SUBSEQUENT);
|
||
}
|
||
|
||
mme_free_reg(b, begin);
|
||
}
|
||
|
||
static void
|
||
nvk_mme_build_draw_indexed(struct mme_builder *b,
|
||
struct mme_value draw_idx)
|
||
{
|
||
/* These are in VkDrawIndexedIndirectCommand order */
|
||
struct mme_value index_count = mme_load(b);
|
||
struct mme_value instance_count = mme_load(b);
|
||
struct mme_value first_index = mme_load(b);
|
||
struct mme_value vertex_offset = mme_load(b);
|
||
struct mme_value first_instance = mme_load(b);
|
||
|
||
struct mme_draw_params params = {
|
||
.base_vertex = vertex_offset,
|
||
.first_vertex = vertex_offset,
|
||
.first_instance = first_instance,
|
||
.draw_idx = draw_idx,
|
||
};
|
||
nvk_mme_build_set_draw_params(b, ¶ms);
|
||
|
||
mme_free_reg(b, vertex_offset);
|
||
mme_free_reg(b, first_instance);
|
||
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
nvk_mme_spill(b, DRAW_IDX, draw_idx);
|
||
|
||
struct mme_value view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ieq, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
nvk_mme_build_draw_indexed_loop(b, instance_count,
|
||
first_index, index_count);
|
||
}
|
||
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ine, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
struct mme_value view = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, view, mme_imm(32)) {
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
struct mme_value has_view = mme_bfe(b, view_mask, view, 1);
|
||
mme_free_reg(b, view_mask);
|
||
mme_if(b, ine, has_view, mme_zero()) {
|
||
mme_free_reg(b, has_view);
|
||
nvk_mme_emit_view_index(b, view);
|
||
nvk_mme_build_draw_indexed_loop(b, instance_count,
|
||
first_index, index_count);
|
||
}
|
||
|
||
mme_add_to(b, view, view, mme_imm(1));
|
||
}
|
||
mme_free_reg(b, view);
|
||
}
|
||
|
||
mme_free_reg(b, instance_count);
|
||
mme_free_reg(b, first_index);
|
||
mme_free_reg(b, index_count);
|
||
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
nvk_mme_unspill(b, DRAW_IDX, draw_idx);
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw_indexed(struct mme_builder *b)
|
||
{
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
struct mme_value draw_idx = mme_load(b);
|
||
|
||
nvk_mme_build_draw_indexed(b, draw_idx);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||
uint32_t indexCount,
|
||
uint32_t instanceCount,
|
||
uint32_t firstIndex,
|
||
int32_t vertexOffset,
|
||
uint32_t firstInstance)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, 0 /* draw_idx */);
|
||
P_INLINE_DATA(p, indexCount);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, firstIndex);
|
||
P_INLINE_DATA(p, vertexOffset);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t drawCount,
|
||
const VkMultiDrawIndexedInfoEXT *pIndexInfo,
|
||
uint32_t instanceCount,
|
||
uint32_t firstInstance,
|
||
uint32_t stride,
|
||
const int32_t *pVertexOffset)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
for (uint32_t draw_idx = 0; draw_idx < drawCount; draw_idx++) {
|
||
const uint32_t vertex_offset =
|
||
pVertexOffset != NULL ? *pVertexOffset : pIndexInfo->vertexOffset;
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, draw_idx);
|
||
P_INLINE_DATA(p, pIndexInfo->indexCount);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, pIndexInfo->firstIndex);
|
||
P_INLINE_DATA(p, vertex_offset);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
|
||
pIndexInfo = ((void *)pIndexInfo) + stride;
|
||
}
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw_indirect(struct mme_builder *b)
|
||
{
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
|
||
if (b->devinfo->cls_eng3d >= TURING_A) {
|
||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||
struct mme_value draw_count = mme_load(b);
|
||
struct mme_value stride = mme_load(b);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ult, draw, draw_count) {
|
||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(4));
|
||
|
||
nvk_mme_build_draw(b, draw);
|
||
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||
}
|
||
} else {
|
||
struct mme_value draw_count = mme_load(b);
|
||
nvk_mme_load_to_scratch(b, DRAW_PAD_DW);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, draw, draw_count) {
|
||
nvk_mme_spill(b, DRAW_COUNT, draw_count);
|
||
|
||
nvk_mme_build_draw(b, draw);
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
|
||
struct mme_value pad_dw = nvk_mme_load_scratch(b, DRAW_PAD_DW);
|
||
mme_loop(b, pad_dw) {
|
||
mme_free_reg(b, mme_load(b));
|
||
}
|
||
mme_free_reg(b, pad_dw);
|
||
|
||
nvk_mme_unspill(b, DRAW_COUNT, draw_count);
|
||
}
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndirect(VkCommandBuffer commandBuffer,
|
||
VkBuffer _buffer,
|
||
VkDeviceSize offset,
|
||
uint32_t drawCount,
|
||
uint32_t stride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
/* From the Vulkan 1.3.238 spec:
|
||
*
|
||
* VUID-vkCmdDrawIndirect-drawCount-00476
|
||
*
|
||
* "If drawCount is greater than 1, stride must be a multiple of 4 and
|
||
* must be greater than or equal to sizeof(VkDrawIndirectCommand)"
|
||
*
|
||
* and
|
||
*
|
||
* "If drawCount is less than or equal to one, stride is ignored."
|
||
*/
|
||
if (drawCount > 1) {
|
||
assert(stride % 4 == 0);
|
||
assert(stride >= sizeof(VkDrawIndirectCommand));
|
||
} else {
|
||
stride = sizeof(VkDrawIndirectCommand);
|
||
}
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
P_INLINE_DATA(p, draw_addr >> 32);
|
||
P_INLINE_DATA(p, draw_addr);
|
||
P_INLINE_DATA(p, drawCount);
|
||
P_INLINE_DATA(p, stride);
|
||
} else {
|
||
const uint32_t max_draws_per_push =
|
||
((NV_PUSH_MAX_COUNT - 3) * 4) / stride;
|
||
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
while (drawCount) {
|
||
const uint32_t count = MIN2(drawCount, max_draws_per_push);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, count);
|
||
P_INLINE_DATA(p, (stride - sizeof(VkDrawIndirectCommand)) / 4);
|
||
|
||
uint64_t range = count * (uint64_t)stride;
|
||
nv_push_update_count(p, range / 4);
|
||
nvk_cmd_buffer_push_indirect(cmd, draw_addr, range);
|
||
|
||
draw_addr += range;
|
||
drawCount -= count;
|
||
}
|
||
}
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw_indexed_indirect(struct mme_builder *b)
|
||
{
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
|
||
if (b->devinfo->cls_eng3d >= TURING_A) {
|
||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||
struct mme_value draw_count = mme_load(b);
|
||
struct mme_value stride = mme_load(b);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ult, draw, draw_count) {
|
||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(5));
|
||
|
||
nvk_mme_build_draw_indexed(b, draw);
|
||
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||
}
|
||
} else {
|
||
struct mme_value draw_count = mme_load(b);
|
||
nvk_mme_load_to_scratch(b, DRAW_PAD_DW);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, draw, draw_count) {
|
||
nvk_mme_spill(b, DRAW_COUNT, draw_count);
|
||
|
||
nvk_mme_build_draw_indexed(b, draw);
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
|
||
struct mme_value pad_dw = nvk_mme_load_scratch(b, DRAW_PAD_DW);
|
||
mme_loop(b, pad_dw) {
|
||
mme_free_reg(b, mme_load(b));
|
||
}
|
||
mme_free_reg(b, pad_dw);
|
||
|
||
nvk_mme_unspill(b, DRAW_COUNT, draw_count);
|
||
}
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
|
||
VkBuffer _buffer,
|
||
VkDeviceSize offset,
|
||
uint32_t drawCount,
|
||
uint32_t stride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
/* From the Vulkan 1.3.238 spec:
|
||
*
|
||
* VUID-vkCmdDrawIndexedIndirect-drawCount-00528
|
||
*
|
||
* "If drawCount is greater than 1, stride must be a multiple of 4 and
|
||
* must be greater than or equal to sizeof(VkDrawIndexedIndirectCommand)"
|
||
*
|
||
* and
|
||
*
|
||
* "If drawCount is less than or equal to one, stride is ignored."
|
||
*/
|
||
if (drawCount > 1) {
|
||
assert(stride % 4 == 0);
|
||
assert(stride >= sizeof(VkDrawIndexedIndirectCommand));
|
||
} else {
|
||
stride = sizeof(VkDrawIndexedIndirectCommand);
|
||
}
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
P_INLINE_DATA(p, draw_addr >> 32);
|
||
P_INLINE_DATA(p, draw_addr);
|
||
P_INLINE_DATA(p, drawCount);
|
||
P_INLINE_DATA(p, stride);
|
||
} else {
|
||
const uint32_t max_draws_per_push =
|
||
((NV_PUSH_MAX_COUNT - 3) * 4) / stride;
|
||
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
while (drawCount) {
|
||
const uint32_t count = MIN2(drawCount, max_draws_per_push);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, count);
|
||
P_INLINE_DATA(p, (stride - sizeof(VkDrawIndexedIndirectCommand)) / 4);
|
||
|
||
uint64_t range = count * (uint64_t)stride;
|
||
nv_push_update_count(p, range / 4);
|
||
nvk_cmd_buffer_push_indirect(cmd, draw_addr, range);
|
||
|
||
draw_addr += range;
|
||
drawCount -= count;
|
||
}
|
||
}
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw_indirect_count(struct mme_builder *b)
|
||
{
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
return;
|
||
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
|
||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||
struct mme_value64 draw_count_addr = mme_load_addr64(b);
|
||
struct mme_value draw_max = mme_load(b);
|
||
struct mme_value stride = mme_load(b);
|
||
|
||
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
|
||
mme_free_reg64(b, draw_count_addr);
|
||
struct mme_value draw_count_buf = mme_load(b);
|
||
|
||
mme_if(b, ule, draw_count_buf, draw_max) {
|
||
mme_mov_to(b, draw_max, draw_count_buf);
|
||
}
|
||
mme_free_reg(b, draw_count_buf);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ult, draw, draw_max) {
|
||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(4));
|
||
|
||
nvk_mme_build_draw(b, draw);
|
||
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
|
||
VkBuffer _buffer,
|
||
VkDeviceSize offset,
|
||
VkBuffer countBuffer,
|
||
VkDeviceSize countBufferOffset,
|
||
uint32_t maxDrawCount,
|
||
uint32_t stride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||
VK_FROM_HANDLE(nvk_buffer, count_buffer, countBuffer);
|
||
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
/* TODO: Indirect count draw pre-Turing */
|
||
assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDIRECT_COUNT));
|
||
P_INLINE_DATA(p, begin);
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
P_INLINE_DATA(p, draw_addr >> 32);
|
||
P_INLINE_DATA(p, draw_addr);
|
||
uint64_t draw_count_addr = nvk_buffer_address(count_buffer,
|
||
countBufferOffset);
|
||
P_INLINE_DATA(p, draw_count_addr >> 32);
|
||
P_INLINE_DATA(p, draw_count_addr);
|
||
P_INLINE_DATA(p, maxDrawCount);
|
||
P_INLINE_DATA(p, stride);
|
||
}
|
||
|
||
void
|
||
nvk_mme_draw_indexed_indirect_count(struct mme_builder *b)
|
||
{
|
||
if (b->devinfo->cls_eng3d < TURING_A)
|
||
return;
|
||
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
|
||
struct mme_value64 draw_addr = mme_load_addr64(b);
|
||
struct mme_value64 draw_count_addr = mme_load_addr64(b);
|
||
struct mme_value draw_max = mme_load(b);
|
||
struct mme_value stride = mme_load(b);
|
||
|
||
mme_tu104_read_fifoed(b, draw_count_addr, mme_imm(1));
|
||
mme_free_reg64(b, draw_count_addr);
|
||
struct mme_value draw_count_buf = mme_load(b);
|
||
|
||
mme_if(b, ule, draw_count_buf, draw_max) {
|
||
mme_mov_to(b, draw_max, draw_count_buf);
|
||
}
|
||
mme_free_reg(b, draw_count_buf);
|
||
|
||
struct mme_value draw = mme_mov(b, mme_zero());
|
||
mme_while(b, ult, draw, draw_max) {
|
||
mme_tu104_read_fifoed(b, draw_addr, mme_imm(5));
|
||
|
||
nvk_mme_build_draw_indexed(b, draw);
|
||
|
||
mme_add_to(b, draw, draw, mme_imm(1));
|
||
mme_add64_to(b, draw_addr, draw_addr, mme_value64(stride, mme_zero()));
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
|
||
VkBuffer _buffer,
|
||
VkDeviceSize offset,
|
||
VkBuffer countBuffer,
|
||
VkDeviceSize countBufferOffset,
|
||
uint32_t maxDrawCount,
|
||
uint32_t stride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||
VK_FROM_HANDLE(nvk_buffer, count_buffer, countBuffer);
|
||
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
/* TODO: Indexed indirect count draw pre-Turing */
|
||
assert(nvk_cmd_buffer_3d_cls(cmd) >= TURING_A);
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DRAW_INDEXED_INDIRECT_COUNT));
|
||
P_INLINE_DATA(p, begin);
|
||
uint64_t draw_addr = nvk_buffer_address(buffer, offset);
|
||
P_INLINE_DATA(p, draw_addr >> 32);
|
||
P_INLINE_DATA(p, draw_addr);
|
||
uint64_t draw_count_addr = nvk_buffer_address(count_buffer,
|
||
countBufferOffset);
|
||
P_INLINE_DATA(p, draw_count_addr >> 32);
|
||
P_INLINE_DATA(p, draw_count_addr);
|
||
P_INLINE_DATA(p, maxDrawCount);
|
||
P_INLINE_DATA(p, stride);
|
||
}
|
||
|
||
static void
|
||
nvk_mme_xfb_draw_indirect_loop(struct mme_builder *b,
|
||
struct mme_value instance_count,
|
||
struct mme_value counter)
|
||
{
|
||
struct mme_value begin = nvk_mme_load_scratch(b, DRAW_BEGIN);
|
||
|
||
mme_loop(b, instance_count) {
|
||
mme_mthd(b, NV9097_BEGIN);
|
||
mme_emit(b, begin);
|
||
|
||
mme_mthd(b, NV9097_DRAW_AUTO);
|
||
mme_emit(b, counter);
|
||
|
||
mme_mthd(b, NV9097_END);
|
||
mme_emit(b, mme_zero());
|
||
|
||
mme_set_field_enum(b, begin, NV9097_BEGIN_INSTANCE_ID, SUBSEQUENT);
|
||
}
|
||
|
||
mme_free_reg(b, begin);
|
||
}
|
||
|
||
void
|
||
nvk_mme_xfb_draw_indirect(struct mme_builder *b)
|
||
{
|
||
nvk_mme_load_to_scratch(b, DRAW_BEGIN);
|
||
|
||
struct mme_value instance_count = mme_load(b);
|
||
struct mme_value first_instance = mme_load(b);
|
||
|
||
if (b->devinfo->cls_eng3d >= TURING_A) {
|
||
struct mme_value64 counter_addr = mme_load_addr64(b);
|
||
mme_tu104_read_fifoed(b, counter_addr, mme_imm(1));
|
||
mme_free_reg(b, counter_addr.lo);
|
||
mme_free_reg(b, counter_addr.hi);
|
||
}
|
||
struct mme_value counter = mme_load(b);
|
||
|
||
struct mme_draw_params params = {
|
||
.first_instance = first_instance,
|
||
};
|
||
nvk_mme_build_set_draw_params(b, ¶ms);
|
||
|
||
mme_free_reg(b, first_instance);
|
||
|
||
struct mme_value view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ieq, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
nvk_mme_xfb_draw_indirect_loop(b, instance_count, counter);
|
||
}
|
||
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
mme_if(b, ine, view_mask, mme_zero()) {
|
||
mme_free_reg(b, view_mask);
|
||
|
||
struct mme_value view = mme_mov(b, mme_zero());
|
||
mme_while(b, ine, view, mme_imm(32)) {
|
||
view_mask = nvk_mme_load_scratch(b, VIEW_MASK);
|
||
struct mme_value has_view = mme_bfe(b, view_mask, view, 1);
|
||
mme_free_reg(b, view_mask);
|
||
mme_if(b, ine, has_view, mme_zero()) {
|
||
mme_free_reg(b, has_view);
|
||
nvk_mme_emit_view_index(b, view);
|
||
nvk_mme_xfb_draw_indirect_loop(b, instance_count, counter);
|
||
}
|
||
|
||
mme_add_to(b, view, view, mme_imm(1));
|
||
}
|
||
}
|
||
|
||
mme_free_reg(b, instance_count);
|
||
mme_free_reg(b, counter);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t instanceCount,
|
||
uint32_t firstInstance,
|
||
VkBuffer counterBuffer,
|
||
VkDeviceSize counterBufferOffset,
|
||
uint32_t counterOffset,
|
||
uint32_t vertexStride)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, counter_buffer, counterBuffer);
|
||
const struct vk_dynamic_graphics_state *dyn =
|
||
&cmd->vk.dynamic_graphics_state;
|
||
|
||
nvk_flush_gfx_state(cmd);
|
||
|
||
uint32_t begin;
|
||
V_NV9097_BEGIN(begin, {
|
||
.op = vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology),
|
||
.primitive_id = NV9097_BEGIN_PRIMITIVE_ID_FIRST,
|
||
.instance_id = NV9097_BEGIN_INSTANCE_ID_FIRST,
|
||
.split_mode = SPLIT_MODE_NORMAL_BEGIN_NORMAL_END,
|
||
});
|
||
|
||
uint64_t counter_addr = nvk_buffer_address(counter_buffer,
|
||
counterBufferOffset);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
|
||
P_IMMD(p, NV9097, SET_DRAW_AUTO_START, counterOffset);
|
||
P_IMMD(p, NV9097, SET_DRAW_AUTO_STRIDE, vertexStride);
|
||
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_XFB_DRAW_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
P_INLINE_DATA(p, counter_addr >> 32);
|
||
P_INLINE_DATA(p, counter_addr);
|
||
} else {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
|
||
P_IMMD(p, NV9097, SET_DRAW_AUTO_START, counterOffset);
|
||
P_IMMD(p, NV9097, SET_DRAW_AUTO_STRIDE, vertexStride);
|
||
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_XFB_DRAW_INDIRECT));
|
||
P_INLINE_DATA(p, begin);
|
||
P_INLINE_DATA(p, instanceCount);
|
||
P_INLINE_DATA(p, firstInstance);
|
||
nv_push_update_count(p, 1);
|
||
nvk_cmd_buffer_push_indirect(cmd, counter_addr, 4);
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t firstBinding,
|
||
uint32_t bindingCount,
|
||
const VkBuffer *pBuffers,
|
||
const VkDeviceSize *pOffsets,
|
||
const VkDeviceSize *pSizes)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
|
||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, pBuffers[i]);
|
||
uint32_t idx = firstBinding + i;
|
||
uint64_t size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
|
||
struct nvk_addr_range addr_range =
|
||
nvk_buffer_addr_range(buffer, pOffsets[i], size);
|
||
assert(addr_range.range <= UINT32_MAX);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
|
||
|
||
P_MTHD(p, NV9097, SET_STREAM_OUT_BUFFER_ENABLE(idx));
|
||
P_NV9097_SET_STREAM_OUT_BUFFER_ENABLE(p, idx, V_TRUE);
|
||
P_NV9097_SET_STREAM_OUT_BUFFER_ADDRESS_A(p, idx, addr_range.addr >> 32);
|
||
P_NV9097_SET_STREAM_OUT_BUFFER_ADDRESS_B(p, idx, addr_range.addr);
|
||
P_NV9097_SET_STREAM_OUT_BUFFER_SIZE(p, idx, (uint32_t)addr_range.range);
|
||
}
|
||
|
||
// TODO: do we need to SET_STREAM_OUT_BUFFER_ENABLE V_FALSE ?
|
||
}
|
||
|
||
void
|
||
nvk_mme_xfb_counter_load(struct mme_builder *b)
|
||
{
|
||
struct mme_value buffer = mme_load(b);
|
||
|
||
struct mme_value counter;
|
||
if (b->devinfo->cls_eng3d >= TURING_A) {
|
||
struct mme_value64 counter_addr = mme_load_addr64(b);
|
||
|
||
mme_tu104_read_fifoed(b, counter_addr, mme_imm(1));
|
||
mme_free_reg(b, counter_addr.lo);
|
||
mme_free_reg(b, counter_addr.hi);
|
||
|
||
counter = mme_load(b);
|
||
} else {
|
||
counter = mme_load(b);
|
||
}
|
||
|
||
mme_mthd_arr(b, NV9097_SET_STREAM_OUT_BUFFER_LOAD_WRITE_POINTER(0), buffer);
|
||
mme_emit(b, counter);
|
||
|
||
mme_free_reg(b, counter);
|
||
mme_free_reg(b, buffer);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t firstCounterBuffer,
|
||
uint32_t counterBufferCount,
|
||
const VkBuffer *pCounterBuffers,
|
||
const VkDeviceSize *pCounterBufferOffsets)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
const uint32_t max_buffers = 4;
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + 2 * max_buffers);
|
||
|
||
P_IMMD(p, NV9097, SET_STREAM_OUTPUT, ENABLE_TRUE);
|
||
for (uint32_t i = 0; i < max_buffers; ++i) {
|
||
P_IMMD(p, NV9097, SET_STREAM_OUT_BUFFER_LOAD_WRITE_POINTER(i), 0);
|
||
}
|
||
|
||
for (uint32_t i = 0; i < counterBufferCount; ++i) {
|
||
if (pCounterBuffers[i] == VK_NULL_HANDLE)
|
||
continue;
|
||
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, pCounterBuffers[i]);
|
||
// index of counter buffer corresponts to index of transform buffer
|
||
uint32_t cb_idx = firstCounterBuffer + i;
|
||
uint64_t offset = pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0;
|
||
uint64_t cb_addr = nvk_buffer_address(buffer, offset);
|
||
|
||
if (nvk_cmd_buffer_3d_cls(cmd) >= TURING_A) {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_XFB_COUNTER_LOAD));
|
||
/* The STREAM_OUT_BUFFER_LOAD_WRITE_POINTER registers are 8 dword stride */
|
||
P_INLINE_DATA(p, cb_idx * 8);
|
||
P_INLINE_DATA(p, cb_addr >> 32);
|
||
P_INLINE_DATA(p, cb_addr);
|
||
} else {
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_XFB_COUNTER_LOAD));
|
||
P_INLINE_DATA(p, cb_idx);
|
||
nv_push_update_count(p, 1);
|
||
nvk_cmd_buffer_push_indirect(cmd, cb_addr, 4);
|
||
}
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer,
|
||
uint32_t firstCounterBuffer,
|
||
uint32_t counterBufferCount,
|
||
const VkBuffer *pCounterBuffers,
|
||
const VkDeviceSize *pCounterBufferOffsets)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5 * counterBufferCount + 2);
|
||
|
||
P_IMMD(p, NV9097, SET_STREAM_OUTPUT, ENABLE_FALSE);
|
||
|
||
for (uint32_t i = 0; i < counterBufferCount; ++i) {
|
||
if (pCounterBuffers[i] == VK_NULL_HANDLE)
|
||
continue;
|
||
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, pCounterBuffers[i]);
|
||
// index of counter buffer corresponts to index of transform buffer
|
||
uint32_t cb_idx = firstCounterBuffer + i;
|
||
uint64_t offset = pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0;
|
||
uint64_t cb_addr = nvk_buffer_address(buffer, offset);
|
||
|
||
P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
|
||
P_NV9097_SET_REPORT_SEMAPHORE_A(p, cb_addr >> 32);
|
||
P_NV9097_SET_REPORT_SEMAPHORE_B(p, cb_addr);
|
||
P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
|
||
P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
|
||
.operation = OPERATION_REPORT_ONLY,
|
||
.pipeline_location = PIPELINE_LOCATION_STREAMING_OUTPUT,
|
||
.report = REPORT_STREAMING_BYTE_COUNT,
|
||
.sub_report = cb_idx,
|
||
.structure_size = STRUCTURE_SIZE_ONE_WORD,
|
||
});
|
||
}
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
|
||
const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
VK_FROM_HANDLE(nvk_buffer, buffer, pConditionalRenderingBegin->buffer);
|
||
|
||
uint64_t addr = nvk_buffer_address(buffer, pConditionalRenderingBegin->offset);
|
||
bool inverted = pConditionalRenderingBegin->flags &
|
||
VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
|
||
|
||
/* From the Vulkan 1.3.280 spec:
|
||
*
|
||
* "If the 32-bit value at offset in buffer memory is zero,
|
||
* then the rendering commands are discarded,
|
||
* otherwise they are executed as normal."
|
||
*
|
||
* The hardware compare a 64-bit value, as such we are required to copy it.
|
||
*/
|
||
uint64_t tmp_addr;
|
||
VkResult result = nvk_cmd_buffer_cond_render_alloc(cmd, &tmp_addr);
|
||
if (result != VK_SUCCESS) {
|
||
vk_command_buffer_set_error(&cmd->vk, result);
|
||
return;
|
||
}
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 26);
|
||
|
||
P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
|
||
P_NV90B5_OFFSET_IN_UPPER(p, addr >> 32);
|
||
P_NV90B5_OFFSET_IN_LOWER(p, addr & 0xffffffff);
|
||
P_NV90B5_OFFSET_OUT_UPPER(p, tmp_addr >> 32);
|
||
P_NV90B5_OFFSET_OUT_LOWER(p, tmp_addr & 0xffffffff);
|
||
P_NV90B5_PITCH_IN(p, 4);
|
||
P_NV90B5_PITCH_OUT(p, 4);
|
||
P_NV90B5_LINE_LENGTH_IN(p, 4);
|
||
P_NV90B5_LINE_COUNT(p, 1);
|
||
|
||
P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
|
||
.dst_x = DST_X_SRC_X,
|
||
.dst_y = DST_Y_SRC_X,
|
||
.dst_z = DST_Z_NO_WRITE,
|
||
.dst_w = DST_W_NO_WRITE,
|
||
.component_size = COMPONENT_SIZE_ONE,
|
||
.num_src_components = NUM_SRC_COMPONENTS_ONE,
|
||
.num_dst_components = NUM_DST_COMPONENTS_TWO,
|
||
});
|
||
|
||
P_IMMD(p, NV90B5, LAUNCH_DMA, {
|
||
.data_transfer_type = DATA_TRANSFER_TYPE_PIPELINED,
|
||
.multi_line_enable = MULTI_LINE_ENABLE_TRUE,
|
||
.flush_enable = FLUSH_ENABLE_TRUE,
|
||
.src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
|
||
.dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
|
||
.remap_enable = REMAP_ENABLE_TRUE,
|
||
});
|
||
|
||
P_MTHD(p, NV9097, SET_RENDER_ENABLE_A);
|
||
P_NV9097_SET_RENDER_ENABLE_A(p, tmp_addr >> 32);
|
||
P_NV9097_SET_RENDER_ENABLE_B(p, tmp_addr & 0xfffffff0);
|
||
P_NV9097_SET_RENDER_ENABLE_C(p, inverted ? MODE_RENDER_IF_EQUAL : MODE_RENDER_IF_NOT_EQUAL);
|
||
|
||
P_MTHD(p, NV90C0, SET_RENDER_ENABLE_A);
|
||
P_NV90C0_SET_RENDER_ENABLE_A(p, tmp_addr >> 32);
|
||
P_NV90C0_SET_RENDER_ENABLE_B(p, tmp_addr & 0xfffffff0);
|
||
P_NV90C0_SET_RENDER_ENABLE_C(p, inverted ? MODE_RENDER_IF_EQUAL : MODE_RENDER_IF_NOT_EQUAL);
|
||
}
|
||
|
||
VKAPI_ATTR void VKAPI_CALL
|
||
nvk_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
|
||
{
|
||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||
|
||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 12);
|
||
P_MTHD(p, NV9097, SET_RENDER_ENABLE_A);
|
||
P_NV9097_SET_RENDER_ENABLE_A(p, 0);
|
||
P_NV9097_SET_RENDER_ENABLE_B(p, 0);
|
||
P_NV9097_SET_RENDER_ENABLE_C(p, MODE_TRUE);
|
||
|
||
P_MTHD(p, NV90C0, SET_RENDER_ENABLE_A);
|
||
P_NV90C0_SET_RENDER_ENABLE_A(p, 0);
|
||
P_NV90C0_SET_RENDER_ENABLE_B(p, 0);
|
||
P_NV90C0_SET_RENDER_ENABLE_C(p, MODE_TRUE);
|
||
}
|