intel: Rename Genx keyword to Gfxx
Commands used to do the changes: export SEARCH_PATH="src/intel src/gallium/drivers/iris src/mesa/drivers/dri/i965" grep -E "Gen[[:digit:]]+" -rIl $SEARCH_PATH | xargs sed -ie "s/Gen\([[:digit:]]\+\)/Gfx\1/g" Exclude changes in src/intel/perf/oa-*.xml: find src/intel/perf -type f \( -name "*.xml" \) | xargs sed -ie "s/Gfx/Gen/g" Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9936>
This commit is contained in:
parent
b75f095bc7
commit
1d296484b4
|
@ -299,7 +299,7 @@ tex_cache_flush_hack(struct iris_batch *batch,
|
|||
* If the BO hasn't been referenced yet this batch, we assume that the
|
||||
* texture cache doesn't contain any relevant data nor need flushing.
|
||||
*
|
||||
* Icelake (Gen11+) claims to fix this issue, but seems to still have
|
||||
* Icelake (Gfx11+) claims to fix this issue, but seems to still have
|
||||
* issues with ASTC formats.
|
||||
*/
|
||||
bool need_flush = devinfo->ver >= 11 ?
|
||||
|
|
|
@ -1425,7 +1425,7 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
|
|||
bo->external = true;
|
||||
bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
|
||||
|
||||
/* From the Bspec, Memory Compression - Gen12:
|
||||
/* From the Bspec, Memory Compression - Gfx12:
|
||||
*
|
||||
* The base address for the surface has to be 64K page aligned and the
|
||||
* surface is expected to be padded in the virtual domain to be 4 4K
|
||||
|
|
|
@ -77,7 +77,7 @@ iris_format_for_usage(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
/* We choose RGBA over RGBX for rendering the hardware doesn't support
|
||||
* rendering to RGBX. However, when this internal override is used on Gen9+,
|
||||
* rendering to RGBX. However, when this internal override is used on Gfx9+,
|
||||
* fast clears don't work correctly.
|
||||
*
|
||||
* i965 fixes this by pretending to not support RGBX formats, and the higher
|
||||
|
@ -209,7 +209,7 @@ iris_is_format_supported(struct pipe_screen *pscreen,
|
|||
format == ISL_FORMAT_R32_UINT;
|
||||
}
|
||||
|
||||
/* TODO: Support ASTC 5x5 on Gen9 properly. This means implementing
|
||||
/* TODO: Support ASTC 5x5 on Gfx9 properly. This means implementing
|
||||
* a complex sampler workaround (see i965's gfx9_apply_astc5x5_wa_flush).
|
||||
* Without it, st/mesa will emulate ASTC 5x5 via uncompressed textures.
|
||||
*/
|
||||
|
|
|
@ -62,12 +62,12 @@ iris_emit_pipe_control_flush(struct iris_batch *batch,
|
|||
if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
|
||||
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
|
||||
/* A pipe control command with flush and invalidate bits set
|
||||
* simultaneously is an inherently racy operation on Gen6+ if the
|
||||
* simultaneously is an inherently racy operation on Gfx6+ if the
|
||||
* contents of the flushed caches were intended to become visible from
|
||||
* any of the invalidated caches. Split it in two PIPE_CONTROLs, the
|
||||
* first one should stall the pipeline to make sure that the flushed R/W
|
||||
* caches are coherent with memory once the specified R/O caches are
|
||||
* invalidated. On pre-Gen6 hardware the (implicit) R/O cache
|
||||
* invalidated. On pre-Gfx6 hardware the (implicit) R/O cache
|
||||
* invalidation seems to happen at the bottom of the pipeline together
|
||||
* with any write cache flush, so this shouldn't be a concern. In order
|
||||
* to ensure a full stall, we do an end-of-pipe sync.
|
||||
|
|
|
@ -811,7 +811,7 @@ iris_setup_binding_table(const struct gen_device_info *devinfo,
|
|||
BITFIELD64_MASK(num_render_targets);
|
||||
|
||||
/* Setup render target read surface group in order to support non-coherent
|
||||
* framebuffer fetch on Gen8
|
||||
* framebuffer fetch on Gfx8
|
||||
*/
|
||||
if (devinfo->ver == 8 && info->outputs_read) {
|
||||
bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
|
||||
|
@ -1760,12 +1760,12 @@ iris_compile_fs(struct iris_screen *screen,
|
|||
|
||||
/* Lower output variables to load_output intrinsics before setting up
|
||||
* binding tables, so iris_setup_binding_table can map any load_output
|
||||
* intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
|
||||
* intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
|
||||
* non-coherent framebuffer fetches.
|
||||
*/
|
||||
brw_nir_lower_fs_outputs(nir);
|
||||
|
||||
/* On Gen11+, shader RT write messages have a "Null Render Target" bit
|
||||
/* On Gfx11+, shader RT write messages have a "Null Render Target" bit
|
||||
* and do not need a binding table entry with a null surface. Earlier
|
||||
* generations need an entry for a null surface.
|
||||
*/
|
||||
|
@ -2162,9 +2162,9 @@ iris_get_scratch_space(struct iris_context *ice,
|
|||
* According to the other driver team, this applies to compute shaders
|
||||
* as well. This is not currently documented at all.
|
||||
*
|
||||
* This hack is no longer necessary on Gen11+.
|
||||
* This hack is no longer necessary on Gfx11+.
|
||||
*
|
||||
* For, Gen11+, scratch space allocation is based on the number of threads
|
||||
* For, Gfx11+, scratch space allocation is based on the number of threads
|
||||
* in the base configuration.
|
||||
*/
|
||||
unsigned subslice_total = screen->subslice_total;
|
||||
|
|
|
@ -538,7 +538,7 @@ iris_hiz_exec(struct iris_context *ice,
|
|||
* enabled must be issued before the rectangle primitive used for
|
||||
* the depth buffer clear operation."
|
||||
*
|
||||
* Same applies for Gen8 and Gen9.
|
||||
* Same applies for Gfx8 and Gfx9.
|
||||
*/
|
||||
iris_emit_pipe_control_flush(batch,
|
||||
"hiz op: pre-flush",
|
||||
|
@ -848,7 +848,7 @@ iris_resource_texture_aux_usage(struct iris_context *ice,
|
|||
0, INTEL_REMAINING_LAYERS))
|
||||
return ISL_AUX_USAGE_NONE;
|
||||
|
||||
/* On Gen9 color buffers may be compressed by the hardware (lossless
|
||||
/* On Gfx9 color buffers may be compressed by the hardware (lossless
|
||||
* compression). There are, however, format restrictions and care needs
|
||||
* to be taken that the sampler engine is capable for re-interpreting a
|
||||
* buffer with format different the buffer was originally written with.
|
||||
|
|
|
@ -291,7 +291,7 @@ iris_image_view_get_format(struct iris_context *ice,
|
|||
iris_format_for_usage(devinfo, img->format, usage).fmt;
|
||||
|
||||
if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
|
||||
/* On Gen8, try to use typed surfaces reads (which support a
|
||||
/* On Gfx8, try to use typed surfaces reads (which support a
|
||||
* limited number of formats), and if not possible, fall back
|
||||
* to untyped reads.
|
||||
*/
|
||||
|
@ -733,7 +733,7 @@ iris_resource_configure_aux(struct iris_screen *screen,
|
|||
* A CCS value of 0 indicates that the corresponding block is in the
|
||||
* pass-through state which is what we want.
|
||||
*
|
||||
* For CCS_D, do the same thing. On Gen9+, this avoids having any
|
||||
* For CCS_D, do the same thing. On Gfx9+, this avoids having any
|
||||
* undefined bits in the aux buffer.
|
||||
*/
|
||||
if (imported) {
|
||||
|
|
|
@ -633,7 +633,7 @@ emit_pipeline_select(struct iris_batch *batch, uint32_t pipeline)
|
|||
* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
|
||||
* with Pipeline Select set to GPGPU.
|
||||
*
|
||||
* The internal hardware docs recommend the same workaround for Gen9
|
||||
* The internal hardware docs recommend the same workaround for Gfx9
|
||||
* hardware too.
|
||||
*/
|
||||
if (pipeline == GPGPU)
|
||||
|
@ -817,7 +817,7 @@ iris_enable_obj_preemption(struct iris_batch *batch, bool enable)
|
|||
*
|
||||
* The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
|
||||
* and p_1 will be swapped for the result. Note that in the context of pixel
|
||||
* pipe hashing this can be always 0 on Gen12 platforms, since the hardware
|
||||
* pipe hashing this can be always 0 on Gfx12 platforms, since the hardware
|
||||
* transparently remaps logical indices found on the table to physical pixel
|
||||
* pipe indices from the highest to lowest EU count.
|
||||
*/
|
||||
|
@ -885,7 +885,7 @@ gfx12_upload_pixel_hashing_tables(struct iris_batch *batch)
|
|||
ppipes_of[n] += (devinfo->ppipe_subslices[p] == n);
|
||||
}
|
||||
|
||||
/* Gen12 has three pixel pipes. */
|
||||
/* Gfx12 has three pixel pipes. */
|
||||
assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
|
||||
|
||||
if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
|
||||
|
@ -1389,7 +1389,7 @@ struct iris_depth_stencil_alpha_state {
|
|||
bool depth_writes_enabled;
|
||||
bool stencil_writes_enabled;
|
||||
|
||||
/** Outbound to Gen8-9 PMA stall equations */
|
||||
/** Outbound to Gfx8-9 PMA stall equations */
|
||||
bool depth_test_enabled;
|
||||
};
|
||||
|
||||
|
@ -1518,12 +1518,12 @@ want_pma_fix(struct iris_context *ice)
|
|||
const struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
|
||||
const struct iris_blend_state *cso_blend = ice->state.cso_blend;
|
||||
|
||||
/* In very specific combinations of state, we can instruct Gen8-9 hardware
|
||||
/* In very specific combinations of state, we can instruct Gfx8-9 hardware
|
||||
* to avoid stalling at the pixel mask array. The state equations are
|
||||
* documented in these places:
|
||||
*
|
||||
* - Gen8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
|
||||
* - Gen9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
|
||||
* - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
|
||||
* - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
|
||||
*
|
||||
* Both equations share some common elements:
|
||||
*
|
||||
|
@ -1602,7 +1602,7 @@ want_pma_fix(struct iris_context *ice)
|
|||
bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask ||
|
||||
cso_blend->alpha_to_coverage || cso_zsa->alpha_enabled;
|
||||
|
||||
/* The Gen8 depth PMA equation becomes:
|
||||
/* The Gfx8 depth PMA equation becomes:
|
||||
*
|
||||
* depth_writes =
|
||||
* 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
|
||||
|
@ -1646,7 +1646,7 @@ genX(update_pma_fix)(struct iris_context *ice,
|
|||
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
|
||||
* prior to the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary.
|
||||
*
|
||||
* The Gen9 docs say to use a depth stall rather than a command streamer
|
||||
* The Gfx9 docs say to use a depth stall rather than a command streamer
|
||||
* stall. However, the hardware seems to violently disagree. A full
|
||||
* command streamer stall seems to be needed in both cases.
|
||||
*/
|
||||
|
@ -1666,7 +1666,7 @@ genX(update_pma_fix)(struct iris_context *ice,
|
|||
* Flush bits is often necessary. We do it regardless because it's easier.
|
||||
* The render cache flush is also necessary if stencil writes are enabled.
|
||||
*
|
||||
* Again, the Gen9 docs give a different set of flushes but the Broadwell
|
||||
* Again, the Gfx9 docs give a different set of flushes but the Broadwell
|
||||
* flushes seem to work just as well.
|
||||
*/
|
||||
iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
|
||||
|
@ -5806,7 +5806,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||
#endif
|
||||
|
||||
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
||||
/* Gen9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted
|
||||
/* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted
|
||||
* in order to commit constants. TODO: Investigate "Disable Gather
|
||||
* at Set Shader" to go back to legacy mode...
|
||||
*/
|
||||
|
@ -5912,7 +5912,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||
* SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
|
||||
* mode."
|
||||
*
|
||||
* 16x MSAA only exists on Gen9+, so we can skip this on Gen8.
|
||||
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
|
||||
*/
|
||||
if (GFX_VER >= 9 && cso_fb->samples == 16 &&
|
||||
!wm_prog_data->persample_dispatch) {
|
||||
|
@ -6236,7 +6236,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||
if (GFX_VER >= 12) {
|
||||
/* GEN:BUG:1408224581
|
||||
*
|
||||
* Workaround: Gen12LP Astep only An additional pipe control with
|
||||
* Workaround: Gfx12LP Astep only An additional pipe control with
|
||||
* post-sync = store dword operation would be required.( w/a is to
|
||||
* have an additional pipe control after the stencil state whenever
|
||||
* the surface state bits of this state is changing).
|
||||
|
@ -6344,7 +6344,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||
|
||||
if (count) {
|
||||
#if GFX_VER >= 11
|
||||
/* Gen11+ doesn't need the cache workaround below */
|
||||
/* Gfx11+ doesn't need the cache workaround below */
|
||||
uint64_t bound = dynamic_bound;
|
||||
while (bound) {
|
||||
const int i = u_bit_scan64(&bound);
|
||||
|
@ -6888,7 +6888,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
|
|||
|
||||
|
||||
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
|
||||
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
|
||||
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:
|
||||
*
|
||||
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
|
||||
* the only bits that are changed are scoreboard related: Scoreboard
|
||||
|
@ -7424,7 +7424,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
*
|
||||
* The same text exists a few rows below for Post Sync Op.
|
||||
*
|
||||
* On Gen12 this is GEN:BUG:1607156449.
|
||||
* On Gfx12 this is GEN:BUG:1607156449.
|
||||
*/
|
||||
iris_emit_raw_pipe_control(batch,
|
||||
"workaround: CS stall before gpgpu post-sync",
|
||||
|
@ -7458,7 +7458,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
*
|
||||
* This seems like nonsense. An Ivybridge workaround requires us to
|
||||
* emit a PIPE_CONTROL with a depth stall and write immediate post-sync
|
||||
* operation. Gen8+ requires us to emit depth stalls and depth cache
|
||||
* operation. Gfx8+ requires us to emit depth stalls and depth cache
|
||||
* flushes together. So, it's hard to imagine this means anything other
|
||||
* than "we originally intended this to be used for PS_DEPTH_COUNT".
|
||||
*
|
||||
|
@ -7489,7 +7489,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
* We assert that the caller doesn't do this combination, to try and
|
||||
* prevent mistakes. It shouldn't hurt the GPU, though.
|
||||
*
|
||||
* We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"
|
||||
* We skip this check on Gfx11+ as the "Stall at Pixel Scoreboard"
|
||||
* and "Render Target Flush" combo is explicitly required for BTI
|
||||
* update workarounds.
|
||||
*/
|
||||
|
@ -7791,7 +7791,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
|
||||
#if GFX_VER == 9
|
||||
/**
|
||||
* Preemption on Gen9 has to be enabled or disabled in various cases.
|
||||
* Preemption on Gfx9 has to be enabled or disabled in various cases.
|
||||
*
|
||||
* See these workarounds for preemption:
|
||||
* - WaDisableMidObjectPreemptionForGSLineStripAdj
|
||||
|
@ -7799,7 +7799,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
* - WaDisableMidObjectPreemptionForLineLoop
|
||||
* - WA#0798
|
||||
*
|
||||
* We don't put this in the vtable because it's only used on Gen9.
|
||||
* We don't put this in the vtable because it's only used on Gfx9.
|
||||
*/
|
||||
void
|
||||
gfx9_toggle_preemption(struct iris_context *ice,
|
||||
|
@ -7904,7 +7904,7 @@ genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
|
|||
#if GFX_VER == 9
|
||||
const struct gen_device_info *devinfo = &batch->screen->devinfo;
|
||||
const unsigned slice_hashing[] = {
|
||||
/* Because all Gen9 platforms with more than one slice require
|
||||
/* Because all Gfx9 platforms with more than one slice require
|
||||
* three-way subslice hashing, a single "normal" 16x16 slice hashing
|
||||
* block is guaranteed to suffer from substantial imbalance, with one
|
||||
* subslice receiving twice as much work as the other two in the
|
||||
|
@ -7912,7 +7912,7 @@ genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
|
|||
*
|
||||
* The performance impact of that would be particularly severe when
|
||||
* three-way hashing is also in use for slice balancing (which is the
|
||||
* case for all Gen9 GT4 platforms), because one of the slices
|
||||
* case for all Gfx9 GT4 platforms), because one of the slices
|
||||
* receives one every three 16x16 blocks in either direction, which
|
||||
* is roughly the periodicity of the underlying subslice imbalance
|
||||
* pattern ("roughly" because in reality the hardware's
|
||||
|
|
|
@ -6,7 +6,7 @@ Missing features:
|
|||
- Splitting larger-than-max blits (Jordan)
|
||||
- Bit-for-bit copies (Jason)
|
||||
- Depth and Stencil clears
|
||||
- Gen4-5 support
|
||||
- Gfx4-5 support
|
||||
|
||||
Performance:
|
||||
|
||||
|
|
|
@ -280,7 +280,7 @@ blorp_ensure_sf_program(struct blorp_batch *batch,
|
|||
const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data;
|
||||
assert(params->wm_prog_data);
|
||||
|
||||
/* Gen6+ doesn't need a strips and fans program */
|
||||
/* Gfx6+ doesn't need a strips and fans program */
|
||||
if (blorp->compiler->devinfo->ver >= 6)
|
||||
return true;
|
||||
|
||||
|
|
|
@ -1203,7 +1203,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
|
||||
dst_pos = blorp_blit_get_frag_coords(&b, key, &v);
|
||||
|
||||
/* Render target and texture hardware don't support W tiling until Gen8. */
|
||||
/* Render target and texture hardware don't support W tiling until Gfx8. */
|
||||
const bool rt_tiled_w = false;
|
||||
const bool tex_tiled_w = devinfo->ver >= 8 && key->src_tiled_w;
|
||||
|
||||
|
@ -1375,7 +1375,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
nir_imm_float(&b, 0.5f));
|
||||
color = blorp_nir_tex(&b, &v, key, src_pos);
|
||||
} else {
|
||||
/* Gen7+ hardware doesn't automaticaly blend. */
|
||||
/* Gfx7+ hardware doesn't automaticaly blend. */
|
||||
color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples,
|
||||
key->tex_aux_usage,
|
||||
key->texture_data_type,
|
||||
|
@ -1648,7 +1648,7 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
|
|||
}
|
||||
|
||||
if (isl_dev->info->ver == 6) {
|
||||
/* Gen6 stencil buffers have a very large alignment coming in from the
|
||||
/* Gfx6 stencil buffers have a very large alignment coming in from the
|
||||
* miptree. It's out-of-bounds for what the surface state can handle.
|
||||
* Since we have a single layer and level, it doesn't really matter as
|
||||
* long as we don't pass a bogus value into isl_surf_fill_state().
|
||||
|
@ -1806,10 +1806,10 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
|
||||
if (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
|
||||
if (devinfo->ver >= 7) {
|
||||
/* We can render as depth on Gen5 but there's no real advantage since
|
||||
* it doesn't support MSAA or HiZ. On Gen4, we can't always render
|
||||
/* We can render as depth on Gfx5 but there's no real advantage since
|
||||
* it doesn't support MSAA or HiZ. On Gfx4, we can't always render
|
||||
* to depth due to issues with depth buffers and mip-mapping. On
|
||||
* Gen6, we can do everything but we have weird offsetting for HiZ
|
||||
* Gfx6, we can do everything but we have weird offsetting for HiZ
|
||||
* and stencil. It's easier to just render using the color pipe
|
||||
* on those platforms.
|
||||
*/
|
||||
|
@ -2025,7 +2025,7 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
if ((wm_prog_key->filter == BLORP_FILTER_AVERAGE ||
|
||||
wm_prog_key->filter == BLORP_FILTER_BILINEAR) &&
|
||||
batch->blorp->isl_dev->info->ver <= 6) {
|
||||
/* Gen4-5 don't support non-normalized texture coordinates */
|
||||
/* Gfx4-5 don't support non-normalized texture coordinates */
|
||||
wm_prog_key->src_coords_normalized = true;
|
||||
params->wm_inputs.src_inv_size[0] =
|
||||
1.0f / minify(params->src.surf.logical_level0_px.width,
|
||||
|
@ -2674,7 +2674,7 @@ blorp_copy(struct blorp_batch *batch,
|
|||
params.dst.view.format = params.src.surf.format;
|
||||
} else if ((params.dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) &&
|
||||
isl_dev->info->ver >= 7) {
|
||||
/* On Gen7 and higher, we use actual depth writes for blits into depth
|
||||
/* On Gfx7 and higher, we use actual depth writes for blits into depth
|
||||
* buffers so we need the real format.
|
||||
*/
|
||||
params.src.view.format = params.dst.surf.format;
|
||||
|
|
|
@ -808,7 +808,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
|
|||
/* We have to set the WM_HZ_OP::FullSurfaceDepthandStencilClear bit
|
||||
* whenever we clear an uninitialized HIZ buffer (as some drivers
|
||||
* currently do). However, this bit seems liable to clear 16x8 pixels in
|
||||
* the ZCS on Gen12 - greater than the slice alignments for depth
|
||||
* the ZCS on Gfx12 - greater than the slice alignments for depth
|
||||
* buffers.
|
||||
*/
|
||||
assert(surf->image_alignment_el.w % 16 != 0 ||
|
||||
|
@ -818,7 +818,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
|
|||
* amd_vertex_shader_layer-layered-depth-texture-render piglit test.
|
||||
*
|
||||
* From the Compressed Depth Buffers section of the Bspec, under the
|
||||
* Gen12 texture performant and ZCS columns:
|
||||
* Gfx12 texture performant and ZCS columns:
|
||||
*
|
||||
* Update with clear at either 16x8 or 8x4 granularity, based on
|
||||
* fs_clr or otherwise.
|
||||
|
@ -828,7 +828,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
|
|||
* when an initializing clear could hit another miplevel.
|
||||
*
|
||||
* NOTE: Because the CCS compresses the depth buffer and not a version
|
||||
* of it that has been rearranged with different alignments (like Gen8+
|
||||
* of it that has been rearranged with different alignments (like Gfx8+
|
||||
* HIZ), we have to make sure that the x0 and y0 are at least 16x8
|
||||
* aligned in the context of the entire surface.
|
||||
*/
|
||||
|
@ -1193,7 +1193,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch,
|
|||
|
||||
nir_ssa_def *clear_color = nir_load_var(&b, v_color);
|
||||
if (blorp_key.indirect_clear_color && blorp->isl_dev->info->ver <= 8) {
|
||||
/* Gen7-8 clear colors are stored as single 0/1 bits */
|
||||
/* Gfx7-8 clear colors are stored as single 0/1 bits */
|
||||
clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
|
||||
blorp_nir_bit(&b, clear_color, 30),
|
||||
blorp_nir_bit(&b, clear_color, 29),
|
||||
|
|
|
@ -478,7 +478,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
|
|||
.SourceElementOffset = 0,
|
||||
.Component0Control = VFCOMP_STORE_SRC,
|
||||
|
||||
/* From Gen8 onwards hardware is no more instructed to overwrite
|
||||
/* From Gfx8 onwards hardware is no more instructed to overwrite
|
||||
* components using an element specifier. Instead one has separate
|
||||
* 3DSTATE_VF_SGVS (System Generated Value Setup) state packet for it.
|
||||
*/
|
||||
|
@ -822,7 +822,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
ps.BindingTableEntryCount = 1;
|
||||
}
|
||||
|
||||
/* SAMPLER_STATE prefetching is broken on Gen11 - WA_1606682166 */
|
||||
/* SAMPLER_STATE prefetching is broken on Gfx11 - WA_1606682166 */
|
||||
if (GFX_VER == 11)
|
||||
ps.SamplerCount = 0;
|
||||
|
||||
|
@ -861,11 +861,11 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
|
||||
* for pre Gen11 and 128 for gfx11+; On gfx11+ If a programmed value is
|
||||
* for pre Gfx11 and 128 for gfx11+; On gfx11+ If a programmed value is
|
||||
* k, it implies 2(k+1) threads. It implicitly scales for different GT
|
||||
* levels (which have some # of PSDs).
|
||||
*
|
||||
* In Gen8 the format is U8-2 whereas in Gen9+ it is U9-1.
|
||||
* In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
|
||||
*/
|
||||
if (GFX_VER >= 9)
|
||||
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
|
||||
|
@ -983,7 +983,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
|
||||
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
|
||||
} else {
|
||||
/* Gen7 hardware gets angry if we don't enable at least one dispatch
|
||||
/* Gfx7 hardware gets angry if we don't enable at least one dispatch
|
||||
* mode, so just enable 16-pixel dispatch if we don't have a program.
|
||||
*/
|
||||
ps._16PixelDispatchEnable = true;
|
||||
|
@ -1700,7 +1700,7 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch,
|
|||
#if GFX_VER >= 12
|
||||
/* GEN:BUG:1408224581
|
||||
*
|
||||
* Workaround: Gen12LP Astep only An additional pipe control with
|
||||
* Workaround: Gfx12LP Astep only An additional pipe control with
|
||||
* post-sync = store dword operation would be required.( w/a is to
|
||||
* have an additional pipe control after the stencil state whenever
|
||||
* the surface state bits of this state is changing).
|
||||
|
|
|
@ -572,7 +572,7 @@ decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
|
|||
struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
|
||||
|
||||
uint64_t ksp = 0;
|
||||
bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gen8+ only */
|
||||
bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
|
||||
bool is_enabled = true;
|
||||
|
||||
struct intel_field_iterator iter;
|
||||
|
|
|
@ -57,13 +57,13 @@ intel_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
|
|||
* This additional restriction must also be comprehended by software,
|
||||
* i.e., enforced by use of clipping."
|
||||
*
|
||||
* This makes no sense. Gen7+ hardware supports 16K render targets,
|
||||
* This makes no sense. Gfx7+ hardware supports 16K render targets,
|
||||
* and you definitely need to be able to draw polygons that fill the
|
||||
* surface. Our assumption is that the rasterizer was limited to 8K
|
||||
* on Sandybridge, which only supports 8K surfaces, and it was actually
|
||||
* increased to 16K on Ivybridge and later.
|
||||
*
|
||||
* So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
|
||||
* So, limit the guardband to 16K on Gfx7+ and 8K on Sandybridge.
|
||||
*/
|
||||
const float gb_size = GFX_VER >= 7 ? 16384.0f : 8192.0f;
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ intel_get_urb_config(const struct gen_device_info *devinfo,
|
|||
{
|
||||
unsigned urb_size_kB = intel_get_l3_config_urb_size(devinfo, l3_cfg);
|
||||
|
||||
/* RCU_MODE register for Gen12+ in BSpec says:
|
||||
/* RCU_MODE register for Gfx12+ in BSpec says:
|
||||
*
|
||||
* "HW reserves 4KB of URB space per bank for Compute Engine out of the
|
||||
* total storage available in L3. SW must consider that 4KB of storage
|
||||
|
@ -236,7 +236,7 @@ intel_get_urb_config(const struct gen_device_info *devinfo,
|
|||
|
||||
if (deref_block_size) {
|
||||
if (devinfo->ver >= 12) {
|
||||
/* From the Gen12 BSpec:
|
||||
/* From the Gfx12 BSpec:
|
||||
*
|
||||
* "Deref Block size depends on the last enabled shader and number
|
||||
* of handles programmed for that shader
|
||||
|
|
|
@ -1128,7 +1128,7 @@ mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
|
|||
|
||||
#endif /* MI_MATH section */
|
||||
|
||||
/* This assumes addresses of strictly more than 32bits (aka. Gen8+). */
|
||||
/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
|
||||
#if MI_BUILDER_CAN_WRITE_BATCH
|
||||
|
||||
struct mi_address_token {
|
||||
|
@ -1169,7 +1169,7 @@ mi_self_mod_barrier(struct mi_builder *b)
|
|||
mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
/* Documentation says Gen11+ should be able to invalidate the command cache
|
||||
/* Documentation says Gfx11+ should be able to invalidate the command cache
|
||||
* but experiment show it doesn't work properly, so for now just get over
|
||||
* the CS prefetch.
|
||||
*/
|
||||
|
|
|
@ -112,7 +112,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||
/* Default to the sampler since that's what we've done since forever */
|
||||
compiler->indirect_ubos_use_sampler = true;
|
||||
|
||||
/* There is no vec4 mode on Gen10+, and we don't use it at all on Gen8+. */
|
||||
/* There is no vec4 mode on Gfx10+, and we don't use it at all on Gfx8+. */
|
||||
for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) {
|
||||
compiler->scalar_stage[i] = devinfo->ver >= 8 ||
|
||||
i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE;
|
||||
|
@ -145,8 +145,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||
}
|
||||
|
||||
/* The Bspec's section tittled "Instruction_multiply[DevBDW+]" claims that
|
||||
* destination type can be Quadword and source type Doubleword for Gen8 and
|
||||
* Gen9. So, lower 64 bit multiply instruction on rest of the platforms.
|
||||
* destination type can be Quadword and source type Doubleword for Gfx8 and
|
||||
* Gfx9. So, lower 64 bit multiply instruction on rest of the platforms.
|
||||
*/
|
||||
if (devinfo->ver < 8 || devinfo->ver > 9)
|
||||
int64_options |= nir_lower_imul_2x32_64;
|
||||
|
@ -174,7 +174,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||
*nir_options = vector_nir_options;
|
||||
}
|
||||
|
||||
/* Prior to Gen6, there are no three source operations, and Gen11 loses
|
||||
/* Prior to Gfx6, there are no three source operations, and Gfx11 loses
|
||||
* LRP.
|
||||
*/
|
||||
nir_options->lower_ffma16 = devinfo->ver < 6;
|
||||
|
@ -189,7 +189,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||
nir_options->lower_int64_options = int64_options;
|
||||
nir_options->lower_doubles_options = fp64_options;
|
||||
|
||||
/* Starting with Gen11, we lower away 8-bit arithmetic */
|
||||
/* Starting with Gfx11, we lower away 8-bit arithmetic */
|
||||
nir_options->support_8bit_alu = devinfo->ver < 11;
|
||||
|
||||
nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
|
||||
|
|
|
@ -310,7 +310,7 @@ struct brw_vs_prog_key {
|
|||
unsigned nr_userclip_plane_consts:4;
|
||||
|
||||
/**
|
||||
* For pre-Gen6 hardware, a bitfield indicating which texture coordinates
|
||||
* For pre-Gfx6 hardware, a bitfield indicating which texture coordinates
|
||||
* are going to be replaced with point coordinates (as a consequence of a
|
||||
* call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
|
||||
* our SF thread requires exact matching between VS outputs and FS inputs,
|
||||
|
@ -480,7 +480,7 @@ struct brw_wm_prog_key {
|
|||
|
||||
uint8_t color_outputs_valid;
|
||||
uint64_t input_slots_valid;
|
||||
GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
|
||||
GLenum alpha_test_func; /* < For Gfx4/5 MRT alpha test */
|
||||
float alpha_test_ref;
|
||||
};
|
||||
|
||||
|
@ -555,7 +555,7 @@ struct brw_image_param {
|
|||
* From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
|
||||
* minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
|
||||
*
|
||||
* On Gen6, the size of transform feedback data is limited not by the number
|
||||
* On Gfx6, the size of transform feedback data is limited not by the number
|
||||
* of components but by the number of binding table entries we set aside. We
|
||||
* use one binding table entry for a float, one entry for a vector, and one
|
||||
* entry per matrix column. Since the only way we can communicate our
|
||||
|
@ -868,7 +868,7 @@ struct brw_wm_prog_data {
|
|||
uint64_t inputs;
|
||||
|
||||
/* Mapping of VUE slots to interpolation modes.
|
||||
* Used by the Gen4-5 clip/sf/wm stages.
|
||||
* Used by the Gfx4-5 clip/sf/wm stages.
|
||||
*/
|
||||
unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */
|
||||
|
||||
|
@ -1305,25 +1305,25 @@ struct brw_gs_prog_data
|
|||
int invocations;
|
||||
|
||||
/**
|
||||
* Gen6: Provoking vertex convention for odd-numbered triangles
|
||||
* Gfx6: Provoking vertex convention for odd-numbered triangles
|
||||
* in tristrips.
|
||||
*/
|
||||
GLuint pv_first:1;
|
||||
|
||||
/**
|
||||
* Gen6: Number of varyings that are output to transform feedback.
|
||||
* Gfx6: Number of varyings that are output to transform feedback.
|
||||
*/
|
||||
GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
|
||||
|
||||
/**
|
||||
* Gen6: Map from the index of a transform feedback binding table entry to the
|
||||
* Gfx6: Map from the index of a transform feedback binding table entry to the
|
||||
* gl_varying_slot that should be streamed out through that binding table
|
||||
* entry.
|
||||
*/
|
||||
unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
|
||||
|
||||
/**
|
||||
* Gen6: Map from the index of a transform feedback binding table entry to the
|
||||
* Gfx6: Map from the index of a transform feedback binding table entry to the
|
||||
* swizzles that should be used when streaming out data through that
|
||||
* binding table entry.
|
||||
*/
|
||||
|
@ -1661,9 +1661,9 @@ encode_slm_size(unsigned gen, uint32_t bytes)
|
|||
*
|
||||
* Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
|
||||
* -------------------------------------------------------------------
|
||||
* Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
|
||||
* Gfx7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
|
||||
* -------------------------------------------------------------------
|
||||
* Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||
* Gfx9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||
*/
|
||||
|
||||
if (bytes > 0) {
|
||||
|
@ -1676,7 +1676,7 @@ encode_slm_size(unsigned gen, uint32_t bytes)
|
|||
slm_size = ffs(slm_size) - 10;
|
||||
} else {
|
||||
assert(slm_size >= 4096);
|
||||
/* Convert to the pre-Gen9 representation. */
|
||||
/* Convert to the pre-Gfx9 representation. */
|
||||
slm_size = slm_size / 4096;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -288,7 +288,7 @@ static const char *const end_of_thread[2] = {
|
|||
[1] = "EOT"
|
||||
};
|
||||
|
||||
/* SFIDs on Gen4-5 */
|
||||
/* SFIDs on Gfx4-5 */
|
||||
static const char *const gfx4_sfid[16] = {
|
||||
[BRW_SFID_NULL] = "null",
|
||||
[BRW_SFID_MATH] = "math",
|
||||
|
@ -405,7 +405,7 @@ static const char *const m_rt_write_subtype[] = {
|
|||
[0b010] = "SIMD8/DualSrcLow",
|
||||
[0b011] = "SIMD8/DualSrcHigh",
|
||||
[0b100] = "SIMD8",
|
||||
[0b101] = "SIMD8/ImageWrite", /* Gen6+ */
|
||||
[0b101] = "SIMD8/ImageWrite", /* Gfx6+ */
|
||||
[0b111] = "SIMD16/RepData-111", /* no idea how this is different than 1 */
|
||||
};
|
||||
|
||||
|
@ -545,11 +545,11 @@ static const char *const gfx7_urb_opcode[] = {
|
|||
[BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
|
||||
[BRW_URB_OPCODE_READ_HWORD] = "read HWord",
|
||||
[BRW_URB_OPCODE_READ_OWORD] = "read OWord",
|
||||
[GFX7_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gen7+ */
|
||||
[GFX7_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gen7+ */
|
||||
[GFX8_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gen8+ */
|
||||
[GFX8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gen8+ */
|
||||
[GFX8_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gen8+ */
|
||||
[GFX7_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gfx7+ */
|
||||
[GFX7_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gfx7+ */
|
||||
[GFX8_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gfx8+ */
|
||||
[GFX8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gfx8+ */
|
||||
[GFX8_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gfx8+ */
|
||||
/* [9-15] - reserved */
|
||||
};
|
||||
|
||||
|
@ -1919,7 +1919,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
break;
|
||||
case GFX6_SFID_DATAPORT_SAMPLER_CACHE:
|
||||
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
|
||||
/* aka BRW_SFID_DATAPORT_READ on Gen4-5 */
|
||||
/* aka BRW_SFID_DATAPORT_READ on Gfx4-5 */
|
||||
if (devinfo->ver >= 6) {
|
||||
format(file, " (%u, %u, %u, %u)",
|
||||
brw_dp_desc_binding_table_index(devinfo, imm_desc),
|
||||
|
@ -1944,7 +1944,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
break;
|
||||
|
||||
case GFX6_SFID_DATAPORT_RENDER_CACHE: {
|
||||
/* aka BRW_SFID_DATAPORT_WRITE on Gen4-5 */
|
||||
/* aka BRW_SFID_DATAPORT_WRITE on Gfx4-5 */
|
||||
unsigned msg_type = brw_dp_write_desc_msg_type(devinfo, imm_desc);
|
||||
|
||||
err |= control(file, "DP rc message type",
|
||||
|
|
|
@ -150,7 +150,7 @@ disasm_annotate(struct disasm_info *disasm,
|
|||
group->block_start = cfg->blocks[disasm->cur_block];
|
||||
}
|
||||
|
||||
/* There is no hardware DO instruction on Gen6+, so since DO always
|
||||
/* There is no hardware DO instruction on Gfx6+, so since DO always
|
||||
* starts a basic block, we need to set the .block_start of the next
|
||||
* instruction's annotation with a pointer to the bblock started by
|
||||
* the DO.
|
||||
|
|
|
@ -60,7 +60,7 @@ struct brw_insn_state {
|
|||
/* One of BRW_MASK_* */
|
||||
unsigned mask_control:1;
|
||||
|
||||
/* Scheduling info for Gen12+ */
|
||||
/* Scheduling info for Gfx12+ */
|
||||
struct tgl_swsb swsb;
|
||||
|
||||
bool saturate:1;
|
||||
|
@ -1241,7 +1241,7 @@ brw_jump_scale(const struct gen_device_info *devinfo)
|
|||
if (devinfo->ver >= 5)
|
||||
return 2;
|
||||
|
||||
/* Gen4 simply uses the number of 128-bit instructions. */
|
||||
/* Gfx4 simply uses the number of 128-bit instructions. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,19 +46,19 @@
|
|||
* A G45-only instruction, NENOP, must be used to provide padding to align
|
||||
* uncompacted instructions.
|
||||
*
|
||||
* Gen5 removes these restrictions and changes jump counts to be in units of
|
||||
* Gfx5 removes these restrictions and changes jump counts to be in units of
|
||||
* 8-byte compacted instructions, allowing jump targets to be only 8-byte
|
||||
* aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
|
||||
*
|
||||
* Gen6 adds the ability to compact instructions with a limited range of
|
||||
* Gfx6 adds the ability to compact instructions with a limited range of
|
||||
* immediate values. Compactable immediates have 12 unrestricted bits, and a
|
||||
* 13th bit that's replicated through the high 20 bits, to create the 32-bit
|
||||
* value of DW3 in the uncompacted instruction word.
|
||||
*
|
||||
* On Gen7 we can compact some control flow instructions with a small positive
|
||||
* On Gfx7 we can compact some control flow instructions with a small positive
|
||||
* immediate in the low bits of DW3, like ENDIF with the JIP field. Other
|
||||
* control flow instructions with UIP cannot be compacted, because of the
|
||||
* replicated 13th bit. No control flow instructions can be compacted on Gen6
|
||||
* replicated 13th bit. No control flow instructions can be compacted on Gfx6
|
||||
* since the jump count field is not in DW3.
|
||||
*
|
||||
* break JIP/UIP
|
||||
|
@ -71,7 +71,7 @@
|
|||
*
|
||||
* Gen 8 adds support for compacting 3-src instructions.
|
||||
*
|
||||
* Gen12 reduces the number of bits that available to compacted immediates from
|
||||
* Gfx12 reduces the number of bits that available to compacted immediates from
|
||||
* 13 to 12, but improves the compaction of floating-point immediates by
|
||||
* allowing the high bits to be encoded (the sign, 8-bit exponent, and the
|
||||
* three most significant bits of the mantissa), rather than the lowest bits of
|
||||
|
@ -1303,11 +1303,11 @@ has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
|
|||
/* Check for instruction bits that don't map to any of the fields of the
|
||||
* compacted instruction. The instruction cannot be compacted if any of
|
||||
* them are set. They overlap with:
|
||||
* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
|
||||
* - Dst.AddrImm[9] (bit 47 on Gen8)
|
||||
* - Src0.AddrImm[9] (bit 95 on Gen8)
|
||||
* - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
|
||||
* - UIP[31] (bit 95 on Gen8)
|
||||
* - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
|
||||
* - Dst.AddrImm[9] (bit 47 on Gfx8)
|
||||
* - Src0.AddrImm[9] (bit 95 on Gfx8)
|
||||
* - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
|
||||
* - UIP[31] (bit 95 on Gfx8)
|
||||
*/
|
||||
if (devinfo->ver >= 12) {
|
||||
assert(!brw_inst_bits(src, 7, 7));
|
||||
|
@ -1603,7 +1603,7 @@ precompact(const struct gen_device_info *devinfo, brw_inst inst)
|
|||
*
|
||||
* If we see a 0.0:F, change the type to VF so that it can be compacted.
|
||||
*
|
||||
* Compaction of floating-point immediates is improved on Gen12, thus
|
||||
* Compaction of floating-point immediates is improved on Gfx12, thus
|
||||
* removing the need for this.
|
||||
*/
|
||||
if (devinfo->ver < 12 &&
|
||||
|
@ -1618,7 +1618,7 @@ precompact(const struct gen_device_info *devinfo, brw_inst inst)
|
|||
/* There are no mappings for dst:d | i:d, so if the immediate is suitable
|
||||
* set the types to :UD so the instruction can be compacted.
|
||||
*
|
||||
* FINISHME: Use dst:f | imm:f on Gen12
|
||||
* FINISHME: Use dst:f | imm:f on Gfx12
|
||||
*/
|
||||
if (devinfo->ver < 12 &&
|
||||
compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
|
||||
|
@ -2141,8 +2141,8 @@ update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
|
|||
int this_old_ip, int *compacted_counts)
|
||||
{
|
||||
/* JIP and UIP are in units of:
|
||||
* - bytes on Gen8+; and
|
||||
* - compacted instructions on Gen6+.
|
||||
* - bytes on Gfx8+; and
|
||||
* - compacted instructions on Gfx6+.
|
||||
*/
|
||||
int shift = devinfo->ver >= 8 ? 3 : 0;
|
||||
|
||||
|
@ -2172,7 +2172,7 @@ update_gfx4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
|
|||
|
||||
/* Jump Count is in units of:
|
||||
* - uncompacted instructions on G45; and
|
||||
* - compacted instructions on Gen5.
|
||||
* - compacted instructions on Gfx5.
|
||||
*/
|
||||
int shift = devinfo->is_g4x ? 1 : 0;
|
||||
|
||||
|
@ -2390,7 +2390,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
} else if (devinfo->ver == 6) {
|
||||
assert(!brw_inst_cmpt_control(devinfo, insn));
|
||||
|
||||
/* Jump Count is in units of compacted instructions on Gen6. */
|
||||
/* Jump Count is in units of compacted instructions on Gfx6. */
|
||||
int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
|
||||
|
||||
int target_old_ip = this_old_ip + (jump_count_compacted / 2);
|
||||
|
|
|
@ -177,7 +177,7 @@ enum PACKED gfx10_align1_3src_dst_horizontal_stride {
|
|||
|
||||
/** @{
|
||||
*
|
||||
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
|
||||
* Gfx6 has replaced "mask enable/disable" with WECtrl, which is
|
||||
* effectively the same but much simpler to think about. Now, there
|
||||
* are two contributors ANDed together to whether channels are
|
||||
* executed: The predication on the instruction, and the channel write
|
||||
|
@ -209,48 +209,48 @@ enum opcode {
|
|||
BRW_OPCODE_XOR,
|
||||
BRW_OPCODE_SHR,
|
||||
BRW_OPCODE_SHL,
|
||||
BRW_OPCODE_DIM, /**< Gen7.5 only */
|
||||
BRW_OPCODE_SMOV, /**< Gen8+ */
|
||||
BRW_OPCODE_DIM, /**< Gfx7.5 only */
|
||||
BRW_OPCODE_SMOV, /**< Gfx8+ */
|
||||
BRW_OPCODE_ASR,
|
||||
BRW_OPCODE_ROR, /**< Gen11+ */
|
||||
BRW_OPCODE_ROL, /**< Gen11+ */
|
||||
BRW_OPCODE_ROR, /**< Gfx11+ */
|
||||
BRW_OPCODE_ROL, /**< Gfx11+ */
|
||||
BRW_OPCODE_CMP,
|
||||
BRW_OPCODE_CMPN,
|
||||
BRW_OPCODE_CSEL, /**< Gen8+ */
|
||||
BRW_OPCODE_F32TO16, /**< Gen7 only */
|
||||
BRW_OPCODE_F16TO32, /**< Gen7 only */
|
||||
BRW_OPCODE_BFREV, /**< Gen7+ */
|
||||
BRW_OPCODE_BFE, /**< Gen7+ */
|
||||
BRW_OPCODE_BFI1, /**< Gen7+ */
|
||||
BRW_OPCODE_BFI2, /**< Gen7+ */
|
||||
BRW_OPCODE_CSEL, /**< Gfx8+ */
|
||||
BRW_OPCODE_F32TO16, /**< Gfx7 only */
|
||||
BRW_OPCODE_F16TO32, /**< Gfx7 only */
|
||||
BRW_OPCODE_BFREV, /**< Gfx7+ */
|
||||
BRW_OPCODE_BFE, /**< Gfx7+ */
|
||||
BRW_OPCODE_BFI1, /**< Gfx7+ */
|
||||
BRW_OPCODE_BFI2, /**< Gfx7+ */
|
||||
BRW_OPCODE_JMPI,
|
||||
BRW_OPCODE_BRD, /**< Gen7+ */
|
||||
BRW_OPCODE_BRD, /**< Gfx7+ */
|
||||
BRW_OPCODE_IF,
|
||||
BRW_OPCODE_IFF, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_BRC, /**< Gen7+ */
|
||||
BRW_OPCODE_IFF, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_BRC, /**< Gfx7+ */
|
||||
BRW_OPCODE_ELSE,
|
||||
BRW_OPCODE_ENDIF,
|
||||
BRW_OPCODE_DO, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_CASE, /**< Gen6 only */
|
||||
BRW_OPCODE_DO, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_CASE, /**< Gfx6 only */
|
||||
BRW_OPCODE_WHILE,
|
||||
BRW_OPCODE_BREAK,
|
||||
BRW_OPCODE_CONTINUE,
|
||||
BRW_OPCODE_HALT,
|
||||
BRW_OPCODE_CALLA, /**< Gen7.5+ */
|
||||
BRW_OPCODE_MSAVE, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_CALL, /**< Gen6+ */
|
||||
BRW_OPCODE_MREST, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_RET, /**< Gen6+ */
|
||||
BRW_OPCODE_PUSH, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_FORK, /**< Gen6 only */
|
||||
BRW_OPCODE_GOTO, /**< Gen8+ */
|
||||
BRW_OPCODE_POP, /**< Pre-Gen6 */
|
||||
BRW_OPCODE_CALLA, /**< Gfx7.5+ */
|
||||
BRW_OPCODE_MSAVE, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_CALL, /**< Gfx6+ */
|
||||
BRW_OPCODE_MREST, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_RET, /**< Gfx6+ */
|
||||
BRW_OPCODE_PUSH, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_FORK, /**< Gfx6 only */
|
||||
BRW_OPCODE_GOTO, /**< Gfx8+ */
|
||||
BRW_OPCODE_POP, /**< Pre-Gfx6 */
|
||||
BRW_OPCODE_WAIT,
|
||||
BRW_OPCODE_SEND,
|
||||
BRW_OPCODE_SENDC,
|
||||
BRW_OPCODE_SENDS, /**< Gen9+ */
|
||||
BRW_OPCODE_SENDSC, /**< Gen9+ */
|
||||
BRW_OPCODE_MATH, /**< Gen6+ */
|
||||
BRW_OPCODE_SENDS, /**< Gfx9+ */
|
||||
BRW_OPCODE_SENDSC, /**< Gfx9+ */
|
||||
BRW_OPCODE_MATH, /**< Gfx6+ */
|
||||
BRW_OPCODE_ADD,
|
||||
BRW_OPCODE_MUL,
|
||||
BRW_OPCODE_AVG,
|
||||
|
@ -262,11 +262,11 @@ enum opcode {
|
|||
BRW_OPCODE_MAC,
|
||||
BRW_OPCODE_MACH,
|
||||
BRW_OPCODE_LZD,
|
||||
BRW_OPCODE_FBH, /**< Gen7+ */
|
||||
BRW_OPCODE_FBL, /**< Gen7+ */
|
||||
BRW_OPCODE_CBIT, /**< Gen7+ */
|
||||
BRW_OPCODE_ADDC, /**< Gen7+ */
|
||||
BRW_OPCODE_SUBB, /**< Gen7+ */
|
||||
BRW_OPCODE_FBH, /**< Gfx7+ */
|
||||
BRW_OPCODE_FBL, /**< Gfx7+ */
|
||||
BRW_OPCODE_CBIT, /**< Gfx7+ */
|
||||
BRW_OPCODE_ADDC, /**< Gfx7+ */
|
||||
BRW_OPCODE_SUBB, /**< Gfx7+ */
|
||||
BRW_OPCODE_SAD2,
|
||||
BRW_OPCODE_SADA2,
|
||||
BRW_OPCODE_DP4,
|
||||
|
@ -275,9 +275,9 @@ enum opcode {
|
|||
BRW_OPCODE_DP2,
|
||||
BRW_OPCODE_LINE,
|
||||
BRW_OPCODE_PLN, /**< G45+ */
|
||||
BRW_OPCODE_MAD, /**< Gen6+ */
|
||||
BRW_OPCODE_LRP, /**< Gen6+ */
|
||||
BRW_OPCODE_MADM, /**< Gen8+ */
|
||||
BRW_OPCODE_MAD, /**< Gfx6+ */
|
||||
BRW_OPCODE_LRP, /**< Gfx6+ */
|
||||
BRW_OPCODE_MADM, /**< Gfx8+ */
|
||||
BRW_OPCODE_NENOP, /**< G45 only */
|
||||
BRW_OPCODE_NOP,
|
||||
|
||||
|
@ -466,7 +466,7 @@ enum opcode {
|
|||
* Source 1: Immediate bool to indicate whether control is returned to the
|
||||
* thread only after the fence has been honored.
|
||||
* Source 2: Immediate byte indicating which memory to fence. Zero means
|
||||
* global memory; GFX7_BTI_SLM means SLM (for Gen11+ only).
|
||||
* global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
|
||||
*
|
||||
* Vec4 backend only uses Source 0.
|
||||
*/
|
||||
|
@ -476,7 +476,7 @@ enum opcode {
|
|||
* Scheduling-only fence.
|
||||
*
|
||||
* Sources can be used to force a stall until the registers in those are
|
||||
* available. This might generate MOVs or SYNC_NOPs (Gen12+).
|
||||
* available. This might generate MOVs or SYNC_NOPs (Gfx12+).
|
||||
*/
|
||||
FS_OPCODE_SCHEDULING_FENCE,
|
||||
|
||||
|
@ -487,7 +487,7 @@ enum opcode {
|
|||
SHADER_OPCODE_SCRATCH_HEADER,
|
||||
|
||||
/**
|
||||
* Gen8+ SIMD8 URB Read messages.
|
||||
* Gfx8+ SIMD8 URB Read messages.
|
||||
*/
|
||||
SHADER_OPCODE_URB_READ_SIMD8,
|
||||
SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
|
||||
|
@ -1058,7 +1058,7 @@ enum PACKED brw_width {
|
|||
};
|
||||
|
||||
/**
|
||||
* Gen12+ SWSB SBID synchronization mode.
|
||||
* Gfx12+ SWSB SBID synchronization mode.
|
||||
*
|
||||
* This is represented as a bitmask including any required SBID token
|
||||
* synchronization modes, used to synchronize out-of-order instructions. Only
|
||||
|
@ -1226,7 +1226,7 @@ enum tgl_sync_function {
|
|||
*/
|
||||
enum brw_message_target {
|
||||
BRW_SFID_NULL = 0,
|
||||
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
|
||||
BRW_SFID_MATH = 1, /* Only valid on Gfx4-5 */
|
||||
BRW_SFID_SAMPLER = 2,
|
||||
BRW_SFID_MESSAGE_GATEWAY = 3,
|
||||
BRW_SFID_DATAPORT_READ = 4,
|
||||
|
@ -1492,7 +1492,7 @@ enum brw_message_target {
|
|||
* Note that because the DRM sets bit 4 of HDC_CHICKEN0 on BDW, CHV and at
|
||||
* least some pre-production steppings of SKL due to WaForceEnableNonCoherent,
|
||||
* HDC memory access may have been overridden by the kernel to be non-coherent
|
||||
* (matching the behavior of the same BTI on pre-Gen8 hardware) and BTI 255
|
||||
* (matching the behavior of the same BTI on pre-Gfx8 hardware) and BTI 255
|
||||
* may actually be an alias for BTI 253.
|
||||
*/
|
||||
#define GFX8_BTI_STATELESS_IA_COHERENT 255
|
||||
|
@ -1587,7 +1587,7 @@ enum brw_message_target {
|
|||
#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6
|
||||
|
||||
|
||||
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
|
||||
/* Gfx7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
|
||||
* is 2^9, or 512. It's counted in multiples of 64 bytes.
|
||||
*
|
||||
* Identical for VS, DS, and HS.
|
||||
|
@ -1597,7 +1597,7 @@ enum brw_message_target {
|
|||
#define GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64)
|
||||
#define GFX7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64)
|
||||
|
||||
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
|
||||
/* Gfx6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
|
||||
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
|
||||
*/
|
||||
#define GFX6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
|
||||
|
|
|
@ -1230,8 +1230,8 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
|
|||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
|
||||
/* The F32TO16 instruction doesn't support 32-bit destination types in
|
||||
* Align1 mode, and neither does the Gen8 implementation in terms of a
|
||||
* converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
|
||||
* Align1 mode, and neither does the Gfx8 implementation in terms of a
|
||||
* converting MOV. Gfx7 does zero out the high 16 bits in Align16 mode as
|
||||
* an undocumented feature.
|
||||
*/
|
||||
const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
|
||||
|
@ -1520,7 +1520,7 @@ patch_IF_ELSE(struct brw_codegen *p,
|
|||
* platforms, we convert flow control instructions to conditional ADDs that
|
||||
* operate on IP (see brw_ENDIF).
|
||||
*
|
||||
* However, on Gen6, writing to IP doesn't work in single program flow mode
|
||||
* However, on Gfx6, writing to IP doesn't work in single program flow mode
|
||||
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
|
||||
* not be updated by non-flow control instructions."). And on later
|
||||
* platforms, there is no significant benefit to converting control flow
|
||||
|
@ -1648,15 +1648,15 @@ brw_ENDIF(struct brw_codegen *p)
|
|||
|
||||
/* In single program flow mode, we can express IF and ELSE instructions
|
||||
* equivalently as ADD instructions that operate on IP. On platforms prior
|
||||
* to Gen6, flow control instructions cause an implied thread switch, so
|
||||
* to Gfx6, flow control instructions cause an implied thread switch, so
|
||||
* this is a significant savings.
|
||||
*
|
||||
* However, on Gen6, writing to IP doesn't work in single program flow mode
|
||||
* However, on Gfx6, writing to IP doesn't work in single program flow mode
|
||||
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
|
||||
* not be updated by non-flow control instructions."). And on later
|
||||
* platforms, there is no significant benefit to converting control flow
|
||||
* instructions to conditional ADDs. So we only do this trick on Gen4 and
|
||||
* Gen5.
|
||||
* instructions to conditional ADDs. So we only do this trick on Gfx4 and
|
||||
* Gfx5.
|
||||
*/
|
||||
if (devinfo->ver < 6 && p->single_program_flow)
|
||||
emit_endif = false;
|
||||
|
@ -1777,7 +1777,7 @@ brw_HALT(struct brw_codegen *p)
|
|||
insn = next_insn(p, BRW_OPCODE_HALT);
|
||||
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
if (devinfo->ver < 6) {
|
||||
/* From the Gen4 PRM:
|
||||
/* From the Gfx4 PRM:
|
||||
*
|
||||
* "IP register must be put (for example, by the assembler) at <dst>
|
||||
* and <src0> locations.
|
||||
|
@ -1975,7 +1975,7 @@ void brw_CMP(struct brw_codegen *p,
|
|||
* page says:
|
||||
* "Any CMP instruction with a null destination must use a {switch}."
|
||||
*
|
||||
* It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
|
||||
* It also applies to other Gfx7 platforms (IVB, BYT) even though it isn't
|
||||
* mentioned on their work-arounds pages.
|
||||
*/
|
||||
if (devinfo->ver == 7) {
|
||||
|
@ -2090,7 +2090,7 @@ void gfx6_math(struct brw_codegen *p,
|
|||
(src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
|
||||
}
|
||||
|
||||
/* Source modifiers are ignored for extended math instructions on Gen6. */
|
||||
/* Source modifiers are ignored for extended math instructions on Gfx6. */
|
||||
if (devinfo->ver == 6) {
|
||||
assert(!src0.negate);
|
||||
assert(!src0.abs);
|
||||
|
@ -2788,7 +2788,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
|
||||
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
|
||||
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
|
||||
* to Gen12, so we may have fallen back to an indirect extended
|
||||
* to Gfx12, so we may have fallen back to an indirect extended
|
||||
* descriptor.
|
||||
*/
|
||||
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
|
||||
|
@ -2976,7 +2976,7 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset)
|
|||
case BRW_OPCODE_BREAK:
|
||||
assert(block_end_offset != 0);
|
||||
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
||||
/* Gen7 UIP points to WHILE; Gen6 points just after it */
|
||||
/* Gfx7 UIP points to WHILE; Gfx6 points just after it */
|
||||
brw_inst_set_uip(devinfo, insn,
|
||||
(brw_find_loop_end(p, offset) - offset +
|
||||
(devinfo->ver == 6 ? 16 : 0)) / scale);
|
||||
|
@ -3057,7 +3057,7 @@ void brw_ff_sync(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
/**
|
||||
* Emit the SEND instruction necessary to generate stream output data on Gen6
|
||||
* Emit the SEND instruction necessary to generate stream output data on Gfx6
|
||||
* (for transform feedback).
|
||||
*
|
||||
* If send_commit_msg is true, this is the last piece of stream output data
|
||||
|
@ -3298,7 +3298,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
|
||||
brw_push_insn_state(p);
|
||||
|
||||
/* The flag register is only used on Gen7 in align1 mode, so avoid setting
|
||||
/* The flag register is only used on Gfx7 in align1 mode, so avoid setting
|
||||
* unnecessary bits in the instruction words, get the information we need
|
||||
* and reset the default flag register. This allows more instructions to be
|
||||
* compacted.
|
||||
|
@ -3310,7 +3310,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
/* Getting the first active channel index is easy on Gen8: Just find
|
||||
/* Getting the first active channel index is easy on Gfx8: Just find
|
||||
* the first bit set in the execution mask. The register exists on
|
||||
* HSW already but it reads back as all ones when the current
|
||||
* instruction has execution masking disabled, so it's kind of
|
||||
|
@ -3350,7 +3350,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
* mask in f1.0. We could use a single 32-wide move here if it
|
||||
* weren't because of the hardware bug that causes channel enables to
|
||||
* be applied incorrectly to the second half of 32-wide instructions
|
||||
* on Gen7.
|
||||
* on Gfx7.
|
||||
*/
|
||||
const unsigned lower_size = MIN2(16, exec_size);
|
||||
for (unsigned i = 0; i < exec_size / lower_size; i++) {
|
||||
|
@ -3379,7 +3379,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
|
||||
/* In SIMD4x2 mode the first active channel index is just the
|
||||
* negation of the first bit of the mask register. Note that ce0
|
||||
* doesn't take into account the dispatch mask, so the Gen7 path
|
||||
* doesn't take into account the dispatch mask, so the Gfx7 path
|
||||
* should be used instead unless you have the guarantee that the
|
||||
* dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
|
||||
* for some n).
|
||||
|
@ -3650,7 +3650,7 @@ brw_float_controls_mode(struct brw_codegen *p,
|
|||
* thread control field to ‘switch’ for an instruction that uses
|
||||
* control register as an explicit operand."
|
||||
*
|
||||
* On Gen12+ this is implemented in terms of SWSB annotations instead.
|
||||
* On Gfx12+ this is implemented in terms of SWSB annotations instead.
|
||||
*/
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
|
|
|
@ -292,9 +292,9 @@ invalid_values(const struct gen_device_info *devinfo, const brw_inst *inst)
|
|||
|
||||
if (num_sources == 3) {
|
||||
/* Nothing to test:
|
||||
* No 3-src instructions on Gen4-5
|
||||
* No reg file bits on Gen6-10 (align16)
|
||||
* No invalid encodings on Gen10-12 (align1)
|
||||
* No 3-src instructions on Gfx4-5
|
||||
* No reg file bits on Gfx6-10 (align16)
|
||||
* No invalid encodings on Gfx10-12 (align1)
|
||||
*/
|
||||
} else {
|
||||
if (devinfo->ver > 6) {
|
||||
|
@ -1873,7 +1873,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
* If Align16 is required for an operation with QW destination and non-QW
|
||||
* source datatypes, the execution size cannot exceed 2.
|
||||
*
|
||||
* We assume that the restriction applies to all Gen8+ parts.
|
||||
* We assume that the restriction applies to all Gfx8+ parts.
|
||||
*/
|
||||
if (devinfo->ver >= 8) {
|
||||
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||
|
|
|
@ -1097,7 +1097,7 @@ fs_inst::flags_read(const gen_device_info *devinfo) const
|
|||
if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
|
||||
predicate == BRW_PREDICATE_ALIGN1_ALLV) {
|
||||
/* The vertical predication modes combine corresponding bits from
|
||||
* f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware.
|
||||
* f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
|
||||
*/
|
||||
const unsigned shift = devinfo->ver >= 7 ? 4 : 2;
|
||||
return flag_mask(this, 1) << shift | flag_mask(this, 1);
|
||||
|
@ -1440,7 +1440,7 @@ fs_visitor::emit_sampleid_setup()
|
|||
* shr(16) tmp<1>W g1.0<1,8,0>B 0x44440000:V
|
||||
* and(16) dst<1>D tmp<8,8,1>W 0xf:W
|
||||
*
|
||||
* TODO: These payload bits exist on Gen7 too, but they appear to always
|
||||
* TODO: These payload bits exist on Gfx7 too, but they appear to always
|
||||
* be zero, so this code fails to work. We should find out why.
|
||||
*/
|
||||
const fs_reg tmp = abld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
|
@ -1639,7 +1639,7 @@ fs_visitor::assign_curb_setup()
|
|||
ubld.MOV(header0, brw_imm_ud(0));
|
||||
ubld.group(1, 0).SHR(component(header0, 2), base_addr, brw_imm_ud(4));
|
||||
|
||||
/* On Gen12-HP we load constants at the start of the program using A32
|
||||
/* On Gfx12-HP we load constants at the start of the program using A32
|
||||
* stateless messages.
|
||||
*/
|
||||
for (unsigned i = 0; i < uniform_push_length;) {
|
||||
|
@ -2805,7 +2805,7 @@ fs_visitor::opt_algebraic()
|
|||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].equals(inst->src[1]) ||
|
||||
inst->src[1].is_zero()) {
|
||||
/* On Gen8+, the OR instruction can have a source modifier that
|
||||
/* On Gfx8+, the OR instruction can have a source modifier that
|
||||
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
|
||||
* or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
|
||||
*/
|
||||
|
@ -2984,7 +2984,7 @@ fs_visitor::opt_algebraic()
|
|||
bool
|
||||
fs_visitor::opt_zero_samples()
|
||||
{
|
||||
/* Gen4 infers the texturing opcode based on the message length so we can't
|
||||
/* Gfx4 infers the texturing opcode based on the message length so we can't
|
||||
* change it.
|
||||
*/
|
||||
if (devinfo->ver < 5)
|
||||
|
@ -3953,7 +3953,7 @@ fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block)
|
|||
: brw_imm_w(inst->src[1].d));
|
||||
}
|
||||
} else {
|
||||
/* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
|
||||
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
|
||||
* do 32-bit integer multiplication in one instruction, but instead
|
||||
* must do a sequence (which actually calculates a 64-bit result):
|
||||
*
|
||||
|
@ -4157,11 +4157,11 @@ fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
|
|||
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
/* Until Gen8, integer multiplies read 32-bits from one source,
|
||||
/* Until Gfx8, integer multiplies read 32-bits from one source,
|
||||
* and 16-bits from the other, and relying on the MACH instruction
|
||||
* to generate the high bits of the result.
|
||||
*
|
||||
* On Gen8, the multiply instruction does a full 32x32-bit
|
||||
* On Gfx8, the multiply instruction does a full 32x32-bit
|
||||
* multiply, but in order to do a 64-bit multiply we can simulate
|
||||
* the previous behavior and then use a MACH instruction.
|
||||
*/
|
||||
|
@ -4179,13 +4179,13 @@ fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
|
|||
* accumulator register is used by the hardware for instructions
|
||||
* that access the accumulator implicitly (e.g. MACH). A
|
||||
* second-half instruction would normally map to acc1, which
|
||||
* doesn't exist on Gen7 and up (the hardware does emulate it for
|
||||
* doesn't exist on Gfx7 and up (the hardware does emulate it for
|
||||
* floating-point instructions *only* by taking advantage of the
|
||||
* extra precision of acc0 not normally used for floating point
|
||||
* arithmetic).
|
||||
*
|
||||
* HSW and up are careful enough not to try to access an
|
||||
* accumulator register that doesn't exist, but on earlier Gen7
|
||||
* accumulator register that doesn't exist, but on earlier Gfx7
|
||||
* hardware we need to make sure that the quarter control bits are
|
||||
* zero to avoid non-deterministic behaviour and emit an extra MOV
|
||||
* to get the result masked correctly according to the current
|
||||
|
@ -4261,7 +4261,7 @@ fs_visitor::lower_minmax()
|
|||
inst->predicate == BRW_PREDICATE_NONE) {
|
||||
/* If src1 is an immediate value that is not NaN, then it can't be
|
||||
* NaN. In that case, emit CMP because it is much better for cmod
|
||||
* propagation. Likewise if src1 is not float. Gen4 and Gen5 don't
|
||||
* propagation. Likewise if src1 is not float. Gfx4 and Gfx5 don't
|
||||
* support HF or DF, so it is not necessary to check for those.
|
||||
*/
|
||||
if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
|
||||
|
@ -4526,7 +4526,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||
};
|
||||
ubld.LOAD_PAYLOAD(header, header_sources, 2, 0);
|
||||
|
||||
/* Gen12 will require additional fix-ups if we ever hit this path. */
|
||||
/* Gfx12 will require additional fix-ups if we ever hit this path. */
|
||||
assert(devinfo->ver < 12);
|
||||
}
|
||||
|
||||
|
@ -4731,7 +4731,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
ubld.LOAD_PAYLOAD(header, header_sources, ARRAY_SIZE(header_sources), 0);
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
/* On Gen12 the Viewport and Render Target Array Index fields (AKA
|
||||
/* On Gfx12 the Viewport and Render Target Array Index fields (AKA
|
||||
* Poly 0 Info) are provided in r1.1 instead of r0.0, and the render
|
||||
* target message header format was updated accordingly -- However
|
||||
* the updated format only works for the lower 16 channels in a
|
||||
|
@ -5113,7 +5113,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
fs_reg sampler_state_ptr =
|
||||
retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* Gen11+ sampler message headers include bits in 4:0 which conflict
|
||||
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
|
||||
* with the ones included in g0.3 bits 4:0. Mask them out.
|
||||
*/
|
||||
if (devinfo->ver >= 11) {
|
||||
|
@ -5136,7 +5136,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
|
||||
}
|
||||
} else if (devinfo->ver >= 11) {
|
||||
/* Gen11+ sampler message headers include bits in 4:0 which conflict
|
||||
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
|
||||
* with the ones included in g0.3 bits 4:0. Mask them out.
|
||||
*/
|
||||
ubld1.AND(component(header, 3),
|
||||
|
@ -5195,7 +5195,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
break;
|
||||
case SHADER_OPCODE_TXF:
|
||||
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
|
||||
* On Gen9 they are u, v, lod, r
|
||||
* On Gfx9 they are u, v, lod, r
|
||||
*/
|
||||
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D), coordinate);
|
||||
|
||||
|
@ -5239,7 +5239,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
|
||||
length++;
|
||||
|
||||
/* On Gen9+ we'll use ld2dms_w instead which has two registers for
|
||||
/* On Gfx9+ we'll use ld2dms_w instead which has two registers for
|
||||
* the MCS data.
|
||||
*/
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
|
@ -5574,8 +5574,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
*
|
||||
* Earlier generations have a similar wording. Because of this restriction
|
||||
* we don't attempt to implement sample masks via predication for such
|
||||
* messages prior to Gen9, since we have to provide a header anyway. On
|
||||
* Gen11+ the header has been removed so we can only use predication.
|
||||
* messages prior to Gfx9, since we have to provide a header anyway. On
|
||||
* Gfx11+ the header has been removed so we can only use predication.
|
||||
*
|
||||
* For all stateless A32 messages, we also need a header
|
||||
*/
|
||||
|
@ -6391,9 +6391,9 @@ fs_visitor::lower_logical_sends()
|
|||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
/* The math opcodes are overloaded for the send-like and
|
||||
* expression-like instructions which seems kind of icky. Gen6+ has
|
||||
* expression-like instructions which seems kind of icky. Gfx6+ has
|
||||
* a native (but rather quirky) MATH instruction so we don't need to
|
||||
* do anything here. On Gen4-5 we'll have to lower the Gen6-like
|
||||
* do anything here. On Gfx4-5 we'll have to lower the Gfx6-like
|
||||
* logical instructions (which we can easily recognize because they
|
||||
* have mlen = 0) into send-like virtual instructions.
|
||||
*/
|
||||
|
@ -6520,7 +6520,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
* integer DWord, the source register is not incremented but the
|
||||
* source sub register is incremented."
|
||||
*
|
||||
* The hardware specs from Gen4 to Gen7.5 mention similar regioning
|
||||
* The hardware specs from Gfx4 to Gfx7.5 mention similar regioning
|
||||
* restrictions. The code below intentionally doesn't check whether the
|
||||
* destination type is integer because empirically the hardware doesn't
|
||||
* seem to care what the actual type is as long as it's dword-aligned.
|
||||
|
@ -6574,7 +6574,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
* execution mask channels are required, split the instruction into two
|
||||
* SIMD16 instructions."
|
||||
*
|
||||
* There is similar text in the HSW PRMs. Gen4-6 don't even implement
|
||||
* There is similar text in the HSW PRMs. Gfx4-6 don't even implement
|
||||
* 32-wide control flow support in hardware and will behave similarly.
|
||||
*/
|
||||
if (devinfo->ver < 8 && !inst->force_writemask_all)
|
||||
|
@ -6597,7 +6597,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
if (inst->is_3src(devinfo) && !devinfo->supports_simd16_3src)
|
||||
max_width = MIN2(max_width, inst->exec_size / reg_count);
|
||||
|
||||
/* Pre-Gen8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is
|
||||
/* Pre-Gfx8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is
|
||||
* the 8-bit quarter of the execution mask signals specified in the
|
||||
* instruction control fields) for the second compressed half of any
|
||||
* single-precision instruction (for double-precision instructions
|
||||
|
@ -6706,7 +6706,7 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
|
||||
3;
|
||||
|
||||
/* On Gen9+ the LOD argument is for free if we're able to use the LZ
|
||||
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
|
||||
* variant of the TXL or TXF message.
|
||||
*/
|
||||
const bool implicit_lod = devinfo->ver >= 9 &&
|
||||
|
@ -6821,8 +6821,8 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
case SHADER_OPCODE_LOG2:
|
||||
case SHADER_OPCODE_SIN:
|
||||
case SHADER_OPCODE_COS: {
|
||||
/* Unary extended math instructions are limited to SIMD8 on Gen4 and
|
||||
* Gen6. Extended Math Function is limited to SIMD8 with half-float.
|
||||
/* Unary extended math instructions are limited to SIMD8 on Gfx4 and
|
||||
* Gfx6. Extended Math Function is limited to SIMD8 with half-float.
|
||||
*/
|
||||
if (devinfo->ver == 6 || (devinfo->ver == 4 && !devinfo->is_g4x))
|
||||
return MIN2(8, inst->exec_size);
|
||||
|
@ -6832,7 +6832,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
case SHADER_OPCODE_POW: {
|
||||
/* SIMD16 is only allowed on Gen7+. Extended Math Function is limited
|
||||
/* SIMD16 is only allowed on Gfx7+. Extended Math Function is limited
|
||||
* to SIMD8 with half-float
|
||||
*/
|
||||
if (devinfo->ver < 7)
|
||||
|
@ -6886,7 +6886,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
* In this context, "DW operations" means "operations acting on 32-bit
|
||||
* values", so it includes operations on floats.
|
||||
*
|
||||
* Gen4 has a similar restriction. From the i965 PRM, section 11.5.3
|
||||
* Gfx4 has a similar restriction. From the i965 PRM, section 11.5.3
|
||||
* (Instruction Compression -> Rules and Restrictions):
|
||||
*
|
||||
* "A compressed instruction must be in Align1 access mode. Align16
|
||||
|
@ -6903,13 +6903,13 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
|
||||
case SHADER_OPCODE_MULH:
|
||||
/* MULH is lowered to the MUL/MACH sequence using the accumulator, which
|
||||
* is 8-wide on Gen7+.
|
||||
* is 8-wide on Gfx7+.
|
||||
*/
|
||||
return (devinfo->ver >= 7 ? 8 :
|
||||
get_fpu_lowered_simd_width(devinfo, inst));
|
||||
|
||||
case FS_OPCODE_FB_WRITE_LOGICAL:
|
||||
/* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
|
||||
/* Gfx6 doesn't support SIMD16 depth writes but we cannot handle them
|
||||
* here.
|
||||
*/
|
||||
assert(devinfo->ver != 6 ||
|
||||
|
@ -6949,7 +6949,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
/* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
|
||||
/* Gfx4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
|
||||
* messages. Use SIMD16 instead.
|
||||
*/
|
||||
if (devinfo->ver == 4)
|
||||
|
@ -7323,7 +7323,7 @@ fs_visitor::lower_simd_width()
|
|||
|
||||
/**
|
||||
* Transform barycentric vectors into the interleaved form expected by the PLN
|
||||
* instruction and returned by the Gen7+ PI shared function.
|
||||
* instruction and returned by the Gfx7+ PI shared function.
|
||||
*
|
||||
* For channels 0-15 in SIMD16 mode they are expected to be laid out as
|
||||
* follows in the register file:
|
||||
|
@ -8076,7 +8076,7 @@ find_halt_control_flow_region_start(const fs_visitor *v)
|
|||
}
|
||||
|
||||
/**
|
||||
* Work around the Gen12 hardware bug filed as GEN:BUG:1407528679. EU fusion
|
||||
* Work around the Gfx12 hardware bug filed as GEN:BUG:1407528679. EU fusion
|
||||
* can cause a BB to be executed with all channels disabled, which will lead
|
||||
* to the execution of any NoMask instructions in it, even though any
|
||||
* execution-masked instructions will be correctly shot down. This may break
|
||||
|
@ -9227,7 +9227,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
* generations, the only configurations supporting persample dispatch
|
||||
* are those in which only one dispatch width is enabled.
|
||||
*
|
||||
* The Gen12 hardware spec has a similar dispatch grouping table, but
|
||||
* The Gfx12 hardware spec has a similar dispatch grouping table, but
|
||||
* the following conflicting restriction applies (from the page on
|
||||
* "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
|
||||
*
|
||||
|
|
|
@ -447,7 +447,7 @@ private:
|
|||
|
||||
/**
|
||||
* Return the flag register used in fragment shaders to keep track of live
|
||||
* samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
|
||||
* samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
|
||||
* dispatch mode, while earlier generations are constrained to f0.1, which
|
||||
* limits the dispatch width to SIMD16 for fragment shaders that use discard.
|
||||
*/
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
* instructions.
|
||||
*
|
||||
* Unfortunately there is close to no information about bank conflicts in the
|
||||
* hardware spec, but experimentally on Gen7-Gen9 ternary instructions seem to
|
||||
* hardware spec, but experimentally on Gfx7-Gfx9 ternary instructions seem to
|
||||
* incur an average bank conflict penalty of one cycle per SIMD8 op whenever
|
||||
* the second and third source are stored in the same GRF bank (\sa bank_of()
|
||||
* for the exact bank layout) which cannot be fetched during the same cycle by
|
||||
|
@ -568,14 +568,14 @@ namespace {
|
|||
}
|
||||
|
||||
/* Preserve the original allocation of VGRFs used by the barycentric
|
||||
* source of the LINTERP instruction on Gen6, since pair-aligned
|
||||
* source of the LINTERP instruction on Gfx6, since pair-aligned
|
||||
* barycentrics allow the PLN instruction to be used.
|
||||
*/
|
||||
if (v->devinfo->has_pln && v->devinfo->ver <= 6 &&
|
||||
inst->opcode == FS_OPCODE_LINTERP)
|
||||
constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true;
|
||||
|
||||
/* The location of the Gen7 MRF hack registers is hard-coded in the
|
||||
/* The location of the Gfx7 MRF hack registers is hard-coded in the
|
||||
* rest of the compiler back-end. Don't attempt to move them around.
|
||||
*/
|
||||
if (v->devinfo->ver >= 7) {
|
||||
|
@ -620,9 +620,9 @@ namespace {
|
|||
* assignment of r. \sa delta_conflicts() for a vectorized implementation
|
||||
* of the expression above.
|
||||
*
|
||||
* FINISHME: Teach this about the Gen10+ bank conflict rules, which are
|
||||
* FINISHME: Teach this about the Gfx10+ bank conflict rules, which are
|
||||
* somewhat more relaxed than on previous generations. In the
|
||||
* meantime optimizing based on Gen9 weights is likely to be more
|
||||
* meantime optimizing based on Gfx9 weights is likely to be more
|
||||
* helpful than not optimizing at all.
|
||||
*/
|
||||
weight_vector_type *
|
||||
|
|
|
@ -703,7 +703,7 @@ namespace brw {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gen4 predicated IF.
|
||||
* Gfx4 predicated IF.
|
||||
*/
|
||||
instruction *
|
||||
IF(brw_predicate predicate) const
|
||||
|
@ -849,10 +849,10 @@ namespace brw {
|
|||
* expanding that result out, but we would need to be careful with
|
||||
* masking.
|
||||
*
|
||||
* Gen6 hardware ignores source modifiers (negate and abs) on math
|
||||
* Gfx6 hardware ignores source modifiers (negate and abs) on math
|
||||
* instructions, so we also move to a temp to set those up.
|
||||
*
|
||||
* Gen7 relaxes most of the above restrictions, but still can't use IMM
|
||||
* Gfx7 relaxes most of the above restrictions, but still can't use IMM
|
||||
* operands to math
|
||||
*/
|
||||
if ((shader->devinfo->ver == 6 &&
|
||||
|
|
|
@ -88,7 +88,7 @@ brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
|
|||
const unsigned max_hw_width = 16;
|
||||
|
||||
/* XXX - The equation above is strictly speaking not correct on
|
||||
* hardware that supports unbalanced GRF writes -- On Gen9+
|
||||
* hardware that supports unbalanced GRF writes -- On Gfx9+
|
||||
* each decompressed chunk of the instruction may have a
|
||||
* different execution size when the number of components
|
||||
* written to each destination GRF is not the same.
|
||||
|
@ -525,7 +525,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
* code, using it saves us 0 instructions and would require quite a bit
|
||||
* of case-by-case work. It's just not worth it.
|
||||
*
|
||||
* Due to a hardware bug some platforms (particularly Gen11+) seem to
|
||||
* Due to a hardware bug some platforms (particularly Gfx11+) seem to
|
||||
* require the address components of all channels to be valid whether or
|
||||
* not they're active, which causes issues if we use VxH addressing
|
||||
* under non-uniform control-flow. We can easily work around that by
|
||||
|
@ -685,7 +685,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
lower_width == dispatch_width;
|
||||
brw_inst *insn;
|
||||
|
||||
/* Due to a hardware bug some platforms (particularly Gen11+) seem
|
||||
/* Due to a hardware bug some platforms (particularly Gfx11+) seem
|
||||
* to require the address components of all channels to be valid
|
||||
* whether or not they're active, which causes issues if we use VxH
|
||||
* addressing under non-uniform control-flow. We can easily work
|
||||
|
@ -1409,7 +1409,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
|
|||
* DWord elements ONLY. This is applicable when both source and
|
||||
* destination are half-floats."
|
||||
*
|
||||
* So for half-float operations we use the Gen11+ Align1 path. CHV
|
||||
* So for half-float operations we use the Gfx11+ Align1 path. CHV
|
||||
* inherits its FP16 hardware from SKL, so it is not affected.
|
||||
*/
|
||||
if (devinfo->ver >= 11 ||
|
||||
|
|
|
@ -206,7 +206,7 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
|
|||
const fs_builder abld =
|
||||
v->bld.annotate("gl_HelperInvocation", NULL);
|
||||
|
||||
/* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the
|
||||
/* On Gfx6+ (gl_HelperInvocation is only exposed on Gfx7+) the
|
||||
* pixel mask is in g1.7 of the thread payload.
|
||||
*
|
||||
* We move the per-channel pixel enable bit to the low bit of each
|
||||
|
@ -232,7 +232,7 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
|
|||
* that is the opposite of gl_HelperInvocation so we need to invert
|
||||
* the mask.
|
||||
*
|
||||
* The negate source-modifier bit of logical instructions on Gen8+
|
||||
* The negate source-modifier bit of logical instructions on Gfx8+
|
||||
* performs 1's complement negation, so we can use that instead of
|
||||
* a NOT instruction.
|
||||
*/
|
||||
|
@ -1251,7 +1251,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
|||
|
||||
assert(nir_dest_bit_size(instr->dest.dest) == 32);
|
||||
|
||||
/* Before Gen7, the order of the 32-bit source and the 16-bit source was
|
||||
/* Before Gfx7, the order of the 32-bit source and the 16-bit source was
|
||||
* swapped. The extension isn't enabled on those platforms, so don't
|
||||
* pretend to support the differences.
|
||||
*/
|
||||
|
@ -4321,7 +4321,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
slm_fence && workgroup_size() <= dispatch_width)
|
||||
slm_fence = false;
|
||||
|
||||
/* Prior to Gen11, there's only L3 fence, so emit that instead. */
|
||||
/* Prior to Gfx11, there's only L3 fence, so emit that instead. */
|
||||
if (slm_fence && devinfo->ver < 11) {
|
||||
slm_fence = false;
|
||||
l3_fence = true;
|
||||
|
@ -4333,7 +4333,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
const bool needs_render_fence =
|
||||
devinfo->ver == 7 && !devinfo->is_haswell;
|
||||
|
||||
/* Be conservative in Gen11+ and always stall in a fence. Since there
|
||||
/* Be conservative in Gfx11+ and always stall in a fence. Since there
|
||||
* are two different fences, and shader might want to synchronize
|
||||
* between them.
|
||||
*
|
||||
|
@ -6044,7 +6044,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
nir_dest[0] = offset(dst, bld, 3);
|
||||
} else if (instr->op == nir_texop_txs &&
|
||||
dest_size >= 3 && devinfo->ver < 7) {
|
||||
/* Gen4-6 return 0 instead of 1 for single layer surfaces. */
|
||||
/* Gfx4-6 return 0 instead of 1 for single layer surfaces. */
|
||||
fs_reg depth = offset(dst, bld, 2);
|
||||
nir_dest[2] = vgrf(glsl_type::int_type);
|
||||
bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
|
||||
|
|
|
@ -576,7 +576,7 @@ namespace {
|
|||
unsigned
|
||||
spill_max_size(const backend_shader *s)
|
||||
{
|
||||
/* FINISHME - On Gen7+ it should be possible to avoid this limit
|
||||
/* FINISHME - On Gfx7+ it should be possible to avoid this limit
|
||||
* altogether by spilling directly from the temporary GRF
|
||||
* allocated to hold the result of the instruction (and the
|
||||
* scratch write header).
|
||||
|
@ -594,7 +594,7 @@ namespace {
|
|||
unsigned
|
||||
spill_base_mrf(const backend_shader *s)
|
||||
{
|
||||
/* We don't use the MRF hack on Gen9+ */
|
||||
/* We don't use the MRF hack on Gfx9+ */
|
||||
assert(s->devinfo->ver < 9);
|
||||
return BRW_MAX_MRF(s->devinfo->ver) - spill_max_size(s) - 1;
|
||||
}
|
||||
|
@ -699,7 +699,7 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst)
|
|||
grf127_send_hack_node);
|
||||
|
||||
/* Spilling instruction are genereated as SEND messages from MRF but as
|
||||
* Gen7+ supports sending from GRF the driver will maps assingn these
|
||||
* Gfx7+ supports sending from GRF the driver will maps assingn these
|
||||
* MRF registers to a GRF. Implementations reuses the dest of the send
|
||||
* message as source. So as we will have an overlap for sure, we create
|
||||
* an interference between destination and grf127.
|
||||
|
@ -844,7 +844,7 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
|
|||
compiler->fs_reg_sets[rsi].classes[size - 1]);
|
||||
}
|
||||
|
||||
/* Special case: on pre-Gen7 hardware that supports PLN, the second operand
|
||||
/* Special case: on pre-Gfx7 hardware that supports PLN, the second operand
|
||||
* of a PLN instruction needs to be an even-numbered register, so we have a
|
||||
* special register class aligned_bary_class to handle this case.
|
||||
*/
|
||||
|
@ -914,9 +914,9 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
|
|||
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
|
||||
BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
|
||||
} else if (devinfo->ver >= 7 && spill_offset < (1 << 12) * REG_SIZE) {
|
||||
/* The Gen7 descriptor-based offset is 12 bits of HWORD units.
|
||||
* Because the Gen7-style scratch block read is hardwired to BTI 255,
|
||||
* on Gen9+ it would cause the DC to do an IA-coherent read, what
|
||||
/* The Gfx7 descriptor-based offset is 12 bits of HWORD units.
|
||||
* Because the Gfx7-style scratch block read is hardwired to BTI 255,
|
||||
* on Gfx9+ it would cause the DC to do an IA-coherent read, what
|
||||
* largely outweighs the slight advantage from not having to provide
|
||||
* the address as part of the message header, so we're better off
|
||||
* using plain old oword block reads.
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
/** @file brw_fs_scoreboard.cpp
|
||||
*
|
||||
* Gen12+ hardware lacks the register scoreboard logic that used to guarantee
|
||||
* Gfx12+ hardware lacks the register scoreboard logic that used to guarantee
|
||||
* data coherency between register reads and writes in previous generations.
|
||||
* This lowering pass runs after register allocation in order to make up for
|
||||
* it.
|
||||
|
|
|
@ -60,7 +60,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
|||
}
|
||||
|
||||
/**
|
||||
* Apply workarounds for Gen6 gather with UINT/SINT
|
||||
* Apply workarounds for Gfx6 gather with UINT/SINT
|
||||
*/
|
||||
void
|
||||
fs_visitor::emit_gfx6_gather_wa(uint8_t wa, fs_reg dst)
|
||||
|
@ -115,7 +115,7 @@ fs_visitor::emit_dummy_fs()
|
|||
write->mlen = 2 + 4 * reg_width;
|
||||
}
|
||||
|
||||
/* Tell the SF we don't have any inputs. Gen4-5 require at least one
|
||||
/* Tell the SF we don't have any inputs. Gfx4-5 require at least one
|
||||
* varying to avoid GPU hangs, so set that.
|
||||
*/
|
||||
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
|
||||
|
@ -131,7 +131,7 @@ fs_visitor::emit_dummy_fs()
|
|||
stage_prog_data->dispatch_grf_start_reg = 2;
|
||||
wm_prog_data->dispatch_grf_start_reg_16 = 2;
|
||||
wm_prog_data->dispatch_grf_start_reg_32 = 2;
|
||||
grf_used = 1; /* Gen4-5 don't allow zero GRF blocks */
|
||||
grf_used = 1; /* Gfx4-5 don't allow zero GRF blocks */
|
||||
|
||||
calculate_cfg();
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, \
|
|||
}
|
||||
|
||||
/* A simple macro for fields which stay in the same place on all generations,
|
||||
* except for Gen12!
|
||||
* except for Gfx12!
|
||||
*/
|
||||
#define F(name, hi4, lo4, hi12, lo12) FC(name, hi4, lo4, hi12, lo12, true)
|
||||
|
||||
|
@ -122,7 +122,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
return brw_inst_bits(inst, high, low); \
|
||||
}
|
||||
|
||||
/* A macro for fields which moved as of Gen8+. */
|
||||
/* A macro for fields which moved as of Gfx8+. */
|
||||
#define F8(name, gfx4_high, gfx4_low, gfx8_high, gfx8_low, \
|
||||
gfx12_high, gfx12_low) \
|
||||
FF(name, \
|
||||
|
@ -134,7 +134,7 @@ FF(name, \
|
|||
/* 8: */ gfx8_high, gfx8_low, \
|
||||
/* 12: */ gfx12_high, gfx12_low);
|
||||
|
||||
/* Macro for fields that gained extra discontiguous MSBs in Gen12 (specified
|
||||
/* Macro for fields that gained extra discontiguous MSBs in Gfx12 (specified
|
||||
* by hi12ex-lo12ex).
|
||||
*/
|
||||
#define FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
|
||||
|
@ -176,7 +176,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
|
||||
hi7, lo7, hi8, lo8, hi12ex, lo12ex, hi12, lo12, true)
|
||||
|
||||
/* Macro for fields that didn't move across generations until Gen12, and then
|
||||
/* Macro for fields that didn't move across generations until Gfx12, and then
|
||||
* gained extra discontiguous bits.
|
||||
*/
|
||||
#define FDC(name, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions) \
|
||||
|
@ -184,7 +184,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
hi4, lo4, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions)
|
||||
|
||||
|
||||
/* Macro for the 2-bit register file field, which on Gen12+ is stored as the
|
||||
/* Macro for the 2-bit register file field, which on Gfx12+ is stored as the
|
||||
* variable length combination of an IsImm (hi12) bit and an additional file
|
||||
* (lo12) bit.
|
||||
*/
|
||||
|
@ -217,7 +217,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
} \
|
||||
}
|
||||
|
||||
/* Macro for fields that become a constant in Gen12+ not actually represented
|
||||
/* Macro for fields that become a constant in Gfx12+ not actually represented
|
||||
* in the instruction.
|
||||
*/
|
||||
#define FK(name, hi4, lo4, const12) \
|
||||
|
@ -834,7 +834,7 @@ FF(sfid,
|
|||
FF(null_rt,
|
||||
/* 4-7: */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
/* 8: */ 80, 80,
|
||||
/* 12: */ 44, 44) /* actually only Gen11+ */
|
||||
/* 12: */ 44, 44) /* actually only Gfx11+ */
|
||||
FC(base_mrf, /* 4+ */ 27, 24, /* 12+ */ -1, -1, devinfo->ver < 6);
|
||||
FF(send_rta_index,
|
||||
/* 4: */ -1, -1,
|
||||
|
@ -886,7 +886,7 @@ FF(urb_opcode,
|
|||
/** @} */
|
||||
|
||||
/**
|
||||
* Gen4-5 math messages:
|
||||
* Gfx4-5 math messages:
|
||||
* @{
|
||||
*/
|
||||
FC(math_msg_data_type, /* 4+ */ MD(7), MD(7), /* 12+ */ -1, -1, devinfo->ver < 6)
|
||||
|
@ -933,7 +933,7 @@ F(binding_table_index, /* 4+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0)) /*
|
|||
*/
|
||||
FC(dp_category, /* 4+ */ MD(18), MD(18), /* 12+ */ MD12(18), MD12(18), devinfo->ver >= 7)
|
||||
|
||||
/* Gen4-5 store fields in different bits for read/write messages. */
|
||||
/* Gfx4-5 store fields in different bits for read/write messages. */
|
||||
FF(dp_read_msg_type,
|
||||
/* 4: */ MD(13), MD(12),
|
||||
/* 4.5: */ MD(13), MD(11),
|
||||
|
@ -976,7 +976,7 @@ FF(dp_write_commit,
|
|||
/* 7+: does not exist */ -1, -1, -1, -1,
|
||||
/* 12: */ -1, -1)
|
||||
|
||||
/* Gen6+ use the same bit locations for everything. */
|
||||
/* Gfx6+ use the same bit locations for everything. */
|
||||
FF(dp_msg_type,
|
||||
/* 4-5: use dp_read_msg_type or dp_write_msg_type instead */
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
|
@ -994,7 +994,7 @@ FD(dp_msg_control,
|
|||
/** @} */
|
||||
|
||||
/**
|
||||
* Scratch message bits (Gen7+):
|
||||
* Scratch message bits (Gfx7+):
|
||||
* @{
|
||||
*/
|
||||
FC(scratch_read_write, /* 4+ */ MD(17), MD(17), /* 12+ */ MD12(17), MD12(17), devinfo->ver >= 7) /* 0 = read, 1 = write */
|
||||
|
@ -1189,7 +1189,7 @@ REG_TYPE(src1)
|
|||
#undef REG_TYPE
|
||||
|
||||
|
||||
/* The AddrImm fields are split into two discontiguous sections on Gen8+ */
|
||||
/* The AddrImm fields are split into two discontiguous sections on Gfx8+ */
|
||||
#define BRW_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low, \
|
||||
g12_high, g12_low) \
|
||||
static inline void \
|
||||
|
@ -1222,7 +1222,7 @@ brw_inst_##reg##_ia1_addr_imm(const struct gen_device_info *devinfo, \
|
|||
}
|
||||
|
||||
/* AddrImm[9:0] for Align1 Indirect Addressing */
|
||||
/* -Gen 4- ----Gen8---- -Gen12- */
|
||||
/* -Gen 4- ----Gfx8---- -Gfx12- */
|
||||
BRW_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98)
|
||||
BRW_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66)
|
||||
BRW_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50)
|
||||
|
@ -1257,7 +1257,7 @@ brw_inst_##reg##_ia16_addr_imm(const struct gen_device_info *devinfo, \
|
|||
|
||||
/* AddrImm[9:0] for Align16 Indirect Addressing:
|
||||
* Compared to Align1, these are missing the low 4 bits.
|
||||
* -Gen 4- ----Gen8----
|
||||
* -Gen 4- ----Gfx8----
|
||||
*/
|
||||
BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
|
||||
BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
|
||||
|
@ -1377,7 +1377,7 @@ brw_compact_inst_##name(const struct gen_device_info *devinfo, \
|
|||
}
|
||||
|
||||
/* A simple macro for fields which stay in the same place on all generations
|
||||
* except for Gen12.
|
||||
* except for Gfx12.
|
||||
*/
|
||||
#define F(name, high, low, gfx12_high, gfx12_low) \
|
||||
FC(name, high, low, gfx12_high, gfx12_low, true)
|
||||
|
@ -1412,7 +1412,7 @@ brw_compact_inst_imm(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
/**
|
||||
* (Gen8+) Compacted three-source instructions:
|
||||
* (Gfx8+) Compacted three-source instructions:
|
||||
* @{
|
||||
*/
|
||||
FC(3src_src2_reg_nr, /* 4+ */ 63, 57, /* 12+ */ 55, 48, devinfo->ver >= 8)
|
||||
|
|
|
@ -38,7 +38,7 @@ namespace {
|
|||
unit_fe,
|
||||
/** EU FPU0 (Note that co-issue to FPU1 is currently not modeled here). */
|
||||
unit_fpu,
|
||||
/** Extended Math unit (AKA FPU1 on Gen8-11, part of the EU on Gen6+). */
|
||||
/** Extended Math unit (AKA FPU1 on Gfx8-11, part of the EU on Gfx6+). */
|
||||
unit_em,
|
||||
/** Sampler shared function. */
|
||||
unit_sampler,
|
||||
|
@ -71,7 +71,7 @@ namespace {
|
|||
enum dependency_id {
|
||||
/* Register part of the GRF. */
|
||||
dependency_id_grf0 = 0,
|
||||
/* Register part of the MRF. Only used on Gen4-6. */
|
||||
/* Register part of the MRF. Only used on Gfx4-6. */
|
||||
dependency_id_mrf0 = dependency_id_grf0 + BRW_MAX_GRF,
|
||||
/* Address register part of the ARF. */
|
||||
dependency_id_addr0 = dependency_id_mrf0 + 24,
|
||||
|
@ -79,9 +79,9 @@ namespace {
|
|||
dependency_id_accum0 = dependency_id_addr0 + 1,
|
||||
/* Flag register part of the ARF. */
|
||||
dependency_id_flag0 = dependency_id_accum0 + 12,
|
||||
/* SBID token write completion. Only used on Gen12+. */
|
||||
/* SBID token write completion. Only used on Gfx12+. */
|
||||
dependency_id_sbid_wr0 = dependency_id_flag0 + 8,
|
||||
/* SBID token read completion. Only used on Gen12+. */
|
||||
/* SBID token read completion. Only used on Gfx12+. */
|
||||
dependency_id_sbid_rd0 = dependency_id_sbid_wr0 + 16,
|
||||
/* Number of computation dependencies currently tracked. */
|
||||
num_dependency_ids = dependency_id_sbid_rd0 + 16
|
||||
|
@ -280,7 +280,7 @@ namespace {
|
|||
* Most timing parameters are obtained from the multivariate linear
|
||||
* regression of a sample of empirical timings measured using the tm0
|
||||
* register (as can be done today by using the shader_time debugging
|
||||
* option). The Gen4-5 math timings are obtained from BSpec Volume 5c.3
|
||||
* option). The Gfx4-5 math timings are obtained from BSpec Volume 5c.3
|
||||
* "Shared Functions - Extended Math", Section 3.2 "Performance".
|
||||
* Parameters marked XXX shall be considered low-quality, they're possibly
|
||||
* high variance or completely guessed in cases where experimental data was
|
||||
|
@ -1229,7 +1229,7 @@ namespace {
|
|||
|
||||
/**
|
||||
* Return the dependency ID corresponding to the SBID read completion
|
||||
* condition of a Gen12+ SWSB.
|
||||
* condition of a Gfx12+ SWSB.
|
||||
*/
|
||||
dependency_id
|
||||
tgl_swsb_rd_dependency_id(tgl_swsb swsb)
|
||||
|
@ -1244,7 +1244,7 @@ namespace {
|
|||
|
||||
/**
|
||||
* Return the dependency ID corresponding to the SBID write completion
|
||||
* condition of a Gen12+ SWSB.
|
||||
* condition of a Gfx12+ SWSB.
|
||||
*/
|
||||
dependency_id
|
||||
tgl_swsb_wr_dependency_id(tgl_swsb swsb)
|
||||
|
@ -1531,7 +1531,7 @@ namespace {
|
|||
* weights used elsewhere in the compiler back-end.
|
||||
*
|
||||
* Note that we provide slightly more pessimistic weights on
|
||||
* Gen12+ for SIMD32, since the effective warp size on that
|
||||
* Gfx12+ for SIMD32, since the effective warp size on that
|
||||
* platform is 2x the SIMD width due to EU fusion, which increases
|
||||
* the likelihood of divergent control flow in comparison to
|
||||
* previous generations, giving narrower SIMD modes a performance
|
||||
|
|
|
@ -614,7 +614,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
*
|
||||
* Passing 1 to the peephole select pass causes it to convert
|
||||
* if-statements that contain at most a single ALU instruction (total)
|
||||
* in both branches. Before Gen6, some math instructions were
|
||||
* in both branches. Before Gfx6, some math instructions were
|
||||
* prohibitively expensive and the results of compare operations need an
|
||||
* extra resolve step. For these reasons, this pass is more harmful
|
||||
* than good on those platforms.
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
/**
|
||||
* GEN:BUG:1806565034:
|
||||
*
|
||||
* Gen12+ allows to set RENDER_SURFACE_STATE::SurfaceArray to 1 only if
|
||||
* Gfx12+ allows to set RENDER_SURFACE_STATE::SurfaceArray to 1 only if
|
||||
* array_len > 1. Setting RENDER_SURFACE_STATE::SurfaceArray to 0 results in
|
||||
* the HW RESINFO message to report an array size of 0 which breaks texture
|
||||
* array size queries.
|
||||
|
|
|
@ -137,7 +137,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
|
|||
* by passing in the miplevel as tile.z for 3-D textures and 0 in
|
||||
* tile.z for 2-D array textures.
|
||||
*
|
||||
* See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
|
||||
* See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
|
||||
* Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
|
||||
* of the hardware 3D texture and 2D array layouts.
|
||||
*/
|
||||
|
@ -410,7 +410,7 @@ lower_image_load_instr(nir_builder *b,
|
|||
nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
|
||||
if (devinfo->ver == 7 && !devinfo->is_haswell) {
|
||||
/* Check whether the first stride component (i.e. the Bpp value)
|
||||
* is greater than four, what on Gen7 indicates that a surface of
|
||||
* is greater than four, what on Gfx7 indicates that a surface of
|
||||
* type RAW has been bound for untyped access. Reading or writing
|
||||
* to a surface of type other than RAW using untyped surface
|
||||
* messages causes a hang on IVB and VLV.
|
||||
|
@ -558,7 +558,7 @@ lower_image_store_instr(nir_builder *b,
|
|||
nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
|
||||
if (devinfo->ver == 7 && !devinfo->is_haswell) {
|
||||
/* Check whether the first stride component (i.e. the Bpp value)
|
||||
* is greater than four, what on Gen7 indicates that a surface of
|
||||
* is greater than four, what on Gfx7 indicates that a surface of
|
||||
* type RAW has been bound for untyped access. Reading or writing
|
||||
* to a surface of type other than RAW using untyped surface
|
||||
* messages causes a hang on IVB and VLV.
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "brw_nir.h"
|
||||
|
||||
/**
|
||||
* Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
|
||||
* Implements the WaPreventHSTessLevelsInterference workaround (for Gfx7-8).
|
||||
*
|
||||
* From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
|
||||
* definition of the patch header layouts):
|
||||
|
|
|
@ -917,8 +917,8 @@ brw_flag_subreg(unsigned subreg)
|
|||
}
|
||||
|
||||
/**
|
||||
* Return the mask register present in Gen4-5, or the related register present
|
||||
* in Gen7.5 and later hardware referred to as "channel enable" register in
|
||||
* Return the mask register present in Gfx4-5, or the related register present
|
||||
* in Gfx7.5 and later hardware referred to as "channel enable" register in
|
||||
* the documentation.
|
||||
*/
|
||||
static inline struct brw_reg
|
||||
|
|
|
@ -929,8 +929,8 @@ schedule_node::schedule_node(backend_instruction *inst,
|
|||
this->delay = 0;
|
||||
this->exit = NULL;
|
||||
|
||||
/* We can't measure Gen6 timings directly but expect them to be much
|
||||
* closer to Gen7 than Gen4.
|
||||
/* We can't measure Gfx6 timings directly but expect them to be much
|
||||
* closer to Gfx7 than Gfx4.
|
||||
*/
|
||||
if (!sched->post_reg_alloc)
|
||||
this->latency = 1;
|
||||
|
@ -1783,7 +1783,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
|
|||
}
|
||||
cand_generation++;
|
||||
|
||||
/* Shared resource: the mathbox. There's one mathbox per EU on Gen6+
|
||||
/* Shared resource: the mathbox. There's one mathbox per EU on Gfx6+
|
||||
* but it's more limited pre-gfx6, so if we send something off to it then
|
||||
* the next math instruction isn't going to make progress until the first
|
||||
* is done.
|
||||
|
|
|
@ -165,13 +165,13 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||
{
|
||||
switch (op) {
|
||||
case 0 ... NUM_BRW_OPCODES - 1:
|
||||
/* The DO instruction doesn't exist on Gen6+, but we use it to mark the
|
||||
/* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
|
||||
* start of a loop in the IR.
|
||||
*/
|
||||
if (devinfo->ver >= 6 && op == BRW_OPCODE_DO)
|
||||
return "do";
|
||||
|
||||
/* The following conversion opcodes doesn't exist on Gen8+, but we use
|
||||
/* The following conversion opcodes doesn't exist on Gfx8+, but we use
|
||||
* then to mark that we want to do the conversion.
|
||||
*/
|
||||
if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16)
|
||||
|
|
|
@ -293,7 +293,7 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
|
|||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
return false;
|
||||
default:
|
||||
/* The MATH instruction on Gen6 only executes in align1 mode, which does
|
||||
/* The MATH instruction on Gfx6 only executes in align1 mode, which does
|
||||
* not support writemasking.
|
||||
*/
|
||||
if (devinfo->ver == 6 && is_math())
|
||||
|
@ -1129,7 +1129,7 @@ vec4_instruction::can_reswizzle(const struct gen_device_info *devinfo,
|
|||
int swizzle,
|
||||
int swizzle_mask)
|
||||
{
|
||||
/* Gen6 MATH instructions can not execute in align16 mode, so swizzles
|
||||
/* Gfx6 MATH instructions can not execute in align16 mode, so swizzles
|
||||
* are not allowed.
|
||||
*/
|
||||
if (devinfo->ver == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
|
||||
|
@ -1871,7 +1871,7 @@ vec4_visitor::lower_minmax()
|
|||
inst->predicate == BRW_PREDICATE_NONE) {
|
||||
/* If src1 is an immediate value that is not NaN, then it can't be
|
||||
* NaN. In that case, emit CMP because it is much better for cmod
|
||||
* propagation. Likewise if src1 is not float. Gen4 and Gen5 don't
|
||||
* propagation. Likewise if src1 is not float. Gfx4 and Gfx5 don't
|
||||
* support HF or DF, so it is not necessary to check for those.
|
||||
*/
|
||||
if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
|
||||
|
@ -2380,7 +2380,7 @@ scalarize_predicate(brw_predicate predicate, unsigned writemask)
|
|||
}
|
||||
}
|
||||
|
||||
/* Gen7 has a hardware decompression bug that we can exploit to represent
|
||||
/* Gfx7 has a hardware decompression bug that we can exploit to represent
|
||||
* handful of additional swizzles natively.
|
||||
*/
|
||||
static bool
|
||||
|
|
|
@ -495,7 +495,7 @@ namespace brw {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gen4 predicated IF.
|
||||
* Gfx4 predicated IF.
|
||||
*/
|
||||
instruction *
|
||||
IF(brw_predicate predicate) const
|
||||
|
@ -504,7 +504,7 @@ namespace brw {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gen6 IF with embedded comparison.
|
||||
* Gfx6 IF with embedded comparison.
|
||||
*/
|
||||
instruction *
|
||||
IF(const src_reg &src0, const src_reg &src1,
|
||||
|
|
|
@ -129,7 +129,7 @@ generate_tex(struct brw_codegen *p,
|
|||
break;
|
||||
case SHADER_OPCODE_TXD:
|
||||
if (inst->shadow_compare) {
|
||||
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
|
||||
/* Gfx7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
|
||||
assert(devinfo->is_haswell);
|
||||
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
|
||||
} else {
|
||||
|
@ -1069,7 +1069,7 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
|
|||
/* Zero the message header */
|
||||
brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
|
||||
|
||||
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gen7.5+) or 15:12 (Gen7) */
|
||||
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gfx7.5+) or 15:12 (Gfx7) */
|
||||
brw_AND(p, m0_2,
|
||||
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13)));
|
||||
|
|
|
@ -604,7 +604,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
|
||||
/* The GLSL linker will have already matched up GS inputs and the outputs
|
||||
* of prior stages. The driver does extend VS outputs in some cases, but
|
||||
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
|
||||
* only for legacy OpenGL or Gfx4-5 hardware, neither of which offer
|
||||
* geometry shader support. So we can safely ignore that.
|
||||
*
|
||||
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
||||
|
@ -915,7 +915,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
* the best choice for performance, followed by SINGLE mode."
|
||||
*
|
||||
* So SINGLE mode is more performant when invocations == 1 and DUAL_INSTANCE
|
||||
* mode is more performant when invocations > 1. Gen6 only supports
|
||||
* mode is more performant when invocations > 1. Gfx6 only supports
|
||||
* SINGLE mode.
|
||||
*/
|
||||
if (prog_data->invocations <= 1 || compiler->devinfo->ver < 7)
|
||||
|
|
|
@ -189,7 +189,7 @@ ALU2_ACC(SUBB)
|
|||
ALU2(MAC)
|
||||
ALU1(DIM)
|
||||
|
||||
/** Gen4 predicated IF. */
|
||||
/** Gfx4 predicated IF. */
|
||||
vec4_instruction *
|
||||
vec4_visitor::IF(enum brw_predicate predicate)
|
||||
{
|
||||
|
@ -201,7 +201,7 @@ vec4_visitor::IF(enum brw_predicate predicate)
|
|||
return inst;
|
||||
}
|
||||
|
||||
/** Gen6 IF with embedded comparison. */
|
||||
/** Gfx6 IF with embedded comparison. */
|
||||
vec4_instruction *
|
||||
vec4_visitor::IF(src_reg src0, src_reg src1,
|
||||
enum brw_conditional_mod condition)
|
||||
|
@ -340,7 +340,7 @@ vec4_visitor::emit_math(enum opcode opcode,
|
|||
emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
|
||||
|
||||
if (devinfo->ver == 6 && dst.writemask != WRITEMASK_XYZW) {
|
||||
/* MATH on Gen6 must be align1, so we can't do writemasks. */
|
||||
/* MATH on Gfx6 must be align1, so we can't do writemasks. */
|
||||
math->dst = dst_reg(this, glsl_type::vec4_type);
|
||||
math->dst.type = dst.type;
|
||||
math = emit(MOV(dst, src_reg(math->dst)));
|
||||
|
@ -872,7 +872,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
|
|||
inst->offset = constant_offset;
|
||||
|
||||
/* The message header is necessary for:
|
||||
* - Gen4 (always)
|
||||
* - Gfx4 (always)
|
||||
* - Texel offsets
|
||||
* - Gather channel selection
|
||||
* - Sampler indices too large to fit in a 4-bit value.
|
||||
|
@ -1000,7 +1000,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
|
|||
* spec requires layers.
|
||||
*/
|
||||
if (op == ir_txs && devinfo->ver < 7) {
|
||||
/* Gen4-6 return 0 instead of 1 for single layer surfaces. */
|
||||
/* Gfx4-6 return 0 instead of 1 for single layer surfaces. */
|
||||
emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
|
||||
src_reg(inst->dst), brw_imm_d(1));
|
||||
}
|
||||
|
@ -1019,7 +1019,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
|
|||
}
|
||||
|
||||
/**
|
||||
* Apply workarounds for Gen6 gather with UINT/SINT
|
||||
* Apply workarounds for Gfx6 gather with UINT/SINT
|
||||
*/
|
||||
void
|
||||
vec4_visitor::emit_gfx6_gather_wa(uint8_t wa, dst_reg dst)
|
||||
|
|
|
@ -119,7 +119,7 @@ brw_compute_vue_map(const struct gen_device_info *devinfo,
|
|||
* dword 8-11 is the first vertex data.
|
||||
*
|
||||
* On Ironlake the VUE header is nominally 20 dwords, but the hardware
|
||||
* will accept the same header layout as Gen4 [and should be a bit faster]
|
||||
* will accept the same header layout as Gfx4 [and should be a bit faster]
|
||||
*/
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
|
||||
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
/**
|
||||
* \file gfx6_gs_visitor.cpp
|
||||
*
|
||||
* Gen6 geometry shader implementation
|
||||
* Gfx6 geometry shader implementation
|
||||
*/
|
||||
|
||||
#include "gfx6_gs_visitor.h"
|
||||
|
@ -39,7 +39,7 @@ gfx6_gs_visitor::emit_prolog()
|
|||
{
|
||||
vec4_gs_visitor::emit_prolog();
|
||||
|
||||
/* Gen6 geometry shaders require to allocate an initial VUE handle via
|
||||
/* Gfx6 geometry shaders require to allocate an initial VUE handle via
|
||||
* FF_SYNC message, however the documentation remarks that only one thread
|
||||
* can write to the URB simultaneously and the FF_SYNC message provides the
|
||||
* synchronization mechanism for this, so using this message effectively
|
||||
|
@ -584,7 +584,7 @@ gfx6_gs_visitor::xfb_write()
|
|||
num_verts = 3;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unexpected primitive type in Gen6 SOL program.");
|
||||
unreachable("Unexpected primitive type in Gfx6 SOL program.");
|
||||
}
|
||||
|
||||
this->current_annotation = "gfx6 thread end: svb writes init";
|
||||
|
|
|
@ -294,7 +294,7 @@ run_tests(const struct gen_device_info *devinfo)
|
|||
continue;
|
||||
|
||||
for (int align_16 = 0; align_16 <= 1; align_16++) {
|
||||
/* Align16 support is not present on Gen11+ */
|
||||
/* Align16 support is not present on Gfx11+ */
|
||||
if (devinfo->ver >= 11 && align_16)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -167,7 +167,7 @@ TEST_P(validation_test, math_src1_null_reg)
|
|||
gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);
|
||||
EXPECT_FALSE(validate(p));
|
||||
} else {
|
||||
/* Math instructions on Gen4/5 are actually SEND messages with payloads.
|
||||
/* Math instructions on Gfx4/5 are actually SEND messages with payloads.
|
||||
* src1 is an immediate message descriptor set by gfx4_math.
|
||||
*/
|
||||
}
|
||||
|
@ -178,7 +178,7 @@ TEST_P(validation_test, opcode46)
|
|||
/* opcode 46 is "push" on Gen 4 and 5
|
||||
* "fork" on Gen 6
|
||||
* reserved on Gen 7
|
||||
* "goto" on Gen8+
|
||||
* "goto" on Gfx8+
|
||||
*/
|
||||
brw_next_insn(p, brw_opcode_decode(&devinfo, 46));
|
||||
|
||||
|
@ -231,7 +231,7 @@ TEST_P(validation_test, invalid_exec_size_encoding)
|
|||
|
||||
TEST_P(validation_test, invalid_file_encoding)
|
||||
{
|
||||
/* Register file on Gen12 is only one bit */
|
||||
/* Register file on Gfx12 is only one bit */
|
||||
if (devinfo.ver >= 12)
|
||||
return;
|
||||
|
||||
|
@ -373,7 +373,7 @@ TEST_P(validation_test, invalid_type_encoding)
|
|||
|
||||
TEST_P(validation_test, invalid_type_encoding_3src_a16)
|
||||
{
|
||||
/* 3-src instructions in align16 mode only supported on Gen6-10 */
|
||||
/* 3-src instructions in align16 mode only supported on Gfx6-10 */
|
||||
if (devinfo.ver < 6 || devinfo.ver > 10)
|
||||
return;
|
||||
|
||||
|
@ -453,7 +453,7 @@ TEST_P(validation_test, invalid_type_encoding_3src_a16)
|
|||
|
||||
TEST_P(validation_test, invalid_type_encoding_3src_a1)
|
||||
{
|
||||
/* 3-src instructions in align1 mode only supported on Gen10+ */
|
||||
/* 3-src instructions in align1 mode only supported on Gfx10+ */
|
||||
if (devinfo.ver < 10)
|
||||
return;
|
||||
|
||||
|
@ -482,7 +482,7 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
|
|||
{ BRW_REGISTER_TYPE_UW, E(INT), true },
|
||||
|
||||
/* There are no ternary instructions that can operate on B-type sources
|
||||
* on Gen11-12. Src1/Src2 cannot be B-typed either.
|
||||
* on Gfx11-12. Src1/Src2 cannot be B-typed either.
|
||||
*/
|
||||
{ BRW_REGISTER_TYPE_B, E(INT), false },
|
||||
{ BRW_REGISTER_TYPE_UB, E(INT), false },
|
||||
|
@ -551,11 +551,11 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
|
|||
|
||||
TEST_P(validation_test, 3src_inst_access_mode)
|
||||
{
|
||||
/* 3-src instructions only supported on Gen6+ */
|
||||
/* 3-src instructions only supported on Gfx6+ */
|
||||
if (devinfo.ver < 6)
|
||||
return;
|
||||
|
||||
/* No access mode bit on Gen12+ */
|
||||
/* No access mode bit on Gfx12+ */
|
||||
if (devinfo.ver >= 12)
|
||||
return;
|
||||
|
||||
|
@ -750,7 +750,7 @@ TEST_P(validation_test, dst_horizontal_stride_0)
|
|||
|
||||
clear_instructions(p);
|
||||
|
||||
/* Align16 does not exist on Gen11+ */
|
||||
/* Align16 does not exist on Gfx11+ */
|
||||
if (devinfo.ver >= 11)
|
||||
return;
|
||||
|
||||
|
@ -801,7 +801,7 @@ TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
|
|||
/* Destination Horizontal must be 1 in Align16 */
|
||||
TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
|
||||
{
|
||||
/* Align16 does not exist on Gen11+ */
|
||||
/* Align16 does not exist on Gfx11+ */
|
||||
if (devinfo.ver >= 11)
|
||||
return;
|
||||
|
||||
|
@ -823,7 +823,7 @@ TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
|
|||
/* VertStride must be 0 or 4 in Align16 */
|
||||
TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
|
||||
{
|
||||
/* Align16 does not exist on Gen11+ */
|
||||
/* Align16 does not exist on Gfx11+ */
|
||||
if (devinfo.ver >= 11)
|
||||
return;
|
||||
|
||||
|
@ -2078,7 +2078,7 @@ TEST_P(validation_test, vector_immediate_destination_alignment)
|
|||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
|
||||
/* UV type is Gen6+ */
|
||||
/* UV type is Gfx6+ */
|
||||
if (devinfo.ver < 6 &&
|
||||
move[i].src_type == BRW_REGISTER_TYPE_UV)
|
||||
continue;
|
||||
|
@ -2120,7 +2120,7 @@ TEST_P(validation_test, vector_immediate_destination_stride)
|
|||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
|
||||
/* UV type is Gen6+ */
|
||||
/* UV type is Gfx6+ */
|
||||
if (devinfo.ver < 6 &&
|
||||
move[i].src_type == BRW_REGISTER_TYPE_UV)
|
||||
continue;
|
||||
|
@ -2271,11 +2271,11 @@ TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* These restrictions only apply to Gen8+ */
|
||||
/* These restrictions only apply to Gfx8+ */
|
||||
if (devinfo.ver < 8)
|
||||
return;
|
||||
|
||||
/* NoDDChk/NoDDClr does not exist on Gen12+ */
|
||||
/* NoDDChk/NoDDClr does not exist on Gfx12+ */
|
||||
if (devinfo.ver >= 12)
|
||||
return;
|
||||
|
||||
|
@ -2407,7 +2407,7 @@ TEST_P(validation_test, qword_low_power_no_indirect_addressing)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* These restrictions only apply to Gen8+ */
|
||||
/* These restrictions only apply to Gfx8+ */
|
||||
if (devinfo.ver < 8)
|
||||
return;
|
||||
|
||||
|
@ -2555,7 +2555,7 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* These restrictions only apply to Gen8+ */
|
||||
/* These restrictions only apply to Gfx8+ */
|
||||
if (devinfo.ver < 8)
|
||||
return;
|
||||
|
||||
|
@ -2660,11 +2660,11 @@ TEST_P(validation_test, align16_64_bit_integer)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* 64-bit integer types exist on Gen8+ */
|
||||
/* 64-bit integer types exist on Gfx8+ */
|
||||
if (devinfo.ver < 8)
|
||||
return;
|
||||
|
||||
/* Align16 does not exist on Gen11+ */
|
||||
/* Align16 does not exist on Gfx11+ */
|
||||
if (devinfo.ver >= 11)
|
||||
return;
|
||||
|
||||
|
@ -2768,11 +2768,11 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* These restrictions only apply to Gen8+ */
|
||||
/* These restrictions only apply to Gfx8+ */
|
||||
if (devinfo.ver < 8)
|
||||
return;
|
||||
|
||||
/* NoDDChk/NoDDClr does not exist on Gen12+ */
|
||||
/* NoDDChk/NoDDClr does not exist on Gfx12+ */
|
||||
if (devinfo.ver >= 12)
|
||||
return;
|
||||
|
||||
|
|
|
@ -439,7 +439,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = {
|
|||
[MESA_SHADER_VERTEX] = 2560,
|
||||
[MESA_SHADER_TESS_CTRL] = 504,
|
||||
[MESA_SHADER_TESS_EVAL] = 1536,
|
||||
/* Reduced from 960, seems to be similar to the bug on Gen9 GT1. */
|
||||
/* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
|
||||
[MESA_SHADER_GEOMETRY] = 690,
|
||||
},
|
||||
},
|
||||
|
@ -1252,7 +1252,7 @@ gen_get_device_info_from_pci_id(int pci_id,
|
|||
* allocate scratch space enough so that each slice has 4 slices allowed."
|
||||
*
|
||||
* The equivalent internal documentation says that this programming note
|
||||
* applies to all Gen9+ platforms.
|
||||
* applies to all Gfx9+ platforms.
|
||||
*
|
||||
* The hardware typically calculates the scratch space pointer by taking
|
||||
* the base address, and adding per-thread-scratch-space * thread ID.
|
||||
|
@ -1321,7 +1321,7 @@ getparam_topology(struct gen_device_info *devinfo, int fd)
|
|||
return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
|
||||
|
||||
maybe_warn:
|
||||
/* Only with Gen8+ are we starting to see devices with fusing that can only
|
||||
/* Only with Gfx8+ are we starting to see devices with fusing that can only
|
||||
* be detected at runtime.
|
||||
*/
|
||||
if (devinfo->ver >= 8)
|
||||
|
@ -1445,7 +1445,7 @@ gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
|
|||
}
|
||||
|
||||
if (devinfo->ver == 10) {
|
||||
mesa_loge("Gen10 support is redacted.");
|
||||
mesa_loge("Gfx10 support is redacted.");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -195,7 +195,7 @@ struct gen_device_info
|
|||
* automatically scale pixel shader thread count, based on a single value
|
||||
* programmed into 3DSTATE_PS.
|
||||
*
|
||||
* To calculate the maximum number of threads for Gen8 beyond (which have
|
||||
* To calculate the maximum number of threads for Gfx8 beyond (which have
|
||||
* multiple Pixel Shader Dispatchers):
|
||||
*
|
||||
* - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
|
||||
|
@ -216,10 +216,10 @@ struct gen_device_info
|
|||
/**
|
||||
* Fixed size of the URB.
|
||||
*
|
||||
* On Gen6 and DG1, this is measured in KB. Gen4-5 instead measure
|
||||
* On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure
|
||||
* this in 512b blocks, as that's more convenient there.
|
||||
*
|
||||
* On most Gen7+ platforms, the URB is a section of the L3 cache,
|
||||
* On most Gfx7+ platforms, the URB is a section of the L3 cache,
|
||||
* and can be resized based on the L3 programming. For those platforms,
|
||||
* simply leave this field blank (zero) - it isn't used.
|
||||
*/
|
||||
|
@ -247,7 +247,7 @@ struct gen_device_info
|
|||
* could be assumed to be 12.5MHz, where the least significant bit neatly
|
||||
* corresponded to 80 nanoseconds.
|
||||
*
|
||||
* Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for
|
||||
* Since Gfx9 the numbers aren't so round, with a a frequency of 12MHz for
|
||||
* SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
|
||||
* BXT.
|
||||
*
|
||||
|
|
|
@ -711,8 +711,8 @@
|
|||
<field name="Height" start="80" end="93" type="uint"/>
|
||||
<field name="Surface Pitch" start="96" end="113" type="uint"/>
|
||||
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
|
||||
<value name="Gen9" value="0"/>
|
||||
<value name="Gen10+" value="1"/>
|
||||
<value name="Gfx9" value="0"/>
|
||||
<value name="Gfx10+" value="1"/>
|
||||
</field>
|
||||
<field name="Depth" start="117" end="127" type="uint"/>
|
||||
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>
|
||||
|
|
|
@ -733,8 +733,8 @@
|
|||
<field name="Null Probing Enable" start="114" end="114" type="uint"/>
|
||||
<field name="Standard Tiling Mode Extensions" start="115" end="115" type="uint"/>
|
||||
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
|
||||
<value name="Gen9" value="0"/>
|
||||
<value name="Gen10+" value="1"/>
|
||||
<value name="Gfx9" value="0"/>
|
||||
<value name="Gfx10+" value="1"/>
|
||||
</field>
|
||||
<field name="Depth" start="117" end="127" type="uint"/>
|
||||
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>
|
||||
|
|
|
@ -748,8 +748,8 @@
|
|||
<field name="Null Probing Enable" start="114" end="114" type="uint"/>
|
||||
<field name="Standard Tiling Mode Extensions" start="115" end="115" type="uint"/>
|
||||
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
|
||||
<value name="Gen9" value="0"/>
|
||||
<value name="Gen10+" value="1"/>
|
||||
<value name="Gfx9" value="0"/>
|
||||
<value name="Gfx10+" value="1"/>
|
||||
</field>
|
||||
<field name="Depth" start="117" end="127" type="uint"/>
|
||||
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>
|
||||
|
|
|
@ -70,7 +70,7 @@ Definitions
|
|||
the value of s0.
|
||||
|
||||
For example, the logical array length of a 3D surface is always 1, even on
|
||||
Gen9 where the surface's memory layout is that of an array surface
|
||||
Gfx9 where the surface's memory layout is that of an array surface
|
||||
(ISL_DIM_LAYOUT_GEN4_2D).
|
||||
|
||||
- Physical Surface Samples (sa):
|
||||
|
|
|
@ -193,7 +193,7 @@ isl_device_init(struct isl_device *dev,
|
|||
const struct gen_device_info *info,
|
||||
bool has_bit6_swizzling)
|
||||
{
|
||||
/* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
|
||||
/* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
|
||||
assert(!(has_bit6_swizzling && info->ver >= 8));
|
||||
|
||||
dev->info = info;
|
||||
|
@ -411,8 +411,8 @@ isl_tiling_get_info(enum isl_tiling tiling,
|
|||
break;
|
||||
|
||||
case ISL_TILING_GEN12_CCS:
|
||||
/* From the Bspec, Gen Graphics > Gen12 > Memory Data Formats > Memory
|
||||
* Compression > Memory Compression - Gen12:
|
||||
/* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
|
||||
* Compression > Memory Compression - Gfx12:
|
||||
*
|
||||
* 4 bits of auxiliary plane data are required for 2 cachelines of
|
||||
* main surface data. This results in a single cacheline of auxiliary
|
||||
|
@ -1071,7 +1071,7 @@ isl_calc_array_pitch_el_rows_gfx4_2d(
|
|||
* but the second restriction, which is an extension of the first, only
|
||||
* applies to qpitch and must be applied here.
|
||||
*
|
||||
* The second restriction disappears on Gen12.
|
||||
* The second restriction disappears on Gfx12.
|
||||
*/
|
||||
assert(fmtl->bh == 4);
|
||||
pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
|
||||
|
@ -1413,7 +1413,7 @@ isl_calc_row_pitch_alignment(const struct isl_device *dev,
|
|||
const struct isl_tile_info *tile_info)
|
||||
{
|
||||
if (tile_info->tiling != ISL_TILING_LINEAR) {
|
||||
/* According to BSpec: 44930, Gen12's CCS-compressed surface pitches must
|
||||
/* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
|
||||
* be 512B-aligned. CCS is only support on Y tilings.
|
||||
*
|
||||
* Only consider 512B alignment when :
|
||||
|
@ -1502,7 +1502,7 @@ isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
|
|||
tile_info->logical_extent_el.width);
|
||||
|
||||
/* In some cases the alignment of the pitch might be > to the tile size
|
||||
* (for example Gen12 CCS requires 512B alignment while the tile's width
|
||||
* (for example Gfx12 CCS requires 512B alignment while the tile's width
|
||||
* can be 128B), so align the row pitch to the alignment.
|
||||
*/
|
||||
assert(alignment_B >= tile_info->phys_extent_B.width);
|
||||
|
@ -1706,7 +1706,7 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
|
||||
base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
|
||||
|
||||
/* The diagram in the Bspec section Memory Compression - Gen12, shows
|
||||
/* The diagram in the Bspec section Memory Compression - Gfx12, shows
|
||||
* that the CCS is indexed in 256B chunks. However, the
|
||||
* PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
|
||||
* pages. We currently don't assign the usage field like we do for main
|
||||
|
@ -1715,7 +1715,7 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
if (tiling == ISL_TILING_GEN12_CCS)
|
||||
base_alignment_B = MAX(base_alignment_B, 4096);
|
||||
|
||||
/* Gen12+ requires that images be 64K-aligned if they're going to used
|
||||
/* Gfx12+ requires that images be 64K-aligned if they're going to used
|
||||
* with CCS. This is because the Aux translation table maps main
|
||||
* surface addresses to aux addresses at a 64K (in the main surface)
|
||||
* granularity. Because we don't know for sure in ISL if a surface will
|
||||
|
@ -1967,7 +1967,7 @@ bool
|
|||
isl_surf_supports_ccs(const struct isl_device *dev,
|
||||
const struct isl_surf *surf)
|
||||
{
|
||||
/* CCS support does not exist prior to Gen7 */
|
||||
/* CCS support does not exist prior to Gfx7 */
|
||||
if (ISL_GFX_VER(dev) <= 6)
|
||||
return false;
|
||||
|
||||
|
@ -1991,13 +1991,13 @@ isl_surf_supports_ccs(const struct isl_device *dev,
|
|||
* - MCS and Lossless compression is supported for
|
||||
* TiledY/TileYs/TileYf non-MSRTs only.
|
||||
*
|
||||
* From the BSpec (44930) for Gen12:
|
||||
* From the BSpec (44930) for Gfx12:
|
||||
*
|
||||
* Linear CCS is only allowed for Untyped Buffers but only via HDC
|
||||
* Data-Port messages.
|
||||
*
|
||||
* We never use untyped messages on surfaces created by ISL on Gen9+ so
|
||||
* this means linear is out on Gen12+ as well.
|
||||
* We never use untyped messages on surfaces created by ISL on Gfx9+ so
|
||||
* this means linear is out on Gfx12+ as well.
|
||||
*/
|
||||
if (surf->tiling == ISL_TILING_LINEAR)
|
||||
return false;
|
||||
|
@ -2006,7 +2006,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
|
|||
if (isl_surf_usage_is_stencil(surf->usage) && surf->samples > 1)
|
||||
return false;
|
||||
|
||||
/* On Gen12, all CCS-compressed surface pitches must be multiples of
|
||||
/* On Gfx12, all CCS-compressed surface pitches must be multiples of
|
||||
* 512B.
|
||||
*/
|
||||
if (surf->row_pitch_B % 512 != 0)
|
||||
|
@ -2042,7 +2042,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
|
|||
if (surf->samples > 1)
|
||||
return false;
|
||||
|
||||
/* CCS is only for color images on Gen7-11 */
|
||||
/* CCS is only for color images on Gfx7-11 */
|
||||
if (isl_surf_usage_is_depth_or_stencil(surf->usage))
|
||||
return false;
|
||||
|
||||
|
@ -2127,7 +2127,7 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* On Gen12, the CCS is a scaled-down version of the main surface. We
|
||||
/* On Gfx12, the CCS is a scaled-down version of the main surface. We
|
||||
* model this as the CCS compressing a 2D-view of the entire surface.
|
||||
*/
|
||||
struct isl_surf *ccs_surf =
|
||||
|
|
|
@ -478,7 +478,7 @@ enum isl_tiling {
|
|||
ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */
|
||||
ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */
|
||||
ISL_TILING_CCS, /**< Tiling format for CCS surfaces */
|
||||
ISL_TILING_GEN12_CCS, /**< Tiling format for Gen12 CCS surfaces */
|
||||
ISL_TILING_GEN12_CCS, /**< Tiling format for Gfx12 CCS surfaces */
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -619,7 +619,7 @@ enum isl_aux_usage {
|
|||
ISL_AUX_USAGE_CCS_E,
|
||||
|
||||
/** The auxiliary surface provides full lossless color compression on
|
||||
* Gen12.
|
||||
* Gfx12.
|
||||
*
|
||||
* @invariant isl_surf::samples == 1
|
||||
*/
|
||||
|
@ -660,7 +660,7 @@ enum isl_aux_usage {
|
|||
/** The auxiliary surface is an MCS and CCS is also enabled
|
||||
*
|
||||
* In this mode, we have fused MCS+CCS compression where the MCS is used
|
||||
* for fast-clears and "identical samples" compression just like on Gen7-11
|
||||
* for fast-clears and "identical samples" compression just like on Gfx7-11
|
||||
* but each plane is then CCS compressed.
|
||||
*
|
||||
* @invariant isl_surf::samples > 1
|
||||
|
|
|
@ -143,7 +143,7 @@ isl_drm_modifier_get_score(const struct gen_device_info *devinfo,
|
|||
case I915_FORMAT_MOD_Y_TILED:
|
||||
return 3;
|
||||
case I915_FORMAT_MOD_Y_TILED_CCS:
|
||||
/* Gen12's CCS layout differs from Gen9-11. */
|
||||
/* Gfx12's CCS layout differs from Gfx9-11. */
|
||||
if (devinfo->ver >= 12)
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -928,7 +928,7 @@ isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo,
|
|||
!isl_format_supports_ccs_e(devinfo, format2))
|
||||
return false;
|
||||
|
||||
/* Gen12 added CCS_E support for A8_UNORM, A8_UNORM and R8_UNORM share the
|
||||
/* Gfx12 added CCS_E support for A8_UNORM, A8_UNORM and R8_UNORM share the
|
||||
* same aux map format encoding so they are definitely compatible.
|
||||
*/
|
||||
if (format1 == ISL_FORMAT_A8_UNORM)
|
||||
|
|
|
@ -30,7 +30,7 @@ isl_gfx4_choose_msaa_layout(const struct isl_device *dev,
|
|||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
{
|
||||
/* Gen4 and Gen5 do not support MSAA */
|
||||
/* Gfx4 and Gfx5 do not support MSAA */
|
||||
assert(info->samples >= 1);
|
||||
|
||||
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
|
||||
|
@ -42,7 +42,7 @@ isl_gfx4_filter_tiling(const struct isl_device *dev,
|
|||
const struct isl_surf_init_info *restrict info,
|
||||
isl_tiling_flags_t *flags)
|
||||
{
|
||||
/* Gen4-5 only support linear, X, and Y-tiling. */
|
||||
/* Gfx4-5 only support linear, X, and Y-tiling. */
|
||||
*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT | ISL_TILING_Y0_BIT);
|
||||
|
||||
if (isl_surf_usage_is_depth_or_stencil(info->usage)) {
|
||||
|
|
|
@ -303,7 +303,7 @@ isl_gfx6_filter_tiling(const struct isl_device *dev,
|
|||
* "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either
|
||||
* TileX or Linear."
|
||||
*
|
||||
* This is necessary all the way back to 965, but is permitted on Gen7+.
|
||||
* This is necessary all the way back to 965, but is permitted on Gfx7+.
|
||||
*/
|
||||
if (ISL_GFX_VER(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128)
|
||||
*flags &= ~ISL_TILING_Y0_BIT;
|
||||
|
|
|
@ -183,7 +183,7 @@ isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
if (isl_format_is_compressed(info->format)) {
|
||||
/* On Gen9, the meaning of RENDER_SURFACE_STATE's
|
||||
/* On Gfx9, the meaning of RENDER_SURFACE_STATE's
|
||||
* SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for
|
||||
* compressed formats. They now indicate a multiple of the compression
|
||||
* block. For example, if the compression mode is ETC2 then HALIGN_4
|
||||
|
|
|
@ -302,10 +302,10 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
* a few things which back this up:
|
||||
*
|
||||
* 1. The docs are also pretty clear that this bit was added as part
|
||||
* of enabling Gen12 depth/stencil lossless compression.
|
||||
* of enabling Gfx12 depth/stencil lossless compression.
|
||||
*
|
||||
* 2. The only new difference between depth/stencil and color images on
|
||||
* Gen12 (where the bit was added) is how they treat CCS compression.
|
||||
* Gfx12 (where the bit was added) is how they treat CCS compression.
|
||||
* All other differences such as alignment requirements and MSAA layout
|
||||
* are already covered by other bits.
|
||||
*
|
||||
|
@ -626,14 +626,14 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
assert(info->aux_usage == ISL_AUX_USAGE_STC_CCS);
|
||||
|
||||
if (isl_aux_usage_has_hiz(info->aux_usage)) {
|
||||
/* For Gen8-10, there are some restrictions around sampling from HiZ.
|
||||
/* For Gfx8-10, there are some restrictions around sampling from HiZ.
|
||||
* The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
|
||||
* say:
|
||||
*
|
||||
* "If this field is set to AUX_HIZ, Number of Multisamples must
|
||||
* be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
|
||||
*
|
||||
* On Gen12, the docs are a bit less obvious but the restriction is
|
||||
* On Gfx12, the docs are a bit less obvious but the restriction is
|
||||
* the same. The limitation isn't called out explicitly but the docs
|
||||
* for the CCS_E value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode
|
||||
* say:
|
||||
|
@ -677,7 +677,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
|
||||
/* The auxiliary buffer info is filled when it's useable by the HW.
|
||||
*
|
||||
* Starting with Gen12, the only form of compression that can be used
|
||||
* Starting with Gfx12, the only form of compression that can be used
|
||||
* with RENDER_SURFACE_STATE which requires an aux surface is MCS.
|
||||
* HiZ still requires a surface but the HiZ surface can only be
|
||||
* accessed through 3DSTATE_HIER_DEPTH_BUFFER.
|
||||
|
@ -746,7 +746,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
s.ClearValueAddressEnable = true;
|
||||
s.ClearValueAddress = info->clear_address;
|
||||
#else
|
||||
unreachable("Gen9 and earlier do not support indirect clear colors");
|
||||
unreachable("Gfx9 and earlier do not support indirect clear colors");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -755,7 +755,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
* From BXML > GT > Shared Functions > vol5c Shared Functions >
|
||||
* [Structure] RENDER_SURFACE_STATE [BDW+] > ClearColorConversionEnable:
|
||||
*
|
||||
* Project: Gen11
|
||||
* Project: Gfx11
|
||||
*
|
||||
* "Enables Pixel backend hw to convert clear values into native format
|
||||
* and write back to clear address, so that display and sampler can use
|
||||
|
|
|
@ -383,8 +383,8 @@ compute_topology_builtins(struct gen_perf_config *perf,
|
|||
|
||||
perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
|
||||
|
||||
/* The subslice mask builtin contains bits for all slices. Prior to Gen11
|
||||
* it had groups of 3bits for each slice, on Gen11 it's 8bits for each
|
||||
/* The subslice mask builtin contains bits for all slices. Prior to Gfx11
|
||||
* it had groups of 3bits for each slice, on Gfx11 it's 8bits for each
|
||||
* slice.
|
||||
*
|
||||
* Ideally equations would be updated to have a slice/subslice query
|
||||
|
@ -727,7 +727,7 @@ oa_metrics_available(struct gen_perf_config *perf, int fd,
|
|||
*/
|
||||
if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {
|
||||
|
||||
/* If _paranoid == 1 then on Gen8+ we won't be able to access OA
|
||||
/* If _paranoid == 1 then on Gfx8+ we won't be able to access OA
|
||||
* metrics unless running as root.
|
||||
*/
|
||||
if (devinfo->is_haswell)
|
||||
|
@ -1023,8 +1023,8 @@ gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
|
|||
* OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
|
||||
* global register (see drivers/gpu/drm/i915/i915_perf.c)
|
||||
*
|
||||
* Documentation says this should be available on Gen9+ but experimentation
|
||||
* shows that Gen8 reports similar values, so we enable it there too.
|
||||
* Documentation says this should be available on Gfx9+ but experimentation
|
||||
* shows that Gfx8 reports similar values, so we enable it there too.
|
||||
*/
|
||||
if (devinfo->ver < 8)
|
||||
return;
|
||||
|
|
|
@ -106,7 +106,7 @@ struct gen_pipeline_stat {
|
|||
* The largest OA formats we can use include:
|
||||
* For Haswell:
|
||||
* 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
|
||||
* For Gen8+
|
||||
* For Gfx8+
|
||||
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
|
||||
*
|
||||
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
|
||||
|
@ -492,7 +492,7 @@ gen_perf_has_hold_preemption(const struct gen_perf_config *perf)
|
|||
}
|
||||
|
||||
/** Whether we have the ability to lock EU array power configuration for the
|
||||
* duration of the performance recording. This is useful on Gen11 where the HW
|
||||
* duration of the performance recording. This is useful on Gfx11 where the HW
|
||||
* architecture requires half the EU for particular workloads.
|
||||
*/
|
||||
static inline bool
|
||||
|
|
|
@ -185,7 +185,7 @@ hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
|
|||
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
||||
hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
|
||||
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
|
||||
# subslice_mask is interchangeable with subslice/dual-subslice since Gen12+
|
||||
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
|
||||
# only has dual subslices which can be assimilated with 16EUs subslices.
|
||||
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||
|
|
|
@ -124,7 +124,7 @@ struct mdapi_pipeline_metrics {
|
|||
uint64_t HSInvocations;
|
||||
uint64_t DSInvocations;
|
||||
uint64_t CSInvocations;
|
||||
uint64_t Reserved1; /* Gen10+ */
|
||||
uint64_t Reserved1; /* Gfx10+ */
|
||||
};
|
||||
|
||||
int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
|
|
|
@ -1213,7 +1213,7 @@ oa_report_ctx_id_valid(const struct gen_device_info *devinfo,
|
|||
*
|
||||
* These periodic snapshots help to ensure we handle counter overflow
|
||||
* correctly by being frequent enough to ensure we don't miss multiple
|
||||
* overflows of a counter between snapshots. For Gen8+ the i915 perf
|
||||
* overflows of a counter between snapshots. For Gfx8+ the i915 perf
|
||||
* snapshots provide the extra context-switch reports that let us
|
||||
* subtract out the progress of counters associated with other
|
||||
* contexts running on the system.
|
||||
|
@ -1244,7 +1244,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||
goto error;
|
||||
}
|
||||
|
||||
/* On Gen12+ OA reports are sourced from per context counters, so we don't
|
||||
/* On Gfx12+ OA reports are sourced from per context counters, so we don't
|
||||
* ever have to look at the global OA buffer. Yey \o/
|
||||
*/
|
||||
if (perf_ctx->devinfo->ver >= 12) {
|
||||
|
@ -1300,7 +1300,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||
goto end;
|
||||
}
|
||||
|
||||
/* For Gen8+ since the counters continue while other
|
||||
/* For Gfx8+ since the counters continue while other
|
||||
* contexts are running we need to discount any unrelated
|
||||
* deltas. The hardware automatically generates a report
|
||||
* on context switch which gives us a new reference point
|
||||
|
|
|
@ -429,7 +429,7 @@ aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const vo
|
|||
aub_map_ggtt(aub, virt_addr, size);
|
||||
|
||||
/* We write the GGTT buffer through the GGTT aub command rather than the
|
||||
* PHYSICAL aub command. This is because the Gen9 simulator seems to have 2
|
||||
* PHYSICAL aub command. This is because the Gfx9 simulator seems to have 2
|
||||
* different set of memory pools for GGTT and physical (probably someone
|
||||
* didn't really understand the concept?).
|
||||
*/
|
||||
|
|
|
@ -467,7 +467,7 @@ decode_single_ksp(struct aub_viewer_decode_ctx *ctx,
|
|||
const uint32_t *p)
|
||||
{
|
||||
uint64_t ksp = 0;
|
||||
bool is_simd8 = false; /* vertex shaders on Gen8+ only */
|
||||
bool is_simd8 = false; /* vertex shaders on Gfx8+ only */
|
||||
bool is_enabled = true;
|
||||
|
||||
struct intel_field_iterator iter;
|
||||
|
|
|
@ -1450,9 +1450,9 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
|||
* According to the other driver team, this applies to compute shaders
|
||||
* as well. This is not currently documented at all.
|
||||
*
|
||||
* This hack is no longer necessary on Gen11+.
|
||||
* This hack is no longer necessary on Gfx11+.
|
||||
*
|
||||
* For, Gen11+, scratch space allocation is based on the number of threads
|
||||
* For, Gfx11+, scratch space allocation is based on the number of threads
|
||||
* in the base configuration.
|
||||
*/
|
||||
if (devinfo->ver == 12)
|
||||
|
@ -1610,7 +1610,7 @@ static uint32_t
|
|||
anv_device_get_bo_align(struct anv_device *device,
|
||||
enum anv_bo_alloc_flags alloc_flags)
|
||||
{
|
||||
/* Gen12 CCS surface addresses need to be 64K aligned. */
|
||||
/* Gfx12 CCS surface addresses need to be 64K aligned. */
|
||||
if (device->info.ver >= 12 && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS))
|
||||
return 64 * 1024;
|
||||
|
||||
|
|
|
@ -678,7 +678,7 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
|||
} else if (devinfo.ver == 7 && devinfo.is_baytrail) {
|
||||
mesa_logw("Bay Trail Vulkan support is incomplete");
|
||||
} else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
|
||||
/* Gen8-12 fully supported */
|
||||
/* Gfx8-12 fully supported */
|
||||
} else {
|
||||
result = vk_errorfi(instance, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
|
||||
"Vulkan not yet supported on %s", device_name);
|
||||
|
@ -861,12 +861,12 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
|||
|
||||
/* Broadwell PRM says:
|
||||
*
|
||||
* "Before Gen8, there was a historical configuration control field to
|
||||
* "Before Gfx8, there was a historical configuration control field to
|
||||
* swizzle address bit[6] for in X/Y tiling modes. This was set in three
|
||||
* different places: TILECTL[1:0], ARB_MODE[5:4], and
|
||||
* DISP_ARB_CTL[14:13].
|
||||
*
|
||||
* For Gen8 and subsequent generations, the swizzle fields are all
|
||||
* For Gfx8 and subsequent generations, the swizzle fields are all
|
||||
* reserved, and the CPU's memory controller performs all address
|
||||
* swizzling modifications."
|
||||
*/
|
||||
|
|
|
@ -568,7 +568,7 @@ add_aux_surface_if_supported(struct anv_device *device,
|
|||
} else if (device->info.ver >= 12) {
|
||||
anv_perf_warn(device, &image->base,
|
||||
"The CCS_D aux mode is not yet handled on "
|
||||
"Gen12+. Not allocating a CCS buffer.");
|
||||
"Gfx12+. Not allocating a CCS buffer.");
|
||||
image->planes[plane].aux_surface.isl.size_B = 0;
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
|
|
|
@ -41,7 +41,7 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
|
|||
device->perf = NULL;
|
||||
|
||||
/* We need self modifying batches. The i915 parser prevents it on
|
||||
* Gen7.5 :( maybe one day.
|
||||
* Gfx7.5 :( maybe one day.
|
||||
*/
|
||||
if (devinfo->ver < 8)
|
||||
return;
|
||||
|
@ -136,8 +136,8 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
|||
properties[p++] = true;
|
||||
|
||||
/* If global SSEU is available, pin it to the default. This will ensure on
|
||||
* Gen11 for instance we use the full EU array. Initially when perf was
|
||||
* enabled we would use only half on Gen11 because of functional
|
||||
* Gfx11 for instance we use the full EU array. Initially when perf was
|
||||
* enabled we would use only half on Gfx11 because of functional
|
||||
* requirements.
|
||||
*/
|
||||
if (gen_perf_has_global_sseu(device->physical->perf)) {
|
||||
|
|
|
@ -488,7 +488,7 @@ struct anv_bo {
|
|||
|
||||
/** Size of the implicit CCS range at the end of the buffer
|
||||
*
|
||||
* On Gen12, CCS data is always a direct 1/256 scale-down. A single 64K
|
||||
* On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
|
||||
* page of main surface data maps to a 256B chunk of CCS data and that
|
||||
* mapping is provided on TGL-LP by the AUX table which maps virtual memory
|
||||
* addresses in the main surface to virtual memory addresses for CCS data.
|
||||
|
@ -496,7 +496,7 @@ struct anv_bo {
|
|||
* Because we can't change these maps around easily and because Vulkan
|
||||
* allows two VkImages to be bound to overlapping memory regions (as long
|
||||
* as the app is careful), it's not feasible to make this mapping part of
|
||||
* the image. (On Gen11 and earlier, the mapping was provided via
|
||||
* the image. (On Gfx11 and earlier, the mapping was provided via
|
||||
* RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
|
||||
* Instead, we attach the CCS data directly to the buffer object and setup
|
||||
* the AUX table mapping at BO creation time.
|
||||
|
@ -940,7 +940,7 @@ struct anv_physical_device {
|
|||
/** True if we can read the GPU timestamp register
|
||||
*
|
||||
* When running in a virtual context, the timestamp register is unreadable
|
||||
* on Gen12+.
|
||||
* on Gfx12+.
|
||||
*/
|
||||
bool has_reg_timestamp;
|
||||
|
||||
|
@ -2351,7 +2351,7 @@ enum anv_pipe_bits {
|
|||
*/
|
||||
ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),
|
||||
|
||||
/* This bit does not exist directly in PIPE_CONTROL. It means that Gen12
|
||||
/* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
|
||||
* AUX-TT data has changed and we need to invalidate AUX-TT data. This is
|
||||
* done by writing the AUX-TT register.
|
||||
*/
|
||||
|
@ -2359,7 +2359,7 @@ enum anv_pipe_bits {
|
|||
|
||||
/* This bit does not exist directly in PIPE_CONTROL. It means that a
|
||||
* PIPE_CONTROL with a post-sync operation will follow. This is used to
|
||||
* implement a workaround for Gen9.
|
||||
* implement a workaround for Gfx9.
|
||||
*/
|
||||
ANV_PIPE_POST_SYNC_BIT = (1 << 25),
|
||||
};
|
||||
|
@ -2712,7 +2712,7 @@ struct anv_attachment_state {
|
|||
|
||||
/** State tracking for vertex buffer flushes
|
||||
*
|
||||
* On Gen8-9, the VF cache only considers the bottom 32 bits of memory
|
||||
* On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
|
||||
* addresses. If you happen to have two vertex buffers which get placed
|
||||
* exactly 4 GiB apart and use them in back-to-back draw calls, you can get
|
||||
* collisions. In order to solve this problem, we track vertex address ranges
|
||||
|
@ -3935,7 +3935,7 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
|
|||
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
return false;
|
||||
|
||||
/* For Gen8-11, there are some restrictions around sampling from HiZ.
|
||||
/* For Gfx8-11, there are some restrictions around sampling from HiZ.
|
||||
* The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
|
||||
* say:
|
||||
*
|
||||
|
@ -4254,7 +4254,7 @@ anv_clear_color_from_att_state(union isl_color_value *clear_color,
|
|||
|
||||
/* Haswell border color is a bit of a disaster. Float and unorm formats use a
|
||||
* straightforward 32-bit float color in the first 64 bytes. Instead of using
|
||||
* a nice float/integer union like Gen8+, Haswell specifies the integer border
|
||||
* a nice float/integer union like Gfx8+, Haswell specifies the integer border
|
||||
* color as a separate entry /after/ the float color. The layout of this entry
|
||||
* also depends on the format's bpp (with extra hacks for RG32), and overlaps.
|
||||
*
|
||||
|
|
|
@ -433,7 +433,7 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
|
|||
return false;
|
||||
|
||||
/* Only gfx9+ supports returning ANV_HZ_FC_VAL when sampling a fast-cleared
|
||||
* portion of a HiZ buffer. Testing has revealed that Gen8 only supports
|
||||
* portion of a HiZ buffer. Testing has revealed that Gfx8 only supports
|
||||
* returning 0.0f. Gens prior to gfx8 do not support this feature at all.
|
||||
*/
|
||||
if (GFX_VER == 8 && anv_can_sample_with_hiz(&device->info, iview->image))
|
||||
|
@ -1888,7 +1888,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
|
|||
return;
|
||||
|
||||
#if GFX_VER >= 11
|
||||
/* On Gen11+ we use only one config, so verify it remains the same and skip
|
||||
/* On Gfx11+ we use only one config, so verify it remains the same and skip
|
||||
* the stalling programming entirely.
|
||||
*/
|
||||
assert(cfg == cmd_buffer->device->l3_config);
|
||||
|
@ -2049,7 +2049,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
|
|||
*
|
||||
* The same text exists a few rows below for Post Sync Op.
|
||||
*
|
||||
* On Gen12 this is GEN:BUG:1607156449.
|
||||
* On Gfx12 this is GEN:BUG:1607156449.
|
||||
*/
|
||||
if (bits & ANV_PIPE_POST_SYNC_BIT) {
|
||||
if ((GFX_VER == 9 || (GFX_VER == 12 && devinfo->revision == 0 /* A0 */)) &&
|
||||
|
@ -3036,7 +3036,7 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
|
|||
&pipeline->shaders[stage]->bind_map;
|
||||
|
||||
#if GFX_VER >= 9
|
||||
/* This field exists since Gen8. However, the Broadwell PRM says:
|
||||
/* This field exists since Gfx8. However, the Broadwell PRM says:
|
||||
*
|
||||
* "Constant Buffer Object Control State must be always programmed
|
||||
* to zero."
|
||||
|
@ -3451,7 +3451,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
/* Size is in DWords - 1 */
|
||||
sob.SurfaceSize = DIV_ROUND_UP(xfb->size, 4) - 1;
|
||||
#else
|
||||
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gen7 so
|
||||
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gfx7 so
|
||||
* we trust in SurfaceEndAddress = SurfaceBaseAddress = 0 (the
|
||||
* default for an empty SO_BUFFER packet) to disable them.
|
||||
*/
|
||||
|
@ -4711,7 +4711,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
|
|||
* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
|
||||
* with Pipeline Select set to GPGPU.
|
||||
*
|
||||
* The internal hardware docs recommend the same workaround for Gen9
|
||||
* The internal hardware docs recommend the same workaround for Gfx9
|
||||
* hardware too.
|
||||
*/
|
||||
if (pipeline == GPGPU)
|
||||
|
@ -4992,7 +4992,7 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
|
|||
#if GFX_VER == 9
|
||||
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
|
||||
const unsigned slice_hashing[] = {
|
||||
/* Because all Gen9 platforms with more than one slice require
|
||||
/* Because all Gfx9 platforms with more than one slice require
|
||||
* three-way subslice hashing, a single "normal" 16x16 slice hashing
|
||||
* block is guaranteed to suffer from substantial imbalance, with one
|
||||
* subslice receiving twice as much work as the other two in the
|
||||
|
@ -5000,7 +5000,7 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
|
|||
*
|
||||
* The performance impact of that would be particularly severe when
|
||||
* three-way hashing is also in use for slice balancing (which is the
|
||||
* case for all Gen9 GT4 platforms), because one of the slices
|
||||
* case for all Gfx9 GT4 platforms), because one of the slices
|
||||
* receives one every three 16x16 blocks in either direction, which
|
||||
* is roughly the periodicity of the underlying subslice imbalance
|
||||
* pattern ("roughly" because in reality the hardware's
|
||||
|
@ -5144,7 +5144,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
/* GEN:BUG:1408224581
|
||||
*
|
||||
* Workaround: Gen12LP Astep only An additional pipe control with
|
||||
* Workaround: Gfx12LP Astep only An additional pipe control with
|
||||
* post-sync = store dword operation would be required.( w/a is to
|
||||
* have an additional pipe control after the stencil state whenever
|
||||
* the surface state bits of this state is changing).
|
||||
|
|
|
@ -723,7 +723,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable;
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
|
||||
/* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
|
||||
* can get the depth offsets correct.
|
||||
*/
|
||||
if (subpass->depth_stencil_attachment) {
|
||||
|
@ -755,14 +755,14 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
|
|||
const VkPipelineMultisampleStateCreateInfo *info,
|
||||
uint32_t dynamic_states)
|
||||
{
|
||||
/* If the sample locations are dynamic, 3DSTATE_MULTISAMPLE on Gen7/7.5
|
||||
* will be emitted dynamically, so skip it here. On Gen8+
|
||||
/* If the sample locations are dynamic, 3DSTATE_MULTISAMPLE on Gfx7/7.5
|
||||
* will be emitted dynamically, so skip it here. On Gfx8+
|
||||
* 3DSTATE_SAMPLE_PATTERN will be emitted dynamically, so skip it here.
|
||||
*/
|
||||
if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
|
||||
/* Only lookup locations if the extensions is active, otherwise the
|
||||
* default ones will be used either at device initialization time or
|
||||
* through 3DSTATE_MULTISAMPLE on Gen7/7.5 by passing NULL locations.
|
||||
* through 3DSTATE_MULTISAMPLE on Gfx7/7.5 by passing NULL locations.
|
||||
*/
|
||||
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations) {
|
||||
#if GFX_VER >= 8
|
||||
|
@ -776,7 +776,7 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
|
|||
pipeline->dynamic_state.sample_locations.samples,
|
||||
pipeline->dynamic_state.sample_locations.locations);
|
||||
} else {
|
||||
/* On Gen8+ 3DSTATE_MULTISAMPLE does not hold anything we need to modify
|
||||
/* On Gfx8+ 3DSTATE_MULTISAMPLE does not hold anything we need to modify
|
||||
* for sample locations, so we don't have to emit it dynamically.
|
||||
*/
|
||||
#if GFX_VER >= 8
|
||||
|
@ -1430,7 +1430,7 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
|
|||
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
|
||||
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
|
||||
|
||||
/* On Gen7, the SO buffer enables live in 3DSTATE_STREAMOUT which
|
||||
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
|
||||
* is a bit inconvenient because we don't know what buffers will
|
||||
* actually be enabled until draw time. We do our best here by
|
||||
* setting them based on buffers_written and we disable them
|
||||
|
@ -1926,7 +1926,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
|
|||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
|
||||
/* Gfx8 hardware tries to compute ThreadDispatchEnable for us but
|
||||
* doesn't take into account KillPixels when no depth or stencil
|
||||
* writes are enabled. In order for occlusion queries to work
|
||||
* correctly with no attachments, we need to force-enable PS thread
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
*
|
||||
* The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
|
||||
* and p_1 will be swapped for the result. Note that in the context of pixel
|
||||
* pipe hashing this can be always 0 on Gen12 platforms, since the hardware
|
||||
* pipe hashing this can be always 0 on Gfx12 platforms, since the hardware
|
||||
* transparently remaps logical indices found on the table to physical pixel
|
||||
* pipe indices from the highest to lowest EU count.
|
||||
*/
|
||||
|
@ -117,7 +117,7 @@ genX(emit_slice_hashing_state)(struct anv_device *device,
|
|||
ppipes_of[n] += (device->info.ppipe_subslices[p] == n);
|
||||
}
|
||||
|
||||
/* Gen12 has three pixel pipes. */
|
||||
/* Gfx12 has three pixel pipes. */
|
||||
assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
|
||||
|
||||
if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
|
||||
|
|
|
@ -1098,7 +1098,7 @@ load_sized_register_mem(struct brw_context *brw,
|
|||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
int i;
|
||||
|
||||
/* MI_LOAD_REGISTER_MEM only exists on Gen7+. */
|
||||
/* MI_LOAD_REGISTER_MEM only exists on Gfx7+. */
|
||||
assert(devinfo->ver >= 7);
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
|
|
|
@ -145,7 +145,7 @@ brw_ptr_in_state_buffer(struct brw_batch *batch, void *p)
|
|||
OUT_BATCH(reloc); \
|
||||
} while (0)
|
||||
|
||||
/* Handle 48-bit address relocations for Gen8+ */
|
||||
/* Handle 48-bit address relocations for Gfx8+ */
|
||||
#define OUT_RELOC64(buf, flags, delta) do { \
|
||||
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
|
||||
uint64_t reloc64 = \
|
||||
|
|
|
@ -240,7 +240,7 @@ const struct brw_tracked_state brw_gs_binding_table = {
|
|||
*/
|
||||
|
||||
/**
|
||||
* (Gen4-5) Upload the binding table pointers for all shader stages.
|
||||
* (Gfx4-5) Upload the binding table pointers for all shader stages.
|
||||
*
|
||||
* The binding table pointers are relative to the surface state base address,
|
||||
* which points at the batchbuffer containing the streamed batch state.
|
||||
|
|
|
@ -203,7 +203,7 @@ alignment_valid(struct brw_context *brw, unsigned offset,
|
|||
if (tiling != ISL_TILING_LINEAR)
|
||||
return (offset & 4095) == 0;
|
||||
|
||||
/* On Gen8+, linear buffers must be cacheline-aligned. */
|
||||
/* On Gfx8+, linear buffers must be cacheline-aligned. */
|
||||
if (devinfo->ver >= 8)
|
||||
return (offset & 63) == 0;
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
|
|||
}
|
||||
|
||||
/**
|
||||
* Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
|
||||
* Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+
|
||||
* "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
|
||||
*
|
||||
* SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
|
||||
|
@ -263,7 +263,7 @@ swizzle_to_scs(GLenum swizzle)
|
|||
}
|
||||
|
||||
/**
|
||||
* Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
|
||||
* Note: if the src (or dst) is a 2D multisample array texture on Gfx7+ using
|
||||
* INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
|
||||
* the physical layer holding sample 0. So, for example, if
|
||||
* src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
|
||||
|
@ -1610,7 +1610,7 @@ brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
|
|||
* enabled must be issued before the rectangle primitive used for
|
||||
* the depth buffer clear operation.
|
||||
*
|
||||
* Same applies for Gen8 and Gen9.
|
||||
* Same applies for Gfx8 and Gfx9.
|
||||
*
|
||||
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
|
||||
* PIPE_CONTROL, Depth Cache Flush Enable:
|
||||
|
|
|
@ -1902,11 +1902,11 @@ brw_bufmgr_create(struct gen_device_info *devinfo, int fd, bool bo_reuse)
|
|||
} else if (devinfo->ver >= 10) {
|
||||
/* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
|
||||
* kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
|
||||
* 4.14. Gen10+ GVT hasn't landed yet, so it's not actually a
|
||||
* 4.14. Gfx10+ GVT hasn't landed yet, so it's not actually a
|
||||
* problem - but extending this requirement back to earlier gens
|
||||
* might actually mean requiring 4.14.
|
||||
*/
|
||||
fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gen10+.");
|
||||
fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.");
|
||||
close(bufmgr->fd);
|
||||
free(bufmgr);
|
||||
return NULL;
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
/** @file brw_conditional_render.c
|
||||
*
|
||||
* Support for conditional rendering based on query objects
|
||||
* (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+.
|
||||
* (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+.
|
||||
*/
|
||||
|
||||
#include "main/condrender.h"
|
||||
|
|
|
@ -557,7 +557,7 @@ brw_initialize_context_constants(struct brw_context *brw)
|
|||
*/
|
||||
ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
|
||||
|
||||
/* On Gen6, in the worst case, we use up one binding table entry per
|
||||
/* On Gfx6, in the worst case, we use up one binding table entry per
|
||||
* transform feedback component (see comments above the definition of
|
||||
* BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
|
||||
* for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
|
||||
|
@ -675,7 +675,7 @@ brw_initialize_context_constants(struct brw_context *brw)
|
|||
ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
|
||||
|
||||
/* Gen6 converts quads to polygon in beginning of 3D pipeline,
|
||||
/* Gfx6 converts quads to polygon in beginning of 3D pipeline,
|
||||
* but we're not sure how it's actually done for vertex order,
|
||||
* that affect provoking vertex decision. Always use last vertex
|
||||
* convention for quad primitive which works as expected for now.
|
||||
|
|
|
@ -342,7 +342,7 @@ struct brw_ff_gs_prog_data {
|
|||
GLuint total_grf;
|
||||
|
||||
/**
|
||||
* Gen6 transform feedback: Amount by which the streaming vertex buffer
|
||||
* Gfx6 transform feedback: Amount by which the streaming vertex buffer
|
||||
* indices should be incremented each time the GS is invoked.
|
||||
*/
|
||||
unsigned svbi_postincrement_value;
|
||||
|
@ -640,7 +640,7 @@ struct brw_stage_state
|
|||
/** Offset in the program cache to the program */
|
||||
uint32_t prog_offset;
|
||||
|
||||
/** Offset in the batchbuffer to Gen4-5 pipelined state (VS/WM/GS_STATE). */
|
||||
/** Offset in the batchbuffer to Gfx4-5 pipelined state (VS/WM/GS_STATE). */
|
||||
uint32_t state_offset;
|
||||
|
||||
struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */
|
||||
|
@ -1159,7 +1159,7 @@ struct brw_context
|
|||
|
||||
/**
|
||||
* Buffer object used in place of multisampled null render targets on
|
||||
* Gen6. See brw_emit_null_surface_state().
|
||||
* Gfx6. See brw_emit_null_surface_state().
|
||||
*/
|
||||
struct brw_bo *multisampled_null_render_target_bo;
|
||||
|
||||
|
|
|
@ -47,9 +47,9 @@ copy_miptrees(struct brw_context *brw,
|
|||
if (devinfo->ver <= 5) {
|
||||
/* On gfx4-5, try BLT first.
|
||||
*
|
||||
* Gen4-5 have a single ring for both 3D and BLT operations, so there's
|
||||
* no inter-ring synchronization issues like on Gen6+. It is apparently
|
||||
* faster than using the 3D pipeline. Original Gen4 also has to rebase
|
||||
* Gfx4-5 have a single ring for both 3D and BLT operations, so there's
|
||||
* no inter-ring synchronization issues like on Gfx6+. It is apparently
|
||||
* faster than using the 3D pipeline. Original Gfx4 also has to rebase
|
||||
* and copy miptree slices in order to render to unaligned locations.
|
||||
*/
|
||||
if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,
|
||||
|
|
|
@ -388,7 +388,7 @@
|
|||
/* SAMPLER_STATE DW0 */
|
||||
#define BRW_SAMPLER_DISABLE (1 << 31)
|
||||
#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28)
|
||||
#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gen6 only */
|
||||
#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gfx6 only */
|
||||
#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22)
|
||||
#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22
|
||||
#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20)
|
||||
|
@ -412,7 +412,7 @@
|
|||
#define GFX4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12)
|
||||
#define GFX4_SAMPLER_MAX_LOD_SHIFT 12
|
||||
#define GFX4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9)
|
||||
/* Wrap modes are in DW1 on Gen4-6 and DW3 on Gen7+ */
|
||||
/* Wrap modes are in DW1 on Gfx4-6 and DW3 on Gfx7+ */
|
||||
#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6)
|
||||
#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6
|
||||
#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3)
|
||||
|
@ -436,7 +436,7 @@
|
|||
#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13)
|
||||
#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13
|
||||
#define GFX7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10)
|
||||
/* Gen7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
|
||||
/* Gfx7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
|
||||
#define GFX6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0)
|
||||
|
||||
enum brw_wrap_mode {
|
||||
|
@ -601,9 +601,9 @@ enum brw_wrap_mode {
|
|||
# define GFX6_VS_STATISTICS_ENABLE (1 << 10)
|
||||
# define GFX6_VS_CACHE_DISABLE (1 << 1)
|
||||
# define GFX6_VS_ENABLE (1 << 0)
|
||||
/* Gen8+ DW7 */
|
||||
/* Gfx8+ DW7 */
|
||||
# define GFX8_VS_SIMD8_ENABLE (1 << 2)
|
||||
/* Gen8+ DW8 */
|
||||
/* Gfx8+ DW8 */
|
||||
# define GFX8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
|
||||
# define GFX8_VS_URB_OUTPUT_LENGTH_SHIFT 16
|
||||
# define GFX8_VS_USER_CLIP_DISTANCE_SHIFT 8
|
||||
|
@ -650,12 +650,12 @@ enum brw_wrap_mode {
|
|||
# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
|
||||
# define GFX6_GS_ENABLE (1 << 15)
|
||||
|
||||
/* Gen8+ DW8 */
|
||||
/* Gfx8+ DW8 */
|
||||
# define GFX8_GS_STATIC_OUTPUT (1 << 30)
|
||||
# define GFX8_GS_STATIC_VERTEX_COUNT_SHIFT 16
|
||||
# define GFX8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
|
||||
|
||||
/* Gen8+ DW9 */
|
||||
/* Gfx8+ DW9 */
|
||||
# define GFX8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
|
||||
# define GFX8_GS_URB_OUTPUT_LENGTH_SHIFT 16
|
||||
# define GFX8_GS_USER_CLIP_DISTANCE_SHIFT 8
|
||||
|
@ -724,7 +724,7 @@ enum brw_wrap_mode {
|
|||
# define GFX7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2)
|
||||
# define GFX7_DS_CACHE_DISABLE (1 << 1)
|
||||
# define GFX7_DS_ENABLE (1 << 0)
|
||||
/* Gen8+ DW8 */
|
||||
/* Gfx8+ DW8 */
|
||||
# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21)
|
||||
# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
|
||||
# define GFX8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16)
|
||||
|
@ -911,7 +911,7 @@ enum brw_wrap_mode {
|
|||
# define GFX8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0)
|
||||
# define GFX9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0)
|
||||
|
||||
/* Gen8 BLEND_STATE */
|
||||
/* Gfx8 BLEND_STATE */
|
||||
/* DW0 */
|
||||
#define GFX8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
|
||||
#define GFX8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30)
|
||||
|
@ -1169,7 +1169,7 @@ enum brw_pixel_shader_coverage_mask_mode {
|
|||
# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
|
||||
# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
|
||||
|
||||
/* 3DSTATE_WM for Gen7 */
|
||||
/* 3DSTATE_WM for Gfx7 */
|
||||
/* DW1 */
|
||||
# define GFX7_WM_STATISTICS_ENABLE (1 << 31)
|
||||
# define GFX7_WM_DEPTH_CLEAR (1 << 30)
|
||||
|
@ -1440,11 +1440,11 @@ enum brw_pixel_shader_coverage_mask_mode {
|
|||
# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
|
||||
# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21)
|
||||
|
||||
/* Load a value from memory into a register. Only available on Gen7+. */
|
||||
/* Load a value from memory into a register. Only available on Gfx7+. */
|
||||
#define GFX7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
|
||||
# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
|
||||
|
||||
/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
|
||||
/* Manipulate the predicate bit based on some register values. Only on Gfx7+ */
|
||||
#define GFX7_MI_PREDICATE (CMD_MI | (0xC << 23))
|
||||
# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
|
||||
# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
|
||||
|
@ -1652,10 +1652,10 @@ enum brw_pixel_shader_coverage_mask_mode {
|
|||
#define INSTPM 0x20c0
|
||||
# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
|
||||
|
||||
#define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
|
||||
#define CS_DEBUG_MODE2 0x20d8 /* Gfx9+ */
|
||||
# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
|
||||
|
||||
#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */
|
||||
#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gfx9+ */
|
||||
# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
|
||||
# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
|
||||
# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7)
|
||||
|
@ -1669,7 +1669,7 @@ enum brw_pixel_shader_coverage_mask_mode {
|
|||
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5)
|
||||
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5)
|
||||
|
||||
#define CS_CHICKEN1 0x2580 /* Gen9+ */
|
||||
#define CS_CHICKEN1 0x2580 /* Gfx9+ */
|
||||
# define GFX9_REPLAY_MODE_MIDBUFFER (0 << 0)
|
||||
# define GFX9_REPLAY_MODE_MIDOBJECT (1 << 0)
|
||||
# define GFX9_REPLAY_MODE_MASK REG_MASK(1 << 0)
|
||||
|
|
|
@ -137,8 +137,8 @@ gfx6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
|
|||
|
||||
/**
|
||||
* The hardware is capable of removing dangling vertices on its own; however,
|
||||
* prior to Gen6, we sometimes convert quads into trifans (and quad strips
|
||||
* into tristrips), since pre-Gen6 hardware requires a GS to render quads.
|
||||
* prior to Gfx6, we sometimes convert quads into trifans (and quad strips
|
||||
* into tristrips), since pre-Gfx6 hardware requires a GS to render quads.
|
||||
* This function manually trims dangling vertices from a draw call involving
|
||||
* quads so that those dangling vertices won't get drawn when we convert to
|
||||
* trifans/tristrips.
|
||||
|
@ -190,7 +190,7 @@ brw_emit_prim(struct brw_context *brw,
|
|||
start_vertex_location += brw->vb.start_vertex_bias;
|
||||
}
|
||||
|
||||
/* We only need to trim the primitive count on pre-Gen6. */
|
||||
/* We only need to trim the primitive count on pre-Gfx6. */
|
||||
if (devinfo->ver < 6)
|
||||
verts_per_instance = trim(prim->mode, prim->count);
|
||||
else
|
||||
|
@ -411,7 +411,7 @@ brw_disable_rb_aux_buffer(struct brw_context *brw,
|
|||
|
||||
/** Implement the ASTC 5x5 sampler workaround
|
||||
*
|
||||
* Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
|
||||
* Gfx9 sampling hardware has a bug where an ASTC 5x5 compressed surface
|
||||
* cannot live in the sampler cache at the same time as an aux compressed
|
||||
* surface. In order to work around the bug we have to stall rendering with a
|
||||
* CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
|
||||
|
|
|
@ -332,7 +332,7 @@ brw_init_extensions(struct gl_context *ctx)
|
|||
ctx->Extensions.ARB_ES3_2_compatibility = true;
|
||||
|
||||
/* Currently only implemented in the scalar backend, so only enable for
|
||||
* Gen8+. Eventually Gen6+ could be supported.
|
||||
* Gfx8+. Eventually Gfx6+ could be supported.
|
||||
*/
|
||||
ctx->Extensions.INTEL_shader_integer_functions2 = true;
|
||||
}
|
||||
|
@ -367,7 +367,7 @@ brw_init_extensions(struct gl_context *ctx)
|
|||
* ensure memory access ordering for all messages to the dataport from
|
||||
* all threads. Memory fence messages prior to SKL only provide memory
|
||||
* access ordering for messages from the same thread, so we can only
|
||||
* support the feature from Gen9 onwards.
|
||||
* support the feature from Gfx9 onwards.
|
||||
*
|
||||
*/
|
||||
|
||||
|
|
|
@ -723,7 +723,7 @@ brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
|
|||
_mesa_get_format_name(stencil_mt->format));
|
||||
}
|
||||
if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) {
|
||||
/* Before Gen7, separate depth and stencil buffers can be used
|
||||
/* Before Gfx7, separate depth and stencil buffers can be used
|
||||
* only if HiZ is enabled. From the Sandybridge PRM, Volume 2,
|
||||
* Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable:
|
||||
* [DevSNB]: This field must be set to the same value (enabled
|
||||
|
@ -907,9 +907,9 @@ brw_blit_framebuffer(struct gl_context *ctx,
|
|||
if (devinfo->ver < 6) {
|
||||
/* On gfx4-5, try BLT first.
|
||||
*
|
||||
* Gen4-5 have a single ring for both 3D and BLT operations, so there's
|
||||
* no inter-ring synchronization issues like on Gen6+. It is apparently
|
||||
* faster than using the 3D pipeline. Original Gen4 also has to rebase
|
||||
* Gfx4-5 have a single ring for both 3D and BLT operations, so there's
|
||||
* no inter-ring synchronization issues like on Gfx6+. It is apparently
|
||||
* faster than using the 3D pipeline. Original Gfx4 also has to rebase
|
||||
* and copy miptree slices in order to render to unaligned locations.
|
||||
*/
|
||||
mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
|
||||
|
|
|
@ -103,11 +103,11 @@ brw_codegen_ff_gs_prog(struct brw_context *brw,
|
|||
check_edge_flag = true;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unexpected primitive type in Gen6 SOL program.");
|
||||
unreachable("Unexpected primitive type in Gfx6 SOL program.");
|
||||
}
|
||||
gfx6_sol_program(&c, key, num_verts, check_edge_flag);
|
||||
} else {
|
||||
/* On Gen4-5, we use the GS to decompose certain types of primitives.
|
||||
/* On Gfx4-5, we use the GS to decompose certain types of primitives.
|
||||
* Note that primitives which don't require a GS program have already
|
||||
* been weeded out by now.
|
||||
*/
|
||||
|
@ -192,7 +192,7 @@ brw_ff_gs_populate_key(struct brw_context *brw,
|
|||
}
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* On Gen6, GS is used for transform feedback. */
|
||||
/* On Gfx6, GS is used for transform feedback. */
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
if (_mesa_is_xfb_active_and_unpaused(ctx)) {
|
||||
const struct gl_program *prog =
|
||||
|
|
|
@ -90,7 +90,7 @@ static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
|
|||
* The following information is passed to the GS thread in R0, and needs to be
|
||||
* included in the first URB_WRITE or FF_SYNC message sent by the GS:
|
||||
*
|
||||
* - DWORD 0 [31:0] handle info (Gen4 only)
|
||||
* - DWORD 0 [31:0] handle info (Gfx4 only)
|
||||
* - DWORD 5 [7:0] FFTID
|
||||
* - DWORD 6 [31:0] Debug info
|
||||
* - DWORD 7 [31:0] Debug info
|
||||
|
@ -330,7 +330,7 @@ void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
|
|||
}
|
||||
|
||||
/**
|
||||
* Generate the geometry shader program used on Gen6 to perform stream output
|
||||
* Generate the geometry shader program used on Gfx6 to perform stream output
|
||||
* (transform feedback).
|
||||
*/
|
||||
void
|
||||
|
|
|
@ -44,7 +44,7 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target,
|
|||
const unsigned base_level = tex_obj->Attrib.BaseLevel;
|
||||
unsigned last_level, first_layer, last_layer;
|
||||
|
||||
/* Blorp doesn't handle combined depth/stencil surfaces on Gen4-5 yet. */
|
||||
/* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
|
||||
if (devinfo->ver <= 5 &&
|
||||
(tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
|
||||
tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
|
||||
|
|
|
@ -294,7 +294,7 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw,
|
|||
* render using a renderable format, without the override workaround it
|
||||
* wouldn't be possible to have a non-renderable surface in a fast clear
|
||||
* state so the hardware probably legitimately doesn't need to support
|
||||
* this case. At least on Gen9 this really does seem to cause problems.
|
||||
* this case. At least on Gfx9 this really does seem to cause problems.
|
||||
*/
|
||||
if (devinfo->ver >= 9 &&
|
||||
brw_isl_format_for_mesa_format(mt->format) !=
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue