intel: Rename Genx keyword to Gfxx

Commands used to make the changes:
export SEARCH_PATH="src/intel src/gallium/drivers/iris src/mesa/drivers/dri/i965"
grep -E "Gen[[:digit:]]+" -rIl $SEARCH_PATH | xargs sed -ie "s/Gen\([[:digit:]]\+\)/Gfx\1/g"

Exclude changes in src/intel/perf/oa-*.xml:
find src/intel/perf -type f \( -name "*.xml" \) | xargs sed -ie "s/Gfx/Gen/g"

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9936>
Anuj Phogat 2021-03-29 15:46:12 -07:00 committed by Marge Bot
parent b75f095bc7
commit 1d296484b4
117 changed files with 521 additions and 521 deletions


@ -299,7 +299,7 @@ tex_cache_flush_hack(struct iris_batch *batch,
* If the BO hasn't been referenced yet this batch, we assume that the
* texture cache doesn't contain any relevant data nor need flushing.
*
* Icelake (Gen11+) claims to fix this issue, but seems to still have
* Icelake (Gfx11+) claims to fix this issue, but seems to still have
* issues with ASTC formats.
*/
bool need_flush = devinfo->ver >= 11 ?


@ -1425,7 +1425,7 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
bo->external = true;
bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
/* From the Bspec, Memory Compression - Gen12:
/* From the Bspec, Memory Compression - Gfx12:
*
* The base address for the surface has to be 64K page aligned and the
* surface is expected to be padded in the virtual domain to be 4 4K


@ -77,7 +77,7 @@ iris_format_for_usage(const struct gen_device_info *devinfo,
}
/* We choose RGBA over RGBX for rendering because the hardware doesn't support
* rendering to RGBX. However, when this internal override is used on Gen9+,
* rendering to RGBX. However, when this internal override is used on Gfx9+,
* fast clears don't work correctly.
*
* i965 fixes this by pretending to not support RGBX formats, and the higher
@ -209,7 +209,7 @@ iris_is_format_supported(struct pipe_screen *pscreen,
format == ISL_FORMAT_R32_UINT;
}
/* TODO: Support ASTC 5x5 on Gen9 properly. This means implementing
/* TODO: Support ASTC 5x5 on Gfx9 properly. This means implementing
* a complex sampler workaround (see i965's gfx9_apply_astc5x5_wa_flush).
* Without it, st/mesa will emulate ASTC 5x5 via uncompressed textures.
*/


@ -62,12 +62,12 @@ iris_emit_pipe_control_flush(struct iris_batch *batch,
if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
/* A pipe control command with flush and invalidate bits set
* simultaneously is an inherently racy operation on Gen6+ if the
* simultaneously is an inherently racy operation on Gfx6+ if the
* contents of the flushed caches were intended to become visible from
* any of the invalidated caches. Split it in two PIPE_CONTROLs, the
* first one should stall the pipeline to make sure that the flushed R/W
* caches are coherent with memory once the specified R/O caches are
* invalidated. On pre-Gen6 hardware the (implicit) R/O cache
* invalidated. On pre-Gfx6 hardware the (implicit) R/O cache
* invalidation seems to happen at the bottom of the pipeline together
* with any write cache flush, so this shouldn't be a concern. In order
* to ensure a full stall, we do an end-of-pipe sync.
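The second half of the split falls outside the excerpt; a plausible sketch of how the two PIPE_CONTROLs end up being emitted (iris_emit_end_of_pipe_sync is inferred from the comment's mention of an end-of-pipe sync; treat the details as assumptions):

    if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
        (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
       /* First PIPE_CONTROL: flush the R/W caches and stall until the
        * flushed data is coherent with memory (end-of-pipe sync). */
       iris_emit_end_of_pipe_sync(batch, reason,
                                  flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
       /* Second PIPE_CONTROL: only the invalidate bits remain. */
       flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
    }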


@ -811,7 +811,7 @@ iris_setup_binding_table(const struct gen_device_info *devinfo,
BITFIELD64_MASK(num_render_targets);
/* Setup render target read surface group in order to support non-coherent
* framebuffer fetch on Gen8
* framebuffer fetch on Gfx8
*/
if (devinfo->ver == 8 && info->outputs_read) {
bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
@ -1760,12 +1760,12 @@ iris_compile_fs(struct iris_screen *screen,
/* Lower output variables to load_output intrinsics before setting up
* binding tables, so iris_setup_binding_table can map any load_output
* intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
* intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
* non-coherent framebuffer fetches.
*/
brw_nir_lower_fs_outputs(nir);
/* On Gen11+, shader RT write messages have a "Null Render Target" bit
/* On Gfx11+, shader RT write messages have a "Null Render Target" bit
* and do not need a binding table entry with a null surface. Earlier
* generations need an entry for a null surface.
*/
@ -2162,9 +2162,9 @@ iris_get_scratch_space(struct iris_context *ice,
* According to the other driver team, this applies to compute shaders
* as well. This is not currently documented at all.
*
* This hack is no longer necessary on Gen11+.
* This hack is no longer necessary on Gfx11+.
*
* For Gen11+, scratch space allocation is based on the number of threads
* For Gfx11+, scratch space allocation is based on the number of threads
* in the base configuration.
*/
unsigned subslice_total = screen->subslice_total;
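A condensed sketch of the two sizing regimes the comment describes (the constants here are assumptions, not the driver's exact values):

    unsigned scratch_ids_per_subslice;
    if (devinfo->ver >= 11) {
       /* Gfx11+: sized from the base configuration's thread count,
        * independent of how many subslices are actually fused off. */
       scratch_ids_per_subslice = 8 * 8;                    /* assumption */
    } else {
       /* Pre-Gfx11: pessimistically assume every subslice can run the
        * maximum number of threads. */
       scratch_ids_per_subslice = devinfo->max_cs_threads;  /* assumption */
    }
    unsigned max_threads = scratch_ids_per_subslice * subslice_total;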


@ -538,7 +538,7 @@ iris_hiz_exec(struct iris_context *ice,
* enabled must be issued before the rectangle primitive used for
* the depth buffer clear operation."
*
* Same applies for Gen8 and Gen9.
* Same applies for Gfx8 and Gfx9.
*/
iris_emit_pipe_control_flush(batch,
"hiz op: pre-flush",
@ -848,7 +848,7 @@ iris_resource_texture_aux_usage(struct iris_context *ice,
0, INTEL_REMAINING_LAYERS))
return ISL_AUX_USAGE_NONE;
/* On Gen9 color buffers may be compressed by the hardware (lossless
/* On Gfx9 color buffers may be compressed by the hardware (lossless
* compression). There are, however, format restrictions and care needs
* to be taken that the sampler engine is capable of re-interpreting a
* buffer with a format different from the one it was originally written with.


@ -291,7 +291,7 @@ iris_image_view_get_format(struct iris_context *ice,
iris_format_for_usage(devinfo, img->format, usage).fmt;
if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
/* On Gen8, try to use typed surfaces reads (which support a
/* On Gfx8, try to use typed surfaces reads (which support a
* limited number of formats), and if not possible, fall back
* to untyped reads.
*/
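A sketch of that Gfx8 fallback, assuming ISL's typed-storage helpers (isl_has_matching_typed_storage_image_format and isl_lower_storage_image_format):

    if (devinfo->ver == 8 &&
        !isl_has_matching_typed_storage_image_format(devinfo, isl_fmt)) {
       /* No typed read support for this format: fall back to raw
        * (untyped) reads and reconstruct texels in the shader. */
       isl_fmt = ISL_FORMAT_RAW;
    } else {
       isl_fmt = isl_lower_storage_image_format(devinfo, isl_fmt);
    }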
@ -733,7 +733,7 @@ iris_resource_configure_aux(struct iris_screen *screen,
* A CCS value of 0 indicates that the corresponding block is in the
* pass-through state which is what we want.
*
* For CCS_D, do the same thing. On Gen9+, this avoids having any
* For CCS_D, do the same thing. On Gfx9+, this avoids having any
* undefined bits in the aux buffer.
*/
if (imported) {


@ -633,7 +633,7 @@ emit_pipeline_select(struct iris_batch *batch, uint32_t pipeline)
* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
* with Pipeline Select set to GPGPU.
*
* The internal hardware docs recommend the same workaround for Gen9
* The internal hardware docs recommend the same workaround for Gfx9
* hardware too.
*/
if (pipeline == GPGPU)
@ -817,7 +817,7 @@ iris_enable_obj_preemption(struct iris_batch *batch, bool enable)
*
* The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
* and p_1 will be swapped for the result. Note that in the context of pixel
* pipe hashing this can always be 0 on Gen12 platforms, since the hardware
* pipe hashing this can always be 0 on Gfx12 platforms, since the hardware
* transparently remaps logical indices found on the table to physical pixel
* pipe indices from the highest to lowest EU count.
*/
@ -885,7 +885,7 @@ gfx12_upload_pixel_hashing_tables(struct iris_batch *batch)
ppipes_of[n] += (devinfo->ppipe_subslices[p] == n);
}
/* Gen12 has three pixel pipes. */
/* Gfx12 has three pixel pipes. */
assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
@ -1389,7 +1389,7 @@ struct iris_depth_stencil_alpha_state {
bool depth_writes_enabled;
bool stencil_writes_enabled;
/** Outbound to Gen8-9 PMA stall equations */
/** Outbound to Gfx8-9 PMA stall equations */
bool depth_test_enabled;
};
@ -1518,12 +1518,12 @@ want_pma_fix(struct iris_context *ice)
const struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
const struct iris_blend_state *cso_blend = ice->state.cso_blend;
/* In very specific combinations of state, we can instruct Gen8-9 hardware
/* In very specific combinations of state, we can instruct Gfx8-9 hardware
* to avoid stalling at the pixel mask array. The state equations are
* documented in these places:
*
* - Gen8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
* - Gen9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
* - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
* - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
*
* Both equations share some common elements:
*
@ -1602,7 +1602,7 @@ want_pma_fix(struct iris_context *ice)
bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask ||
cso_blend->alpha_to_coverage || cso_zsa->alpha_enabled;
/* The Gen8 depth PMA equation becomes:
/* The Gfx8 depth PMA equation becomes:
*
* depth_writes =
* 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
@ -1646,7 +1646,7 @@ genX(update_pma_fix)(struct iris_context *ice,
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
* prior to the LRI. If stencil buffer writes are enabled, then a Render
* Cache Flush is also necessary.
*
* The Gen9 docs say to use a depth stall rather than a command streamer
* The Gfx9 docs say to use a depth stall rather than a command streamer
* stall. However, the hardware seems to violently disagree. A full
* command streamer stall seems to be needed in both cases.
*/
@ -1666,7 +1666,7 @@ genX(update_pma_fix)(struct iris_context *ice,
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
*
* Again, the Gen9 docs give a different set of flushes but the Broadwell
* Again, the Gfx9 docs give a different set of flushes but the Broadwell
* flushes seem to work just as well.
*/
iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
@ -5806,7 +5806,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
#endif
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
/* Gen9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted
/* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted
* in order to commit constants. TODO: Investigate "Disable Gather
* at Set Shader" to go back to legacy mode...
*/
@ -5912,7 +5912,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
* SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
* mode."
*
* 16x MSAA only exists on Gen9+, so we can skip this on Gen8.
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
*/
if (GFX_VER >= 9 && cso_fb->samples == 16 &&
!wm_prog_data->persample_dispatch) {
@ -6236,7 +6236,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
if (GFX_VER >= 12) {
/* GEN:BUG:1408224581
*
* Workaround: Gen12LP Astep only An additional pipe control with
* Workaround: Gfx12LP Astep only An additional pipe control with
* post-sync = store dword operation would be required.( w/a is to
* have an additional pipe control after the stencil state whenever
* the surface state bits of this state is changing).
@ -6344,7 +6344,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
if (count) {
#if GFX_VER >= 11
/* Gen11+ doesn't need the cache workaround below */
/* Gfx11+ doesn't need the cache workaround below */
uint64_t bound = dynamic_bound;
while (bound) {
const int i = u_bit_scan64(&bound);
@ -6888,7 +6888,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:
*
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
* the only bits that are changed are scoreboard related: Scoreboard
@ -7424,7 +7424,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
*
* The same text exists a few rows below for Post Sync Op.
*
* On Gen12 this is GEN:BUG:1607156449.
* On Gfx12 this is GEN:BUG:1607156449.
*/
iris_emit_raw_pipe_control(batch,
"workaround: CS stall before gpgpu post-sync",
@ -7458,7 +7458,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
*
* This seems like nonsense. An Ivybridge workaround requires us to
* emit a PIPE_CONTROL with a depth stall and write immediate post-sync
* operation. Gen8+ requires us to emit depth stalls and depth cache
* operation. Gfx8+ requires us to emit depth stalls and depth cache
* flushes together. So, it's hard to imagine this means anything other
* than "we originally intended this to be used for PS_DEPTH_COUNT".
*
@ -7489,7 +7489,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
* We assert that the caller doesn't do this combination, to try and
* prevent mistakes. It shouldn't hurt the GPU, though.
*
* We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"
* We skip this check on Gfx11+ as the "Stall at Pixel Scoreboard"
* and "Render Target Flush" combo is explicitly required for BTI
* update workarounds.
*/
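The check itself falls outside the excerpt; it plausibly looks like this sketch:

    /* Flagging both "Render Target Flush" and "Stall at Pixel Scoreboard"
     * is only treated as a caller mistake on pre-Gfx11 parts (see above). */
    if (GFX_VER < 11 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
       assert(!(flags & PIPE_CONTROL_STALL_AT_SCOREBOARD));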
@ -7791,7 +7791,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
#if GFX_VER == 9
/**
* Preemption on Gen9 has to be enabled or disabled in various cases.
* Preemption on Gfx9 has to be enabled or disabled in various cases.
*
* See these workarounds for preemption:
* - WaDisableMidObjectPreemptionForGSLineStripAdj
@ -7799,7 +7799,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
* - WaDisableMidObjectPreemptionForLineLoop
* - WA#0798
*
* We don't put this in the vtable because it's only used on Gen9.
* We don't put this in the vtable because it's only used on Gfx9.
*/
void
gfx9_toggle_preemption(struct iris_context *ice,
@ -7904,7 +7904,7 @@ genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
#if GFX_VER == 9
const struct gen_device_info *devinfo = &batch->screen->devinfo;
const unsigned slice_hashing[] = {
/* Because all Gen9 platforms with more than one slice require
/* Because all Gfx9 platforms with more than one slice require
* three-way subslice hashing, a single "normal" 16x16 slice hashing
* block is guaranteed to suffer from substantial imbalance, with one
* subslice receiving twice as much work as the other two in the
@ -7912,7 +7912,7 @@ genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
*
* The performance impact of that would be particularly severe when
* three-way hashing is also in use for slice balancing (which is the
* case for all Gen9 GT4 platforms), because one of the slices
* case for all Gfx9 GT4 platforms), because one of the slices
* receives one every three 16x16 blocks in either direction, which
* is roughly the periodicity of the underlying subslice imbalance
* pattern ("roughly" because in reality the hardware's


@ -6,7 +6,7 @@ Missing features:
- Splitting larger-than-max blits (Jordan)
- Bit-for-bit copies (Jason)
- Depth and Stencil clears
- Gen4-5 support
- Gfx4-5 support
Performance:


@ -280,7 +280,7 @@ blorp_ensure_sf_program(struct blorp_batch *batch,
const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data;
assert(params->wm_prog_data);
/* Gen6+ doesn't need a strips and fans program */
/* Gfx6+ doesn't need a strips and fans program */
if (blorp->compiler->devinfo->ver >= 6)
return true;


@ -1203,7 +1203,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
dst_pos = blorp_blit_get_frag_coords(&b, key, &v);
/* Render target and texture hardware don't support W tiling until Gen8. */
/* Render target and texture hardware don't support W tiling until Gfx8. */
const bool rt_tiled_w = false;
const bool tex_tiled_w = devinfo->ver >= 8 && key->src_tiled_w;
@ -1375,7 +1375,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
nir_imm_float(&b, 0.5f));
color = blorp_nir_tex(&b, &v, key, src_pos);
} else {
/* Gen7+ hardware doesn't automatically blend. */
/* Gfx7+ hardware doesn't automatically blend. */
color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples,
key->tex_aux_usage,
key->texture_data_type,
@ -1648,7 +1648,7 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
}
if (isl_dev->info->ver == 6) {
/* Gen6 stencil buffers have a very large alignment coming in from the
/* Gfx6 stencil buffers have a very large alignment coming in from the
* miptree. It's out-of-bounds for what the surface state can handle.
* Since we have a single layer and level, it doesn't really matter as
* long as we don't pass a bogus value into isl_surf_fill_state().
@ -1806,10 +1806,10 @@ try_blorp_blit(struct blorp_batch *batch,
if (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
if (devinfo->ver >= 7) {
/* We can render as depth on Gen5 but there's no real advantage since
* it doesn't support MSAA or HiZ. On Gen4, we can't always render
/* We can render as depth on Gfx5 but there's no real advantage since
* it doesn't support MSAA or HiZ. On Gfx4, we can't always render
* to depth due to issues with depth buffers and mip-mapping. On
* Gen6, we can do everything but we have weird offsetting for HiZ
* Gfx6, we can do everything but we have weird offsetting for HiZ
* and stencil. It's easier to just render using the color pipe
* on those platforms.
*/
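Condensed, the choice the comment describes amounts to (illustrative):

    const bool blit_as_depth =
       (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) &&
       devinfo->ver >= 7;   /* Gfx4-6: take the color-pipe path instead */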
@ -2025,7 +2025,7 @@ try_blorp_blit(struct blorp_batch *batch,
if ((wm_prog_key->filter == BLORP_FILTER_AVERAGE ||
wm_prog_key->filter == BLORP_FILTER_BILINEAR) &&
batch->blorp->isl_dev->info->ver <= 6) {
/* Gen4-5 don't support non-normalized texture coordinates */
/* Gfx4-5 don't support non-normalized texture coordinates */
wm_prog_key->src_coords_normalized = true;
params->wm_inputs.src_inv_size[0] =
1.0f / minify(params->src.surf.logical_level0_px.width,
@ -2674,7 +2674,7 @@ blorp_copy(struct blorp_batch *batch,
params.dst.view.format = params.src.surf.format;
} else if ((params.dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) &&
isl_dev->info->ver >= 7) {
/* On Gen7 and higher, we use actual depth writes for blits into depth
/* On Gfx7 and higher, we use actual depth writes for blits into depth
* buffers so we need the real format.
*/
params.src.view.format = params.dst.surf.format;


@ -808,7 +808,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
/* We have to set the WM_HZ_OP::FullSurfaceDepthandStencilClear bit
* whenever we clear an uninitialized HIZ buffer (as some drivers
* currently do). However, this bit seems liable to clear 16x8 pixels in
* the ZCS on Gen12 - greater than the slice alignments for depth
* the ZCS on Gfx12 - greater than the slice alignments for depth
* buffers.
*/
assert(surf->image_alignment_el.w % 16 != 0 ||
@ -818,7 +818,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
* amd_vertex_shader_layer-layered-depth-texture-render piglit test.
*
* From the Compressed Depth Buffers section of the Bspec, under the
* Gen12 texture performant and ZCS columns:
* Gfx12 texture performant and ZCS columns:
*
* Update with clear at either 16x8 or 8x4 granularity, based on
* fs_clr or otherwise.
@ -828,7 +828,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
* when an initializing clear could hit another miplevel.
*
* NOTE: Because the CCS compresses the depth buffer and not a version
* of it that has been rearranged with different alignments (like Gen8+
* of it that has been rearranged with different alignments (like Gfx8+
* HIZ), we have to make sure that the x0 and y0 are at least 16x8
* aligned in the context of the entire surface.
*/
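The resulting start-alignment requirement, sketched:

    /* The clear rectangle's origin must be 16x8-aligned within the
     * whole surface, per the note above. */
    if (x0 % 16 != 0 || y0 % 8 != 0)
       return false;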
@ -1193,7 +1193,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch,
nir_ssa_def *clear_color = nir_load_var(&b, v_color);
if (blorp_key.indirect_clear_color && blorp->isl_dev->info->ver <= 8) {
/* Gen7-8 clear colors are stored as single 0/1 bits */
/* Gfx7-8 clear colors are stored as single 0/1 bits */
clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
blorp_nir_bit(&b, clear_color, 30),
blorp_nir_bit(&b, clear_color, 29),
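The bit-extraction helper is defined elsewhere in blorp; a sketch consistent with the calls above (nir_* builders are NIR's standard API):

    static nir_ssa_def *
    blorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit)
    {
       /* Shift the requested bit down and mask it to a 0/1 value. */
       return nir_iand(b, nir_ushr(b, src, nir_imm_int(b, bit)),
                       nir_imm_int(b, 1));
    }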


@ -478,7 +478,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
.SourceElementOffset = 0,
.Component0Control = VFCOMP_STORE_SRC,
/* From Gen8 onwards the hardware is no longer instructed to overwrite
/* From Gfx8 onwards the hardware is no longer instructed to overwrite
* components using an element specifier. Instead one has separate
* 3DSTATE_VF_SGVS (System Generated Value Setup) state packet for it.
*/
@ -822,7 +822,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
ps.BindingTableEntryCount = 1;
}
/* SAMPLER_STATE prefetching is broken on Gen11 - WA_1606682166 */
/* SAMPLER_STATE prefetching is broken on Gfx11 - WA_1606682166 */
if (GFX_VER == 11)
ps.SamplerCount = 0;
@ -861,11 +861,11 @@ blorp_emit_ps_config(struct blorp_batch *batch,
}
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
* for pre-Gen11 and 128 for gfx11+; on gfx11+, a programmed value of k
* for pre-Gfx11 and 128 for gfx11+; on gfx11+, a programmed value of k
* implies 2(k+1) threads. It implicitly scales for different GT
* levels (which have some # of PSDs).
*
* In Gen8 the format is U8-2 whereas in Gen9+ it is U9-1.
* In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
*/
if (GFX_VER >= 9)
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
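The Gfx8 branch is cut off by the hunk; pairing the U9-1/U8-2 encodings named above, it presumably reads (a sketch):

    #if GFX_VER >= 9
       ps.MaximumNumberofThreadsPerPSD = 64 - 1;   /* U9-1 encoding */
    #else
       ps.MaximumNumberofThreadsPerPSD = 64 - 2;   /* U8-2 encoding */
    #endif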
@ -983,7 +983,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
} else {
/* Gen7 hardware gets angry if we don't enable at least one dispatch
/* Gfx7 hardware gets angry if we don't enable at least one dispatch
* mode, so just enable 16-pixel dispatch if we don't have a program.
*/
ps._16PixelDispatchEnable = true;
@ -1700,7 +1700,7 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch,
#if GFX_VER >= 12
/* GEN:BUG:1408224581
*
* Workaround: Gen12LP Astep only An additional pipe control with
* Workaround: Gfx12LP Astep only An additional pipe control with
* post-sync = store dword operation would be required.( w/a is to
* have an additional pipe control after the stencil state whenever
* the surface state bits of this state is changing).


@ -572,7 +572,7 @@ decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
uint64_t ksp = 0;
bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gen8+ only */
bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
bool is_enabled = true;
struct intel_field_iterator iter;


@ -57,13 +57,13 @@ intel_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
* This additional restriction must also be comprehended by software,
* i.e., enforced by use of clipping."
*
* This makes no sense. Gen7+ hardware supports 16K render targets,
* This makes no sense. Gfx7+ hardware supports 16K render targets,
* and you definitely need to be able to draw polygons that fill the
* surface. Our assumption is that the rasterizer was limited to 8K
* on Sandybridge, which only supports 8K surfaces, and it was actually
* increased to 16K on Ivybridge and later.
*
* So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
* So, limit the guardband to 16K on Gfx7+ and 8K on Sandybridge.
*/
const float gb_size = GFX_VER >= 7 ? 16384.0f : 8192.0f;


@ -70,7 +70,7 @@ intel_get_urb_config(const struct gen_device_info *devinfo,
{
unsigned urb_size_kB = intel_get_l3_config_urb_size(devinfo, l3_cfg);
/* RCU_MODE register for Gen12+ in BSpec says:
/* RCU_MODE register for Gfx12+ in BSpec says:
*
* "HW reserves 4KB of URB space per bank for Compute Engine out of the
* total storage available in L3. SW must consider that 4KB of storage
@ -236,7 +236,7 @@ intel_get_urb_config(const struct gen_device_info *devinfo,
if (deref_block_size) {
if (devinfo->ver >= 12) {
/* From the Gen12 BSpec:
/* From the Gfx12 BSpec:
*
* "Deref Block size depends on the last enabled shader and number
* of handles programmed for that shader


@ -1128,7 +1128,7 @@ mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
#endif /* MI_MATH section */
/* This assumes addresses of strictly more than 32bits (aka. Gen8+). */
/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH
struct mi_address_token {
@ -1169,7 +1169,7 @@ mi_self_mod_barrier(struct mi_builder *b)
mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;
}
/* Documentation says Gen11+ should be able to invalidate the command cache
/* Documentation says Gfx11+ should be able to invalidate the command cache
* but experiments show it doesn't work properly, so for now just get over
* the CS prefetch.
*/
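What "get over the CS prefetch" amounts to, sketched (the prefetch size here is an assumption; the real code would use a device-specific constant):

    /* Pad with MI_NOOPs until any dwords the command streamer may have
     * already prefetched are consumed, forcing a re-read of the batch. */
    const uint32_t cs_prefetch_bytes = 512;   /* assumption */
    for (uint32_t i = 0; i < cs_prefetch_bytes / 4; i++)
       mi_builder_emit(b, GENX(MI_NOOP), noop);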


@ -112,7 +112,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
/* Default to the sampler since that's what we've done since forever */
compiler->indirect_ubos_use_sampler = true;
/* There is no vec4 mode on Gen10+, and we don't use it at all on Gen8+. */
/* There is no vec4 mode on Gfx10+, and we don't use it at all on Gfx8+. */
for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) {
compiler->scalar_stage[i] = devinfo->ver >= 8 ||
i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE;
@ -145,8 +145,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
}
/* The Bspec's section titled "Instruction_multiply[DevBDW+]" claims that
* destination type can be Quadword and source type Doubleword for Gen8 and
* Gen9. So, lower 64-bit multiply instructions on the rest of the platforms.
* destination type can be Quadword and source type Doubleword for Gfx8 and
* Gfx9. So, lower 64-bit multiply instructions on the rest of the platforms.
*/
if (devinfo->ver < 8 || devinfo->ver > 9)
int64_options |= nir_lower_imul_2x32_64;
@ -174,7 +174,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
*nir_options = vector_nir_options;
}
/* Prior to Gen6, there are no three source operations, and Gen11 loses
/* Prior to Gfx6, there are no three source operations, and Gfx11 loses
* LRP.
*/
nir_options->lower_ffma16 = devinfo->ver < 6;
@ -189,7 +189,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
nir_options->lower_int64_options = int64_options;
nir_options->lower_doubles_options = fp64_options;
/* Starting with Gen11, we lower away 8-bit arithmetic */
/* Starting with Gfx11, we lower away 8-bit arithmetic */
nir_options->support_8bit_alu = devinfo->ver < 11;
nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
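The LRP note above plausibly pairs with a lowering flag along these lines (a sketch; the exact option wiring is an assumption):

    /* No LRP before Gfx6, and it is removed again on Gfx11+. */
    nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;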


@ -310,7 +310,7 @@ struct brw_vs_prog_key {
unsigned nr_userclip_plane_consts:4;
/**
* For pre-Gen6 hardware, a bitfield indicating which texture coordinates
* For pre-Gfx6 hardware, a bitfield indicating which texture coordinates
* are going to be replaced with point coordinates (as a consequence of a
* call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
* our SF thread requires exact matching between VS outputs and FS inputs,
@ -480,7 +480,7 @@ struct brw_wm_prog_key {
uint8_t color_outputs_valid;
uint64_t input_slots_valid;
GLenum alpha_test_func; /**< For Gen4/5 MRT alpha test */
GLenum alpha_test_func; /**< For Gfx4/5 MRT alpha test */
float alpha_test_ref;
};
@ -555,7 +555,7 @@ struct brw_image_param {
* From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
* minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
*
* On Gen6, the size of transform feedback data is limited not by the number
* On Gfx6, the size of transform feedback data is limited not by the number
* of components but by the number of binding table entries we set aside. We
* use one binding table entry for a float, one entry for a vector, and one
* entry per matrix column. Since the only way we can communicate our
@ -868,7 +868,7 @@ struct brw_wm_prog_data {
uint64_t inputs;
/* Mapping of VUE slots to interpolation modes.
* Used by the Gen4-5 clip/sf/wm stages.
* Used by the Gfx4-5 clip/sf/wm stages.
*/
unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */
@ -1305,25 +1305,25 @@ struct brw_gs_prog_data
int invocations;
/**
* Gen6: Provoking vertex convention for odd-numbered triangles
* Gfx6: Provoking vertex convention for odd-numbered triangles
* in tristrips.
*/
GLuint pv_first:1;
/**
* Gen6: Number of varyings that are output to transform feedback.
* Gfx6: Number of varyings that are output to transform feedback.
*/
GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
/**
* Gen6: Map from the index of a transform feedback binding table entry to the
* Gfx6: Map from the index of a transform feedback binding table entry to the
* gl_varying_slot that should be streamed out through that binding table
* entry.
*/
unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
/**
* Gen6: Map from the index of a transform feedback binding table entry to the
* Gfx6: Map from the index of a transform feedback binding table entry to the
* swizzles that should be used when streaming out data through that
* binding table entry.
*/
@ -1661,9 +1661,9 @@ encode_slm_size(unsigned gen, uint32_t bytes)
*
* Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
* -------------------------------------------------------------------
* Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
* Gfx7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
* -------------------------------------------------------------------
* Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
* Gfx9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
*/
if (bytes > 0) {
@ -1676,7 +1676,7 @@ encode_slm_size(unsigned gen, uint32_t bytes)
slm_size = ffs(slm_size) - 10;
} else {
assert(slm_size >= 4096);
/* Convert to the pre-Gen9 representation. */
/* Convert to the pre-Gfx9 representation. */
slm_size = slm_size / 4096;
}
}
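Reassembling the fragments above gives the whole encoder; treat this as a sketch (util_next_power_of_two and MAX2 come from Mesa's util headers):

    static inline uint32_t
    encode_slm_size(unsigned gen, uint32_t bytes)
    {
       uint32_t slm_size = 0;
       assert(bytes <= 64 * 1024);
       if (bytes > 0) {
          /* SLM is allocated in powers of two with a per-generation floor. */
          slm_size = util_next_power_of_two(bytes);
          if (gen >= 9) {
             /* Gfx9+: 1kB floor, encoded as log2(kB): 1kB -> 1 ... 64kB -> 7. */
             slm_size = MAX2(slm_size, 1024);
             slm_size = ffs(slm_size) - 10;
          } else {
             /* Gfx7-8: 4kB floor, encoded in 4kB units: 4kB -> 1 ... 64kB -> 16. */
             slm_size = MAX2(slm_size, 4096);
             slm_size = slm_size / 4096;
          }
       }
       return slm_size;
    }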


@ -288,7 +288,7 @@ static const char *const end_of_thread[2] = {
[1] = "EOT"
};
/* SFIDs on Gen4-5 */
/* SFIDs on Gfx4-5 */
static const char *const gfx4_sfid[16] = {
[BRW_SFID_NULL] = "null",
[BRW_SFID_MATH] = "math",
@ -405,7 +405,7 @@ static const char *const m_rt_write_subtype[] = {
[0b010] = "SIMD8/DualSrcLow",
[0b011] = "SIMD8/DualSrcHigh",
[0b100] = "SIMD8",
[0b101] = "SIMD8/ImageWrite", /* Gen6+ */
[0b101] = "SIMD8/ImageWrite", /* Gfx6+ */
[0b111] = "SIMD16/RepData-111", /* no idea how this is different than 1 */
};
@ -545,11 +545,11 @@ static const char *const gfx7_urb_opcode[] = {
[BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
[BRW_URB_OPCODE_READ_HWORD] = "read HWord",
[BRW_URB_OPCODE_READ_OWORD] = "read OWord",
[GFX7_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gen7+ */
[GFX7_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gen7+ */
[GFX8_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gen8+ */
[GFX8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gen8+ */
[GFX8_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gen8+ */
[GFX7_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gfx7+ */
[GFX7_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gfx7+ */
[GFX8_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gfx8+ */
[GFX8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gfx8+ */
[GFX8_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gfx8+ */
/* [9-15] - reserved */
};
@ -1919,7 +1919,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
break;
case GFX6_SFID_DATAPORT_SAMPLER_CACHE:
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
/* aka BRW_SFID_DATAPORT_READ on Gen4-5 */
/* aka BRW_SFID_DATAPORT_READ on Gfx4-5 */
if (devinfo->ver >= 6) {
format(file, " (%u, %u, %u, %u)",
brw_dp_desc_binding_table_index(devinfo, imm_desc),
@ -1944,7 +1944,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
break;
case GFX6_SFID_DATAPORT_RENDER_CACHE: {
/* aka BRW_SFID_DATAPORT_WRITE on Gen4-5 */
/* aka BRW_SFID_DATAPORT_WRITE on Gfx4-5 */
unsigned msg_type = brw_dp_write_desc_msg_type(devinfo, imm_desc);
err |= control(file, "DP rc message type",


@ -150,7 +150,7 @@ disasm_annotate(struct disasm_info *disasm,
group->block_start = cfg->blocks[disasm->cur_block];
}
/* There is no hardware DO instruction on Gen6+, so since DO always
/* There is no hardware DO instruction on Gfx6+, so since DO always
* starts a basic block, we need to set the .block_start of the next
* instruction's annotation with a pointer to the bblock started by
* the DO.


@ -60,7 +60,7 @@ struct brw_insn_state {
/* One of BRW_MASK_* */
unsigned mask_control:1;
/* Scheduling info for Gen12+ */
/* Scheduling info for Gfx12+ */
struct tgl_swsb swsb;
bool saturate:1;
@ -1241,7 +1241,7 @@ brw_jump_scale(const struct gen_device_info *devinfo)
if (devinfo->ver >= 5)
return 2;
/* Gen4 simply uses the number of 128-bit instructions. */
/* Gfx4 simply uses the number of 128-bit instructions. */
return 1;
}
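For context, the function reconstructed in full, including the Gfx8+ case the hunk does not show (jumps there are counted in bytes); a sketch:

    int
    brw_jump_scale(const struct gen_device_info *devinfo)
    {
       /* Broadwell and later measure jump targets in bytes. */
       if (devinfo->ver >= 8)
          return 16;
       /* Ironlake and later measure jump targets in 64-bit data chunks. */
       if (devinfo->ver >= 5)
          return 2;
       /* Gfx4 simply uses the number of 128-bit instructions. */
       return 1;
    }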


@ -46,19 +46,19 @@
* A G45-only instruction, NENOP, must be used to provide padding to align
* uncompacted instructions.
*
* Gen5 removes these restrictions and changes jump counts to be in units of
* Gfx5 removes these restrictions and changes jump counts to be in units of
* 8-byte compacted instructions, allowing jump targets to be only 8-byte
* aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
*
* Gen6 adds the ability to compact instructions with a limited range of
* Gfx6 adds the ability to compact instructions with a limited range of
* immediate values. Compactable immediates have 12 unrestricted bits, and a
* 13th bit that's replicated through the high 20 bits, to create the 32-bit
* value of DW3 in the uncompacted instruction word.
*
* On Gen7 we can compact some control flow instructions with a small positive
* On Gfx7 we can compact some control flow instructions with a small positive
* immediate in the low bits of DW3, like ENDIF with the JIP field. Other
* control flow instructions with UIP cannot be compacted, because of the
* replicated 13th bit. No control flow instructions can be compacted on Gen6
* replicated 13th bit. No control flow instructions can be compacted on Gfx6
* since the jump count field is not in DW3.
*
* break JIP/UIP
@ -71,7 +71,7 @@
*
* Gen 8 adds support for compacting 3-src instructions.
*
* Gen12 reduces the number of bits available to compacted immediates from
* Gfx12 reduces the number of bits available to compacted immediates from
* 13 to 12, but improves the compaction of floating-point immediates by
* allowing the high bits to be encoded (the sign, 8-bit exponent, and the
* three most significant bits of the mantissa), rather than the lowest bits of
@ -1303,11 +1303,11 @@ has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
/* Check for instruction bits that don't map to any of the fields of the
* compacted instruction. The instruction cannot be compacted if any of
* them are set. They overlap with:
* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
* - Dst.AddrImm[9] (bit 47 on Gen8)
* - Src0.AddrImm[9] (bit 95 on Gen8)
* - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
* - UIP[31] (bit 95 on Gen8)
* - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
* - Dst.AddrImm[9] (bit 47 on Gfx8)
* - Src0.AddrImm[9] (bit 95 on Gfx8)
* - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
* - UIP[31] (bit 95 on Gfx8)
*/
if (devinfo->ver >= 12) {
assert(!brw_inst_bits(src, 7, 7));
@ -1603,7 +1603,7 @@ precompact(const struct gen_device_info *devinfo, brw_inst inst)
*
* If we see a 0.0:F, change the type to VF so that it can be compacted.
*
* Compaction of floating-point immediates is improved on Gen12, thus
* Compaction of floating-point immediates is improved on Gfx12, thus
* removing the need for this.
*/
if (devinfo->ver < 12 &&
@ -1618,7 +1618,7 @@ precompact(const struct gen_device_info *devinfo, brw_inst inst)
/* There are no mappings for dst:d | i:d, so if the immediate is suitable
* set the types to :UD so the instruction can be compacted.
*
* FINISHME: Use dst:f | imm:f on Gen12
* FINISHME: Use dst:f | imm:f on Gfx12
*/
if (devinfo->ver < 12 &&
compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
@ -2141,8 +2141,8 @@ update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
int this_old_ip, int *compacted_counts)
{
/* JIP and UIP are in units of:
* - bytes on Gen8+; and
* - compacted instructions on Gen6+.
* - bytes on Gfx8+; and
* - compacted instructions on Gfx6+.
*/
int shift = devinfo->ver >= 8 ? 3 : 0;
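How that shift is then applied, sketched from the surrounding pass (compacted_between is the pass's bookkeeping helper; its exact signature is assumed):

    int32_t jip = brw_inst_jip(devinfo, insn) >> shift;   /* to compacted units */
    jip -= compacted_between(this_old_ip, this_old_ip + (jip / 2),
                             compacted_counts);
    brw_inst_set_jip(devinfo, insn, jip << shift);        /* back to HW units */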
@ -2172,7 +2172,7 @@ update_gfx4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
/* Jump Count is in units of:
* - uncompacted instructions on G45; and
* - compacted instructions on Gen5.
* - compacted instructions on Gfx5.
*/
int shift = devinfo->is_g4x ? 1 : 0;
@ -2390,7 +2390,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
} else if (devinfo->ver == 6) {
assert(!brw_inst_cmpt_control(devinfo, insn));
/* Jump Count is in units of compacted instructions on Gen6. */
/* Jump Count is in units of compacted instructions on Gfx6. */
int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
int target_old_ip = this_old_ip + (jump_count_compacted / 2);


@ -177,7 +177,7 @@ enum PACKED gfx10_align1_3src_dst_horizontal_stride {
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* Gfx6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
* are two contributors ANDed together to whether channels are
* executed: The predication on the instruction, and the channel write
@ -209,48 +209,48 @@ enum opcode {
BRW_OPCODE_XOR,
BRW_OPCODE_SHR,
BRW_OPCODE_SHL,
BRW_OPCODE_DIM, /**< Gen7.5 only */
BRW_OPCODE_SMOV, /**< Gen8+ */
BRW_OPCODE_DIM, /**< Gfx7.5 only */
BRW_OPCODE_SMOV, /**< Gfx8+ */
BRW_OPCODE_ASR,
BRW_OPCODE_ROR, /**< Gen11+ */
BRW_OPCODE_ROL, /**< Gen11+ */
BRW_OPCODE_ROR, /**< Gfx11+ */
BRW_OPCODE_ROL, /**< Gfx11+ */
BRW_OPCODE_CMP,
BRW_OPCODE_CMPN,
BRW_OPCODE_CSEL, /**< Gen8+ */
BRW_OPCODE_F32TO16, /**< Gen7 only */
BRW_OPCODE_F16TO32, /**< Gen7 only */
BRW_OPCODE_BFREV, /**< Gen7+ */
BRW_OPCODE_BFE, /**< Gen7+ */
BRW_OPCODE_BFI1, /**< Gen7+ */
BRW_OPCODE_BFI2, /**< Gen7+ */
BRW_OPCODE_CSEL, /**< Gfx8+ */
BRW_OPCODE_F32TO16, /**< Gfx7 only */
BRW_OPCODE_F16TO32, /**< Gfx7 only */
BRW_OPCODE_BFREV, /**< Gfx7+ */
BRW_OPCODE_BFE, /**< Gfx7+ */
BRW_OPCODE_BFI1, /**< Gfx7+ */
BRW_OPCODE_BFI2, /**< Gfx7+ */
BRW_OPCODE_JMPI,
BRW_OPCODE_BRD, /**< Gen7+ */
BRW_OPCODE_BRD, /**< Gfx7+ */
BRW_OPCODE_IF,
BRW_OPCODE_IFF, /**< Pre-Gen6 */
BRW_OPCODE_BRC, /**< Gen7+ */
BRW_OPCODE_IFF, /**< Pre-Gfx6 */
BRW_OPCODE_BRC, /**< Gfx7+ */
BRW_OPCODE_ELSE,
BRW_OPCODE_ENDIF,
BRW_OPCODE_DO, /**< Pre-Gen6 */
BRW_OPCODE_CASE, /**< Gen6 only */
BRW_OPCODE_DO, /**< Pre-Gfx6 */
BRW_OPCODE_CASE, /**< Gfx6 only */
BRW_OPCODE_WHILE,
BRW_OPCODE_BREAK,
BRW_OPCODE_CONTINUE,
BRW_OPCODE_HALT,
BRW_OPCODE_CALLA, /**< Gen7.5+ */
BRW_OPCODE_MSAVE, /**< Pre-Gen6 */
BRW_OPCODE_CALL, /**< Gen6+ */
BRW_OPCODE_MREST, /**< Pre-Gen6 */
BRW_OPCODE_RET, /**< Gen6+ */
BRW_OPCODE_PUSH, /**< Pre-Gen6 */
BRW_OPCODE_FORK, /**< Gen6 only */
BRW_OPCODE_GOTO, /**< Gen8+ */
BRW_OPCODE_POP, /**< Pre-Gen6 */
BRW_OPCODE_CALLA, /**< Gfx7.5+ */
BRW_OPCODE_MSAVE, /**< Pre-Gfx6 */
BRW_OPCODE_CALL, /**< Gfx6+ */
BRW_OPCODE_MREST, /**< Pre-Gfx6 */
BRW_OPCODE_RET, /**< Gfx6+ */
BRW_OPCODE_PUSH, /**< Pre-Gfx6 */
BRW_OPCODE_FORK, /**< Gfx6 only */
BRW_OPCODE_GOTO, /**< Gfx8+ */
BRW_OPCODE_POP, /**< Pre-Gfx6 */
BRW_OPCODE_WAIT,
BRW_OPCODE_SEND,
BRW_OPCODE_SENDC,
BRW_OPCODE_SENDS, /**< Gen9+ */
BRW_OPCODE_SENDSC, /**< Gen9+ */
BRW_OPCODE_MATH, /**< Gen6+ */
BRW_OPCODE_SENDS, /**< Gfx9+ */
BRW_OPCODE_SENDSC, /**< Gfx9+ */
BRW_OPCODE_MATH, /**< Gfx6+ */
BRW_OPCODE_ADD,
BRW_OPCODE_MUL,
BRW_OPCODE_AVG,
@ -262,11 +262,11 @@ enum opcode {
BRW_OPCODE_MAC,
BRW_OPCODE_MACH,
BRW_OPCODE_LZD,
BRW_OPCODE_FBH, /**< Gen7+ */
BRW_OPCODE_FBL, /**< Gen7+ */
BRW_OPCODE_CBIT, /**< Gen7+ */
BRW_OPCODE_ADDC, /**< Gen7+ */
BRW_OPCODE_SUBB, /**< Gen7+ */
BRW_OPCODE_FBH, /**< Gfx7+ */
BRW_OPCODE_FBL, /**< Gfx7+ */
BRW_OPCODE_CBIT, /**< Gfx7+ */
BRW_OPCODE_ADDC, /**< Gfx7+ */
BRW_OPCODE_SUBB, /**< Gfx7+ */
BRW_OPCODE_SAD2,
BRW_OPCODE_SADA2,
BRW_OPCODE_DP4,
@ -275,9 +275,9 @@ enum opcode {
BRW_OPCODE_DP2,
BRW_OPCODE_LINE,
BRW_OPCODE_PLN, /**< G45+ */
BRW_OPCODE_MAD, /**< Gen6+ */
BRW_OPCODE_LRP, /**< Gen6+ */
BRW_OPCODE_MADM, /**< Gen8+ */
BRW_OPCODE_MAD, /**< Gfx6+ */
BRW_OPCODE_LRP, /**< Gfx6+ */
BRW_OPCODE_MADM, /**< Gfx8+ */
BRW_OPCODE_NENOP, /**< G45 only */
BRW_OPCODE_NOP,
@ -466,7 +466,7 @@ enum opcode {
* Source 1: Immediate bool to indicate whether control is returned to the
* thread only after the fence has been honored.
* Source 2: Immediate byte indicating which memory to fence. Zero means
* global memory; GFX7_BTI_SLM means SLM (for Gen11+ only).
* global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
*
* Vec4 backend only uses Source 0.
*/
@ -476,7 +476,7 @@ enum opcode {
* Scheduling-only fence.
*
* Sources can be used to force a stall until the registers in those are
* available. This might generate MOVs or SYNC_NOPs (Gen12+).
* available. This might generate MOVs or SYNC_NOPs (Gfx12+).
*/
FS_OPCODE_SCHEDULING_FENCE,
@ -487,7 +487,7 @@ enum opcode {
SHADER_OPCODE_SCRATCH_HEADER,
/**
* Gen8+ SIMD8 URB Read messages.
* Gfx8+ SIMD8 URB Read messages.
*/
SHADER_OPCODE_URB_READ_SIMD8,
SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
@ -1058,7 +1058,7 @@ enum PACKED brw_width {
};
/**
* Gen12+ SWSB SBID synchronization mode.
* Gfx12+ SWSB SBID synchronization mode.
*
* This is represented as a bitmask including any required SBID token
* synchronization modes, used to synchronize out-of-order instructions. Only
@ -1226,7 +1226,7 @@ enum tgl_sync_function {
*/
enum brw_message_target {
BRW_SFID_NULL = 0,
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
BRW_SFID_MATH = 1, /* Only valid on Gfx4-5 */
BRW_SFID_SAMPLER = 2,
BRW_SFID_MESSAGE_GATEWAY = 3,
BRW_SFID_DATAPORT_READ = 4,
@ -1492,7 +1492,7 @@ enum brw_message_target {
* Note that because the DRM sets bit 4 of HDC_CHICKEN0 on BDW, CHV and at
* least some pre-production steppings of SKL due to WaForceEnableNonCoherent,
* HDC memory access may have been overridden by the kernel to be non-coherent
* (matching the behavior of the same BTI on pre-Gen8 hardware) and BTI 255
* (matching the behavior of the same BTI on pre-Gfx8 hardware) and BTI 255
* may actually be an alias for BTI 253.
*/
#define GFX8_BTI_STATELESS_IA_COHERENT 255
@ -1587,7 +1587,7 @@ enum brw_message_target {
#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
/* Gfx7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
*
* Identical for VS, DS, and HS.
@ -1597,7 +1597,7 @@ enum brw_message_target {
#define GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64)
#define GFX7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64)
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
/* Gfx6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
*/
#define GFX6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)


@ -1230,8 +1230,8 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
const struct gen_device_info *devinfo = p->devinfo;
const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
/* The F32TO16 instruction doesn't support 32-bit destination types in
* Align1 mode, and neither does the Gen8 implementation in terms of a
* converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
* Align1 mode, and neither does the Gfx8 implementation in terms of a
* converting MOV. Gfx7 does zero out the high 16 bits in Align16 mode as
* an undocumented feature.
*/
const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
@ -1520,7 +1520,7 @@ patch_IF_ELSE(struct brw_codegen *p,
* platforms, we convert flow control instructions to conditional ADDs that
* operate on IP (see brw_ENDIF).
*
* However, on Gen6, writing to IP doesn't work in single program flow mode
* However, on Gfx6, writing to IP doesn't work in single program flow mode
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
* not be updated by non-flow control instructions."). And on later
* platforms, there is no significant benefit to converting control flow
@ -1648,15 +1648,15 @@ brw_ENDIF(struct brw_codegen *p)
/* In single program flow mode, we can express IF and ELSE instructions
* equivalently as ADD instructions that operate on IP. On platforms prior
* to Gen6, flow control instructions cause an implied thread switch, so
* to Gfx6, flow control instructions cause an implied thread switch, so
* this is a significant savings.
*
* However, on Gen6, writing to IP doesn't work in single program flow mode
* However, on Gfx6, writing to IP doesn't work in single program flow mode
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
* not be updated by non-flow control instructions."). And on later
* platforms, there is no significant benefit to converting control flow
* instructions to conditional ADDs. So we only do this trick on Gen4 and
* Gen5.
* instructions to conditional ADDs. So we only do this trick on Gfx4 and
* Gfx5.
*/
if (devinfo->ver < 6 && p->single_program_flow)
emit_endif = false;
@ -1777,7 +1777,7 @@ brw_HALT(struct brw_codegen *p)
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
if (devinfo->ver < 6) {
/* From the Gen4 PRM:
/* From the Gfx4 PRM:
*
* "IP register must be put (for example, by the assembler) at <dst>
* and <src0> locations.
@ -1975,7 +1975,7 @@ void brw_CMP(struct brw_codegen *p,
* page says:
* "Any CMP instruction with a null destination must use a {switch}."
*
* It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
* It also applies to other Gfx7 platforms (IVB, BYT) even though it isn't
* mentioned on their work-arounds pages.
*/
if (devinfo->ver == 7) {
@ -2090,7 +2090,7 @@ void gfx6_math(struct brw_codegen *p,
(src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
}
/* Source modifiers are ignored for extended math instructions on Gen6. */
/* Source modifiers are ignored for extended math instructions on Gfx6. */
if (devinfo->ver == 6) {
assert(!src0.negate);
assert(!src0.abs);
@ -2788,7 +2788,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
* to Gen12, so we may have fallen back to an indirect extended
* to Gfx12, so we may have fallen back to an indirect extended
* descriptor.
*/
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
@ -2976,7 +2976,7 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset)
case BRW_OPCODE_BREAK:
assert(block_end_offset != 0);
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
/* Gen7 UIP points to WHILE; Gen6 points just after it */
/* Gfx7 UIP points to WHILE; Gfx6 points just after it */
brw_inst_set_uip(devinfo, insn,
(brw_find_loop_end(p, offset) - offset +
(devinfo->ver == 6 ? 16 : 0)) / scale);
@ -3057,7 +3057,7 @@ void brw_ff_sync(struct brw_codegen *p,
}
/**
* Emit the SEND instruction necessary to generate stream output data on Gen6
* Emit the SEND instruction necessary to generate stream output data on Gfx6
* (for transform feedback).
*
* If send_commit_msg is true, this is the last piece of stream output data
@ -3298,7 +3298,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
brw_push_insn_state(p);
/* The flag register is only used on Gen7 in align1 mode, so avoid setting
/* The flag register is only used on Gfx7 in align1 mode, so avoid setting
* unnecessary bits in the instruction words, get the information we need
* and reset the default flag register. This allows more instructions to be
* compacted.
@ -3310,7 +3310,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
if (devinfo->ver >= 8) {
/* Getting the first active channel index is easy on Gen8: Just find
/* Getting the first active channel index is easy on Gfx8: Just find
* the first bit set in the execution mask. The register exists on
* HSW already but it reads back as all ones when the current
* instruction has execution masking disabled, so it's kind of
@ -3350,7 +3350,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
* mask in f1.0. We could use a single 32-wide move here if it
* weren't because of the hardware bug that causes channel enables to
* be applied incorrectly to the second half of 32-wide instructions
* on Gen7.
* on Gfx7.
*/
const unsigned lower_size = MIN2(16, exec_size);
for (unsigned i = 0; i < exec_size / lower_size; i++) {
@ -3379,7 +3379,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
/* In SIMD4x2 mode the first active channel index is just the
* negation of the first bit of the mask register. Note that ce0
* doesn't take into account the dispatch mask, so the Gen7 path
* doesn't take into account the dispatch mask, so the Gfx7 path
* should be used instead unless you have the guarantee that the
* dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
* for some n).
@ -3650,7 +3650,7 @@ brw_float_controls_mode(struct brw_codegen *p,
* thread control field to switch for an instruction that uses
* control register as an explicit operand."
*
* On Gen12+ this is implemented in terms of SWSB annotations instead.
* On Gfx12+ this is implemented in terms of SWSB annotations instead.
*/
brw_set_default_swsb(p, tgl_swsb_regdist(1));


@ -292,9 +292,9 @@ invalid_values(const struct gen_device_info *devinfo, const brw_inst *inst)
if (num_sources == 3) {
/* Nothing to test:
* No 3-src instructions on Gen4-5
* No reg file bits on Gen6-10 (align16)
* No invalid encodings on Gen10-12 (align1)
* No 3-src instructions on Gfx4-5
* No reg file bits on Gfx6-10 (align16)
* No invalid encodings on Gfx10-12 (align1)
*/
} else {
if (devinfo->ver > 6) {
@ -1873,7 +1873,7 @@ special_requirements_for_handling_double_precision_data_types(
* If Align16 is required for an operation with QW destination and non-QW
* source datatypes, the execution size cannot exceed 2.
*
* We assume that the restriction applies to all Gen8+ parts.
* We assume that the restriction applies to all Gfx8+ parts.
*/
if (devinfo->ver >= 8) {
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);


@ -1097,7 +1097,7 @@ fs_inst::flags_read(const gen_device_info *devinfo) const
if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
predicate == BRW_PREDICATE_ALIGN1_ALLV) {
/* The vertical predication modes combine corresponding bits from
* f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware.
* f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
*/
const unsigned shift = devinfo->ver >= 7 ? 4 : 2;
return flag_mask(this, 1) << shift | flag_mask(this, 1);
@ -1440,7 +1440,7 @@ fs_visitor::emit_sampleid_setup()
* shr(16) tmp<1>W g1.0<1,8,0>B 0x44440000:V
* and(16) dst<1>D tmp<8,8,1>W 0xf:W
*
* TODO: These payload bits exist on Gen7 too, but they appear to always
* TODO: These payload bits exist on Gfx7 too, but they appear to always
* be zero, so this code fails to work. We should find out why.
*/
const fs_reg tmp = abld.vgrf(BRW_REGISTER_TYPE_UW);
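The two ALU operations sketched in the comment above, roughly as they would be emitted (the exact regioning of g1.0 is abbreviated, and dst stands in for the real destination register):

    abld.SHR(tmp, fs_reg(retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UB)),
             brw_imm_v(0x44440000));
    abld.AND(dst, tmp, brw_imm_w(0xf));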
@ -1639,7 +1639,7 @@ fs_visitor::assign_curb_setup()
ubld.MOV(header0, brw_imm_ud(0));
ubld.group(1, 0).SHR(component(header0, 2), base_addr, brw_imm_ud(4));
/* On Gen12-HP we load constants at the start of the program using A32
/* On Gfx12-HP we load constants at the start of the program using A32
* stateless messages.
*/
for (unsigned i = 0; i < uniform_push_length;) {
@ -2805,7 +2805,7 @@ fs_visitor::opt_algebraic()
case BRW_OPCODE_OR:
if (inst->src[0].equals(inst->src[1]) ||
inst->src[1].is_zero()) {
/* On Gen8+, the OR instruction can have a source modifier that
/* On Gfx8+, the OR instruction can have a source modifier that
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
* or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
*/
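The transform the comment describes, sketched:

    if (inst->src[0].negate) {
       /* 'OR r0, ~r1, 0' becomes 'NOT r0, r1'. */
       inst->opcode = BRW_OPCODE_NOT;
       inst->src[0].negate = false;
    } else {
       inst->opcode = BRW_OPCODE_MOV;
    }
    inst->src[1] = reg_undef;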
@ -2984,7 +2984,7 @@ fs_visitor::opt_algebraic()
bool
fs_visitor::opt_zero_samples()
{
/* Gen4 infers the texturing opcode based on the message length so we can't
/* Gfx4 infers the texturing opcode based on the message length so we can't
* change it.
*/
if (devinfo->ver < 5)
@ -3953,7 +3953,7 @@ fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block)
: brw_imm_w(inst->src[1].d));
}
} else {
/* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
* do 32-bit integer multiplication in one instruction, but instead
* must do a sequence (which actually calculates a 64-bit result):
*
@ -4157,11 +4157,11 @@ fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
if (devinfo->ver >= 8) {
/* Until Gen8, integer multiplies read 32-bits from one source,
/* Until Gfx8, integer multiplies read 32-bits from one source,
* and 16-bits from the other, relying on the MACH instruction
* to generate the high bits of the result.
*
* On Gen8, the multiply instruction does a full 32x32-bit
* On Gfx8, the multiply instruction does a full 32x32-bit
* multiply, but in order to do a 64-bit multiply we can simulate
* the previous behavior and then use a MACH instruction.
*/
@ -4179,13 +4179,13 @@ fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
* accumulator register is used by the hardware for instructions
* that access the accumulator implicitly (e.g. MACH). A
* second-half instruction would normally map to acc1, which
* doesn't exist on Gen7 and up (the hardware does emulate it for
* doesn't exist on Gfx7 and up (the hardware does emulate it for
* floating-point instructions *only* by taking advantage of the
* extra precision of acc0 not normally used for floating point
* arithmetic).
*
* HSW and up are careful enough not to try to access an
* accumulator register that doesn't exist, but on earlier Gen7
* accumulator register that doesn't exist, but on earlier Gfx7
* hardware we need to make sure that the quarter control bits are
* zero to avoid non-deterministic behaviour and emit an extra MOV
* to get the result masked correctly according to the current
@ -4261,7 +4261,7 @@ fs_visitor::lower_minmax()
inst->predicate == BRW_PREDICATE_NONE) {
/* If src1 is an immediate value that is not NaN, then it can't be
* NaN. In that case, emit CMP because it is much better for cmod
* propagation. Likewise if src1 is not float. Gen4 and Gen5 don't
* propagation. Likewise if src1 is not float. Gfx4 and Gfx5 don't
* support HF or DF, so it is not necessary to check for those.
*/
if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
@ -4526,7 +4526,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
};
ubld.LOAD_PAYLOAD(header, header_sources, 2, 0);
/* Gen12 will require additional fix-ups if we ever hit this path. */
/* Gfx12 will require additional fix-ups if we ever hit this path. */
assert(devinfo->ver < 12);
}
@ -4731,7 +4731,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst)
ubld.LOAD_PAYLOAD(header, header_sources, ARRAY_SIZE(header_sources), 0);
if (devinfo->ver >= 12) {
/* On Gen12 the Viewport and Render Target Array Index fields (AKA
/* On Gfx12 the Viewport and Render Target Array Index fields (AKA
* Poly 0 Info) are provided in r1.1 instead of r0.0, and the render
* target message header format was updated accordingly -- However
* the updated format only works for the lower 16 channels in a
@ -5113,7 +5113,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
fs_reg sampler_state_ptr =
retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD);
/* Gen11+ sampler message headers include bits in 4:0 which conflict
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
* with the ones included in g0.3 bits 4:0. Mask them out.
*/
if (devinfo->ver >= 11) {
@ -5136,7 +5136,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
ubld1.ADD(component(header, 3), sampler_state_ptr, tmp);
}
} else if (devinfo->ver >= 11) {
/* Gen11+ sampler message headers include bits in 4:0 which conflict
/* Gfx11+ sampler message headers include bits in 4:0 which conflict
* with the ones included in g0.3 bits 4:0. Mask them out.
*/
ubld1.AND(component(header, 3),
@ -5195,7 +5195,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
break;
case SHADER_OPCODE_TXF:
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
* On Gen9 they are u, v, lod, r
* On Gfx9 they are u, v, lod, r
*/
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D), coordinate);
@ -5239,7 +5239,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
/* On Gen9+ we'll use ld2dms_w instead which has two registers for
/* On Gfx9+ we'll use ld2dms_w instead which has two registers for
* the MCS data.
*/
if (op == SHADER_OPCODE_TXF_CMS_W) {
@ -5574,8 +5574,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
*
* Earlier generations have a similar wording. Because of this restriction
* we don't attempt to implement sample masks via predication for such
* messages prior to Gen9, since we have to provide a header anyway. On
* Gen11+ the header has been removed so we can only use predication.
* messages prior to Gfx9, since we have to provide a header anyway. On
* Gfx11+ the header has been removed so we can only use predication.
*
* For all stateless A32 messages, we also need a header
*/
@ -6391,9 +6391,9 @@ fs_visitor::lower_logical_sends()
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
/* The math opcodes are overloaded for the send-like and
* expression-like instructions which seems kind of icky. Gen6+ has
* expression-like instructions which seems kind of icky. Gfx6+ has
* a native (but rather quirky) MATH instruction so we don't need to
* do anything here. On Gen4-5 we'll have to lower the Gen6-like
* do anything here. On Gfx4-5 we'll have to lower the Gfx6-like
* logical instructions (which we can easily recognize because they
* have mlen = 0) into send-like virtual instructions.
*/
@ -6520,7 +6520,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
* integer DWord, the source register is not incremented but the
* source sub register is incremented."
*
* The hardware specs from Gen4 to Gen7.5 mention similar regioning
* The hardware specs from Gfx4 to Gfx7.5 mention similar regioning
* restrictions. The code below intentionally doesn't check whether the
* destination type is integer because empirically the hardware doesn't
* seem to care what the actual type is as long as it's dword-aligned.
@ -6574,7 +6574,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
* execution mask channels are required, split the instruction into two
* SIMD16 instructions."
*
* There is similar text in the HSW PRMs. Gen4-6 don't even implement
* There is similar text in the HSW PRMs. Gfx4-6 don't even implement
* 32-wide control flow support in hardware and will behave similarly.
*/
if (devinfo->ver < 8 && !inst->force_writemask_all)
@ -6597,7 +6597,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
if (inst->is_3src(devinfo) && !devinfo->supports_simd16_3src)
max_width = MIN2(max_width, inst->exec_size / reg_count);
/* Pre-Gen8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is
/* Pre-Gfx8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is
* the 8-bit quarter of the execution mask signals specified in the
* instruction control fields) for the second compressed half of any
* single-precision instruction (for double-precision instructions
@ -6706,7 +6706,7 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo,
inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
3;
/* On Gen9+ the LOD argument is for free if we're able to use the LZ
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
* variant of the TXL or TXF message.
*/
const bool implicit_lod = devinfo->ver >= 9 &&
@ -6821,8 +6821,8 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS: {
/* Unary extended math instructions are limited to SIMD8 on Gen4 and
* Gen6. Extended Math Function is limited to SIMD8 with half-float.
/* Unary extended math instructions are limited to SIMD8 on Gfx4 and
* Gfx6. Extended Math Function is limited to SIMD8 with half-float.
*/
if (devinfo->ver == 6 || (devinfo->ver == 4 && !devinfo->is_g4x))
return MIN2(8, inst->exec_size);
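A sketch of the cap being applied (standalone C, names invented):

#include <stdbool.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Unary extended math is limited to SIMD8 on Gfx6 and on Gfx4 (G4x
 * excepted), per the comment above. */
static unsigned
unary_math_width(unsigned ver, bool is_g4x, unsigned exec_size)
{
   if (ver == 6 || (ver == 4 && !is_g4x))
      return MIN2(8, exec_size);
   return exec_size;
}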
@ -6832,7 +6832,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
}
case SHADER_OPCODE_POW: {
/* SIMD16 is only allowed on Gen7+. Extended Math Function is limited
/* SIMD16 is only allowed on Gfx7+. Extended Math Function is limited
* to SIMD8 with half-float.
*/
if (devinfo->ver < 7)
@ -6886,7 +6886,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
* In this context, "DW operations" means "operations acting on 32-bit
* values", so it includes operations on floats.
*
* Gen4 has a similar restriction. From the i965 PRM, section 11.5.3
* Gfx4 has a similar restriction. From the i965 PRM, section 11.5.3
* (Instruction Compression -> Rules and Restrictions):
*
* "A compressed instruction must be in Align1 access mode. Align16
@ -6903,13 +6903,13 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_MULH:
/* MULH is lowered to the MUL/MACH sequence using the accumulator, which
* is 8-wide on Gen7+.
* is 8-wide on Gfx7+.
*/
return (devinfo->ver >= 7 ? 8 :
get_fpu_lowered_simd_width(devinfo, inst));
case FS_OPCODE_FB_WRITE_LOGICAL:
/* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
/* Gfx6 doesn't support SIMD16 depth writes but we cannot handle them
* here.
*/
assert(devinfo->ver != 6 ||
@ -6949,7 +6949,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
/* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
/* Gfx4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
* messages. Use SIMD16 instead.
*/
if (devinfo->ver == 4)
@ -7323,7 +7323,7 @@ fs_visitor::lower_simd_width()
/**
* Transform barycentric vectors into the interleaved form expected by the PLN
* instruction and returned by the Gen7+ PI shared function.
* instruction and returned by the Gfx7+ PI shared function.
*
* For channels 0-15 in SIMD16 mode they are expected to be laid out as
* follows in the register file:
@ -8076,7 +8076,7 @@ find_halt_control_flow_region_start(const fs_visitor *v)
}
/**
* Work around the Gen12 hardware bug filed as GEN:BUG:1407528679. EU fusion
* Work around the Gfx12 hardware bug filed as GEN:BUG:1407528679. EU fusion
* can cause a BB to be executed with all channels disabled, which will lead
* to the execution of any NoMask instructions in it, even though any
* execution-masked instructions will be correctly shot down. This may break
@ -9227,7 +9227,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
* generations, the only configurations supporting persample dispatch
* are those in which only one dispatch width is enabled.
*
* The Gen12 hardware spec has a similar dispatch grouping table, but
* The Gfx12 hardware spec has a similar dispatch grouping table, but
* the following conflicting restriction applies (from the page on
* "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
*

View File

@ -447,7 +447,7 @@ private:
/**
* Return the flag register used in fragment shaders to keep track of live
* samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
* samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
* dispatch mode, while earlier generations are constrained to f0.1, which
* limits the dispatch width to SIMD16 for fragment shaders that use discard.
*/

View File

@ -30,7 +30,7 @@
* instructions.
*
* Unfortunately there is close to no information about bank conflicts in the
* hardware spec, but experimentally on Gen7-Gen9 ternary instructions seem to
* hardware spec, but experimentally on Gfx7-Gfx9 ternary instructions seem to
* incur an average bank conflict penalty of one cycle per SIMD8 op whenever
* the second and third source are stored in the same GRF bank (\sa bank_of()
* for the exact bank layout) which cannot be fetched during the same cycle by
@ -568,14 +568,14 @@ namespace {
}
/* Preserve the original allocation of VGRFs used by the barycentric
* source of the LINTERP instruction on Gen6, since pair-aligned
* source of the LINTERP instruction on Gfx6, since pair-aligned
* barycentrics allow the PLN instruction to be used.
*/
if (v->devinfo->has_pln && v->devinfo->ver <= 6 &&
inst->opcode == FS_OPCODE_LINTERP)
constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true;
/* The location of the Gen7 MRF hack registers is hard-coded in the
/* The location of the Gfx7 MRF hack registers is hard-coded in the
* rest of the compiler back-end. Don't attempt to move them around.
*/
if (v->devinfo->ver >= 7) {
@ -620,9 +620,9 @@ namespace {
* assignment of r. \sa delta_conflicts() for a vectorized implementation
* of the expression above.
*
* FINISHME: Teach this about the Gen10+ bank conflict rules, which are
* FINISHME: Teach this about the Gfx10+ bank conflict rules, which are
* somewhat more relaxed than on previous generations. In the
* meantime optimizing based on Gen9 weights is likely to be more
* meantime optimizing based on Gfx9 weights is likely to be more
* helpful than not optimizing at all.
*/
weight_vector_type *

View File

@ -703,7 +703,7 @@ namespace brw {
}
/**
* Gen4 predicated IF.
* Gfx4 predicated IF.
*/
instruction *
IF(brw_predicate predicate) const
@ -849,10 +849,10 @@ namespace brw {
* expanding that result out, but we would need to be careful with
* masking.
*
* Gen6 hardware ignores source modifiers (negate and abs) on math
* Gfx6 hardware ignores source modifiers (negate and abs) on math
* instructions, so we also move to a temp to set those up.
*
* Gen7 relaxes most of the above restrictions, but still can't use IMM
* Gfx7 relaxes most of the above restrictions, but still can't use IMM
* operands to math instructions.
*/
if ((shader->devinfo->ver == 6 &&

View File

@ -88,7 +88,7 @@ brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
const unsigned max_hw_width = 16;
/* XXX - The equation above is strictly speaking not correct on
* hardware that supports unbalanced GRF writes -- On Gen9+
* hardware that supports unbalanced GRF writes -- On Gfx9+
* each decompressed chunk of the instruction may have a
* different execution size when the number of components
* written to each destination GRF is not the same.
@ -525,7 +525,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
* code, using it saves us 0 instructions and would require quite a bit
* of case-by-case work. It's just not worth it.
*
* Due to a hardware bug some platforms (particularly Gen11+) seem to
* Due to a hardware bug some platforms (particularly Gfx11+) seem to
* require the address components of all channels to be valid whether or
* not they're active, which causes issues if we use VxH addressing
* under non-uniform control-flow. We can easily work around that by
@ -685,7 +685,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
lower_width == dispatch_width;
brw_inst *insn;
/* Due to a hardware bug some platforms (particularly Gen11+) seem
/* Due to a hardware bug some platforms (particularly Gfx11+) seem
* to require the address components of all channels to be valid
* whether or not they're active, which causes issues if we use VxH
* addressing under non-uniform control-flow. We can easily work
@ -1409,7 +1409,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
* DWord elements ONLY. This is applicable when both source and
* destination are half-floats."
*
* So for half-float operations we use the Gen11+ Align1 path. CHV
* So for half-float operations we use the Gfx11+ Align1 path. CHV
* inherits its FP16 hardware from SKL, so it is not affected.
*/
if (devinfo->ver >= 11 ||

View File

@ -206,7 +206,7 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
const fs_builder abld =
v->bld.annotate("gl_HelperInvocation", NULL);
/* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the
/* On Gfx6+ (gl_HelperInvocation is only exposed on Gfx7+) the
* pixel mask is in g1.7 of the thread payload.
*
* We move the per-channel pixel enable bit to the low bit of each
@ -232,7 +232,7 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
* that is the opposite of gl_HelperInvocation so we need to invert
* the mask.
*
* The negate source-modifier bit of logical instructions on Gen8+
* The negate source-modifier bit of logical instructions on Gfx8+
* performs 1's complement negation, so we can use that instead of
* a NOT instruction.
*/
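A standalone sketch of the transform described above (name invented): the dispatch mask marks real invocations, so gl_HelperInvocation is its per-channel inverse.

#include <stdbool.h>
#include <stdint.h>

static bool
helper_invocation(uint32_t pixel_mask, unsigned channel)
{
   /* The mask bit is set for real (non-helper) invocations; invert it. */
   return ((pixel_mask >> channel) & 1u) == 0;
}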
@ -1251,7 +1251,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
assert(nir_dest_bit_size(instr->dest.dest) == 32);
/* Before Gen7, the order of the 32-bit source and the 16-bit source was
/* Before Gfx7, the order of the 32-bit source and the 16-bit source was
* swapped. The extension isn't enabled on those platforms, so don't
* pretend to support the differences.
*/
@ -4321,7 +4321,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
slm_fence && workgroup_size() <= dispatch_width)
slm_fence = false;
/* Prior to Gen11, there's only L3 fence, so emit that instead. */
/* Prior to Gfx11, there's only L3 fence, so emit that instead. */
if (slm_fence && devinfo->ver < 11) {
slm_fence = false;
l3_fence = true;
@ -4333,7 +4333,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
const bool needs_render_fence =
devinfo->ver == 7 && !devinfo->is_haswell;
/* Be conservative in Gen11+ and always stall in a fence, since there
/* Be conservative in Gfx11+ and always stall in a fence, since there
* are two different fences and the shader might want to synchronize
* between them.
*
@ -6044,7 +6044,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
nir_dest[0] = offset(dst, bld, 3);
} else if (instr->op == nir_texop_txs &&
dest_size >= 3 && devinfo->ver < 7) {
/* Gen4-6 return 0 instead of 1 for single layer surfaces. */
/* Gfx4-6 return 0 instead of 1 for single layer surfaces. */
fs_reg depth = offset(dst, bld, 2);
nir_dest[2] = vgrf(glsl_type::int_type);
bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);

View File

@ -576,7 +576,7 @@ namespace {
unsigned
spill_max_size(const backend_shader *s)
{
/* FINISHME - On Gen7+ it should be possible to avoid this limit
/* FINISHME - On Gfx7+ it should be possible to avoid this limit
* altogether by spilling directly from the temporary GRF
* allocated to hold the result of the instruction (and the
* scratch write header).
@ -594,7 +594,7 @@ namespace {
unsigned
spill_base_mrf(const backend_shader *s)
{
/* We don't use the MRF hack on Gen9+ */
/* We don't use the MRF hack on Gfx9+ */
assert(s->devinfo->ver < 9);
return BRW_MAX_MRF(s->devinfo->ver) - spill_max_size(s) - 1;
}
@ -699,7 +699,7 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst)
grf127_send_hack_node);
/* Spill instructions are generated as SEND messages from MRF, but as
* Gen7+ supports sending from GRF the driver will assign these
* Gfx7+ supports sending from GRF the driver will assign these
* MRF registers to a GRF. Implementations reuse the dest of the send
* message as source. Since we will have an overlap for sure, we create
* an interference between destination and grf127.
@ -844,7 +844,7 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
compiler->fs_reg_sets[rsi].classes[size - 1]);
}
/* Special case: on pre-Gen7 hardware that supports PLN, the second operand
/* Special case: on pre-Gfx7 hardware that supports PLN, the second operand
* of a PLN instruction needs to be an even-numbered register, so we have a
* special register class aligned_bary_class to handle this case.
*/
@ -914,9 +914,9 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
} else if (devinfo->ver >= 7 && spill_offset < (1 << 12) * REG_SIZE) {
/* The Gen7 descriptor-based offset is 12 bits of HWORD units.
* Because the Gen7-style scratch block read is hardwired to BTI 255,
* on Gen9+ it would cause the DC to do an IA-coherent read, which
/* The Gfx7 descriptor-based offset is 12 bits of HWORD units.
* Because the Gfx7-style scratch block read is hardwired to BTI 255,
* on Gfx9+ it would cause the DC to do an IA-coherent read, which
* largely outweighs the slight advantage from not having to provide
* the address as part of the message header, so we're better off
* using plain old oword block reads.
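A sketch of the range check above (standalone C, name invented): the Gfx7 scratch block read encodes its offset as a 12-bit count of 32B HWORDs, so only the first 4096 * 32B = 128KB of scratch is reachable that way.

#include <stdbool.h>
#include <stdint.h>

static bool
fits_gfx7_scratch_offset(uint32_t spill_offset_B)
{
   return spill_offset_B < (1u << 12) * 32u; /* 12-bit HWORD offset */
}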

View File

@ -23,7 +23,7 @@
/** @file brw_fs_scoreboard.cpp
*
* Gen12+ hardware lacks the register scoreboard logic that used to guarantee
* Gfx12+ hardware lacks the register scoreboard logic that used to guarantee
* data coherency between register reads and writes in previous generations.
* This lowering pass runs after register allocation in order to make up for
* it.

View File

@ -60,7 +60,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
}
/**
* Apply workarounds for Gen6 gather with UINT/SINT
* Apply workarounds for Gfx6 gather with UINT/SINT
*/
void
fs_visitor::emit_gfx6_gather_wa(uint8_t wa, fs_reg dst)
@ -115,7 +115,7 @@ fs_visitor::emit_dummy_fs()
write->mlen = 2 + 4 * reg_width;
}
/* Tell the SF we don't have any inputs. Gen4-5 require at least one
/* Tell the SF we don't have any inputs. Gfx4-5 require at least one
* varying to avoid GPU hangs, so set that.
*/
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
@ -131,7 +131,7 @@ fs_visitor::emit_dummy_fs()
stage_prog_data->dispatch_grf_start_reg = 2;
wm_prog_data->dispatch_grf_start_reg_16 = 2;
wm_prog_data->dispatch_grf_start_reg_32 = 2;
grf_used = 1; /* Gen4-5 don't allow zero GRF blocks */
grf_used = 1; /* Gfx4-5 don't allow zero GRF blocks */
calculate_cfg();
}

View File

@ -76,7 +76,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, \
}
/* A simple macro for fields which stay in the same place on all generations,
* except for Gen12!
* except for Gfx12!
*/
#define F(name, hi4, lo4, hi12, lo12) FC(name, hi4, lo4, hi12, lo12, true)
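A hypothetical expansion sketch (field name and bit positions invented, assuming the surrounding header's brw_inst_bits() helper): F(example, 27, 24, 91, 88) would behave roughly like the accessor below, reading one bit range on Gfx4-11 and another on Gfx12.

static inline uint64_t
brw_inst_example(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   /* Dispatch on generation, as the generated accessors do. */
   return devinfo->ver >= 12 ? brw_inst_bits(inst, 91, 88)
                             : brw_inst_bits(inst, 27, 24);
}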
@ -122,7 +122,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
return brw_inst_bits(inst, high, low); \
}
/* A macro for fields which moved as of Gen8+. */
/* A macro for fields which moved as of Gfx8+. */
#define F8(name, gfx4_high, gfx4_low, gfx8_high, gfx8_low, \
gfx12_high, gfx12_low) \
FF(name, \
@ -134,7 +134,7 @@ FF(name, \
/* 8: */ gfx8_high, gfx8_low, \
/* 12: */ gfx12_high, gfx12_low);
/* Macro for fields that gained extra discontiguous MSBs in Gen12 (specified
/* Macro for fields that gained extra discontiguous MSBs in Gfx12 (specified
* by hi12ex-lo12ex).
*/
#define FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
@ -176,7 +176,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
hi7, lo7, hi8, lo8, hi12ex, lo12ex, hi12, lo12, true)
/* Macro for fields that didn't move across generations until Gen12, and then
/* Macro for fields that didn't move across generations until Gfx12, and then
* gained extra discontiguous bits.
*/
#define FDC(name, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions) \
@ -184,7 +184,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
hi4, lo4, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions)
/* Macro for the 2-bit register file field, which on Gen12+ is stored as the
/* Macro for the 2-bit register file field, which on Gfx12+ is stored as the
* variable length combination of an IsImm (hi12) bit and an additional file
* (lo12) bit.
*/
@ -217,7 +217,7 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
} \
}
/* Macro for fields that become a constant in Gen12+ and are not actually
/* Macro for fields that become a constant in Gfx12+ and are not actually
* represented in the instruction.
*/
#define FK(name, hi4, lo4, const12) \
@ -834,7 +834,7 @@ FF(sfid,
FF(null_rt,
/* 4-7: */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 8: */ 80, 80,
/* 12: */ 44, 44) /* actually only Gen11+ */
/* 12: */ 44, 44) /* actually only Gfx11+ */
FC(base_mrf, /* 4+ */ 27, 24, /* 12+ */ -1, -1, devinfo->ver < 6);
FF(send_rta_index,
/* 4: */ -1, -1,
@ -886,7 +886,7 @@ FF(urb_opcode,
/** @} */
/**
* Gen4-5 math messages:
* Gfx4-5 math messages:
* @{
*/
FC(math_msg_data_type, /* 4+ */ MD(7), MD(7), /* 12+ */ -1, -1, devinfo->ver < 6)
@ -933,7 +933,7 @@ F(binding_table_index, /* 4+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0)) /*
*/
FC(dp_category, /* 4+ */ MD(18), MD(18), /* 12+ */ MD12(18), MD12(18), devinfo->ver >= 7)
/* Gen4-5 store fields in different bits for read/write messages. */
/* Gfx4-5 store fields in different bits for read/write messages. */
FF(dp_read_msg_type,
/* 4: */ MD(13), MD(12),
/* 4.5: */ MD(13), MD(11),
@ -976,7 +976,7 @@ FF(dp_write_commit,
/* 7+: does not exist */ -1, -1, -1, -1,
/* 12: */ -1, -1)
/* Gen6+ use the same bit locations for everything. */
/* Gfx6+ use the same bit locations for everything. */
FF(dp_msg_type,
/* 4-5: use dp_read_msg_type or dp_write_msg_type instead */
-1, -1, -1, -1, -1, -1,
@ -994,7 +994,7 @@ FD(dp_msg_control,
/** @} */
/**
* Scratch message bits (Gen7+):
* Scratch message bits (Gfx7+):
* @{
*/
FC(scratch_read_write, /* 4+ */ MD(17), MD(17), /* 12+ */ MD12(17), MD12(17), devinfo->ver >= 7) /* 0 = read, 1 = write */
@ -1189,7 +1189,7 @@ REG_TYPE(src1)
#undef REG_TYPE
/* The AddrImm fields are split into two discontiguous sections on Gen8+ */
/* The AddrImm fields are split into two discontiguous sections on Gfx8+ */
#define BRW_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low, \
g12_high, g12_low) \
static inline void \
@ -1222,7 +1222,7 @@ brw_inst_##reg##_ia1_addr_imm(const struct gen_device_info *devinfo, \
}
/* AddrImm[9:0] for Align1 Indirect Addressing */
/* -Gen 4- ----Gen8---- -Gen12- */
/* -Gen 4- ----Gfx8---- -Gfx12- */
BRW_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98)
BRW_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66)
BRW_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50)
@ -1257,7 +1257,7 @@ brw_inst_##reg##_ia16_addr_imm(const struct gen_device_info *devinfo, \
/* AddrImm[9:0] for Align16 Indirect Addressing:
* Compared to Align1, these are missing the low 4 bits.
* -Gen 4- ----Gen8----
* -Gen 4- ----Gfx8----
*/
BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
@ -1377,7 +1377,7 @@ brw_compact_inst_##name(const struct gen_device_info *devinfo, \
}
/* A simple macro for fields which stay in the same place on all generations
* except for Gen12.
* except for Gfx12.
*/
#define F(name, high, low, gfx12_high, gfx12_low) \
FC(name, high, low, gfx12_high, gfx12_low, true)
@ -1412,7 +1412,7 @@ brw_compact_inst_imm(const struct gen_device_info *devinfo,
}
/**
* (Gen8+) Compacted three-source instructions:
* (Gfx8+) Compacted three-source instructions:
* @{
*/
FC(3src_src2_reg_nr, /* 4+ */ 63, 57, /* 12+ */ 55, 48, devinfo->ver >= 8)

View File

@ -38,7 +38,7 @@ namespace {
unit_fe,
/** EU FPU0 (Note that co-issue to FPU1 is currently not modeled here). */
unit_fpu,
/** Extended Math unit (AKA FPU1 on Gen8-11, part of the EU on Gen6+). */
/** Extended Math unit (AKA FPU1 on Gfx8-11, part of the EU on Gfx6+). */
unit_em,
/** Sampler shared function. */
unit_sampler,
@ -71,7 +71,7 @@ namespace {
enum dependency_id {
/* Register part of the GRF. */
dependency_id_grf0 = 0,
/* Register part of the MRF. Only used on Gen4-6. */
/* Register part of the MRF. Only used on Gfx4-6. */
dependency_id_mrf0 = dependency_id_grf0 + BRW_MAX_GRF,
/* Address register part of the ARF. */
dependency_id_addr0 = dependency_id_mrf0 + 24,
@ -79,9 +79,9 @@ namespace {
dependency_id_accum0 = dependency_id_addr0 + 1,
/* Flag register part of the ARF. */
dependency_id_flag0 = dependency_id_accum0 + 12,
/* SBID token write completion. Only used on Gen12+. */
/* SBID token write completion. Only used on Gfx12+. */
dependency_id_sbid_wr0 = dependency_id_flag0 + 8,
/* SBID token read completion. Only used on Gen12+. */
/* SBID token read completion. Only used on Gfx12+. */
dependency_id_sbid_rd0 = dependency_id_sbid_wr0 + 16,
/* Number of computation dependencies currently tracked. */
num_dependency_ids = dependency_id_sbid_rd0 + 16
@ -280,7 +280,7 @@ namespace {
* Most timing parameters are obtained from the multivariate linear
* regression of a sample of empirical timings measured using the tm0
* register (as can be done today by using the shader_time debugging
* option). The Gen4-5 math timings are obtained from BSpec Volume 5c.3
* option). The Gfx4-5 math timings are obtained from BSpec Volume 5c.3
* "Shared Functions - Extended Math", Section 3.2 "Performance".
* Parameters marked XXX shall be considered low-quality; they're possibly
* high variance or completely guessed in cases where experimental data was
@ -1229,7 +1229,7 @@ namespace {
/**
* Return the dependency ID corresponding to the SBID read completion
* condition of a Gen12+ SWSB.
* condition of a Gfx12+ SWSB.
*/
dependency_id
tgl_swsb_rd_dependency_id(tgl_swsb swsb)
@ -1244,7 +1244,7 @@ namespace {
/**
* Return the dependency ID corresponding to the SBID write completion
* condition of a Gen12+ SWSB.
* condition of a Gfx12+ SWSB.
*/
dependency_id
tgl_swsb_wr_dependency_id(tgl_swsb swsb)
@ -1531,7 +1531,7 @@ namespace {
* weights used elsewhere in the compiler back-end.
*
* Note that we provide slightly more pessimistic weights on
* Gen12+ for SIMD32, since the effective warp size on that
* Gfx12+ for SIMD32, since the effective warp size on that
* platform is 2x the SIMD width due to EU fusion, which increases
* the likelihood of divergent control flow in comparison to
* previous generations, giving narrower SIMD modes a performance

View File

@ -614,7 +614,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
*
* Passing 1 to the peephole select pass causes it to convert
* if-statements that contain at most a single ALU instruction (total)
* in both branches. Before Gen6, some math instructions were
* in both branches. Before Gfx6, some math instructions were
* prohibitively expensive and the results of compare operations needed an
* extra resolve step. For these reasons, this pass is more harmful
* than good on those platforms.

View File

@ -27,7 +27,7 @@
/**
* GEN:BUG:1806565034:
*
* Gen12+ allows setting RENDER_SURFACE_STATE::SurfaceArray to 1 only if
* Gfx12+ allows setting RENDER_SURFACE_STATE::SurfaceArray to 1 only if
* array_len > 1. Setting RENDER_SURFACE_STATE::SurfaceArray to 0 results in
* the HW RESINFO message to report an array size of 0 which breaks texture
* array size queries.

View File

@ -137,7 +137,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
* by passing in the miplevel as tile.z for 3-D textures and 0 in
* tile.z for 2-D array textures.
*
* See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
* See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
* Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
* of the hardware 3D texture and 2D array layouts.
*/
@ -410,7 +410,7 @@ lower_image_load_instr(nir_builder *b,
nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
if (devinfo->ver == 7 && !devinfo->is_haswell) {
/* Check whether the first stride component (i.e. the Bpp value)
* is greater than four, which on Gen7 indicates that a surface of
* is greater than four, which on Gfx7 indicates that a surface of
* type RAW has been bound for untyped access. Reading or writing
* to a surface of type other than RAW using untyped surface
* messages causes a hang on IVB and VLV.
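A sketch of the guard just described (standalone C, name invented): on Gfx7 (IVB/VLV) a Bpp stride component greater than four signals a RAW surface bound for untyped access.

#include <stdbool.h>
#include <stdint.h>

static bool
is_raw_surface_gfx7(uint32_t bpp)
{
   return bpp > 4; /* Bpp > 4 means a RAW surface was bound */
}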
@ -558,7 +558,7 @@ lower_image_store_instr(nir_builder *b,
nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
if (devinfo->ver == 7 && !devinfo->is_haswell) {
/* Check whether the first stride component (i.e. the Bpp value)
* is greater than four, which on Gen7 indicates that a surface of
* is greater than four, which on Gfx7 indicates that a surface of
* type RAW has been bound for untyped access. Reading or writing
* to a surface of type other than RAW using untyped surface
* messages causes a hang on IVB and VLV.

View File

@ -25,7 +25,7 @@
#include "brw_nir.h"
/**
* Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
* Implements the WaPreventHSTessLevelsInterference workaround (for Gfx7-8).
*
* From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
* definition of the patch header layouts):

View File

@ -917,8 +917,8 @@ brw_flag_subreg(unsigned subreg)
}
/**
* Return the mask register present in Gen4-5, or the related register present
* in Gen7.5 and later hardware referred to as "channel enable" register in
* Return the mask register present in Gfx4-5, or the related register present
* in Gfx7.5 and later hardware referred to as "channel enable" register in
* the documentation.
*/
static inline struct brw_reg

View File

@ -929,8 +929,8 @@ schedule_node::schedule_node(backend_instruction *inst,
this->delay = 0;
this->exit = NULL;
/* We can't measure Gen6 timings directly but expect them to be much
* closer to Gen7 than Gen4.
/* We can't measure Gfx6 timings directly but expect them to be much
* closer to Gfx7 than Gfx4.
*/
if (!sched->post_reg_alloc)
this->latency = 1;
@ -1783,7 +1783,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
}
cand_generation++;
/* Shared resource: the mathbox. There's one mathbox per EU on Gen6+
/* Shared resource: the mathbox. There's one mathbox per EU on Gfx6+
* but it's more limited pre-Gfx6, so if we send something off to it then
* the next math instruction isn't going to make progress until the first
* is done.

View File

@ -165,13 +165,13 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
{
switch (op) {
case 0 ... NUM_BRW_OPCODES - 1:
/* The DO instruction doesn't exist on Gen6+, but we use it to mark the
/* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
* start of a loop in the IR.
*/
if (devinfo->ver >= 6 && op == BRW_OPCODE_DO)
return "do";
/* The following conversion opcodes don't exist on Gen8+, but we use
/* The following conversion opcodes don't exist on Gfx8+, but we use
* them to mark that we want to do the conversion.
*/
if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16)

View File

@ -293,7 +293,7 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
case SHADER_OPCODE_MOV_INDIRECT:
return false;
default:
/* The MATH instruction on Gen6 only executes in align1 mode, which does
/* The MATH instruction on Gfx6 only executes in align1 mode, which does
* not support writemasking.
*/
if (devinfo->ver == 6 && is_math())
@ -1129,7 +1129,7 @@ vec4_instruction::can_reswizzle(const struct gen_device_info *devinfo,
int swizzle,
int swizzle_mask)
{
/* Gen6 MATH instructions can not execute in align16 mode, so swizzles
/* Gfx6 MATH instructions can not execute in align16 mode, so swizzles
* are not allowed.
*/
if (devinfo->ver == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
@ -1871,7 +1871,7 @@ vec4_visitor::lower_minmax()
inst->predicate == BRW_PREDICATE_NONE) {
/* If src1 is an immediate value that is not NaN, then it can't be
* NaN. In that case, emit CMP because it is much better for cmod
* propagation. Likewise if src1 is not float. Gen4 and Gen5 don't
* propagation. Likewise if src1 is not float. Gfx4 and Gfx5 don't
* support HF or DF, so it is not necessary to check for those.
*/
if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
@ -2380,7 +2380,7 @@ scalarize_predicate(brw_predicate predicate, unsigned writemask)
}
}
/* Gen7 has a hardware decompression bug that we can exploit to represent
/* Gfx7 has a hardware decompression bug that we can exploit to represent
* handful of additional swizzles natively.
*/
static bool

View File

@ -495,7 +495,7 @@ namespace brw {
}
/**
* Gen4 predicated IF.
* Gfx4 predicated IF.
*/
instruction *
IF(brw_predicate predicate) const
@ -504,7 +504,7 @@ namespace brw {
}
/**
* Gen6 IF with embedded comparison.
* Gfx6 IF with embedded comparison.
*/
instruction *
IF(const src_reg &src0, const src_reg &src1,

View File

@ -129,7 +129,7 @@ generate_tex(struct brw_codegen *p,
break;
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
/* Gfx7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
assert(devinfo->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
@ -1069,7 +1069,7 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
/* Zero the message header */
brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gen7.5+) or 15:12 (Gen7) */
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gfx7.5+) or 15:12 (Gfx7) */
brw_AND(p, m0_2,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13)));
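A sketch of the field being isolated (standalone C, name invented): the 4-bit Barrier ID lives at bits 16:13 of r0.2 on Gfx7.5+ and at bits 15:12 on Gfx7.

#include <stdbool.h>
#include <stdint.h>

static uint32_t
barrier_id(uint32_t r0_2, bool gfx75_plus)
{
   return gfx75_plus ? (r0_2 >> 13) & 0xf : (r0_2 >> 12) & 0xf;
}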

View File

@ -604,7 +604,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
* only for legacy OpenGL or Gfx4-5 hardware, neither of which offer
* geometry shader support. So we can safely ignore that.
*
* For SSO pipelines, we use a fixed VUE map layout based on variable
@ -915,7 +915,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
* the best choice for performance, followed by SINGLE mode."
*
* So SINGLE mode is more performant when invocations == 1 and DUAL_INSTANCE
* mode is more performant when invocations > 1. Gen6 only supports
* mode is more performant when invocations > 1. Gfx6 only supports
* SINGLE mode.
*/
if (prog_data->invocations <= 1 || compiler->devinfo->ver < 7)

View File

@ -189,7 +189,7 @@ ALU2_ACC(SUBB)
ALU2(MAC)
ALU1(DIM)
/** Gen4 predicated IF. */
/** Gfx4 predicated IF. */
vec4_instruction *
vec4_visitor::IF(enum brw_predicate predicate)
{
@ -201,7 +201,7 @@ vec4_visitor::IF(enum brw_predicate predicate)
return inst;
}
/** Gen6 IF with embedded comparison. */
/** Gfx6 IF with embedded comparison. */
vec4_instruction *
vec4_visitor::IF(src_reg src0, src_reg src1,
enum brw_conditional_mod condition)
@ -340,7 +340,7 @@ vec4_visitor::emit_math(enum opcode opcode,
emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
if (devinfo->ver == 6 && dst.writemask != WRITEMASK_XYZW) {
/* MATH on Gen6 must be align1, so we can't do writemasks. */
/* MATH on Gfx6 must be align1, so we can't do writemasks. */
math->dst = dst_reg(this, glsl_type::vec4_type);
math->dst.type = dst.type;
math = emit(MOV(dst, src_reg(math->dst)));
@ -872,7 +872,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
inst->offset = constant_offset;
/* The message header is necessary for:
* - Gen4 (always)
* - Gfx4 (always)
* - Texel offsets
* - Gather channel selection
* - Sampler indices too large to fit in a 4-bit value.
@ -1000,7 +1000,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
* spec requires layers.
*/
if (op == ir_txs && devinfo->ver < 7) {
/* Gen4-6 return 0 instead of 1 for single layer surfaces. */
/* Gfx4-6 return 0 instead of 1 for single layer surfaces. */
emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
src_reg(inst->dst), brw_imm_d(1));
}
@ -1019,7 +1019,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
}
/**
* Apply workarounds for Gen6 gather with UINT/SINT
* Apply workarounds for Gfx6 gather with UINT/SINT
*/
void
vec4_visitor::emit_gfx6_gather_wa(uint8_t wa, dst_reg dst)

View File

@ -119,7 +119,7 @@ brw_compute_vue_map(const struct gen_device_info *devinfo,
* dword 8-11 is the first vertex data.
*
* On Ironlake the VUE header is nominally 20 dwords, but the hardware
* will accept the same header layout as Gen4 [and should be a bit faster]
* will accept the same header layout as Gfx4 [and should be a bit faster]
*/
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);

View File

@ -26,7 +26,7 @@
/**
* \file gfx6_gs_visitor.cpp
*
* Gen6 geometry shader implementation
* Gfx6 geometry shader implementation
*/
#include "gfx6_gs_visitor.h"
@ -39,7 +39,7 @@ gfx6_gs_visitor::emit_prolog()
{
vec4_gs_visitor::emit_prolog();
/* Gen6 geometry shaders require allocating an initial VUE handle via
/* Gfx6 geometry shaders require allocating an initial VUE handle via
* FF_SYNC message, however the documentation remarks that only one thread
* can write to the URB simultaneously and the FF_SYNC message provides the
* synchronization mechanism for this, so using this message effectively
@ -584,7 +584,7 @@ gfx6_gs_visitor::xfb_write()
num_verts = 3;
break;
default:
unreachable("Unexpected primitive type in Gen6 SOL program.");
unreachable("Unexpected primitive type in Gfx6 SOL program.");
}
this->current_annotation = "gfx6 thread end: svb writes init";

View File

@ -294,7 +294,7 @@ run_tests(const struct gen_device_info *devinfo)
continue;
for (int align_16 = 0; align_16 <= 1; align_16++) {
/* Align16 support is not present on Gen11+ */
/* Align16 support is not present on Gfx11+ */
if (devinfo->ver >= 11 && align_16)
continue;

View File

@ -167,7 +167,7 @@ TEST_P(validation_test, math_src1_null_reg)
gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);
EXPECT_FALSE(validate(p));
} else {
/* Math instructions on Gen4/5 are actually SEND messages with payloads.
/* Math instructions on Gfx4/5 are actually SEND messages with payloads.
* src1 is an immediate message descriptor set by gfx4_math.
*/
}
@ -178,7 +178,7 @@ TEST_P(validation_test, opcode46)
/* opcode 46 is "push" on Gen 4 and 5
* "fork" on Gen 6
* reserved on Gen 7
* "goto" on Gen8+
* "goto" on Gfx8+
*/
brw_next_insn(p, brw_opcode_decode(&devinfo, 46));
@ -231,7 +231,7 @@ TEST_P(validation_test, invalid_exec_size_encoding)
TEST_P(validation_test, invalid_file_encoding)
{
/* Register file on Gen12 is only one bit */
/* Register file on Gfx12 is only one bit */
if (devinfo.ver >= 12)
return;
@ -373,7 +373,7 @@ TEST_P(validation_test, invalid_type_encoding)
TEST_P(validation_test, invalid_type_encoding_3src_a16)
{
/* 3-src instructions in align16 mode only supported on Gen6-10 */
/* 3-src instructions in align16 mode only supported on Gfx6-10 */
if (devinfo.ver < 6 || devinfo.ver > 10)
return;
@ -453,7 +453,7 @@ TEST_P(validation_test, invalid_type_encoding_3src_a16)
TEST_P(validation_test, invalid_type_encoding_3src_a1)
{
/* 3-src instructions in align1 mode only supported on Gen10+ */
/* 3-src instructions in align1 mode only supported on Gfx10+ */
if (devinfo.ver < 10)
return;
@ -482,7 +482,7 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
{ BRW_REGISTER_TYPE_UW, E(INT), true },
/* There are no ternary instructions that can operate on B-type sources
* on Gen11-12. Src1/Src2 cannot be B-typed either.
* on Gfx11-12. Src1/Src2 cannot be B-typed either.
*/
{ BRW_REGISTER_TYPE_B, E(INT), false },
{ BRW_REGISTER_TYPE_UB, E(INT), false },
@ -551,11 +551,11 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
TEST_P(validation_test, 3src_inst_access_mode)
{
/* 3-src instructions only supported on Gen6+ */
/* 3-src instructions only supported on Gfx6+ */
if (devinfo.ver < 6)
return;
/* No access mode bit on Gen12+ */
/* No access mode bit on Gfx12+ */
if (devinfo.ver >= 12)
return;
@ -750,7 +750,7 @@ TEST_P(validation_test, dst_horizontal_stride_0)
clear_instructions(p);
/* Align16 does not exist on Gen11+ */
/* Align16 does not exist on Gfx11+ */
if (devinfo.ver >= 11)
return;
@ -801,7 +801,7 @@ TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
/* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
{
/* Align16 does not exist on Gen11+ */
/* Align16 does not exist on Gfx11+ */
if (devinfo.ver >= 11)
return;
@ -823,7 +823,7 @@ TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
/* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
{
/* Align16 does not exist on Gen11+ */
/* Align16 does not exist on Gfx11+ */
if (devinfo.ver >= 11)
return;
@ -2078,7 +2078,7 @@ TEST_P(validation_test, vector_immediate_destination_alignment)
};
for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
/* UV type is Gen6+ */
/* UV type is Gfx6+ */
if (devinfo.ver < 6 &&
move[i].src_type == BRW_REGISTER_TYPE_UV)
continue;
@ -2120,7 +2120,7 @@ TEST_P(validation_test, vector_immediate_destination_stride)
};
for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
/* UV type is Gen6+ */
/* UV type is Gfx6+ */
if (devinfo.ver < 6 &&
move[i].src_type == BRW_REGISTER_TYPE_UV)
continue;
@ -2271,11 +2271,11 @@ TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
#undef INST
};
/* These restrictions only apply to Gen8+ */
/* These restrictions only apply to Gfx8+ */
if (devinfo.ver < 8)
return;
/* NoDDChk/NoDDClr does not exist on Gen12+ */
/* NoDDChk/NoDDClr does not exist on Gfx12+ */
if (devinfo.ver >= 12)
return;
@ -2407,7 +2407,7 @@ TEST_P(validation_test, qword_low_power_no_indirect_addressing)
#undef INST
};
/* These restrictions only apply to Gen8+ */
/* These restrictions only apply to Gfx8+ */
if (devinfo.ver < 8)
return;
@ -2555,7 +2555,7 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf)
#undef INST
};
/* These restrictions only apply to Gen8+ */
/* These restrictions only apply to Gfx8+ */
if (devinfo.ver < 8)
return;
@ -2660,11 +2660,11 @@ TEST_P(validation_test, align16_64_bit_integer)
#undef INST
};
/* 64-bit integer types exist on Gen8+ */
/* 64-bit integer types exist on Gfx8+ */
if (devinfo.ver < 8)
return;
/* Align16 does not exist on Gen11+ */
/* Align16 does not exist on Gfx11+ */
if (devinfo.ver >= 11)
return;
@ -2768,11 +2768,11 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
#undef INST
};
/* These restrictions only apply to Gen8+ */
/* These restrictions only apply to Gfx8+ */
if (devinfo.ver < 8)
return;
/* NoDDChk/NoDDClr does not exist on Gen12+ */
/* NoDDChk/NoDDClr does not exist on Gfx12+ */
if (devinfo.ver >= 12)
return;

View File

@ -439,7 +439,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = {
[MESA_SHADER_VERTEX] = 2560,
[MESA_SHADER_TESS_CTRL] = 504,
[MESA_SHADER_TESS_EVAL] = 1536,
/* Reduced from 960, seems to be similar to the bug on Gen9 GT1. */
/* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
[MESA_SHADER_GEOMETRY] = 690,
},
},
@ -1252,7 +1252,7 @@ gen_get_device_info_from_pci_id(int pci_id,
* allocate scratch space enough so that each slice has 4 slices allowed."
*
* The equivalent internal documentation says that this programming note
* applies to all Gen9+ platforms.
* applies to all Gfx9+ platforms.
*
* The hardware typically calculates the scratch space pointer by taking
* the base address, and adding per-thread-scratch-space * thread ID.
@ -1321,7 +1321,7 @@ getparam_topology(struct gen_device_info *devinfo, int fd)
return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
maybe_warn:
/* Only with Gen8+ are we starting to see devices with fusing that can only
/* Only with Gfx8+ are we starting to see devices with fusing that can only
* be detected at runtime.
*/
if (devinfo->ver >= 8)
@ -1445,7 +1445,7 @@ gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
}
if (devinfo->ver == 10) {
mesa_loge("Gen10 support is redacted.");
mesa_loge("Gfx10 support is redacted.");
return false;
}

View File

@ -195,7 +195,7 @@ struct gen_device_info
* automatically scale pixel shader thread count, based on a single value
* programmed into 3DSTATE_PS.
*
* To calculate the maximum number of threads for Gen8 and beyond (which have
* To calculate the maximum number of threads for Gfx8 and beyond (which have
* multiple Pixel Shader Dispatchers):
*
* - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
@ -216,10 +216,10 @@ struct gen_device_info
/**
* Fixed size of the URB.
*
* On Gen6 and DG1, this is measured in KB. Gen4-5 instead measure
* On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure
* this in 512b blocks, as that's more convenient there.
*
* On most Gen7+ platforms, the URB is a section of the L3 cache,
* On most Gfx7+ platforms, the URB is a section of the L3 cache,
* and can be resized based on the L3 programming. For those platforms,
* simply leave this field blank (zero) - it isn't used.
*/
@ -247,7 +247,7 @@ struct gen_device_info
* could be assumed to be 12.5MHz, where the least significant bit neatly
* corresponded to 80 nanoseconds.
*
* Since Gen9 the numbers aren't so round, with a frequency of 12MHz for
* Since Gfx9 the numbers aren't so round, with a frequency of 12MHz for
* SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
* BXT.
*
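A sketch of what the scale factor is used for (standalone C, name invented): converting raw timestamp ticks to nanoseconds with the per-platform frequency, e.g. 12000000 on SKL or 19200000 on BXT per the comment above.

#include <stdint.h>

static uint64_t
gpu_ticks_to_ns(uint64_t ticks, uint64_t timestamp_frequency_hz)
{
   /* Scale ticks to ns; ignores overflow for very large tick counts. */
   return ticks * UINT64_C(1000000000) / timestamp_frequency_hz;
}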

View File

@ -711,8 +711,8 @@
<field name="Height" start="80" end="93" type="uint"/>
<field name="Surface Pitch" start="96" end="113" type="uint"/>
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
<value name="Gen9" value="0"/>
<value name="Gen10+" value="1"/>
<value name="Gfx9" value="0"/>
<value name="Gfx10+" value="1"/>
</field>
<field name="Depth" start="117" end="127" type="uint"/>
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>

View File

@ -733,8 +733,8 @@
<field name="Null Probing Enable" start="114" end="114" type="uint"/>
<field name="Standard Tiling Mode Extensions" start="115" end="115" type="uint"/>
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
<value name="Gen9" value="0"/>
<value name="Gen10+" value="1"/>
<value name="Gfx9" value="0"/>
<value name="Gfx10+" value="1"/>
</field>
<field name="Depth" start="117" end="127" type="uint"/>
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>

View File

@ -748,8 +748,8 @@
<field name="Null Probing Enable" start="114" end="114" type="uint"/>
<field name="Standard Tiling Mode Extensions" start="115" end="115" type="uint"/>
<field name="Tile Address Mapping Mode" start="116" end="116" type="uint">
<value name="Gen9" value="0"/>
<value name="Gen10+" value="1"/>
<value name="Gfx9" value="0"/>
<value name="Gfx10+" value="1"/>
</field>
<field name="Depth" start="117" end="127" type="uint"/>
<field name="Multisample Position Palette Index" start="128" end="130" type="uint"/>

View File

@ -70,7 +70,7 @@ Definitions
the value of s0.
For example, the logical array length of a 3D surface is always 1, even on
Gen9 where the surface's memory layout is that of an array surface
Gfx9 where the surface's memory layout is that of an array surface
(ISL_DIM_LAYOUT_GEN4_2D).
- Physical Surface Samples (sa):

View File

@ -193,7 +193,7 @@ isl_device_init(struct isl_device *dev,
const struct gen_device_info *info,
bool has_bit6_swizzling)
{
/* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
/* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
assert(!(has_bit6_swizzling && info->ver >= 8));
dev->info = info;
@ -411,8 +411,8 @@ isl_tiling_get_info(enum isl_tiling tiling,
break;
case ISL_TILING_GEN12_CCS:
/* From the Bspec, Gen Graphics > Gen12 > Memory Data Formats > Memory
* Compression > Memory Compression - Gen12:
/* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
* Compression > Memory Compression - Gfx12:
*
* 4 bits of auxiliary plane data are required for 2 cachelines of
* main surface data. This results in a single cacheline of auxiliary
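The quoted ratio works out to one auxiliary byte per 256 main-surface bytes (4 bits per two 64B cachelines); a minimal sketch of that arithmetic (name invented):

#include <stdint.h>

static uint64_t
gfx12_ccs_size_B(uint64_t main_size_B)
{
   return main_size_B / 256; /* 4 bits aux per 128B main = 1:256 */
}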
@ -1071,7 +1071,7 @@ isl_calc_array_pitch_el_rows_gfx4_2d(
* but the second restriction, which is an extension of the first, only
* applies to qpitch and must be applied here.
*
* The second restriction disappears on Gen12.
* The second restriction disappears on Gfx12.
*/
assert(fmtl->bh == 4);
pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
@ -1413,7 +1413,7 @@ isl_calc_row_pitch_alignment(const struct isl_device *dev,
const struct isl_tile_info *tile_info)
{
if (tile_info->tiling != ISL_TILING_LINEAR) {
/* According to BSpec: 44930, Gen12's CCS-compressed surface pitches must
/* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
* be 512B-aligned. CCS is only supported on Y tilings.
*
* Only consider 512B alignment when:
@ -1502,7 +1502,7 @@ isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
tile_info->logical_extent_el.width);
/* In some cases the alignment of the pitch might be greater than the tile size
* (for example Gen12 CCS requires 512B alignment while the tile's width
* (for example Gfx12 CCS requires 512B alignment while the tile's width
* can be 128B), so align the row pitch to the alignment.
*/
assert(alignment_B >= tile_info->phys_extent_B.width);
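A sketch of the rounding applied here (standalone C, names invented, power-of-two alignment assumed): the required alignment (512B for Gfx12 CCS) may exceed the tile width (128B), so the pitch is rounded up to it.

#include <stdint.h>

static uint32_t
align_row_pitch(uint32_t pitch_B, uint32_t alignment_B)
{
   /* Round up to a power-of-two alignment. */
   return (pitch_B + alignment_B - 1) & ~(alignment_B - 1);
}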
@ -1706,7 +1706,7 @@ isl_surf_init_s(const struct isl_device *dev,
assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
/* The diagram in the Bspec section Memory Compression - Gen12, shows
/* The diagram in the Bspec section Memory Compression - Gfx12, shows
* that the CCS is indexed in 256B chunks. However, the
* PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
* pages. We currently don't assign the usage field like we do for main
@ -1715,7 +1715,7 @@ isl_surf_init_s(const struct isl_device *dev,
if (tiling == ISL_TILING_GEN12_CCS)
base_alignment_B = MAX(base_alignment_B, 4096);
/* Gen12+ requires that images be 64K-aligned if they're going to be used
/* Gfx12+ requires that images be 64K-aligned if they're going to be used
* with CCS. This is because the Aux translation table maps main
* surface addresses to aux addresses at a 64K (in the main surface)
* granularity. Because we don't know for sure in ISL if a surface will
@ -1967,7 +1967,7 @@ bool
isl_surf_supports_ccs(const struct isl_device *dev,
const struct isl_surf *surf)
{
/* CCS support does not exist prior to Gen7 */
/* CCS support does not exist prior to Gfx7 */
if (ISL_GFX_VER(dev) <= 6)
return false;
@ -1991,13 +1991,13 @@ isl_surf_supports_ccs(const struct isl_device *dev,
* - MCS and Lossless compression is supported for
* TiledY/TileYs/TileYf non-MSRTs only.
*
* From the BSpec (44930) for Gen12:
* From the BSpec (44930) for Gfx12:
*
* Linear CCS is only allowed for Untyped Buffers but only via HDC
* Data-Port messages.
*
* We never use untyped messages on surfaces created by ISL on Gen9+ so
* this means linear is out on Gen12+ as well.
* We never use untyped messages on surfaces created by ISL on Gfx9+ so
* this means linear is out on Gfx12+ as well.
*/
if (surf->tiling == ISL_TILING_LINEAR)
return false;
@ -2006,7 +2006,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
if (isl_surf_usage_is_stencil(surf->usage) && surf->samples > 1)
return false;
/* On Gen12, all CCS-compressed surface pitches must be multiples of
/* On Gfx12, all CCS-compressed surface pitches must be multiples of
* 512B.
*/
if (surf->row_pitch_B % 512 != 0)
@ -2042,7 +2042,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
if (surf->samples > 1)
return false;
/* CCS is only for color images on Gen7-11 */
/* CCS is only for color images on Gfx7-11 */
if (isl_surf_usage_is_depth_or_stencil(surf->usage))
return false;
@ -2127,7 +2127,7 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
return false;
}
/* On Gen12, the CCS is a scaled-down version of the main surface. We
/* On Gfx12, the CCS is a scaled-down version of the main surface. We
* model this as the CCS compressing a 2D-view of the entire surface.
*/
struct isl_surf *ccs_surf =

View File

@ -478,7 +478,7 @@ enum isl_tiling {
ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */
ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */
ISL_TILING_CCS, /**< Tiling format for CCS surfaces */
ISL_TILING_GEN12_CCS, /**< Tiling format for Gen12 CCS surfaces */
ISL_TILING_GEN12_CCS, /**< Tiling format for Gfx12 CCS surfaces */
};
/**
@ -619,7 +619,7 @@ enum isl_aux_usage {
ISL_AUX_USAGE_CCS_E,
/** The auxiliary surface provides full lossless color compression on
* Gen12.
* Gfx12.
*
* @invariant isl_surf::samples == 1
*/
@ -660,7 +660,7 @@ enum isl_aux_usage {
/** The auxiliary surface is an MCS and CCS is also enabled
*
* In this mode, we have fused MCS+CCS compression where the MCS is used
* for fast-clears and "identical samples" compression just like on Gen7-11
* for fast-clears and "identical samples" compression just like on Gfx7-11
* but each plane is then CCS compressed.
*
* @invariant isl_surf::samples > 1

View File

@ -143,7 +143,7 @@ isl_drm_modifier_get_score(const struct gen_device_info *devinfo,
case I915_FORMAT_MOD_Y_TILED:
return 3;
case I915_FORMAT_MOD_Y_TILED_CCS:
/* Gen12's CCS layout differs from Gen9-11. */
/* Gfx12's CCS layout differs from Gfx9-11. */
if (devinfo->ver >= 12)
return 0;

View File

@ -928,7 +928,7 @@ isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo,
!isl_format_supports_ccs_e(devinfo, format2))
return false;
/* Gen12 added CCS_E support for A8_UNORM; A8_UNORM and R8_UNORM share the
/* Gfx12 added CCS_E support for A8_UNORM; A8_UNORM and R8_UNORM share the
* same aux map format encoding so they are definitely compatible.
*/
if (format1 == ISL_FORMAT_A8_UNORM)

View File

@ -30,7 +30,7 @@ isl_gfx4_choose_msaa_layout(const struct isl_device *dev,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout)
{
/* Gen4 and Gen5 do not support MSAA */
/* Gfx4 and Gfx5 do not support MSAA */
assert(info->samples >= 1);
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
@ -42,7 +42,7 @@ isl_gfx4_filter_tiling(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
isl_tiling_flags_t *flags)
{
/* Gen4-5 only support linear, X, and Y-tiling. */
/* Gfx4-5 only support linear, X, and Y-tiling. */
*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT | ISL_TILING_Y0_BIT);
if (isl_surf_usage_is_depth_or_stencil(info->usage)) {

View File

@ -303,7 +303,7 @@ isl_gfx6_filter_tiling(const struct isl_device *dev,
* "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either
* TileX or Linear."
*
* This is necessary all the way back to 965, but is permitted on Gen7+.
* This is necessary all the way back to 965, but is permitted on Gfx7+.
*/
if (ISL_GFX_VER(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128)
*flags &= ~ISL_TILING_Y0_BIT;

View File

@ -183,7 +183,7 @@ isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
}
if (isl_format_is_compressed(info->format)) {
/* On Gen9, the meaning of RENDER_SURFACE_STATE's
/* On Gfx9, the meaning of RENDER_SURFACE_STATE's
* SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for
* compressed formats. They now indicate a multiple of the compression
* block. For example, if the compression mode is ETC2 then HALIGN_4

View File

@ -302,10 +302,10 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
* a few things which back this up:
*
* 1. The docs are also pretty clear that this bit was added as part
* of enabling Gen12 depth/stencil lossless compression.
* of enabling Gfx12 depth/stencil lossless compression.
*
* 2. The only new difference between depth/stencil and color images on
* Gen12 (where the bit was added) is how they treat CCS compression.
* Gfx12 (where the bit was added) is how they treat CCS compression.
* All other differences such as alignment requirements and MSAA layout
* are already covered by other bits.
*
@ -626,14 +626,14 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
assert(info->aux_usage == ISL_AUX_USAGE_STC_CCS);
if (isl_aux_usage_has_hiz(info->aux_usage)) {
/* For Gen8-10, there are some restrictions around sampling from HiZ.
/* For Gfx8-10, there are some restrictions around sampling from HiZ.
* The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
* say:
*
* "If this field is set to AUX_HIZ, Number of Multisamples must
* be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
*
* On Gen12, the docs are a bit less obvious but the restriction is
* On Gfx12, the docs are a bit less obvious but the restriction is
* the same. The limitation isn't called out explicitly but the docs
* for the CCS_E value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode
* say:
@ -677,7 +677,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
/* The auxiliary buffer info is filled when it's useable by the HW.
*
* Starting with Gen12, the only form of compression that can be used
* Starting with Gfx12, the only form of compression that can be used
* with RENDER_SURFACE_STATE which requires an aux surface is MCS.
* HiZ still requires a surface but the HiZ surface can only be
* accessed through 3DSTATE_HIER_DEPTH_BUFFER.
@ -746,7 +746,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
s.ClearValueAddressEnable = true;
s.ClearValueAddress = info->clear_address;
#else
unreachable("Gen9 and earlier do not support indirect clear colors");
unreachable("Gfx9 and earlier do not support indirect clear colors");
#endif
}
@ -755,7 +755,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
* From BXML > GT > Shared Functions > vol5c Shared Functions >
* [Structure] RENDER_SURFACE_STATE [BDW+] > ClearColorConversionEnable:
*
* Project: Gfx11
*
* "Enables Pixel backend hw to convert clear values into native format
* and write back to clear address, so that display and sampler can use

View File

@ -383,8 +383,8 @@ compute_topology_builtins(struct gen_perf_config *perf,
perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
/* The subslice mask builtin contains bits for all slices. Prior to Gfx11
* it had groups of 3 bits for each slice, on Gfx11 it's 8 bits for each
* slice.
*
* Ideally equations would be updated to have a slice/subslice query
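
The layout described above reduces to simple bit arithmetic. A minimal sketch, assuming the strides quoted in the comment (3 bits per slice before Gfx11, 8 bits per slice from Gfx11 on); subslice_mask_bit() is illustrative, not Mesa API:

#include <stdint.h>

static uint64_t
subslice_mask_bit(int ver, int slice, int subslice)
{
   const int bits_per_slice = (ver >= 11) ? 8 : 3;
   return UINT64_C(1) << (slice * bits_per_slice + subslice);
}

For example, slice 1 / subslice 2 maps to bit 5 before Gfx11 but to bit 10 on Gfx11.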
@ -727,7 +727,7 @@ oa_metrics_available(struct gen_perf_config *perf, int fd,
*/
if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {
/* If _paranoid == 1 then on Gfx8+ we won't be able to access OA
* metrics unless running as root.
*/
if (devinfo->is_haswell)
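
The check above only stat()s the sysctl file. A sketch that actually reads the value, assuming the file holds a single integer (the path comes from the code above; the helper itself is illustrative):

#include <stdio.h>

static long
read_perf_stream_paranoid(void)
{
   long val = -1;
   FILE *f = fopen("/proc/sys/dev/i915/perf_stream_paranoid", "r");
   if (f != NULL) {
      if (fscanf(f, "%ld", &val) != 1)
         val = -1;   /* unreadable or malformed */
      fclose(f);
   }
   return val;   /* 1 => OA metrics are root-only on Gfx8+ */
}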
@ -1023,8 +1023,8 @@ gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
* OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
* global register (see drivers/gpu/drm/i915/i915_perf.c)
*
* Documentation says this should be available on Gfx9+ but experimentation
* shows that Gfx8 reports similar values, so we enable it there too.
*/
if (devinfo->ver < 8)
return;

View File

@ -106,7 +106,7 @@ struct gen_pipeline_stat {
* The largest OA formats we can use include:
* For Haswell:
* 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
* For Gfx8+
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
*
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
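
Restating those counts as data (values copied from the comment above; the struct itself is just an illustration, not a Mesa type):

struct oa_format_counts {
   int timestamps, clocks, a_counters, b_counters, c_counters;
};

static const struct oa_format_counts oa_counts_hsw  = { 1, 0, 45, 8, 8 };
static const struct oa_format_counts oa_counts_gfx8 = { 1, 1, 36, 8, 8 };
/* Both variants additionally carry 2 PERF_CNT registers and 1 RPSTAT. */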
@ -492,7 +492,7 @@ gen_perf_has_hold_preemption(const struct gen_perf_config *perf)
}
/** Whether we have the ability to lock EU array power configuration for the
* duration of the performance recording. This is useful on Gfx11 where the HW
* architecture requires half the EU for particular workloads.
*/
static inline bool

View File

@ -185,7 +185,7 @@ hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
# only has dual subslices which can be assimilated with 16EUs subslices.
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"

View File

@ -124,7 +124,7 @@ struct mdapi_pipeline_metrics {
uint64_t HSInvocations;
uint64_t DSInvocations;
uint64_t CSInvocations;
uint64_t Reserved1; /* Gfx10+ */
};
int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,

View File

@ -1213,7 +1213,7 @@ oa_report_ctx_id_valid(const struct gen_device_info *devinfo,
*
* These periodic snapshots help to ensure we handle counter overflow
* correctly by being frequent enough to ensure we don't miss multiple
* overflows of a counter between snapshots. For Gfx8+ the i915 perf
* snapshots provide the extra context-switch reports that let us
* subtract out the progress of counters associated with other
* contexts running on the system.
@ -1244,7 +1244,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
goto error;
}
/* On Gfx12+ OA reports are sourced from per context counters, so we don't
* ever have to look at the global OA buffer. Yey \o/
*/
if (perf_ctx->devinfo->ver >= 12) {
@ -1300,7 +1300,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
goto end;
}
/* For Gfx8+ since the counters continue while other
* contexts are running we need to discount any unrelated
* deltas. The hardware automatically generates a report
* on context switch which gives us a new reference point

View File

@ -429,7 +429,7 @@ aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const vo
aub_map_ggtt(aub, virt_addr, size);
/* We write the GGTT buffer through the GGTT aub command rather than the
* PHYSICAL aub command. This is because the Gfx9 simulator seems to have 2
* different sets of memory pools for GGTT and physical (probably someone
* didn't really understand the concept?).
*/

View File

@ -467,7 +467,7 @@ decode_single_ksp(struct aub_viewer_decode_ctx *ctx,
const uint32_t *p)
{
uint64_t ksp = 0;
bool is_simd8 = false; /* vertex shaders on Gfx8+ only */
bool is_enabled = true;
struct intel_field_iterator iter;

View File

@ -1450,9 +1450,9 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
* According to the other driver team, this applies to compute shaders
* as well. This is not currently documented at all.
*
* This hack is no longer necessary on Gfx11+.
*
* For Gfx11+, scratch space allocation is based on the number of threads
* in the base configuration.
*/
if (devinfo->ver == 12)
@ -1610,7 +1610,7 @@ static uint32_t
anv_device_get_bo_align(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
/* Gfx12 CCS surface addresses need to be 64K aligned. */
if (device->info.ver >= 12 && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS))
return 64 * 1024;
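
The rule above is an ordinary power-of-two round-up. A minimal sketch (align_u64() is a stand-in, not necessarily anv's helper):

#include <stdint.h>

static uint64_t
align_u64(uint64_t value, uint64_t alignment)
{
   /* alignment must be a power of two; 64 * 1024 for Gfx12 CCS BOs. */
   return (value + alignment - 1) & ~(alignment - 1);
}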

View File

@ -678,7 +678,7 @@ anv_physical_device_try_create(struct anv_instance *instance,
} else if (devinfo.ver == 7 && devinfo.is_baytrail) {
mesa_logw("Bay Trail Vulkan support is incomplete");
} else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
/* Gfx8-12 fully supported */
} else {
result = vk_errorfi(instance, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
"Vulkan not yet supported on %s", device_name);
@ -861,12 +861,12 @@ anv_physical_device_try_create(struct anv_instance *instance,
/* Broadwell PRM says:
*
* "Before Gen8, there was a historical configuration control field to
* "Before Gfx8, there was a historical configuration control field to
* swizzle address bit[6] for in X/Y tiling modes. This was set in three
* different places: TILECTL[1:0], ARB_MODE[5:4], and
* DISP_ARB_CTL[14:13].
*
* For Gfx8 and subsequent generations, the swizzle fields are all
* reserved, and the CPU's memory controller performs all address
* swizzling modifications."
*/

View File

@ -568,7 +568,7 @@ add_aux_surface_if_supported(struct anv_device *device,
} else if (device->info.ver >= 12) {
anv_perf_warn(device, &image->base,
"The CCS_D aux mode is not yet handled on "
"Gen12+. Not allocating a CCS buffer.");
"Gfx12+. Not allocating a CCS buffer.");
image->planes[plane].aux_surface.isl.size_B = 0;
return VK_SUCCESS;
} else {

View File

@ -41,7 +41,7 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
device->perf = NULL;
/* We need self modifying batches. The i915 parser prevents it on
* Gfx7.5 :( maybe one day.
*/
if (devinfo->ver < 8)
return;
@ -136,8 +136,8 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = true;
/* If global SSEU is available, pin it to the default. This will ensure on
* Gfx11 for instance we use the full EU array. Initially when perf was
* enabled we would use only half on Gfx11 because of functional
* requirements.
*/
if (gen_perf_has_global_sseu(device->physical->perf)) {

View File

@ -488,7 +488,7 @@ struct anv_bo {
/** Size of the implicit CCS range at the end of the buffer
*
* On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
* page of main surface data maps to a 256B chunk of CCS data and that
* mapping is provided on TGL-LP by the AUX table which maps virtual memory
* addresses in the main surface to virtual memory addresses for CCS data.
@ -496,7 +496,7 @@ struct anv_bo {
* Because we can't change these maps around easily and because Vulkan
* allows two VkImages to be bound to overlapping memory regions (as long
* as the app is careful), it's not feasible to make this mapping part of
* the image. (On Gfx11 and earlier, the mapping was provided via
* RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
* Instead, we attach the CCS data directly to the buffer object and setup
* the AUX table mapping at BO creation time.
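
The 1/256 ratio makes implicit CCS sizing a one-line computation. A sketch under that assumption (not the actual anv code):

#include <stdint.h>

static uint64_t
implicit_ccs_size_b(uint64_t main_surface_size_b)
{
   /* A 64K page of main surface maps to a 256B chunk of CCS data. */
   return main_surface_size_b / 256;
}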
@ -940,7 +940,7 @@ struct anv_physical_device {
/** True if we can read the GPU timestamp register
*
* When running in a virtual context, the timestamp register is unreadable
* on Gfx12+.
*/
bool has_reg_timestamp;
@ -2351,7 +2351,7 @@ enum anv_pipe_bits {
*/
ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),
/* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
* AUX-TT data has changed and we need to invalidate AUX-TT data. This is
* done by writing the AUX-TT register.
*/
@ -2359,7 +2359,7 @@ enum anv_pipe_bits {
/* This bit does not exist directly in PIPE_CONTROL. It means that a
* PIPE_CONTROL with a post-sync operation will follow. This is used to
* implement a workaround for Gfx9.
*/
ANV_PIPE_POST_SYNC_BIT = (1 << 25),
};
@ -2712,7 +2712,7 @@ struct anv_attachment_state {
/** State tracking for vertex buffer flushes
*
* On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
* addresses. If you happen to have two vertex buffers which get placed
* exactly 4 GiB apart and use them in back-to-back draw calls, you can get
* collisions. In order to solve this problem, we track vertex address ranges
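
The hazard depends only on the low 32 bits of the two addresses. A hypothetical checker (not Mesa API) that captures the condition:

#include <stdbool.h>
#include <stdint.h>

static bool
vf_cache_may_alias(uint64_t addr_a, uint64_t addr_b)
{
   /* Distinct addresses whose bottom 32 bits match collide in the
    * Gfx8-9 VF cache. */
   return addr_a != addr_b && (uint32_t)addr_a == (uint32_t)addr_b;
}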
@ -3935,7 +3935,7 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
return false;
/* For Gfx8-11, there are some restrictions around sampling from HiZ.
* The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
* say:
*
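
Per the PRM text quoted with the isl changes earlier in this diff (AUX_HIZ requires MULTISAMPLECOUNT_1 and a surface type other than SURFTYPE_3D), the restriction boils down to a check of this shape; names are illustrative:

#include <stdbool.h>

static bool
hiz_sampling_allowed(unsigned samples, bool is_3d_surface)
{
   return samples == 1 && !is_3d_surface;
}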
@ -4254,7 +4254,7 @@ anv_clear_color_from_att_state(union isl_color_value *clear_color,
/* Haswell border color is a bit of a disaster. Float and unorm formats use a
* straightforward 32-bit float color in the first 64 bytes. Instead of using
* a nice float/integer union like Gfx8+, Haswell specifies the integer border
* color as a separate entry /after/ the float color. The layout of this entry
* also depends on the format's bpp (with extra hacks for RG32), and overlaps.
*
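
A loose sketch of the layout being described, with illustrative offsets only (the real entry layout varies with the format's bpp and overlaps, as the comment warns):

#include <stdint.h>

struct hsw_border_color_sketch {
   float    f32[4];    /* float/unorm color in the first 64 bytes */
   uint32_t pad[12];   /* remainder of that 64-byte entry */
   uint32_t u32[4];    /* integer color in a separate entry after it */
};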

View File

@ -433,7 +433,7 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
return false;
/* Only gfx9+ supports returning ANV_HZ_FC_VAL when sampling a fast-cleared
* portion of a HiZ buffer. Testing has revealed that Gfx8 only supports
* returning 0.0f. Gens prior to gfx8 do not support this feature at all.
*/
if (GFX_VER == 8 && anv_can_sample_with_hiz(&device->info, iview->image))
@ -1888,7 +1888,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
return;
#if GFX_VER >= 11
/* On Gfx11+ we use only one config, so verify it remains the same and skip
* the stalling programming entirely.
*/
assert(cfg == cmd_buffer->device->l3_config);
@ -2049,7 +2049,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
*
* The same text exists a few rows below for Post Sync Op.
*
* On Gfx12 this is GEN:BUG:1607156449.
*/
if (bits & ANV_PIPE_POST_SYNC_BIT) {
if ((GFX_VER == 9 || (GFX_VER == 12 && devinfo->revision == 0 /* A0 */)) &&
@ -3036,7 +3036,7 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
&pipeline->shaders[stage]->bind_map;
#if GFX_VER >= 9
/* This field exists since Gfx8. However, the Broadwell PRM says:
*
* "Constant Buffer Object Control State must be always programmed
* to zero."
@ -3451,7 +3451,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
/* Size is in DWords - 1 */
sob.SurfaceSize = DIV_ROUND_UP(xfb->size, 4) - 1;
#else
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gfx7 so
* we trust in SurfaceEndAddress = SurfaceBaseAddress = 0 (the
* default for an empty SO_BUFFER packet) to disable them.
*/
@ -4711,7 +4711,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
* with Pipeline Select set to GPGPU.
*
* The internal hardware docs recommend the same workaround for Gfx9
* hardware too.
*/
if (pipeline == GPGPU)
@ -4992,7 +4992,7 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
#if GFX_VER == 9
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
const unsigned slice_hashing[] = {
/* Because all Gfx9 platforms with more than one slice require
* three-way subslice hashing, a single "normal" 16x16 slice hashing
* block is guaranteed to suffer from substantial imbalance, with one
* subslice receiving twice as much work as the other two in the
@ -5000,7 +5000,7 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
*
* The performance impact of that would be particularly severe when
* three-way hashing is also in use for slice balancing (which is the
* case for all Gfx9 GT4 platforms), because one of the slices
* receives one every three 16x16 blocks in either direction, which
* is roughly the periodicity of the underlying subslice imbalance
* pattern ("roughly" because in reality the hardware's
@ -5144,7 +5144,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
/* GEN:BUG:1408224581
*
* Workaround: Gfx12LP Astep only An additional pipe control with
* post-sync = store dword operation would be required.( w/a is to
* have an additional pipe control after the stencil state whenever
* the surface state bits of this state is changing).

View File

@ -723,7 +723,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable;
#if GFX_VER == 7
/* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
* can get the depth offsets correct.
*/
if (subpass->depth_stencil_attachment) {
@ -755,14 +755,14 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *info,
uint32_t dynamic_states)
{
/* If the sample locations are dynamic, 3DSTATE_MULTISAMPLE on Gfx7/7.5
* will be emitted dynamically, so skip it here. On Gfx8+
* 3DSTATE_SAMPLE_PATTERN will be emitted dynamically, so skip it here.
*/
if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
/* Only lookup locations if the extensions is active, otherwise the
* default ones will be used either at device initialization time or
* through 3DSTATE_MULTISAMPLE on Gfx7/7.5 by passing NULL locations.
*/
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations) {
#if GFX_VER >= 8
@ -776,7 +776,7 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
pipeline->dynamic_state.sample_locations.samples,
pipeline->dynamic_state.sample_locations.locations);
} else {
/* On Gfx8+ 3DSTATE_MULTISAMPLE does not hold anything we need to modify
* for sample locations, so we don't have to emit it dynamically.
*/
#if GFX_VER >= 8
@ -1430,7 +1430,7 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
* is a bit inconvenient because we don't know what buffers will
* actually be enabled until draw time. We do our best here by
* setting them based on buffers_written and we disable them
@ -1926,7 +1926,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
}
#if GFX_VER >= 8
/* Gfx8 hardware tries to compute ThreadDispatchEnable for us but
* doesn't take into account KillPixels when no depth or stencil
* writes are enabled. In order for occlusion queries to work
* correctly with no attachments, we need to force-enable PS thread

View File

@ -58,7 +58,7 @@
*
* The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
* and p_1 will be swapped for the result. Note that in the context of pixel
* pipe hashing this can always be 0 on Gfx12 platforms, since the hardware
* transparently remaps logical indices found on the table to physical pixel
* pipe indices from the highest to lowest EU count.
*/
@ -117,7 +117,7 @@ genX(emit_slice_hashing_state)(struct anv_device *device,
ppipes_of[n] += (device->info.ppipe_subslices[p] == n);
}
/* Gfx12 has three pixel pipes. */
assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {

View File

@ -1098,7 +1098,7 @@ load_sized_register_mem(struct brw_context *brw,
const struct gen_device_info *devinfo = &brw->screen->devinfo;
int i;
/* MI_LOAD_REGISTER_MEM only exists on Gfx7+. */
assert(devinfo->ver >= 7);
if (devinfo->ver >= 8) {

View File

@ -145,7 +145,7 @@ brw_ptr_in_state_buffer(struct brw_batch *batch, void *p)
OUT_BATCH(reloc); \
} while (0)
/* Handle 48-bit address relocations for Gfx8+ */
#define OUT_RELOC64(buf, flags, delta) do { \
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
uint64_t reloc64 = \

View File

@ -240,7 +240,7 @@ const struct brw_tracked_state brw_gs_binding_table = {
*/
/**
* (Gfx4-5) Upload the binding table pointers for all shader stages.
*
* The binding table pointers are relative to the surface state base address,
* which points at the batchbuffer containing the streamed batch state.

View File

@ -203,7 +203,7 @@ alignment_valid(struct brw_context *brw, unsigned offset,
if (tiling != ISL_TILING_LINEAR)
return (offset & 4095) == 0;
/* On Gfx8+, linear buffers must be cacheline-aligned. */
if (devinfo->ver >= 8)
return (offset & 63) == 0;

View File

@ -244,7 +244,7 @@ brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
}
/**
* Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+
* "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
*
* SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
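
That mapping reads as a straight switch. A self-contained sketch with stand-in enum values (the real SWIZZLE_* and HSW_SCS_* encodings differ):

enum scs_sketch { SCS_ZERO, SCS_ONE, SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA };
enum swz_sketch { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_ZERO, SWZ_ONE };

static enum scs_sketch
swizzle_to_scs_sketch(enum swz_sketch s)
{
   switch (s) {
   case SWZ_X:    return SCS_RED;
   case SWZ_Y:    return SCS_GREEN;
   case SWZ_Z:    return SCS_BLUE;
   case SWZ_W:    return SCS_ALPHA;
   case SWZ_ZERO: return SCS_ZERO;
   default:       return SCS_ONE;
   }
}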
@ -263,7 +263,7 @@ swizzle_to_scs(GLenum swizzle)
}
/**
* Note: if the src (or dst) is a 2D multisample array texture on Gfx7+ using
* INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
* the physical layer holding sample 0. So, for example, if
* src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
@ -1610,7 +1610,7 @@ brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
* enabled must be issued before the rectangle primitive used for
* the depth buffer clear operation.
*
* Same applies for Gfx8 and Gfx9.
*
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
* PIPE_CONTROL, Depth Cache Flush Enable:

View File

@ -1902,11 +1902,11 @@ brw_bufmgr_create(struct gen_device_info *devinfo, int fd, bool bo_reuse)
} else if (devinfo->ver >= 10) {
/* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
* kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
* 4.14. Gfx10+ GVT hasn't landed yet, so it's not actually a
* problem - but extending this requirement back to earlier gens
* might actually mean requiring 4.14.
*/
fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.");
close(bufmgr->fd);
free(bufmgr);
return NULL;

View File

@ -27,7 +27,7 @@
/** @file brw_conditional_render.c
*
* Support for conditional rendering based on query objects
* (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+.
*/
#include "main/condrender.h"

View File

@ -557,7 +557,7 @@ brw_initialize_context_constants(struct brw_context *brw)
*/
ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
/* On Gfx6, in the worst case, we use up one binding table entry per
* transform feedback component (see comments above the definition of
* BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
* for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
@ -675,7 +675,7 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
/* Gfx6 converts quads to polygons at the beginning of the 3D pipeline,
* but we're not sure how it's actually done for vertex order,
* which affects the provoking vertex decision. Always use last vertex
* convention for quad primitives, which works as expected for now.

View File

@ -342,7 +342,7 @@ struct brw_ff_gs_prog_data {
GLuint total_grf;
/**
* Gfx6 transform feedback: Amount by which the streaming vertex buffer
* indices should be incremented each time the GS is invoked.
*/
unsigned svbi_postincrement_value;
@ -640,7 +640,7 @@ struct brw_stage_state
/** Offset in the program cache to the program */
uint32_t prog_offset;
/** Offset in the batchbuffer to Gfx4-5 pipelined state (VS/WM/GS_STATE). */
uint32_t state_offset;
struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */
@ -1159,7 +1159,7 @@ struct brw_context
/**
* Buffer object used in place of multisampled null render targets on
* Gfx6. See brw_emit_null_surface_state().
*/
struct brw_bo *multisampled_null_render_target_bo;

View File

@ -47,9 +47,9 @@ copy_miptrees(struct brw_context *brw,
if (devinfo->ver <= 5) {
/* On gfx4-5, try BLT first.
*
* Gfx4-5 have a single ring for both 3D and BLT operations, so there's
* no inter-ring synchronization issues like on Gfx6+. It is apparently
* faster than using the 3D pipeline. Original Gfx4 also has to rebase
* and copy miptree slices in order to render to unaligned locations.
*/
if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,

View File

@ -388,7 +388,7 @@
/* SAMPLER_STATE DW0 */
#define BRW_SAMPLER_DISABLE (1 << 31)
#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28)
#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gfx6 only */
#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22)
#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22
#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20)
@ -412,7 +412,7 @@
#define GFX4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12)
#define GFX4_SAMPLER_MAX_LOD_SHIFT 12
#define GFX4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9)
/* Wrap modes are in DW1 on Gfx4-6 and DW3 on Gfx7+ */
#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6)
#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6
#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3)
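
These MASK/SHIFT pairs are consumed with the usual mask-building helpers. The restatement below mirrors how i965's INTEL_MASK and SET_FIELD behave, but is a sketch rather than a copy of brw_defines.h:

#include <stdint.h>

#define INTEL_MASK_SKETCH(high, low) \
   ((((uint32_t)1 << ((high) - (low) + 1)) - 1) << (low))
#define SET_FIELD_SKETCH(value, field) \
   (((uint32_t)(value) << field##_SHIFT) & field##_MASK)

#define TCX_WRAP_MODE_MASK  INTEL_MASK_SKETCH(8, 6)
#define TCX_WRAP_MODE_SHIFT 6

/* e.g.: dw1 |= SET_FIELD_SKETCH(wrap_mode, TCX_WRAP_MODE); */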
@ -436,7 +436,7 @@
#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13)
#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13
#define GFX7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10)
/* Gfx7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
#define GFX6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0)
enum brw_wrap_mode {
@ -601,9 +601,9 @@ enum brw_wrap_mode {
# define GFX6_VS_STATISTICS_ENABLE (1 << 10)
# define GFX6_VS_CACHE_DISABLE (1 << 1)
# define GFX6_VS_ENABLE (1 << 0)
/* Gfx8+ DW7 */
# define GFX8_VS_SIMD8_ENABLE (1 << 2)
/* Gfx8+ DW8 */
# define GFX8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GFX8_VS_URB_OUTPUT_LENGTH_SHIFT 16
# define GFX8_VS_USER_CLIP_DISTANCE_SHIFT 8
@ -650,12 +650,12 @@ enum brw_wrap_mode {
# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GFX6_GS_ENABLE (1 << 15)
/* Gfx8+ DW8 */
# define GFX8_GS_STATIC_OUTPUT (1 << 30)
# define GFX8_GS_STATIC_VERTEX_COUNT_SHIFT 16
# define GFX8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
/* Gfx8+ DW9 */
# define GFX8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GFX8_GS_URB_OUTPUT_LENGTH_SHIFT 16
# define GFX8_GS_USER_CLIP_DISTANCE_SHIFT 8
@ -724,7 +724,7 @@ enum brw_wrap_mode {
# define GFX7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2)
# define GFX7_DS_CACHE_DISABLE (1 << 1)
# define GFX7_DS_ENABLE (1 << 0)
/* Gfx8+ DW8 */
# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21)
# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GFX8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16)
@ -911,7 +911,7 @@ enum brw_wrap_mode {
# define GFX8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0)
# define GFX9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0)
/* Gfx8 BLEND_STATE */
/* DW0 */
#define GFX8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
#define GFX8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30)
@ -1169,7 +1169,7 @@ enum brw_pixel_shader_coverage_mask_mode {
# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
/* 3DSTATE_WM for Gfx7 */
/* DW1 */
# define GFX7_WM_STATISTICS_ENABLE (1 << 31)
# define GFX7_WM_DEPTH_CLEAR (1 << 30)
@ -1440,11 +1440,11 @@ enum brw_pixel_shader_coverage_mask_mode {
# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21)
/* Load a value from memory into a register. Only available on Gfx7+. */
#define GFX7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
/* Manipulate the predicate bit based on some register values. Only on Gfx7+ */
#define GFX7_MI_PREDICATE (CMD_MI | (0xC << 23))
# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
@ -1652,10 +1652,10 @@ enum brw_pixel_shader_coverage_mask_mode {
#define INSTPM 0x20c0
# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
#define CS_DEBUG_MODE2 0x20d8 /* Gfx9+ */
# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gfx9+ */
# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7)
@ -1669,7 +1669,7 @@ enum brw_pixel_shader_coverage_mask_mode {
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5)
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5)
#define CS_CHICKEN1 0x2580 /* Gfx9+ */
# define GFX9_REPLAY_MODE_MIDBUFFER (0 << 0)
# define GFX9_REPLAY_MODE_MIDOBJECT (1 << 0)
# define GFX9_REPLAY_MODE_MASK REG_MASK(1 << 0)
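
These chicken registers take masked writes: the top 16 bits of the immediate select which low bits the write touches, which is what REG_MASK appears to encode. A sketch under that assumption:

#include <stdint.h>

#define REG_MASK_SKETCH(v)            ((uint32_t)(v) << 16)
#define REPLAY_MODE_MIDOBJECT_SKETCH  (1u << 0)

static uint32_t
cs_chicken1_midobject_imm(void)
{
   /* High half unmasks bit 0, low half sets it; written to CS_CHICKEN1
    * (0x2580) via MI_LOAD_REGISTER_IMM. */
   return REG_MASK_SKETCH(1u << 0) | REPLAY_MODE_MIDOBJECT_SKETCH;
}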

View File

@ -137,8 +137,8 @@ gfx6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
/**
* The hardware is capable of removing dangling vertices on its own; however,
* prior to Gfx6, we sometimes convert quads into trifans (and quad strips
* into tristrips), since pre-Gfx6 hardware requires a GS to render quads.
* This function manually trims dangling vertices from a draw call involving
* quads so that those dangling vertices won't get drawn when we convert to
* trifans/tristrips.
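
A hedged sketch of that trimming, close in spirit to i965's trim() helper but not copied from it: quads keep a multiple of four vertices, quad strips keep an even count of at least four.

#include <stdbool.h>

static unsigned
trim_sketch(bool is_quads, bool is_quad_strip, unsigned count)
{
   if (is_quads)
      return count - count % 4;                    /* whole quads only */
   if (is_quad_strip)
      return count >= 4 ? count - count % 2 : 0;   /* even count, min 4 */
   return count;
}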
@ -190,7 +190,7 @@ brw_emit_prim(struct brw_context *brw,
start_vertex_location += brw->vb.start_vertex_bias;
}
/* We only need to trim the primitive count on pre-Gfx6. */
if (devinfo->ver < 6)
verts_per_instance = trim(prim->mode, prim->count);
else
@ -411,7 +411,7 @@ brw_disable_rb_aux_buffer(struct brw_context *brw,
/** Implement the ASTC 5x5 sampler workaround
*
* Gfx9 sampling hardware has a bug where an ASTC 5x5 compressed surface
* cannot live in the sampler cache at the same time as an aux compressed
* surface. In order to work around the bug we have to stall rendering with a
* CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the

View File

@ -332,7 +332,7 @@ brw_init_extensions(struct gl_context *ctx)
ctx->Extensions.ARB_ES3_2_compatibility = true;
/* Currently only implemented in the scalar backend, so only enable for
* Gfx8+. Eventually Gfx6+ could be supported.
*/
ctx->Extensions.INTEL_shader_integer_functions2 = true;
}
@ -367,7 +367,7 @@ brw_init_extensions(struct gl_context *ctx)
* ensure memory access ordering for all messages to the dataport from
* all threads. Memory fence messages prior to SKL only provide memory
* access ordering for messages from the same thread, so we can only
* support the feature from Gfx9 onwards.
*
*/

View File

@ -723,7 +723,7 @@ brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
_mesa_get_format_name(stencil_mt->format));
}
if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) {
/* Before Gfx7, separate depth and stencil buffers can be used
* only if HiZ is enabled. From the Sandybridge PRM, Volume 2,
* Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable:
* [DevSNB]: This field must be set to the same value (enabled
@ -907,9 +907,9 @@ brw_blit_framebuffer(struct gl_context *ctx,
if (devinfo->ver < 6) {
/* On gfx4-5, try BLT first.
*
* Gfx4-5 have a single ring for both 3D and BLT operations, so there's
* no inter-ring synchronization issues like on Gfx6+. It is apparently
* faster than using the 3D pipeline. Original Gfx4 also has to rebase
* and copy miptree slices in order to render to unaligned locations.
*/
mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb,

View File

@ -103,11 +103,11 @@ brw_codegen_ff_gs_prog(struct brw_context *brw,
check_edge_flag = true;
break;
default:
unreachable("Unexpected primitive type in Gen6 SOL program.");
unreachable("Unexpected primitive type in Gfx6 SOL program.");
}
gfx6_sol_program(&c, key, num_verts, check_edge_flag);
} else {
/* On Gfx4-5, we use the GS to decompose certain types of primitives.
* Note that primitives which don't require a GS program have already
* been weeded out by now.
*/
@ -192,7 +192,7 @@ brw_ff_gs_populate_key(struct brw_context *brw,
}
if (devinfo->ver == 6) {
/* On Gfx6, GS is used for transform feedback. */
/* BRW_NEW_TRANSFORM_FEEDBACK */
if (_mesa_is_xfb_active_and_unpaused(ctx)) {
const struct gl_program *prog =

View File

@ -90,7 +90,7 @@ static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
* The following information is passed to the GS thread in R0, and needs to be
* included in the first URB_WRITE or FF_SYNC message sent by the GS:
*
* - DWORD 0 [31:0] handle info (Gfx4 only)
* - DWORD 5 [7:0] FFTID
* - DWORD 6 [31:0] Debug info
* - DWORD 7 [31:0] Debug info
@ -330,7 +330,7 @@ void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
}
/**
* Generate the geometry shader program used on Gfx6 to perform stream output
* (transform feedback).
*/
void

View File

@ -44,7 +44,7 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target,
const unsigned base_level = tex_obj->Attrib.BaseLevel;
unsigned last_level, first_layer, last_layer;
/* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
if (devinfo->ver <= 5 &&
(tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {

View File

@ -294,7 +294,7 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw,
* render using a renderable format, without the override workaround it
* wouldn't be possible to have a non-renderable surface in a fast clear
* state so the hardware probably legitimately doesn't need to support
* this case. At least on Gfx9 this really does seem to cause problems.
*/
if (devinfo->ver >= 9 &&
brw_isl_format_for_mesa_format(mt->format) !=

Some files were not shown because too many files have changed in this diff.