intel: fix typos found by codespell
Acked-by: David Heidelberg <david.heidelberg@collabora.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17191>
This commit is contained in:
parent
ab0e09803a
commit
f4386b81e6
|
@ -1394,7 +1394,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp,
|
|||
nir_imm_float(&b, 0.5f));
|
||||
color = blorp_nir_tex(&b, &v, key, src_pos);
|
||||
} else {
|
||||
/* Gfx7+ hardware doesn't automaticaly blend. */
|
||||
/* Gfx7+ hardware doesn't automatically blend. */
|
||||
color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples,
|
||||
key->tex_aux_usage,
|
||||
key->texture_data_type,
|
||||
|
|
|
@ -395,7 +395,7 @@ get_fast_clear_rect(const struct isl_device *dev,
|
|||
* Target(s)", beneath the "MSAA Compression" bullet (p326):
|
||||
*
|
||||
* Clear pass for this case requires that scaled down primitive
|
||||
* is sent down with upper left co-ordinate to coincide with
|
||||
* is sent down with upper left coordinate to coincide with
|
||||
* actual rectangle being cleared. For MSAA, clear rectangle’s
|
||||
* height and width need to as show in the following table in
|
||||
* terms of (width,height) of the RT.
|
||||
|
@ -406,7 +406,7 @@ get_fast_clear_rect(const struct isl_device *dev,
|
|||
* 8X Ceil(1/2*width) Ceil(1/2*height)
|
||||
* 16X width Ceil(1/2*height)
|
||||
*
|
||||
* The text "with upper left co-ordinate to coincide with actual
|
||||
* The text "with upper left coordinate to coincide with actual
|
||||
* rectangle being cleared" is a little confusing--it seems to imply
|
||||
* that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
|
||||
* feed the pipeline using the rectangle (x,y) to
|
||||
|
@ -576,7 +576,7 @@ blorp_clear(struct blorp_batch *batch,
|
|||
if (compute)
|
||||
use_simd16_replicated_data = false;
|
||||
|
||||
/* Constant color writes ignore everyting in blend and color calculator
|
||||
/* Constant color writes ignore everything in blend and color calculator
|
||||
* state. This is not documented.
|
||||
*/
|
||||
params.color_write_disable = color_write_disable & BITFIELD_MASK(4);
|
||||
|
@ -735,7 +735,7 @@ blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
|
|||
|
||||
/* W-tiles and Y-tiles have the same layout as far as cache lines are
|
||||
* concerned: both are 8x8 cache lines laid out Y-major. The difference is
|
||||
* entirely in how the data is arranged withing the cache line. W-tiling
|
||||
* entirely in how the data is arranged within the cache line. W-tiling
|
||||
* is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows
|
||||
* regardless of image format size. As long as everything is aligned to 8,
|
||||
* we can just treat the W-tiled image as Y-tiled, ignore the layout
|
||||
|
|
|
@ -243,7 +243,7 @@ get_start_end_pos(int *start, int *end)
|
|||
/* start value has to be mod with 32 as we need the relative
|
||||
* start position in the first DWord. For the end position, add
|
||||
* the length of the field to the start position to get the
|
||||
* relative postion in the 64 bit address.
|
||||
* relative position in the 64 bit address.
|
||||
*/
|
||||
if (*end - *start > 32) {
|
||||
int len = *end - *start;
|
||||
|
|
|
@ -63,7 +63,7 @@ intel_48b_address(uint64_t v)
|
|||
}
|
||||
|
||||
/**
|
||||
* Call ioctl, restarting if it is interupted
|
||||
* Call ioctl, restarting if it is interrupted
|
||||
*/
|
||||
static inline int
|
||||
intel_ioctl(int fd, unsigned long request, void *arg)
|
||||
|
|
|
@ -82,7 +82,7 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
|
|||
* Engine, the actual URB space available for operation is only 60KB
|
||||
* (per bank). Similarly when URB space programmed is 128KB (per bank)
|
||||
* for render engine, the actual URB space available for operation is
|
||||
* only 124KB (per bank). More detailed descripton available in "L3
|
||||
* only 124KB (per bank). More detailed description available in "L3
|
||||
* Cache" section of the B-Spec."
|
||||
*/
|
||||
if (devinfo->verx10 == 120) {
|
||||
|
|
|
@ -603,7 +603,7 @@ TEST_F(mi_builder_test, add_imm)
|
|||
mi_store(&b, out_mem64(88),
|
||||
mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
|
||||
|
||||
// And som add_imm just for good measure
|
||||
// And some add_imm just for good measure
|
||||
mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
|
||||
mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
|
|||
struct brw_codegen *p = &c->func;
|
||||
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
|
||||
|
||||
/* Initial list of indices for incoming vertexes:
|
||||
/* Initial list of indices for incoming vertices:
|
||||
*/
|
||||
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
|
||||
brw_CMP(p,
|
||||
|
|
|
@ -38,7 +38,7 @@ struct brw_sf_compile {
|
|||
struct brw_reg dy0;
|
||||
struct brw_reg dy2;
|
||||
|
||||
/* z and 1/w passed in seperately:
|
||||
/* z and 1/w passed in separately:
|
||||
*/
|
||||
struct brw_reg z[3];
|
||||
struct brw_reg inv_w[3];
|
||||
|
@ -161,7 +161,7 @@ static void do_twoside_color( struct brw_sf_compile *c )
|
|||
/* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
|
||||
* to get all channels active inside the IF. In the clipping code
|
||||
* we run with NoMask, so it's not an option and we can use
|
||||
* BRW_EXECUTE_1 for all comparisions.
|
||||
* BRW_EXECUTE_1 for all comparisons.
|
||||
*/
|
||||
brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
|
||||
brw_IF(p, BRW_EXECUTE_4);
|
||||
|
@ -290,7 +290,7 @@ static void alloc_regs( struct brw_sf_compile *c )
|
|||
c->dy0 = brw_vec1_grf(1, 5);
|
||||
c->dy2 = brw_vec1_grf(1, 6);
|
||||
|
||||
/* z and 1/w passed in seperately:
|
||||
/* z and 1/w passed in separately:
|
||||
*/
|
||||
c->z[0] = brw_vec1_grf(2, 0);
|
||||
c->inv_w[0] = brw_vec1_grf(2, 1);
|
||||
|
@ -376,7 +376,7 @@ calculate_masks(struct brw_sf_compile *c,
|
|||
} else if (interp == INTERP_MODE_NOPERSPECTIVE)
|
||||
*pc_linear = 0xf;
|
||||
|
||||
/* Maybe only processs one attribute on the final round:
|
||||
/* Maybe only process one attribute on the final round:
|
||||
*/
|
||||
if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
|
||||
*pc |= 0xf0;
|
||||
|
@ -632,7 +632,7 @@ static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
|
|||
*/
|
||||
if (pc_coord_replace) {
|
||||
set_predicate_control_flag_value(p, c, pc_coord_replace);
|
||||
/* Caculate 1.0/PointWidth */
|
||||
/* Calculate 1.0/PointWidth */
|
||||
gfx4_math(&c->func,
|
||||
c->tmp,
|
||||
BRW_MATH_FUNCTION_INV,
|
||||
|
|
|
@ -151,7 +151,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
if (!devinfo->has_64bit_int)
|
||||
int64_options |= (nir_lower_int64_options)~0;
|
||||
|
||||
/* The Bspec's section tittled "Instruction_multiply[DevBDW+]" claims that
|
||||
/* The Bspec's section titled "Instruction_multiply[DevBDW+]" claims that
|
||||
* destination type can be Quadword and source type Doubleword for Gfx8 and
|
||||
* Gfx9. So, lower 64 bit multiply instruction on rest of the platforms.
|
||||
*/
|
||||
|
|
|
@ -311,7 +311,7 @@ struct brw_vs_prog_key {
|
|||
*
|
||||
* For each attribute, a combination of BRW_ATTRIB_WA_*.
|
||||
*
|
||||
* For OpenGL, where we expose a maximum of 16 user input atttributes
|
||||
* For OpenGL, where we expose a maximum of 16 user input attributes
|
||||
* we only need up to VERT_ATTRIB_MAX slots, however, in Vulkan
|
||||
* slots preceding VERT_ATTRIB_GENERIC0 are unused and we can
|
||||
* expose up to 28 user input vertex attributes that are mapped to slots
|
||||
|
@ -1404,7 +1404,7 @@ struct brw_sf_prog_data {
|
|||
uint32_t urb_read_length;
|
||||
uint32_t total_grf;
|
||||
|
||||
/* Each vertex may have upto 12 attributes, 4 components each,
|
||||
/* Each vertex may have up to 12 attributes, 4 components each,
|
||||
* except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
|
||||
* rows.
|
||||
*
|
||||
|
|
|
@ -2534,7 +2534,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
dst = store + offset;
|
||||
}
|
||||
|
||||
/* If we didn't compact this intruction, we need to move it down into
|
||||
/* If we didn't compact this instruction, we need to move it down into
|
||||
* place.
|
||||
*/
|
||||
if (offset != src_offset) {
|
||||
|
|
|
@ -1048,7 +1048,7 @@ unsigned
|
|||
fs_inst::flags_written(const intel_device_info *devinfo) const
|
||||
{
|
||||
/* On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented
|
||||
* using a separte cmpn and sel instruction. This lowering occurs in
|
||||
* using a separate cmpn and sel instruction. This lowering occurs in
|
||||
* fs_visitor::lower_minmax which is called very, very late.
|
||||
*/
|
||||
if ((conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
|
||||
|
@ -1405,7 +1405,7 @@ fs_visitor::emit_sampleid_setup()
|
|||
*/
|
||||
|
||||
/* SKL+ has an extra bit for the Starting Sample Pair Index to
|
||||
* accomodate 16x MSAA.
|
||||
* accommodate 16x MSAA.
|
||||
*/
|
||||
abld.exec_all().group(1, 0)
|
||||
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
|
||||
|
|
|
@ -227,7 +227,7 @@ namespace {
|
|||
}
|
||||
|
||||
/**
|
||||
* Substract two vectors with saturation.
|
||||
* Subtract two vectors with saturation.
|
||||
*/
|
||||
vector_type
|
||||
subs(vector_type v, vector_type w)
|
||||
|
|
|
@ -111,9 +111,9 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, bblock_t *block,
|
|||
*
|
||||
* For floating and unsigned types there are two special cases,
|
||||
* when we can remove inst even if scan_inst is saturated: G
|
||||
* and LE. Since conditional modifiers are just comparations
|
||||
* and LE. Since conditional modifiers are just comparisons
|
||||
* against zero, saturating positive values to the upper
|
||||
* limit never changes the result of comparation.
|
||||
* limit never changes the result of comparison.
|
||||
*
|
||||
* For negative values:
|
||||
* (sat(x) > 0) == (x > 0) --- false
|
||||
|
|
|
@ -645,7 +645,7 @@ emit_find_msb_using_lzd(const fs_builder &bld,
|
|||
* For all negative number cases, including 0x80000000 and
|
||||
* 0xffffffff, the correct value is obtained from LZD if instead of
|
||||
* negating the (already negative) value the logical-not is used. A
|
||||
* conditonal logical-not can be achieved in two instructions.
|
||||
* conditional logical-not can be achieved in two instructions.
|
||||
*/
|
||||
temp = bld.vgrf(BRW_REGISTER_TYPE_D);
|
||||
|
||||
|
@ -933,7 +933,7 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
|
|||
}
|
||||
|
||||
/**
|
||||
* Deteremine whether sources of a nir_op_fmul can be fused with a nir_op_fsign
|
||||
* Determine whether sources of a nir_op_fmul can be fused with a nir_op_fsign
|
||||
*
|
||||
* Checks the operands of a \c nir_op_fmul to determine whether or not
|
||||
* \c emit_fsign could fuse the multiplication with the \c sign() calculation.
|
||||
|
@ -1404,7 +1404,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
|||
* turn that into a predicate. This leads us to an XOR.l instruction.
|
||||
*
|
||||
* Technically, according to the PRM, you're not allowed to use .l on a
|
||||
* XOR instruction. However, emperical experiments and Curro's reading
|
||||
* XOR instruction. However, empirical experiments and Curro's reading
|
||||
* of the simulator source both indicate that it's safe.
|
||||
*/
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
|
||||
|
@ -3696,7 +3696,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
|||
/* Make a loop that sends a message to the pixel interpolator
|
||||
* for the sample number in each live channel. If there are
|
||||
* multiple channels with the same sample number then these
|
||||
* will be handled simultaneously with a single interation of
|
||||
* will be handled simultaneously with a single iteration of
|
||||
* the loop.
|
||||
*/
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
@ -6667,7 +6667,7 @@ setup_imm_df(const fs_builder &bld, double v)
|
|||
if (devinfo->ver >= 8)
|
||||
return brw_imm_df(v);
|
||||
|
||||
/* gfx7.5 does not support DF immediates straighforward but the DIM
|
||||
/* gfx7.5 does not support DF immediates straightforward but the DIM
|
||||
* instruction allows to set the 64-bit immediate value.
|
||||
*/
|
||||
if (devinfo->platform == INTEL_PLATFORM_HSW) {
|
||||
|
|
|
@ -560,7 +560,7 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst)
|
|||
*
|
||||
* We are avoiding using grf127 as part of the destination of send
|
||||
* messages adding a node interference to the grf127_send_hack_node.
|
||||
* This node has a fixed asignment to grf127.
|
||||
* This node has a fixed assignment to grf127.
|
||||
*
|
||||
* We don't apply it to SIMD16 instructions because previous code avoids
|
||||
* any register overlap between sources and destination.
|
||||
|
@ -570,7 +570,7 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst)
|
|||
ra_add_node_interference(g, first_vgrf_node + inst->dst.nr,
|
||||
grf127_send_hack_node);
|
||||
|
||||
/* Spilling instruction are genereated as SEND messages from MRF but as
|
||||
/* Spilling instructions are generated as SEND messages from MRF but as
|
||||
* Gfx7+ supports sending from GRF the driver will assign these
|
||||
* MRF registers to a GRF. Implementations reuses the dest of the send
|
||||
* message as source. So as we will have an overlap for sure, we create
|
||||
|
@ -1141,7 +1141,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||
spill_max_size(fs));
|
||||
|
||||
/* Spills should only write data initialized by the instruction for
|
||||
* whichever channels are enabled in the excution mask. If that's
|
||||
* whichever channels are enabled in the execution mask. If that's
|
||||
* not possible we'll have to emit a matching unspill before the
|
||||
* instruction and set force_writemask_all on the spill.
|
||||
*/
|
||||
|
|
|
@ -94,7 +94,7 @@ add_byte_offset(backend_reg *reg, unsigned bytes)
|
|||
}
|
||||
}
|
||||
|
||||
} /* namepace detail */
|
||||
} /* namespace detail */
|
||||
|
||||
static inline src_reg
|
||||
byte_offset(src_reg reg, unsigned bytes)
|
||||
|
|
|
@ -266,7 +266,7 @@ brw_nir_lower_shader_calls(nir_shader *shader)
|
|||
* return to the caller.
|
||||
*
|
||||
* By default, our HW has the ability to handle the fact that a shader is not
|
||||
* available and will execute the next folowing shader in the tracing call.
|
||||
* available and will execute the next following shader in the tracing call.
|
||||
* For instance, a RAYGEN shader traces a ray, the tracing generates a hit,
|
||||
* but there is no ANYHIT shader available. The HW should follow up by
|
||||
* executing the CLOSESTHIT shader.
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
*
|
||||
* According to the specification, the shading rate output can be read &
|
||||
* written. A read after a write should report a different value if the
|
||||
* implemention decides on different primitive shading rate for some reason.
|
||||
* implementation decides on different primitive shading rate for some reason.
|
||||
* This is never the case in our implementation.
|
||||
*/
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ get_mul_for_src(nir_alu_src *src, unsigned num_components,
|
|||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
|
||||
/* We want to bail if any of the other ALU operations involved is labled
|
||||
/* We want to bail if any of the other ALU operations involved is labeled
|
||||
* exact. One reason for this is that, while the value that is changing is
|
||||
* actually the result of the add and not the multiply, the intention of
|
||||
* the user when they specify an exact multiply is that they want *that*
|
||||
|
|
|
@ -2329,7 +2329,7 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
|
|||
* second half of a register and needs a vertical stride of 0 so we:
|
||||
*
|
||||
* 1. Don't violate register region restrictions.
|
||||
* 2. Activate the gfx7 instruction decompresion bug exploit when
|
||||
* 2. Activate the gfx7 instruction decompression bug exploit when
|
||||
* execsize > 4
|
||||
*/
|
||||
if (hw_reg->subnr % REG_SIZE == 16) {
|
||||
|
@ -2461,7 +2461,7 @@ vec4_visitor::run()
|
|||
|
||||
OPT(lower_64bit_mad_to_mul_add);
|
||||
|
||||
/* Run this before payload setup because tesselation shaders
|
||||
/* Run this before payload setup because tessellation shaders
|
||||
* rely on it to prevent cross dvec2 regioning on DF attributes
|
||||
* that are setup so that XY are on the second half of register and
|
||||
* ZW are in the first half of the next.
|
||||
|
|
|
@ -389,7 +389,7 @@ try_copy_propagate(const struct intel_device_info *devinfo,
|
|||
if (inst->is_send_from_grf())
|
||||
return false;
|
||||
|
||||
/* we can't generally copy-propagate UD negations becuse we
|
||||
/* we can't generally copy-propagate UD negations because we
|
||||
* end up accessing the resulting values as signed integers
|
||||
* instead. See also resolve_ud_negate().
|
||||
*/
|
||||
|
|
|
@ -1992,7 +1992,7 @@ generate_code(struct brw_codegen *p,
|
|||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
|
||||
/* When converting from DF->F, we set destination's stride as 2 as an
|
||||
* aligment requirement. But in IVB/BYT, each DF implicitly writes
|
||||
* alignment requirement. But in IVB/BYT, each DF implicitly writes
|
||||
* two floats, the first one being the converted value. So we don't
|
||||
* need to explicitly set stride 2, but 1.
|
||||
*/
|
||||
|
|
|
@ -285,7 +285,7 @@ setup_imm_df(const vec4_builder &bld, double v)
|
|||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
assert(devinfo->ver == 7);
|
||||
|
||||
/* gfx7.5 does not support DF immediates straighforward but the DIM
|
||||
/* gfx7.5 does not support DF immediates straightforward but the DIM
|
||||
* instruction allows to set the 64-bit immediate value.
|
||||
*/
|
||||
if (devinfo->verx10 == 75) {
|
||||
|
@ -851,7 +851,7 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
|
|||
* For all negative number cases, including 0x80000000 and
|
||||
* 0xffffffff, the correct value is obtained from LZD if instead of
|
||||
* negating the (already negative) value the logical-not is used. A
|
||||
* conditonal logical-not can be achieved in two instructions.
|
||||
* conditional logical-not can be achieved in two instructions.
|
||||
*/
|
||||
temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
|
||||
|
||||
|
@ -1302,7 +1302,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
|||
* turn that into a predicate. This leads us to an XOR.l instruction.
|
||||
*
|
||||
* Technically, according to the PRM, you're not allowed to use .l on a
|
||||
* XOR instruction. However, emperical experiments and Curro's reading
|
||||
* XOR instruction. However, empirical experiments and Curro's reading
|
||||
* of the simulator source both indicate that it's safe.
|
||||
*/
|
||||
src_reg tmp = src_reg(this, glsl_type::ivec4_type);
|
||||
|
|
|
@ -111,7 +111,7 @@ gfx6_gs_visitor::emit_prolog()
|
|||
|
||||
/* PrimitiveID is delivered in r0.1 of the thread payload. If the program
|
||||
* needs it we have to move it to a separate register where we can map
|
||||
* the atttribute.
|
||||
* the attribute.
|
||||
*
|
||||
* Notice that we cannot use a virtual register for this, because we need to
|
||||
* map all input attributes to hardware registers in setup_payload(),
|
||||
|
@ -155,7 +155,7 @@ gfx6_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
* each will generate a scratch write with the same offset into
|
||||
* scratch space (thus, each one overwriting the previous). This is
|
||||
* not what we want. What we will do instead is emit PSIZ to
|
||||
* a regular temporary register, then move that resgister into the
|
||||
* a regular temporary register, then move that register into the
|
||||
* array. This way we only have one instruction with an array
|
||||
* destination and we only produce a single scratch write.
|
||||
*/
|
||||
|
@ -329,7 +329,7 @@ gfx6_gs_visitor::emit_thread_end()
|
|||
}
|
||||
|
||||
/* Here we have to:
|
||||
* 1) Emit an FF_SYNC messsage to obtain an initial VUE handle.
|
||||
* 1) Emit an FF_SYNC message to obtain an initial VUE handle.
|
||||
* 2) Loop over all buffered vertex data and write it to corresponding
|
||||
* URB entries.
|
||||
* 3) Allocate new VUE handles for all vertices other than the first.
|
||||
|
|
|
@ -1190,7 +1190,7 @@ update_l3_banks(struct intel_device_info *devinfo)
|
|||
|
||||
/* At some point in time, some people decided to redefine what topology means,
|
||||
* from useful HW related information (slice, subslice, etc...), to much less
|
||||
* useful generic stuff that noone cares about (a single slice with lots of
|
||||
* useful generic stuff that no one cares about (a single slice with lots of
|
||||
* subslices). Of course all of this was done without asking the people who
|
||||
* defined the topology query in the first place, to solve a lack of
|
||||
* information on Gfx10+. This function is here to work around the fact it's not
|
||||
|
|
|
@ -355,7 +355,7 @@ struct intel_device_info
|
|||
* SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
|
||||
* BXT.
|
||||
*
|
||||
* For simplicty to fit with the current code scaling by a single constant
|
||||
* For simplicity to fit with the current code scaling by a single constant
|
||||
* to map from raw timestamps to nanoseconds we now do the conversion in
|
||||
* floating point instead of integer arithmetic.
|
||||
*
|
||||
|
|
|
@ -5517,7 +5517,7 @@
|
|||
<field name="Max Inter MB Bit Count Check Enable" start="66" end="66" type="bool"/>
|
||||
<field name="Max Intra MB Bit Count Check Enable" start="67" end="67" type="bool"/>
|
||||
<field name="Intermediate Bit Buffer Overrun Enable" start="68" end="68" type="bool"/>
|
||||
<field name="Final Bistream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="Final Bitstream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="QIndex Clamp High for Underflow" start="70" end="70" type="bool"/>
|
||||
<field name="QIndex Clamp High for Overflow" start="71" end="71" type="bool"/>
|
||||
<field name="Max Inter MB Bit Count" start="96" end="107" type="uint"/>
|
||||
|
|
|
@ -5692,7 +5692,7 @@
|
|||
<field name="Max Inter MB Bit Count Check Enable" start="66" end="66" type="bool"/>
|
||||
<field name="Max Intra MB Bit Count Check Enable" start="67" end="67" type="bool"/>
|
||||
<field name="Intermediate Bit Buffer Overrun Enable" start="68" end="68" type="bool"/>
|
||||
<field name="Final Bistream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="Final Bitstream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="QIndex Clamp High for Underflow" start="70" end="70" type="bool"/>
|
||||
<field name="QIndex Clamp High for Overflow" start="71" end="71" type="bool"/>
|
||||
<field name="Max Inter MB Bit Count" start="96" end="107" type="uint"/>
|
||||
|
|
|
@ -6023,7 +6023,7 @@
|
|||
<field name="Max Inter MB Bit Count Check Enable" start="66" end="66" type="bool"/>
|
||||
<field name="Max Intra MB Bit Count Check Enable" start="67" end="67" type="bool"/>
|
||||
<field name="Intermediate Bit Buffer Overrun Enable" start="68" end="68" type="bool"/>
|
||||
<field name="Final Bistream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="Final Bitstream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="QIndex Clamp High for Underflow" start="70" end="70" type="bool"/>
|
||||
<field name="QIndex Clamp High for Overflow" start="71" end="71" type="bool"/>
|
||||
<field name="Max Inter MB Bit Count" start="96" end="107" type="uint"/>
|
||||
|
|
|
@ -5156,7 +5156,7 @@
|
|||
<field name="Max Inter MB Bit Count Check Enable" start="66" end="66" type="bool"/>
|
||||
<field name="Max Intra MB Bit Count Check Enable" start="67" end="67" type="bool"/>
|
||||
<field name="Intermediate Bit Buffer Overrun Enable" start="68" end="68" type="bool"/>
|
||||
<field name="Final Bistream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="Final Bitstream Buffer Overrun Enable" start="69" end="69" type="bool"/>
|
||||
<field name="QIndex Clamp High for Underflow" start="70" end="70" type="bool"/>
|
||||
<field name="QIndex Clamp High for Overflow" start="71" end="71" type="bool"/>
|
||||
<field name="Max Inter MB Bit Count" start="96" end="107" type="uint"/>
|
||||
|
|
|
@ -116,7 +116,7 @@ def process(filename):
|
|||
enum_dict[e.attrib['name']] = e
|
||||
|
||||
# Structs are a bit annoying because they can refer to each other. We sort
|
||||
# them alphabetically and then build a graph of depedencies. Finally we go
|
||||
# them alphabetically and then build a graph of dependencies. Finally we go
|
||||
# through the alphabetically sorted list and print out dependencies first.
|
||||
structs = sorted(xml.findall('./struct'), key=get_name)
|
||||
wrapped_struct_dict = {}
|
||||
|
|
|
@ -102,7 +102,7 @@ Errata
|
|||
ISL acquired the term 'surface element' from the Broadwell PRM [1], which
|
||||
defines it as follows:
|
||||
|
||||
An element is defined as a pixel in uncompresed surface formats, and as
|
||||
An element is defined as a pixel in uncompressed surface formats, and as
|
||||
a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL
|
||||
type multisampled surfaces, an element is a sample.
|
||||
|
||||
|
|
|
@ -165,7 +165,7 @@ class Channel(object):
|
|||
|
||||
|
||||
class Format(object):
|
||||
"""Class taht contains all values needed by the template."""
|
||||
"""Class that contains all values needed by the template."""
|
||||
def __init__(self, line):
|
||||
# pylint: disable=invalid-name
|
||||
self.name = line[0].strip()
|
||||
|
|
|
@ -680,7 +680,7 @@ isl_surf_choose_tiling(const struct isl_device *dev,
|
|||
|
||||
#undef CHOOSE
|
||||
|
||||
/* No tiling mode accomodates the inputs. */
|
||||
/* No tiling mode accommodates the inputs. */
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -877,7 +877,7 @@ isl_choose_image_alignment_el(const struct isl_device *dev,
|
|||
*image_align_el = isl_extent3d(1, 1, 1);
|
||||
} else if (ISL_GFX_VER(dev) < 12) {
|
||||
/* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
|
||||
* primary surface which works out to 2x2 HiZ elments.
|
||||
* primary surface which works out to 2x2 HiZ elements.
|
||||
*/
|
||||
*image_align_el = isl_extent3d(2, 2, 1);
|
||||
} else {
|
||||
|
@ -1268,11 +1268,11 @@ isl_calc_phys_slice0_extent_sa_gfx4_2d(
|
|||
* alignment here is safe because we later align the row pitch and array
|
||||
* pitch to the tile boundary. It is safe even for
|
||||
* ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
|
||||
* to accomodate the interleaved samples.
|
||||
* to accommodate the interleaved samples.
|
||||
*
|
||||
* For linear surfaces, reducing the alignment here permits us to later
|
||||
* choose an arbitrary, non-aligned row pitch. If the surface backs
|
||||
* a VkBuffer, then an arbitrary pitch may be needed to accomodate
|
||||
* a VkBuffer, then an arbitrary pitch may be needed to accommodate
|
||||
* VkBufferImageCopy::bufferRowLength.
|
||||
*/
|
||||
*phys_slice0_sa = (struct isl_extent2d) {
|
||||
|
@ -2062,7 +2062,7 @@ isl_surf_get_hiz_surf(const struct isl_device *dev,
|
|||
* from Sandy Bridge through Broadwell, HiZ compresses samples in the
|
||||
* primary depth surface. On Sky Lake and onward, HiZ compresses pixels.
|
||||
*
|
||||
* There are a number of different ways that this discrepency could be
|
||||
* There are a number of different ways that this discrepancy could be
|
||||
* handled. The way we have chosen is to simply make MSAA HiZ have the
|
||||
* same number of samples as the parent surface pre-Sky Lake and always be
|
||||
* single-sampled on Sky Lake and above. Since the block sizes of
|
||||
|
|
|
@ -1161,7 +1161,7 @@ typedef uint32_t isl_sample_count_mask_t;
|
|||
*/
|
||||
enum isl_msaa_layout {
|
||||
/**
|
||||
* @brief Suface is single-sampled.
|
||||
* @brief Surface is single-sampled.
|
||||
*/
|
||||
ISL_MSAA_LAYOUT_NONE,
|
||||
|
||||
|
@ -1389,7 +1389,7 @@ struct isl_tile_info {
|
|||
/**
|
||||
* The physical size of the tile in bytes and rows of bytes
|
||||
*
|
||||
* This field determines how the tiles of a surface are physically layed
|
||||
* This field determines how the tiles of a surface are physically laid
|
||||
* out in memory. The logical and physical tile extent are frequently the
|
||||
* same but this is not always the case. For instance, a W-tile (which is
|
||||
* always used with ISL_FORMAT_R8) has a logical size of 64el x 64el but
|
||||
|
@ -1642,7 +1642,7 @@ struct isl_surf_fill_state_info {
|
|||
uint32_t mocs;
|
||||
|
||||
/**
|
||||
* The auxilary surface or NULL if no auxilary surface is to be used.
|
||||
* The auxiliary surface or NULL if no auxiliary surface is to be used.
|
||||
*/
|
||||
const struct isl_surf *aux_surf;
|
||||
enum isl_aux_usage aux_usage;
|
||||
|
|
|
@ -217,7 +217,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
|
|||
* to match the depth-buffer value for `Depth`. It may be a
|
||||
* documentation bug, since the other fields don't require this.
|
||||
*
|
||||
* TODO: Confirm documentation and remove seeting of `Depth` if not
|
||||
* TODO: Confirm documentation and remove setting of `Depth` if not
|
||||
* required.
|
||||
*/
|
||||
sb.Depth = db.Depth;
|
||||
|
@ -274,7 +274,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
|
|||
* value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode say:
|
||||
*
|
||||
* "If Number of multisamples > 1, programming this value means MSAA
|
||||
* compression is enabled for that surface. Auxillary surface is MSC
|
||||
* compression is enabled for that surface. Auxiliary surface is MSC
|
||||
* with tile y."
|
||||
*
|
||||
* Since this interpretation ignores whether the surface is
|
||||
|
|
|
@ -113,7 +113,7 @@ isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
|||
return false;
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
|
||||
* Suface Storage Format:
|
||||
* Surface Storage Format:
|
||||
*
|
||||
* +---------------------+----------------------------------------------------------------+
|
||||
* | MSFMT_MSS | Multsampled surface was/is rendered as a render target |
|
||||
|
@ -128,7 +128,7 @@ isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
|||
require_interleaved = true;
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
|
||||
* Suface Storage Format:
|
||||
* Surface Storage Format:
|
||||
*
|
||||
* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width
|
||||
* is >= 8192 (meaning the actual surface width is >= 8193 pixels), this
|
||||
|
@ -138,7 +138,7 @@ isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
|||
require_array = true;
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
|
||||
* Suface Storage Format:
|
||||
* Surface Storage Format:
|
||||
*
|
||||
* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8,
|
||||
* ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number
|
||||
|
@ -150,7 +150,7 @@ isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
|||
require_interleaved = true;
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
|
||||
* Suface Storage Format:
|
||||
* Surface Storage Format:
|
||||
*
|
||||
* This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is
|
||||
* one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or
|
||||
|
|
|
@ -132,7 +132,7 @@ isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
|
|||
*
|
||||
* - For Sampling Engine and Render Target Surfaces: This field
|
||||
* specifies the vertical alignment requirement in elements for the
|
||||
* surface. [...] An element is defined as a pixel in uncompresed
|
||||
* surface. [...] An element is defined as a pixel in uncompressed
|
||||
* surface formats, and as a compression block in compressed surface
|
||||
* formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
|
||||
* element is a sample.
|
||||
|
@ -144,7 +144,7 @@ isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
|
|||
*
|
||||
* See the appropriate Alignment table in the "Surface Layout and
|
||||
* Tiling" section under Common Surface Formats for the table of
|
||||
* alignment values for Tiled Resrouces.
|
||||
* alignment values for Tiled Resources.
|
||||
*
|
||||
* - For uncompressed surfaces, the units of "j" are rows of pixels on
|
||||
* the physical surface. For compressed texture formats, the units of
|
||||
|
|
|
@ -575,7 +575,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
* say:
|
||||
*
|
||||
* "If Number of multisamples > 1, programming this value means
|
||||
* MSAA compression is enabled for that surface. Auxillary surface
|
||||
* MSAA compression is enabled for that surface. Auxiliary surface
|
||||
* is MSC with tile y."
|
||||
*
|
||||
* Since this interpretation ignores whether the surface is
|
||||
|
@ -656,7 +656,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
#endif
|
||||
}
|
||||
|
||||
/* The auxiliary buffer info is filled when it's useable by the HW.
|
||||
/* The auxiliary buffer info is filled when it's usable by the HW.
|
||||
*
|
||||
* Starting with Gfx12, the only form of compression that can be used
|
||||
* with RENDER_SURFACE_STATE which requires an aux surface is MCS.
|
||||
|
@ -710,7 +710,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
/* From the SKL PRM, Programming Note under Sampler Output Channel
|
||||
* Mapping:
|
||||
*
|
||||
* If a surface has an associated HiZ Auxilliary surface, the
|
||||
* If a surface has an associated HiZ Auxiliary surface, the
|
||||
* Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE
|
||||
* must be set.
|
||||
*/
|
||||
|
@ -796,7 +796,7 @@ isl_genX(buffer_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
uint64_t buffer_size = info->size_B;
|
||||
|
||||
/* Uniform and Storage buffers need to have surface size not less than the
|
||||
* aligned 32-bit size of the buffer. To calculate the array lenght on
|
||||
* aligned 32-bit size of the buffer. To calculate the array length on
|
||||
* unsized arrays in StorageBuffer the last 2 bits store the padding size
|
||||
* added to the surface, so we can later calculate the original buffer
|
||||
* size to know the number of elements.
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
|
||||
#define BDW_GT2_DEVID 0x161a
|
||||
|
||||
// An asssert that works regardless of NDEBUG.
|
||||
// An assert that works regardless of NDEBUG.
|
||||
#define t_assert(cond) \
|
||||
do { \
|
||||
if (!(cond)) { \
|
||||
|
|
|
@ -1204,7 +1204,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
|
|||
start + field->location,
|
||||
end + field->location);
|
||||
/* no_oa_accumulate=true is used when doing GL perf queries, we
|
||||
* manually parse the OA reports from the OA buffer and substract
|
||||
* manually parse the OA reports from the OA buffer and subtract
|
||||
* unrelated deltas, so don't accumulate the begin/end reports here.
|
||||
*/
|
||||
if (!no_oa_accumulate) {
|
||||
|
|
|
@ -7705,7 +7705,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -7720,7 +7720,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -7735,7 +7735,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -7750,7 +7750,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader12AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader12_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -7765,7 +7765,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader11AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader11_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -7780,7 +7780,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader10AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader10_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5422,7 +5422,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5437,7 +5437,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5412,7 +5412,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5427,7 +5427,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5442,7 +5442,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5425,7 +5425,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5440,7 +5440,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5455,7 +5455,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -2641,7 +2641,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -2656,7 +2656,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -2671,7 +2671,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -2686,7 +2686,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader12AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader12_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -2701,7 +2701,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader11AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader11_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -2716,7 +2716,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader10AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader10_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5419,7 +5419,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5434,7 +5434,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5412,7 +5412,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5427,7 +5427,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5442,7 +5442,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5425,7 +5425,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5440,7 +5440,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5455,7 +5455,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5936,7 +5936,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5951,7 +5951,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5966,7 +5966,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -11480,7 +11480,7 @@
|
|||
<counter name="TE Bottleneck"
|
||||
symbol_name="TeBottleneck"
|
||||
underscore_name="te_bottleneck"
|
||||
description="The percentage of time in which tesselation pipeline stage was slowing down the 3D pipeline."
|
||||
description="The percentage of time in which tessellation pipeline stage was slowing down the 3D pipeline."
|
||||
low_watermark="5"
|
||||
high_watermark="15"
|
||||
data_type="float"
|
||||
|
|
|
@ -5425,7 +5425,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5440,7 +5440,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5455,7 +5455,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -5419,7 +5419,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss0)"
|
||||
symbol_name="NonSamplerShader00AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader00_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss0)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5434,7 +5434,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss1)"
|
||||
symbol_name="NonSamplerShader01AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader01_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss1)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
@ -5449,7 +5449,7 @@
|
|||
<counter name="HDC stalled by L3 (s0.ss2)"
|
||||
symbol_name="NonSamplerShader02AccessStalledOnL3"
|
||||
underscore_name="non_sampler_shader02_access_stalled_on_l3"
|
||||
description="Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
description="Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (s0.ss2)"
|
||||
data_type="float"
|
||||
max_equation="100"
|
||||
units="percent"
|
||||
|
|
|
@ -71,7 +71,7 @@ struct bo {
|
|||
uint32_t size;
|
||||
uint64_t offset;
|
||||
void *map;
|
||||
/* Whether the buffer has been positionned in the GTT already. */
|
||||
/* Whether the buffer has been positioned in the GTT already. */
|
||||
bool gtt_mapped : 1;
|
||||
/* Tracks userspace mmapping of the buffer */
|
||||
bool user_mapped : 1;
|
||||
|
|
|
@ -139,7 +139,7 @@ del_drm_fd(int fd)
|
|||
}
|
||||
}
|
||||
|
||||
/* Our goal is not to have noise good enough for cryto,
|
||||
/* Our goal is not to have noise good enough for crypto,
|
||||
* but instead values that are unique-ish enough that
|
||||
* it is incredibly unlikely that a buffer overwrite
|
||||
* will produce the exact same values.
|
||||
|
|
|
@ -79,7 +79,7 @@
|
|||
* our allocation fast-path, there isn't really a way to munmap the old mmap,
|
||||
* so we just keep it around until garbage collection time. While the block
|
||||
* allocator is lockless for normal operations, we block other threads trying
|
||||
* to allocate while we're growing the map. It sholdn't happen often, and
|
||||
* to allocate while we're growing the map. It shouldn't happen often, and
|
||||
* growing is fast anyway.
|
||||
*
|
||||
* At the next level we can use various sub-allocators. The state pool is a
|
||||
|
@ -998,7 +998,7 @@ anv_state_pool_return_chunk(struct anv_state_pool *pool,
|
|||
|
||||
if (nblocks > 0) {
|
||||
/* First return divisor aligned and sized chunks. We start returning
|
||||
* larger blocks from the end fo the chunk, since they should already be
|
||||
* larger blocks from the end of the chunk, since they should already be
|
||||
* aligned to divisor. Also anv_state_pool_return_blocks() only accepts
|
||||
* aligned chunks.
|
||||
*/
|
||||
|
@ -1102,7 +1102,7 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
|
|||
alloc_size,
|
||||
pool->block_size,
|
||||
&padding);
|
||||
/* Everytime we allocate a new state, add it to the state pool */
|
||||
/* Every time we allocate a new state, add it to the state pool */
|
||||
uint32_t idx;
|
||||
UNUSED VkResult result = anv_state_table_add(&pool->table, &idx, 1);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
|
|
@ -442,14 +442,14 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
|
|||
|
||||
static VkResult
|
||||
anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
|
||||
struct anv_batch *batch, size_t aditional,
|
||||
struct anv_batch *batch, size_t additional,
|
||||
size_t batch_padding)
|
||||
{
|
||||
assert(batch->start == bbo->bo->map);
|
||||
bbo->length = batch->next - batch->start;
|
||||
|
||||
size_t new_size = bbo->bo->size;
|
||||
while (new_size <= bbo->length + aditional + batch_padding)
|
||||
while (new_size <= bbo->length + additional + batch_padding)
|
||||
new_size *= 2;
|
||||
|
||||
if (new_size == bbo->bo->size)
|
||||
|
@ -571,7 +571,7 @@ static void
|
|||
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *bo, uint32_t offset)
|
||||
{
|
||||
/* In gfx8+ the address field grew to two dwords to accomodate 48 bit
|
||||
/* In gfx8+ the address field grew to two dwords to accommodate 48 bit
|
||||
* offsets. The high 16 bits are in the last dword, so we can use the gfx8
|
||||
* version in either case, as long as we set the instruction length in the
|
||||
* header accordingly. This means that we always emit three dwords here
|
||||
|
@ -1689,7 +1689,7 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
|
|||
/* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
|
||||
* will get added automatically by processing relocations on the batch
|
||||
* buffer. We have to add the surface state BO manually because it has
|
||||
* relocations of its own that we need to be sure are processsed.
|
||||
* relocations of its own that we need to be sure are processed.
|
||||
*/
|
||||
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
|
||||
ss_pool->block_pool.bo,
|
||||
|
@ -2060,7 +2060,7 @@ anv_queue_exec_utrace_locked(struct anv_queue *queue,
|
|||
* with our list of BOs out of sync with our list of gem handles.
|
||||
*
|
||||
* 2) The algorithm we use for building the list of unique buffers isn't
|
||||
* thread-safe. While the client is supposed to syncronize around
|
||||
* thread-safe. While the client is supposed to synchronize around
|
||||
* QueueSubmit, this would be extremely difficult to debug if it ever came
|
||||
* up in the wild due to a broken app. It's better to play it safe and
|
||||
* just lock around QueueSubmit.
|
||||
|
|
|
@ -436,7 +436,7 @@ VkResult anv_CreateDescriptorSetLayout(
|
|||
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
|
||||
}
|
||||
|
||||
/* We need to allocate decriptor set layouts off the device allocator
|
||||
/* We need to allocate descriptor set layouts off the device allocator
|
||||
* with DEVICE scope because they are reference counted and may not be
|
||||
* destroyed when vkDestroyDescriptorSetLayout is called.
|
||||
*/
|
||||
|
@ -846,11 +846,11 @@ void anv_DestroyPipelineLayout(
|
|||
* view surface state. The spec allows us to fail to allocate due to
|
||||
* fragmentation in all cases but two: 1) after pool reset, allocating up
|
||||
* until the pool size with no freeing must succeed and 2) allocating and
|
||||
* freeing only descriptor sets with the same layout. Case 1) is easy enogh,
|
||||
* freeing only descriptor sets with the same layout. Case 1) is easy enough,
|
||||
* and the free lists let us recycle blocks for case 2).
|
||||
*/
|
||||
|
||||
/* The vma heap reserves 0 to mean NULL; we have to offset by some ammount to
|
||||
/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to
|
||||
* ensure we can allocate the entire BO without hitting zero. The actual
|
||||
* amount doesn't matter.
|
||||
*/
|
||||
|
|
|
@ -4662,7 +4662,7 @@ VkResult anv_GetCalibratedTimestampsEXT(
|
|||
* clock edges is when the sampled clock with the largest period is
|
||||
* sampled at the end of that period but right at the beginning of the
|
||||
* sampling interval and some other clock is sampled right at the
|
||||
* begining of its sampling period and right at the end of the
|
||||
* beginning of its sampling period and right at the end of the
|
||||
* sampling interval. Let's assume the GPU has the longest clock
|
||||
* period and that the application is sampling GPU and monotonic:
|
||||
*
|
||||
|
|
|
@ -326,7 +326,7 @@ add_surface(struct anv_device *device,
|
|||
*
|
||||
* If hardware limitations force us to use a shadow surface, then the same
|
||||
* limitations may also constrain the tiling of the primary surface; therefore
|
||||
* paramater @a inout_primary_tiling_flags.
|
||||
* parameter @a inout_primary_tiling_flags.
|
||||
*
|
||||
* If the image plane is a separate stencil plane and if the user provided
|
||||
* VkImageStencilUsageCreateInfoEXT, then @a usage must be stencilUsage.
|
||||
|
@ -1804,7 +1804,7 @@ VkResult anv_BindImageMemory2(
|
|||
}
|
||||
case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR: {
|
||||
/* Ignore this struct on Android, we cannot access swapchain
|
||||
* structures threre.
|
||||
* structures there.
|
||||
*/
|
||||
#ifndef VK_USE_PLATFORM_ANDROID_KHR
|
||||
const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
|
||||
|
|
|
@ -476,7 +476,7 @@ struct anv_bo {
|
|||
uint32_t refcount;
|
||||
|
||||
/* Index into the current validation list. This is used by the
|
||||
* validation list building alrogithm to track which buffers are already
|
||||
* validation list building algorithm to track which buffers are already
|
||||
* in the validation list so that we can ensure uniqueness.
|
||||
*/
|
||||
uint32_t exec_obj_index;
|
||||
|
@ -1824,7 +1824,7 @@ struct anv_descriptor_set_binding_layout {
|
|||
*/
|
||||
uint32_t array_size;
|
||||
|
||||
/* Index into the flattend descriptor set */
|
||||
/* Index into the flattened descriptor set */
|
||||
uint32_t descriptor_index;
|
||||
|
||||
/* Index into the dynamic state array for a dynamic buffer */
|
||||
|
@ -2971,7 +2971,7 @@ struct anv_cmd_state {
|
|||
*/
|
||||
bool hiz_enabled;
|
||||
|
||||
/* We ensure the registers for the gfx12 D16 fix are initalized at the
|
||||
/* We ensure the registers for the gfx12 D16 fix are initialized at the
|
||||
* first non-NULL depth stencil packet emission of every command buffer.
|
||||
* For secondary command buffer execution, we transfer the state from the
|
||||
* last command buffer to the primary (if known).
|
||||
|
|
|
@ -288,7 +288,7 @@ blorp_exec_on_render(struct blorp_batch *batch,
|
|||
#if GFX_VER >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Taget Message
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
|
@ -321,7 +321,7 @@ blorp_exec_on_render(struct blorp_batch *batch,
|
|||
#if GFX_VER >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Taget Message
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
|
|
|
@ -120,7 +120,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
/* Emit a render target cache flush.
|
||||
*
|
||||
* This isn't documented anywhere in the PRM. However, it seems to be
|
||||
* necessary prior to changing the surface state base adress. Without
|
||||
* necessary prior to changing the surface state base address. Without
|
||||
* this, we get GPU hangs when using multi-level command buffers which
|
||||
* clear depth, reset state base address, and then go render stuff.
|
||||
*/
|
||||
|
@ -237,7 +237,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
#endif /* GFX_VERx10 < 125 */
|
||||
|
||||
/* After re-setting the surface state base address, we have to do some
|
||||
* cache flusing so that the sampler engine will pick up the new
|
||||
* cache flushing so that the sampler engine will pick up the new
|
||||
* SURFACE_STATE objects and binding tables. From the Broadwell PRM,
|
||||
* Shared Function > 3D Sampler > State > State Caching (page 96):
|
||||
*
|
||||
|
@ -1678,7 +1678,7 @@ genX(BeginCommandBuffer)(
|
|||
}
|
||||
|
||||
/* We send an "Indirect State Pointers Disable" packet at
|
||||
* EndCommandBuffer, so all push contant packets are ignored during a
|
||||
* EndCommandBuffer, so all push constant packets are ignored during a
|
||||
* context restore. Documentation says after that command, we need to
|
||||
* emit push constants again before any rendering operation. So we
|
||||
* flag them dirty here to make sure they get emitted.
|
||||
|
@ -6926,7 +6926,7 @@ void genX(CmdBeginRendering)(
|
|||
#if GFX_VER >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Taget Message
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
|
|
|
@ -476,7 +476,7 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* We have to subtract two slots to accout for the URB entry output
|
||||
/* We have to subtract two slots to account for the URB entry output
|
||||
* read offset in the VS and GS stages.
|
||||
*/
|
||||
const int source_attr = slot - 2 * urb_entry_read_offset;
|
||||
|
|
Loading…
Reference in New Issue