From 18a631eb9056857a9ced477e7e3d1a435a906be2 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 16:07:33 -0600
Subject: [PATCH 001/266] svga: fix clip plane regression after recent
tgsi_scan change
Before the change "tgsi/scan: use properties for clip/cull distance
writemasks", the tgsi_shader_info::num_written_clipdistance field
was a multiple of four, now it's an accurate count. In the svga
driver, we need a minor change to the loop test.
Reviewed-by: Charmaine Lee
---
src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index d62f2bbcc96..332904f88b4 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -3097,7 +3097,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
unsigned i;
unsigned clip_plane_enable = emit->key.clip_plane_enable;
unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
- unsigned num_written_clipdist = emit->info.num_written_clipdistance;
+ int num_written_clipdist = emit->info.num_written_clipdistance;
assert(emit->clip_dist_out_index != INVALID_INDEX);
assert(emit->clip_dist_tmp_index != INVALID_INDEX);
@@ -3109,7 +3109,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
*/
emit->clip_dist_tmp_index = INVALID_INDEX;
- for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) {
+ for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
From 5fa7114652068735347c8715d1fc1d2cef72c433 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Tue, 20 Oct 2015 14:29:37 -0700
Subject: [PATCH 002/266] i965/fs: Enumerate logical fb writes arguments
Gen9 adds the ability to write out a stencil value, so we need to expand the
virtual payload by one. Abstracting this now makes that change easier to read.
I was admittedly confused early on about some of the hardcoding. If people
believe the resulting code is inferior, I am not super attached to the patch.
v2:
Remove explicit numbering from the enumeration (Matt).
Use a real naming scheme, and reference it in the opcode definition (Curro)
Add a missed hardcoded logical position in get_lowered_simd_width (Ben)
Add an assertion to make sure the component numbering is correct (Ben)
Cc: Matt Turner
Cc: Francisco Jerez
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_defines.h | 22 +++++++++-------
src/mesa/drivers/dri/i965/brw_fs.cpp | 27 +++++++++++---------
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 +
3 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index bd7d0b1c9a7..457f49c9709 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -911,15 +911,9 @@ enum opcode {
/**
* Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
- * individual sources instead of as a single payload blob:
- *
- * Source 0: [required] Color 0.
- * Source 1: [optional] Color 1 (for dual source blend messages).
- * Source 2: [optional] Src0 Alpha.
- * Source 3: [optional] Source Depth (gl_FragDepth)
- * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread
- * Source 5: [optional] Sample Mask (gl_SampleMask).
- * Source 6: [required] Number of color components (as a UD immediate).
+ * individual sources instead of as a single payload blob. The
+ * position/ordering of the arguments are defined by the enum
+ * fb_write_logical_srcs.
*/
FS_OPCODE_FB_WRITE_LOGICAL,
@@ -1330,6 +1324,16 @@ enum brw_urb_write_flags {
BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
+enum fb_write_logical_srcs {
+ FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */
+ FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */
+ FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
+ FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
+ FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
+ FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
+};
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 436ee4d5f23..f2f598cb569 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -699,10 +699,10 @@ fs_inst::components_read(unsigned i) const
return 2;
case FS_OPCODE_FB_WRITE_LOGICAL:
- assert(src[6].file == IMM);
+ assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
- return src[6].fixed_hw_reg.dw1.ud;
+ return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
else
return 1;
@@ -3350,15 +3350,16 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const brw_wm_prog_key *key,
const fs_visitor::thread_payload &payload)
{
- assert(inst->src[6].file == IMM);
+ assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
const brw_device_info *devinfo = bld.shader->devinfo;
- const fs_reg &color0 = inst->src[0];
- const fs_reg &color1 = inst->src[1];
- const fs_reg &src0_alpha = inst->src[2];
- const fs_reg &src_depth = inst->src[3];
- const fs_reg &dst_depth = inst->src[4];
- fs_reg sample_mask = inst->src[5];
- const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud;
+ const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
+ const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
+ const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
+ const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
+ const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+ fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
+ const unsigned components =
+ inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -4186,10 +4187,12 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
/* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
* here.
*/
- assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE ||
+ assert(devinfo->gen != 6 ||
+ inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE ||
inst->exec_size == 8);
/* Dual-source FB writes are unsupported in SIMD16 mode. */
- return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size);
+ return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
+ 8 : inst->exec_size);
case SHADER_OPCODE_TXD_LOGICAL:
/* TXD is unsupported in SIMD16 mode. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 7cc4f3c927a..9e2b221265c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -710,6 +710,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
fs_reg(components)
};
+ assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
sources, ARRAY_SIZE(sources));
From 1db44252d01bf7539452ccc2b5210c74b8dcd573 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Tue, 20 Oct 2015 14:29:39 -0700
Subject: [PATCH 003/266] i965: Implement ARB_shader_stencil_export (gen9+)
v2: remove useless source_stencil_to_render_target (Ken)
Squash in the actual packing function, which also got to
v2:
Move the definition of the OPCODE outside of FB_WRITE opcodes (Matt)
Reorder the regioning to be in VWH order (Matt)
Don't retype src in the backend, just assert instead (Matt)
Rename the debug prints to something better (Matt)
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
src/mesa/drivers/dri/i965/brw_defines.h | 2 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 14 +++++
src/mesa/drivers/dri/i965/brw_fs.h | 3 ++
.../drivers/dri/i965/brw_fs_generator.cpp | 53 +++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +++++--
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +
src/mesa/drivers/dri/i965/gen8_ps_state.c | 5 ++
9 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 742bac4e8ef..68a93a690dd 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -335,6 +335,7 @@ struct brw_wm_prog_data {
} binding_table;
uint8_t computed_depth_mode;
+ bool computed_stencil;
bool early_fragment_tests;
bool no_8;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 457f49c9709..6433cffc919 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -919,6 +919,7 @@ enum opcode {
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
+ FS_OPCODE_PACK_STENCIL_REF,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
@@ -1330,6 +1331,7 @@ enum fb_write_logical_srcs {
FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
};
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f2f598cb569..1c96cf2d85b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3357,6 +3357,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+ const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
@@ -3449,6 +3450,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
length++;
}
+ if (src_stencil.file != BAD_FILE) {
+ assert(devinfo->gen >= 9);
+ assert(bld.dispatch_width() != 16);
+
+ sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.exec_all().annotate("FB write OS")
+ .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
+ retype(src_stencil, BRW_REGISTER_TYPE_UB));
+ length++;
+ }
+
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
@@ -5223,6 +5235,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
+ prog_data->computed_stencil =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 50e98becf03..d98769df4dc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -337,6 +337,7 @@ public:
int *push_constant_loc;
fs_reg frag_depth;
+ fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
@@ -427,6 +428,8 @@ private:
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+ void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src);
void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index bb7e792044f..b016b5684e7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -317,6 +317,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_imm_ud(inst->target));
}
+ /* Set computes stencil to render target */
+ if (prog_data->computed_stencil) {
+ brw_OR(p,
+ vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+ vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(0x1 << 14));
+ }
+
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -436,6 +444,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
+void
+fs_generator::generate_stencil_ref_packing(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dispatch_width == 8);
+ assert(devinfo->gen >= 9);
+
+ /* Stencil value updates are provided in 8 slots of 1 byte per slot.
+ * Presumably, in order to save memory bandwidth, the stencil reference
+ * values written from the FS need to be packed into 2 dwords (this makes
+ * sense because the stencil values are limited to 1 byte each and a SIMD8
+ * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.)
+ *
+ * The spec is confusing here because in the payload definition of MDP_RTW_S8
+ * (Message Data Payload for Render Target Writes with Stencil 8b) the
+ * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
+ * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
+ * packed values specified above and diagrammed below:
+ *
+ * 31 0
+ * --------------------------------
+ * DW | |
+ * 2-7 | IGNORED |
+ * | |
+ * --------------------------------
+ * DW1 | STC | STC | STC | STC |
+ * | slot7 | slot6 | slot5 | slot4|
+ * --------------------------------
+ * DW0 | STC | STC | STC | STC |
+ * | slot3 | slot2 | slot1 | slot0|
+ * --------------------------------
+ */
+
+ src.vstride = BRW_VERTICAL_STRIDE_4;
+ src.width = BRW_WIDTH_1;
+ src.hstride = BRW_HORIZONTAL_STRIDE_0;
+ assert(src.type == BRW_REGISTER_TYPE_UB);
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
+}
+
void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
@@ -2182,6 +2231,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_barrier(inst, src[0]);
break;
+ case FS_OPCODE_PACK_STENCIL_REF:
+ generate_stencil_ref_packing(inst, dst, src[0]);
+ break;
+
default:
unreachable("Unsupported opcode");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index e1fb12060c8..acb51c023ad 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -114,6 +114,8 @@ fs_visitor::nir_setup_outputs()
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_STENCIL) {
+ this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9e2b221265c..5c57944ca39 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
- fs_reg src_depth;
+ fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -706,9 +706,12 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
+ src_stencil = frag_stencil;
+
const fs_reg sources[] = {
- color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
- fs_reg(components)
+ color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+ sample_mask, fs_reg(components)
};
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
@@ -741,6 +744,16 @@ fs_visitor::emit_fb_writes()
no16("Missing support for simd16 depth writes on gen6\n");
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+ /* From the 'Render Target Write message' section of the docs:
+ * "Output Stencil is not supported with SIMD16 Render Target Write
+ * Messages."
+ *
+ * FINISHME: split 16 into 2 8s
+ */
+ no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0ac4f2f6e0d..6f6f77ab583 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -295,6 +295,8 @@ brw_instruction_name(enum opcode op)
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
+ case FS_OPCODE_PACK_STENCIL_REF:
+ return "pack_stencil_ref";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8f0507413a7..10e433b1d59 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+ dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+ }
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);
From 8eefdacb3892da662f85377f4fe1a33dbff2ce7a Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Tue, 20 Oct 2015 14:29:41 -0700
Subject: [PATCH 004/266] i965: Advertise ARB_shader_stencil_export (gen9+)
Signed-off-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
docs/relnotes/11.1.0.html | 1 +
src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index d3dbe9dda13..138bb9eb54e 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_blend_func_extended on freedreno (a3xx)
GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
+GL_ARB_shader_stencil_export on i965 (gen9+)
GL_ARB_shader_storage_buffer_object on i965
GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3f9afd16c71..c6826d63dde 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -358,6 +358,7 @@ intelInitExtensions(struct gl_context *ctx)
if (brw->gen >= 9) {
ctx->Extensions.KHR_texture_compression_astc_ldr = true;
ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+ ctx->Extensions.ARB_shader_stencil_export = true;
}
if (ctx->API == API_OPENGL_CORE)
From 0b6f6ee50f3b25c21dd8c9ca339d006141340666 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Thu, 22 Oct 2015 11:19:05 +0800
Subject: [PATCH 005/266] ilo: fix max thread count for HS on Gen8
It is in DW2 on Gen8.
---
src/gallium/drivers/ilo/core/ilo_state_shader.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
index f67326c7f10..2e06b07a8e3 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -282,13 +282,15 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
else
dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
- dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
-
if (info->dispatch_enable)
dw2 |= GEN7_HS_DW2_HS_ENABLE;
if (info->stats_enable)
From 13a5805b646b19fd9c155d5c586ad1967d7d9e00 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Thu, 22 Oct 2015 11:29:17 +0800
Subject: [PATCH 006/266] ilo: make sure there is HiZ before resolving
We do not want to perform a depth resolve on an MCS enabled surface.
---
src/gallium/drivers/ilo/ilo_blit.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/ilo/ilo_blit.h b/src/gallium/drivers/ilo/ilo_blit.h
index da0bfe9c4c9..bad4dab8404 100644
--- a/src/gallium/drivers/ilo/ilo_blit.h
+++ b/src/gallium/drivers/ilo/ilo_blit.h
@@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo,
* As it is only used to resolve HiZ right now, return early when there is
* no HiZ.
*/
- if (!ilo_image_can_enable_aux(&tex->image, level))
+ if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ ||
+ !ilo_image_can_enable_aux(&tex->image, level))
return;
- if (ilo_image_can_enable_aux(&tex->image, level)) {
+ if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ &&
+ ilo_image_can_enable_aux(&tex->image, level)) {
ilo_blit_resolve_slices_for_hiz(ilo, res, level,
first_slice, num_slices, resolve_flags);
}
From 627c15cde46a76e9bce4425646c5caba11788ec4 Mon Sep 17 00:00:00 2001
From: Jordan Justen
Date: Mon, 19 Oct 2015 23:13:09 -0700
Subject: [PATCH 007/266] i965/fs: Disable CSE optimization for untyped & typed
surface reads
An untyped surface read is volatile because it might be affected by a
write.
In the ES31-CTS.compute_shader.resources-max test, two back to back
read/modify/writes of an SSBO variable looked something like this:
r1 = untyped_surface_read(ssbo_float)
r2 = r1 + 1
untyped_surface_write(ssbo_float, r2)
r3 = untyped_surface_read(ssbo_float)
r4 = r3 + 1
untyped_surface_write(ssbo_float, r4)
And after CSE, we had:
r1 = untyped_surface_read(ssbo_float)
r2 = r1 + 1
untyped_surface_write(ssbo_float, r2)
r4 = r1 + 1
untyped_surface_write(ssbo_float, r4)
Signed-off-by: Jordan Justen
Reviewed-by: Iago Toral Quiroga
---
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 3 ++-
src/mesa/drivers/dri/i965/brw_shader.cpp | 14 ++++++++++++++
src/mesa/drivers/dri/i965/brw_shader.h | 6 ++++++
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c7628dcc2f4..3a28c8d591d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -93,7 +93,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case SHADER_OPCODE_LOAD_PAYLOAD:
return !inst->is_copy_payload(v->alloc);
default:
- return inst->is_send_from_grf() && !inst->has_side_effects();
+ return inst->is_send_from_grf() && !inst->has_side_effects() &&
+ !inst->is_volatile();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6f6f77ab583..204935641f3 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -987,6 +987,20 @@ backend_instruction::has_side_effects() const
}
}
+bool
+backend_instruction::is_volatile() const
+{
+ switch (opcode) {
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ return true;
+ default:
+ return false;
+ }
+}
+
#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 51b059fcaa1..6a2dfc9bbb6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -115,6 +115,12 @@ struct backend_instruction : public exec_node {
* optimize these out unless you know what you are doing.
*/
bool has_side_effects() const;
+
+ /**
+ * True if the instruction might be affected by side effects of other
+ * instructions.
+ */
+ bool is_volatile() const;
#else
struct backend_instruction {
struct exec_node link;
From 718249843b915decf8fccec92e466ac1a6219934 Mon Sep 17 00:00:00 2001
From: Jose Fonseca
Date: Wed, 21 Oct 2015 17:19:41 +0100
Subject: [PATCH 008/266] gallivm: Translate all util_cpu_caps bits to LLVM
attributes.
This should prevent disparity between features Mesa and LLVM
believe are supported by the CPU.
http://lists.freedesktop.org/archives/mesa-dev/2015-October/thread.html#96990
Tested on a i7-3720QM w/ LLVM 3.3 and 3.6.
v2: Increase SmallVector initial size as suggested by Gustaw Smolarczyk.
Reviewed-by: Roland Scheidegger
CC: "10.6 11.0"
---
src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 36 +++++++++++++++++--
1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 72fab8ccf06..e70a75fc663 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -497,7 +497,33 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
#endif
}
- llvm::SmallVector MAttrs;
+ llvm::SmallVector MAttrs;
+ if (util_cpu_caps.has_sse) {
+ MAttrs.push_back("+sse");
+ }
+ if (util_cpu_caps.has_sse2) {
+ MAttrs.push_back("+sse2");
+ }
+ if (util_cpu_caps.has_sse3) {
+ MAttrs.push_back("+sse3");
+ }
+ if (util_cpu_caps.has_ssse3) {
+ MAttrs.push_back("+ssse3");
+ }
+ if (util_cpu_caps.has_sse4_1) {
+#if HAVE_LLVM >= 0x0304
+ MAttrs.push_back("+sse4.1");
+#else
+ MAttrs.push_back("+sse41");
+#endif
+ }
+ if (util_cpu_caps.has_sse4_2) {
+#if HAVE_LLVM >= 0x0304
+ MAttrs.push_back("+sse4.2");
+#else
+ MAttrs.push_back("+sse42");
+#endif
+ }
if (util_cpu_caps.has_avx) {
/*
* AVX feature is not automatically detected from CPUID by the X86 target
@@ -509,8 +535,14 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
if (util_cpu_caps.has_f16c) {
MAttrs.push_back("+f16c");
}
- builder.setMAttrs(MAttrs);
+ if (util_cpu_caps.has_avx2) {
+ MAttrs.push_back("+avx2");
+ }
}
+ if (util_cpu_caps.has_altivec) {
+ MAttrs.push_back("+altivec");
+ }
+ builder.setMAttrs(MAttrs);
#if HAVE_LLVM >= 0x0305
StringRef MCPU = llvm::sys::getHostCPUName();
From 65ffaf2740e40cc155b55fae81166181a9b616a2 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Mon, 29 Jun 2015 14:04:14 -0700
Subject: [PATCH 009/266] i965: Note that the UV immediate type is Gen6+.
---
src/mesa/drivers/dri/i965/brw_reg.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 87e7e011541..083c46a3726 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -205,7 +205,7 @@ enum PACKED brw_reg_type {
/** @} */
/** Immediates only: @{ */
- BRW_REGISTER_TYPE_UV,
+ BRW_REGISTER_TYPE_UV, /* Gen6+ */
BRW_REGISTER_TYPE_V,
BRW_REGISTER_TYPE_VF,
/** @} */
From cfb67c3d0656927270302ef17d2d9de065ec44fe Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Mon, 19 Oct 2015 10:51:42 -0700
Subject: [PATCH 010/266] i965/vec4: Initialize LOD to 0.0f for
textureQueryLevels() and texture().
We implement textureQueryLevels (which takes no arguments, save the
sampler) using the resinfo message (which takes an argument of LOD).
Without initializing it, we'd generate a MOV from the null register to
load the LOD argument.
Essentially the same logic applies to texture. A vertex shader cannot
compute derivatives and so cannot produce an LOD, so TXL with an LOD of
0.0 is used.
Reviewed-by: Iago Toral Quiroga
---
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c39f97e3962..b8f90f2aa20 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -882,6 +882,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
uint32_t sampler,
src_reg sampler_reg)
{
+ /* The sampler can only meaningfully compute LOD for fragment shader
+ * messages. For all other stages, we change the opcode to TXL and hardcode
+ * the LOD to 0.
+ *
+ * textureQueryLevels() is implemented in terms of TXS so we need to pass a
+ * valid LOD argument.
+ */
+ if (op == ir_tex || op == ir_query_levels) {
+ assert(lod.file == BAD_FILE);
+ lod = src_reg(0.0f);
+ }
+
enum opcode opcode;
switch (op) {
case ir_tex: opcode = SHADER_OPCODE_TXL; break;
From e10fc055e7dc5281f03a77088a24392098e3473b Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 20 Oct 2015 17:51:12 -0700
Subject: [PATCH 011/266] i965/fs: Use type-W for immediate in SampleID setup.
Not a functional difference, but register is loaded with a signed
immediate (V) and added to a signed type (D) producing a signed result
(D).
Also change the type of g0 to allow for compaction.
Reviewed-by: Kenneth Graunke
Reviewed-by: Anuj Phogat
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1c96cf2d85b..3cb8190b02a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1284,7 +1284,7 @@ fs_visitor::emit_sampleid_setup()
if (key->compute_sample_id) {
fs_reg t1 = vgrf(glsl_type::int_type);
fs_reg t2 = vgrf(glsl_type::int_type);
- t2.type = BRW_REGISTER_TYPE_UW;
+ t2.type = BRW_REGISTER_TYPE_W;
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1306,7 +1306,7 @@ fs_visitor::emit_sampleid_setup()
* subspan 1, and finally sample 1 of subspan 1.
*/
abld.exec_all()
- .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
fs_reg(0xc0));
abld.exec_all().SHR(t1, t1, fs_reg(5));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index b016b5684e7..20461e8bc5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1508,7 +1508,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
+ struct brw_reg reg = stride(src1, 1, 4, 0);
if (dispatch_width == 8) {
brw_ADD(p, dst, src0, reg);
} else if (dispatch_width == 16) {
From e2344e11ce8ddefb89a222bbf63a7c60e8ba5655 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 20 Oct 2015 18:23:50 -0700
Subject: [PATCH 012/266] i965/fs: Trim unneeded channels in SampleID setup.
The AND and SHR produce a scalar value that we had been replicating
across $dispatch_width channels. The immediate MOV produces only four
useful channels of data.
Reviewed-by: Kenneth Graunke
Reviewed-by: Anuj Phogat
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3cb8190b02a..71da9d9f7b6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1282,9 +1282,9 @@ fs_visitor::emit_sampleid_setup()
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
- fs_reg t1 = vgrf(glsl_type::int_type);
- fs_reg t2 = vgrf(glsl_type::int_type);
- t2.type = BRW_REGISTER_TYPE_W;
+ fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+ t1.set_smear(0);
+ fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1305,13 +1305,13 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
- abld.exec_all()
+ abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
fs_reg(0xc0));
- abld.exec_all().SHR(t1, t1, fs_reg(5));
+ abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
/* This works for both SIMD8 and SIMD16 */
- abld.exec_all()
+ abld.exec_all().group(4, 0)
.MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
/* This special instruction takes care of setting vstride=1,
From 0f74796e33cb15ec0c79bc2b5d0c766dc5068b41 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 20 Oct 2015 18:29:42 -0700
Subject: [PATCH 013/266] i965/fs: Drop unnecessary write-enable-all from
SET_SAMPLE_ID.
Reviewed-by: Kenneth Graunke
Reviewed-by: Anuj Phogat
---
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 20461e8bc5b..cee7c68bba7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1504,18 +1504,18 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
assert(src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD);
- brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
struct brw_reg reg = stride(src1, 1, 4, 0);
if (dispatch_width == 8) {
brw_ADD(p, dst, src0, reg);
} else if (dispatch_width == 16) {
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_ADD(p, firsthalf(dst), firsthalf(src0), reg);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2));
+ brw_pop_insn_state(p);
}
- brw_pop_insn_state(p);
}
void
From e2707c8765b15b7ac666067160307c7f9d2d8b4d Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 20 Oct 2015 18:31:02 -0700
Subject: [PATCH 014/266] i965/fs: Emit a single ADD instruction for
SET_SAMPLE_ID on Gen8+.
Gen8+ lifted the register region restriction that an instruction whose
destination spans two registers must have sources that also span two
registers.
Reviewed-by: Kenneth Graunke
Reviewed-by: Anuj Phogat
---
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index cee7c68bba7..139d1dd17ec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1505,7 +1505,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
src0.type == BRW_REGISTER_TYPE_UD);
struct brw_reg reg = stride(src1, 1, 4, 0);
- if (dispatch_width == 8) {
+ if (devinfo->gen >= 8 || dispatch_width == 8) {
brw_ADD(p, dst, src0, reg);
} else if (dispatch_width == 16) {
brw_push_insn_state(p);
From 1095d837dc5744b65d4e65ee551046d5d884f607 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Thu, 22 Oct 2015 12:26:27 -0700
Subject: [PATCH 015/266] i965/vec4/gs: Fix signed/unsigned comparison warning.
---
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 9402489e628..0b1c2ecd7f0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -768,7 +768,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
output_size_bytes += 32;
assert(output_size_bytes >= 1);
- int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
From 8ac3b525c77cb5aae9e61bd984b78f6cbbffdc1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Fri, 9 Oct 2015 16:59:20 +0200
Subject: [PATCH 016/266] i965/vec4: nir_emit_if doesn't need to predicate
based on all the channels
v2: changed comment, as suggested by Matt Turner
Reviewed-by: Matt Turner
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index ea1e3e7bbcf..0f04f65eeaf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -193,7 +193,9 @@ vec4_visitor::nir_emit_if(nir_if *if_stmt)
vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- emit(IF(BRW_PREDICATE_NORMAL));
+ /* We can just predicate based on the X channel, as the condition only
+ * goes on its own line */
+ emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X));
nir_emit_cf_list(&if_stmt->then_list);
From a59359ecd22154cc2b3f88bb8c599f21af8a3934 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Wed, 14 Oct 2015 20:26:43 +0200
Subject: [PATCH 017/266] i965/vec4: track and use independently each flag
channel
vec4_live_variables tracks now each flag channel independently, so
vec4_dead_code_eliminate can update the writemask of null registers,
based on which component are alive at the moment. This would allow
vec4_cmod_propagation to optimize out several movs involving null
registers.
v2: added support to track each flag channel independently at vec4
live_variables, as v1 assumed that it was already doing it, as
pointed by Francisco Jerez
v3: general cleaningn after Matt Turner's review
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_ir_vec4.h | 21 +++++++++++++
.../dri/i965/brw_vec4_dead_code_eliminate.cpp | 31 +++++++++++++------
.../dri/i965/brw_vec4_live_variables.cpp | 14 ++++++---
3 files changed, 52 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 1b57b65db27..74e9733bd44 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -186,6 +186,27 @@ public:
return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
}
+ bool reads_flag(unsigned c)
+ {
+ if (!reads_flag())
+ return false;
+
+ switch (predicate) {
+ case BRW_PREDICATE_NONE:
+ return false;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_X:
+ return c == 0;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
+ return c == 1;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
+ return c == 2;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_W:
+ return c == 3;
+ default:
+ return true;
+ }
+ }
+
bool writes_flag()
{
return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 8fc7a365bfc..284e0a8d0a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
- if (inst->dst.file == GRF && !inst->has_side_effects()) {
+ if ((inst->dst.file == GRF && !inst->has_side_effects()) ||
+ (inst->dst.is_null() && inst->writes_flag())){
bool result_live[4] = { false };
- for (unsigned i = 0; i < inst->regs_written; i++) {
- for (int c = 0; c < 4; c++)
- result_live[c] |= BITSET_TEST(
- live, var_from_reg(alloc, offset(inst->dst, i), c));
+ if (inst->dst.file == GRF) {
+ for (unsigned i = 0; i < inst->regs_written; i++) {
+ for (int c = 0; c < 4; c++)
+ result_live[c] |= BITSET_TEST(
+ live, var_from_reg(alloc, offset(inst->dst, i), c));
+ }
+ } else {
+ for (unsigned c = 0; c < 4; c++)
+ result_live[c] = BITSET_TEST(flag_live, c);
}
/* If the instruction can't do writemasking, then it's all or
@@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->dst.is_null() && inst->writes_flag()) {
- if (!BITSET_TEST(flag_live, 0)) {
+ bool combined_live = false;
+ for (unsigned c = 0; c < 4; c++)
+ combined_live |= BITSET_TEST(flag_live, c);
+
+ if (!combined_live) {
inst->opcode = BRW_OPCODE_NOP;
progress = true;
continue;
@@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->writes_flag()) {
- BITSET_CLEAR(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++)
+ BITSET_CLEAR(flag_live, c);
}
for (int i = 0; i < 3; i++) {
@@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate()
}
}
- if (inst->reads_flag()) {
- BITSET_SET(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c)) {
+ BITSET_SET(flag_live, c);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 678237901f2..aa9a6572eee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use()
}
}
}
- if (inst->reads_flag()) {
- if (!BITSET_TEST(bd->flag_def, 0)) {
- BITSET_SET(bd->flag_use, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c) &&
+ !BITSET_TEST(bd->flag_def, c)) {
+ BITSET_SET(bd->flag_use, c);
}
}
@@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use()
}
}
if (inst->writes_flag()) {
- if (!BITSET_TEST(bd->flag_use, 0)) {
- BITSET_SET(bd->flag_def, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if ((inst->dst.writemask & (1 << c)) &&
+ !BITSET_TEST(bd->flag_use, c)) {
+ BITSET_SET(bd->flag_def, c);
+ }
}
}
From 627f94b72e0e9443ad116f072599a7342269f297 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Mon, 28 Sep 2015 17:00:19 +0200
Subject: [PATCH 018/266] i965/vec4: adding vec4_cmod_propagation optimization
vec4 port of fs_cmod_propagation.
Shader-db results (no vec4 grepping):
total instructions in shared programs: 6240413 -> 6235841 (-0.07%)
instructions in affected programs: 401933 -> 397361 (-1.14%)
total loops in shared programs: 1979 -> 1979 (0.00%)
helped: 2265
HURT: 0
v2: remove extra space and combine two if blocks, as suggested by
Matt Turner
v3: add condition check to bail out if current inst and inst being
scanned has different writemask, as pointed by Matt Turner
v3: updated shader-db numbers
v4: remove block from foreach_inst_in_block_*_starting_from after
commit 801f151917fedb13c5c6e96281a18d833dd6901f
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 +
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
.../dri/i965/brw_vec4_cmod_propagation.cpp | 157 ++++++++++++++++++
4 files changed, 160 insertions(+)
create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index c2438bda356..434583defe3 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -57,6 +57,7 @@ i965_compiler_FILES = \
brw_util.c \
brw_util.h \
brw_vec4_builder.h \
+ brw_vec4_cmod_propagation.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4.cpp \
brw_vec4_cse.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3e7078d0b32..c8923ef016a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1862,6 +1862,7 @@ vec4_visitor::run()
OPT(dead_code_eliminate);
OPT(dead_control_flow_eliminate, this);
OPT(opt_copy_propagation);
+ OPT(opt_cmod_propagation);
OPT(opt_cse);
OPT(opt_algebraic);
OPT(opt_register_coalesce);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index d861b2e85df..89b6f912272 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -149,6 +149,7 @@ public:
int var_range_start(unsigned v, unsigned n) const;
int var_range_end(unsigned v, unsigned n) const;
bool virtual_grf_interferes(int a, int b);
+ bool opt_cmod_propagation();
bool opt_copy_propagation(bool do_constant_prop = true);
bool opt_cse_local(bblock_t *block);
bool opt_cse();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..329f24269ce
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file brw_vec4_cmod_propagation.cpp
+ *
+ * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
+ * brw_fs_cmod_propagation for further details on the rationale behind this
+ * optimization.
+ */
+
+#include "brw_vec4.h"
+#include "brw_cfg.h"
+
+namespace brw {
+
+static bool
+opt_cmod_propagation_local(bblock_t *block)
+{
+ bool progress = false;
+ int ip = block->end_ip + 1;
+
+ foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
+ ip--;
+
+ if ((inst->opcode != BRW_OPCODE_AND &&
+ inst->opcode != BRW_OPCODE_CMP &&
+ inst->opcode != BRW_OPCODE_MOV) ||
+ inst->predicate != BRW_PREDICATE_NONE ||
+ !inst->dst.is_null() ||
+ inst->src[0].file != GRF ||
+ inst->src[0].abs)
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_AND &&
+ !(inst->src[1].is_one() &&
+ inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate))
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->conditional_mod != BRW_CONDITIONAL_NZ)
+ continue;
+
+ bool read_flag = false;
+ foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
+ if (inst->src[0].in_range(scan_inst->dst,
+ scan_inst->regs_written)) {
+ if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
+ scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+ (scan_inst->dst.writemask != WRITEMASK_X &&
+ scan_inst->dst.writemask != WRITEMASK_XYZW) ||
+ (scan_inst->dst.writemask == WRITEMASK_XYZW &&
+ inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
+ (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) {
+ break;
+ }
+
+ /* CMP's result is the same regardless of dest type. */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ scan_inst->opcode == BRW_OPCODE_CMP &&
+ (inst->dst.type == BRW_REGISTER_TYPE_D ||
+ inst->dst.type == BRW_REGISTER_TYPE_UD)) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* If the AND wasn't handled by the previous case, it isn't safe
+ * to remove it.
+ */
+ if (inst->opcode == BRW_OPCODE_AND)
+ break;
+
+ /* Comparisons operate differently for ints and floats */
+ if (scan_inst->dst.type != inst->dst.type &&
+ (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
+ inst->dst.type == BRW_REGISTER_TYPE_F))
+ break;
+
+ /* If the instruction generating inst's source also wrote the
+ * flag, and inst is doing a simple .nz comparison, then inst
+ * is redundant - the appropriate value is already in the flag
+ * register. Delete inst.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate &&
+ scan_inst->writes_flag()) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* Otherwise, try propagating the conditional. */
+ enum brw_conditional_mod cond =
+ inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
+ : inst->conditional_mod;
+
+ if (scan_inst->can_do_cmod() &&
+ ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
+ scan_inst->conditional_mod == cond)) {
+ scan_inst->conditional_mod = cond;
+ inst->remove(block);
+ progress = true;
+ }
+ break;
+ }
+
+ if (scan_inst->writes_flag())
+ break;
+
+ read_flag = read_flag || scan_inst->reads_flag();
+ }
+ }
+
+ return progress;
+}
+
+bool
+vec4_visitor::opt_cmod_propagation()
+{
+ bool progress = false;
+
+ foreach_block_reverse(block, cfg) {
+ progress = opt_cmod_propagation_local(block) || progress;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
+} /* namespace brw */
From 8fc8fcc04f584b32cd5bf633da8e3508249e339d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Wed, 30 Sep 2015 13:39:30 +0200
Subject: [PATCH 019/266] i965/vec4: Add unit tests for cmod propagation pass
This include the same tests coming from test_fs_cmod_propagation, (non
vector glsl types included) plus some new with vec4 types, inspired on
the regressions found while the optimization was a work in progress.
Additionally, the check of number of instructions after the
optimization was changed from EXPECT_EQ to ASSERT_EQ. This was done to
avoid a crash on failing tests that expected no optimization, as after
checking the number of instructions, there were some checks related to
this last instruction opcode/conditional mod.
v2: update tests after Matt Turner's review of the optimization pass
v3: tweaks on the tests (mostly on the comments), after Matt Turner's
review
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/Makefile.am | 7 +
.../dri/i965/test_vec4_cmod_propagation.cpp | 822 ++++++++++++++++++
2 files changed, 829 insertions(+)
create mode 100644 src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 04b3f9cc8ce..9d003e48bd8 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -59,6 +59,7 @@ TESTS = \
test_fs_saturate_propagation \
test_eu_compact \
test_vf_float_conversions \
+ test_vec4_cmod_propagation \
test_vec4_copy_propagation \
test_vec4_register_coalesce
@@ -94,6 +95,12 @@ test_vec4_copy_propagation_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(TEST_LIBS)
+test_vec4_cmod_propagation_SOURCES = \
+ test_vec4_cmod_propagation.cpp
+test_vec4_cmod_propagation_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(TEST_LIBS)
+
test_eu_compact_SOURCES = \
test_eu_compact.c
nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..5ca697a0b21
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -0,0 +1,822 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Based on test_fs_cmod_propagation.cpp
+ */
+
+#include
+#include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_cfg.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class cmod_propagation_test : public ::testing::Test {
+ virtual void SetUp();
+
+public:
+ struct brw_compiler *compiler;
+ struct brw_device_info *devinfo;
+ struct gl_context *ctx;
+ struct gl_shader_program *shader_prog;
+ struct brw_vertex_program *vp;
+ vec4_visitor *v;
+};
+
+class cmod_propagation_vec4_visitor : public vec4_visitor
+{
+public:
+ cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
+ nir_shader *shader)
+ : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+ false, -1) {}
+
+protected:
+ /* Dummy implementation for pure virtual methods */
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void setup_payload()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_prolog()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_program_code()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_thread_end()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_urb_write_header(int mrf)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete)
+ {
+ unreachable("Not reached");
+ }
+};
+
+
+void cmod_propagation_test::SetUp()
+{
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
+
+ vp = ralloc(NULL, struct brw_vertex_program);
+
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
+
+ v = new cmod_propagation_vec4_visitor(compiler, shader);
+
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
+
+ devinfo->gen = 4;
+}
+
+static vec4_instruction *
+instruction(bblock_t *block, int num)
+{
+ vec4_instruction *inst = (vec4_instruction *)block->start();
+ for (int i = 0; i < num; i++) {
+ inst = (vec4_instruction *)inst->next;
+ }
+ return inst;
+}
+
+static bool
+cmod_propagation(vec4_visitor *v)
+{
+ const bool print = false;
+
+ if (print) {
+ fprintf(stderr, "= Before =\n");
+ v->dump_instructions();
+ }
+
+ bool ret = v->opt_cmod_propagation();
+
+ if (print) {
+ fprintf(stderr, "\n= After =\n");
+ v->dump_instructions();
+ }
+
+ return ret;
+}
+
+TEST_F(cmod_propagation_test, basic)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest.x src0.xxxx src1.xxxx
+ * 1: cmp.ge.f0 null.x dest.xxxx 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest.x src0 src1
+ * 1: cmp.ge.f0 null.xyzw dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andz_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg one(1);
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.null_reg_d(), src_reg(dest), one));
+
+ /* = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.z.f0 null:D dest:D 1D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, non_cmod_instruction)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::uint_type);
+ src_reg src0 = src_reg(v, glsl_type::uint_type);
+ src_reg zero(0u);
+ bld.FBL(dest, src0);
+ bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: fbl dest src0
+ * 1: cmp.ge.f0 null dest 0u
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null src2 0.0f
+ * 2: cmp.ge.f0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest0, src0, src1);
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null dest0 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_dest_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::vec2_type);
+ src_reg zero(0.0f);
+ bld.ADD(offset(dest, 2), src0, src1);
+ bld.emit(SHADER_OPCODE_TEX, dest, src2)
+ ->regs_written = 4;
+ bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest+2 src0 src1
+ * 1: tex rlen 4 dest+0 src2
+ * 2: cmp.ge.f0 null dest+2 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null.x dest0 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+}
+
+TEST_F(cmod_propagation_test, negate)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ src_reg tmp_src = src_reg(dest);
+ tmp_src.negate = true;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+ bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null.x -dest 0.0f
+ *
+ * = After =
+ * 0: add.le.f0 dest src0 src1
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, movnz)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, src_reg(dest)));
+
+ /* = Before =
+ *
+ * 0: cmp.l.f0 dest:F src0:F src1:F
+ * 1: mov.nz.f0 null.x dest:F
+ *
+ * = After =
+ * 0: cmp.l.f0 dest src0:F src1:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::int_type);
+ src_reg src1 = src_reg(v, glsl_type::int_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
+ BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest:D src0:D src1:D
+ * 1: cmp.ge.f0 null:F dest:F 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andnz_non_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg nonone(38);
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.null_reg_d(), src_reg(dest), nonone));
+
+ /* = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.nz.f0 null:D dest:D 38D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+/* Note that basic is using glsl_type:float types, while this one is using
+ * glsl_type::vec4 */
+TEST_F(cmod_propagation_test, basic_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest.xyzw src0.xyzw src1.xyzw
+ * 1: cmp.nz.f0.0 null.xyzw dest.xyzw 0.0f
+ *
+ * = After =
+ * 0: mul.nz.f0.0 dest.xyzw src0.xyzw src1.xyzw
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest.x src0 src1
+ * 1: cmp.nz.f0.0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ *
+ * 0: mad dest.x:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null.x:F dest.xxxx:F 0.0f
+ *
+ * = After =
+ * 0: mad.l.f0 dest.x:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_more_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_XW;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ *
+ * 0: mad dest.xw:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null:F dest.xxxx:F zeroF
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_mov_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::ivec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::ivec4_type);
+ src0.swizzle = BRW_SWIZZLE_XXXX;
+ src0.file = UNIFORM;
+ src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D);
+ src_reg mov_src = src_reg(dest);
+ mov_src.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_d();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, mov_src));
+
+ /* = Before =
+ *
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ * 1: mov.nz.f0 null.x:D dest.xxxx:D
+ *
+ * = After =
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ src_reg cmp_src = src_reg(dest);
+ cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest src0 src1
+ * 1: cmp.nz.f0.0 null dest.xywz 0.0f
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
From 92ae101ed0dff689f207abf61f68167009de4e29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Thu, 1 Oct 2015 16:41:30 +0200
Subject: [PATCH 020/266] i965/vec4: use an envvar to decide to print the
assembly on cmod_propagation tests
The complete way to do this would be parse INTEL_DEBUG and
print the output if DEBUG_VS (or a new one) is present
(see intel_debug.c).
But that seems like an overkill for the unit tests, that
after all, the most common use case is being run when
calling make check.
v2: use the same idea for the fs counterpart too, as suggested by
Matt Turner
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 2 +-
src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 5f80f90a91d..62d39f70ec4 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -84,7 +84,7 @@ instruction(bblock_t *block, int num)
static bool
cmod_propagation(fs_visitor *v)
{
- const bool print = false;
+ const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "= Before =\n");
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index 5ca697a0b21..9aa2fcc7907 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -122,7 +122,7 @@ instruction(bblock_t *block, int num)
static bool
cmod_propagation(vec4_visitor *v)
{
- const bool print = false;
+ const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "= Before =\n");
From 8cf84a7e470dbd3b46ce4081459d2ecfab22c2d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Fri, 9 Oct 2015 18:39:42 +0200
Subject: [PATCH 021/266] i965/vec4: print predicate control at brw_vec4
dump_instruction
v2: externalize pred_ctrl_align16 from brw_disasm.c instead of adding
a copy on brw_vec4.c, as suggested by Matt Turner
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_disasm.c | 2 +-
src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 +++--
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4f503ae4869..0fdc83ef7e1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1680,6 +1680,7 @@ struct opcode_desc {
extern const struct opcode_desc opcode_descs[128];
extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
void
brw_emit_depthbuffer(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index db23a187a93..35cf99efc10 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -252,7 +252,7 @@ static const char *const pred_inv[2] = {
[1] = "-"
};
-static const char *const pred_ctrl_align16[16] = {
+const char *const pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c8923ef016a..71a5a4403f4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1370,9 +1370,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
vec4_instruction *inst = (vec4_instruction *)be_inst;
if (inst->predicate) {
- fprintf(file, "(%cf0.%d) ",
+ fprintf(file, "(%cf0.%d%s) ",
inst->predicate_inverse ? '-' : '+',
- inst->flag_subreg);
+ inst->flag_subreg,
+ pred_ctrl_align16[inst->predicate]);
}
fprintf(file, "%s", brw_instruction_name(inst->opcode));
From 7b636581253fe858ac883e3d3eec21173ac069d4 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 22 Oct 2015 12:24:42 -0400
Subject: [PATCH 022/266] radeon/uvd: don't expose HEVC on old UVD hw (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The section for UVD 2 and older was not updated
when HEVC support was added. Reported by Kano
on irc.
v2: integrate the UVD2 and older checks into the
main switch statement.
v3: handle encode checking as well. Encode is
already checked in the top case statement, so
drop encode checks in the lower case statement.
Reviewed-by: Christian König
Signed-off-by: Alex Deucher
Cc: mesa-stable@lists.freedesktop.org
---
src/gallium/drivers/radeon/radeon_video.c | 50 ++++++++---------------
1 file changed, 18 insertions(+), 32 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 3a1834b948f..32bfc32073b 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -205,11 +205,12 @@ int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ enum pipe_video_format codec = u_reduce_video_profile(profile);
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
- return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+ return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
@@ -232,38 +233,19 @@ int rvid_get_video_param(struct pipe_screen *screen,
}
}
- /* UVD 2.x limits */
- if (rscreen->family < CHIP_PALM) {
- enum pipe_video_format codec = u_reduce_video_profile(profile);
- switch (param) {
- case PIPE_VIDEO_CAP_SUPPORTED:
- /* no support for MPEG4 */
- return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
- /* FIXME: VC-1 simple/main profile is broken */
- profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
- profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
- case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- /* MPEG2 only with shaders and no support for
- interlacing on R6xx style UVD */
- return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
- rscreen->family > CHIP_RV770;
- default:
- break;
- }
- }
-
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
- switch (u_reduce_video_profile(profile)) {
+ switch (codec) {
case PIPE_VIDEO_FORMAT_MPEG12:
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ if (rscreen->family < CHIP_PALM)
+ /* no support for MPEG4 */
+ return codec != PIPE_VIDEO_FORMAT_MPEG4;
+ return true;
case PIPE_VIDEO_FORMAT_VC1:
/* FIXME: VC-1 simple/main profile is broken */
- return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
- entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED;
case PIPE_VIDEO_FORMAT_HEVC:
/* Carrizo only supports HEVC Main */
return rscreen->family >= CHIP_CARRIZO &&
@@ -280,13 +262,17 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The hardware doesn't support interlaced HEVC.
- return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The hardware doesn't support interlaced HEVC.
- return true;
+ if (rscreen->family < CHIP_PALM) {
+ /* MPEG2 only with shaders and no support for
+ interlacing on R6xx style UVD */
+ return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+ rscreen->family > CHIP_RV770;
+ } else {
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The firmware doesn't support interlaced HEVC.
+ return true;
+ }
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
From 9919f560996cc0014df757cc542bf477c5402c00 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:38:23 -0600
Subject: [PATCH 023/266] vbo: optimize vertex copying when 'wrapping'
Instead of calling memcpy() 'n' times, we can do it all at once since
the source and dest regions are all contiguous.
Reviewed-by: Matt Turner
---
src/mesa/vbo/vbo_exec_api.c | 16 +++++++---------
src/mesa/vbo/vbo_save_api.c | 15 +++++++--------
2 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index a23d5aa08aa..a614b26cae4 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -132,8 +132,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
static void
vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
{
- fi_type *data = exec->vtx.copied.buffer;
- GLuint i;
+ unsigned numComponents;
/* Run pipeline on current vertices, copy wrapped vertices
* to exec->vtx.copied.
@@ -149,13 +148,12 @@ vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
*/
assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
- for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
- memcpy( exec->vtx.buffer_ptr, data,
- exec->vtx.vertex_size * sizeof(GLfloat));
- exec->vtx.buffer_ptr += exec->vtx.vertex_size;
- data += exec->vtx.vertex_size;
- exec->vtx.vert_count++;
- }
+ numComponents = exec->vtx.copied.nr * exec->vtx.vertex_size;
+ memcpy(exec->vtx.buffer_ptr,
+ exec->vtx.copied.buffer,
+ numComponents * sizeof(fi_type));
+ exec->vtx.buffer_ptr += numComponents;
+ exec->vtx.vert_count += exec->vtx.copied.nr;
exec->vtx.copied.nr = 0;
}
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index d49aa15b1b7..97a1dfdeb3f 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -601,8 +601,7 @@ static void
_save_wrap_filled_vertex(struct gl_context *ctx)
{
struct vbo_save_context *save = &vbo_context(ctx)->save;
- fi_type *data = save->copied.buffer;
- GLuint i;
+ unsigned numComponents;
/* Emit a glEnd to close off the last vertex list.
*/
@@ -612,12 +611,12 @@ _save_wrap_filled_vertex(struct gl_context *ctx)
*/
assert(save->max_vert - save->vert_count > save->copied.nr);
- for (i = 0; i < save->copied.nr; i++) {
- memcpy(save->buffer_ptr, data, save->vertex_size * sizeof(GLfloat));
- data += save->vertex_size;
- save->buffer_ptr += save->vertex_size;
- save->vert_count++;
- }
+ numComponents = save->copied.nr * save->vertex_size;
+ memcpy(save->buffer_ptr,
+ save->copied.buffer,
+ numComponents * sizeof(fi_type));
+ save->buffer_ptr += numComponents;
+ save->vert_count += save->copied.nr;
}
From 614a74376702b9f49343c9d71339cbb002971e6b Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:39:15 -0600
Subject: [PATCH 024/266] mesa: copy rasterpos evaluation code into core Mesa
We'll remove it from the tnl module next. By lifting this code into core
Mesa we can use it from the gallium state tracker.
Reviewed-by: Roland Scheidegger
---
src/mesa/main/rastpos.c | 441 ++++++++++++++++++++++++++++++++++++++++
src/mesa/main/rastpos.h | 3 +
2 files changed, 444 insertions(+)
diff --git a/src/mesa/main/rastpos.c b/src/mesa/main/rastpos.c
index 54b2125a80f..b468219e688 100644
--- a/src/mesa/main/rastpos.c
+++ b/src/mesa/main/rastpos.c
@@ -36,6 +36,447 @@
#include "rastpos.h"
#include "state.h"
#include "main/dispatch.h"
+#include "main/viewport.h"
+#include "util/simple_list.h"
+
+
+
+/**
+ * Clip a point against the view volume.
+ *
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if outside view volume, or one if inside.
+ */
+static GLuint
+viewclip_point_xy( const GLfloat v[] )
+{
+ if ( v[0] > v[3] || v[0] < -v[3]
+ || v[1] > v[3] || v[1] < -v[3] ) {
+ return 0;
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/**
+ * Clip a point against the far/near Z clipping planes.
+ *
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if outside view volume, or one if inside.
+ */
+static GLuint
+viewclip_point_z( const GLfloat v[] )
+{
+ if (v[2] > v[3] || v[2] < -v[3] ) {
+ return 0;
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/**
+ * Clip a point against the user clipping planes.
+ *
+ * \param ctx GL context.
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if the point was clipped, or one otherwise.
+ */
+static GLuint
+userclip_point( struct gl_context *ctx, const GLfloat v[] )
+{
+ GLuint p;
+
+ for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+ if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
+ GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0]
+ + v[1] * ctx->Transform._ClipUserPlane[p][1]
+ + v[2] * ctx->Transform._ClipUserPlane[p][2]
+ + v[3] * ctx->Transform._ClipUserPlane[p][3];
+ if (dot < 0.0F) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+/**
+ * Compute lighting for the raster position. RGB modes computed.
+ * \param ctx the context
+ * \param vertex vertex location
+ * \param normal normal vector
+ * \param Rcolor returned color
+ * \param Rspec returned specular color (if separate specular enabled)
+ */
+static void
+shade_rastpos(struct gl_context *ctx,
+ const GLfloat vertex[4],
+ const GLfloat normal[3],
+ GLfloat Rcolor[4],
+ GLfloat Rspec[4])
+{
+ /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor;
+ const struct gl_light *light;
+ GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */
+
+ COPY_3V(diffuseColor, base[0]);
+ diffuseColor[3] = CLAMP(
+ ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F );
+ ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0);
+
+ foreach (light, &ctx->Light.EnabledList) {
+ GLfloat attenuation = 1.0;
+ GLfloat VP[3]; /* vector from vertex to light pos */
+ GLfloat n_dot_VP;
+ GLfloat diffuseContrib[3], specularContrib[3];
+
+ if (!(light->_Flags & LIGHT_POSITIONAL)) {
+ /* light at infinity */
+ COPY_3V(VP, light->_VP_inf_norm);
+ attenuation = light->_VP_inf_spot_attenuation;
+ }
+ else {
+ /* local/positional light */
+ GLfloat d;
+
+ /* VP = vector from vertex pos to light[i].pos */
+ SUB_3V(VP, light->_Position, vertex);
+ /* d = length(VP) */
+ d = (GLfloat) LEN_3FV( VP );
+ if (d > 1.0e-6F) {
+ /* normalize VP */
+ GLfloat invd = 1.0F / d;
+ SELF_SCALE_SCALAR_3V(VP, invd);
+ }
+
+ /* atti */
+ attenuation = 1.0F / (light->ConstantAttenuation + d *
+ (light->LinearAttenuation + d *
+ light->QuadraticAttenuation));
+
+ if (light->_Flags & LIGHT_SPOT) {
+ GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection);
+
+ if (PV_dot_dir_CosCutoff) {
+ continue;
+ }
+ else {
+ GLfloat spot = powf(PV_dot_dir, light->SpotExponent);
+ attenuation *= spot;
+ }
+ }
+ }
+
+ if (attenuation < 1e-3F)
+ continue;
+
+ n_dot_VP = DOT3( normal, VP );
+
+ if (n_dot_VP < 0.0F) {
+ ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]);
+ continue;
+ }
+
+ /* Ambient + diffuse */
+ COPY_3V(diffuseContrib, light->_MatAmbient[0]);
+ ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]);
+
+ /* Specular */
+ {
+ const GLfloat *h;
+ GLfloat n_dot_h;
+
+ ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0);
+
+ if (ctx->Light.Model.LocalViewer) {
+ GLfloat v[3];
+ COPY_3V(v, vertex);
+ NORMALIZE_3FV(v);
+ SUB_3V(VP, VP, v);
+ NORMALIZE_3FV(VP);
+ h = VP;
+ }
+ else if (light->_Flags & LIGHT_POSITIONAL) {
+ ACC_3V(VP, ctx->_EyeZDir);
+ NORMALIZE_3FV(VP);
+ h = VP;
+ }
+ else {
+ h = light->_h_inf_norm;
+ }
+
+ n_dot_h = DOT3(normal, h);
+
+ if (n_dot_h > 0.0F) {
+ GLfloat shine;
+ GLfloat spec_coef;
+
+ shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
+ spec_coef = powf(n_dot_h, shine);
+
+ if (spec_coef > 1.0e-10F) {
+ if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
+ ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
+ light->_MatSpecular[0]);
+ }
+ else {
+ ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef,
+ light->_MatSpecular[0]);
+ }
+ }
+ }
+ }
+
+ ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib );
+ ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib );
+ }
+
+ Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F);
+ Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F);
+ Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F);
+ Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F);
+ Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F);
+ Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F);
+ Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F);
+ Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F);
+}
+
+
+/**
+ * Do texgen needed for glRasterPos.
+ * \param ctx rendering context
+ * \param vObj object-space vertex coordinate
+ * \param vEye eye-space vertex coordinate
+ * \param normal vertex normal
+ * \param unit texture unit number
+ * \param texcoord incoming texcoord and resulting texcoord
+ */
+static void
+compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4],
+ const GLfloat normal[3], GLuint unit, GLfloat texcoord[4])
+{
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+ /* always compute sphere map terms, just in case */
+ GLfloat u[3], two_nu, rx, ry, rz, m, mInv;
+ COPY_3V(u, vEye);
+ NORMALIZE_3FV(u);
+ two_nu = 2.0F * DOT3(normal, u);
+ rx = u[0] - normal[0] * two_nu;
+ ry = u[1] - normal[1] * two_nu;
+ rz = u[2] - normal[2] * two_nu;
+ m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
+ if (m > 0.0F)
+ mInv = 0.5F * (1.0f / sqrtf(m));
+ else
+ mInv = 0.0F;
+
+ if (texUnit->TexGenEnabled & S_BIT) {
+ switch (texUnit->GenS.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane);
+ break;
+ case GL_SPHERE_MAP:
+ texcoord[0] = rx * mInv + 0.5F;
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[0] = rx;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[0] = normal[0];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad S texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & T_BIT) {
+ switch (texUnit->GenT.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane);
+ break;
+ case GL_SPHERE_MAP:
+ texcoord[1] = ry * mInv + 0.5F;
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[1] = ry;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[1] = normal[1];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad T texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & R_BIT) {
+ switch (texUnit->GenR.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane);
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[2] = rz;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[2] = normal[2];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad R texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & Q_BIT) {
+ switch (texUnit->GenQ.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane);
+ break;
+ default:
+ _mesa_problem(ctx, "Bad Q texgen in compute_texgen()");
+ return;
+ }
+ }
+}
+
+
+/**
+ * glRasterPos transformation. Typically called via ctx->Driver.RasterPos().
+ *
+ * \param vObj vertex position in object space
+ */
+void
+_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4])
+{
+ if (ctx->VertexProgram._Enabled) {
+ /* XXX implement this */
+ _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos");
+ return;
+ }
+ else {
+ GLfloat eye[4], clip[4], ndc[3], d;
+ GLfloat *norm, eyenorm[3];
+ GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+ float scale[3], translate[3];
+
+ /* apply modelview matrix: eye = MV * obj */
+ TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
+ /* apply projection matrix: clip = Proj * eye */
+ TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye );
+
+ /* clip to view volume. */
+ if (!ctx->Transform.DepthClamp) {
+ if (viewclip_point_z(clip) == 0) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+ }
+ if (!ctx->Transform.RasterPositionUnclipped) {
+ if (viewclip_point_xy(clip) == 0) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+ }
+
+ /* clip to user clipping planes */
+ if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+
+ /* ndc = clip / W */
+ d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3];
+ ndc[0] = clip[0] * d;
+ ndc[1] = clip[1] * d;
+ ndc[2] = clip[2] * d;
+ /* wincoord = viewport_mapping(ndc) */
+ _mesa_get_viewport_xform(ctx, 0, scale, translate);
+ ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0];
+ ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1];
+ ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2];
+ ctx->Current.RasterPos[3] = clip[3];
+
+ if (ctx->Transform.DepthClamp) {
+ ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3],
+ ctx->ViewportArray[0].Near,
+ ctx->ViewportArray[0].Far);
+ }
+
+ /* compute raster distance */
+ if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
+ ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0];
+ else
+ ctx->Current.RasterDistance =
+ sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] );
+
+ /* compute transformed normal vector (for lighting or texgen) */
+ if (ctx->_NeedEyeCoords) {
+ const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv;
+ TRANSFORM_NORMAL( eyenorm, objnorm, inv );
+ norm = eyenorm;
+ }
+ else {
+ norm = objnorm;
+ }
+
+ /* update raster color */
+ if (ctx->Light.Enabled) {
+ /* lighting */
+ shade_rastpos( ctx, vObj, norm,
+ ctx->Current.RasterColor,
+ ctx->Current.RasterSecondaryColor );
+ }
+ else {
+ /* use current color */
+ COPY_4FV(ctx->Current.RasterColor,
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
+ COPY_4FV(ctx->Current.RasterSecondaryColor,
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR1]);
+ }
+
+ /* texture coords */
+ {
+ GLuint u;
+ for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+ GLfloat tc[4];
+ COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]);
+ if (ctx->Texture.Unit[u].TexGenEnabled) {
+ compute_texgen(ctx, vObj, eye, norm, u, tc);
+ }
+ TRANSFORM_POINT(ctx->Current.RasterTexCoords[u],
+ ctx->TextureMatrixStack[u].Top->m, tc);
+ }
+ }
+
+ ctx->Current.RasterPosValid = GL_TRUE;
+ }
+
+ if (ctx->RenderMode == GL_SELECT) {
+ _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] );
+ }
+}
/**
diff --git a/src/mesa/main/rastpos.h b/src/mesa/main/rastpos.h
index dc28c68d41b..90b8f957b9f 100644
--- a/src/mesa/main/rastpos.h
+++ b/src/mesa/main/rastpos.h
@@ -41,6 +41,9 @@ struct gl_context;
extern void
_mesa_init_rastpos(struct gl_context *ctx);
+void
+_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]);
+
void GLAPIENTRY
_mesa_RasterPos2d(GLdouble x, GLdouble y);
void GLAPIENTRY
From 234d5320bbe373b9197e63f92326398b54c16fe9 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:40:58 -0600
Subject: [PATCH 025/266] drivers/common: use _mesa_RasterPos instead of
_tnl_RasterPos
Reviewed-by: Roland Scheidegger
---
src/mesa/drivers/common/driverfuncs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 3d1fccb3ab4..752aaf6c006 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -33,6 +33,7 @@
#include "main/mipmap.h"
#include "main/queryobj.h"
#include "main/readpix.h"
+#include "main/rastpos.h"
#include "main/renderbuffer.h"
#include "main/shaderobj.h"
#include "main/texcompress.h"
@@ -81,7 +82,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
/* framebuffer/image functions */
driver->Clear = _swrast_Clear;
- driver->RasterPos = _tnl_RasterPos;
+ driver->RasterPos = _mesa_RasterPos;
driver->DrawPixels = _swrast_DrawPixels;
driver->ReadPixels = _mesa_readpixels;
driver->CopyPixels = _swrast_CopyPixels;
From af0399a1ce5acea464219f63399284cb175fa1f4 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:42:21 -0600
Subject: [PATCH 026/266] tnl: remove t_rasterpos.c
Reviewed-by: Roland Scheidegger
---
src/mesa/Makefile.sources | 1 -
src/mesa/tnl/t_rasterpos.c | 478 -------------------------------------
2 files changed, 479 deletions(-)
delete mode 100644 src/mesa/tnl/t_rasterpos.c
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 34fb4461985..4bcaa62f48e 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -345,7 +345,6 @@ TNL_FILES = \
tnl/tnl.h \
tnl/t_pipeline.c \
tnl/t_pipeline.h \
- tnl/t_rasterpos.c \
tnl/t_vb_cliptmp.h \
tnl/t_vb_fog.c \
tnl/t_vb_light.c \
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
deleted file mode 100644
index 4bd9ac8539e..00000000000
--- a/src/mesa/tnl/t_rasterpos.c
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "c99_math.h"
-#include "main/glheader.h"
-#include "main/feedback.h"
-#include "main/light.h"
-#include "main/macros.h"
-#include "util/simple_list.h"
-#include "main/mtypes.h"
-#include "main/viewport.h"
-
-#include "math/m_matrix.h"
-#include "tnl/tnl.h"
-
-
-
-/**
- * Clip a point against the view volume.
- *
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if outside view volume, or one if inside.
- */
-static GLuint
-viewclip_point_xy( const GLfloat v[] )
-{
- if ( v[0] > v[3] || v[0] < -v[3]
- || v[1] > v[3] || v[1] < -v[3] ) {
- return 0;
- }
- else {
- return 1;
- }
-}
-
-
-/**
- * Clip a point against the far/near Z clipping planes.
- *
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if outside view volume, or one if inside.
- */
-static GLuint
-viewclip_point_z( const GLfloat v[] )
-{
- if (v[2] > v[3] || v[2] < -v[3] ) {
- return 0;
- }
- else {
- return 1;
- }
-}
-
-
-/**
- * Clip a point against the user clipping planes.
- *
- * \param ctx GL context.
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if the point was clipped, or one otherwise.
- */
-static GLuint
-userclip_point( struct gl_context *ctx, const GLfloat v[] )
-{
- GLuint p;
-
- for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
- if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
- GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0]
- + v[1] * ctx->Transform._ClipUserPlane[p][1]
- + v[2] * ctx->Transform._ClipUserPlane[p][2]
- + v[3] * ctx->Transform._ClipUserPlane[p][3];
- if (dot < 0.0F) {
- return 0;
- }
- }
- }
-
- return 1;
-}
-
-
-/**
- * Compute lighting for the raster position. RGB modes computed.
- * \param ctx the context
- * \param vertex vertex location
- * \param normal normal vector
- * \param Rcolor returned color
- * \param Rspec returned specular color (if separate specular enabled)
- */
-static void
-shade_rastpos(struct gl_context *ctx,
- const GLfloat vertex[4],
- const GLfloat normal[3],
- GLfloat Rcolor[4],
- GLfloat Rspec[4])
-{
- /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor;
- const struct gl_light *light;
- GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */
-
- COPY_3V(diffuseColor, base[0]);
- diffuseColor[3] = CLAMP(
- ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F );
- ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0);
-
- foreach (light, &ctx->Light.EnabledList) {
- GLfloat attenuation = 1.0;
- GLfloat VP[3]; /* vector from vertex to light pos */
- GLfloat n_dot_VP;
- GLfloat diffuseContrib[3], specularContrib[3];
-
- if (!(light->_Flags & LIGHT_POSITIONAL)) {
- /* light at infinity */
- COPY_3V(VP, light->_VP_inf_norm);
- attenuation = light->_VP_inf_spot_attenuation;
- }
- else {
- /* local/positional light */
- GLfloat d;
-
- /* VP = vector from vertex pos to light[i].pos */
- SUB_3V(VP, light->_Position, vertex);
- /* d = length(VP) */
- d = (GLfloat) LEN_3FV( VP );
- if (d > 1.0e-6F) {
- /* normalize VP */
- GLfloat invd = 1.0F / d;
- SELF_SCALE_SCALAR_3V(VP, invd);
- }
-
- /* atti */
- attenuation = 1.0F / (light->ConstantAttenuation + d *
- (light->LinearAttenuation + d *
- light->QuadraticAttenuation));
-
- if (light->_Flags & LIGHT_SPOT) {
- GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection);
-
- if (PV_dot_dir_CosCutoff) {
- continue;
- }
- else {
- GLfloat spot = powf(PV_dot_dir, light->SpotExponent);
- attenuation *= spot;
- }
- }
- }
-
- if (attenuation < 1e-3F)
- continue;
-
- n_dot_VP = DOT3( normal, VP );
-
- if (n_dot_VP < 0.0F) {
- ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]);
- continue;
- }
-
- /* Ambient + diffuse */
- COPY_3V(diffuseContrib, light->_MatAmbient[0]);
- ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]);
-
- /* Specular */
- {
- const GLfloat *h;
- GLfloat n_dot_h;
-
- ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0);
-
- if (ctx->Light.Model.LocalViewer) {
- GLfloat v[3];
- COPY_3V(v, vertex);
- NORMALIZE_3FV(v);
- SUB_3V(VP, VP, v);
- NORMALIZE_3FV(VP);
- h = VP;
- }
- else if (light->_Flags & LIGHT_POSITIONAL) {
- ACC_3V(VP, ctx->_EyeZDir);
- NORMALIZE_3FV(VP);
- h = VP;
- }
- else {
- h = light->_h_inf_norm;
- }
-
- n_dot_h = DOT3(normal, h);
-
- if (n_dot_h > 0.0F) {
- GLfloat shine;
- GLfloat spec_coef;
-
- shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
- spec_coef = powf(n_dot_h, shine);
-
- if (spec_coef > 1.0e-10F) {
- if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
- ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
- light->_MatSpecular[0]);
- }
- else {
- ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef,
- light->_MatSpecular[0]);
- }
- }
- }
- }
-
- ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib );
- ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib );
- }
-
- Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F);
- Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F);
- Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F);
- Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F);
- Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F);
- Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F);
- Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F);
- Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F);
-}
-
-
-/**
- * Do texgen needed for glRasterPos.
- * \param ctx rendering context
- * \param vObj object-space vertex coordinate
- * \param vEye eye-space vertex coordinate
- * \param normal vertex normal
- * \param unit texture unit number
- * \param texcoord incoming texcoord and resulting texcoord
- */
-static void
-compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4],
- const GLfloat normal[3], GLuint unit, GLfloat texcoord[4])
-{
- const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-
- /* always compute sphere map terms, just in case */
- GLfloat u[3], two_nu, rx, ry, rz, m, mInv;
- COPY_3V(u, vEye);
- NORMALIZE_3FV(u);
- two_nu = 2.0F * DOT3(normal, u);
- rx = u[0] - normal[0] * two_nu;
- ry = u[1] - normal[1] * two_nu;
- rz = u[2] - normal[2] * two_nu;
- m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
- if (m > 0.0F)
- mInv = 0.5F * (1.0f / sqrtf(m));
- else
- mInv = 0.0F;
-
- if (texUnit->TexGenEnabled & S_BIT) {
- switch (texUnit->GenS.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane);
- break;
- case GL_SPHERE_MAP:
- texcoord[0] = rx * mInv + 0.5F;
- break;
- case GL_REFLECTION_MAP:
- texcoord[0] = rx;
- break;
- case GL_NORMAL_MAP:
- texcoord[0] = normal[0];
- break;
- default:
- _mesa_problem(ctx, "Bad S texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & T_BIT) {
- switch (texUnit->GenT.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane);
- break;
- case GL_SPHERE_MAP:
- texcoord[1] = ry * mInv + 0.5F;
- break;
- case GL_REFLECTION_MAP:
- texcoord[1] = ry;
- break;
- case GL_NORMAL_MAP:
- texcoord[1] = normal[1];
- break;
- default:
- _mesa_problem(ctx, "Bad T texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & R_BIT) {
- switch (texUnit->GenR.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane);
- break;
- case GL_REFLECTION_MAP:
- texcoord[2] = rz;
- break;
- case GL_NORMAL_MAP:
- texcoord[2] = normal[2];
- break;
- default:
- _mesa_problem(ctx, "Bad R texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & Q_BIT) {
- switch (texUnit->GenQ.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane);
- break;
- default:
- _mesa_problem(ctx, "Bad Q texgen in compute_texgen()");
- return;
- }
- }
-}
-
-
-/**
- * glRasterPos transformation. Typically called via ctx->Driver.RasterPos().
- * XXX some of this code (such as viewport xform, clip testing and setting
- * of ctx->Current.Raster* fields) could get lifted up into the
- * main/rasterpos.c code.
- *
- * \param vObj vertex position in object space
- */
-void
-_tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4])
-{
- if (ctx->VertexProgram._Enabled) {
- /* XXX implement this */
- _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos");
- return;
- }
- else {
- GLfloat eye[4], clip[4], ndc[3], d;
- GLfloat *norm, eyenorm[3];
- GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
- float scale[3], translate[3];
-
- /* apply modelview matrix: eye = MV * obj */
- TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
- /* apply projection matrix: clip = Proj * eye */
- TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye );
-
- /* clip to view volume. */
- if (!ctx->Transform.DepthClamp) {
- if (viewclip_point_z(clip) == 0) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
- }
- if (!ctx->Transform.RasterPositionUnclipped) {
- if (viewclip_point_xy(clip) == 0) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
- }
-
- /* clip to user clipping planes */
- if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
-
- /* ndc = clip / W */
- d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3];
- ndc[0] = clip[0] * d;
- ndc[1] = clip[1] * d;
- ndc[2] = clip[2] * d;
- /* wincoord = viewport_mapping(ndc) */
- _mesa_get_viewport_xform(ctx, 0, scale, translate);
- ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0];
- ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1];
- ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2];
- ctx->Current.RasterPos[3] = clip[3];
-
- if (ctx->Transform.DepthClamp) {
- ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3],
- ctx->ViewportArray[0].Near,
- ctx->ViewportArray[0].Far);
- }
-
- /* compute raster distance */
- if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
- ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0];
- else
- ctx->Current.RasterDistance =
- sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] );
-
- /* compute transformed normal vector (for lighting or texgen) */
- if (ctx->_NeedEyeCoords) {
- const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv;
- TRANSFORM_NORMAL( eyenorm, objnorm, inv );
- norm = eyenorm;
- }
- else {
- norm = objnorm;
- }
-
- /* update raster color */
- if (ctx->Light.Enabled) {
- /* lighting */
- shade_rastpos( ctx, vObj, norm,
- ctx->Current.RasterColor,
- ctx->Current.RasterSecondaryColor );
- }
- else {
- /* use current color */
- COPY_4FV(ctx->Current.RasterColor,
- ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
- COPY_4FV(ctx->Current.RasterSecondaryColor,
- ctx->Current.Attrib[VERT_ATTRIB_COLOR1]);
- }
-
- /* texture coords */
- {
- GLuint u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- GLfloat tc[4];
- COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]);
- if (ctx->Texture.Unit[u].TexGenEnabled) {
- compute_texgen(ctx, vObj, eye, norm, u, tc);
- }
- TRANSFORM_POINT(ctx->Current.RasterTexCoords[u],
- ctx->TextureMatrixStack[u].Top->m, tc);
- }
- }
-
- ctx->Current.RasterPosValid = GL_TRUE;
- }
-
- if (ctx->RenderMode == GL_SELECT) {
- _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] );
- }
-}
From 990afdc04551e89ec9b5d29a05a9da798c07ccc3 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:42:37 -0600
Subject: [PATCH 027/266] st/mesa: use _mesa_RasterPos() when possible
The st_RasterPos() function goes to great pains to implement the
rasterpos transformation. It basically uses gallium's draw module to
execute the vertex shader to draw a point, then capture that point's
attributes.
But glRasterPos isn't typically used with a vertex shader so we can
usually use the old/fixed-function implementation which is a lot simpler
and faster.
This can add up for legacy apps that make a lot of calls to glRasterPos.
Reviewed-by: Roland Scheidegger
---
src/mesa/state_tracker/st_cb_rasterpos.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index b9997dacfd2..747b41464ae 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -39,6 +39,7 @@
#include "main/imports.h"
#include "main/macros.h"
#include "main/feedback.h"
+#include "main/rastpos.h"
#include "st_context.h"
#include "st_atom.h"
@@ -224,6 +225,15 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
struct rastpos_stage *rs;
const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
+ if (ctx->VertexProgram._Current == NULL ||
+ ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) {
+ /* No vertex shader/program is enabled, used the simple/fast fixed-
+ * function implementation of RasterPos.
+ */
+ _mesa_RasterPos(ctx, v);
+ return;
+ }
+
if (st->rastpos_stage) {
/* get rastpos stage info */
rs = rastpos_stage(st->rastpos_stage);
From df0f817e311dbf9af110779086a27429905b1faf Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 21 Oct 2015 13:58:04 -0600
Subject: [PATCH 028/266] mesa: check for unchanged line width before error
checking
Reviewed-by: Matt Turner
---
src/mesa/main/lines.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c
index c020fb3eb9e..93b80af0dc4 100644
--- a/src/mesa/main/lines.c
+++ b/src/mesa/main/lines.c
@@ -45,6 +45,10 @@ _mesa_LineWidth( GLfloat width )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glLineWidth %f\n", width);
+ /* If width is unchanged, there can't be an error */
+ if (ctx->Line.Width == width)
+ return;
+
if (width <= 0.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
return;
@@ -68,9 +72,6 @@ _mesa_LineWidth( GLfloat width )
return;
}
- if (ctx->Line.Width == width)
- return;
-
FLUSH_VERTICES(ctx, _NEW_LINE);
ctx->Line.Width = width;
From 1082735bb69e9f64cb3991a52f0e270902917855 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Thu, 22 Oct 2015 15:36:25 -0600
Subject: [PATCH 029/266] svga: detect constant color writes in fragment
shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Examine the fragment shader to try to detect TGSI shaders which use
"MOV OUT[0], CONST[i]" to write a constant value for the fragment color.
In this case, all fragments will have the same color (unless blending is
enabled).
This is a common case for OpenGL code such as: glColor(), glBegin(),
glVertex(), ..., glEnd() when lighting/fog/etc are disabled. In this
case, the Mesa/gallium state tracker actually generates a simple
"MOV OUT[0], CONST[i]" fragment shader.
This will be used by the next commit to avoid provoking vertex conversion
(creating/rewriting an index buffer) when drawing flat-shaded primitives.
Reviewed-by: Charmaine Lee
Reviewed-by: José Fonseca
---
src/gallium/drivers/svga/svga_shader.h | 3 ++
src/gallium/drivers/svga/svga_tgsi.c | 7 ++++
src/gallium/drivers/svga/svga_tgsi_emit.h | 3 ++
src/gallium/drivers/svga/svga_tgsi_insn.c | 27 ++++++++++++++
src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 39 +++++++++++++++++++--
5 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index efcac408626..f49fdb46d0e 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -155,6 +155,9 @@ struct svga_shader_variant
* applied to any of the varyings.
*/
+ /** Is the color output just a constant value? (fragment shader only) */
+ boolean constant_color_output;
+
/** For FS-based polygon stipple */
unsigned pstipple_sampler_unit;
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 202eee276b7..4c16f4313a0 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -240,6 +240,13 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga,
variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ variant->constant_color_output =
+ emit.constant_color_output && emit.num_output_writes == 1;
+
#if 0
if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
SVGA_DEBUG & DEBUG_TGSI) {
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 0b82483ab2e..83f0c8bd4d0 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -84,6 +84,9 @@ struct svga_shader_emitter
int dynamic_branching_level;
+ unsigned num_output_writes;
+ boolean constant_color_output;
+
boolean in_main_func;
boolean created_common_immediate;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 00c91a4fa61..dbb90f7654e 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -99,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit,
* Need to lookup a table built at decl time:
*/
dest = emit->output_map[reg->Register.Index];
+ emit->num_output_writes++;
break;
default:
@@ -2102,6 +2103,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit,
}
+/**
+ * TGSI_OPCODE_MOVE is only special-cased here to detect the
+ * svga_fragment_shader::constant_color_output case.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[0];
+ const struct tgsi_full_dst_register *dst = &insn->Dst[0];
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ dst->Register.File == TGSI_FILE_OUTPUT &&
+ dst->Register.Index == 0 &&
+ src->Register.File == TGSI_FILE_CONSTANT &&
+ !src->Register.Indirect) {
+ emit->constant_color_output = TRUE;
+ }
+
+ return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
+}
+
+
/**
* Translate/emit TGSI DDX, DDY instructions.
*/
@@ -3045,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
case TGSI_OPCODE_SSG:
return emit_ssg( emit, insn );
+ case TGSI_OPCODE_MOV:
+ return emit_mov( emit, insn );
+
default:
{
unsigned opcode = translate_opcode(insn->Instruction.Opcode);
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 332904f88b4..e70ee689c59 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -202,6 +202,9 @@ struct svga_shader_emitter_v10
/* user clip plane constant slot indexes */
unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
+ unsigned num_output_writes;
+ boolean constant_color_output;
+
boolean uses_flat_interp;
/* For all shaders: const reg index for RECT coord scaling */
@@ -913,6 +916,8 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
*/
assert(sem_name == TGSI_SEMANTIC_COLOR);
index = emit->info.output_semantic_index[index];
+
+ emit->num_output_writes++;
}
}
}
@@ -5572,6 +5577,29 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
}
+/**
+ * We only special case the MOV instruction to try to detect constant
+ * color writes in the fragment shader.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_full_src_register *src = &inst->Src[0];
+ const struct tgsi_full_dst_register *dst = &inst->Dst[0];
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ dst->Register.File == TGSI_FILE_OUTPUT &&
+ dst->Register.Index == 0 &&
+ src->Register.File == TGSI_FILE_CONSTANT &&
+ !src->Register.Indirect) {
+ emit->constant_color_output = TRUE;
+ }
+
+ return emit_simple(emit, inst);
+}
+
+
/**
* Emit a simple VGPU10 instruction which writes to multiple dest registers,
* where TGSI only uses one dest register.
@@ -5652,7 +5680,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_MAX:
case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_MOV:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_NOP:
case TGSI_OPCODE_NOT:
@@ -5677,7 +5704,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
/* simple instructions */
return emit_simple(emit, inst);
-
+ case TGSI_OPCODE_MOV:
+ return emit_mov(emit, inst);
case TGSI_OPCODE_EMIT:
return emit_vertex(emit, inst);
case TGSI_OPCODE_ENDPRIM:
@@ -6762,6 +6790,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ variant->constant_color_output =
+ emit->constant_color_output && emit->num_output_writes == 1;
+
/** keep track in the variant if flat interpolation is used
* for any of the varyings.
*/
From 129d34da494840628b2bb1cbb6397d50dab3c999 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Fri, 16 Oct 2015 16:12:19 -0600
Subject: [PATCH 030/266] svga: avoid provoking vertex conversion when possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Provoking vertex comes into play when doing flat shading. But if we know
that all fragments in a primitive are the same color, the provoking vertex
doesn't matter. Check for that case and use whichever provoking vertex
convention is supported by the device.
This avoids generating an index buffer to do the PV conversion.
Reviewed-by: Charmaine Lee
Reviewed-by: José Fonseca
---
src/gallium/drivers/svga/svga_draw_arrays.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index 5635411d938..e4d776443c5 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -32,6 +32,7 @@
#include "svga_draw.h"
#include "svga_draw_private.h"
#include "svga_context.h"
+#include "svga_shader.h"
#define DBG 0
@@ -206,6 +207,18 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
unsigned gen_prim, gen_size, gen_nr, gen_type;
u_generate_func gen_func;
enum pipe_error ret = PIPE_OK;
+ unsigned api_pv = hwtnl->api_pv;
+ struct svga_context *svga = hwtnl->svga;
+
+ if (svga->curr.rast->templ.flatshade &&
+ svga->state.hw_draw.fs->constant_color_output) {
+ /* The fragment color is a constant, not per-vertex so the whole
+ * primitive will be the same color (except for possible blending).
+ * We can ignore the current provoking vertex state and use whatever
+ * the hardware wants.
+ */
+ api_pv = hwtnl->hw_pv;
+ }
if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
prim >= PIPE_PRIM_TRIANGLES) {
@@ -226,7 +239,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
prim,
start,
count,
- hwtnl->api_pv,
+ api_pv,
hwtnl->hw_pv,
&gen_prim, &gen_size, &gen_nr, &gen_func);
}
From 99effaa9658a34eb07255bb1964569c8a86e8dda Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Fri, 16 Oct 2015 16:14:46 -0600
Subject: [PATCH 031/266] svga: try to avoid index generation for some
primitive types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The svga device doesn't directly support quads, quad strips or polygons
so we have to convert those types to indexed triangle lists. But we
can sometimes avoid that if we're drawing flat/constant-colored prims
and we don't have to worry about provoking vertex.
Reviewed-by: Charmaine Lee
Reviewed-by: José Fonseca
---
src/gallium/drivers/svga/svga_draw_arrays.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index e4d776443c5..caf4b17de16 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -218,6 +218,20 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
* the hardware wants.
*/
api_pv = hwtnl->hw_pv;
+
+ if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) {
+ /* Do some simple primitive conversions to avoid index buffer
+ * generation below. Note that polygons and quads are not directly
+ * supported by the svga device. Also note, we can only do this
+ * for flat/constant-colored rendering because of provoking vertex.
+ */
+ if (prim == PIPE_PRIM_POLYGON) {
+ prim = PIPE_PRIM_TRIANGLE_FAN;
+ }
+ else if (prim == PIPE_PRIM_QUADS && count == 4) {
+ prim = PIPE_PRIM_TRIANGLE_FAN;
+ }
+ }
}
if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
From 231d53923918f4a885e74da0a063dcbf1a4353e3 Mon Sep 17 00:00:00 2001
From: Sinclair Yeh
Date: Tue, 13 Oct 2015 12:58:26 -0700
Subject: [PATCH 032/266] svga: Condition preemptive flush on draw emission
On ultra high resolution modes, the preemptive flush flag can be
set midway through command submission, a condition that cannot be
recovered from a flush-retry, causing rendering artifacts.
This patch prevents a preemtive_flush until a draw has been
emitted.
Signed-off-by: Sinclair Yeh
Reviewed-by: Thomas Hellstrom
Reviewed-by: Charmaine Lee
Reviewed-by: Brian Paul
---
src/gallium/drivers/svga/svga_cmd.c | 2 ++
src/gallium/drivers/svga/svga_cmd_vgpu10.c | 6 ++++++
src/gallium/drivers/svga/svga_winsys.h | 7 +++++++
src/gallium/winsys/svga/drm/vmw_context.c | 15 ++++++++++-----
4 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index d3cf52f08e2..0e1e332d6cb 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -1016,6 +1016,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
*decls = declArray;
*ranges = rangeArray;
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
index 596ba953cd2..5c121089f91 100644
--- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
@@ -535,6 +535,7 @@ SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -550,6 +551,7 @@ SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation,
baseVertexLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -566,6 +568,7 @@ SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount,
startVertexLocation, startInstanceLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -584,6 +587,8 @@ SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
startIndexLocation, baseVertexLocation,
startInstanceLocation);
+
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -593,6 +598,7 @@ SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
{
SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index c750603989f..3129e46ed06 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -85,6 +85,8 @@ struct winsys_handle;
#define SVGA_QUERY_FLAG_SET (1 << 0)
#define SVGA_QUERY_FLAG_REF (1 << 1)
+#define SVGA_HINT_FLAG_DRAW_EMITTED (1 << 0)
+
/** Opaque surface handle */
struct svga_winsys_surface;
@@ -212,6 +214,11 @@ struct svga_winsys_context
*/
uint32 cid;
+ /**
+ * Flags to hint the current context state
+ */
+ uint32 hints;
+
/**
** BEGIN new functions for guest-backed surfaces.
**/
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 1675af4cbc8..4dc32366d61 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -251,6 +251,7 @@ vmw_swc_flush(struct svga_winsys_context *swc,
vswc->must_flush = FALSE;
debug_flush_flush(vswc->fctx);
#endif
+ swc->hints &= ~SVGA_HINT_FLAG_DRAW_EMITTED;
vswc->preemptive_flush = FALSE;
vswc->seen_surfaces = 0;
vswc->seen_regions = 0;
@@ -372,7 +373,8 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc,
if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
vswc->seen_regions += reloc->buffer->size;
- if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/5)
+ if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) &&
+ vswc->seen_regions >= VMW_GMR_POOL_SIZE/5)
vswc->preemptive_flush = TRUE;
}
@@ -413,8 +415,10 @@ vmw_swc_mob_relocation(struct svga_winsys_context *swc,
if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) {
vswc->seen_mobs += pb_buffer->size;
- /* divide by 5, tested for best performance */
- if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
+
+ if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) &&
+ vswc->seen_mobs >=
+ vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
vswc->preemptive_flush = TRUE;
}
@@ -475,8 +479,9 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
++vswc->surface.staged;
vswc->seen_surfaces += vsurf->size;
- /* divide by 5 not well tuned for performance */
- if (vswc->seen_surfaces >= vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR)
+ if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) &&
+ vswc->seen_surfaces >=
+ vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR)
vswc->preemptive_flush = TRUE;
}
From 041081dc219dcf7f61748284033c6fd7627acc69 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 16 Sep 2013 10:13:55 +1000
Subject: [PATCH 033/266] tgsi: add option to dump floats as hex values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This adds support to the parser to accept hex values as floats,
and then adds support to the dumper to allow the user to select
to dump float as 32-bit hex numbers.
This is required to get accurate values for virgl use of TGSI.
Reviewed-by: Marek Olšák
Signed-off-by: Dave Airlie
---
src/gallium/auxiliary/tgsi/tgsi_dump.c | 19 ++++++++++++++++++-
src/gallium/auxiliary/tgsi/tgsi_dump.h | 2 ++
src/gallium/auxiliary/tgsi/tgsi_text.c | 11 ++++++++++-
3 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 5d80cca5b0e..34707392773 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -29,6 +29,7 @@
#include "util/u_string.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
@@ -43,6 +44,8 @@ struct dump_ctx
{
struct tgsi_iterate_context iter;
+ boolean dump_float_as_hex;
+
uint instno;
uint immno;
int indent;
@@ -88,6 +91,7 @@ dump_enum(
#define SID(I) ctx->dump_printf( ctx, "%d", I )
#define FLT(F) ctx->dump_printf( ctx, "%10.4f", F )
#define DBL(D) ctx->dump_printf( ctx, "%10.8f", D )
+#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) )
#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
const char *
@@ -251,7 +255,10 @@ dump_imm_data(struct tgsi_iterate_context *iter,
break;
}
case TGSI_IMM_FLOAT32:
- FLT( data[i].Float );
+ if (ctx->dump_float_as_hex)
+ HFLT( data[i].Float );
+ else
+ FLT( data[i].Float );
break;
case TGSI_IMM_UINT32:
UID(data[i].Uint);
@@ -682,6 +689,11 @@ tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
ctx.indentation = 0;
ctx.file = file;
+ if (flags & TGSI_DUMP_FLOAT_AS_HEX)
+ ctx.dump_float_as_hex = TRUE;
+ else
+ ctx.dump_float_as_hex = FALSE;
+
tgsi_iterate_shader( tokens, &ctx.iter );
}
@@ -750,6 +762,11 @@ tgsi_dump_str(
ctx.ptr = str;
ctx.left = (int)size;
+ if (flags & TGSI_DUMP_FLOAT_AS_HEX)
+ ctx.base.dump_float_as_hex = TRUE;
+ else
+ ctx.base.dump_float_as_hex = FALSE;
+
tgsi_iterate_shader( tokens, &ctx.base.iter );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 7c8f92ee7bc..6666b983e9c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -38,6 +38,8 @@
extern "C" {
#endif
+#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0)
+
void
tgsi_dump_str(
const struct tgsi_token *tokens,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 3e3ed5b19d1..4a82c9b3552 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -195,8 +195,15 @@ static boolean parse_float( const char **pcur, float *val )
boolean integral_part = FALSE;
boolean fractional_part = FALSE;
- *val = (float) atof( cur );
+ if (*cur == '0' && *(cur + 1) == 'x') {
+ union fi fi;
+ fi.ui = strtoul(cur, NULL, 16);
+ *val = fi.f;
+ cur += 10;
+ goto out;
+ }
+ *val = (float) atof( cur );
if (*cur == '-' || *cur == '+')
cur++;
if (is_digit( cur )) {
@@ -228,6 +235,8 @@ static boolean parse_float( const char **pcur, float *val )
else
return FALSE;
}
+
+out:
*pcur = cur;
return TRUE;
}
From 531f5d1270782927911c5c720201e86c6f40182d Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 22 Jan 2015 15:18:05 +1000
Subject: [PATCH 034/266] tgsi: try and handle overflowing shaders. (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is used to detect error in virgl if we overflow the shader
dumping buffers.
v2: return a bool.
Reviewed-by: Marek Olšák
Signed-off-by: Dave Airlie
---
src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 ++++++++--
src/gallium/auxiliary/tgsi/tgsi_dump.h | 2 +-
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 34707392773..e29ffb39894 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -709,6 +709,7 @@ struct str_dump_ctx
char *str;
char *ptr;
int left;
+ bool nospace;
};
static void
@@ -731,10 +732,11 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
sctx->ptr += written;
sctx->left -= written;
}
- }
+ } else
+ sctx->nospace = true;
}
-void
+bool
tgsi_dump_str(
const struct tgsi_token *tokens,
uint flags,
@@ -761,6 +763,7 @@ tgsi_dump_str(
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
+ ctx.nospace = false;
if (flags & TGSI_DUMP_FLOAT_AS_HEX)
ctx.base.dump_float_as_hex = TRUE;
@@ -768,6 +771,8 @@ tgsi_dump_str(
ctx.base.dump_float_as_hex = FALSE;
tgsi_iterate_shader( tokens, &ctx.base.iter );
+
+ return !ctx.nospace;
}
void
@@ -790,6 +795,7 @@ tgsi_dump_instruction_str(
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
+ ctx.nospace = false;
iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 6666b983e9c..c3722d333d7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -40,7 +40,7 @@ extern "C" {
#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0)
-void
+bool
tgsi_dump_str(
const struct tgsi_token *tokens,
uint flags,
From a8987b88ff1db4ac00720a9b56c4bc3aeb666537 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 22 Jan 2015 15:11:47 +1000
Subject: [PATCH 035/266] virgl: add driver for virtio-gpu 3D (v2)
virgl is the 3D acceleration backend for the
virtio-gpu shipping with qemu.
The 3D acceleration is designed around gallium
and TGSI as the virtualisation layer. The backend
renderer translates the virgl interface into
OpenGL currently.
This is the initial import of the driver to mesa.
The kernel driver portions are lined up for drm-next.
Currently this driver supports up to GL3.3 and some
misc extensions if the host driver exposes it. It is
planned to iterate the virgl API to new GL levels
as mesa host drivers gain features.
v2: fix resource tracking across flushes to avoid
->bind hack in mapping.
consolidate mapping and waiting code for transfers.
use u_range for dirt tracking.
handle larger shaders in protocol.
include virtgpu_drm.h in mesa for now.
add translation layer for gallium tgsi to virgl tgsi.
Signed-off-by: Dave Airlie
---
configure.ac | 11 +-
src/gallium/Makefile.am | 5 +
.../target-helpers/inline_drm_helper.h | 42 +
src/gallium/drivers/virgl/Automake.inc | 10 +
src/gallium/drivers/virgl/Makefile.am | 43 +
src/gallium/drivers/virgl/virgl.h | 51 +
src/gallium/drivers/virgl/virgl_buffer.c | 170 ++++
src/gallium/drivers/virgl/virgl_context.c | 962 ++++++++++++++++++
src/gallium/drivers/virgl/virgl_context.h | 100 ++
src/gallium/drivers/virgl/virgl_encode.c | 863 ++++++++++++++++
src/gallium/drivers/virgl/virgl_encode.h | 232 +++++
src/gallium/drivers/virgl/virgl_protocol.h | 468 +++++++++
src/gallium/drivers/virgl/virgl_public.h | 32 +
src/gallium/drivers/virgl/virgl_query.c | 169 +++
src/gallium/drivers/virgl/virgl_resource.c | 89 ++
src/gallium/drivers/virgl/virgl_resource.h | 134 +++
src/gallium/drivers/virgl/virgl_screen.c | 557 ++++++++++
src/gallium/drivers/virgl/virgl_streamout.c | 87 ++
src/gallium/drivers/virgl/virgl_texture.c | 349 +++++++
src/gallium/drivers/virgl/virgl_tgsi.c | 66 ++
src/gallium/drivers/virgl/virgl_winsys.h | 110 ++
src/gallium/targets/dri/Makefile.am | 2 +
src/gallium/winsys/virgl/drm/Makefile.am | 34 +
src/gallium/winsys/virgl/drm/Makefile.sources | 2 +
.../winsys/virgl/drm/virgl_drm_public.h | 30 +
.../winsys/virgl/drm/virgl_drm_winsys.c | 764 ++++++++++++++
.../winsys/virgl/drm/virgl_drm_winsys.h | 98 ++
src/gallium/winsys/virgl/drm/virgl_hw.h | 286 ++++++
src/gallium/winsys/virgl/drm/virtgpu_drm.h | 163 +++
29 files changed, 5928 insertions(+), 1 deletion(-)
create mode 100644 src/gallium/drivers/virgl/Automake.inc
create mode 100644 src/gallium/drivers/virgl/Makefile.am
create mode 100644 src/gallium/drivers/virgl/virgl.h
create mode 100644 src/gallium/drivers/virgl/virgl_buffer.c
create mode 100644 src/gallium/drivers/virgl/virgl_context.c
create mode 100644 src/gallium/drivers/virgl/virgl_context.h
create mode 100644 src/gallium/drivers/virgl/virgl_encode.c
create mode 100644 src/gallium/drivers/virgl/virgl_encode.h
create mode 100644 src/gallium/drivers/virgl/virgl_protocol.h
create mode 100644 src/gallium/drivers/virgl/virgl_public.h
create mode 100644 src/gallium/drivers/virgl/virgl_query.c
create mode 100644 src/gallium/drivers/virgl/virgl_resource.c
create mode 100644 src/gallium/drivers/virgl/virgl_resource.h
create mode 100644 src/gallium/drivers/virgl/virgl_screen.c
create mode 100644 src/gallium/drivers/virgl/virgl_streamout.c
create mode 100644 src/gallium/drivers/virgl/virgl_texture.c
create mode 100644 src/gallium/drivers/virgl/virgl_tgsi.c
create mode 100644 src/gallium/drivers/virgl/virgl_winsys.h
create mode 100644 src/gallium/winsys/virgl/drm/Makefile.am
create mode 100644 src/gallium/winsys/virgl/drm/Makefile.sources
create mode 100644 src/gallium/winsys/virgl/drm/virgl_drm_public.h
create mode 100644 src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
create mode 100644 src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
create mode 100644 src/gallium/winsys/virgl/drm/virgl_hw.h
create mode 100644 src/gallium/winsys/virgl/drm/virtgpu_drm.h
diff --git a/configure.ac b/configure.ac
index d3df1955d26..4ef09d2659b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -866,7 +866,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
AC_ARG_WITH([gallium-drivers],
[AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
[comma delimited Gallium drivers list, e.g.
- "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4"
+ "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl"
@<:@default=r300,r600,svga,swrast@:>@])],
[with_gallium_drivers="$withval"],
[with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -2185,6 +2185,12 @@ if test -n "$with_gallium_drivers"; then
PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
[USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
;;
+ xvirgl)
+ HAVE_GALLIUM_VIRGL=yes
+ gallium_require_drm "virgl"
+ gallium_require_drm_loader
+ require_egl_drm "virgl"
+ ;;
*)
AC_MSG_ERROR([Unknown Gallium driver: $driver])
;;
@@ -2256,6 +2262,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
@@ -2376,6 +2383,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/svga/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
+ src/gallium/drivers/virgl/Makefile
src/gallium/state_trackers/clover/Makefile
src/gallium/state_trackers/dri/Makefile
src/gallium/state_trackers/glx/xlib/Makefile
@@ -2416,6 +2424,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/sw/wrapper/Makefile
src/gallium/winsys/sw/xlib/Makefile
src/gallium/winsys/vc4/drm/Makefile
+ src/gallium/winsys/virgl/drm/Makefile
src/gbm/Makefile
src/gbm/main/gbm.pc
src/glsl/Makefile
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index a7c3606de0a..2fa93ddfe5d 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -82,6 +82,11 @@ if HAVE_GALLIUM_VC4
SUBDIRS += drivers/vc4 winsys/vc4/drm
endif
+## virgl
+if HAVE_GALLIUM_VIRGL
+SUBDIRS += drivers/virgl winsys/virgl/drm
+endif
+
## the sw winsys'
SUBDIRS += winsys/sw/null
diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index 08271a760f5..6ca4dc8136c 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -59,6 +59,11 @@
#include "vc4/drm/vc4_drm_public.h"
#endif
+#if GALLIUM_VIRGL
+#include "virgl/drm/virgl_drm_public.h"
+#include "virgl/virgl_public.h"
+#endif
+
static char* driver_name = NULL;
/* XXX: We need to teardown the winsys if *screen_create() fails. */
@@ -296,6 +301,33 @@ pipe_freedreno_create_screen(int fd)
}
#endif
+#if defined(GALLIUM_VIRGL)
+#if defined(DRI_TARGET)
+
+const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+static struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+ struct virgl_winsys *vws;
+ struct pipe_screen *screen;
+
+ vws = virgl_drm_winsys_create(fd);
+ if (!vws)
+ return NULL;
+
+ screen = virgl_create_screen(vws);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+#endif
+
#if defined(GALLIUM_VC4)
#if defined(DRI_TARGET)
@@ -385,6 +417,11 @@ dd_create_screen(int fd)
return pipe_freedreno_create_screen(fd);
else
#endif
+#if defined(GALLIUM_VIRGL)
+ if ((strcmp(driver_name, "virtio_gpu") == 0))
+ return pipe_virgl_create_screen(fd);
+ else
+#endif
#if defined(GALLIUM_VC4)
if (strcmp(driver_name, "vc4") == 0)
return pipe_vc4_create_screen(fd);
@@ -474,6 +511,11 @@ dd_configuration(enum drm_conf conf)
return configuration_query(conf);
else
#endif
+#if defined(GALLIUM_VIRGL)
+ if ((strcmp(driver_name, "virtio_gpu") == 0))
+ return configuration_query(conf);
+ else
+#endif
#if defined(GALLIUM_VC4)
if (strcmp(driver_name, "vc4") == 0)
return configuration_query(conf);
diff --git a/src/gallium/drivers/virgl/Automake.inc b/src/gallium/drivers/virgl/Automake.inc
new file mode 100644
index 00000000000..457ca77b3a7
--- /dev/null
+++ b/src/gallium/drivers/virgl/Automake.inc
@@ -0,0 +1,10 @@
+if HAVE_GALLIUM_VIRGL
+
+TARGET_DRIVERS += virtio_gpu
+TARGET_CPPFLAGS += -DGALLIUM_VIRGL
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/drivers/virgl/libvirgl.la \
+ $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la
+ $(LIBDRM_LIBS)
+
+endif
diff --git a/src/gallium/drivers/virgl/Makefile.am b/src/gallium/drivers/virgl/Makefile.am
new file mode 100644
index 00000000000..90a18ec3ffe
--- /dev/null
+++ b/src/gallium/drivers/virgl/Makefile.am
@@ -0,0 +1,43 @@
+# Copyright © 2014, 2015 Red Hat.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys/virgl/drm \
+ -I$(top_srcdir)/include \
+ $(GALLIUM_CFLAGS) \
+ $(LIBDRM_CFLAGS)
+
+noinst_LTLIBRARIES = libvirgl.la
+
+libvirgl_la_SOURCES = \
+ virgl_screen.c \
+ virgl_resource.c \
+ virgl_buffer.c \
+ virgl_texture.c \
+ virgl_context.c \
+ virgl_encode.c \
+ virgl_query.c \
+ virgl_streamout.c \
+ virgl_tgsi.c
diff --git a/src/gallium/drivers/virgl/virgl.h b/src/gallium/drivers/virgl/virgl.h
new file mode 100644
index 00000000000..21e3bc0ea36
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_H
+#define VIRGL_H
+
+#include "util/u_transfer.h"
+
+#include "../winsys/virgl/drm/virgl_hw.h"
+
+#include "virgl_winsys.h"
+#include "pipe/p_screen.h"
+struct virgl_screen {
+ struct pipe_screen base;
+ struct sw_winsys *winsys;
+ struct virgl_winsys *vws;
+
+ struct virgl_drm_caps caps;
+
+ uint32_t sub_ctx_id;
+};
+
+
+static inline struct virgl_screen *
+virgl_screen( struct pipe_screen *pipe )
+{
+ return (struct virgl_screen *)pipe;
+}
+
+#define VIRGL_MAP_BUFFER_ALIGNMENT 64
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c
new file mode 100644
index 00000000000..93fb29598a9
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_buffer.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_memory.h"
+#include "virgl_context.h"
+#include "virgl_resource.h"
+
+static void virgl_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
+{
+ struct virgl_screen *vs = virgl_screen(screen);
+ struct virgl_buffer *vbuf = virgl_buffer(buf);
+
+ util_range_destroy(&vbuf->valid_buffer_range);
+ vs->vws->resource_unref(vs->vws, vbuf->base.hw_res);
+ FREE(vbuf);
+}
+
+static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **transfer)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ struct virgl_buffer *vbuf = virgl_buffer(resource);
+ struct virgl_transfer *trans;
+ void *ptr;
+ bool readback;
+ uint32_t offset;
+ bool doflushwait = false;
+
+ if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE))
+ doflushwait = true;
+ else
+ doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage);
+
+ if (doflushwait)
+ ctx->flush(ctx, NULL, 0);
+
+ trans = util_slab_alloc(&vctx->texture_transfer_pool);
+ if (trans == NULL)
+ return NULL;
+
+ trans->base.resource = resource;
+ trans->base.level = level;
+ trans->base.usage = usage;
+ trans->base.box = *box;
+ trans->base.stride = 0;
+ trans->base.layer_stride = 0;
+
+ offset = box->x;
+
+ readback = virgl_res_needs_readback(vctx, &vbuf->base, usage);
+ if (readback)
+ vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level);
+
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ doflushwait = true;
+
+ if (doflushwait || readback)
+ vs->vws->resource_wait(vs->vws, vbuf->base.hw_res);
+
+ ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res);
+ if (!ptr) {
+ return NULL;
+ }
+
+ trans->offset = offset;
+ *transfer = &trans->base;
+
+ return ptr + trans->offset;
+}
+
+static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_transfer *trans = (struct virgl_transfer *)transfer;
+ struct virgl_buffer *vbuf = virgl_buffer(transfer->resource);
+
+ if (trans->base.usage & PIPE_TRANSFER_WRITE) {
+ if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ vbuf->base.clean = FALSE;
+ vctx->num_transfers++;
+ vs->vws->transfer_put(vs->vws, vbuf->base.hw_res,
+ &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level);
+
+ }
+ }
+
+ util_slab_free(&vctx->texture_transfer_pool, trans);
+}
+
+static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_buffer *vbuf = virgl_buffer(transfer->resource);
+
+ if (!vbuf->on_list) {
+ struct pipe_resource *res = NULL;
+
+ list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs);
+ vbuf->on_list = TRUE;
+ pipe_resource_reference(&res, &vbuf->base.u.b);
+ }
+
+ util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x,
+ transfer->box.x + box->x + box->width);
+
+ vbuf->base.clean = FALSE;
+}
+
+static const struct u_resource_vtbl virgl_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ virgl_buffer_destroy, /* resource_destroy */
+ virgl_buffer_transfer_map, /* transfer_map */
+ virgl_buffer_transfer_flush_region, /* transfer_flush_region */
+ virgl_buffer_transfer_unmap, /* transfer_unmap */
+ virgl_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs,
+ const struct pipe_resource *template)
+{
+ struct virgl_buffer *buf;
+ uint32_t size;
+ uint32_t vbind;
+ buf = CALLOC_STRUCT(virgl_buffer);
+ buf->base.clean = TRUE;
+ buf->base.u.b = *template;
+ buf->base.u.b.screen = &vs->base;
+ buf->base.u.vtbl = &virgl_buffer_vtbl;
+ pipe_reference_init(&buf->base.u.b.reference, 1);
+ util_range_init(&buf->valid_buffer_range);
+
+ vbind = pipe_to_virgl_bind(template->bind);
+ size = template->width0;
+
+ buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size);
+
+ util_range_set_empty(&buf->valid_buffer_range);
+ return &buf->base.u.b;
+}
diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c
new file mode 100644
index 00000000000..37a631a2b26
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_context.c
@@ -0,0 +1,962 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_shader_tokens.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_transfer.h"
+#include "util/u_helpers.h"
+#include "util/u_slab.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_blitter.h"
+#include "tgsi/tgsi_text.h"
+
+#include "pipebuffer/pb_buffer.h"
+#include "state_tracker/graw.h"
+#include "state_tracker/drm_driver.h"
+
+#include "virgl_encode.h"
+
+#include "virgl_context.h"
+
+#include "virgl_resource.h"
+#include "virgl.h"
+#include "state_tracker/sw_winsys.h"
+ struct pipe_screen encscreen;
+
+static uint32_t next_handle;
+uint32_t virgl_object_assign_handle(void)
+{
+ return ++next_handle;
+}
+
+static void virgl_buffer_flush(struct virgl_context *vctx,
+ struct virgl_buffer *vbuf)
+{
+ struct virgl_screen *rs = virgl_screen(vctx->base.screen);
+ struct pipe_box box;
+
+ assert(vbuf->on_list);
+
+ box.height = 1;
+ box.depth = 1;
+ box.y = 0;
+ box.z = 0;
+
+ box.x = vbuf->valid_buffer_range.start;
+ box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0);
+
+ vctx->num_transfers++;
+ rs->vws->transfer_put(rs->vws, vbuf->base.hw_res,
+ &box, 0, 0, box.x, 0);
+
+ util_range_set_empty(&vbuf->valid_buffer_range);
+}
+
+static void virgl_attach_res_framebuffer(struct virgl_context *vctx)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct pipe_surface *surf;
+ struct virgl_resource *res;
+ unsigned i;
+
+ surf = vctx->framebuffer.zsbuf;
+ if (surf) {
+ res = (struct virgl_resource *)surf->texture;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+ for (i = 0; i < vctx->framebuffer.nr_cbufs; i++) {
+ surf = vctx->framebuffer.cbufs[i];
+ if (surf) {
+ res = (struct virgl_resource *)surf->texture;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+ }
+}
+
+static void virgl_attach_res_sampler_views(struct virgl_context *vctx,
+ unsigned shader_type)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
+ struct virgl_resource *res;
+ uint32_t remaining_mask = tinfo->enabled_mask;
+ unsigned i;
+ while (remaining_mask) {
+ i = u_bit_scan(&remaining_mask);
+ assert(tinfo->views[i]);
+
+ res = (struct virgl_resource *)tinfo->views[i]->base.texture;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+}
+
+static void virgl_attach_res_vertex_buffers(struct virgl_context *vctx)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct virgl_resource *res;
+ unsigned i;
+
+ for (i = 0; i < vctx->num_vertex_buffers; i++) {
+ res = (struct virgl_resource *)vctx->vertex_buffer[i].buffer;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+}
+
+static void virgl_attach_res_index_buffer(struct virgl_context *vctx)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct virgl_resource *res;
+
+ res = (struct virgl_resource *)vctx->index_buffer.buffer;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+}
+
+static void virgl_attach_res_so_targets(struct virgl_context *vctx)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct virgl_resource *res;
+ unsigned i;
+
+ for (i = 0; i < vctx->num_so_targets; i++) {
+ res = (struct virgl_resource *)vctx->so_targets[i].base.buffer;
+ if (res)
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+}
+
+static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx,
+ unsigned shader_type)
+{
+ struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+ struct virgl_resource *res;
+ unsigned i;
+ for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+ res = (struct virgl_resource *)vctx->ubos[shader_type][i];
+ if (res) {
+ vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+ }
+ }
+}
+
+/*
+ * after flushing, the hw context still has a bunch of
+ * resources bound, so we need to rebind those here.
+ */
+static void virgl_reemit_res(struct virgl_context *vctx)
+{
+ unsigned shader_type;
+
+ /* reattach any flushed resources */
+ /* framebuffer, sampler views, vertex/index/uniform/stream buffers */
+ virgl_attach_res_framebuffer(vctx);
+
+ for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+ virgl_attach_res_sampler_views(vctx, shader_type);
+ virgl_attach_res_uniform_buffers(vctx, shader_type);
+ }
+ virgl_attach_res_index_buffer(vctx);
+ virgl_attach_res_vertex_buffers(vctx);
+ virgl_attach_res_so_targets(vctx);
+}
+
+static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ const struct pipe_surface *templ)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_surface *surf;
+ struct virgl_resource *res = virgl_resource(resource);
+ uint32_t handle;
+
+ surf = CALLOC_STRUCT(virgl_surface);
+ if (surf == NULL)
+ return NULL;
+
+ res->clean = FALSE;
+ handle = virgl_object_assign_handle();
+ pipe_reference_init(&surf->base.reference, 1);
+ pipe_resource_reference(&surf->base.texture, resource);
+ surf->base.context = ctx;
+ surf->base.format = templ->format;
+ if (resource->target != PIPE_BUFFER) {
+ surf->base.width = u_minify(resource->width0, templ->u.tex.level);
+ surf->base.height = u_minify(resource->height0, templ->u.tex.level);
+ surf->base.u.tex.level = templ->u.tex.level;
+ surf->base.u.tex.first_layer = templ->u.tex.first_layer;
+ surf->base.u.tex.last_layer = templ->u.tex.last_layer;
+ } else {
+ surf->base.width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
+ surf->base.height = resource->height0;
+ surf->base.u.buf.first_element = templ->u.buf.first_element;
+ surf->base.u.buf.last_element = templ->u.buf.last_element;
+ }
+ virgl_encoder_create_surface(vctx, handle, res, &surf->base);
+ surf->handle = handle;
+ return &surf->base;
+}
+
+static void virgl_surface_destroy(struct pipe_context *ctx,
+ struct pipe_surface *psurf)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_surface *surf = (struct virgl_surface *)psurf;
+
+ pipe_resource_reference(&surf->base.texture, NULL);
+ virgl_encode_delete_object(vctx, surf->handle, VIRGL_OBJECT_SURFACE);
+ FREE(surf);
+}
+
+static void *virgl_create_blend_state(struct pipe_context *ctx,
+ const struct pipe_blend_state *blend_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle;
+ handle = virgl_object_assign_handle();
+
+ virgl_encode_blend_state(vctx, handle, blend_state);
+ return (void *)(unsigned long)handle;
+
+}
+
+static void virgl_bind_blend_state(struct pipe_context *ctx,
+ void *blend_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)blend_state;
+ virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_BLEND);
+}
+
+static void virgl_delete_blend_state(struct pipe_context *ctx,
+ void *blend_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)blend_state;
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_BLEND);
+}
+
+static void *virgl_create_depth_stencil_alpha_state(struct pipe_context *ctx,
+ const struct pipe_depth_stencil_alpha_state *blend_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle;
+ handle = virgl_object_assign_handle();
+
+ virgl_encode_dsa_state(vctx, handle, blend_state);
+ return (void *)(unsigned long)handle;
+}
+
+static void virgl_bind_depth_stencil_alpha_state(struct pipe_context *ctx,
+ void *blend_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)blend_state;
+ virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_DSA);
+}
+
+static void virgl_delete_depth_stencil_alpha_state(struct pipe_context *ctx,
+ void *dsa_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)dsa_state;
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_DSA);
+}
+
+static void *virgl_create_rasterizer_state(struct pipe_context *ctx,
+ const struct pipe_rasterizer_state *rs_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle;
+ handle = virgl_object_assign_handle();
+
+ virgl_encode_rasterizer_state(vctx, handle, rs_state);
+ return (void *)(unsigned long)handle;
+}
+
+static void virgl_bind_rasterizer_state(struct pipe_context *ctx,
+ void *rs_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)rs_state;
+
+ virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER);
+}
+
+static void virgl_delete_rasterizer_state(struct pipe_context *ctx,
+ void *rs_state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)rs_state;
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER);
+}
+
+static void virgl_set_framebuffer_state(struct pipe_context *ctx,
+ const struct pipe_framebuffer_state *state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ vctx->framebuffer = *state;
+ virgl_encoder_set_framebuffer_state(vctx, state);
+ virgl_attach_res_framebuffer(vctx);
+}
+
+static void virgl_set_viewport_states(struct pipe_context *ctx,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_viewport_states(vctx, start_slot, num_viewports, state);
+}
+
+static void *virgl_create_vertex_elements_state(struct pipe_context *ctx,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = virgl_object_assign_handle();
+ virgl_encoder_create_vertex_elements(vctx, handle,
+ num_elements, elements);
+ return (void*)(unsigned long)handle;
+
+}
+
+static void virgl_delete_vertex_elements_state(struct pipe_context *ctx,
+ void *ve)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)ve;
+
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+}
+
+static void virgl_bind_vertex_elements_state(struct pipe_context *ctx,
+ void *ve)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)ve;
+ virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+}
+
+static void virgl_set_vertex_buffers(struct pipe_context *ctx,
+ unsigned start_slot,
+ unsigned num_buffers,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ util_set_vertex_buffers_count(vctx->vertex_buffer,
+ &vctx->num_vertex_buffers,
+ buffers, start_slot, num_buffers);
+
+ vctx->vertex_array_dirty = TRUE;
+}
+
+static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ if (vctx->vertex_array_dirty) {
+ virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
+ virgl_attach_res_vertex_buffers(vctx);
+ }
+}
+
+static void virgl_set_stencil_ref(struct pipe_context *ctx,
+ const struct pipe_stencil_ref *ref)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_stencil_ref(vctx, ref);
+}
+
+static void virgl_set_blend_color(struct pipe_context *ctx,
+ const struct pipe_blend_color *color)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_blend_color(vctx, color);
+}
+
+static void virgl_set_index_buffer(struct pipe_context *ctx,
+ const struct pipe_index_buffer *ib)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ if (ib) {
+ pipe_resource_reference(&vctx->index_buffer.buffer, ib->buffer);
+ memcpy(&vctx->index_buffer, ib, sizeof(*ib));
+ } else {
+ pipe_resource_reference(&vctx->index_buffer.buffer, NULL);
+ }
+}
+
+static void virgl_hw_set_index_buffer(struct pipe_context *ctx,
+ struct pipe_index_buffer *ib)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_index_buffer(vctx, ib);
+ virgl_attach_res_index_buffer(vctx);
+}
+
+static void virgl_set_constant_buffer(struct pipe_context *ctx,
+ uint shader, uint index,
+ struct pipe_constant_buffer *buf)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ if (buf) {
+ if (!buf->user_buffer){
+ struct virgl_resource *res = (struct virgl_resource *)buf->buffer;
+ virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset,
+ buf->buffer_size, res);
+ pipe_resource_reference(&vctx->ubos[shader][index], buf->buffer);
+ return;
+ }
+ pipe_resource_reference(&vctx->ubos[shader][index], NULL);
+ virgl_encoder_write_constant_buffer(vctx, shader, index, buf->buffer_size / 4, buf->user_buffer);
+ } else {
+ virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL);
+ pipe_resource_reference(&vctx->ubos[shader][index], NULL);
+ }
+}
+
+void virgl_transfer_inline_write(struct pipe_context *ctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ struct virgl_resource *grres = (struct virgl_resource *)res;
+ struct virgl_buffer *vbuf = virgl_buffer(res);
+
+ grres->clean = FALSE;
+
+ if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) {
+ ctx->flush(ctx, NULL, 0);
+
+ vs->vws->resource_wait(vs->vws, vbuf->base.hw_res);
+ }
+
+ virgl_encoder_inline_write(vctx, grres, level, usage,
+ box, data, stride, layer_stride);
+}
+
+static void *virgl_shader_encoder(struct pipe_context *ctx,
+ const struct pipe_shader_state *shader,
+ unsigned type)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle;
+ struct tgsi_token *new_tokens;
+ int ret;
+
+ new_tokens = virgl_tgsi_transform(shader->tokens);
+ if (!new_tokens)
+ return NULL;
+
+ handle = virgl_object_assign_handle();
+ /* encode VS state */
+ ret = virgl_encode_shader_state(vctx, handle, type,
+ &shader->stream_output,
+ new_tokens);
+ if (ret) {
+ return NULL;
+ }
+
+ FREE(new_tokens);
+ return (void *)(unsigned long)handle;
+
+}
+static void *virgl_create_vs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *shader)
+{
+ return virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX);
+}
+
+static void *virgl_create_gs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *shader)
+{
+ return virgl_shader_encoder(ctx, shader, PIPE_SHADER_GEOMETRY);
+}
+
+static void *virgl_create_fs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *shader)
+{
+ return virgl_shader_encoder(ctx, shader, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+virgl_delete_fs_state(struct pipe_context *ctx,
+ void *fs)
+{
+ uint32_t handle = (unsigned long)fs;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER);
+}
+
+static void
+virgl_delete_gs_state(struct pipe_context *ctx,
+ void *gs)
+{
+ uint32_t handle = (unsigned long)gs;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER);
+}
+
+static void
+virgl_delete_vs_state(struct pipe_context *ctx,
+ void *vs)
+{
+ uint32_t handle = (unsigned long)vs;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER);
+}
+
+static void virgl_bind_vs_state(struct pipe_context *ctx,
+ void *vss)
+{
+ uint32_t handle = (unsigned long)vss;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_VERTEX);
+}
+
+static void virgl_bind_gs_state(struct pipe_context *ctx,
+ void *vss)
+{
+ uint32_t handle = (unsigned long)vss;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_GEOMETRY);
+}
+
+
+static void virgl_bind_fs_state(struct pipe_context *ctx,
+ void *vss)
+{
+ uint32_t handle = (unsigned long)vss;
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_FRAGMENT);
+}
+
+static void virgl_clear(struct pipe_context *ctx,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+ virgl_encode_clear(vctx, buffers, color, depth, stencil);
+}
+
+static void virgl_draw_vbo(struct pipe_context *ctx,
+ const struct pipe_draw_info *dinfo)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *rs = virgl_screen(ctx->screen);
+ struct pipe_index_buffer ib = {};
+ struct pipe_draw_info info = *dinfo;
+
+ if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) {
+ util_primconvert_save_index_buffer(vctx->primconvert, &vctx->index_buffer);
+ util_primconvert_draw_vbo(vctx->primconvert, dinfo);
+ return;
+ }
+ if (info.indexed) {
+ pipe_resource_reference(&ib.buffer, vctx->index_buffer.buffer);
+ ib.user_buffer = vctx->index_buffer.user_buffer;
+ ib.index_size = vctx->index_buffer.index_size;
+ ib.offset = vctx->index_buffer.offset + info.start * ib.index_size;
+
+ if (ib.user_buffer) {
+ u_upload_data(vctx->uploader, 0, info.count * ib.index_size,
+ ib.user_buffer, &ib.offset, &ib.buffer);
+ ib.user_buffer = NULL;
+ }
+ }
+
+ u_upload_unmap(vctx->uploader);
+
+ vctx->num_draws++;
+ virgl_hw_set_vertex_buffers(ctx);
+ if (info.indexed)
+ virgl_hw_set_index_buffer(ctx, &ib);
+
+ virgl_encoder_draw_vbo(vctx, &info);
+
+ pipe_resource_reference(&ib.buffer, NULL);
+
+}
+
+static void virgl_flush_eq(struct virgl_context *ctx, void *closure)
+{
+ struct virgl_screen *rs = virgl_screen(ctx->base.screen);
+
+ /* send the buffer to the remote side for decoding */
+ ctx->num_transfers = ctx->num_draws = 0;
+ rs->vws->submit_cmd(rs->vws, ctx->cbuf);
+
+ virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id);
+
+ /* add back current framebuffer resources to reference list? */
+ virgl_reemit_res(ctx);
+}
+
+static void virgl_flush_from_st(struct pipe_context *ctx,
+ struct pipe_fence_handle **fence,
+ enum pipe_flush_flags flags)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *rs = virgl_screen(ctx->screen);
+ struct virgl_buffer *buf, *tmp;
+
+ if (fence)
+ *fence = rs->vws->cs_create_fence(rs->vws);
+
+ LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) {
+ struct pipe_resource *res = &buf->base.u.b;
+ virgl_buffer_flush(vctx, buf);
+ list_del(&buf->flush_list);
+ buf->on_list = FALSE;
+ pipe_resource_reference(&res, NULL);
+
+ }
+ virgl_flush_eq(vctx, vctx);
+}
+
+static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_sampler_view *grview = CALLOC_STRUCT(virgl_sampler_view);
+ uint32_t handle;
+ struct virgl_resource *res;
+
+ if (state == NULL)
+ return NULL;
+
+ res = (struct virgl_resource *)texture;
+ handle = virgl_object_assign_handle();
+ virgl_encode_sampler_view(vctx, handle, res, state);
+
+ grview->base = *state;
+ grview->base.reference.count = 1;
+
+ grview->base.texture = NULL;
+ grview->base.context = ctx;
+ pipe_resource_reference(&grview->base.texture, texture);
+ grview->handle = handle;
+ return &grview->base;
+}
+
+static void virgl_set_sampler_views(struct pipe_context *ctx,
+ unsigned shader_type,
+ unsigned start_slot,
+ unsigned num_views,
+ struct pipe_sampler_view **views)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ int i;
+ uint32_t disable_mask = ~((1ull << num_views) - 1);
+ struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
+ uint32_t new_mask = 0;
+ uint32_t remaining_mask;
+
+ remaining_mask = tinfo->enabled_mask & disable_mask;
+
+ while (remaining_mask) {
+ i = u_bit_scan(&remaining_mask);
+ assert(tinfo->views[i]);
+
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
+ }
+
+ for (i = 0; i < num_views; i++) {
+ struct virgl_sampler_view *grview = (struct virgl_sampler_view *)views[i];
+
+ if (views[i] == (struct pipe_sampler_view *)tinfo->views[i])
+ continue;
+
+ if (grview) {
+ new_mask |= 1 << i;
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]);
+ } else {
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
+ disable_mask |= 1 << i;
+ }
+ }
+
+ tinfo->enabled_mask &= ~disable_mask;
+ tinfo->enabled_mask |= new_mask;
+ virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views);
+ virgl_attach_res_sampler_views(vctx, shader_type);
+}
+
+static void virgl_destroy_sampler_view(struct pipe_context *ctx,
+ struct pipe_sampler_view *view)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_sampler_view *grview = (struct virgl_sampler_view *)view;
+
+ virgl_encode_delete_object(vctx, grview->handle, VIRGL_OBJECT_SAMPLER_VIEW);
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+static void *virgl_create_sampler_state(struct pipe_context *ctx,
+ const struct pipe_sampler_state *state)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle;
+
+ handle = virgl_object_assign_handle();
+
+ virgl_encode_sampler_state(vctx, handle, state);
+ return (void *)(unsigned long)handle;
+}
+
+static void virgl_delete_sampler_state(struct pipe_context *ctx,
+ void *ss)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handle = (unsigned long)ss;
+
+ virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SAMPLER_STATE);
+}
+
+static void virgl_bind_sampler_states(struct pipe_context *ctx,
+ unsigned shader, unsigned start_slot,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ uint32_t handles[32];
+ int i;
+ for (i = 0; i < num_samplers; i++) {
+ handles[i] = (unsigned long)(samplers[i]);
+ }
+ virgl_encode_bind_sampler_states(vctx, shader, start_slot, num_samplers, handles);
+}
+
+static void virgl_set_polygon_stipple(struct pipe_context *ctx,
+ const struct pipe_poly_stipple *ps)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_polygon_stipple(vctx, ps);
+}
+
+static void virgl_set_scissor_states(struct pipe_context *ctx,
+ unsigned start_slot,
+ unsigned num_scissor,
+ const struct pipe_scissor_state *ss)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_scissor_state(vctx, start_slot, num_scissor, ss);
+}
+
+static void virgl_set_sample_mask(struct pipe_context *ctx,
+ unsigned sample_mask)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_sample_mask(vctx, sample_mask);
+}
+
+static void virgl_set_clip_state(struct pipe_context *ctx,
+ const struct pipe_clip_state *clip)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ virgl_encoder_set_clip_state(vctx, clip);
+}
+
+static void virgl_resource_copy_region(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_resource *dres = (struct virgl_resource *)dst;
+ struct virgl_resource *sres = (struct virgl_resource *)src;
+
+ dres->clean = FALSE;
+ virgl_encode_resource_copy_region(vctx, dres,
+ dst_level, dstx, dsty, dstz,
+ sres, src_level,
+ src_box);
+}
+
+static void
+virgl_flush_resource(struct pipe_context *pipe,
+ struct pipe_resource *resource)
+{
+}
+
+static void virgl_blit(struct pipe_context *ctx,
+ const struct pipe_blit_info *blit)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_resource *dres = (struct virgl_resource *)blit->dst.resource;
+ struct virgl_resource *sres = (struct virgl_resource *)blit->src.resource;
+
+ dres->clean = FALSE;
+ virgl_encode_blit(vctx, dres, sres,
+ blit);
+}
+
+static void
+virgl_context_destroy( struct pipe_context *ctx )
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *rs = virgl_screen(ctx->screen);
+
+ vctx->framebuffer.zsbuf = NULL;
+ vctx->framebuffer.nr_cbufs = 0;
+ virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+ virgl_flush_eq(vctx, vctx);
+
+ rs->vws->cmd_buf_destroy(vctx->cbuf);
+ if (vctx->uploader)
+ u_upload_destroy(vctx->uploader);
+ util_primconvert_destroy(vctx->primconvert);
+
+ util_slab_destroy(&vctx->texture_transfer_pool);
+ FREE(vctx);
+}
+
+struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
+ void *priv,
+ unsigned flags)
+{
+ struct virgl_context *vctx;
+ struct virgl_screen *rs = virgl_screen(pscreen);
+ vctx = CALLOC_STRUCT(virgl_context);
+
+ vctx->cbuf = rs->vws->cmd_buf_create(rs->vws);
+ if (!vctx->cbuf) {
+ FREE(vctx);
+ return NULL;
+ }
+
+ vctx->base.destroy = virgl_context_destroy;
+ vctx->base.create_surface = virgl_create_surface;
+ vctx->base.surface_destroy = virgl_surface_destroy;
+ vctx->base.set_framebuffer_state = virgl_set_framebuffer_state;
+ vctx->base.create_blend_state = virgl_create_blend_state;
+ vctx->base.bind_blend_state = virgl_bind_blend_state;
+ vctx->base.delete_blend_state = virgl_delete_blend_state;
+ vctx->base.create_depth_stencil_alpha_state = virgl_create_depth_stencil_alpha_state;
+ vctx->base.bind_depth_stencil_alpha_state = virgl_bind_depth_stencil_alpha_state;
+ vctx->base.delete_depth_stencil_alpha_state = virgl_delete_depth_stencil_alpha_state;
+ vctx->base.create_rasterizer_state = virgl_create_rasterizer_state;
+ vctx->base.bind_rasterizer_state = virgl_bind_rasterizer_state;
+ vctx->base.delete_rasterizer_state = virgl_delete_rasterizer_state;
+
+ vctx->base.set_viewport_states = virgl_set_viewport_states;
+ vctx->base.create_vertex_elements_state = virgl_create_vertex_elements_state;
+ vctx->base.bind_vertex_elements_state = virgl_bind_vertex_elements_state;
+ vctx->base.delete_vertex_elements_state = virgl_delete_vertex_elements_state;
+ vctx->base.set_vertex_buffers = virgl_set_vertex_buffers;
+ vctx->base.set_index_buffer = virgl_set_index_buffer;
+ vctx->base.set_constant_buffer = virgl_set_constant_buffer;
+ vctx->base.transfer_inline_write = virgl_transfer_inline_write;
+
+ vctx->base.create_vs_state = virgl_create_vs_state;
+ vctx->base.create_gs_state = virgl_create_gs_state;
+ vctx->base.create_fs_state = virgl_create_fs_state;
+
+ vctx->base.bind_vs_state = virgl_bind_vs_state;
+ vctx->base.bind_gs_state = virgl_bind_gs_state;
+ vctx->base.bind_fs_state = virgl_bind_fs_state;
+
+ vctx->base.delete_vs_state = virgl_delete_vs_state;
+ vctx->base.delete_gs_state = virgl_delete_gs_state;
+ vctx->base.delete_fs_state = virgl_delete_fs_state;
+
+ vctx->base.clear = virgl_clear;
+ vctx->base.draw_vbo = virgl_draw_vbo;
+ vctx->base.flush = virgl_flush_from_st;
+ vctx->base.screen = pscreen;
+ vctx->base.create_sampler_view = virgl_create_sampler_view;
+ vctx->base.sampler_view_destroy = virgl_destroy_sampler_view;
+ vctx->base.set_sampler_views = virgl_set_sampler_views;
+
+ vctx->base.create_sampler_state = virgl_create_sampler_state;
+ vctx->base.delete_sampler_state = virgl_delete_sampler_state;
+ vctx->base.bind_sampler_states = virgl_bind_sampler_states;
+
+ vctx->base.set_polygon_stipple = virgl_set_polygon_stipple;
+ vctx->base.set_scissor_states = virgl_set_scissor_states;
+ vctx->base.set_sample_mask = virgl_set_sample_mask;
+ vctx->base.set_stencil_ref = virgl_set_stencil_ref;
+ vctx->base.set_clip_state = virgl_set_clip_state;
+
+ vctx->base.set_blend_color = virgl_set_blend_color;
+
+ vctx->base.resource_copy_region = virgl_resource_copy_region;
+ vctx->base.flush_resource = virgl_flush_resource;
+ vctx->base.blit = virgl_blit;
+
+ virgl_init_context_resource_functions(&vctx->base);
+ virgl_init_query_functions(vctx);
+ virgl_init_so_functions(vctx);
+
+ list_inithead(&vctx->to_flush_bufs);
+ util_slab_create(&vctx->texture_transfer_pool, sizeof(struct virgl_transfer),
+ 16, UTIL_SLAB_SINGLETHREADED);
+
+ vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask);
+ vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, 256,
+ PIPE_BIND_INDEX_BUFFER);
+ if (!vctx->uploader)
+ goto fail;
+
+ vctx->hw_sub_ctx_id = rs->sub_ctx_id++;
+ virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+
+ virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+ return &vctx->base;
+fail:
+ return NULL;
+}
diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h
new file mode 100644
index 00000000000..878d7862dbd
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_context.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_CONTEXT_H
+#define VIRGL_CONTEXT_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "virgl_protocol.h"
+
+#include "virgl.h"
+#include "util/u_slab.h"
+#include "util/list.h"
+#include "indices/u_primconvert.h"
+
+struct virgl_resource;
+struct virgl_buffer;
+
+struct virgl_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t handle;
+};
+
+struct virgl_so_target {
+ struct pipe_stream_output_target base;
+ uint32_t handle;
+};
+
+struct virgl_textures_info {
+ struct virgl_sampler_view *views[16];
+ uint32_t enabled_mask;
+};
+
+struct virgl_context {
+ struct pipe_context base;
+ struct virgl_cmd_buf *cbuf;
+
+ struct virgl_textures_info samplers[PIPE_SHADER_TYPES];
+
+ struct pipe_framebuffer_state framebuffer;
+
+ struct util_slab_mempool texture_transfer_pool;
+
+ struct pipe_index_buffer index_buffer;
+ struct u_upload_mgr *uploader;
+
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_buffers;
+ boolean vertex_array_dirty;
+
+ struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS];
+ unsigned num_so_targets;
+
+ struct pipe_resource *ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+ int num_transfers;
+ int num_draws;
+ struct list_head to_flush_bufs;
+
+ struct primconvert_context *primconvert;
+ uint32_t hw_sub_ctx_id;
+};
+
+struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
+ void *priv, unsigned flags);
+
+void virgl_init_blit_functions(struct virgl_context *vctx);
+void virgl_init_query_functions(struct virgl_context *vctx);
+void virgl_init_so_functions(struct virgl_context *vctx);
+
+void virgl_transfer_inline_write(struct pipe_context *ctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride);
+
+struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in);
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c
new file mode 100644
index 00000000000..b4b49a21494
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -0,0 +1,863 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_state.h"
+#include "virgl_encode.h"
+#include "virgl_resource.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+static int virgl_encoder_write_cmd_dword(struct virgl_context *ctx,
+ uint32_t dword)
+{
+ int len = (dword >> 16);
+
+ if ((ctx->cbuf->cdw + len + 1) > VIRGL_MAX_CMDBUF_DWORDS)
+ ctx->base.flush(&ctx->base, NULL, 0);
+
+ virgl_encoder_write_dword(ctx->cbuf, dword);
+ return 0;
+}
+
+static void virgl_encoder_write_res(struct virgl_context *ctx,
+ struct virgl_resource *res)
+{
+ struct virgl_winsys *vws = virgl_screen(ctx->base.screen)->vws;
+
+ if (res && res->hw_res)
+ vws->emit_res(vws, ctx->cbuf, res->hw_res, TRUE);
+ else {
+ virgl_encoder_write_dword(ctx->cbuf, 0);
+ }
+}
+
+int virgl_encode_bind_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_OBJECT, object, 1));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ return 0;
+}
+
+int virgl_encode_delete_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_OBJECT, object, 1));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ return 0;
+}
+
+int virgl_encode_blend_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_blend_state *blend_state)
+{
+ uint32_t tmp;
+ int i;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_BLEND, VIRGL_OBJ_BLEND_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+
+ tmp =
+ VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(blend_state->independent_blend_enable) |
+ VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(blend_state->logicop_enable) |
+ VIRGL_OBJ_BLEND_S0_DITHER(blend_state->dither) |
+ VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(blend_state->alpha_to_coverage) |
+ VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(blend_state->alpha_to_one);
+
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+
+ tmp = VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(blend_state->logicop_func);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+
+ for (i = 0; i < VIRGL_MAX_COLOR_BUFS; i++) {
+ tmp =
+ VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(blend_state->rt[i].blend_enable) |
+ VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(blend_state->rt[i].rgb_func) |
+ VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(blend_state->rt[i].rgb_src_factor) |
+ VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(blend_state->rt[i].rgb_dst_factor)|
+ VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(blend_state->rt[i].alpha_func) |
+ VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(blend_state->rt[i].alpha_src_factor) |
+ VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(blend_state->rt[i].alpha_dst_factor) |
+ VIRGL_OBJ_BLEND_S2_RT_COLORMASK(blend_state->rt[i].colormask);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ }
+ return 0;
+}
+
+int virgl_encode_dsa_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_depth_stencil_alpha_state *dsa_state)
+{
+ uint32_t tmp;
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_DSA, VIRGL_OBJ_DSA_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+
+ tmp = VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(dsa_state->depth.enabled) |
+ VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(dsa_state->depth.writemask) |
+ VIRGL_OBJ_DSA_S0_DEPTH_FUNC(dsa_state->depth.func) |
+ VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(dsa_state->alpha.enabled) |
+ VIRGL_OBJ_DSA_S0_ALPHA_FUNC(dsa_state->alpha.func);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+
+ for (i = 0; i < 2; i++) {
+ tmp = VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(dsa_state->stencil[i].enabled) |
+ VIRGL_OBJ_DSA_S1_STENCIL_FUNC(dsa_state->stencil[i].func) |
+ VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(dsa_state->stencil[i].fail_op) |
+ VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(dsa_state->stencil[i].zpass_op) |
+ VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(dsa_state->stencil[i].zfail_op) |
+ VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(dsa_state->stencil[i].valuemask) |
+ VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(dsa_state->stencil[i].writemask);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ }
+
+ virgl_encoder_write_dword(ctx->cbuf, fui(dsa_state->alpha.ref_value));
+ return 0;
+}
+int virgl_encode_rasterizer_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_rasterizer_state *state)
+{
+ uint32_t tmp;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_RASTERIZER, VIRGL_OBJ_RS_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+
+ tmp = VIRGL_OBJ_RS_S0_FLATSHADE(state->flatshade) |
+ VIRGL_OBJ_RS_S0_DEPTH_CLIP(state->depth_clip) |
+ VIRGL_OBJ_RS_S0_CLIP_HALFZ(state->clip_halfz) |
+ VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(state->rasterizer_discard) |
+ VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(state->flatshade_first) |
+ VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(state->light_twoside) |
+ VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(state->sprite_coord_mode) |
+ VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(state->point_quad_rasterization) |
+ VIRGL_OBJ_RS_S0_CULL_FACE(state->cull_face) |
+ VIRGL_OBJ_RS_S0_FILL_FRONT(state->fill_front) |
+ VIRGL_OBJ_RS_S0_FILL_BACK(state->fill_back) |
+ VIRGL_OBJ_RS_S0_SCISSOR(state->scissor) |
+ VIRGL_OBJ_RS_S0_FRONT_CCW(state->front_ccw) |
+ VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(state->clamp_vertex_color) |
+ VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(state->clamp_fragment_color) |
+ VIRGL_OBJ_RS_S0_OFFSET_LINE(state->offset_line) |
+ VIRGL_OBJ_RS_S0_OFFSET_POINT(state->offset_point) |
+ VIRGL_OBJ_RS_S0_OFFSET_TRI(state->offset_tri) |
+ VIRGL_OBJ_RS_S0_POLY_SMOOTH(state->poly_smooth) |
+ VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(state->poly_stipple_enable) |
+ VIRGL_OBJ_RS_S0_POINT_SMOOTH(state->point_smooth) |
+ VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(state->point_size_per_vertex) |
+ VIRGL_OBJ_RS_S0_MULTISAMPLE(state->multisample) |
+ VIRGL_OBJ_RS_S0_LINE_SMOOTH(state->line_smooth) |
+ VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
+ VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(state->line_last_pixel) |
+ VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(state->half_pixel_center) |
+ VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(state->bottom_edge_rule);
+
+ virgl_encoder_write_dword(ctx->cbuf, tmp); /* S0 */
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->point_size)); /* S1 */
+ virgl_encoder_write_dword(ctx->cbuf, state->sprite_coord_enable); /* S2 */
+ tmp = VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(state->line_stipple_pattern) |
+ VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(state->line_stipple_factor) |
+ VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(state->clip_plane_enable);
+ virgl_encoder_write_dword(ctx->cbuf, tmp); /* S3 */
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->line_width)); /* S4 */
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_units)); /* S5 */
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_scale)); /* S6 */
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_clamp)); /* S7 */
+ return 0;
+}
+
+static void virgl_emit_shader_header(struct virgl_context *ctx,
+ uint32_t handle, uint32_t len,
+ uint32_t type, uint32_t offlen,
+ uint32_t num_tokens)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SHADER, len));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_dword(ctx->cbuf, type);
+ virgl_encoder_write_dword(ctx->cbuf, offlen);
+ virgl_encoder_write_dword(ctx->cbuf, num_tokens);
+}
+
+static void virgl_emit_shader_streamout(struct virgl_context *ctx,
+ const struct pipe_stream_output_info *so_info)
+{
+ int num_outputs = 0;
+ int i;
+ uint32_t tmp;
+
+ if (so_info)
+ num_outputs = so_info->num_outputs;
+
+ virgl_encoder_write_dword(ctx->cbuf, num_outputs);
+ if (num_outputs) {
+ for (i = 0; i < 4; i++)
+ virgl_encoder_write_dword(ctx->cbuf, so_info->stride[i]);
+
+ for (i = 0; i < so_info->num_outputs; i++) {
+ tmp =
+ VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(so_info->output[i].register_index) |
+ VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(so_info->output[i].start_component) |
+ VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(so_info->output[i].num_components) |
+ VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(so_info->output[i].output_buffer) |
+ VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(so_info->output[i].dst_offset);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ virgl_encoder_write_dword(ctx->cbuf, 0);
+ }
+ }
+}
+
+int virgl_encode_shader_state(struct virgl_context *ctx,
+ uint32_t handle,
+ uint32_t type,
+ const struct pipe_stream_output_info *so_info,
+ const struct tgsi_token *tokens)
+{
+ char *str, *sptr;
+ uint32_t shader_len, len;
+ bool bret;
+ int num_tokens = tgsi_num_tokens(tokens);
+ int str_total_size = 65536;
+ int retry_size = 1;
+ uint32_t left_bytes, base_hdr_size, strm_hdr_size, thispass;
+ bool first_pass;
+ str = CALLOC(1, str_total_size);
+ if (!str)
+ return -1;
+
+ do {
+ int old_size;
+
+ bret = tgsi_dump_str(tokens, TGSI_DUMP_FLOAT_AS_HEX, str, str_total_size);
+ if (bret == false) {
+ fprintf(stderr, "Failed to translate shader in available space - trying again\n");
+ old_size = str_total_size;
+ str_total_size = 65536 * ++retry_size;
+ str = REALLOC(str, old_size, str_total_size);
+ if (!str)
+ return -1;
+ }
+ } while (bret == false && retry_size < 10);
+
+ if (bret == false)
+ return -1;
+
+ shader_len = strlen(str) + 1;
+
+ left_bytes = shader_len;
+
+ base_hdr_size = 5;
+ strm_hdr_size = so_info->num_outputs ? so_info->num_outputs * 2 + 4 : 0;
+ first_pass = true;
+ sptr = str;
+ while (left_bytes) {
+ uint32_t length, offlen;
+ int hdr_len = base_hdr_size + (first_pass ? strm_hdr_size : 0);
+ if (ctx->cbuf->cdw + hdr_len + 1 > VIRGL_MAX_CMDBUF_DWORDS)
+ ctx->base.flush(&ctx->base, NULL, 0);
+
+ thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4;
+
+ length = MIN2(thispass, left_bytes);
+ len = ((length + 3) / 4) + hdr_len;
+
+ if (first_pass)
+ offlen = VIRGL_OBJ_SHADER_OFFSET_VAL(shader_len);
+ else
+ offlen = VIRGL_OBJ_SHADER_OFFSET_VAL((uintptr_t)sptr - (uintptr_t)str) | VIRGL_OBJ_SHADER_OFFSET_CONT;
+
+ virgl_emit_shader_header(ctx, handle, len, type, offlen, num_tokens);
+
+ virgl_emit_shader_streamout(ctx, first_pass ? so_info : NULL);
+
+ virgl_encoder_write_block(ctx->cbuf, (uint8_t *)sptr, length);
+
+ sptr += length;
+ first_pass = false;
+ left_bytes -= length;
+ }
+
+ FREE(str);
+ return 0;
+}
+
+
+int virgl_encode_clear(struct virgl_context *ctx,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ int i;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CLEAR, 0, VIRGL_OBJ_CLEAR_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, buffers);
+ for (i = 0; i < 4; i++)
+ virgl_encoder_write_dword(ctx->cbuf, color->ui[i]);
+ virgl_encoder_write_qword(ctx->cbuf, *(uint64_t *)&depth);
+ virgl_encoder_write_dword(ctx->cbuf, stencil);
+ return 0;
+}
+
+int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx,
+ const struct pipe_framebuffer_state *state)
+{
+ struct virgl_surface *zsurf = (struct virgl_surface *)state->zsbuf;
+ int i;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_FRAMEBUFFER_STATE, 0, VIRGL_SET_FRAMEBUFFER_STATE_SIZE(state->nr_cbufs)));
+ virgl_encoder_write_dword(ctx->cbuf, state->nr_cbufs);
+ virgl_encoder_write_dword(ctx->cbuf, zsurf ? zsurf->handle : 0);
+ for (i = 0; i < state->nr_cbufs; i++) {
+ struct virgl_surface *surf = (struct virgl_surface *)state->cbufs[i];
+ virgl_encoder_write_dword(ctx->cbuf, surf ? surf->handle : 0);
+ }
+
+ return 0;
+}
+
+int virgl_encoder_set_viewport_states(struct virgl_context *ctx,
+ int start_slot,
+ int num_viewports,
+ const struct pipe_viewport_state *states)
+{
+ int i,v;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VIEWPORT_STATE, 0, VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports)));
+ virgl_encoder_write_dword(ctx->cbuf, start_slot);
+ for (v = 0; v < num_viewports; v++) {
+ for (i = 0; i < 3; i++)
+ virgl_encoder_write_dword(ctx->cbuf, fui(states[v].scale[i]));
+ for (i = 0; i < 3; i++)
+ virgl_encoder_write_dword(ctx->cbuf, fui(states[v].translate[i]));
+ }
+ return 0;
+}
+
+int virgl_encoder_create_vertex_elements(struct virgl_context *ctx,
+ uint32_t handle,
+ unsigned num_elements,
+ const struct pipe_vertex_element *element)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_VERTEX_ELEMENTS, VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements)));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ for (i = 0; i < num_elements; i++) {
+ virgl_encoder_write_dword(ctx->cbuf, element[i].src_offset);
+ virgl_encoder_write_dword(ctx->cbuf, element[i].instance_divisor);
+ virgl_encoder_write_dword(ctx->cbuf, element[i].vertex_buffer_index);
+ virgl_encoder_write_dword(ctx->cbuf, element[i].src_format);
+ }
+ return 0;
+}
+
+int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx,
+ unsigned num_buffers,
+ const struct pipe_vertex_buffer *buffers)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VERTEX_BUFFERS, 0, VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers)));
+ for (i = 0; i < num_buffers; i++) {
+ struct virgl_resource *res = (struct virgl_resource *)buffers[i].buffer;
+ virgl_encoder_write_dword(ctx->cbuf, buffers[i].stride);
+ virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset);
+ virgl_encoder_write_res(ctx, res);
+ }
+ return 0;
+}
+
+int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
+ const struct pipe_index_buffer *ib)
+{
+ int length = VIRGL_SET_INDEX_BUFFER_SIZE(ib);
+ struct virgl_resource *res = NULL;
+ if (ib)
+ res = (struct virgl_resource *)ib->buffer;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_INDEX_BUFFER, 0, length));
+ virgl_encoder_write_res(ctx, res);
+ if (ib) {
+ virgl_encoder_write_dword(ctx->cbuf, ib->index_size);
+ virgl_encoder_write_dword(ctx->cbuf, ib->offset);
+ }
+ return 0;
+}
+
+int virgl_encoder_draw_vbo(struct virgl_context *ctx,
+ const struct pipe_draw_info *info)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DRAW_VBO, 0, VIRGL_DRAW_VBO_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, info->start);
+ virgl_encoder_write_dword(ctx->cbuf, info->count);
+ virgl_encoder_write_dword(ctx->cbuf, info->mode);
+ virgl_encoder_write_dword(ctx->cbuf, info->indexed);
+ virgl_encoder_write_dword(ctx->cbuf, info->instance_count);
+ virgl_encoder_write_dword(ctx->cbuf, info->index_bias);
+ virgl_encoder_write_dword(ctx->cbuf, info->start_instance);
+ virgl_encoder_write_dword(ctx->cbuf, info->primitive_restart);
+ virgl_encoder_write_dword(ctx->cbuf, info->restart_index);
+ virgl_encoder_write_dword(ctx->cbuf, info->min_index);
+ virgl_encoder_write_dword(ctx->cbuf, info->max_index);
+ if (info->count_from_stream_output)
+ virgl_encoder_write_dword(ctx->cbuf, info->count_from_stream_output->buffer_size);
+ else
+ virgl_encoder_write_dword(ctx->cbuf, 0);
+ return 0;
+}
+
+int virgl_encoder_create_surface(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_surface *templat)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SURFACE, VIRGL_OBJ_SURFACE_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_res(ctx, res);
+ virgl_encoder_write_dword(ctx->cbuf, templat->format);
+ if (templat->texture->target == PIPE_BUFFER) {
+ virgl_encoder_write_dword(ctx->cbuf, templat->u.buf.first_element);
+ virgl_encoder_write_dword(ctx->cbuf, templat->u.buf.last_element);
+
+ } else {
+ virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.level);
+ virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.first_layer | (templat->u.tex.last_layer << 16));
+ }
+ return 0;
+}
+
+int virgl_encoder_create_so_target(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_STREAMOUT_TARGET, VIRGL_OBJ_STREAMOUT_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_res(ctx, res);
+ virgl_encoder_write_dword(ctx->cbuf, buffer_offset);
+ virgl_encoder_write_dword(ctx->cbuf, buffer_size);
+ return 0;
+}
+
+static void virgl_encoder_iw_emit_header_1d(struct virgl_context *ctx,
+ struct virgl_resource *res,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ unsigned stride, unsigned layer_stride)
+{
+ virgl_encoder_write_res(ctx, res);
+ virgl_encoder_write_dword(ctx->cbuf, level);
+ virgl_encoder_write_dword(ctx->cbuf, usage);
+ virgl_encoder_write_dword(ctx->cbuf, stride);
+ virgl_encoder_write_dword(ctx->cbuf, layer_stride);
+ virgl_encoder_write_dword(ctx->cbuf, box->x);
+ virgl_encoder_write_dword(ctx->cbuf, box->y);
+ virgl_encoder_write_dword(ctx->cbuf, box->z);
+ virgl_encoder_write_dword(ctx->cbuf, box->width);
+ virgl_encoder_write_dword(ctx->cbuf, box->height);
+ virgl_encoder_write_dword(ctx->cbuf, box->depth);
+}
+
+int virgl_encoder_inline_write(struct virgl_context *ctx,
+ struct virgl_resource *res,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ const void *data, unsigned stride,
+ unsigned layer_stride)
+{
+ uint32_t size = (stride ? stride : box->width) * box->height;
+ uint32_t length, thispass, left_bytes;
+ struct pipe_box mybox = *box;
+
+ length = 11 + (size + 3) / 4;
+ if ((ctx->cbuf->cdw + length + 1) > VIRGL_MAX_CMDBUF_DWORDS) {
+ if (box->height > 1 || box->depth > 1) {
+ debug_printf("inline transfer failed due to multi dimensions and too large\n");
+ assert(0);
+ }
+ }
+
+ left_bytes = size;
+ while (left_bytes) {
+ if (ctx->cbuf->cdw + 12 > VIRGL_MAX_CMDBUF_DWORDS)
+ ctx->base.flush(&ctx->base, NULL, 0);
+
+ thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - 12) * 4;
+
+ length = MIN2(thispass, left_bytes);
+
+ mybox.width = length;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_INLINE_WRITE, 0, ((length + 3) / 4) + 11));
+ virgl_encoder_iw_emit_header_1d(ctx, res, level, usage, &mybox, stride, layer_stride);
+ virgl_encoder_write_block(ctx->cbuf, data, length);
+ left_bytes -= length;
+ mybox.x += length;
+ data += length;
+ }
+ return 0;
+}
+
+int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx,
+ struct virgl_resource *res)
+{
+// virgl_encoder_write_dword(ctx->cbuf, VIRGL_CMD0(VIRGL_CCMD_FLUSH_FRONTUBFFER, 0, 1));
+// virgl_encoder_write_dword(ctx->cbuf, res_handle);
+ return 0;
+}
+
+int virgl_encode_sampler_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_sampler_state *state)
+{
+ uint32_t tmp;
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_STATE, VIRGL_OBJ_SAMPLER_STATE_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+
+ tmp = VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(state->wrap_s) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(state->wrap_t) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(state->wrap_r) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(state->min_img_filter) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(state->min_mip_filter) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(state->mag_img_filter) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(state->compare_mode) |
+ VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(state->compare_func);
+
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->lod_bias));
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->min_lod));
+ virgl_encoder_write_dword(ctx->cbuf, fui(state->max_lod));
+ for (i = 0; i < 4; i++)
+ virgl_encoder_write_dword(ctx->cbuf, state->border_color.ui[i]);
+ return 0;
+}
+
+
+int virgl_encode_sampler_view(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_sampler_view *state)
+{
+ uint32_t tmp;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_VIEW, VIRGL_OBJ_SAMPLER_VIEW_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_res(ctx, res);
+ virgl_encoder_write_dword(ctx->cbuf, state->format);
+ if (res->u.b.target == PIPE_BUFFER) {
+ virgl_encoder_write_dword(ctx->cbuf, state->u.buf.first_element);
+ virgl_encoder_write_dword(ctx->cbuf, state->u.buf.last_element);
+ } else {
+ virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16);
+ virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8);
+ }
+ tmp = VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(state->swizzle_r) |
+ VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(state->swizzle_g) |
+ VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(state->swizzle_b) |
+ VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(state->swizzle_a);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ return 0;
+}
+
+int virgl_encode_set_sampler_views(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_views,
+ struct virgl_sampler_view **views)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLER_VIEWS, 0, VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views)));
+ virgl_encoder_write_dword(ctx->cbuf, shader_type);
+ virgl_encoder_write_dword(ctx->cbuf, start_slot);
+ for (i = 0; i < num_views; i++) {
+ uint32_t handle = views[i] ? views[i]->handle : 0;
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ }
+ return 0;
+}
+
+int virgl_encode_bind_sampler_states(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_handles,
+ uint32_t *handles)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SAMPLER_STATES, 0, VIRGL_BIND_SAMPLER_STATES(num_handles)));
+ virgl_encoder_write_dword(ctx->cbuf, shader_type);
+ virgl_encoder_write_dword(ctx->cbuf, start_slot);
+ for (i = 0; i < num_handles; i++)
+ virgl_encoder_write_dword(ctx->cbuf, handles[i]);
+ return 0;
+}
+
+int virgl_encoder_write_constant_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t size,
+ const void *data)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CONSTANT_BUFFER, 0, size + 2));
+ virgl_encoder_write_dword(ctx->cbuf, shader);
+ virgl_encoder_write_dword(ctx->cbuf, index);
+ if (data)
+ virgl_encoder_write_block(ctx->cbuf, data, size * 4);
+ return 0;
+}
+
+int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t offset,
+ uint32_t length,
+ struct virgl_resource *res)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_UNIFORM_BUFFER, 0, VIRGL_SET_UNIFORM_BUFFER_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, shader);
+ virgl_encoder_write_dword(ctx->cbuf, index);
+ virgl_encoder_write_dword(ctx->cbuf, offset);
+ virgl_encoder_write_dword(ctx->cbuf, length);
+ virgl_encoder_write_res(ctx, res);
+ return 0;
+}
+
+
+int virgl_encoder_set_stencil_ref(struct virgl_context *ctx,
+ const struct pipe_stencil_ref *ref)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STENCIL_REF, 0, VIRGL_SET_STENCIL_REF_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, VIRGL_STENCIL_REF_VAL(ref->ref_value[0] , (ref->ref_value[1])));
+ return 0;
+}
+
+int virgl_encoder_set_blend_color(struct virgl_context *ctx,
+ const struct pipe_blend_color *color)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_BLEND_COLOR, 0, VIRGL_SET_BLEND_COLOR_SIZE));
+ for (i = 0; i < 4; i++)
+ virgl_encoder_write_dword(ctx->cbuf, fui(color->color[i]));
+ return 0;
+}
+
+int virgl_encoder_set_scissor_state(struct virgl_context *ctx,
+ unsigned start_slot,
+ int num_scissors,
+ const struct pipe_scissor_state *ss)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SCISSOR_STATE, 0, VIRGL_SET_SCISSOR_STATE_SIZE(num_scissors)));
+ virgl_encoder_write_dword(ctx->cbuf, start_slot);
+ for (i = 0; i < num_scissors; i++) {
+ virgl_encoder_write_dword(ctx->cbuf, (ss[i].minx | ss[i].miny << 16));
+ virgl_encoder_write_dword(ctx->cbuf, (ss[i].maxx | ss[i].maxy << 16));
+ }
+ return 0;
+}
+
+void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx,
+ const struct pipe_poly_stipple *ps)
+{
+ int i;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_POLYGON_STIPPLE, 0, VIRGL_POLYGON_STIPPLE_SIZE));
+ for (i = 0; i < VIRGL_POLYGON_STIPPLE_SIZE; i++) {
+ virgl_encoder_write_dword(ctx->cbuf, ps->stipple[i]);
+ }
+}
+
+void virgl_encoder_set_sample_mask(struct virgl_context *ctx,
+ unsigned sample_mask)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLE_MASK, 0, VIRGL_SET_SAMPLE_MASK_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, sample_mask);
+}
+
+void virgl_encoder_set_clip_state(struct virgl_context *ctx,
+ const struct pipe_clip_state *clip)
+{
+ int i, j;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CLIP_STATE, 0, VIRGL_SET_CLIP_STATE_SIZE));
+ for (i = 0; i < VIRGL_MAX_CLIP_PLANES; i++) {
+ for (j = 0; j < 4; j++) {
+ virgl_encoder_write_dword(ctx->cbuf, fui(clip->ucp[i][j]));
+ }
+ }
+}
+
+int virgl_encode_resource_copy_region(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct virgl_resource *src_res,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_COPY_REGION, 0, VIRGL_CMD_RESOURCE_COPY_REGION_SIZE));
+ virgl_encoder_write_res(ctx, dst_res);
+ virgl_encoder_write_dword(ctx->cbuf, dst_level);
+ virgl_encoder_write_dword(ctx->cbuf, dstx);
+ virgl_encoder_write_dword(ctx->cbuf, dsty);
+ virgl_encoder_write_dword(ctx->cbuf, dstz);
+ virgl_encoder_write_res(ctx, src_res);
+ virgl_encoder_write_dword(ctx->cbuf, src_level);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->x);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->y);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->z);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->width);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->height);
+ virgl_encoder_write_dword(ctx->cbuf, src_box->depth);
+ return 0;
+}
+
+int virgl_encode_blit(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ struct virgl_resource *src_res,
+ const struct pipe_blit_info *blit)
+{
+ uint32_t tmp;
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BLIT, 0, VIRGL_CMD_BLIT_SIZE));
+ tmp = VIRGL_CMD_BLIT_S0_MASK(blit->mask) |
+ VIRGL_CMD_BLIT_S0_FILTER(blit->filter) |
+ VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable);
+ virgl_encoder_write_dword(ctx->cbuf, tmp);
+ virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.minx | blit->scissor.miny << 16));
+ virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.maxx | blit->scissor.maxy << 16));
+
+ virgl_encoder_write_res(ctx, dst_res);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.level);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.format);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.x);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.y);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.z);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.width);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.height);
+ virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.depth);
+
+ virgl_encoder_write_res(ctx, src_res);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.level);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.format);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.x);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.y);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.z);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.width);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.height);
+ virgl_encoder_write_dword(ctx->cbuf, blit->src.box.depth);
+ return 0;
+}
+
+int virgl_encoder_create_query(struct virgl_context *ctx,
+ uint32_t handle,
+ uint query_type,
+ uint query_index,
+ struct virgl_resource *res,
+ uint32_t offset)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_QUERY, VIRGL_OBJ_QUERY_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_dword(ctx->cbuf, ((query_type & 0xffff) | (query_index << 16)));
+ virgl_encoder_write_dword(ctx->cbuf, offset);
+ virgl_encoder_write_res(ctx, res);
+ return 0;
+}
+
+int virgl_encoder_begin_query(struct virgl_context *ctx,
+ uint32_t handle)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BEGIN_QUERY, 0, 1));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ return 0;
+}
+
+int virgl_encoder_end_query(struct virgl_context *ctx,
+ uint32_t handle)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_END_QUERY, 0, 1));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ return 0;
+}
+
+int virgl_encoder_get_query_result(struct virgl_context *ctx,
+ uint32_t handle, boolean wait)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_GET_QUERY_RESULT, 0, 2));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_dword(ctx->cbuf, wait ? 1 : 0);
+ return 0;
+}
+
+int virgl_encoder_render_condition(struct virgl_context *ctx,
+ uint32_t handle, boolean condition,
+ uint mode)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_RENDER_CONDITION, 0, VIRGL_RENDER_CONDITION_SIZE));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_dword(ctx->cbuf, condition);
+ virgl_encoder_write_dword(ctx->cbuf, mode);
+ return 0;
+}
+
+int virgl_encoder_set_so_targets(struct virgl_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_bitmask)
+{
+ int i;
+
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STREAMOUT_TARGETS, 0, num_targets + 1));
+ virgl_encoder_write_dword(ctx->cbuf, append_bitmask);
+ for (i = 0; i < num_targets; i++) {
+ struct virgl_so_target *tg = (struct virgl_so_target *)targets[i];
+ virgl_encoder_write_dword(ctx->cbuf, tg->handle);
+ }
+ return 0;
+}
+
+
+int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SUB_CTX, 0, 1));
+ virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+ return 0;
+}
+
+int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_SUB_CTX, 0, 1));
+ virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+ return 0;
+}
+
+int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_SUB_CTX, 0, 1));
+ virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+ return 0;
+}
+
+int virgl_encode_bind_shader(struct virgl_context *ctx,
+ uint32_t handle, uint32_t type)
+{
+ virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SHADER, 0, 2));
+ virgl_encoder_write_dword(ctx->cbuf, handle);
+ virgl_encoder_write_dword(ctx->cbuf, type);
+ return 0;
+}
diff --git a/src/gallium/drivers/virgl/virgl_encode.h b/src/gallium/drivers/virgl/virgl_encode.h
new file mode 100644
index 00000000000..eabc421f861
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_encode.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_ENCODE_H
+#define VIRGL_ENCODE_H
+
+#include "virgl_context.h"
+struct virgl_surface {
+ struct pipe_surface base;
+ uint32_t handle;
+};
+
+static inline void virgl_encoder_write_dword(struct virgl_cmd_buf *state,
+ uint32_t dword)
+{
+ state->buf[state->cdw++] = dword;
+}
+
+static inline void virgl_encoder_write_qword(struct virgl_cmd_buf *state,
+ uint64_t qword)
+{
+ memcpy(state->buf + state->cdw, &qword, sizeof(uint64_t));
+ state->cdw += 2;
+}
+
+static inline void virgl_encoder_write_block(struct virgl_cmd_buf *state,
+ const uint8_t *ptr, uint32_t len)
+{
+ int x;
+ memcpy(state->buf + state->cdw, ptr, len);
+ x = (len % 4);
+// fprintf(stderr, "[%d] block %d x is %d\n", state->cdw, len, x);
+ if (x) {
+ uint8_t *mp = (uint8_t *)(state->buf + state->cdw);
+ mp += len;
+ memset(mp, 0, x);
+ }
+ state->cdw += (len + 3) / 4;
+}
+
+extern int virgl_encode_blend_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_blend_state *blend_state);
+extern int virgl_encode_rasterizer_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_rasterizer_state *state);
+
+extern int virgl_encode_shader_state(struct virgl_context *ctx,
+ uint32_t handle,
+ uint32_t type,
+ const struct pipe_stream_output_info *so_info,
+ const struct tgsi_token *tokens);
+
+int virgl_encode_stream_output_info(struct virgl_context *ctx,
+ uint32_t handle,
+ uint32_t type,
+ const struct pipe_shader_state *shader);
+
+int virgl_encoder_set_so_targets(struct virgl_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_bitmask);
+
+int virgl_encoder_create_so_target(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ unsigned buffer_offset,
+ unsigned buffer_size);
+
+int virgl_encode_clear(struct virgl_context *ctx,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+
+int virgl_encode_bind_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object);
+int virgl_encode_delete_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object);
+
+int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx,
+ const struct pipe_framebuffer_state *state);
+int virgl_encoder_set_viewport_states(struct virgl_context *ctx,
+ int start_slot,
+ int num_viewports,
+ const struct pipe_viewport_state *states);
+
+int virgl_encoder_draw_vbo(struct virgl_context *ctx,
+ const struct pipe_draw_info *info);
+
+
+int virgl_encoder_create_surface(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_surface *templat);
+
+int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx,
+ struct virgl_resource *res);
+
+int virgl_encoder_create_vertex_elements(struct virgl_context *ctx,
+ uint32_t handle,
+ unsigned num_elements,
+ const struct pipe_vertex_element *element);
+
+int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx,
+ unsigned num_buffers,
+ const struct pipe_vertex_buffer *buffers);
+
+
+int virgl_encoder_inline_write(struct virgl_context *ctx,
+ struct virgl_resource *res,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ const void *data, unsigned stride,
+ unsigned layer_stride);
+int virgl_encode_sampler_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_sampler_state *state);
+int virgl_encode_sampler_view(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_sampler_view *state);
+
+int virgl_encode_set_sampler_views(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_views,
+ struct virgl_sampler_view **views);
+
+int virgl_encode_bind_sampler_states(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_handles,
+ uint32_t *handles);
+
+int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
+ const struct pipe_index_buffer *ib);
+
+uint32_t virgl_object_assign_handle(void);
+
+int virgl_encoder_write_constant_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t size,
+ const void *data);
+
+int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t offset,
+ uint32_t length,
+ struct virgl_resource *res);
+int virgl_encode_dsa_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_depth_stencil_alpha_state *dsa_state);
+
+int virgl_encoder_set_stencil_ref(struct virgl_context *ctx,
+ const struct pipe_stencil_ref *ref);
+
+int virgl_encoder_set_blend_color(struct virgl_context *ctx,
+ const struct pipe_blend_color *color);
+
+int virgl_encoder_set_scissor_state(struct virgl_context *ctx,
+ unsigned start_slot,
+ int num_scissors,
+ const struct pipe_scissor_state *ss);
+
+void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx,
+ const struct pipe_poly_stipple *ps);
+
+void virgl_encoder_set_sample_mask(struct virgl_context *ctx,
+ unsigned sample_mask);
+
+void virgl_encoder_set_clip_state(struct virgl_context *ctx,
+ const struct pipe_clip_state *clip);
+
+int virgl_encode_resource_copy_region(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct virgl_resource *src_res,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+
+int virgl_encode_blit(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ struct virgl_resource *src_res,
+ const struct pipe_blit_info *blit);
+
+int virgl_encoder_create_query(struct virgl_context *ctx,
+ uint32_t handle,
+ uint query_type,
+ uint query_index,
+ struct virgl_resource *res,
+ uint32_t offset);
+
+int virgl_encoder_begin_query(struct virgl_context *ctx,
+ uint32_t handle);
+int virgl_encoder_end_query(struct virgl_context *ctx,
+ uint32_t handle);
+int virgl_encoder_get_query_result(struct virgl_context *ctx,
+ uint32_t handle, boolean wait);
+
+int virgl_encoder_render_condition(struct virgl_context *ctx,
+ uint32_t handle, boolean condition,
+ uint mode);
+
+int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+
+int virgl_encode_bind_shader(struct virgl_context *ctx,
+ uint32_t handle, uint32_t type);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_protocol.h b/src/gallium/drivers/virgl/virgl_protocol.h
new file mode 100644
index 00000000000..ca3142f5f72
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_protocol.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_PROTOCOL_H
+#define VIRGL_PROTOCOL_H
+
+#define VIRGL_QUERY_STATE_NEW 0
+#define VIRGL_QUERY_STATE_DONE 1
+#define VIRGL_QUERY_STATE_WAIT_HOST 2
+
+struct virgl_host_query_state {
+ uint32_t query_state;
+ uint32_t result_size;
+ uint64_t result;
+};
+
+enum virgl_object_type {
+ VIRGL_OBJECT_NULL,
+ VIRGL_OBJECT_BLEND,
+ VIRGL_OBJECT_RASTERIZER,
+ VIRGL_OBJECT_DSA,
+ VIRGL_OBJECT_SHADER,
+ VIRGL_OBJECT_VERTEX_ELEMENTS,
+ VIRGL_OBJECT_SAMPLER_VIEW,
+ VIRGL_OBJECT_SAMPLER_STATE,
+ VIRGL_OBJECT_SURFACE,
+ VIRGL_OBJECT_QUERY,
+ VIRGL_OBJECT_STREAMOUT_TARGET,
+ VIRGL_MAX_OBJECTS,
+};
+
+/* context cmds to be encoded in the command stream */
+enum virgl_context_cmd {
+ VIRGL_CCMD_NOP = 0,
+ VIRGL_CCMD_CREATE_OBJECT = 1,
+ VIRGL_CCMD_BIND_OBJECT,
+ VIRGL_CCMD_DESTROY_OBJECT,
+ VIRGL_CCMD_SET_VIEWPORT_STATE,
+ VIRGL_CCMD_SET_FRAMEBUFFER_STATE,
+ VIRGL_CCMD_SET_VERTEX_BUFFERS,
+ VIRGL_CCMD_CLEAR,
+ VIRGL_CCMD_DRAW_VBO,
+ VIRGL_CCMD_RESOURCE_INLINE_WRITE,
+ VIRGL_CCMD_SET_SAMPLER_VIEWS,
+ VIRGL_CCMD_SET_INDEX_BUFFER,
+ VIRGL_CCMD_SET_CONSTANT_BUFFER,
+ VIRGL_CCMD_SET_STENCIL_REF,
+ VIRGL_CCMD_SET_BLEND_COLOR,
+ VIRGL_CCMD_SET_SCISSOR_STATE,
+ VIRGL_CCMD_BLIT,
+ VIRGL_CCMD_RESOURCE_COPY_REGION,
+ VIRGL_CCMD_BIND_SAMPLER_STATES,
+ VIRGL_CCMD_BEGIN_QUERY,
+ VIRGL_CCMD_END_QUERY,
+ VIRGL_CCMD_GET_QUERY_RESULT,
+ VIRGL_CCMD_SET_POLYGON_STIPPLE,
+ VIRGL_CCMD_SET_CLIP_STATE,
+ VIRGL_CCMD_SET_SAMPLE_MASK,
+ VIRGL_CCMD_SET_STREAMOUT_TARGETS,
+ VIRGL_CCMD_SET_RENDER_CONDITION,
+ VIRGL_CCMD_SET_UNIFORM_BUFFER,
+
+ VIRGL_CCMD_SET_SUB_CTX,
+ VIRGL_CCMD_CREATE_SUB_CTX,
+ VIRGL_CCMD_DESTROY_SUB_CTX,
+ VIRGL_CCMD_BIND_SHADER,
+};
+
+/*
+ 8-bit cmd headers
+ 8-bit object type
+ 16-bit length
+*/
+
+#define VIRGL_CMD0(cmd, obj, len) ((cmd) | ((obj) << 8) | ((len) << 16))
+
+/* hw specification */
+#define VIRGL_MAX_COLOR_BUFS 8
+#define VIRGL_MAX_CLIP_PLANES 8
+
+#define VIRGL_OBJ_CREATE_HEADER 0
+#define VIRGL_OBJ_CREATE_HANDLE 1
+
+#define VIRGL_OBJ_BIND_HEADER 0
+#define VIRGL_OBJ_BIND_HANDLE 1
+
+#define VIRGL_OBJ_DESTROY_HANDLE 1
+
+/* some of these defines are a specification - not used in the code */
+/* bit offsets for blend state object */
+#define VIRGL_OBJ_BLEND_SIZE (VIRGL_MAX_COLOR_BUFS + 3)
+#define VIRGL_OBJ_BLEND_HANDLE 1
+#define VIRGL_OBJ_BLEND_S0 2
+#define VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(x) ((x) & 0x1 << 0)
+#define VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_BLEND_S0_DITHER(x) (((x) & 0x1) << 2)
+#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(x) (((x) & 0x1) << 3)
+#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(x) (((x) & 0x1) << 4)
+#define VIRGL_OBJ_BLEND_S1 3
+#define VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(x) (((x) & 0xf) << 0)
+/* repeated once per number of cbufs */
+
+#define VIRGL_OBJ_BLEND_S2(cbuf) (4 + (cbuf))
+#define VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(x) (((x) & 0x7) << 1)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(x) (((x) & 0x1f) << 4)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(x) (((x) & 0x1f) << 9)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(x) (((x) & 0x7) << 14)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(x) (((x) & 0x1f) << 17)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(x) (((x) & 0x1f) << 22)
+#define VIRGL_OBJ_BLEND_S2_RT_COLORMASK(x) (((x) & 0xf) << 27)
+
+/* bit offsets for DSA state */
+#define VIRGL_OBJ_DSA_SIZE 5
+#define VIRGL_OBJ_DSA_HANDLE 1
+#define VIRGL_OBJ_DSA_S0 2
+#define VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_DSA_S0_DEPTH_FUNC(x) (((x) & 0x7) << 2)
+#define VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(x) (((x) & 0x1) << 8)
+#define VIRGL_OBJ_DSA_S0_ALPHA_FUNC(x) (((x) & 0x7) << 9)
+#define VIRGL_OBJ_DSA_S1 3
+#define VIRGL_OBJ_DSA_S2 4
+#define VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_DSA_S1_STENCIL_FUNC(x) (((x) & 0x7) << 1)
+#define VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(x) (((x) & 0x7) << 4)
+#define VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(x) (((x) & 0x7) << 7)
+#define VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(x) (((x) & 0x7) << 10)
+#define VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(x) (((x) & 0xff) << 13)
+#define VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(x) (((x) & 0xff) << 21)
+#define VIRGL_OBJ_DSA_ALPHA_REF 5
+
+/* offsets for rasterizer state */
+#define VIRGL_OBJ_RS_SIZE 9
+#define VIRGL_OBJ_RS_HANDLE 1
+#define VIRGL_OBJ_RS_S0 2
+#define VIRGL_OBJ_RS_S0_FLATSHADE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_RS_S0_DEPTH_CLIP(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_RS_S0_CLIP_HALFZ(x) (((x) & 0x1) << 2)
+#define VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(x) (((x) & 0x1) << 3)
+#define VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(x) (((x) & 0x1) << 4)
+#define VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(x) (((x) & 0x1) << 5)
+#define VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(x) (((x) & 0x1) << 6)
+#define VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(x) (((x) & 0x1) << 7)
+#define VIRGL_OBJ_RS_S0_CULL_FACE(x) (((x) & 0x3) << 8)
+#define VIRGL_OBJ_RS_S0_FILL_FRONT(x) (((x) & 0x3) << 10)
+#define VIRGL_OBJ_RS_S0_FILL_BACK(x) (((x) & 0x3) << 12)
+#define VIRGL_OBJ_RS_S0_SCISSOR(x) (((x) & 0x1) << 14)
+#define VIRGL_OBJ_RS_S0_FRONT_CCW(x) (((x) & 0x1) << 15)
+#define VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(x) (((x) & 0x1) << 16)
+#define VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(x) (((x) & 0x1) << 17)
+#define VIRGL_OBJ_RS_S0_OFFSET_LINE(x) (((x) & 0x1) << 18)
+#define VIRGL_OBJ_RS_S0_OFFSET_POINT(x) (((x) & 0x1) << 19)
+#define VIRGL_OBJ_RS_S0_OFFSET_TRI(x) (((x) & 0x1) << 20)
+#define VIRGL_OBJ_RS_S0_POLY_SMOOTH(x) (((x) & 0x1) << 21)
+#define VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(x) (((x) & 0x1) << 22)
+#define VIRGL_OBJ_RS_S0_POINT_SMOOTH(x) (((x) & 0x1) << 23)
+#define VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(x) (((x) & 0x1) << 24)
+#define VIRGL_OBJ_RS_S0_MULTISAMPLE(x) (((x) & 0x1) << 25)
+#define VIRGL_OBJ_RS_S0_LINE_SMOOTH(x) (((x) & 0x1) << 26)
+#define VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 27)
+#define VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(x) (((x) & 0x1) << 28)
+#define VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(x) (((x) & 0x1) << 29)
+#define VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(x) (((x) & 0x1) << 30)
+
+#define VIRGL_OBJ_RS_POINT_SIZE 3
+#define VIRGL_OBJ_RS_SPRITE_COORD_ENABLE 4
+#define VIRGL_OBJ_RS_S3 5
+
+#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(x) (((x) & 0xffff) << 0)
+#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(x) (((x) & 0xff) << 16)
+#define VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(x) (((x) & 0xff) << 24)
+#define VIRGL_OBJ_RS_LINE_WIDTH 6
+#define VIRGL_OBJ_RS_OFFSET_UNITS 7
+#define VIRGL_OBJ_RS_OFFSET_SCALE 8
+#define VIRGL_OBJ_RS_OFFSET_CLAMP 9
+
+#define VIRGL_OBJ_CLEAR_SIZE 8
+#define VIRGL_OBJ_CLEAR_BUFFERS 1
+#define VIRGL_OBJ_CLEAR_COLOR_0 2 /* color is 4 * u32/f32/i32 */
+#define VIRGL_OBJ_CLEAR_COLOR_1 3
+#define VIRGL_OBJ_CLEAR_COLOR_2 4
+#define VIRGL_OBJ_CLEAR_COLOR_3 5
+#define VIRGL_OBJ_CLEAR_DEPTH_0 6 /* depth is a double precision float */
+#define VIRGL_OBJ_CLEAR_DEPTH_1 7
+#define VIRGL_OBJ_CLEAR_STENCIL 8
+
+/* shader object */
+#define VIRGL_OBJ_SHADER_HDR_SIZE(nso) (5 + ((nso) ? (2 * nso) + 4 : 0))
+#define VIRGL_OBJ_SHADER_HANDLE 1
+#define VIRGL_OBJ_SHADER_TYPE 2
+#define VIRGL_OBJ_SHADER_OFFSET 3
+#define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0)
+/* start contains full length in VAL - also implies continuations */
+/* continuation contains offset in VAL */
+#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31)
+#define VIRGL_OBJ_SHADER_NUM_TOKENS 4
+#define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5
+#define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x))
+#define VIRGL_OBJ_SHADER_SO_OUTPUT0(x) (10 + (x * 2))
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(x) (((x) & 0xff) << 0)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(x) (((x) & 0x3) << 8)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(x) (((x) & 0x7) << 10)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(x) (((x) & 0x7) << 13)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(x) (((x) & 0xffff) << 16)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(x) (11 + (x * 2))
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_STREAM(x) (((x) & 0x03) << 0)
+
+/* viewport state */
+#define VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports) ((6 * num_viewports) + 1)
+#define VIRGL_SET_VIEWPORT_START_SLOT 1
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_0(x) (2 + (x * 6))
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_1(x) (3 + (x * 6))
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_2(x) (4 + (x * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_0(x) (5 + (x * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_1(x) (6 + (x * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_2(x) (7 + (x * 6))
+
+/* framebuffer state */
+#define VIRGL_SET_FRAMEBUFFER_STATE_SIZE(nr_cbufs) (nr_cbufs + 2)
+#define VIRGL_SET_FRAMEBUFFER_STATE_NR_CBUFS 1
+#define VIRGL_SET_FRAMEBUFFER_STATE_NR_ZSURF_HANDLE 2
+#define VIRGL_SET_FRAMEBUFFER_STATE_CBUF_HANDLE(x) ((x) + 3)
+
+/* vertex elements object */
+#define VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements) (((num_elements) * 4) + 1)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_HANDLE 1
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_OFFSET(x) (((x) * 4) + 2) /* repeated per VE */
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_INSTANCE_DIVISOR(x) (((x) * 4) + 3)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_VERTEX_BUFFER_INDEX(x) (((x) * 4) + 4)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_FORMAT(x) (((x) * 4) + 5)
+
+/* vertex buffers */
+#define VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers) ((num_buffers) * 3)
+#define VIRGL_SET_VERTEX_BUFFER_STRIDE(x) (((x) * 3) + 1)
+#define VIRGL_SET_VERTEX_BUFFER_OFFSET(x) (((x) * 3) + 2)
+#define VIRGL_SET_VERTEX_BUFFER_HANDLE(x) (((x) * 3) + 3)
+
+/* index buffer */
+#define VIRGL_SET_INDEX_BUFFER_SIZE(ib) (((ib) ? 2 : 0) + 1)
+#define VIRGL_SET_INDEX_BUFFER_HANDLE 1
+#define VIRGL_SET_INDEX_BUFFER_INDEX_SIZE 2 /* only if sending an IB handle */
+#define VIRGL_SET_INDEX_BUFFER_OFFSET 3 /* only if sending an IB handle */
+
+/* constant buffer */
+#define VIRGL_SET_CONSTANT_BUFFER_SHADER_TYPE 1
+#define VIRGL_SET_CONSTANT_BUFFER_INDEX 2
+#define VIRGL_SET_CONSTANT_BUFFER_DATA_START 3
+
+#define VIRGL_SET_UNIFORM_BUFFER_SIZE 5
+#define VIRGL_SET_UNIFORM_BUFFER_SHADER_TYPE 1
+#define VIRGL_SET_UNIFORM_BUFFER_INDEX 2
+#define VIRGL_SET_UNIFORM_BUFFER_OFFSET 3
+#define VIRGL_SET_UNIFORM_BUFFER_LENGTH 4
+#define VIRGL_SET_UNIFORM_BUFFER_RES_HANDLE 5
+
+/* draw VBO */
+#define VIRGL_DRAW_VBO_SIZE 12
+#define VIRGL_DRAW_VBO_START 1
+#define VIRGL_DRAW_VBO_COUNT 2
+#define VIRGL_DRAW_VBO_MODE 3
+#define VIRGL_DRAW_VBO_INDEXED 4
+#define VIRGL_DRAW_VBO_INSTANCE_COUNT 5
+#define VIRGL_DRAW_VBO_INDEX_BIAS 6
+#define VIRGL_DRAW_VBO_START_INSTANCE 7
+#define VIRGL_DRAW_VBO_PRIMITIVE_RESTART 8
+#define VIRGL_DRAW_VBO_RESTART_INDEX 9
+#define VIRGL_DRAW_VBO_MIN_INDEX 10
+#define VIRGL_DRAW_VBO_MAX_INDEX 11
+#define VIRGL_DRAW_VBO_COUNT_FROM_SO 12
+
+/* create surface */
+#define VIRGL_OBJ_SURFACE_SIZE 5
+#define VIRGL_OBJ_SURFACE_HANDLE 1
+#define VIRGL_OBJ_SURFACE_RES_HANDLE 2
+#define VIRGL_OBJ_SURFACE_FORMAT 3
+#define VIRGL_OBJ_SURFACE_BUFFER_FIRST_ELEMENT 4
+#define VIRGL_OBJ_SURFACE_BUFFER_LAST_ELEMENT 5
+#define VIRGL_OBJ_SURFACE_TEXTURE_LEVEL 4
+#define VIRGL_OBJ_SURFACE_TEXTURE_LAYERS 5
+
+/* create streamout target */
+#define VIRGL_OBJ_STREAMOUT_SIZE 4
+#define VIRGL_OBJ_STREAMOUT_HANDLE 1
+#define VIRGL_OBJ_STREAMOUT_RES_HANDLE 2
+#define VIRGL_OBJ_STREAMOUT_BUFFER_OFFSET 3
+#define VIRGL_OBJ_STREAMOUT_BUFFER_SIZE 4
+
+/* sampler state */
+#define VIRGL_OBJ_SAMPLER_STATE_SIZE 9
+#define VIRGL_OBJ_SAMPLER_STATE_HANDLE 1
+#define VIRGL_OBJ_SAMPLER_STATE_S0 2
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(x) (((x) & 0x7) << 0)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(x) (((x) & 0x7) << 3)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(x) (((x) & 0x7) << 6)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(x) (((x) & 0x3) << 9)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(x) (((x) & 0x3) << 11)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(x) (((x) & 0x3) << 13)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(x) (((x) & 0x1) << 15)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(x) (((x) & 0x7) << 16)
+
+#define VIRGL_OBJ_SAMPLER_STATE_LOD_BIAS 3
+#define VIRGL_OBJ_SAMPLER_STATE_MIN_LOD 4
+#define VIRGL_OBJ_SAMPLER_STATE_MAX_LOD 5
+#define VIRGL_OBJ_SAMPLER_STATE_BORDER_COLOR(x) ((x) + 6) /* 6 - 9 */
+
+
+/* sampler view */
+#define VIRGL_OBJ_SAMPLER_VIEW_SIZE 6
+#define VIRGL_OBJ_SAMPLER_VIEW_HANDLE 1
+#define VIRGL_OBJ_SAMPLER_VIEW_RES_HANDLE 2
+#define VIRGL_OBJ_SAMPLER_VIEW_FORMAT 3
+#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_FIRST_ELEMENT 4
+#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_LAST_ELEMENT 5
+#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LAYER 4
+#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LEVEL 5
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE 6
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(x) (((x) & 0x7) << 0)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(x) (((x) & 0x7) << 3)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(x) (((x) & 0x7) << 6)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(x) (((x) & 0x7) << 9)
+
+/* set sampler views */
+#define VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views) ((num_views) + 2)
+#define VIRGL_SET_SAMPLER_VIEWS_SHADER_TYPE 1
+#define VIRGL_SET_SAMPLER_VIEWS_START_SLOT 2
+#define VIRGL_SET_SAMPLER_VIEWS_V0_HANDLE 3
+
+/* bind sampler states */
+#define VIRGL_BIND_SAMPLER_STATES(num_states) ((num_states) + 2)
+#define VIRGL_BIND_SAMPLER_STATES_SHADER_TYPE 1
+#define VIRGL_BIND_SAMPLER_STATES_START_SLOT 2
+#define VIRGL_BIND_SAMPLER_STATES_S0_HANDLE 3
+
+/* set stencil reference */
+#define VIRGL_SET_STENCIL_REF_SIZE 1
+#define VIRGL_SET_STENCIL_REF 1
+#define VIRGL_STENCIL_REF_VAL(f, s) ((f & 0xff) | (((s & 0xff) << 8)))
+
+/* set blend color */
+#define VIRGL_SET_BLEND_COLOR_SIZE 4
+#define VIRGL_SET_BLEND_COLOR(x) ((x) + 1)
+
+/* set scissor state */
+#define VIRGL_SET_SCISSOR_STATE_SIZE(x) (1 + 2 * x)
+#define VIRGL_SET_SCISSOR_START_SLOT 1
+#define VIRGL_SET_SCISSOR_MINX_MINY(x) (2 + (x * 2))
+#define VIRGL_SET_SCISSOR_MAXX_MAXY(x) (3 + (x * 2))
+
+/* resource copy region */
+#define VIRGL_CMD_RESOURCE_COPY_REGION_SIZE 13
+#define VIRGL_CMD_RCR_DST_RES_HANDLE 1
+#define VIRGL_CMD_RCR_DST_LEVEL 2
+#define VIRGL_CMD_RCR_DST_X 3
+#define VIRGL_CMD_RCR_DST_Y 4
+#define VIRGL_CMD_RCR_DST_Z 5
+#define VIRGL_CMD_RCR_SRC_RES_HANDLE 6
+#define VIRGL_CMD_RCR_SRC_LEVEL 7
+#define VIRGL_CMD_RCR_SRC_X 8
+#define VIRGL_CMD_RCR_SRC_Y 9
+#define VIRGL_CMD_RCR_SRC_Z 10
+#define VIRGL_CMD_RCR_SRC_W 11
+#define VIRGL_CMD_RCR_SRC_H 12
+#define VIRGL_CMD_RCR_SRC_D 13
+
+/* blit */
+#define VIRGL_CMD_BLIT_SIZE 21
+#define VIRGL_CMD_BLIT_S0 1
+#define VIRGL_CMD_BLIT_S0_MASK(x) (((x) & 0xff) << 0)
+#define VIRGL_CMD_BLIT_S0_FILTER(x) (((x) & 0x3) << 8)
+#define VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(x) (((x) & 0x1) << 10)
+#define VIRGL_CMD_BLIT_SCISSOR_MINX_MINY 2
+#define VIRGL_CMD_BLIT_SCISSOR_MAXX_MAXY 3
+#define VIRGL_CMD_BLIT_DST_RES_HANDLE 4
+#define VIRGL_CMD_BLIT_DST_LEVEL 5
+#define VIRGL_CMD_BLIT_DST_FORMAT 6
+#define VIRGL_CMD_BLIT_DST_X 7
+#define VIRGL_CMD_BLIT_DST_Y 8
+#define VIRGL_CMD_BLIT_DST_Z 9
+#define VIRGL_CMD_BLIT_DST_W 10
+#define VIRGL_CMD_BLIT_DST_H 11
+#define VIRGL_CMD_BLIT_DST_D 12
+#define VIRGL_CMD_BLIT_SRC_RES_HANDLE 13
+#define VIRGL_CMD_BLIT_SRC_LEVEL 14
+#define VIRGL_CMD_BLIT_SRC_FORMAT 15
+#define VIRGL_CMD_BLIT_SRC_X 16
+#define VIRGL_CMD_BLIT_SRC_Y 17
+#define VIRGL_CMD_BLIT_SRC_Z 18
+#define VIRGL_CMD_BLIT_SRC_W 19
+#define VIRGL_CMD_BLIT_SRC_H 20
+#define VIRGL_CMD_BLIT_SRC_D 21
+
+/* query object */
+#define VIRGL_OBJ_QUERY_SIZE 4
+#define VIRGL_OBJ_QUERY_HANDLE 1
+#define VIRGL_OBJ_QUERY_TYPE_INDEX 2
+#define VIRGL_OBJ_QUERY_TYPE(x) (x & 0xffff)
+#define VIRGL_OBJ_QUERY_INDEX(x) ((x & 0xffff) << 16)
+#define VIRGL_OBJ_QUERY_OFFSET 3
+#define VIRGL_OBJ_QUERY_RES_HANDLE 4
+
+#define VIRGL_QUERY_BEGIN_HANDLE 1
+
+#define VIRGL_QUERY_END_HANDLE 1
+
+#define VIRGL_QUERY_RESULT_HANDLE 1
+#define VIRGL_QUERY_RESULT_WAIT 2
+
+/* render condition */
+#define VIRGL_RENDER_CONDITION_SIZE 3
+#define VIRGL_RENDER_CONDITION_HANDLE 1
+#define VIRGL_RENDER_CONDITION_CONDITION 2
+#define VIRGL_RENDER_CONDITION_MODE 3
+
+/* resource inline write */
+#define VIRGL_RESOURCE_IW_RES_HANDLE 1
+#define VIRGL_RESOURCE_IW_LEVEL 2
+#define VIRGL_RESOURCE_IW_USAGE 3
+#define VIRGL_RESOURCE_IW_STRIDE 4
+#define VIRGL_RESOURCE_IW_LAYER_STRIDE 5
+#define VIRGL_RESOURCE_IW_X 6
+#define VIRGL_RESOURCE_IW_Y 7
+#define VIRGL_RESOURCE_IW_Z 8
+#define VIRGL_RESOURCE_IW_W 9
+#define VIRGL_RESOURCE_IW_H 10
+#define VIRGL_RESOURCE_IW_D 11
+#define VIRGL_RESOURCE_IW_DATA_START 12
+
+/* set streamout targets */
+#define VIRGL_SET_STREAMOUT_TARGETS_APPEND_BITMASK 1
+#define VIRGL_SET_STREAMOUT_TARGETS_H0 2
+
+/* set sample mask */
+#define VIRGL_SET_SAMPLE_MASK_SIZE 1
+#define VIRGL_SET_SAMPLE_MASK_MASK 1
+
+/* set clip state */
+#define VIRGL_SET_CLIP_STATE_SIZE 32
+#define VIRGL_SET_CLIP_STATE_C0 1
+
+/* polygon stipple */
+#define VIRGL_POLYGON_STIPPLE_SIZE 32
+#define VIRGL_POLYGON_STIPPLE_P0 1
+
+#define VIRGL_BIND_SHADER_SIZE 2
+#define VIRGL_BIND_SHADER_HANDLE 1
+#define VIRGL_BIND_SHADER_TYPE 2
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_public.h b/src/gallium/drivers/virgl/virgl_public.h
new file mode 100644
index 00000000000..6a2c11be320
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_public.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_PUBLIC_H
+#define VIRGL_PUBLIC_H
+
+struct pipe_screen;
+struct sw_winsys;
+struct virgl_winsys;
+
+struct pipe_screen *
+virgl_create_screen(struct virgl_winsys *vws);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c
new file mode 100644
index 00000000000..f79be5f2f09
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_query.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "virgl_resource.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+
+struct virgl_query {
+ uint32_t handle;
+ struct virgl_resource *buf;
+
+ unsigned index;
+ unsigned type;
+ unsigned result_size;
+ unsigned result_gotten_sent;
+};
+
+static void virgl_render_condition(struct pipe_context *ctx,
+ struct pipe_query *q,
+ boolean condition,
+ uint mode)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query = (struct virgl_query *)q;
+ uint32_t handle = 0;
+ if (q)
+ handle = query->handle;
+ virgl_encoder_render_condition(vctx, handle, condition, mode);
+}
+
+static struct pipe_query *virgl_create_query(struct pipe_context *ctx,
+ unsigned query_type, unsigned index)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query;
+ uint32_t handle;
+
+ query = CALLOC_STRUCT(virgl_query);
+ if (!query)
+ return NULL;
+
+ query->buf = (struct virgl_resource *)pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_STAGING, sizeof(struct virgl_host_query_state));
+ if (!query->buf) {
+ FREE(query);
+ return NULL;
+ }
+
+ handle = virgl_object_assign_handle();
+ query->type = query_type;
+ query->index = index;
+ query->handle = handle;
+ query->buf->clean = FALSE;
+ virgl_encoder_create_query(vctx, handle, query_type, index, query->buf, 0);
+
+ return (struct pipe_query *)query;
+}
+
+static void virgl_destroy_query(struct pipe_context *ctx,
+ struct pipe_query *q)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query = (struct virgl_query *)q;
+
+ virgl_encode_delete_object(vctx, query->handle, VIRGL_OBJECT_QUERY);
+
+ pipe_resource_reference((struct pipe_resource **)&query->buf, NULL);
+ FREE(query);
+}
+
+static boolean virgl_begin_query(struct pipe_context *ctx,
+ struct pipe_query *q)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query = (struct virgl_query *)q;
+
+ query->buf->clean = FALSE;
+ virgl_encoder_begin_query(vctx, query->handle);
+ return true;
+}
+
+static void virgl_end_query(struct pipe_context *ctx,
+ struct pipe_query *q)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query = (struct virgl_query *)q;
+ struct pipe_box box;
+
+ uint32_t qs = VIRGL_QUERY_STATE_WAIT_HOST;
+ u_box_1d(0, 4, &box);
+ virgl_transfer_inline_write(ctx, &query->buf->u.b, 0, PIPE_TRANSFER_WRITE,
+ &box, &qs, 0, 0);
+
+
+ virgl_encoder_end_query(vctx, query->handle);
+}
+
+static boolean virgl_get_query_result(struct pipe_context *ctx,
+ struct pipe_query *q,
+ boolean wait,
+ union pipe_query_result *result)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_query *query = (struct virgl_query *)q;
+ struct pipe_transfer *transfer;
+ struct virgl_host_query_state *host_state;
+
+ /* ask host for query result */
+ if (!query->result_gotten_sent) {
+ query->result_gotten_sent = 1;
+ virgl_encoder_get_query_result(vctx, query->handle, 0);
+ ctx->flush(ctx, NULL, 0);
+ }
+
+ /* do we have to flush? */
+ /* now we can do the transfer to get the result back? */
+ remap:
+ host_state = pipe_buffer_map(ctx, &query->buf->u.b,
+ PIPE_TRANSFER_READ, &transfer);
+
+ if (host_state->query_state != VIRGL_QUERY_STATE_DONE) {
+ pipe_buffer_unmap(ctx, transfer);
+ if (wait)
+ goto remap;
+ else
+ return FALSE;
+ }
+
+ if (query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED)
+ result->u64 = host_state->result;
+ else
+ result->u64 = (uint32_t)host_state->result;
+
+ pipe_buffer_unmap(ctx, transfer);
+ query->result_gotten_sent = 0;
+ return TRUE;
+}
+
+void virgl_init_query_functions(struct virgl_context *vctx)
+{
+ vctx->base.render_condition = virgl_render_condition;
+ vctx->base.create_query = virgl_create_query;
+ vctx->base.destroy_query = virgl_destroy_query;
+ vctx->base.begin_query = virgl_begin_query;
+ vctx->base.end_query = virgl_end_query;
+ vctx->base.get_query_result = virgl_get_query_result;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
new file mode 100644
index 00000000000..758dd6b3e4b
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "virgl_resource.h"
+#include "virgl_context.h"
+
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage)
+{
+ struct virgl_screen *vs = virgl_screen(vctx->base.screen);
+
+ if ((!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) && vs->vws->res_is_referenced(vs->vws, vctx->cbuf, res->hw_res)) {
+ return true;
+ }
+ return false;
+}
+
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage)
+{
+ bool readback = true;
+ if (res->clean)
+ readback = false;
+ else if (usage & PIPE_TRANSFER_DISCARD_RANGE)
+ readback = false;
+ else if ((usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) ==
+ (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT))
+ readback = false;
+ return readback;
+}
+
+static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ struct virgl_screen *vs = (struct virgl_screen *)screen;
+ if (templ->target == PIPE_BUFFER)
+ return virgl_buffer_create(vs, templ);
+ else
+ return virgl_texture_create(vs, templ);
+}
+
+static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ struct virgl_screen *vs = (struct virgl_screen *)screen;
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return virgl_texture_from_handle(vs, templ, whandle);
+}
+
+void virgl_init_screen_resource_functions(struct pipe_screen *screen)
+{
+ screen->resource_create = virgl_resource_create;
+ screen->resource_from_handle = virgl_resource_from_handle;
+ screen->resource_get_handle = u_resource_get_handle_vtbl;
+ screen->resource_destroy = u_resource_destroy_vtbl;
+}
+
+void virgl_init_context_resource_functions(struct pipe_context *ctx)
+{
+ ctx->transfer_map = u_transfer_map_vtbl;
+ ctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+ ctx->transfer_unmap = u_transfer_unmap_vtbl;
+ ctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.h b/src/gallium/drivers/virgl/virgl_resource.h
new file mode 100644
index 00000000000..2d0bd8b6400
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VIRGL_RESOURCE_H
+#define VIRGL_RESOURCE_H
+
+#include "util/u_inlines.h"
+#include "util/u_range.h"
+#include "util/list.h"
+#include "util/u_transfer.h"
+
+#include "virgl_hw.h"
+#define VR_MAX_TEXTURE_2D_LEVELS 15
+
+struct virgl_screen;
+struct virgl_context;
+struct virgl_resource {
+ struct u_resource u;
+ struct virgl_hw_res *hw_res;
+ boolean clean;
+};
+
+struct virgl_buffer {
+ struct virgl_resource base;
+
+ struct list_head flush_list;
+ boolean on_list;
+
+ /* The buffer range which is initialized (with a write transfer,
+ * streamout, DMA, or as a random access target). The rest of
+ * the buffer is considered invalid and can be mapped unsynchronized.
+ *
+ * This allows unsychronized mapping of a buffer range which hasn't
+ * been used yet. It's for applications which forget to use
+ * the unsynchronized map flag and expect the driver to figure it out.
+ */
+ struct util_range valid_buffer_range;
+};
+
+struct virgl_texture {
+ struct virgl_resource base;
+
+ unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS];
+ unsigned stride[VR_MAX_TEXTURE_2D_LEVELS];
+};
+
+struct virgl_transfer {
+ struct pipe_transfer base;
+ uint32_t offset;
+ struct virgl_resource *resolve_tmp;
+};
+
+void virgl_resource_destroy(struct pipe_screen *screen,
+ struct pipe_resource *resource);
+
+void virgl_init_screen_resource_functions(struct pipe_screen *screen);
+
+void virgl_init_context_resource_functions(struct pipe_context *ctx);
+
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+ const struct pipe_resource *templ);
+
+struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle);
+
+static inline struct virgl_resource *virgl_resource(struct pipe_resource *r)
+{
+ return (struct virgl_resource *)r;
+}
+
+static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r)
+{
+ return (struct virgl_buffer *)r;
+}
+
+struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs,
+ const struct pipe_resource *templ);
+
+static inline unsigned pipe_to_virgl_bind(unsigned pbind)
+{
+ unsigned outbind = 0;
+ if (pbind & PIPE_BIND_DEPTH_STENCIL)
+ outbind |= VIRGL_BIND_DEPTH_STENCIL;
+ if (pbind & PIPE_BIND_RENDER_TARGET)
+ outbind |= VIRGL_BIND_RENDER_TARGET;
+ if (pbind & PIPE_BIND_SAMPLER_VIEW)
+ outbind |= VIRGL_BIND_SAMPLER_VIEW;
+ if (pbind & PIPE_BIND_VERTEX_BUFFER)
+ outbind |= VIRGL_BIND_VERTEX_BUFFER;
+ if (pbind & PIPE_BIND_INDEX_BUFFER)
+ outbind |= VIRGL_BIND_INDEX_BUFFER;
+ if (pbind & PIPE_BIND_CONSTANT_BUFFER)
+ outbind |= VIRGL_BIND_CONSTANT_BUFFER;
+ if (pbind & PIPE_BIND_DISPLAY_TARGET)
+ outbind |= VIRGL_BIND_DISPLAY_TARGET;
+ if (pbind & PIPE_BIND_STREAM_OUTPUT)
+ outbind |= VIRGL_BIND_STREAM_OUTPUT;
+ if (pbind & PIPE_BIND_CURSOR)
+ outbind |= VIRGL_BIND_CURSOR;
+ if (pbind & PIPE_BIND_CUSTOM)
+ outbind |= VIRGL_BIND_CUSTOM;
+ if (pbind & PIPE_BIND_SCANOUT)
+ outbind |= VIRGL_BIND_SCANOUT;
+ return outbind;
+}
+
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage);
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
new file mode 100644
index 00000000000..2bc7f879487
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_video.h"
+#include "os/os_time.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
+#include "state_tracker/sw_winsys.h"
+#include "tgsi/tgsi_exec.h"
+
+#include "virgl.h"
+#include "virgl_resource.h"
+#include "virgl_public.h"
+#include "virgl_context.h"
+
+#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */
+#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+#define SP_MAX_TEXTURE_CUBE_LEVELS 13 /* 4K x 4K */
+
+static const char *
+virgl_get_vendor(struct pipe_screen *screen)
+{
+ return "Red Hat";
+}
+
+
+static const char *
+virgl_get_name(struct pipe_screen *screen)
+{
+ return "virgl";
+}
+
+static int
+virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ switch (param) {
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return vscreen->caps.caps.v1.max_render_targets;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return vscreen->caps.caps.v1.max_dual_source_render_targets;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return vscreen->caps.caps.v1.bset.occlusion_query;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return vscreen->caps.caps.v1.bset.mirror_clamp;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return SP_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return SP_MAX_TEXTURE_3D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return SP_MAX_TEXTURE_CUBE_LEVELS;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return vscreen->caps.caps.v1.bset.indep_blend_enable;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return vscreen->caps.caps.v1.bset.indep_blend_func;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return vscreen->caps.caps.v1.bset.fragment_coord_conventions;
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ return vscreen->caps.caps.v1.bset.depth_clip_disable;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return vscreen->caps.caps.v1.max_streamout_buffers;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 16*4;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return vscreen->caps.caps.v1.bset.primitive_restart;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return vscreen->caps.caps.v1.bset.shader_stencil_export;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ return vscreen->caps.caps.v1.bset.seamless_cube_map;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return vscreen->caps.caps.v1.bset.seamless_cube_map_per_texture;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return vscreen->caps.caps.v1.max_texture_array_layers;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ return 7;
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ return vscreen->caps.caps.v1.bset.conditional_render;
+ case PIPE_CAP_TEXTURE_BARRIER:
+ return 0;
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return vscreen->caps.caps.v1.bset.color_clamping;
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return vscreen->caps.caps.v1.glsl_level;
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return 0;
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 0;
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return vscreen->caps.caps.v1.bset.streamout_pause_resume;
+ case PIPE_CAP_START_INSTANCE:
+ return vscreen->caps.caps.v1.bset.start_instance;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 0;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 1;
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ return 0;
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 0;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return VIRGL_MAP_BUFFER_ALIGNMENT;
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return vscreen->caps.caps.v1.max_tbo_size > 0;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 0;
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return vscreen->caps.caps.v1.bset.cube_map_array;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return vscreen->caps.caps.v1.bset.texture_multisample;
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return vscreen->caps.caps.v1.max_viewports;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return vscreen->caps.caps.v1.max_tbo_size;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_ENDIANNESS:
+ return 0;
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ return 1;
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ return 0;
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ return 1024;
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 16384;
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ return vscreen->caps.caps.v1.bset.texture_query_lod;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ return vscreen->caps.caps.v1.max_texture_gather_components;
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ return 0;
+ case PIPE_CAP_VENDOR_ID:
+ return 0x1af4;
+ case PIPE_CAP_DEVICE_ID:
+ return 0x1010;
+ case PIPE_CAP_ACCELERATED:
+ return 1;
+ case PIPE_CAP_UMA:
+ case PIPE_CAP_VIDEO_MEMORY:
+ return 0;
+ }
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAP %d query\n", param);
+ return 0;
+}
+
+static int
+virgl_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ switch(shader)
+ {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return INT_MAX;
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (vscreen->caps.caps.v1.glsl_level < 150)
+ return 16;
+ return shader == PIPE_SHADER_VERTEX ? 16 : 32;
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
+ return 128;
+ // case PIPE_SHADER_CAP_MAX_CONSTS:
+ // return 4096;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return 256;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return vscreen->caps.caps.v1.max_uniform_blocks;
+ // case PIPE_SHADER_CAP_MAX_ADDRS:
+ // return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ return 16;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return vscreen->caps.caps.v1.glsl_level >= 130;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 32;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ return 4096 * sizeof(float[4]);
+ default:
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+static float
+virgl_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 16.0;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 16.0; /* arbitrary */
+ case PIPE_CAPF_GUARD_BAND_LEFT:
+ case PIPE_CAPF_GUARD_BAND_TOP:
+ case PIPE_CAPF_GUARD_BAND_RIGHT:
+ case PIPE_CAPF_GUARD_BAND_BOTTOM:
+ return 0.0;
+ }
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+ return 0.0;
+}
+
+static boolean
+virgl_is_vertex_format_supported(struct pipe_screen *screen,
+ enum pipe_format format)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ const struct util_format_description *format_desc;
+ int i;
+
+ format_desc = util_format_description(format);
+ if (!format_desc)
+ return FALSE;
+
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+ int vformat = VIRGL_FORMAT_R11G11B10_FLOAT;
+ int big = vformat / 32;
+ int small = vformat % 32;
+ if (!(vscreen->caps.caps.v1.vertexbuffer.bitmask[big] & (1 << small)))
+ return FALSE;
+ return TRUE;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return FALSE;
+
+ if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return FALSE;
+
+ if (format_desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
+ return FALSE;
+ return TRUE;
+}
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format the format to test
+ * \param type one of PIPE_TEXTURE, PIPE_SURFACE
+ */
+static boolean
+virgl_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bind)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ const struct util_format_description *format_desc;
+ int i;
+
+ assert(target == PIPE_BUFFER ||
+ target == PIPE_TEXTURE_1D ||
+ target == PIPE_TEXTURE_1D_ARRAY ||
+ target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_2D_ARRAY ||
+ target == PIPE_TEXTURE_RECT ||
+ target == PIPE_TEXTURE_3D ||
+ target == PIPE_TEXTURE_CUBE ||
+ target == PIPE_TEXTURE_CUBE_ARRAY);
+
+ format_desc = util_format_description(format);
+ if (!format_desc)
+ return FALSE;
+
+ if (util_format_is_intensity(format))
+ return FALSE;
+
+ if (sample_count > 1) {
+ if (!vscreen->caps.caps.v1.bset.texture_multisample)
+ return FALSE;
+ if (sample_count > vscreen->caps.caps.v1.max_samples)
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_VERTEX_BUFFER) {
+ return virgl_is_vertex_format_supported(screen, format);
+ }
+
+ if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ /*
+ * Although possible, it is unnatural to render into compressed or YUV
+ * surfaces. So disable these here to avoid going into weird paths
+ * inside the state trackers.
+ */
+ if (format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+
+ {
+ int big = format / 32;
+ int small = format % 32;
+ if (!(vscreen->caps.caps.v1.render.bitmask[big] & (1 << small)))
+ return FALSE;
+ }
+ }
+
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+ }
+
+ /*
+ * All other operations (sampling, transfer, etc).
+ */
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ if (util_format_s3tc_enabled)
+ goto out_lookup;
+ return FALSE;
+ }
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ goto out_lookup;
+ }
+
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+ goto out_lookup;
+ } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+ goto out_lookup;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ if (i == 4)
+ return FALSE;
+
+ /* no L4A4 */
+ if (format_desc->nr_channels < 4 && format_desc->channel[i].size == 4)
+ return FALSE;
+
+ out_lookup:
+ {
+ int big = format / 32;
+ int small = format % 32;
+ if (!(vscreen->caps.caps.v1.sampler.bitmask[big] & (1 << small)))
+ return FALSE;
+ }
+ /*
+ * Everything else should be supported by u_format.
+ */
+ return TRUE;
+}
+
+static void virgl_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_resource *res,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle, struct pipe_box *sub_box)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ struct virgl_winsys *vws = vscreen->vws;
+ struct virgl_resource *vres = (struct virgl_resource *)res;
+
+ if (vws->flush_frontbuffer)
+ vws->flush_frontbuffer(vws, vres->hw_res, level, layer, winsys_drawable_handle,
+ sub_box);
+}
+
+static void virgl_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ struct virgl_winsys *vws = vscreen->vws;
+
+ vws->fence_reference(vws, ptr, fence);
+}
+
+static boolean virgl_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ struct virgl_winsys *vws = vscreen->vws;
+
+ return vws->fence_wait(vws, fence, timeout);
+}
+
+static uint64_t
+virgl_get_timestamp(struct pipe_screen *_screen)
+{
+ return os_time_get_nano();
+}
+
+static void
+virgl_destroy_screen(struct pipe_screen *screen)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ struct virgl_winsys *vws = vscreen->vws;
+
+ if (vws)
+ vws->destroy(vws);
+ FREE(vscreen);
+}
+
+struct pipe_screen *
+virgl_create_screen(struct virgl_winsys *vws)
+{
+ struct virgl_screen *screen = CALLOC_STRUCT(virgl_screen);
+
+ if (!screen)
+ return NULL;
+
+ screen->vws = vws;
+ screen->winsys = NULL;
+ screen->base.get_name = virgl_get_name;
+ screen->base.get_vendor = virgl_get_vendor;
+ screen->base.get_param = virgl_get_param;
+ screen->base.get_shader_param = virgl_get_shader_param;
+ screen->base.get_paramf = virgl_get_paramf;
+ screen->base.is_format_supported = virgl_is_format_supported;
+ screen->base.destroy = virgl_destroy_screen;
+ screen->base.context_create = virgl_context_create;
+ screen->base.flush_frontbuffer = virgl_flush_frontbuffer;
+ screen->base.get_timestamp = virgl_get_timestamp;
+ screen->base.fence_reference = virgl_fence_reference;
+ //screen->base.fence_signalled = virgl_fence_signalled;
+ screen->base.fence_finish = virgl_fence_finish;
+
+ virgl_init_screen_resource_functions(&screen->base);
+
+ vws->get_caps(vws, &screen->caps);
+
+
+ util_format_s3tc_init();
+ return &screen->base;
+}
diff --git a/src/gallium/drivers/virgl/virgl_streamout.c b/src/gallium/drivers/virgl/virgl_streamout.c
new file mode 100644
index 00000000000..95420f688d3
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_streamout.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_resource.h"
+
+static struct pipe_stream_output_target *virgl_create_so_target(
+ struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_resource *res = (struct virgl_resource *)buffer;
+ struct virgl_so_target *t = CALLOC_STRUCT(virgl_so_target);
+ uint32_t handle;
+
+ if (!t)
+ return NULL;
+ handle = virgl_object_assign_handle();
+
+ t->base.reference.count = 1;
+ t->base.context = ctx;
+ pipe_resource_reference(&t->base.buffer, buffer);
+ t->base.buffer_offset = buffer_offset;
+ t->base.buffer_size = buffer_size;
+ t->handle = handle;
+ res->clean = FALSE;
+ virgl_encoder_create_so_target(vctx, handle, res, buffer_offset, buffer_size);
+ return &t->base;
+}
+
+static void virgl_destroy_so_target(struct pipe_context *ctx,
+ struct pipe_stream_output_target *target)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_so_target *t = (struct virgl_so_target *)target;
+
+ pipe_resource_reference(&t->base.buffer, NULL);
+ virgl_encode_delete_object(vctx, t->handle, VIRGL_OBJECT_STREAMOUT_TARGET);
+ FREE(t);
+}
+
+static void virgl_set_so_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offset)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ int i;
+ for (i = 0; i < num_targets; i++) {
+ pipe_resource_reference(&vctx->so_targets[i].base.buffer, targets[i]->buffer);
+ }
+ for (i = num_targets; i < vctx->num_so_targets; i++)
+ pipe_resource_reference(&vctx->so_targets[i].base.buffer, NULL);
+ vctx->num_so_targets = num_targets;
+ virgl_encoder_set_so_targets(vctx, num_targets, targets, 0);//append_bitmask);
+}
+
+void virgl_init_so_functions(struct virgl_context *vctx)
+{
+ vctx->base.create_stream_output_target = virgl_create_so_target;
+ vctx->base.stream_output_target_destroy = virgl_destroy_so_target;
+ vctx->base.set_stream_output_targets = virgl_set_so_targets;
+}
diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c
new file mode 100644
index 00000000000..97d02eba567
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_texture.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "virgl.h"
+#include "virgl_resource.h"
+#include "virgl_context.h"
+
+static void virgl_copy_region_with_blit(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct pipe_blit_info blit;
+
+ memset(&blit, 0, sizeof(blit));
+ blit.src.resource = src;
+ blit.src.format = src->format;
+ blit.src.level = src_level;
+ blit.src.box = *src_box;
+ blit.dst.resource = dst;
+ blit.dst.format = dst->format;
+ blit.dst.level = dst_level;
+ blit.dst.box.x = dstx;
+ blit.dst.box.y = dsty;
+ blit.dst.box.z = dstz;
+ blit.dst.box.width = src_box->width;
+ blit.dst.box.height = src_box->height;
+ blit.dst.box.depth = src_box->depth;
+ blit.mask = util_format_get_mask(src->format) &
+ util_format_get_mask(dst->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ if (blit.mask) {
+ pipe->blit(pipe, &blit);
+ }
+}
+static void virgl_init_temp_resource_from_box(struct pipe_resource *res,
+ struct pipe_resource *orig,
+ const struct pipe_box *box,
+ unsigned level, unsigned flags)
+{
+ memset(res, 0, sizeof(*res));
+ res->format = orig->format;
+ res->width0 = box->width;
+ res->height0 = box->height;
+ res->depth0 = 1;
+ res->array_size = 1;
+ res->usage = PIPE_USAGE_STAGING;
+ res->flags = flags;
+
+ /* We must set the correct texture target and dimensions for a 3D box. */
+ if (box->depth > 1 && util_max_layer(orig, level) > 0)
+ res->target = orig->target;
+ else
+ res->target = PIPE_TEXTURE_2D;
+
+ switch (res->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ res->array_size = box->depth;
+ break;
+ case PIPE_TEXTURE_3D:
+ res->depth0 = box->depth;
+ break;
+ default:
+ break;
+ }
+}
+
+static unsigned
+vrend_get_tex_image_offset(const struct virgl_texture *res,
+ unsigned level, unsigned layer)
+{
+ const struct pipe_resource *pres = &res->base.u.b;
+ const unsigned hgt = u_minify(pres->height0, level);
+ const unsigned nblocksy = util_format_get_nblocksy(pres->format, hgt);
+ unsigned offset = res->level_offset[level];
+
+ if (pres->target == PIPE_TEXTURE_CUBE ||
+ pres->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ pres->target == PIPE_TEXTURE_3D ||
+ pres->target == PIPE_TEXTURE_2D_ARRAY) {
+ offset += layer * nblocksy * res->stride[level];
+ }
+ else if (pres->target == PIPE_TEXTURE_1D_ARRAY) {
+ offset += layer * res->stride[level];
+ }
+ else {
+ assert(layer == 0);
+ }
+
+ return offset;
+}
+
+static void *virgl_texture_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **transfer)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ struct virgl_texture *vtex = (struct virgl_texture *)resource;
+ enum pipe_format format = resource->format;
+ struct virgl_transfer *trans;
+ void *ptr;
+ boolean readback = TRUE;
+ uint32_t offset;
+ struct virgl_hw_res *hw_res;
+ const unsigned h = u_minify(vtex->base.u.b.height0, level);
+ const unsigned nblocksy = util_format_get_nblocksy(format, h);
+ bool is_depth = util_format_has_depth(util_format_description(resource->format));
+ uint32_t l_stride;
+ bool doflushwait;
+
+ doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage);
+ if (doflushwait)
+ ctx->flush(ctx, NULL, 0);
+
+ trans = util_slab_alloc(&vctx->texture_transfer_pool);
+ if (trans == NULL)
+ return NULL;
+
+ trans->base.resource = resource;
+ trans->base.level = level;
+ trans->base.usage = usage;
+ trans->base.box = *box;
+ trans->base.stride = vtex->stride[level];
+ trans->base.layer_stride = trans->base.stride * nblocksy;
+
+ if (resource->target != PIPE_TEXTURE_3D &&
+ resource->target != PIPE_TEXTURE_CUBE &&
+ resource->target != PIPE_TEXTURE_1D_ARRAY &&
+ resource->target != PIPE_TEXTURE_2D_ARRAY &&
+ resource->target != PIPE_TEXTURE_CUBE_ARRAY)
+ l_stride = 0;
+ else
+ l_stride = trans->base.layer_stride;
+
+ if (is_depth && resource->nr_samples > 1) {
+ struct pipe_resource tmp_resource;
+ virgl_init_temp_resource_from_box(&tmp_resource, resource, box,
+ level, 0);
+
+ trans->resolve_tmp = (struct virgl_resource *)ctx->screen->resource_create(ctx->screen, &tmp_resource);
+
+ virgl_copy_region_with_blit(ctx, &trans->resolve_tmp->u.b, 0, 0, 0, 0, resource, level, box);
+ ctx->flush(ctx, NULL, 0);
+ /* we want to do a resolve blit into the temporary */
+ hw_res = trans->resolve_tmp->hw_res;
+ offset = 0;
+ } else {
+ offset = vrend_get_tex_image_offset(vtex, level, box->z);
+
+ offset += box->y / util_format_get_blockheight(format) * trans->base.stride +
+ box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ hw_res = vtex->base.hw_res;
+ trans->resolve_tmp = NULL;
+ }
+
+ readback = virgl_res_needs_readback(vctx, &vtex->base, usage);
+ if (readback)
+ vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level);
+
+ if (doflushwait || readback)
+ vs->vws->resource_wait(vs->vws, vtex->base.hw_res);
+
+ ptr = vs->vws->resource_map(vs->vws, hw_res);
+ if (!ptr) {
+ return NULL;
+ }
+
+ trans->offset = offset;
+ *transfer = &trans->base;
+
+ return ptr + trans->offset;
+}
+
+static void virgl_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct virgl_context *vctx = (struct virgl_context *)ctx;
+ struct virgl_transfer *trans = (struct virgl_transfer *)transfer;
+ struct virgl_texture *vtex = (struct virgl_texture *)transfer->resource;
+ uint32_t l_stride;
+
+ if (transfer->resource->target != PIPE_TEXTURE_3D &&
+ transfer->resource->target != PIPE_TEXTURE_CUBE &&
+ transfer->resource->target != PIPE_TEXTURE_1D_ARRAY &&
+ transfer->resource->target != PIPE_TEXTURE_2D_ARRAY &&
+ transfer->resource->target != PIPE_TEXTURE_CUBE_ARRAY)
+ l_stride = 0;
+ else
+ l_stride = trans->base.layer_stride;
+
+ if (trans->base.usage & PIPE_TRANSFER_WRITE) {
+ if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ vtex->base.clean = FALSE;
+ vctx->num_transfers++;
+ vs->vws->transfer_put(vs->vws, vtex->base.hw_res,
+ &transfer->box, trans->base.stride, l_stride, trans->offset, transfer->level);
+
+ }
+ }
+
+ if (trans->resolve_tmp)
+ pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL);
+
+ util_slab_free(&vctx->texture_transfer_pool, trans);
+}
+
+
+static boolean
+vrend_resource_layout(struct virgl_texture *res,
+ uint32_t *total_size)
+{
+ struct pipe_resource *pt = &res->base.u.b;
+ unsigned level;
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
+ unsigned buffer_size = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ unsigned slices;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ slices = 6;
+ else if (pt->target == PIPE_TEXTURE_3D)
+ slices = depth;
+ else
+ slices = pt->array_size;
+
+ res->stride[level] = util_format_get_stride(pt->format, width);
+ res->level_offset[level] = buffer_size;
+
+ buffer_size += (util_format_get_nblocksy(pt->format, height) *
+ slices * res->stride[level]);
+
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+ }
+
+ if (pt->nr_samples <= 1)
+ *total_size = buffer_size;
+ else /* don't create guest backing store for MSAA */
+ *total_size = 0;
+ return TRUE;
+}
+
+static boolean virgl_texture_get_handle(struct pipe_screen *screen,
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
+{
+ struct virgl_screen *vs = virgl_screen(screen);
+ struct virgl_texture *vtex = (struct virgl_texture *)ptex;
+
+ return vs->vws->resource_get_handle(vs->vws, vtex->base.hw_res, vtex->stride[0], whandle);
+}
+
+static void virgl_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource *res)
+{
+ struct virgl_screen *vs = virgl_screen(screen);
+ struct virgl_texture *vtex = (struct virgl_texture *)res;
+ vs->vws->resource_unref(vs->vws, vtex->base.hw_res);
+ FREE(vtex);
+}
+
+static const struct u_resource_vtbl virgl_texture_vtbl =
+{
+ virgl_texture_get_handle, /* get_handle */
+ virgl_texture_destroy, /* resource_destroy */
+ virgl_texture_transfer_map, /* transfer_map */
+ NULL, /* transfer_flush_region */
+ virgl_texture_transfer_unmap, /* transfer_unmap */
+ NULL /* transfer_inline_write */
+};
+
+struct pipe_resource *
+virgl_texture_from_handle(struct virgl_screen *vs,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle)
+{
+ struct virgl_texture *tex;
+ uint32_t size;
+
+ tex = CALLOC_STRUCT(virgl_texture);
+ tex->base.u.b = *template;
+ tex->base.u.b.screen = &vs->base;
+ pipe_reference_init(&tex->base.u.b.reference, 1);
+ tex->base.u.vtbl = &virgl_texture_vtbl;
+ vrend_resource_layout(tex, &size);
+
+ tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle);
+ return &tex->base.u.b;
+}
+
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+ const struct pipe_resource *template)
+{
+ struct virgl_texture *tex;
+ uint32_t size;
+ unsigned vbind;
+
+ tex = CALLOC_STRUCT(virgl_texture);
+ tex->base.clean = TRUE;
+ tex->base.u.b = *template;
+ tex->base.u.b.screen = &vs->base;
+ pipe_reference_init(&tex->base.u.b.reference, 1);
+ tex->base.u.vtbl = &virgl_texture_vtbl;
+ vrend_resource_layout(tex, &size);
+
+ vbind = pipe_to_virgl_bind(template->bind);
+ tex->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, template->height0, template->depth0, template->array_size, template->last_level, template->nr_samples, size);
+ if (!tex->base.hw_res) {
+ FREE(tex);
+ return NULL;
+ }
+ return &tex->base.u.b;
+}
diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c
new file mode 100644
index 00000000000..641b0b3e3b5
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* the virgl hw tgsi vs what the current gallium want will diverge over time.
+ so add a transform stage to remove things we don't want to send unless
+ the receiver supports it.
+*/
+#include "tgsi/tgsi_transform.h"
+#include "virgl_context.h"
+struct virgl_transform_context {
+ struct tgsi_transform_context base;
+};
+
+/* for now just strip out the new properties the remote doesn't understand
+ yet */
+static void
+virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
+ struct tgsi_full_property *prop)
+{
+ switch (prop->Property.PropertyName) {
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ break;
+ default:
+ ctx->emit_property(ctx, prop);
+ break;
+ }
+}
+
+struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
+{
+
+ struct virgl_transform_context transform;
+ const uint newLen = tgsi_num_tokens(tokens_in);
+ struct tgsi_token *new_tokens;
+
+ new_tokens = tgsi_alloc_tokens(newLen);
+ if (!new_tokens)
+ return NULL;
+
+ memset(&transform, 0, sizeof(transform));
+ transform.base.transform_property = virgl_tgsi_transform_property;
+ tgsi_transform_shader(tokens_in, new_tokens, newLen, &transform.base);
+
+ return new_tokens;
+}
diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h
new file mode 100644
index 00000000000..76d401b5c12
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_winsys.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_WINSYS_H
+#define VIRGL_WINSYS_H
+
+#include "pipe/p_compiler.h"
+
+struct winsys_handle;
+struct virgl_hw_res;
+
+#define VIRGL_MAX_CMDBUF_DWORDS (16*1024)
+
+struct virgl_drm_caps {
+ union virgl_caps caps;
+};
+
+struct virgl_cmd_buf {
+ unsigned cdw;
+ uint32_t *buf;
+};
+
+struct virgl_winsys {
+ unsigned pci_id;
+
+ void (*destroy)(struct virgl_winsys *vws);
+
+ int (*transfer_put)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level);
+
+ int (*transfer_get)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level);
+
+ struct virgl_hw_res *(*resource_create)(struct virgl_winsys *vws,
+ enum pipe_texture_target target,
+ uint32_t format, uint32_t bind,
+ uint32_t width, uint32_t height,
+ uint32_t depth, uint32_t array_size,
+ uint32_t last_level, uint32_t nr_samples,
+ uint32_t size);
+
+ void (*resource_unref)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+
+ void *(*resource_map)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+ void (*resource_wait)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+
+ struct virgl_hw_res *(*resource_create_from_handle)(struct virgl_winsys *vws,
+ struct winsys_handle *whandle);
+ boolean (*resource_get_handle)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ uint32_t stride,
+ struct winsys_handle *whandle);
+
+ struct virgl_cmd_buf *(*cmd_buf_create)(struct virgl_winsys *ws);
+ void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf);
+
+ void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer);
+ int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf);
+
+ boolean (*res_is_referenced)(struct virgl_winsys *vws,
+ struct virgl_cmd_buf *buf,
+ struct virgl_hw_res *res);
+
+ int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps);
+
+ /* fence */
+ struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws);
+ bool (*fence_wait)(struct virgl_winsys *vws,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout);
+
+ void (*fence_reference)(struct virgl_winsys *vws,
+ struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src);
+
+ /* for sw paths */
+ void (*flush_frontbuffer)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle,
+ struct pipe_box *sub_box);
+};
+
+
+#endif
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 7f945d14b5c..95efdd4451c 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -83,6 +83,8 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc
include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc
+include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc
+
include $(top_srcdir)/src/gallium/drivers/softpipe/Automake.inc
include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc
diff --git a/src/gallium/winsys/virgl/drm/Makefile.am b/src/gallium/winsys/virgl/drm/Makefile.am
new file mode 100644
index 00000000000..49680e065be
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/Makefile.am
@@ -0,0 +1,34 @@
+# Copyright © 2015 Red Hat Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(GALLIUM_CFLAGS) \
+ $(LIBDRM_CFLAGS)
+
+noinst_LTLIBRARIES = libvirgldrm.la
+
+libvirgldrm_la_SOURCES = $(C_SOURCES)
diff --git a/src/gallium/winsys/virgl/drm/Makefile.sources b/src/gallium/winsys/virgl/drm/Makefile.sources
new file mode 100644
index 00000000000..c0baed87c79
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/Makefile.sources
@@ -0,0 +1,2 @@
+C_SOURCES := \
+ virgl_drm_winsys.c
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_public.h b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
new file mode 100644
index 00000000000..be01021ca9a
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_public.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_DRM_PUBLIC_H
+#define VIRGL_DRM_PUBLIC_H
+
+struct virgl_winsys;
+
+struct virgl_winsys *virgl_drm_winsys_create(int drmFD);
+
+#endif
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
new file mode 100644
index 00000000000..77972cfc358
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/* TODO - remove this */
+#define _FILE_OFFSET_BITS 64
+
+#include "virgl_drm_winsys.h"
+#include "virgl_drm_public.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "state_tracker/drm_driver.h"
+
+#include "os/os_mman.h"
+#include "os/os_time.h"
+#include
+#include
+#include
+#include
+#include
+#include "virtgpu_drm.h"
+
+static inline boolean can_cache_resource(struct virgl_hw_res *res)
+{
+ return res->cacheable == TRUE;
+}
+
+static void virgl_hw_res_destroy(struct virgl_drm_winsys *qdws,
+ struct virgl_hw_res *res)
+{
+ struct drm_gem_close args;
+
+ if (res->name) {
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+ util_hash_table_remove(qdws->bo_handles,
+ (void *)(uintptr_t)res->name);
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ }
+
+ if (res->ptr)
+ os_munmap(res->ptr, res->size);
+
+ args.handle = res->bo_handle;
+ drmIoctl(qdws->fd, DRM_IOCTL_GEM_CLOSE, &args);
+ FREE(res);
+}
+
+static boolean virgl_drm_resource_is_busy(struct virgl_drm_winsys *qdws, struct virgl_hw_res *res)
+{
+ struct drm_virtgpu_3d_wait waitcmd;
+ int ret;
+
+ waitcmd.handle = res->bo_handle;
+ waitcmd.flags = VIRTGPU_WAIT_NOWAIT;
+
+ ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_WAIT, &waitcmd);
+ if (ret && errno == EBUSY)
+ return TRUE;
+ return FALSE;
+}
+
+static void
+virgl_cache_flush(struct virgl_drm_winsys *qdws)
+{
+ struct list_head *curr, *next;
+ struct virgl_hw_res *res;
+
+ pipe_mutex_lock(qdws->mutex);
+ curr = qdws->delayed.next;
+ next = curr->next;
+
+ while (curr != &qdws->delayed) {
+ res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ LIST_DEL(&res->head);
+ virgl_hw_res_destroy(qdws, res);
+ curr = next;
+ next = curr->next;
+ }
+ pipe_mutex_unlock(qdws->mutex);
+}
+static void
+virgl_drm_winsys_destroy(struct virgl_winsys *qws)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+
+ virgl_cache_flush(qdws);
+
+ util_hash_table_destroy(qdws->bo_handles);
+ pipe_mutex_destroy(qdws->bo_handles_mutex);
+ pipe_mutex_destroy(qdws->mutex);
+
+ FREE(qdws);
+}
+
+static void
+virgl_cache_list_check_free(struct virgl_drm_winsys *qdws)
+{
+ struct list_head *curr, *next;
+ struct virgl_hw_res *res;
+ int64_t now;
+
+ now = os_time_get();
+ curr = qdws->delayed.next;
+ next = curr->next;
+ while (curr != &qdws->delayed) {
+ res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ if (!os_time_timeout(res->start, res->end, now))
+ break;
+
+ LIST_DEL(&res->head);
+ virgl_hw_res_destroy(qdws, res);
+ curr = next;
+ next = curr->next;
+ }
+}
+
+static void virgl_drm_resource_reference(struct virgl_drm_winsys *qdws,
+ struct virgl_hw_res **dres,
+ struct virgl_hw_res *sres)
+{
+ struct virgl_hw_res *old = *dres;
+ if (pipe_reference(&(*dres)->reference, &sres->reference)) {
+
+ if (!can_cache_resource(old)) {
+ virgl_hw_res_destroy(qdws, old);
+ } else {
+ pipe_mutex_lock(qdws->mutex);
+ virgl_cache_list_check_free(qdws);
+
+ old->start = os_time_get();
+ old->end = old->start + qdws->usecs;
+ LIST_ADDTAIL(&old->head, &qdws->delayed);
+ qdws->num_delayed++;
+ pipe_mutex_unlock(qdws->mutex);
+ }
+ }
+ *dres = sres;
+}
+
+static struct virgl_hw_res *virgl_drm_winsys_resource_create(struct virgl_winsys *qws,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples,
+ uint32_t size)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct drm_virtgpu_resource_create createcmd;
+ int ret;
+ struct virgl_hw_res *res;
+ uint32_t stride = width * util_format_get_blocksize(format);
+
+ res = CALLOC_STRUCT(virgl_hw_res);
+ if (!res)
+ return NULL;
+
+ createcmd.target = target;
+ createcmd.format = format;
+ createcmd.bind = bind;
+ createcmd.width = width;
+ createcmd.height = height;
+ createcmd.depth = depth;
+ createcmd.array_size = array_size;
+ createcmd.last_level = last_level;
+ createcmd.nr_samples = nr_samples;
+ createcmd.res_handle = 0;
+ createcmd.stride = stride;
+ createcmd.size = size;
+ createcmd.flags = 0;
+
+ ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE, &createcmd);
+ if (ret != 0) {
+ FREE(res);
+ return NULL;
+ }
+
+ res->bind = bind;
+ res->format = format;
+
+ res->res_handle = createcmd.res_handle;
+ res->bo_handle = createcmd.bo_handle;
+ res->size = size;
+ res->stride = stride;
+ pipe_reference_init(&res->reference, 1);
+ res->num_cs_references = 0;
+ return res;
+}
+
+static inline int virgl_is_res_compat(struct virgl_drm_winsys *qdws,
+ struct virgl_hw_res *res,
+ uint32_t size, uint32_t bind, uint32_t format)
+{
+ if (res->bind != bind)
+ return 0;
+ if (res->format != format)
+ return 0;
+ if (res->size < size)
+ return 0;
+ if (res->size > size * 2)
+ return 0;
+
+ if (virgl_drm_resource_is_busy(qdws, res)) {
+ return -1;
+ }
+
+ return 1;
+}
+
+static int
+virgl_bo_transfer_put(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level)
+{
+ struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws);
+ struct drm_virtgpu_3d_transfer_to_host tohostcmd;
+ int ret;
+
+ tohostcmd.bo_handle = res->bo_handle;
+ tohostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+ tohostcmd.offset = buf_offset;
+ tohostcmd.level = level;
+ // tohostcmd.stride = stride;
+ // tohostcmd.layer_stride = stride;
+ ret = drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST, &tohostcmd);
+ return ret;
+}
+
+static int
+virgl_bo_transfer_get(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level)
+{
+ struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws);
+ struct drm_virtgpu_3d_transfer_from_host fromhostcmd;
+ int ret;
+
+ fromhostcmd.bo_handle = res->bo_handle;
+ fromhostcmd.level = level;
+ fromhostcmd.offset = buf_offset;
+ // fromhostcmd.stride = stride;
+ // fromhostcmd.layer_stride = layer_stride;
+ fromhostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+ ret = drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST, &fromhostcmd);
+ return ret;
+}
+
+static struct virgl_hw_res *virgl_drm_winsys_resource_cache_create(struct virgl_winsys *qws,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples,
+ uint32_t size)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct virgl_hw_res *res, *curr_res;
+ struct list_head *curr, *next;
+ int64_t now;
+ int ret;
+
+ /* only store binds for vertex/index/const buffers */
+ if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER &&
+ bind != VIRGL_BIND_VERTEX_BUFFER && bind != VIRGL_BIND_CUSTOM)
+ goto alloc;
+
+ pipe_mutex_lock(qdws->mutex);
+
+ res = NULL;
+ curr = qdws->delayed.next;
+ next = curr->next;
+
+ now = os_time_get();
+ while (curr != &qdws->delayed) {
+ curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+
+ if (!res && (ret = virgl_is_res_compat(qdws, curr_res, size, bind, format) > 0))
+ res = curr_res;
+ else if (os_time_timeout(curr_res->start, curr_res->end, now)) {
+ LIST_DEL(&curr_res->head);
+ virgl_hw_res_destroy(qdws, curr_res);
+ } else
+ break;
+
+ if (ret == -1)
+ break;
+
+ curr = next;
+ next = curr->next;
+ }
+
+ if (!res && ret != -1) {
+ while (curr != &qdws->delayed) {
+ curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ ret = virgl_is_res_compat(qdws, curr_res, size, bind, format);
+ if (ret > 0) {
+ res = curr_res;
+ break;
+ }
+ if (ret == -1)
+ break;
+ curr = next;
+ next = curr->next;
+ }
+ }
+
+ if (res) {
+ LIST_DEL(&res->head);
+ --qdws->num_delayed;
+ pipe_mutex_unlock(qdws->mutex);
+ pipe_reference_init(&res->reference, 1);
+ return res;
+ }
+
+ pipe_mutex_unlock(qdws->mutex);
+
+alloc:
+ res = virgl_drm_winsys_resource_create(qws, target, format, bind,
+ width, height, depth, array_size,
+ last_level, nr_samples, size);
+ if (bind == VIRGL_BIND_CONSTANT_BUFFER || bind == VIRGL_BIND_INDEX_BUFFER ||
+ bind == VIRGL_BIND_VERTEX_BUFFER)
+ res->cacheable = TRUE;
+ return res;
+}
+
+static struct virgl_hw_res *virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
+ struct winsys_handle *whandle)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct drm_gem_open open_arg = {};
+ struct drm_virtgpu_resource_info info_arg = {};
+ struct virgl_hw_res *res;
+
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
+ res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle);
+ if (res) {
+ struct virgl_hw_res *r = NULL;
+ virgl_drm_resource_reference(qdws, &r, res);
+ goto done;
+ }
+ }
+
+ res = CALLOC_STRUCT(virgl_hw_res);
+ if (!res)
+ goto done;
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+ int r;
+ uint32_t handle;
+ r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle);
+ if (r) {
+ FREE(res);
+ res = NULL;
+ goto done;
+ }
+ res->bo_handle = handle;
+ } else {
+ memset(&open_arg, 0, sizeof(open_arg));
+ open_arg.name = whandle->handle;
+ if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
+ FREE(res);
+ res = NULL;
+ goto done;
+ }
+ res->bo_handle = open_arg.handle;
+ }
+ res->name = whandle->handle;
+
+ memset(&info_arg, 0, sizeof(info_arg));
+ info_arg.bo_handle = res->bo_handle;
+
+ if (drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, &info_arg)) {
+ /* close */
+ FREE(res);
+ res = NULL;
+ goto done;
+ }
+
+ res->res_handle = info_arg.res_handle;
+
+ res->size = info_arg.size;
+ res->stride = info_arg.stride;
+ pipe_reference_init(&res->reference, 1);
+ res->num_cs_references = 0;
+
+ util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)whandle->handle, res);
+
+done:
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ return res;
+}
+
+static boolean virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws,
+ struct virgl_hw_res *res,
+ uint32_t stride,
+ struct winsys_handle *whandle)
+ {
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct drm_gem_flink flink;
+
+ if (!res)
+ return FALSE;
+ memset(&flink, 0, sizeof(flink));
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
+ if (!res->flinked) {
+ flink.handle = res->bo_handle;
+
+ if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
+ return FALSE;
+ }
+ res->flinked = TRUE;
+ res->flink = flink.name;
+
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+ util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res);
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ }
+ whandle->handle = res->flink;
+ } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+ whandle->handle = res->bo_handle;
+ } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+ if (drmPrimeHandleToFD(qdws->fd, res->bo_handle, DRM_CLOEXEC, (int*)&whandle->handle))
+ return FALSE;
+ }
+ whandle->stride = stride;
+ return TRUE;
+}
+
+static void virgl_drm_winsys_resource_unref(struct virgl_winsys *qws,
+ struct virgl_hw_res *hres)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+
+ virgl_drm_resource_reference(qdws, &hres, NULL);
+}
+
+static void *virgl_drm_resource_map(struct virgl_winsys *qws, struct virgl_hw_res *res)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct drm_virtgpu_map mmap_arg;
+ void *ptr;
+
+ if (res->ptr)
+ return res->ptr;
+
+ mmap_arg.handle = res->bo_handle;
+ if (drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_MAP, &mmap_arg))
+ return NULL;
+
+ ptr = os_mmap(0, res->size, PROT_READ|PROT_WRITE, MAP_SHARED,
+ qdws->fd, mmap_arg.offset);
+ if (ptr == MAP_FAILED)
+ return NULL;
+
+ res->ptr = ptr;
+ return ptr;
+
+}
+
+static void virgl_drm_resource_wait(struct virgl_winsys *qws, struct virgl_hw_res *res)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct drm_virtgpu_3d_wait waitcmd;
+ int ret;
+
+ waitcmd.handle = res->bo_handle;
+ waitcmd.flags = 0;
+ again:
+ ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_WAIT, &waitcmd);
+ if (ret == -EAGAIN)
+ goto again;
+}
+
+static struct virgl_cmd_buf *virgl_drm_cmd_buf_create(struct virgl_winsys *qws)
+{
+ struct virgl_drm_cmd_buf *cbuf;
+
+ cbuf = CALLOC_STRUCT(virgl_drm_cmd_buf);
+ if (!cbuf)
+ return NULL;
+
+ cbuf->ws = qws;
+
+ cbuf->nres = 512;
+ cbuf->res_bo = (struct virgl_hw_res **)
+ CALLOC(cbuf->nres, sizeof(struct virgl_hw_buf*));
+ if (!cbuf->res_bo) {
+ FREE(cbuf);
+ return NULL;
+ }
+ cbuf->res_hlist = (uint32_t *)malloc(cbuf->nres * sizeof(uint32_t));
+ if (!cbuf->res_hlist) {
+ FREE(cbuf->res_bo);
+ FREE(cbuf);
+ return NULL;
+ }
+
+ cbuf->base.buf = cbuf->buf;
+ return &cbuf->base;
+}
+
+static void virgl_drm_cmd_buf_destroy(struct virgl_cmd_buf *_cbuf)
+{
+ struct virgl_drm_cmd_buf *cbuf = (struct virgl_drm_cmd_buf *)_cbuf;
+
+ FREE(cbuf->res_hlist);
+ FREE(cbuf->res_bo);
+ FREE(cbuf);
+
+}
+
+static boolean virgl_drm_lookup_res(struct virgl_drm_cmd_buf *cbuf,
+ struct virgl_hw_res *res)
+{
+ unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);
+ int i;
+
+ if (cbuf->is_handle_added[hash]) {
+ i = cbuf->reloc_indices_hashlist[hash];
+ if (cbuf->res_bo[i] == res)
+ return true;
+
+ for (i = 0; i < cbuf->cres; i++) {
+ if (cbuf->res_bo[i] == res) {
+ cbuf->reloc_indices_hashlist[hash] = i;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static void virgl_drm_add_res(struct virgl_drm_winsys *qdws,
+ struct virgl_drm_cmd_buf *cbuf, struct virgl_hw_res *res)
+{
+ unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);
+
+ if (cbuf->cres > cbuf->nres) {
+ fprintf(stderr,"failure to add relocation\n");
+ return;
+ }
+
+ cbuf->res_bo[cbuf->cres] = NULL;
+ virgl_drm_resource_reference(qdws, &cbuf->res_bo[cbuf->cres], res);
+ cbuf->res_hlist[cbuf->cres] = res->bo_handle;
+ cbuf->is_handle_added[hash] = TRUE;
+
+ cbuf->reloc_indices_hashlist[hash] = cbuf->cres;
+ p_atomic_inc(&res->num_cs_references);
+ cbuf->cres++;
+}
+
+static void virgl_drm_release_all_res(struct virgl_drm_winsys *qdws,
+ struct virgl_drm_cmd_buf *cbuf)
+{
+ int i;
+
+ for (i = 0; i < cbuf->cres; i++) {
+ p_atomic_dec(&cbuf->res_bo[i]->num_cs_references);
+ virgl_drm_resource_reference(qdws, &cbuf->res_bo[i], NULL);
+ }
+ cbuf->cres = 0;
+}
+
+static void virgl_drm_emit_res(struct virgl_winsys *qws,
+ struct virgl_cmd_buf *_cbuf, struct virgl_hw_res *res, boolean write_buf)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct virgl_drm_cmd_buf *cbuf = (struct virgl_drm_cmd_buf *)_cbuf;
+ boolean already_in_list = virgl_drm_lookup_res(cbuf, res);
+
+ if (write_buf)
+ cbuf->base.buf[cbuf->base.cdw++] = res->res_handle;
+
+ if (!already_in_list)
+ virgl_drm_add_res(qdws, cbuf, res);
+}
+
+static boolean virgl_drm_res_is_ref(struct virgl_winsys *qws,
+ struct virgl_cmd_buf *_cbuf,
+ struct virgl_hw_res *res)
+{
+ if (!res->num_cs_references)
+ return FALSE;
+
+ return TRUE;
+}
+
+static int virgl_drm_winsys_submit_cmd(struct virgl_winsys *qws, struct virgl_cmd_buf *_cbuf)
+{
+ struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws);
+ struct virgl_drm_cmd_buf *cbuf = (struct virgl_drm_cmd_buf *)_cbuf;
+ struct drm_virtgpu_execbuffer eb;
+ int ret;
+
+ if (cbuf->base.cdw == 0)
+ return 0;
+
+ memset(&eb, 0, sizeof(struct drm_virtgpu_execbuffer));
+ eb.command = (unsigned long)(void*)cbuf->buf;
+ eb.size = cbuf->base.cdw * 4;
+ eb.num_bo_handles = cbuf->cres;
+ eb.bo_handles = (unsigned long)(void *)cbuf->res_hlist;
+
+ ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb);
+ if (ret == -1)
+ fprintf(stderr,"got error from kernel - expect bad rendering %d\n", errno);
+ cbuf->base.cdw = 0;
+
+ virgl_drm_release_all_res(qdws, cbuf);
+
+ memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added));
+ return ret;
+}
+
+static int virgl_drm_get_caps(struct virgl_winsys *vws, struct virgl_drm_caps *caps)
+{
+ struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws);
+ struct drm_virtgpu_get_caps args;
+ int ret;
+
+ memset(&args, 0, sizeof(args));
+
+ args.cap_set_id = 1;
+ args.addr = (unsigned long)&caps->caps;
+ args.size = sizeof(union virgl_caps);
+ ret = drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
+ return ret;
+}
+
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+ return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+ return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
+static struct pipe_fence_handle *
+virgl_cs_create_fence(struct virgl_winsys *vws)
+{
+ struct virgl_hw_res *res;
+
+ res = virgl_drm_winsys_resource_cache_create(vws,
+ PIPE_BUFFER,
+ PIPE_FORMAT_R8_UNORM,
+ VIRGL_BIND_CUSTOM,
+ 8, 1, 1, 0, 0, 0, 8);
+
+ return (struct pipe_fence_handle *)res;
+}
+
+static bool virgl_fence_wait(struct virgl_winsys *vws,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
+{
+ struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws);
+ struct virgl_hw_res *res = (struct virgl_hw_res *)fence;
+
+ if (timeout == 0)
+ return virgl_drm_resource_is_busy(vdws, res);
+
+ if (timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t start_time = os_time_get();
+ timeout /= 1000;
+ while (virgl_drm_resource_is_busy(vdws, res)) {
+ if (os_time_get() - start_time >= timeout)
+ return FALSE;
+ os_time_sleep(10);
+ }
+ return TRUE;
+ }
+ virgl_drm_resource_wait(vws, res);
+ return TRUE;
+}
+
+static void virgl_fence_reference(struct virgl_winsys *vws,
+ struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src)
+{
+ struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws);
+ virgl_drm_resource_reference(vdws, (struct virgl_hw_res **)dst,
+ (struct virgl_hw_res *)src);
+}
+
+
+struct virgl_winsys *
+virgl_drm_winsys_create(int drmFD)
+{
+ struct virgl_drm_winsys *qdws;
+
+ qdws = CALLOC_STRUCT(virgl_drm_winsys);
+ if (!qdws)
+ return NULL;
+
+ qdws->fd = drmFD;
+ qdws->num_delayed = 0;
+ qdws->usecs = 1000000;
+ LIST_INITHEAD(&qdws->delayed);
+ pipe_mutex_init(qdws->mutex);
+ pipe_mutex_init(qdws->bo_handles_mutex);
+ qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+ qdws->base.destroy = virgl_drm_winsys_destroy;
+
+ qdws->base.transfer_put = virgl_bo_transfer_put;
+ qdws->base.transfer_get = virgl_bo_transfer_get;
+ qdws->base.resource_create = virgl_drm_winsys_resource_cache_create;
+ qdws->base.resource_unref = virgl_drm_winsys_resource_unref;
+ qdws->base.resource_create_from_handle = virgl_drm_winsys_resource_create_handle;
+ qdws->base.resource_get_handle = virgl_drm_winsys_resource_get_handle;
+ qdws->base.resource_map = virgl_drm_resource_map;
+ qdws->base.resource_wait = virgl_drm_resource_wait;
+ qdws->base.cmd_buf_create = virgl_drm_cmd_buf_create;
+ qdws->base.cmd_buf_destroy = virgl_drm_cmd_buf_destroy;
+ qdws->base.submit_cmd = virgl_drm_winsys_submit_cmd;
+ qdws->base.emit_res = virgl_drm_emit_res;
+ qdws->base.res_is_referenced = virgl_drm_res_is_ref;
+
+ qdws->base.cs_create_fence = virgl_cs_create_fence;
+ qdws->base.fence_wait = virgl_fence_wait;
+ qdws->base.fence_reference = virgl_fence_reference;
+
+ qdws->base.get_caps = virgl_drm_get_caps;
+ return &qdws->base;
+
+}
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
new file mode 100644
index 00000000000..c9b25a00977
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_DRM_WINSYS_H
+#define VIRGL_DRM_WINSYS_H
+
+#include
+#include "pipe/p_compiler.h"
+#include "drm.h"
+
+#include "os/os_thread.h"
+#include "util/list.h"
+#include "util/u_inlines.h"
+#include "util/u_hash_table.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_context.h"
+
+#include "virgl_hw.h"
+#include "virgl/virgl_winsys.h"
+
+struct virgl_drm_winsys;
+
+struct virgl_hw_res {
+ struct pipe_reference reference;
+ uint32_t res_handle;
+ uint32_t bo_handle;
+ uint32_t name;
+ int num_cs_references;
+ uint32_t size;
+ void *ptr;
+ uint32_t stride;
+
+ struct list_head head;
+ uint32_t format;
+ uint32_t bind;
+ boolean cacheable;
+ int64_t start, end;
+ boolean flinked;
+ uint32_t flink;
+};
+
+struct virgl_drm_winsys
+{
+ struct virgl_winsys base;
+ int fd;
+ struct list_head delayed;
+ int num_delayed;
+ unsigned usecs;
+ pipe_mutex mutex;
+
+ struct util_hash_table *bo_handles;
+ pipe_mutex bo_handles_mutex;
+};
+
+struct virgl_drm_cmd_buf {
+ struct virgl_cmd_buf base;
+
+ uint32_t buf[VIRGL_MAX_CMDBUF_DWORDS];
+
+ unsigned nres;
+ unsigned cres;
+ struct virgl_hw_res **res_bo;
+ struct virgl_winsys *ws;
+ uint32_t *res_hlist;
+
+ char is_handle_added[512];
+ unsigned reloc_indices_hashlist[512];
+
+};
+
+static inline struct virgl_drm_winsys *
+virgl_drm_winsys(struct virgl_winsys *iws)
+{
+ return (struct virgl_drm_winsys *)iws;
+}
+
+#endif
diff --git a/src/gallium/winsys/virgl/drm/virgl_hw.h b/src/gallium/winsys/virgl/drm/virgl_hw.h
new file mode 100644
index 00000000000..e3c56db2ac6
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/virgl_hw.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_HW_H
+#define VIRGL_HW_H
+
+struct virgl_box {
+ uint32_t x, y, z;
+ uint32_t w, h, d;
+};
+
+/* formats known by the HW device - based on gallium subset */
+enum virgl_formats {
+ VIRGL_FORMAT_B8G8R8A8_UNORM = 1,
+ VIRGL_FORMAT_B8G8R8X8_UNORM = 2,
+ VIRGL_FORMAT_A8R8G8B8_UNORM = 3,
+ VIRGL_FORMAT_X8R8G8B8_UNORM = 4,
+ VIRGL_FORMAT_B5G5R5A1_UNORM = 5,
+ VIRGL_FORMAT_B4G4R4A4_UNORM = 6,
+ VIRGL_FORMAT_B5G6R5_UNORM = 7,
+ VIRGL_FORMAT_L8_UNORM = 9, /**< ubyte luminance */
+ VIRGL_FORMAT_A8_UNORM = 10, /**< ubyte alpha */
+ VIRGL_FORMAT_L8A8_UNORM = 12, /**< ubyte alpha, luminance */
+ VIRGL_FORMAT_L16_UNORM = 13, /**< ushort luminance */
+
+ VIRGL_FORMAT_Z16_UNORM = 16,
+ VIRGL_FORMAT_Z32_UNORM = 17,
+ VIRGL_FORMAT_Z32_FLOAT = 18,
+ VIRGL_FORMAT_Z24_UNORM_S8_UINT = 19,
+ VIRGL_FORMAT_S8_UINT_Z24_UNORM = 20,
+ VIRGL_FORMAT_Z24X8_UNORM = 21,
+ VIRGL_FORMAT_S8_UINT = 23, /**< ubyte stencil */
+
+ VIRGL_FORMAT_R32_FLOAT = 28,
+ VIRGL_FORMAT_R32G32_FLOAT = 29,
+ VIRGL_FORMAT_R32G32B32_FLOAT = 30,
+ VIRGL_FORMAT_R32G32B32A32_FLOAT = 31,
+
+ VIRGL_FORMAT_R16_UNORM = 48,
+ VIRGL_FORMAT_R16G16_UNORM = 49,
+
+ VIRGL_FORMAT_R16G16B16A16_UNORM = 51,
+
+ VIRGL_FORMAT_R16_SNORM = 56,
+ VIRGL_FORMAT_R16G16_SNORM = 57,
+ VIRGL_FORMAT_R16G16B16A16_SNORM = 59,
+
+ VIRGL_FORMAT_R8_UNORM = 64,
+ VIRGL_FORMAT_R8G8_UNORM = 65,
+
+ VIRGL_FORMAT_R8G8B8A8_UNORM = 67,
+
+ VIRGL_FORMAT_R8_SNORM = 74,
+ VIRGL_FORMAT_R8G8_SNORM = 75,
+ VIRGL_FORMAT_R8G8B8_SNORM = 76,
+ VIRGL_FORMAT_R8G8B8A8_SNORM = 77,
+
+ VIRGL_FORMAT_R16_FLOAT = 91,
+ VIRGL_FORMAT_R16G16_FLOAT = 92,
+ VIRGL_FORMAT_R16G16B16_FLOAT = 93,
+ VIRGL_FORMAT_R16G16B16A16_FLOAT = 94,
+
+ VIRGL_FORMAT_L8_SRGB = 95,
+ VIRGL_FORMAT_L8A8_SRGB = 96,
+ VIRGL_FORMAT_B8G8R8A8_SRGB = 100,
+ VIRGL_FORMAT_B8G8R8X8_SRGB = 101,
+
+ /* compressed formats */
+ VIRGL_FORMAT_DXT1_RGB = 105,
+ VIRGL_FORMAT_DXT1_RGBA = 106,
+ VIRGL_FORMAT_DXT3_RGBA = 107,
+ VIRGL_FORMAT_DXT5_RGBA = 108,
+
+ /* sRGB, compressed */
+ VIRGL_FORMAT_DXT1_SRGB = 109,
+ VIRGL_FORMAT_DXT1_SRGBA = 110,
+ VIRGL_FORMAT_DXT3_SRGBA = 111,
+ VIRGL_FORMAT_DXT5_SRGBA = 112,
+
+ /* rgtc compressed */
+ VIRGL_FORMAT_RGTC1_UNORM = 113,
+ VIRGL_FORMAT_RGTC1_SNORM = 114,
+ VIRGL_FORMAT_RGTC2_UNORM = 115,
+ VIRGL_FORMAT_RGTC2_SNORM = 116,
+
+ VIRGL_FORMAT_A8B8G8R8_UNORM = 121,
+ VIRGL_FORMAT_B5G5R5X1_UNORM = 122,
+ VIRGL_FORMAT_R11G11B10_FLOAT = 124,
+ VIRGL_FORMAT_R9G9B9E5_FLOAT = 125,
+ VIRGL_FORMAT_Z32_FLOAT_S8X24_UINT = 126,
+
+ VIRGL_FORMAT_B10G10R10A2_UNORM = 131,
+ VIRGL_FORMAT_R8G8B8X8_UNORM = 134,
+ VIRGL_FORMAT_B4G4R4X4_UNORM = 135,
+ VIRGL_FORMAT_B2G3R3_UNORM = 139,
+
+ VIRGL_FORMAT_L16A16_UNORM = 140,
+ VIRGL_FORMAT_A16_UNORM = 141,
+
+ VIRGL_FORMAT_A8_SNORM = 147,
+ VIRGL_FORMAT_L8_SNORM = 148,
+ VIRGL_FORMAT_L8A8_SNORM = 149,
+
+ VIRGL_FORMAT_A16_SNORM = 151,
+ VIRGL_FORMAT_L16_SNORM = 152,
+ VIRGL_FORMAT_L16A16_SNORM = 153,
+
+ VIRGL_FORMAT_A16_FLOAT = 155,
+ VIRGL_FORMAT_L16_FLOAT = 156,
+ VIRGL_FORMAT_L16A16_FLOAT = 157,
+
+ VIRGL_FORMAT_A32_FLOAT = 159,
+ VIRGL_FORMAT_L32_FLOAT = 160,
+ VIRGL_FORMAT_L32A32_FLOAT = 161,
+
+ VIRGL_FORMAT_R8_UINT = 177,
+ VIRGL_FORMAT_R8G8_UINT = 178,
+ VIRGL_FORMAT_R8G8B8_UINT = 179,
+ VIRGL_FORMAT_R8G8B8A8_UINT = 180,
+
+ VIRGL_FORMAT_R8_SINT = 181,
+ VIRGL_FORMAT_R8G8_SINT = 182,
+ VIRGL_FORMAT_R8G8B8_SINT = 183,
+ VIRGL_FORMAT_R8G8B8A8_SINT = 184,
+
+ VIRGL_FORMAT_R16_UINT = 185,
+ VIRGL_FORMAT_R16G16_UINT = 186,
+ VIRGL_FORMAT_R16G16B16_UINT = 187,
+ VIRGL_FORMAT_R16G16B16A16_UINT = 188,
+
+ VIRGL_FORMAT_R16_SINT = 189,
+ VIRGL_FORMAT_R16G16_SINT = 190,
+ VIRGL_FORMAT_R16G16B16_SINT = 191,
+ VIRGL_FORMAT_R16G16B16A16_SINT = 192,
+ VIRGL_FORMAT_R32_UINT = 193,
+ VIRGL_FORMAT_R32G32_UINT = 194,
+ VIRGL_FORMAT_R32G32B32_UINT = 195,
+ VIRGL_FORMAT_R32G32B32A32_UINT = 196,
+
+ VIRGL_FORMAT_R32_SINT = 197,
+ VIRGL_FORMAT_R32G32_SINT = 198,
+ VIRGL_FORMAT_R32G32B32_SINT = 199,
+ VIRGL_FORMAT_R32G32B32A32_SINT = 200,
+
+ VIRGL_FORMAT_A8_UINT = 201,
+ VIRGL_FORMAT_L8_UINT = 203,
+ VIRGL_FORMAT_L8A8_UINT = 204,
+
+ VIRGL_FORMAT_A8_SINT = 205,
+ VIRGL_FORMAT_L8_SINT = 207,
+ VIRGL_FORMAT_L8A8_SINT = 208,
+
+ VIRGL_FORMAT_A16_UINT = 209,
+ VIRGL_FORMAT_L16_UINT = 211,
+ VIRGL_FORMAT_L16A16_UINT = 212,
+
+ VIRGL_FORMAT_A16_SINT = 213,
+ VIRGL_FORMAT_L16_SINT = 215,
+ VIRGL_FORMAT_L16A16_SINT = 216,
+
+ VIRGL_FORMAT_A32_UINT = 217,
+ VIRGL_FORMAT_L32_UINT = 219,
+ VIRGL_FORMAT_L32A32_UINT = 220,
+
+ VIRGL_FORMAT_A32_SINT = 221,
+ VIRGL_FORMAT_L32_SINT = 223,
+ VIRGL_FORMAT_L32A32_SINT = 224,
+
+ VIRGL_FORMAT_B10G10R10A2_UINT = 225,
+ VIRGL_FORMAT_R8G8B8X8_SNORM = 229,
+
+ VIRGL_FORMAT_R8G8B8X8_SRGB = 230,
+
+ VIRGL_FORMAT_B10G10R10X2_UNORM = 233,
+ VIRGL_FORMAT_R16G16B16X16_UNORM = 234,
+ VIRGL_FORMAT_R16G16B16X16_SNORM = 235,
+ VIRGL_FORMAT_MAX,
+};
+
+#define VIRGL_BIND_DEPTH_STENCIL (1 << 0)
+#define VIRGL_BIND_RENDER_TARGET (1 << 1)
+#define VIRGL_BIND_SAMPLER_VIEW (1 << 3)
+#define VIRGL_BIND_VERTEX_BUFFER (1 << 4)
+#define VIRGL_BIND_INDEX_BUFFER (1 << 5)
+#define VIRGL_BIND_CONSTANT_BUFFER (1 << 6)
+#define VIRGL_BIND_DISPLAY_TARGET (1 << 7)
+#define VIRGL_BIND_STREAM_OUTPUT (1 << 11)
+#define VIRGL_BIND_CURSOR (1 << 16)
+#define VIRGL_BIND_CUSTOM (1 << 17)
+#define VIRGL_BIND_SCANOUT (1 << 18)
+
+struct virgl_caps_bool_set1 {
+ unsigned indep_blend_enable:1;
+ unsigned indep_blend_func:1;
+ unsigned cube_map_array:1;
+ unsigned shader_stencil_export:1;
+ unsigned conditional_render:1;
+ unsigned start_instance:1;
+ unsigned primitive_restart:1;
+ unsigned blend_eq_sep:1;
+ unsigned instanceid:1;
+ unsigned vertex_element_instance_divisor:1;
+ unsigned seamless_cube_map:1;
+ unsigned occlusion_query:1;
+ unsigned timer_query:1;
+ unsigned streamout_pause_resume:1;
+ unsigned texture_multisample:1;
+ unsigned fragment_coord_conventions:1;
+ unsigned depth_clip_disable:1;
+ unsigned seamless_cube_map_per_texture:1;
+ unsigned ubo:1;
+ unsigned color_clamping:1; /* not in GL 3.1 core profile */
+ unsigned poly_stipple:1; /* not in GL 3.1 core profile */
+ unsigned mirror_clamp:1;
+ unsigned texture_query_lod:1;
+};
+
+/* endless expansion capabilites - current gallium has 252 formats */
+struct virgl_supported_format_mask {
+ uint32_t bitmask[16];
+};
+/* capabilities set 2 - version 1 - 32-bit and float values */
+struct virgl_caps_v1 {
+ uint32_t max_version;
+ struct virgl_supported_format_mask sampler;
+ struct virgl_supported_format_mask render;
+ struct virgl_supported_format_mask depthstencil;
+ struct virgl_supported_format_mask vertexbuffer;
+ struct virgl_caps_bool_set1 bset;
+ uint32_t glsl_level;
+ uint32_t max_texture_array_layers;
+ uint32_t max_streamout_buffers;
+ uint32_t max_dual_source_render_targets;
+ uint32_t max_render_targets;
+ uint32_t max_samples;
+ uint32_t prim_mask;
+ uint32_t max_tbo_size;
+ uint32_t max_uniform_blocks;
+ uint32_t max_viewports;
+ uint32_t max_texture_gather_components;
+};
+
+union virgl_caps {
+ uint32_t max_version;
+ struct virgl_caps_v1 v1;
+};
+
+enum virgl_errors {
+ VIRGL_ERROR_NONE,
+ VIRGL_ERROR_UNKNOWN,
+ VIRGL_ERROR_UNKNOWN_RESOURCE_FORMAT,
+};
+
+enum virgl_ctx_errors {
+ VIRGL_ERROR_CTX_NONE,
+ VIRGL_ERROR_CTX_UNKNOWN,
+ VIRGL_ERROR_CTX_ILLEGAL_SHADER,
+ VIRGL_ERROR_CTX_ILLEGAL_HANDLE,
+ VIRGL_ERROR_CTX_ILLEGAL_RESOURCE,
+ VIRGL_ERROR_CTX_ILLEGAL_SURFACE,
+ VIRGL_ERROR_CTX_ILLEGAL_VERTEX_FORMAT,
+ VIRGL_ERROR_CTX_ILLEGAL_CMD_BUFFER,
+};
+
+
+#define VIRGL_RESOURCE_Y_0_TOP (1 << 0)
+#endif
diff --git a/src/gallium/winsys/virgl/drm/virtgpu_drm.h b/src/gallium/winsys/virgl/drm/virtgpu_drm.h
new file mode 100644
index 00000000000..fcc789edbff
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/virtgpu_drm.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2013 Red Hat
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRTGPU_DRM_H
+#define VIRTGPU_DRM_H
+
+#include
+#include "drm/drm.h"
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints.
+ *
+ * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * compatibility Keep fields aligned to their size
+ */
+
+#define DRM_VIRTGPU_MAP 0x01
+#define DRM_VIRTGPU_EXECBUFFER 0x02
+#define DRM_VIRTGPU_GETPARAM 0x03
+#define DRM_VIRTGPU_RESOURCE_CREATE 0x04
+#define DRM_VIRTGPU_RESOURCE_INFO 0x05
+#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06
+#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07
+#define DRM_VIRTGPU_WAIT 0x08
+#define DRM_VIRTGPU_GET_CAPS 0x09
+
+struct drm_virtgpu_map {
+ uint64_t offset; /* use for mmap system call */
+ uint32_t handle;
+ uint32_t pad;
+};
+
+struct drm_virtgpu_execbuffer {
+ uint32_t flags; /* for future use */
+ uint32_t size;
+ uint64_t command; /* void* */
+ uint64_t bo_handles;
+ uint32_t num_bo_handles;
+ uint32_t pad;
+};
+
+#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+
+struct drm_virtgpu_getparam {
+ uint64_t param;
+ uint64_t value;
+};
+
+/* NO_BO flags? NO resource flag? */
+/* resource flag for y_0_top */
+struct drm_virtgpu_resource_create {
+ uint32_t target;
+ uint32_t format;
+ uint32_t bind;
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t array_size;
+ uint32_t last_level;
+ uint32_t nr_samples;
+ uint32_t flags;
+ uint32_t bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
+ uint32_t res_handle; /* returned by kernel */
+ uint32_t size; /* validate transfer in the host */
+ uint32_t stride; /* validate transfer in the host */
+};
+
+struct drm_virtgpu_resource_info {
+ uint32_t bo_handle;
+ uint32_t res_handle;
+ uint32_t size;
+ uint32_t stride;
+};
+
+struct drm_virtgpu_3d_box {
+ uint32_t x, y, z;
+ uint32_t w, h, d;
+};
+
+struct drm_virtgpu_3d_transfer_to_host {
+ uint32_t bo_handle;
+ struct drm_virtgpu_3d_box box;
+ uint32_t level;
+ uint32_t offset;
+};
+
+struct drm_virtgpu_3d_transfer_from_host {
+ uint32_t bo_handle;
+ struct drm_virtgpu_3d_box box;
+ uint32_t level;
+ uint32_t offset;
+};
+
+#define VIRTGPU_WAIT_NOWAIT 1 /* like it */
+struct drm_virtgpu_3d_wait {
+ uint32_t handle; /* 0 is an invalid handle */
+ uint32_t flags;
+};
+
+struct drm_virtgpu_get_caps {
+ uint32_t cap_set_id;
+ uint32_t cap_set_ver;
+ uint64_t addr;
+ uint32_t size;
+ uint32_t pad;
+};
+
+#define DRM_IOCTL_VIRTGPU_MAP \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
+
+#define DRM_IOCTL_VIRTGPU_EXECBUFFER \
+ DRM_IOW(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\
+ struct drm_virtgpu_execbuffer)
+
+#define DRM_IOCTL_VIRTGPU_GETPARAM \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM,\
+ struct drm_virtgpu_getparam)
+
+#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE, \
+ struct drm_virtgpu_resource_create)
+
+#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \
+ struct drm_virtgpu_resource_info)
+
+#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST, \
+ struct drm_virtgpu_3d_transfer_from_host)
+
+#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST, \
+ struct drm_virtgpu_3d_transfer_to_host)
+
+#define DRM_IOCTL_VIRTGPU_WAIT \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, \
+ struct drm_virtgpu_3d_wait)
+
+#define DRM_IOCTL_VIRTGPU_GET_CAPS \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \
+ struct drm_virtgpu_get_caps)
+
+#endif
From b3b82fe8ea1f7d02c93513920143cba2fe145b6c Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Fri, 13 Mar 2015 14:15:47 +1000
Subject: [PATCH 036/266] virgl/vtest: add vtest driver
virgl/vtest is a swrast driver that allows the
virgl acceleration to be tested without having
a virtual machine.
The backend has a unix socket server that
this connects to.
This is run by setting
LIBGL_ALWAYS_SOFTWARE=y
GALLIUM_DRIVER=virpipe
In this mode all renderering is sent over
a socket to the remote renderer, and the
results are readback and copies to the screen
using drisw. This works well enough to develop
new features and to help debug.
Signed-off-by: Dave Airlie
---
configure.ac | 1 +
src/gallium/Makefile.am | 2 +-
.../target-helpers/inline_sw_helper.h | 12 +
src/gallium/drivers/virgl/Automake.inc | 3 +-
src/gallium/winsys/virgl/vtest/Makefile.am | 33 +
.../winsys/virgl/vtest/Makefile.sources | 3 +
.../winsys/virgl/vtest/virgl_vtest_public.h | 30 +
.../winsys/virgl/vtest/virgl_vtest_socket.c | 288 ++++++++
.../winsys/virgl/vtest/virgl_vtest_winsys.c | 653 ++++++++++++++++++
.../winsys/virgl/vtest/virgl_vtest_winsys.h | 132 ++++
.../winsys/virgl/vtest/vtest_protocol.h | 88 +++
11 files changed, 1243 insertions(+), 2 deletions(-)
create mode 100644 src/gallium/winsys/virgl/vtest/Makefile.am
create mode 100644 src/gallium/winsys/virgl/vtest/Makefile.sources
create mode 100644 src/gallium/winsys/virgl/vtest/virgl_vtest_public.h
create mode 100644 src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
create mode 100644 src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
create mode 100644 src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
create mode 100644 src/gallium/winsys/virgl/vtest/vtest_protocol.h
diff --git a/configure.ac b/configure.ac
index 4ef09d2659b..5aa19a5a6de 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2425,6 +2425,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/sw/xlib/Makefile
src/gallium/winsys/vc4/drm/Makefile
src/gallium/winsys/virgl/drm/Makefile
+ src/gallium/winsys/virgl/vtest/Makefile
src/gbm/Makefile
src/gbm/main/gbm.pc
src/glsl/Makefile
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 2fa93ddfe5d..611d55fafe2 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -84,7 +84,7 @@ endif
## virgl
if HAVE_GALLIUM_VIRGL
-SUBDIRS += drivers/virgl winsys/virgl/drm
+SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
endif
## the sw winsys'
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 5f46552f6c3..f3693fb1f39 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -19,6 +19,10 @@
#include "llvmpipe/lp_public.h"
#endif
+#ifdef GALLIUM_VIRGL
+#include "virgl/virgl_public.h"
+#include "virgl/vtest/virgl_vtest_public.h"
+#endif
static inline struct pipe_screen *
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
@@ -30,6 +34,14 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
screen = llvmpipe_create_screen(winsys);
#endif
+#if defined(GALLIUM_VIRGL)
+ if (screen == NULL && strcmp(driver, "virpipe") == 0) {
+ struct virgl_winsys *vws;
+ vws = virgl_vtest_winsys_wrap(winsys);
+ screen = virgl_create_screen(vws);
+ }
+#endif
+
#if defined(GALLIUM_SOFTPIPE)
if (screen == NULL)
screen = softpipe_create_screen(winsys);
diff --git a/src/gallium/drivers/virgl/Automake.inc b/src/gallium/drivers/virgl/Automake.inc
index 457ca77b3a7..b05d3e314c8 100644
--- a/src/gallium/drivers/virgl/Automake.inc
+++ b/src/gallium/drivers/virgl/Automake.inc
@@ -4,7 +4,8 @@ TARGET_DRIVERS += virtio_gpu
TARGET_CPPFLAGS += -DGALLIUM_VIRGL
TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/drivers/virgl/libvirgl.la \
- $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la
+ $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la \
+ $(top_builddir)/src/gallium/winsys/virgl/vtest/libvirglvtest.la \
$(LIBDRM_LIBS)
endif
diff --git a/src/gallium/winsys/virgl/vtest/Makefile.am b/src/gallium/winsys/virgl/vtest/Makefile.am
new file mode 100644
index 00000000000..81270d7b74b
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/Makefile.am
@@ -0,0 +1,33 @@
+# Copyright © 2015 Red Hat
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(GALLIUM_CFLAGS)
+
+noinst_LTLIBRARIES = libvirglvtest.la
+
+libvirglvtest_la_SOURCES = $(C_SOURCES)
diff --git a/src/gallium/winsys/virgl/vtest/Makefile.sources b/src/gallium/winsys/virgl/vtest/Makefile.sources
new file mode 100644
index 00000000000..ab72560849a
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/Makefile.sources
@@ -0,0 +1,3 @@
+C_SOURCES := \
+ virgl_vtest_winsys.c \
+ virgl_vtest_socket.c
\ No newline at end of file
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h b/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h
new file mode 100644
index 00000000000..72336499b44
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_VTEST_PUBLIC_H
+#define VIRGL_VTEST_PUBLIC_H
+
+struct virgl_winsys;
+
+struct virgl_winsys *virgl_vtest_winsys_wrap(struct sw_winsys *sws);
+
+#endif
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
new file mode 100644
index 00000000000..48dff8d0c17
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define _FILE_OFFSET_BITS 64
+
+#include "virgl_vtest_winsys.h"
+#include "virgl_vtest_public.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+/* connect to remote socket */
+#define VTEST_SOCKET_NAME "/tmp/.virgl_test"
+
+/* block read/write routines */
+static int virgl_block_write(int fd, void *buf, int size)
+{
+ void *ptr = buf;
+ int left;
+ int ret;
+ left = size;
+ do {
+ ret = write(fd, ptr, left);
+ if (ret < 0)
+ return -errno;
+ left -= ret;
+ ptr += ret;
+ } while (left);
+ return size;
+}
+
+static int virgl_block_read(int fd, void *buf, int size)
+{
+ void *ptr = buf;
+ int left;
+ int ret;
+ left = size;
+ do {
+ ret = read(fd, ptr, left);
+ if (ret <= 0) {
+ fprintf(stderr, "lost connection to rendering server on %d read %d %d\n", size, ret, errno);
+ abort();
+ return ret < 0 ? -errno : 0;
+ }
+ left -= ret;
+ ptr += ret;
+ } while (left);
+ return size;
+}
+
+static int virgl_vtest_send_init(struct virgl_vtest_winsys *vws)
+{
+ uint32_t buf[VTEST_HDR_SIZE];
+ const char *nstr = "virtest";
+ char cmdline[64];
+ int ret;
+
+ ret = os_get_process_name(cmdline, 63);
+ if (ret == FALSE)
+ strcpy(cmdline, nstr);
+#if defined(__GLIBC__) || defined(__CYGWIN__)
+ if (!strcmp(cmdline, "shader_runner")) {
+ const char *name;
+ /* hack to get better testname */
+ name = program_invocation_short_name;
+ name += strlen(name) + 1;
+ strncpy(cmdline, name, 63);
+ }
+#endif
+ buf[VTEST_CMD_LEN] = strlen(cmdline) + 1;
+ buf[VTEST_CMD_ID] = VCMD_CREATE_RENDERER;
+
+ virgl_block_write(vws->sock_fd, &buf, sizeof(buf));
+ virgl_block_write(vws->sock_fd, (void *)cmdline, strlen(cmdline) + 1);
+ return 0;
+}
+
+int virgl_vtest_connect(struct virgl_vtest_winsys *vws)
+{
+ struct sockaddr_un un;
+ int sock, ret;
+
+ sock = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0)
+ return -1;
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_SOCKET_NAME);
+
+ do {
+ ret = 0;
+ if (connect(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
+ ret = -errno;
+ }
+ } while (ret == -EINTR);
+
+ vws->sock_fd = sock;
+ virgl_vtest_send_init(vws);
+ return 0;
+}
+
+int virgl_vtest_send_get_caps(struct virgl_vtest_winsys *vws,
+ struct virgl_drm_caps *caps)
+{
+ uint32_t get_caps_buf[VTEST_HDR_SIZE];
+ uint32_t resp_buf[VTEST_HDR_SIZE];
+
+ int ret;
+ get_caps_buf[VTEST_CMD_LEN] = 0;
+ get_caps_buf[VTEST_CMD_ID] = VCMD_GET_CAPS;
+
+ virgl_block_write(vws->sock_fd, &get_caps_buf, sizeof(get_caps_buf));
+
+ ret = virgl_block_read(vws->sock_fd, resp_buf, sizeof(resp_buf));
+ if (ret <= 0)
+ return 0;
+
+ ret = virgl_block_read(vws->sock_fd, &caps->caps, sizeof(union virgl_caps));
+
+ return 0;
+}
+
+int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws,
+ uint32_t handle,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples)
+{
+ uint32_t res_create_buf[VCMD_RES_CREATE_SIZE], vtest_hdr[VTEST_HDR_SIZE];
+
+ vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE;
+ vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE;
+
+ res_create_buf[VCMD_RES_CREATE_RES_HANDLE] = handle;
+ res_create_buf[VCMD_RES_CREATE_TARGET] = target;
+ res_create_buf[VCMD_RES_CREATE_FORMAT] = format;
+ res_create_buf[VCMD_RES_CREATE_BIND] = bind;
+ res_create_buf[VCMD_RES_CREATE_WIDTH] = width;
+ res_create_buf[VCMD_RES_CREATE_HEIGHT] = height;
+ res_create_buf[VCMD_RES_CREATE_DEPTH] = depth;
+ res_create_buf[VCMD_RES_CREATE_ARRAY_SIZE] = array_size;
+ res_create_buf[VCMD_RES_CREATE_LAST_LEVEL] = last_level;
+ res_create_buf[VCMD_RES_CREATE_NR_SAMPLES] = nr_samples;
+
+ virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, &res_create_buf, sizeof(res_create_buf));
+
+ return 0;
+}
+
+int virgl_vtest_submit_cmd(struct virgl_vtest_winsys *vws,
+ struct virgl_vtest_cmd_buf *cbuf)
+{
+ uint32_t vtest_hdr[VTEST_HDR_SIZE];
+
+ vtest_hdr[VTEST_CMD_LEN] = cbuf->base.cdw;
+ vtest_hdr[VTEST_CMD_ID] = VCMD_SUBMIT_CMD;
+
+ virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, cbuf->buf, cbuf->base.cdw * 4);
+ return 0;
+}
+
+int virgl_vtest_send_resource_unref(struct virgl_vtest_winsys *vws,
+ uint32_t handle)
+{
+ uint32_t vtest_hdr[VTEST_HDR_SIZE];
+ uint32_t cmd[1];
+ vtest_hdr[VTEST_CMD_LEN] = 1;
+ vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_UNREF;
+
+ cmd[0] = handle;
+ virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd));
+ return 0;
+}
+
+int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
+ uint32_t vcmd,
+ uint32_t handle,
+ uint32_t level, uint32_t stride,
+ uint32_t layer_stride,
+ const struct pipe_box *box,
+ uint32_t data_size)
+{
+ uint32_t vtest_hdr[VTEST_HDR_SIZE];
+ uint32_t cmd[VCMD_TRANSFER_HDR_SIZE];
+ bool is_put = (vcmd == VCMD_TRANSFER_PUT);
+ vtest_hdr[VTEST_CMD_LEN] = VCMD_TRANSFER_HDR_SIZE + (is_put ? (data_size + 3 / 4) : 0);
+ vtest_hdr[VTEST_CMD_ID] = vcmd;
+
+ cmd[0] = handle;
+ cmd[1] = level;
+ cmd[2] = stride;
+ cmd[3] = layer_stride;
+ cmd[4] = box->x;
+ cmd[5] = box->y;
+ cmd[6] = box->z;
+ cmd[7] = box->width;
+ cmd[8] = box->height;
+ cmd[9] = box->depth;
+ cmd[10] = data_size;
+ virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd));
+
+ return 0;
+}
+
+int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys *vws,
+ void *data,
+ uint32_t data_size)
+{
+ return virgl_block_write(vws->sock_fd, data, data_size);
+}
+
+int virgl_vtest_recv_transfer_get_data(struct virgl_vtest_winsys *vws,
+ void *data,
+ uint32_t data_size,
+ uint32_t stride,
+ const struct pipe_box *box, uint32_t format)
+{
+ void *line = malloc(stride);
+ void *ptr = data;
+ int hblocks = util_format_get_nblocksy(format, box->height);
+
+ line = malloc(stride);
+ while (hblocks) {
+ virgl_block_read(vws->sock_fd, line, stride);
+ memcpy(ptr, line, util_format_get_stride(format, box->width));
+ ptr += stride;
+ hblocks--;
+ }
+ free(line);
+ return 0;
+}
+
+int virgl_vtest_busy_wait(struct virgl_vtest_winsys *vws, int handle,
+ int flags)
+{
+ uint32_t vtest_hdr[VTEST_HDR_SIZE];
+ uint32_t cmd[VCMD_BUSY_WAIT_SIZE];
+ uint32_t result[1];
+ int ret;
+ vtest_hdr[VTEST_CMD_LEN] = VCMD_BUSY_WAIT_SIZE;
+ vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_BUSY_WAIT;
+ cmd[VCMD_BUSY_WAIT_HANDLE] = handle;
+ cmd[VCMD_BUSY_WAIT_FLAGS] = flags;
+
+ virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd));
+
+ ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr));
+ assert(ret);
+ ret = virgl_block_read(vws->sock_fd, result, sizeof(result));
+ assert(ret);
+ return result[0];
+}
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
new file mode 100644
index 00000000000..9002d40450b
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -0,0 +1,653 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define _FILE_OFFSET_BITS 64
+
+#include
+#include "virgl_vtest_winsys.h"
+#include "virgl_vtest_public.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "state_tracker/drm_driver.h"
+#include "os/os_time.h"
+
+static void *virgl_vtest_resource_map(struct virgl_winsys *vws, struct virgl_hw_res *res);
+static void virgl_vtest_resource_unmap(struct virgl_winsys *vws, struct virgl_hw_res *res);
+static inline boolean can_cache_resource(struct virgl_hw_res *res)
+{
+ return res->cacheable == TRUE;
+}
+
+static uint32_t vtest_get_transfer_size(struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t level, uint32_t *valid_stride_p)
+{
+ uint32_t valid_stride, valid_layer_stride;
+
+ valid_stride = util_format_get_stride(res->format, box->width);
+ if (stride) {
+ if (box->height > 1)
+ valid_stride = stride;
+ }
+
+ valid_layer_stride = util_format_get_2d_size(res->format, valid_stride,
+ box->height);
+ if (layer_stride) {
+ if (box->depth > 1)
+ valid_layer_stride = layer_stride;
+ }
+
+ *valid_stride_p = valid_stride;
+ return valid_layer_stride * box->depth;
+}
+
+static int
+virgl_vtest_transfer_put(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ uint32_t size;
+ void *ptr;
+ uint32_t valid_stride;
+
+ size = vtest_get_transfer_size(res, box, stride, layer_stride, level, &valid_stride);
+
+ virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_PUT, res->res_handle,
+ level, stride, layer_stride,
+ box, size);
+ ptr = virgl_vtest_resource_map(vws, res);
+ virgl_vtest_send_transfer_put_data(vtws, ptr + buf_offset, size);
+ virgl_vtest_resource_unmap(vws, res);
+ return 0;
+}
+
+static int
+virgl_vtest_transfer_get(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ uint32_t size;
+ void *ptr;
+ uint32_t valid_stride;
+
+ size = vtest_get_transfer_size(res, box, stride, layer_stride, level, &valid_stride);
+
+ virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_GET, res->res_handle,
+ level, stride, layer_stride,
+ box, size);
+
+
+ ptr = virgl_vtest_resource_map(vws, res);
+ virgl_vtest_recv_transfer_get_data(vtws, ptr + buf_offset, size, valid_stride, box, res->format);
+ virgl_vtest_resource_unmap(vws, res);
+ return 0;
+}
+
+static void virgl_hw_res_destroy(struct virgl_vtest_winsys *vtws,
+ struct virgl_hw_res *res)
+{
+ virgl_vtest_send_resource_unref(vtws, res->res_handle);
+ if (res->dt)
+ vtws->sws->displaytarget_destroy(vtws->sws, res->dt);
+ free(res->ptr);
+ FREE(res);
+}
+
+static boolean virgl_vtest_resource_is_busy(struct virgl_vtest_winsys *vtws,
+ struct virgl_hw_res *res)
+{
+ /* implement busy check */
+ int ret;
+ ret = virgl_vtest_busy_wait(vtws, res->res_handle, 0);
+
+ if (ret < 0)
+ return FALSE;
+
+ return ret == 1 ? TRUE : FALSE;
+}
+
+static void
+virgl_cache_flush(struct virgl_vtest_winsys *vtws)
+{
+ struct list_head *curr, *next;
+ struct virgl_hw_res *res;
+
+ pipe_mutex_lock(vtws->mutex);
+ curr = vtws->delayed.next;
+ next = curr->next;
+
+ while (curr != &vtws->delayed) {
+ res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ LIST_DEL(&res->head);
+ virgl_hw_res_destroy(vtws, res);
+ curr = next;
+ next = curr->next;
+ }
+ pipe_mutex_unlock(vtws->mutex);
+}
+
+static void
+virgl_cache_list_check_free(struct virgl_vtest_winsys *vtws)
+{
+ struct list_head *curr, *next;
+ struct virgl_hw_res *res;
+ int64_t now;
+
+ now = os_time_get();
+ curr = vtws->delayed.next;
+ next = curr->next;
+ while (curr != &vtws->delayed) {
+ res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ if (!os_time_timeout(res->start, res->end, now))
+ break;
+
+ LIST_DEL(&res->head);
+ virgl_hw_res_destroy(vtws, res);
+ curr = next;
+ next = curr->next;
+ }
+}
+
+static void virgl_vtest_resource_reference(struct virgl_vtest_winsys *vtws,
+ struct virgl_hw_res **dres,
+ struct virgl_hw_res *sres)
+{
+ struct virgl_hw_res *old = *dres;
+ if (pipe_reference(&(*dres)->reference, &sres->reference)) {
+ if (!can_cache_resource(old)) {
+ virgl_hw_res_destroy(vtws, old);
+ } else {
+ pipe_mutex_lock(vtws->mutex);
+ virgl_cache_list_check_free(vtws);
+
+ old->start = os_time_get();
+ old->end = old->start + vtws->usecs;
+ LIST_ADDTAIL(&old->head, &vtws->delayed);
+ vtws->num_delayed++;
+ pipe_mutex_unlock(vtws->mutex);
+ }
+ }
+ *dres = sres;
+}
+
+static struct virgl_hw_res *virgl_vtest_winsys_resource_create(
+ struct virgl_winsys *vws,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples,
+ uint32_t size)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ struct virgl_hw_res *res;
+ static int handle = 1;
+
+ res = CALLOC_STRUCT(virgl_hw_res);
+ if (!res)
+ return NULL;
+
+ if (bind & (VIRGL_BIND_DISPLAY_TARGET | VIRGL_BIND_SCANOUT)) {
+ res->dt = vtws->sws->displaytarget_create(vtws->sws,
+ bind,
+ format,
+ width,
+ height,
+ 64,
+ &res->stride);
+
+ } else {
+ res->ptr = align_malloc(size, 64);
+ if (!res->ptr) {
+ FREE(res);
+ return NULL;
+ }
+ }
+
+ res->bind = bind;
+ res->format = format;
+ res->height = height;
+ res->width = width;
+ virgl_vtest_send_resource_create(vtws, handle, target, format, bind, width,
+ height, depth, array_size, last_level,
+ nr_samples);
+
+ res->res_handle = handle++;
+ pipe_reference_init(&res->reference, 1);
+ return res;
+}
+
+static void virgl_vtest_winsys_resource_unref(struct virgl_winsys *vws,
+ struct virgl_hw_res *hres)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ virgl_vtest_resource_reference(vtws, &hres, NULL);
+}
+
+static void *virgl_vtest_resource_map(struct virgl_winsys *vws, struct virgl_hw_res *res)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+
+ if (res->dt) {
+ return vtws->sws->displaytarget_map(vtws->sws, res->dt, 0);
+ } else {
+ res->mapped = res->ptr;
+ return res->mapped;
+ }
+}
+
+static void virgl_vtest_resource_unmap(struct virgl_winsys *vws, struct virgl_hw_res *res)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ if (res->mapped)
+ res->mapped = NULL;
+
+ if (res->dt)
+ vtws->sws->displaytarget_unmap(vtws->sws, res->dt);
+}
+
+static void virgl_vtest_resource_wait(struct virgl_winsys *vws, struct virgl_hw_res *res)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+
+ virgl_vtest_busy_wait(vtws, res->res_handle, VCMD_BUSY_WAIT_FLAG_WAIT);
+}
+
+static inline int virgl_is_res_compat(struct virgl_vtest_winsys *vtws,
+ struct virgl_hw_res *res,
+ uint32_t size, uint32_t bind, uint32_t format)
+{
+ if (res->bind != bind)
+ return 0;
+ if (res->format != format)
+ return 0;
+ if (res->size < size)
+ return 0;
+ if (res->size > size * 2)
+ return 0;
+
+ if (virgl_vtest_resource_is_busy(vtws, res)) {
+ return -1;
+ }
+
+ return 1;
+}
+
+static struct virgl_hw_res *virgl_vtest_winsys_resource_cache_create(struct virgl_winsys *vws,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples,
+ uint32_t size)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ struct virgl_hw_res *res, *curr_res;
+ struct list_head *curr, *next;
+ int64_t now;
+ int ret;
+
+ /* only store binds for vertex/index/const buffers */
+ if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER &&
+ bind != VIRGL_BIND_VERTEX_BUFFER && bind != VIRGL_BIND_CUSTOM)
+ goto alloc;
+
+ pipe_mutex_lock(vtws->mutex);
+
+ res = NULL;
+ curr = vtws->delayed.next;
+ next = curr->next;
+
+ now = os_time_get();
+ while (curr != &vtws->delayed) {
+ curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+
+ if (!res && (ret = virgl_is_res_compat(vtws, curr_res, size, bind, format) > 0))
+ res = curr_res;
+ else if (os_time_timeout(curr_res->start, curr_res->end, now)) {
+ LIST_DEL(&curr_res->head);
+ virgl_hw_res_destroy(vtws, curr_res);
+ } else
+ break;
+
+ if (ret == -1)
+ break;
+
+ curr = next;
+ next = curr->next;
+ }
+
+ if (!res && ret != -1) {
+ while (curr != &vtws->delayed) {
+ curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
+ ret = virgl_is_res_compat(vtws, curr_res, size, bind, format);
+ if (ret > 0) {
+ res = curr_res;
+ break;
+ }
+ if (ret == -1)
+ break;
+ curr = next;
+ next = curr->next;
+ }
+ }
+
+ if (res) {
+ LIST_DEL(&res->head);
+ --vtws->num_delayed;
+ pipe_mutex_unlock(vtws->mutex);
+ pipe_reference_init(&res->reference, 1);
+ return res;
+ }
+
+ pipe_mutex_unlock(vtws->mutex);
+
+alloc:
+ res = virgl_vtest_winsys_resource_create(vws, target, format, bind,
+ width, height, depth, array_size,
+ last_level, nr_samples, size);
+ if (bind == VIRGL_BIND_CONSTANT_BUFFER || bind == VIRGL_BIND_INDEX_BUFFER ||
+ bind == VIRGL_BIND_VERTEX_BUFFER)
+ res->cacheable = TRUE;
+ return res;
+}
+
+static struct virgl_cmd_buf *virgl_vtest_cmd_buf_create(struct virgl_winsys *vws)
+{
+ struct virgl_vtest_cmd_buf *cbuf;
+
+ cbuf = CALLOC_STRUCT(virgl_vtest_cmd_buf);
+ if (!cbuf)
+ return NULL;
+
+ cbuf->nres = 512;
+ cbuf->res_bo = (struct virgl_hw_res **)
+ CALLOC(cbuf->nres, sizeof(struct virgl_hw_buf*));
+ if (!cbuf->res_bo) {
+ FREE(cbuf);
+ return NULL;
+ }
+ cbuf->ws = vws;
+ cbuf->base.buf = cbuf->buf;
+ return &cbuf->base;
+}
+
+static void virgl_vtest_cmd_buf_destroy(struct virgl_cmd_buf *_cbuf)
+{
+ struct virgl_vtest_cmd_buf *cbuf = (struct virgl_vtest_cmd_buf *)_cbuf;
+
+ FREE(cbuf->res_bo);
+ FREE(cbuf);
+}
+
+static boolean virgl_vtest_lookup_res(struct virgl_vtest_cmd_buf *cbuf,
+ struct virgl_hw_res *res)
+{
+ unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);
+ int i;
+
+ if (cbuf->is_handle_added[hash]) {
+ i = cbuf->reloc_indices_hashlist[hash];
+ if (cbuf->res_bo[i] == res)
+ return true;
+
+ for (i = 0; i < cbuf->cres; i++) {
+ if (cbuf->res_bo[i] == res) {
+ cbuf->reloc_indices_hashlist[hash] = i;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static void virgl_vtest_release_all_res(struct virgl_vtest_winsys *vtws,
+ struct virgl_vtest_cmd_buf *cbuf)
+{
+ int i;
+
+ for (i = 0; i < cbuf->cres; i++) {
+ p_atomic_dec(&cbuf->res_bo[i]->num_cs_references);
+ virgl_vtest_resource_reference(vtws, &cbuf->res_bo[i], NULL);
+ }
+ cbuf->cres = 0;
+}
+
+static void virgl_vtest_add_res(struct virgl_vtest_winsys *vtws,
+ struct virgl_vtest_cmd_buf *cbuf, struct virgl_hw_res *res)
+{
+ unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);
+
+ if (cbuf->cres > cbuf->nres) {
+ fprintf(stderr,"failure to add relocation\n");
+ return;
+ }
+
+ cbuf->res_bo[cbuf->cres] = NULL;
+ virgl_vtest_resource_reference(vtws, &cbuf->res_bo[cbuf->cres], res);
+ cbuf->is_handle_added[hash] = TRUE;
+
+ cbuf->reloc_indices_hashlist[hash] = cbuf->cres;
+ p_atomic_inc(&res->num_cs_references);
+ cbuf->cres++;
+}
+
+static int virgl_vtest_winsys_submit_cmd(struct virgl_winsys *vws, struct virgl_cmd_buf *_cbuf)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ struct virgl_vtest_cmd_buf *cbuf = (struct virgl_vtest_cmd_buf *)_cbuf;
+ int ret;
+
+ if (cbuf->base.cdw == 0)
+ return 0;
+
+ ret = virgl_vtest_submit_cmd(vtws, cbuf);
+
+ virgl_vtest_release_all_res(vtws, cbuf);
+ memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added));
+ cbuf->base.cdw = 0;
+ return ret;
+}
+
+static void virgl_vtest_emit_res(struct virgl_winsys *vws, struct virgl_cmd_buf *_cbuf, struct virgl_hw_res *res, boolean write_buf)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ struct virgl_vtest_cmd_buf *cbuf = (struct virgl_vtest_cmd_buf *)_cbuf;
+ boolean already_in_list = virgl_vtest_lookup_res(cbuf, res);
+
+ if (write_buf)
+ cbuf->base.buf[cbuf->base.cdw++] = res->res_handle;
+ if (!already_in_list)
+ virgl_vtest_add_res(vtws, cbuf, res);
+}
+
+static boolean virgl_vtest_res_is_ref(struct virgl_winsys *vws,
+ struct virgl_cmd_buf *_cbuf,
+ struct virgl_hw_res *res)
+{
+ if (!res->num_cs_references)
+ return FALSE;
+
+ return TRUE;
+}
+
+static int virgl_vtest_get_caps(struct virgl_winsys *vws, struct virgl_drm_caps *caps)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ return virgl_vtest_send_get_caps(vtws, caps);
+}
+
+static struct pipe_fence_handle *
+virgl_cs_create_fence(struct virgl_winsys *vws)
+{
+ struct virgl_hw_res *res;
+
+ res = virgl_vtest_winsys_resource_cache_create(vws,
+ PIPE_BUFFER,
+ PIPE_FORMAT_R8_UNORM,
+ PIPE_BIND_CUSTOM,
+ 8, 1, 1, 0, 0, 0, 8);
+
+ return (struct pipe_fence_handle *)res;
+}
+
+static bool virgl_fence_wait(struct virgl_winsys *vws,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
+{
+ struct virgl_vtest_winsys *vdws = virgl_vtest_winsys(vws);
+ struct virgl_hw_res *res = (struct virgl_hw_res *)fence;
+
+ if (timeout == 0)
+ return virgl_vtest_resource_is_busy(vdws, res);
+
+ if (timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t start_time = os_time_get();
+ timeout /= 1000;
+ while (virgl_vtest_resource_is_busy(vdws, res)) {
+ if (os_time_get() - start_time >= timeout)
+ return FALSE;
+ os_time_sleep(10);
+ }
+ return TRUE;
+ }
+ virgl_vtest_resource_wait(vws, res);
+ return TRUE;
+}
+
+static void virgl_fence_reference(struct virgl_winsys *vws,
+ struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src)
+{
+ struct virgl_vtest_winsys *vdws = virgl_vtest_winsys(vws);
+ virgl_vtest_resource_reference(vdws, (struct virgl_hw_res **)dst,
+ (struct virgl_hw_res *)src);
+}
+
+static void virgl_vtest_flush_frontbuffer(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle,
+ struct pipe_box *sub_box)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+ struct pipe_box box;
+ void *map;
+ uint32_t size;
+ uint32_t offset = 0, valid_stride;
+ if (!res->dt)
+ return;
+
+ memset(&box, 0, sizeof(box));
+
+ if (sub_box) {
+ box = *sub_box;
+ offset = (res->stride * (box.y / util_format_get_blockheight(res->format))) + (box.x / util_format_get_blockwidth(res->format)) * util_format_get_blocksize(res->format);
+ } else {
+ box.z = layer;
+ box.width = res->width;
+ box.height = res->height;
+ box.depth = 1;
+ }
+
+ size = vtest_get_transfer_size(res, &box, res->stride, 0, level, &valid_stride);
+
+ virgl_vtest_busy_wait(vtws, res->res_handle, VCMD_BUSY_WAIT_FLAG_WAIT);
+ map = vtws->sws->displaytarget_map(vtws->sws, res->dt, 0);
+
+ /* execute a transfer */
+ virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_GET, res->res_handle,
+ level, res->stride, 0, &box, size);
+ virgl_vtest_recv_transfer_get_data(vtws, map + offset, size, valid_stride, &box, res->format);
+ vtws->sws->displaytarget_unmap(vtws->sws, res->dt);
+
+ vtws->sws->displaytarget_display(vtws->sws, res->dt, winsys_drawable_handle, sub_box);
+}
+
+static void
+virgl_vtest_winsys_destroy(struct virgl_winsys *vws)
+{
+ struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws);
+
+ virgl_cache_flush(vtws);
+
+ pipe_mutex_destroy(vtws->mutex);
+ FREE(vtws);
+}
+
+struct virgl_winsys *
+virgl_vtest_winsys_wrap(struct sw_winsys *sws)
+{
+ struct virgl_vtest_winsys *vtws;
+
+ vtws = CALLOC_STRUCT(virgl_vtest_winsys);
+ if (!vtws)
+ return NULL;
+
+ virgl_vtest_connect(vtws);
+ vtws->sws = sws;
+
+ vtws->usecs = 1000000;
+ LIST_INITHEAD(&vtws->delayed);
+ pipe_mutex_init(vtws->mutex);
+
+ vtws->base.destroy = virgl_vtest_winsys_destroy;
+
+ vtws->base.transfer_put = virgl_vtest_transfer_put;
+ vtws->base.transfer_get = virgl_vtest_transfer_get;
+
+ vtws->base.resource_create = virgl_vtest_winsys_resource_cache_create;
+ vtws->base.resource_unref = virgl_vtest_winsys_resource_unref;
+ vtws->base.resource_map = virgl_vtest_resource_map;
+ vtws->base.resource_wait = virgl_vtest_resource_wait;
+ vtws->base.cmd_buf_create = virgl_vtest_cmd_buf_create;
+ vtws->base.cmd_buf_destroy = virgl_vtest_cmd_buf_destroy;
+ vtws->base.submit_cmd = virgl_vtest_winsys_submit_cmd;
+
+ vtws->base.emit_res = virgl_vtest_emit_res;
+ vtws->base.res_is_referenced = virgl_vtest_res_is_ref;
+ vtws->base.get_caps = virgl_vtest_get_caps;
+
+ vtws->base.cs_create_fence = virgl_cs_create_fence;
+ vtws->base.fence_wait = virgl_fence_wait;
+ vtws->base.fence_reference = virgl_fence_reference;
+
+ vtws->base.flush_frontbuffer = virgl_vtest_flush_frontbuffer;
+
+ return &vtws->base;
+}
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
new file mode 100644
index 00000000000..3967befa1a9
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_DRM_WINSYS_H
+#define VIRGL_DRM_WINSYS_H
+
+#include
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "state_tracker/sw_winsys.h"
+#include "../drm/virgl_hw.h"
+#include "virgl/virgl_winsys.h"
+#include "util/list.h"
+#include "os/os_thread.h"
+
+#include "vtest_protocol.h"
+struct virgl_vtest_winsys {
+ struct virgl_winsys base;
+
+ struct sw_winsys *sws;
+
+ /* fd to remote renderer */
+ int sock_fd;
+
+ struct list_head delayed;
+ int num_delayed;
+ unsigned usecs;
+ pipe_mutex mutex;
+};
+
+struct virgl_hw_res {
+ struct pipe_reference reference;
+ uint32_t res_handle;
+ int num_cs_references;
+
+ void *ptr;
+ int size;
+
+ uint32_t format;
+ uint32_t stride;
+ uint32_t width;
+ uint32_t height;
+
+ struct sw_displaytarget *dt;
+ void *mapped;
+
+ struct list_head head;
+ uint32_t bind;
+ boolean cacheable;
+ int64_t start, end;
+
+};
+
+struct virgl_vtest_cmd_buf {
+ struct virgl_cmd_buf base;
+ uint32_t buf[VIRGL_MAX_CMDBUF_DWORDS];
+ unsigned nres;
+ unsigned cres;
+ struct virgl_winsys *ws;
+ struct virgl_hw_res **res_bo;
+
+ char is_handle_added[512];
+ unsigned reloc_indices_hashlist[512];
+};
+
+static inline struct virgl_vtest_winsys *
+virgl_vtest_winsys(struct virgl_winsys *iws)
+{
+ return (struct virgl_vtest_winsys *)iws;
+}
+
+int virgl_vtest_connect(struct virgl_vtest_winsys *vws);
+int virgl_vtest_send_get_caps(struct virgl_vtest_winsys *vws,
+ struct virgl_drm_caps *caps);
+
+int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws,
+ uint32_t handle,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples);
+
+int virgl_vtest_send_resource_unref(struct virgl_vtest_winsys *vws,
+ uint32_t handle);
+int virgl_vtest_submit_cmd(struct virgl_vtest_winsys *vtws,
+ struct virgl_vtest_cmd_buf *cbuf);
+
+int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
+ uint32_t vcmd,
+ uint32_t handle,
+ uint32_t level, uint32_t stride,
+ uint32_t layer_stride,
+ const struct pipe_box *box,
+ uint32_t data_size);
+
+int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys *vws,
+ void *data,
+ uint32_t data_size);
+int virgl_vtest_recv_transfer_get_data(struct virgl_vtest_winsys *vws,
+ void *data,
+ uint32_t data_size,
+ uint32_t stride,
+ const struct pipe_box *box, uint32_t format);
+
+int virgl_vtest_busy_wait(struct virgl_vtest_winsys *vws, int handle,
+ int flags);
+#endif
diff --git a/src/gallium/winsys/virgl/vtest/vtest_protocol.h b/src/gallium/winsys/virgl/vtest/vtest_protocol.h
new file mode 100644
index 00000000000..86d197f006c
--- /dev/null
+++ b/src/gallium/winsys/virgl/vtest/vtest_protocol.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VTEST_PROTOCOL
+#define VTEST_PROTOCOL
+
+#define VTEST_DEFAULT_SOCKET_NAME "/tmp/.virgl_test"
+
+/* 32-bit length field */
+/* 32-bit cmd field */
+#define VTEST_HDR_SIZE 2
+#define VTEST_CMD_LEN 0 /* length of data */
+#define VTEST_CMD_ID 1
+#define VTEST_CMD_DATA_START 2
+
+/* vtest cmds */
+#define VCMD_GET_CAPS 1
+
+#define VCMD_RESOURCE_CREATE 2
+#define VCMD_RESOURCE_UNREF 3
+
+#define VCMD_TRANSFER_GET 4
+#define VCMD_TRANSFER_PUT 5
+
+#define VCMD_SUBMIT_CMD 6
+
+#define VCMD_RESOURCE_BUSY_WAIT 7
+
+/* pass the process cmd line for debugging */
+#define VCMD_CREATE_RENDERER 8
+/* get caps */
+/* 0 length cmd */
+/* resp VCMD_GET_CAPS + caps */
+
+#define VCMD_RES_CREATE_SIZE 10
+#define VCMD_RES_CREATE_RES_HANDLE 0
+#define VCMD_RES_CREATE_TARGET 1
+#define VCMD_RES_CREATE_FORMAT 2
+#define VCMD_RES_CREATE_BIND 3
+#define VCMD_RES_CREATE_WIDTH 4
+#define VCMD_RES_CREATE_HEIGHT 5
+#define VCMD_RES_CREATE_DEPTH 6
+#define VCMD_RES_CREATE_ARRAY_SIZE 7
+#define VCMD_RES_CREATE_LAST_LEVEL 8
+#define VCMD_RES_CREATE_NR_SAMPLES 9
+
+#define VCMD_RES_UNREF_SIZE 1
+#define VCMD_RES_UNREF_RES_HANDLE 0
+
+#define VCMD_TRANSFER_HDR_SIZE 11
+#define VCMD_TRANSFER_RES_HANDLE 0
+#define VCMD_TRANSFER_LEVEL 1
+#define VCMD_TRANSFER_STRIDE 2
+#define VCMD_TRANSFER_LAYER_STRIDE 3
+#define VCMD_TRANSFER_X 4
+#define VCMD_TRANSFER_Y 5
+#define VCMD_TRANSFER_Z 6
+#define VCMD_TRANSFER_WIDTH 7
+#define VCMD_TRANSFER_HEIGHT 8
+#define VCMD_TRANSFER_DEPTH 9
+#define VCMD_TRANSFER_DATA_SIZE 10
+
+#define VCMD_BUSY_WAIT_FLAG_WAIT 1
+
+#define VCMD_BUSY_WAIT_SIZE 2
+#define VCMD_BUSY_WAIT_HANDLE 0
+#define VCMD_BUSY_WAIT_FLAGS 1
+
+#endif
From 5145243018e4456994e73bffdb0296b8fedd6fbe Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Fri, 23 Oct 2015 12:11:43 +1000
Subject: [PATCH 037/266] docs: update relnotes to mention virgl driver.
---
docs/relnotes/11.1.0.html | 1 +
1 file changed, 1 insertion(+)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 138bb9eb54e..3e1cd41dc57 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -55,6 +55,7 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_texture_view on radeonsi
EGL_KHR_create_context on softpipe, llvmpipe
EGL_KHR_gl_colorspace on softpipe, llvmpipe
+new virgl gallium driver for qemu virtio-gpu
Bug fixes
From 3994ef5f1b7386c17dff532cb5d04a7823520c7a Mon Sep 17 00:00:00 2001
From: Timothy Arceri
Date: Thu, 22 Oct 2015 16:18:44 +1100
Subject: [PATCH 038/266] glsl: remove excess location qualifier validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Location has never been able to be a negative value because it has
always been validated in the parser.
Also the linker doesn't check for negatives like the comment claims.
Reviewed-by: Tapani Pälli
---
src/glsl/ast_to_hir.cpp | 70 +++++++++++++----------------------------
1 file changed, 22 insertions(+), 48 deletions(-)
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 8549d55c4e3..030653079d9 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2422,21 +2422,6 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
const struct gl_context *const ctx = state->ctx;
unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
- /* ARB_explicit_uniform_location specification states:
- *
- * "The explicitly defined locations and the generated locations
- * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
- *
- * "Valid locations for default-block uniform variable locations
- * are in the range of 0 to the implementation-defined maximum
- * number of uniform locations."
- */
- if (qual->location < 0) {
- _mesa_glsl_error(loc, state,
- "explicit location < 0 for uniform %s", var->name);
- return;
- }
-
if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
_mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
@@ -2527,41 +2512,30 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
} else {
var->data.explicit_location = true;
- /* This bit of silliness is needed because invalid explicit locations
- * are supposed to be flagged during linking. Small negative values
- * biased by VERT_ATTRIB_GENERIC0 or FRAG_RESULT_DATA0 could alias
- * built-in values (e.g., -16+VERT_ATTRIB_GENERIC0 = VERT_ATTRIB_POS).
- * The linker needs to be able to differentiate these cases. This
- * ensures that negative values stay negative.
- */
- if (qual->location >= 0) {
- switch (state->stage) {
- case MESA_SHADER_VERTEX:
- var->data.location = (var->data.mode == ir_var_shader_in)
- ? (qual->location + VERT_ATTRIB_GENERIC0)
- : (qual->location + VARYING_SLOT_VAR0);
- break;
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ var->data.location = (var->data.mode == ir_var_shader_in)
+ ? (qual->location + VERT_ATTRIB_GENERIC0)
+ : (qual->location + VARYING_SLOT_VAR0);
+ break;
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- if (var->data.patch)
- var->data.location = qual->location + VARYING_SLOT_PATCH0;
- else
- var->data.location = qual->location + VARYING_SLOT_VAR0;
- break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.patch)
+ var->data.location = qual->location + VARYING_SLOT_PATCH0;
+ else
+ var->data.location = qual->location + VARYING_SLOT_VAR0;
+ break;
- case MESA_SHADER_FRAGMENT:
- var->data.location = (var->data.mode == ir_var_shader_out)
- ? (qual->location + FRAG_RESULT_DATA0)
- : (qual->location + VARYING_SLOT_VAR0);
- break;
- case MESA_SHADER_COMPUTE:
- assert(!"Unexpected shader type");
- break;
- }
- } else {
- var->data.location = qual->location;
+ case MESA_SHADER_FRAGMENT:
+ var->data.location = (var->data.mode == ir_var_shader_out)
+ ? (qual->location + FRAG_RESULT_DATA0)
+ : (qual->location + VARYING_SLOT_VAR0);
+ break;
+ case MESA_SHADER_COMPUTE:
+ assert(!"Unexpected shader type");
+ break;
}
if (qual->flags.q.explicit_index) {
From 4a7d18296a9e80d2c5458bf77f8eb88913433c90 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Fri, 23 Oct 2015 00:24:26 +0800
Subject: [PATCH 039/266] ilo: fix scratch space setup in core
Move scratch_size out of ilo_state_shader_kernel_info and
ilo_state_compute_interface_info. A scratch space is shared by all
kernels/interfaces. Update builder to emit relocs for scratch bos.
---
.../drivers/ilo/core/ilo_builder_3d_bottom.h | 33 +++++--
.../drivers/ilo/core/ilo_builder_3d_top.h | 99 +++++++++++++++----
.../drivers/ilo/core/ilo_state_compute.c | 95 +++++++++++++-----
.../drivers/ilo/core/ilo_state_compute.h | 12 ++-
.../drivers/ilo/core/ilo_state_shader.c | 74 +++++++++-----
.../drivers/ilo/core/ilo_state_shader.h | 43 +++++++-
.../drivers/ilo/core/ilo_state_shader_ps.c | 52 +++++-----
src/gallium/drivers/ilo/ilo_render_gen6.c | 18 ++--
src/gallium/drivers/ilo/ilo_render_gen7.c | 28 +++---
src/gallium/drivers/ilo/ilo_render_gen8.c | 2 +-
src/gallium/drivers/ilo/ilo_shader.c | 4 +-
11 files changed, 327 insertions(+), 133 deletions(-)
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 5efe9da2d22..2e9470e66e9 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -202,14 +202,16 @@ static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_state_raster *rs,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
+ ps->ps[0], 0);
+ }
}
static inline void
@@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 8;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ps->ps[3], 0);
+ }
}
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 12;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
dw[11] = 0;
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ps->ps[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 6e94fb25f1f..3a448719c15 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
dw[3] = vs->vs[1];
dw[4] = vs->vs[2];
dw[5] = vs->vs[3];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
dw[6] = vs->vs[2];
dw[7] = vs->vs[3];
dw[8] = vs->vs[4];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
dw[4] = hs->hs[2];
dw[5] = hs->hs[3];
dw[6] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
gen8_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
dw[6] = 0;
dw[7] = hs->hs[3];
dw[8] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
@@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
dw[3] = ds->ds[1];
dw[4] = ds->ds[2];
dw[5] = ds->ds[3];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen8_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
dw[6] = ds->ds[2];
dw[7] = ds->ds[3];
dw[8] = ds->ds[4];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
@@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = 0;
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 10;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
dw[7] = gs->gs[3];
dw[8] = 0;
dw[9] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
index a5fe5e1a6b0..ba3ff9001e1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
*/
assert(per_thread_read <= 63);
- /* From the Haswell PRM, volume 2d, page 199:
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
*
* "(Cross-Thread Constant Data Read Length) [0,127]"
*/
@@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
return true;
}
-static uint8_t
-compute_get_gen6_scratch_space(const struct ilo_dev *dev,
- const struct ilo_state_compute_info *info)
+static uint32_t
+compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
{
- uint32_t scratch_size = 0;
- uint8_t i;
+ ILO_DEV_ASSERT(dev, 6, 7);
- ILO_DEV_ASSERT(dev, 6, 8);
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 2, page 30:
+ *
+ * "(Per Thread Scratch Space)
+ * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
+ */
+ assert(info->per_thread_scratch_size <= 12 * 1024);
- for (i = 0; i < info->interface_count; i++) {
- if (scratch_size < info->interfaces[i].scratch_size)
- scratch_size = info->interfaces[i].scratch_size;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (info->per_thread_scratch_size - 1) / 1024 : 0;
- /* next power of two, starting from 1KB */
- return (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+ return 1024 * (1 + *per_thread_space);
+}
- /* next power of two, starting from 2KB */
- return (scratch_size > 2048) ?
- (util_last_bit(scratch_size - 1) - 11): 0;
- } else {
- assert(scratch_size <= 12 * 1024);
+static uint32_t
+compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
+{
+ ILO_DEV_ASSERT(dev, 7.5, 8);
- return (scratch_size > 1024) ?
- (scratch_size - 1) / 1024 : 0;
+ /*
+ * From the Haswell PRM, volume 2b, page 407:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,10] Indicating [2k bytes, 2 Mbytes]"
+ *
+ * "Note: The scratch space should be declared as 2x the desired
+ * scratch space. The stack will start at the half-way point instead
+ * of the end. The upper half of scratch space will not be accessed
+ * and so does not have to be allocated in memory."
+ *
+ * From the Broadwell PRM, volume 2a, page 450:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,11] indicating [1k bytes, 2 Mbytes]"
+ */
+ assert(info->per_thread_scratch_size <=
+ ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
+
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
+
+ /* next power of two, starting from 1KB */
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+
+ return 1 << (10 + *per_thread_space);
}
static bool
@@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
const struct ilo_state_compute_info *info)
{
struct compute_urb_configuration urb;
- uint8_t scratch_space;
+ uint32_t per_thread_size;
+ uint8_t per_thread_space;
uint32_t dw1, dw2, dw4;
@@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
!compute_validate_gen6(dev, info, &urb))
return false;
- scratch_space = compute_get_gen6_scratch_space(dev, info);
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ } else {
+ per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ }
+
+ dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
- dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
@@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
compute->vfe[1] = dw2;
compute->vfe[2] = dw4;
+ compute->scratch_size = per_thread_size * dev->thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
index 346f7b617f4..bd56bba4369 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
/* usually 0 unless there are multiple interfaces */
uint32_t kernel_offset;
- uint32_t scratch_size;
-
uint8_t sampler_count;
uint8_t surface_count;
@@ -65,6 +63,8 @@ struct ilo_state_compute_info {
const struct ilo_state_compute_interface_info *interfaces;
uint8_t interface_count;
+ uint32_t per_thread_scratch_size;
+
uint32_t cv_urb_alloc_size;
uint32_t curbe_alloc_size;
};
@@ -74,6 +74,8 @@ struct ilo_state_compute {
uint32_t (*idrt)[6];
uint8_t idrt_count;
+
+ uint32_t scratch_size;
};
static inline size_t
@@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
const struct ilo_dev *dev,
const struct ilo_state_compute_info *info);
+static inline uint32_t
+ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
+{
+ return compute->scratch_size;
+}
+
#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
index 2e06b07a8e3..aec4fd6d8a6 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -37,7 +37,9 @@ enum vertex_stage {
struct vertex_ff {
uint8_t grf_start;
- uint8_t scratch_space;
+
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
* others.
*/
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 134:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1K Bytes, 2M Bytes]"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
assert(!kernel->offset);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel,
const struct ilo_state_shader_resource_info *resource,
const struct ilo_state_shader_urb_info *urb,
+ uint32_t per_thread_scratch_size,
struct vertex_ff *ff)
{
ILO_DEV_ASSERT(dev, 6, 8);
+ memset(ff, 0, sizeof(*ff));
+
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
!vertex_validate_gen6_urb(dev, stage, urb))
return false;
ff->grf_start = kernel->grf_start;
- /* next power of two, starting from 1KB */
- ff->scratch_space = (kernel->scratch_size > 1024) ?
- (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+ if (per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1K Bytes, 2M Bytes]"
+ */
+ assert(per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
+ (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
@@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = vs_get_gen6_thread_count(dev, info);
@@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ vs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = hs_get_gen7_thread_count(dev, info);
@@ -296,7 +308,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
if (info->stats_enable)
dw2 |= GEN7_HS_DW2_STATISTICS;
- dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
@@ -312,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
hs->hs[2] = dw4;
hs->hs[3] = dw5;
+ hs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -375,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = ds_get_gen7_thread_count(dev, info);
@@ -387,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
@@ -414,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ ds->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -427,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
- &info->resource, &info->urb, ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, ff))
return false;
/*
@@ -512,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
@@ -552,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
gs->gs[3] = dw5;
gs->gs[4] = dw6;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -590,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
@@ -620,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h
index 44690c5b0bb..35651090d66 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h
@@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
uint8_t grf_start;
uint8_t pcb_attr_count;
-
- uint32_t scratch_size;
};
/**
@@ -77,6 +75,7 @@ struct ilo_state_vs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -86,6 +85,7 @@ struct ilo_state_hs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -95,6 +95,7 @@ struct ilo_state_ds_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -119,6 +120,7 @@ struct ilo_state_gs_info {
struct ilo_state_gs_sol_info sol;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -158,6 +160,8 @@ struct ilo_state_ps_info {
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
+ uint32_t per_thread_scratch_size;
+
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
bool per_sample_dispatch;
@@ -173,23 +177,28 @@ struct ilo_state_ps_info {
struct ilo_state_vs {
uint32_t vs[5];
+ uint32_t scratch_size;
};
struct ilo_state_hs {
uint32_t hs[4];
+ uint32_t scratch_size;
};
struct ilo_state_ds {
uint32_t te[3];
uint32_t ds[5];
+ uint32_t scratch_size;
};
struct ilo_state_gs {
uint32_t gs[5];
+ uint32_t scratch_size;
};
struct ilo_state_ps {
uint32_t ps[8];
+ uint32_t scratch_size;
struct ilo_state_ps_dispatch_conds {
bool ps_valid;
@@ -211,6 +220,12 @@ bool
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
+{
+ return vs->scratch_size;
+}
+
bool
ilo_state_hs_init(struct ilo_state_hs *hs,
const struct ilo_dev *dev,
@@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
+{
+ return hs->scratch_size;
+}
+
bool
ilo_state_ds_init(struct ilo_state_ds *ds,
const struct ilo_dev *dev,
@@ -230,6 +251,12 @@ bool
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
+{
+ return ds->scratch_size;
+}
+
bool
ilo_state_gs_init(struct ilo_state_gs *gs,
const struct ilo_dev *dev,
@@ -239,6 +266,12 @@ bool
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
+{
+ return gs->scratch_size;
+}
+
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
@@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
+static inline uint32_t
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
+{
+ return ps->scratch_size;
+}
+
#endif /* ILO_STATE_SHADER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
index ceeb68a460e..5c3ca1ebe37 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -34,7 +34,8 @@ struct pixel_ff {
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
bool pcb_enable;
- uint8_t scratch_space;
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
{
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
const uint8_t max_grf_start = 128;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 271:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
assert(kernel->offset % 64 == 0);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
- uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
- scratch_size = 0;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
- scratch_size < kernel_8->scratch_size)
- scratch_size = kernel_8->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
- scratch_size < kernel_16->scratch_size)
- scratch_size = kernel_16->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
- scratch_size < kernel_32->scratch_size)
- scratch_size = kernel_32->scratch_size;
-
- /* next power of two, starting from 1KB */
- ff->scratch_space = (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
-
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
@@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
+ if (info->per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+ */
+ assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
+
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
@@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
@@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
@@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
@@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
}
+ ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
/* save conditions */
ps->conds = ff.conds;
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index c1f759f3043..c81514f9b4c 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -476,9 +476,9 @@ gen6_draw_vs(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
- gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
@@ -501,7 +501,7 @@ gen6_draw_gs(struct ilo_render *r,
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
- gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
@@ -524,9 +524,9 @@ gen6_draw_gs(struct ilo_render *r,
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
- gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL);
} else {
- gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
+ gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
}
}
@@ -672,7 +672,7 @@ gen6_draw_wm(struct ilo_render *r,
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
- &cso->ps, kernel_offset);
+ &cso->ps, kernel_offset, NULL);
}
}
@@ -817,10 +817,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
@@ -833,7 +833,7 @@ gen6_rectlist_wm(struct ilo_render *r,
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
+ gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 6623a8bcb43..97d9d058fdf 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -319,9 +319,9 @@ gen7_draw_vs(struct ilo_render *r,
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
@@ -338,9 +338,9 @@ gen7_draw_hs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
else
- gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -373,9 +373,9 @@ gen7_draw_ds(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
else
- gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -397,9 +397,9 @@ gen7_draw_gs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
else
- gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -534,7 +534,7 @@ gen7_draw_wm(struct ilo_render *r,
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -678,18 +678,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
+ gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
+ gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
@@ -711,7 +711,7 @@ gen7_rectlist_wm(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
+ gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 65494b4058a..1f750a2bfed 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 73b625e9de4..c78d0e0b602 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -578,7 +578,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
kern->grf_start = kernel->in.start_grf;
kern->pcb_attr_count =
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
- kern->scratch_size = 0;
}
static void
@@ -602,6 +601,7 @@ init_vs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -640,6 +640,7 @@ init_gs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -664,6 +665,7 @@ init_ps(struct ilo_shader *kernel,
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;
From 582ecb3b9132ff3690900e5426c982187d640c87 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Fri, 23 Oct 2015 00:45:49 +0800
Subject: [PATCH 040/266] ilo: add support for scratch spaces
When a kernel reports a non-zero per-thread scratch space size, make sure the
hardware state is correctly set up, and a scratch bo is allocated.
---
src/gallium/drivers/ilo/ilo_draw.c | 8 +++
src/gallium/drivers/ilo/ilo_render.c | 39 +++++++++++++++
src/gallium/drivers/ilo/ilo_render.h | 6 +++
src/gallium/drivers/ilo/ilo_render_gen.h | 5 ++
src/gallium/drivers/ilo/ilo_render_gen6.c | 19 ++++---
src/gallium/drivers/ilo/ilo_render_gen7.c | 13 +++--
src/gallium/drivers/ilo/ilo_render_gen8.c | 2 +-
src/gallium/drivers/ilo/ilo_shader.c | 50 +++++++++++++++++--
src/gallium/drivers/ilo/ilo_shader.h | 6 +++
.../drivers/ilo/shader/ilo_shader_internal.h | 1 +
10 files changed, 133 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index 433348d9326..69f36ae5df6 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -547,6 +547,7 @@ static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
+ int vs_scratch_size, gs_scratch_size, fs_scratch_size;
if (ilo_debug & ILO_DEBUG_DRAW) {
if (info->indexed) {
@@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
ilo_finalize_3d_states(ilo, info);
+ /* upload kernels */
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
+ /* prepare scratch spaces */
+ ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache,
+ &vs_scratch_size, &gs_scratch_size, &fs_scratch_size);
+ ilo_render_prepare_scratch_spaces(ilo->render,
+ vs_scratch_size, gs_scratch_size, fs_scratch_size);
+
ilo_blit_resolve_framebuffer(ilo);
/* If draw_vbo ever fails, return immediately. */
diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c
index 21f75de11a0..8bc04df4fab 100644
--- a/src/gallium/drivers/ilo/ilo_render.c
+++ b/src/gallium/drivers/ilo/ilo_render.c
@@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder)
void
ilo_render_destroy(struct ilo_render *render)
{
+ intel_bo_unref(render->vs_scratch.bo);
+ intel_bo_unref(render->gs_scratch.bo);
+ intel_bo_unref(render->fs_scratch.bo);
+
intel_bo_unref(render->workaround_bo);
FREE(render);
}
+static bool
+resize_scratch_space(struct ilo_render *render,
+ struct ilo_render_scratch_space *scratch,
+ const char *name, int new_size)
+{
+ struct intel_bo *bo;
+
+ if (scratch->size >= new_size)
+ return true;
+
+ bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false);
+ if (!bo)
+ return false;
+
+ intel_bo_unref(scratch->bo);
+ scratch->bo = bo;
+ scratch->size = new_size;
+
+ return true;
+}
+
+bool
+ilo_render_prepare_scratch_spaces(struct ilo_render *render,
+ int vs_scratch_size,
+ int gs_scratch_size,
+ int fs_scratch_size)
+{
+ return (resize_scratch_space(render, &render->vs_scratch,
+ "vs scratch", vs_scratch_size) &&
+ resize_scratch_space(render, &render->gs_scratch,
+ "gs scratch", gs_scratch_size) &&
+ resize_scratch_space(render, &render->fs_scratch,
+ "fs scratch", fs_scratch_size));
+}
+
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,
diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h
index 098af73ec9b..31fd1e6f859 100644
--- a/src/gallium/drivers/ilo/ilo_render.h
+++ b/src/gallium/drivers/ilo/ilo_render.h
@@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder);
void
ilo_render_destroy(struct ilo_render *render);
+bool
+ilo_render_prepare_scratch_spaces(struct ilo_render *render,
+ int vs_scratch_size,
+ int gs_scratch_size,
+ int fs_scratch_size);
+
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,
diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h
index 6b133750043..f227d6bf4da 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen.h
+++ b/src/gallium/drivers/ilo/ilo_render_gen.h
@@ -51,6 +51,11 @@ struct ilo_render {
struct intel_bo *workaround_bo;
+ struct ilo_render_scratch_space {
+ struct intel_bo *bo;
+ int size;
+ } vs_scratch, gs_scratch, fs_scratch;
+
struct ilo_state_sample_pattern sample_pattern;
bool hw_ctx_changed;
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index c81514f9b4c..910e6c0fb7a 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r,
gen6_wa_pre_3dstate_vs_toggle(r);
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
- gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL);
- else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs,
+ kernel_offset, r->vs_scratch.bo);
+ } else {
+ gen6_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ }
}
}
@@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r,
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
- gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL);
+ gen6_3DSTATE_GS(r->builder, &cso->gs,
+ kernel_offset, r->gs_scratch.bo);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
@@ -524,7 +528,8 @@ gen6_draw_gs(struct ilo_render *r,
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
- gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL);
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol,
+ kernel_offset, r->gs_scratch.bo);
} else {
gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
@@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r,
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
- &cso->ps, kernel_offset, NULL);
+ &cso->ps, kernel_offset, r->fs_scratch.bo);
}
}
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 97d9d058fdf..330ba6c88d6 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r,
const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
- if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
- else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
+ gen8_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ } else {
+ gen6_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ }
}
}
@@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 1f750a2bfed..efe0e0d501b 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index c78d0e0b602..c61716dc791 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -37,6 +37,10 @@
struct ilo_shader_cache {
struct list_head shaders;
struct list_head changed;
+
+ int max_vs_scratch_size;
+ int max_gs_scratch_size;
+ int max_fs_scratch_size;
};
/**
@@ -121,6 +125,8 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
struct ilo_shader *sh;
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
+ int scratch_size, *cur_max;
+
if (sh->uploaded)
continue;
@@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
sh->kernel_size, sh->kernel);
sh->uploaded = true;
+
+ switch (shader->info.type) {
+ case PIPE_SHADER_VERTEX:
+ scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs);
+ cur_max = &shc->max_vs_scratch_size;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs);
+ cur_max = &shc->max_gs_scratch_size;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps);
+ cur_max = &shc->max_fs_scratch_size;
+ break;
+ default:
+ assert(!"unknown shader type");
+ scratch_size = 0;
+ cur_max = &shc->max_vs_scratch_size;
+ break;
+ }
+
+ if (*cur_max < scratch_size)
+ *cur_max = scratch_size;
}
list_del(&shader->list);
@@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
sh->uploaded = false;
}
+
+ shc->max_vs_scratch_size = 0;
+ shc->max_gs_scratch_size = 0;
+ shc->max_fs_scratch_size = 0;
+}
+
+void
+ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
+ int *vs_scratch_size,
+ int *gs_scratch_size,
+ int *fs_scratch_size)
+{
+ *vs_scratch_size = shc->max_vs_scratch_size;
+ *gs_scratch_size = shc->max_gs_scratch_size;
+ *fs_scratch_size = shc->max_fs_scratch_size;
}
/**
@@ -601,7 +645,7 @@ init_vs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
- info.per_thread_scratch_size = 0;
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -640,7 +684,7 @@ init_gs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
- info.per_thread_scratch_size = 0;
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -665,7 +709,7 @@ init_ps(struct ilo_shader *kernel,
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
- info.per_thread_scratch_size = 0;
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 01de54146b1..10dcf739430 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
void
ilo_shader_cache_invalidate(struct ilo_shader_cache *shc);
+void
+ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
+ int *vs_scratch_size,
+ int *gs_scratch_size,
+ int *fs_scratch_size);
+
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev *dev,
const struct pipe_shader_state *state,
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 01c86675202..1f0cda174e8 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -139,6 +139,7 @@ struct ilo_shader {
void *kernel;
int kernel_size;
+ int per_thread_scratch_size;
struct ilo_kernel_routing routing;
struct ilo_state_ps_params_info ps_params;
From f408a13dd3089483c78803e961ba8a4df8b4cbba Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Wed, 21 Oct 2015 09:46:48 +0200
Subject: [PATCH 041/266] glsl: fix shader storage block member rules when
adding program resources
Commit f24e5e did not take into account arrays of named shader
storage blocks.
Fixes 20 dEQP-GLES31.functional.ssbo.* tests:
dEQP-GLES31.functional.ssbo.layout.single_struct_array.per_block_buffer.shared_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.per_block_buffer.packed_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.per_block_buffer.std140_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.per_block_buffer.std430_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.single_buffer.shared_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.single_buffer.packed_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.single_buffer.std140_instance_array
dEQP-GLES31.functional.ssbo.layout.single_struct_array.single_buffer.std430_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.per_block_buffer.shared_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.per_block_buffer.packed_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.per_block_buffer.std140_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.per_block_buffer.std430_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.single_buffer.shared_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.single_buffer.packed_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.single_buffer.std140_instance_array
dEQP-GLES31.functional.ssbo.layout.single_nested_struct_array.single_buffer.std430_instance_array
dEQP-GLES31.functional.ssbo.layout.random.all_per_block_buffers.2
dEQP-GLES31.functional.ssbo.layout.random.all_per_block_buffers.29
dEQP-GLES31.functional.ssbo.layout.random.all_per_block_buffers.33
dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.3
V2:
- Rename some variables (Timothy)
Signed-off-by: Samuel Iglesias Gonsalvez
Reviewed-by: Timothy Arceri
---
src/glsl/linker.cpp | 33 +++++++++++++++++++++++++++------
1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 07ea0e0c7e5..424b92a1783 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3137,7 +3137,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
GLenum type, const char *name)
{
bool found_interface = false;
- const char *block_name = NULL;
+ unsigned block_name_len = 0;
+ const char *block_name_dot = strchr(name, '.');
/* These rules only apply to buffer variables. So we return
* true for the rest of types.
@@ -3146,8 +3147,28 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
return true;
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- block_name = shProg->BufferInterfaceBlocks[i].Name;
- if (strncmp(block_name, name, strlen(block_name)) == 0) {
+ const char *block_name = shProg->BufferInterfaceBlocks[i].Name;
+ block_name_len = strlen(block_name);
+
+ const char *block_square_bracket = strchr(block_name, '[');
+ if (block_square_bracket) {
+ /* The block is part of an array of named interfaces,
+ * for the name comparison we ignore the "[x]" part.
+ */
+ block_name_len -= strlen(block_square_bracket);
+ }
+
+ if (block_name_dot) {
+ /* Check if the variable name starts with the interface
+ * name. The interface name (if present) should have the
+ * length than the interface block name we are comparing to.
+ */
+ unsigned len = strlen(name) - strlen(block_name_dot);
+ if (len != block_name_len)
+ continue;
+ }
+
+ if (strncmp(block_name, name, block_name_len) == 0) {
found_interface = true;
break;
}
@@ -3157,7 +3178,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
* including the dot that follows it.
*/
if (found_interface)
- name = name + strlen(block_name) + 1;
+ name = name + block_name_len + 1;
/* From: ARB_program_interface_query extension:
*
@@ -3166,14 +3187,14 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
* of its type. For arrays of aggregate types, the enumeration rules are
* applied recursively for the single enumerated array element.
*/
- const char *first_dot = strchr(name, '.');
+ const char *struct_first_dot = strchr(name, '.');
const char *first_square_bracket = strchr(name, '[');
/* The buffer variable is on top level and it is not an array */
if (!first_square_bracket) {
return true;
/* The shader storage block member is a struct, then generate the entry */
- } else if (first_dot && first_dot < first_square_bracket) {
+ } else if (struct_first_dot && struct_first_dot < first_square_bracket) {
return true;
} else {
/* Shader storage block member is an array, only generate an entry for the
From b0963ce75871e2b34856b8e24c8ad427f64be62a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 22 Oct 2015 11:31:56 +0100
Subject: [PATCH 042/266] vc4: Add a sentinel after simulator buffers for
buffer overflow detection.
This is a little bit like the mprotect-based fencing I've experimented
with, but it's simple and low overhead. The downside is that only catches
writes, not reads.
It didn't catch any bad writes on a current piglit run, but may be useful
in the future.
---
src/gallium/drivers/vc4/vc4_simulator.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 76980ca32af..10dabd09f5e 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -32,6 +32,11 @@
#include "vc4_simulator_validate.h"
#include "simpenrose/simpenrose.h"
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL 0xfedcba98
+
#define OVERFLOW_SIZE (32 * 1024 * 1024)
static struct drm_gem_cma_object *
@@ -49,10 +54,12 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
obj->paddr = simpenrose_hw_addr(obj->vaddr);
- dev->simulator_mem_next += size;
+ dev->simulator_mem_next += size + sizeof(uint32_t);
dev->simulator_mem_next = align(dev->simulator_mem_next, 4096);
assert(dev->simulator_mem_next <= screen->simulator_mem_size);
+ *(uint32_t *)(obj->vaddr + bo->size) = BO_SENTINEL;
+
return obj;
}
@@ -109,6 +116,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
struct vc4_bo *bo = drm_bo->bo;
+ assert(*(uint32_t *)(obj->vaddr + bo->size) == BO_SENTINEL);
memcpy(bo->map, obj->vaddr, bo->size);
if (drm_bo->validated_shader) {
@@ -197,6 +205,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
unref_head) {
list_del(&bo->unref_head);
+ assert(*(uint32_t *)(bo->base.vaddr + bo->bo->size) ==
+ BO_SENTINEL);
vc4_bo_unreference(&bo->bo);
free(bo);
}
From 7516cbd26153bd0cc8a548744ea2443b9ea43cc0 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 10 Aug 2015 11:27:21 -0700
Subject: [PATCH 043/266] vc4: Use VC4_GET_FIELD and other defines in dumping
VC4_RENDER_CONFIG.
---
src/gallium/drivers/vc4/vc4_cl_dump.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 6d748010baf..9da59ae6aa7 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -262,14 +262,14 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
shorts[1]);
const char *format = "???";
- switch ((bytes[0] >> 2) & 3) {
- case 0:
+ switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_FORMAT)) {
+ case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
format = "BGR565_DITHERED";
break;
- case 1:
+ case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
format = "RGBA8888";
break;
- case 2:
+ case VC4_RENDER_CONFIG_FORMAT_BGR565:
format = "BGR565";
break;
}
@@ -277,14 +277,14 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
format = "64bit";
const char *tiling = "???";
- switch ((bytes[0] >> 6) & 3) {
- case 0:
+ switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_MEMORY_FORMAT)) {
+ case VC4_TILING_FORMAT_LINEAR:
tiling = "linear";
break;
- case 1:
+ case VC4_TILING_FORMAT_T:
tiling = "T";
break;
- case 2:
+ case VC4_TILING_FORMAT_LT:
tiling = "LT";
break;
}
@@ -296,10 +296,10 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
(bytes[0] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss");
const char *earlyz = "";
- if (bytes[1] & (1 << 3)) {
+ if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) {
earlyz = "early_z disabled";
} else {
- if (bytes[1] & (1 << 2))
+ if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G)
earlyz = "early_z >";
else
earlyz = "early_z <";
From b3797a8f8858a471ab602c7e165f5fa78e611693 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 10 Aug 2015 11:28:53 -0700
Subject: [PATCH 044/266] vc4: Also dump the decimation mode for resolved
stores.
---
src/gallium/drivers/vc4/vc4_cl_dump.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 9da59ae6aa7..5a24e025d06 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -289,11 +289,13 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
break;
}
- fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s\n",
+ fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s %s\n",
offset + 8, hw_offset + 8,
bytes[0],
format, tiling,
- (bytes[0] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss");
+ (shorts[2] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss",
+ (shorts[2] & VC4_RENDER_CONFIG_DECIMATE_MODE_4X) ?
+ "ms_decimate" : "ss_decimate");
const char *earlyz = "";
if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) {
From fb064901e9bd83a63d486f246b9ea943cd00f6cd Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 21 Oct 2015 16:40:46 +0100
Subject: [PATCH 045/266] vc4: Use Rob's NIR-based user clip lowering.
---
src/gallium/drivers/vc4/vc4_nir_lower_io.c | 1 +
src/gallium/drivers/vc4/vc4_program.c | 80 ++++------------------
src/gallium/drivers/vc4/vc4_qir.c | 1 -
src/gallium/drivers/vc4/vc4_qir.h | 1 -
4 files changed, 14 insertions(+), 69 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index caf706aa2a6..7ea263afb68 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -406,6 +406,7 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_uniform_indirect:
+ case nir_intrinsic_load_user_clip_plane:
vc4_nir_lower_uniform(c, b, intr);
break;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6e9ec6530c6..b931870ae2d 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1048,33 +1048,9 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
}
-static void
-clip_distance_discard(struct vc4_compile *c)
-{
- for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) {
- if (!(c->key->ucp_enables & (1 << i)))
- continue;
-
- struct qreg dist =
- emit_fragment_varying(c,
- VARYING_SLOT_CLIP_DIST0 + (i / 4),
- i % 4);
-
- qir_SF(c, dist);
-
- if (c->discard.file == QFILE_NULL)
- c->discard = qir_uniform_ui(c, 0);
-
- c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0),
- c->discard);
- }
-}
-
static void
emit_frag_end(struct vc4_compile *c)
{
- clip_distance_discard(c);
-
struct qreg color;
if (c->output_color_index != -1) {
color = c->outputs[c->output_color_index];
@@ -1189,45 +1165,6 @@ emit_stub_vpm_read(struct vc4_compile *c)
c->num_inputs++;
}
-static void
-emit_ucp_clipdistance(struct vc4_compile *c)
-{
- unsigned cv;
- if (c->output_clipvertex_index != -1)
- cv = c->output_clipvertex_index;
- else if (c->output_position_index != -1)
- cv = c->output_position_index;
- else
- return;
-
- for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
- if (!(c->key->ucp_enables & (1 << plane)))
- continue;
-
- /* Pick the next outputs[] that hasn't been written to, since
- * there are no other program writes left to be processed at
- * this point. If something had been declared but not written
- * (like a w component), we'll just smash over the top of it.
- */
- uint32_t output_index = c->num_outputs++;
- add_output(c, output_index,
- VARYING_SLOT_CLIP_DIST0 + plane / 4,
- plane % 4);
-
-
- struct qreg dist = qir_uniform_f(c, 0.0);
- for (int i = 0; i < 4; i++) {
- struct qreg pos_chan = c->outputs[cv + i];
- struct qreg ucp =
- qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
- plane * 4 + i);
- dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
- }
-
- c->outputs[output_index] = dist;
- }
-}
-
static void
emit_vert_end(struct vc4_compile *c,
struct vc4_varying_slot *fs_inputs,
@@ -1236,7 +1173,6 @@ emit_vert_end(struct vc4_compile *c,
struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
emit_stub_vpm_read(c);
- emit_ucp_clipdistance(c);
emit_scaled_viewport_write(c, rcp_w);
emit_zs_write(c, rcp_w);
@@ -1391,9 +1327,6 @@ ntq_setup_outputs(struct vc4_compile *c)
case VARYING_SLOT_POS:
c->output_position_index = loc;
break;
- case VARYING_SLOT_CLIP_VERTEX:
- c->output_clipvertex_index = loc;
- break;
case VARYING_SLOT_PSIZ:
c->output_point_size_index = loc;
break;
@@ -1486,6 +1419,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
+ case nir_intrinsic_load_user_clip_plane:
+ *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+ instr->const_index[0]);
+ break;
+
case nir_intrinsic_load_input:
assert(instr->num_components == 1);
if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
@@ -1683,10 +1621,18 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
+
if (stage == QSTAGE_FRAG)
vc4_nir_lower_blend(c);
+
if (c->fs_key && c->fs_key->light_twoside)
nir_lower_two_sided_color(c->s);
+
+ if (stage == QSTAGE_FRAG)
+ nir_lower_clip_fs(c->s, c->key->ucp_enables);
+ else
+ nir_lower_clip_vs(c->s, c->key->ucp_enables);
+
vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
nir_lower_load_const_to_scalar(c->s);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e385fbb65ae..d7da63ba207 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -385,7 +385,6 @@ qir_compile_init(void)
list_inithead(&c->instructions);
c->output_position_index = -1;
- c->output_clipvertex_index = -1;
c->output_color_index = -1;
c->output_point_size_index = -1;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index ddde96db6b4..fa9f5f61c94 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -399,7 +399,6 @@ struct vc4_compile {
uint32_t num_outputs;
uint32_t num_texture_samples;
uint32_t output_position_index;
- uint32_t output_clipvertex_index;
uint32_t output_color_index;
uint32_t output_point_size_index;
From 2f1bc1da864f8d169427b911bf60e1023321e420 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?=
Date: Fri, 23 Oct 2015 15:32:30 +0200
Subject: [PATCH 046/266] i965/vec4: check opcode on
vec4_instruction::reads_flag(channel)
Commit f17b78 added an alternative reads_flag(channel) that returned
if the instruction was reading a specific channel flag. By mistake it
only took into account the predicate, but when the opcode is
VS_OPCODE_UNPACK_FLAGS_SIMD4X2 there isn't any predicate, but the flag
are used.
That mistake caused some regressions on old hw. More information on
this bug:
https://bugs.freedesktop.org/show_bug.cgi?id=92621
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_ir_vec4.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 74e9733bd44..cc4104c8eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -188,8 +188,8 @@ public:
bool reads_flag(unsigned c)
{
- if (!reads_flag())
- return false;
+ if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
+ return true;
switch (predicate) {
case BRW_PREDICATE_NONE:
From de5a450bd360d24db65cbba5b6633f800fda0d2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Fri, 16 Oct 2015 21:38:05 -0700
Subject: [PATCH 047/266] i965: Don't use message headers for untyped reads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We always set the mask to 0xffff, which is what it defaults to when no
header is present. Let's drop the header instead.
v2: Only remove header for untyped reads. Typed reads always need the
header.
Reviewed-by: Francisco Jerez
Reviewed-by: Jordan Justen
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +--
src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index bf2fee9ed48..ebd811f9674 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2906,11 +2906,10 @@ brw_untyped_surface_read(struct brw_codegen *p,
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, dst, payload, surface, msg_length,
brw_surface_payload_size(p, num_channels, true, true),
- align1);
+ false);
brw_set_dp_untyped_surface_read_message(
p, insn, num_channels);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 71da9d9f7b6..c8df9407f03 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4070,7 +4070,7 @@ fs_visitor::lower_logical_sends()
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
- fs_reg(0xffff));
+ fs_reg());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
From 24a3a697e5e029767c2d210a94d47c52c5e5e299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Fri, 16 Oct 2015 21:58:14 -0700
Subject: [PATCH 048/266] i965/fs: Read all components of a SSBO field with one
send
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Instead of looping through single-component reads, read all components
in one go.
Reviewed-by: Iago Toral Quiroga
Reviewed-by: Jordan Justen
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 +++++++-----------------
1 file changed, 7 insertions(+), 18 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index acb51c023ad..5dc63c95a95 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1532,24 +1532,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
/* Read the vector */
- for (int i = 0; i < instr->num_components; i++) {
- fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
- read_result.type = dest.type;
- bld.MOV(dest, read_result);
- dest = offset(dest, bld, 1);
-
- /* Vector components are stored contiguous in memory */
- if (i < instr->num_components) {
- if (!has_indirect) {
- const_offset_bytes += 4;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
- }
- }
- }
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */,
+ instr->num_components,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ for (int i = 0; i < instr->num_components; i++)
+ bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
break;
}
From aedc0aab19c233cc084211959ef2b6be1c500bb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Tue, 20 Oct 2015 23:07:56 -0700
Subject: [PATCH 049/266] i965/fs: Use unsigned immediate 0 when eliminating
SHADER_OPCODE_FIND_LIVE_CHANNEL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The destination for SHADER_OPCODE_FIND_LIVE_CHANNEL is always a UD
register. When we replace the opcode with a MOV, make sure we use a UD
immediate 0 so copy propagation doesn't bail because of non-matching
types.
Reviewed-by: Matt Turner
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c8df9407f03..75f1a926c1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2614,7 +2614,7 @@ fs_visitor::eliminate_find_live_channel()
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = fs_reg(0);
+ inst->src[0] = fs_reg(0u);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
From a19bf6d3ccbab6170ccfb7e04316a58f3e19396c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Tue, 20 Oct 2015 23:16:51 -0700
Subject: [PATCH 050/266] i965/fs: Don't uniformize surface index twice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The emit_untyped_read and emit_untyped_write helpers already uniformize
the surface index argument. No need to do it before calling them.
Reviewed-by: Matt Turner
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5dc63c95a95..00f200a6c97 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1511,7 +1511,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -1756,7 +1755,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
From 0a5a738252afdd64b778024bcd130473b9a6224e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Tue, 20 Oct 2015 23:31:49 -0700
Subject: [PATCH 051/266] i965/fs: Avoid scalar destinations in
emit_uniformize()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The scalar destination registers break copy propagation. Instead compute
the results to a regular register and then reference a component when we
later use the result as a source.
Reviewed-by: Francisco Jerez
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs_builder.h | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index df10a9de293..f121f3463d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -390,14 +390,21 @@ namespace brw {
src_reg
emit_uniformize(const src_reg &src) const
{
+ /* FIXME: We use a vector chan_index and dst to allow constant and
+ * copy propagration to move result all the way into the consuming
+ * instruction (typically a surface index or sampler index for a
+ * send). This uses 1 or 3 extra hw registers in 16 or 32 wide
+ * dispatch. Once we teach const/copy propagation about scalars we
+ * should go back to scalar destinations here.
+ */
const fs_builder ubld = exec_all();
- const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
- const dst_reg dst = component(vgrf(src.type), 0);
+ const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ const dst_reg dst = vgrf(src.type);
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
- ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
+ ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
- return src_reg(dst);
+ return src_reg(component(dst, 0));
}
/**
From feff21d1a6ba49a0d6f7526e1ff473a0b574c92e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Wed, 21 Oct 2015 22:49:14 -0700
Subject: [PATCH 052/266] i965/fs: Drop offset_reg temporary in ssbo load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now that we don't read each component one-by-one, we don't need the
temoprary vgrf for the offset. More importantly, this register was type
UD while the nir source was type D. This broke copy propagation and left
a redundant MOV in the generated code.
Reviewed-by: Francisco Jerez
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 00f200a6c97..a82c616e8fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1521,13 +1521,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
/* Get the offset to read from */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
+ fs_reg offset_reg;
if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+ offset_reg = get_nir_src(instr->src[1]);
} else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ offset_reg = fs_reg(instr->const_index[0]);
}
/* Read the vector */
From 0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Wed, 21 Oct 2015 23:43:34 -0700
Subject: [PATCH 053/266] i965/fs: Optimize ssbo stores
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Francisco Jerez
Write groups of enabled components together.
Signed-off-by: Kristian Høgsberg Kristensen
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 55 +++++++++++-------------
1 file changed, 25 insertions(+), 30 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a82c616e8fb..4950ba4fe67 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1759,45 +1759,40 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir->info.num_ssbos - 1);
}
- /* Offset */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
- if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[2]));
- } else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- }
-
/* Value */
fs_reg val_reg = get_nir_src(instr->src[0]);
/* Writemask */
unsigned writemask = instr->const_index[1];
- /* Write each component present in the writemask */
- unsigned skipped_channels = 0;
- for (int i = 0; i < instr->num_components; i++) {
- int component_mask = 1 << i;
- if (writemask & component_mask) {
- if (skipped_channels) {
- if (!has_indirect) {
- const_offset_bytes += 4 * skipped_channels;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg,
- brw_imm_ud(4 * skipped_channels));
- }
- skipped_channels = 0;
- }
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
- emit_untyped_write(bld, surf_index, offset_reg,
- offset(val_reg, bld, i),
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
+ if (!has_indirect) {
+ offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
+ fs_reg(4 * first_component));
}
- skipped_channels++;
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
}
break;
}
From 8f60dc83f7edba51037662c2637f830feeea3fc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Tue, 20 Oct 2015 23:22:27 -0700
Subject: [PATCH 054/266] i965/fs: Allow copy propagating into new surface
access opcodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Francisco Jerez
Signed-off-by: Kristian Høgsberg Kristensen
---
.../drivers/dri/i965/brw_fs_copy_propagation.cpp | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5589716239a..97e206d3daa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -612,6 +612,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ /* We only propagate into the surface argument of the
+ * instruction. Everything else goes through LOAD_PAYLOAD.
+ */
+ if (i == 1) {
+ inst->src[i] = val;
+ progress = true;
+ }
+ break;
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_BROADCAST:
inst->src[i] = val;
From 73f610453296d7e8039ab05179d714d684d50fb3 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 23 Oct 2015 14:41:47 +0100
Subject: [PATCH 055/266] vc4: Fix missing \n in a perf_debug().
---
src/gallium/drivers/vc4/vc4_draw.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index a4e5e092b1a..c7ca8f9db57 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -344,7 +344,7 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
* could avoid this by using the 3d engine to clear.
*/
if (vc4->draw_call_queued) {
- perf_debug("Flushing rendering to process new clear.");
+ perf_debug("Flushing rendering to process new clear.\n");
vc4_flush(pctx);
}
From 7d7fbcdf4e1683d1aef19c7ee08cc222d8279672 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 23 Oct 2015 14:43:41 +0100
Subject: [PATCH 056/266] vc4: Add a workaround for HW-2116 (state counter wrap
fails).
I haven't proven that this happens (I've got other GPU hangs in the
way), but the closed driver also does this and it's documented as an
errata.
---
src/gallium/drivers/vc4/vc4_context.h | 6 ++---
src/gallium/drivers/vc4/vc4_draw.c | 38 +++++++++++++++++++++++++--
src/gallium/drivers/vc4/vc4_job.c | 2 +-
3 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index c7698422951..7a758f8545f 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -250,10 +250,10 @@ struct vc4_context {
bool needs_flush;
/**
- * Set when needs_flush, and the queued rendering is not just composed
- * of full-buffer clears.
+ * Number of draw calls (not counting full buffer clears) queued in
+ * the current job.
*/
- bool draw_call_queued;
+ uint32_t draw_calls_queued;
/** Maximum index buffer valid for the current shader_rec. */
uint32_t max_index;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index c7ca8f9db57..da81599fcc3 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -100,7 +100,7 @@ vc4_start_draw(struct vc4_context *vc4)
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
- vc4->draw_call_queued = true;
+ vc4->draw_calls_queued++;
vc4->draw_width = width;
vc4->draw_height = height;
@@ -226,6 +226,38 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i
vc4->max_index = max_index;
}
+/**
+ * HW-2116 workaround: Flush the batch before triggering the hardware state
+ * counter wraparound behavior.
+ *
+ * State updates are tracked by a global counter which increments at the first
+ * state update after a draw or a START_BINNING. Tiles can then have their
+ * state updated at draw time with a set of cheap checks for whether the
+ * state's copy of the global counter matches the global counter the last time
+ * that state was written to the tile.
+ *
+ * The state counters are relatively small and wrap around quickly, so you
+ * could get false negatives for needing to update a particular state in the
+ * tile. To avoid this, the hardware attempts to write all of the state in
+ * the tile at wraparound time. This apparently is broken, so we just flush
+ * everything before that behavior is triggered. A batch flush is sufficient
+ * to get our current contents drawn and reset the counters to 0.
+ *
+ * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
+ * tiles with VC4_PACKET_RETURN_FROM_LIST.
+ */
+static void
+vc4_hw_2116_workaround(struct pipe_context *pctx)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+
+ if (vc4->draw_calls_queued == 0x1ef0) {
+ perf_debug("Flushing batch due to HW-2116 workaround "
+ "(too many draw calls per scene\n");
+ vc4_flush(pctx);
+ }
+}
+
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
@@ -244,6 +276,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4_update_shadow_textures(pctx, &vc4->verttex);
vc4_update_shadow_textures(pctx, &vc4->fragtex);
+ vc4_hw_2116_workaround(pctx);
+
vc4_get_draw_cl_space(vc4);
if (vc4->prim_mode != info->mode) {
@@ -343,7 +377,7 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
/* We can't flag new buffers for clearing once we've queued draws. We
* could avoid this by using the 3d engine to clear.
*/
- if (vc4->draw_call_queued) {
+ if (vc4->draw_calls_queued) {
perf_debug("Flushing rendering to process new clear.\n");
vc4_flush(pctx);
}
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 7ebd9f160eb..9ad79c2ea10 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -55,7 +55,7 @@ vc4_job_reset(struct vc4_context *vc4)
vc4->shader_rec_count = 0;
vc4->needs_flush = false;
- vc4->draw_call_queued = false;
+ vc4->draw_calls_queued = 0;
/* We have no hardware context saved between our draw calls, so we
* need to flag the next draw as needing all state emitted. Emitting
From 1066a372d8a260aef29ffb6226e8691c07ec696a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 23 Oct 2015 15:26:12 +0100
Subject: [PATCH 057/266] vc4: Add dumping of VC4_PACKET_GL_INDEXED_PRIMITIVE.
---
src/gallium/drivers/vc4/vc4_cl_dump.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 5a24e025d06..476d2b5b0b1 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -22,6 +22,7 @@
*/
#include "util/u_math.h"
+#include "util/u_prim.h"
#include "util/macros.h"
#include "vc4_context.h"
@@ -162,6 +163,26 @@ dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_
dump_loadstore_general(cl, offset, hw_offset);
}
+static void
+dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint8_t *b = cl + offset;
+ uint32_t *count = cl + offset + 1;
+ uint32_t *ib_offset = cl + offset + 5;
+ uint32_t *max_index = cl + offset + 9;
+
+ fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s\n",
+ offset, hw_offset,
+ b[0], (b[0] & VC4_INDEX_BUFFER_U16) ? "16-bit" : "8-bit",
+ u_prim_name(b[0] & 0x7));
+ fprintf(stderr, "0x%08x 0x%08x: %d verts\n",
+ offset + 1, hw_offset + 1, *count);
+ fprintf(stderr, "0x%08x 0x%08x: 0x%08x IB offset\n",
+ offset + 5, hw_offset + 5, *ib_offset);
+ fprintf(stderr, "0x%08x 0x%08x: 0x%08x max index\n",
+ offset + 9, hw_offset + 9, *max_index);
+}
+
static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
@@ -358,7 +379,7 @@ static const struct packet_info {
PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE),
PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
From 5b2fb138bc5ff68af27d8435cbc01f83a09ee078 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 18 Aug 2015 22:38:34 -0700
Subject: [PATCH 058/266] nir: Add opcodes for saturated vector math.
This corresponds to instructions used on vc4 for its blending inside of
shaders. I've seen these opcodes on other architectures before, but I
think it's the first time these are needed in Mesa.
v2: Rename to 'u' instead of 'i', since they're all 'u'norm (from review
by jekstrand)
---
src/glsl/nir/nir_opcodes.py | 45 +++++++++++++++++++++++++++++++
src/glsl/nir/nir_opt_algebraic.py | 6 +++++
2 files changed, 51 insertions(+)
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index f2d584fe484..3c0f1da94af 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+# Saturated vector add for 4 8bit ints.
+binop("usadd_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturated vector subtract for 4 8bit ints.
+binop("ussub_4x8", tint, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# vector min for 4 8bit ints.
+binop("umin_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# vector max for 4 8bit ints.
+binop("umax_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255.
+binop("umul_unorm_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
binop("fpow", tfloat, "", "powf(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 585e5e0ae98..6aa8b1f6cae 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -56,12 +56,16 @@ optimizations = [
(('iabs', ('ineg', a)), ('iabs', a)),
(('fadd', a, 0.0), a),
(('iadd', a, 0), a),
+ (('usadd_4x8', a, 0), a),
+ (('usadd_4x8', a, ~0), ~0),
(('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
+ (('umul_unorm_4x8', a, 0), 0),
+ (('umul_unorm_4x8', a, ~0), a),
(('fmul', a, 1.0), a),
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
@@ -201,6 +205,8 @@ optimizations = [
# Subtracts
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('ussub_4x8', a, 0), a),
+ (('ussub_4x8', a, ~0), 0),
(('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
From 817a7eb588c7d6536cb469f4ca7447b49268bc00 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 23 Oct 2015 16:34:01 +0100
Subject: [PATCH 059/266] vc4: Don't try to CSE non-SSA instructions.
This can happen when we're doing destination packing -- we don't know
what's in the rest of the register.
Signed-off-by: Eric Anholt
---
src/gallium/drivers/vc4/vc4_opt_cse.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 0e5480ea781..8b4d429074c 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -65,6 +65,7 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
struct qinst *inst, uint32_t sf_count)
{
if (inst->dst.file != QFILE_TEMP ||
+ !c->defs[inst->dst.index] ||
inst->op == QOP_MOV ||
qir_get_op_nsrc(inst->op) > 4) {
return NULL;
From 8e701fda499af0387f5c72f7bc14510182738647 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 8 Jan 2015 18:07:15 -0800
Subject: [PATCH 060/266] vc4: Add QIR/QPU support for the 8-bit vector
instructions.
---
src/gallium/drivers/vc4/vc4_program.c | 20 ++++++++++++++++++++
src/gallium/drivers/vc4/vc4_qir.c | 10 ++++++++++
src/gallium/drivers/vc4/vc4_qir.h | 10 ++++++++++
src/gallium/drivers/vc4/vc4_qpu_emit.c | 5 +++++
4 files changed, 45 insertions(+)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index b931870ae2d..5d7564b46d5 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1040,6 +1040,26 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
*dest = ntq_emit_ubfe(c, src[0], src[1], src[2]);
break;
+ case nir_op_usadd_4x8:
+ *dest = qir_V8ADDS(c, src[0], src[1]);
+ break;
+
+ case nir_op_ussub_4x8:
+ *dest = qir_V8SUBS(c, src[0], src[1]);
+ break;
+
+ case nir_op_umin_4x8:
+ *dest = qir_V8MIN(c, src[0], src[1]);
+ break;
+
+ case nir_op_umax_4x8:
+ *dest = qir_V8MAX(c, src[0], src[1]);
+ break;
+
+ case nir_op_umul_unorm_4x8:
+ *dest = qir_V8MULD(c, src[0], src[1]);
+ break;
+
default:
fprintf(stderr, "unknown NIR ALU inst: ");
nir_print_instr(&instr->instr, stderr);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index d7da63ba207..e7d0d664e19 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -40,6 +40,11 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_FSUB] = { "fsub", 1, 2 },
[QOP_FMUL] = { "fmul", 1, 2 },
[QOP_MUL24] = { "mul24", 1, 2 },
+ [QOP_V8MULD] = {"v8muld", 1, 2 },
+ [QOP_V8MIN] = {"v8min", 1, 2 },
+ [QOP_V8MAX] = {"v8max", 1, 2 },
+ [QOP_V8ADDS] = {"v8adds", 1, 2 },
+ [QOP_V8SUBS] = {"v8subs", 1, 2 },
[QOP_FMIN] = { "fmin", 1, 2 },
[QOP_FMAX] = { "fmax", 1, 2 },
[QOP_FMINABS] = { "fminabs", 1, 2 },
@@ -173,6 +178,11 @@ qir_is_mul(struct qinst *inst)
switch (inst->op) {
case QOP_FMUL:
case QOP_MUL24:
+ case QOP_V8MULD:
+ case QOP_V8MIN:
+ case QOP_V8MAX:
+ case QOP_V8ADDS:
+ case QOP_V8SUBS:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index fa9f5f61c94..fce24cd2330 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -67,6 +67,11 @@ enum qop {
QOP_FADD,
QOP_FSUB,
QOP_FMUL,
+ QOP_V8MULD,
+ QOP_V8MIN,
+ QOP_V8MAX,
+ QOP_V8ADDS,
+ QOP_V8SUBS,
QOP_MUL24,
QOP_FMIN,
QOP_FMAX,
@@ -563,6 +568,11 @@ QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
+QIR_ALU2(V8MULD)
+QIR_ALU2(V8MIN)
+QIR_ALU2(V8MAX)
+QIR_ALU2(V8ADDS)
+QIR_ALU2(V8SUBS)
QIR_ALU2(MUL24)
QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index adf3a8b3658..4c81deb8eaa 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -203,6 +203,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
A(NOT),
M(FMUL),
+ M(V8MULD),
+ M(V8MIN),
+ M(V8MAX),
+ M(V8ADDS),
+ M(V8SUBS),
M(MUL24),
};
From 70b06fb5d55d639fd74596a2ff6971cb57c030ca Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 19 Aug 2015 09:38:14 -0700
Subject: [PATCH 061/266] vc4: Convert blending to being done in 4x8 unorm
normally.
We can't do this all the time, because you want blending to be done in
linear space, and sRGB would lose too much precision being done in 4x8.
The win on instructions is pretty huge when you can, though.
total uniforms in shared programs: 32065 -> 32168 (0.32%)
uniforms in affected programs: 327 -> 430 (31.50%)
total instructions in shared programs: 92644 -> 89830 (-3.04%)
instructions in affected programs: 15580 -> 12766 (-18.06%)
Improves openarena performance at 1920x1080 from 10.7fps to 11.2fps.
---
src/gallium/drivers/vc4/vc4_context.h | 5 +-
src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 286 +++++++++++++++---
src/gallium/drivers/vc4/vc4_qir.h | 2 +
src/gallium/drivers/vc4/vc4_state.c | 4 +-
src/gallium/drivers/vc4/vc4_uniforms.c | 30 +-
5 files changed, 276 insertions(+), 51 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 7a758f8545f..86f2ce5e608 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -291,7 +291,10 @@ struct vc4_context {
struct vc4_vertex_stateobj *vtx;
- struct pipe_blend_color blend_color;
+ struct {
+ struct pipe_blend_color f;
+ uint8_t ub[4];
+ } blend_color;
struct pipe_stencil_ref stencil_ref;
unsigned sample_mask;
struct pipe_framebuffer_state framebuffer;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 17b524653bb..373c9e12d11 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
}
static nir_ssa_def *
-vc4_blend_channel(nir_builder *b,
- nir_ssa_def **src,
- nir_ssa_def **dst,
- unsigned factor,
- int channel)
+vc4_blend_channel_f(nir_builder *b,
+ nir_ssa_def **src,
+ nir_ssa_def **dst,
+ unsigned factor,
+ int channel)
{
switch(factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b,
}
static nir_ssa_def *
-vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
- unsigned func)
+vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
+ int chan)
+{
+ unsigned chan_mask = 0xff << (chan * 8);
+ return nir_ior(b,
+ nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
+ nir_iand(b, src1, nir_imm_int(b, chan_mask)));
+}
+
+static nir_ssa_def *
+vc4_blend_channel_i(nir_builder *b,
+ nir_ssa_def *src,
+ nir_ssa_def *dst,
+ nir_ssa_def *src_a,
+ nir_ssa_def *dst_a,
+ unsigned factor,
+ int a_chan)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return nir_imm_int(b, ~0);
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return src;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src_a;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return dst_a;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return dst;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return vc4_nir_set_packed_chan(b,
+ nir_umin_4x8(b,
+ src_a,
+ nir_inot(b, dst_a)),
+ nir_imm_int(b, ~0),
+ a_chan);
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+ case PIPE_BLENDFACTOR_ZERO:
+ return nir_imm_int(b, 0);
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return nir_inot(b, src);
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return nir_inot(b, src_a);
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return nir_inot(b, dst_a);
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return nir_inot(b, dst);
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+
+ default:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend factor %d\n", factor);
+ return nir_imm_int(b, ~0);
+ }
+}
+
+static nir_ssa_def *
+vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
@@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
}
}
+static nir_ssa_def *
+vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return nir_usadd_4x8(b, src, dst);
+ case PIPE_BLEND_SUBTRACT:
+ return nir_ussub_4x8(b, src, dst);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return nir_ussub_4x8(b, dst, src);
+ case PIPE_BLEND_MIN:
+ return nir_umin_4x8(b, src, dst);
+ case PIPE_BLEND_MAX:
+ return nir_umax_4x8(b, src, dst);
+
+ default:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend func %d\n", func);
+ return src;
+
+ }
+}
+
static void
-vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
- nir_ssa_def **src_color, nir_ssa_def **dst_color)
+vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+ nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
struct pipe_rt_blend_state *blend = &c->fs_key->blend;
@@ -192,22 +283,108 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
blend->alpha_dst_factor);
src_blend[i] = nir_fmul(b, src_color[i],
- vc4_blend_channel(b,
- src_color, dst_color,
- src_factor, i));
+ vc4_blend_channel_f(b,
+ src_color, dst_color,
+ src_factor, i));
dst_blend[i] = nir_fmul(b, dst_color[i],
- vc4_blend_channel(b,
- src_color, dst_color,
- dst_factor, i));
+ vc4_blend_channel_f(b,
+ src_color, dst_color,
+ dst_factor, i));
}
for (int i = 0; i < 4; i++) {
- result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
- ((i != 3) ? blend->rgb_func :
- blend->alpha_func));
+ result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
+ ((i != 3) ? blend->rgb_func :
+ blend->alpha_func));
}
}
+static nir_ssa_def *
+vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
+{
+ nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
+ return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
+}
+
+static nir_ssa_def *
+vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def *src_color, nir_ssa_def *dst_color,
+ nir_ssa_def *src_float_a)
+{
+ struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+ if (!blend->blend_enable)
+ return src_color;
+
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+ nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
+ nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
+ nir_ssa_def *dst_a;
+ int alpha_chan;
+ for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
+ if (format_swiz[alpha_chan] == 3)
+ break;
+ }
+ if (alpha_chan != 4) {
+ nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
+ dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
+ shift), imm_0xff));
+ } else {
+ dst_a = nir_imm_int(b, ~0);
+ }
+
+ nir_ssa_def *src_factor = vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->rgb_src_factor,
+ alpha_chan);
+ nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->rgb_dst_factor,
+ alpha_chan);
+
+ if (alpha_chan != 4 &&
+ blend->alpha_src_factor != blend->rgb_src_factor) {
+ nir_ssa_def *src_alpha_factor =
+ vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->alpha_src_factor,
+ alpha_chan);
+ src_factor = vc4_nir_set_packed_chan(b, src_factor,
+ src_alpha_factor,
+ alpha_chan);
+ }
+ if (alpha_chan != 4 &&
+ blend->alpha_dst_factor != blend->rgb_dst_factor) {
+ nir_ssa_def *dst_alpha_factor =
+ vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->alpha_dst_factor,
+ alpha_chan);
+ dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
+ dst_alpha_factor,
+ alpha_chan);
+ }
+ nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
+ nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
+
+ nir_ssa_def *result =
+ vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
+ if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
+ nir_ssa_def *result_a = vc4_blend_func_i(b,
+ src_blend,
+ dst_blend,
+ blend->alpha_func);
+ result = vc4_nir_set_packed_chan(b, result, result_a,
+ alpha_chan);
+ }
+ return result;
+}
+
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
nir_ssa_def *src, nir_ssa_def *dst)
@@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
nir_builder_instr_insert(b, &discard->instr);
}
+static nir_ssa_def *
+vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def **colors)
+{
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+ nir_ssa_def *swizzled[4];
+ for (int i = 0; i < 4; i++) {
+ swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
+ format_swiz[i]);
+ }
+
+ return nir_pack_unorm_4x8(b,
+ nir_vec4(b,
+ swizzled[0], swizzled[1],
+ swizzled[2], swizzled[3]));
+
+}
+
static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
enum pipe_format color_format = c->fs_key->color_format;
const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+ bool srgb = util_format_is_srgb(color_format);
/* Pull out the float src/dst color components. */
nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
@@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
}
- /* Unswizzle the destination color. */
- nir_ssa_def *dst_color[4];
- for (unsigned i = 0; i < 4; i++) {
- dst_color[i] = vc4_nir_get_swizzled_channel(b,
- unpacked_dst_color,
- format_swiz[i]);
- }
-
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
- /* Turn dst color to linear. */
- if (util_format_is_srgb(color_format)) {
+ nir_ssa_def *packed_color;
+ if (srgb) {
+ /* Unswizzle the destination color. */
+ nir_ssa_def *dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ dst_color[i] = vc4_nir_get_swizzled_channel(b,
+ unpacked_dst_color,
+ format_swiz[i]);
+ }
+
+ /* Turn dst color to linear. */
for (int i = 0; i < 3; i++)
dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
- }
- nir_ssa_def *blend_color[4];
- vc4_do_blending(c, b, blend_color, src_color, dst_color);
+ nir_ssa_def *blend_color[4];
+ vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
- /* sRGB encode the output color */
- if (util_format_is_srgb(color_format)) {
+ /* sRGB encode the output color */
for (int i = 0; i < 3; i++)
blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
- }
- nir_ssa_def *swizzled_outputs[4];
- for (int i = 0; i < 4; i++) {
- swizzled_outputs[i] =
- vc4_nir_get_swizzled_channel(b, blend_color,
- format_swiz[i]);
- }
+ packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
+ } else {
+ nir_ssa_def *packed_src_color =
+ vc4_nir_swizzle_and_pack(c, b, src_color);
- nir_ssa_def *packed_color =
- nir_pack_unorm_4x8(b,
- nir_vec4(b,
- swizzled_outputs[0],
- swizzled_outputs[1],
- swizzled_outputs[2],
- swizzled_outputs[3]));
+ packed_color =
+ vc4_do_blending_i(c, b,
+ packed_src_color, packed_dst_color,
+ src_color[3]);
+ }
packed_color = vc4_logicop(b, c->fs_key->logicop_func,
packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index fce24cd2330..fa1b50f3d10 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -253,6 +253,8 @@ enum quniform_contents {
QUNIFORM_BLEND_CONST_COLOR_Y,
QUNIFORM_BLEND_CONST_COLOR_Z,
QUNIFORM_BLEND_CONST_COLOR_W,
+ QUNIFORM_BLEND_CONST_COLOR_RGBA,
+ QUNIFORM_BLEND_CONST_COLOR_AAAA,
QUNIFORM_STENCIL,
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 8a759c2ca4c..147694644f0 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx,
const struct pipe_blend_color *blend_color)
{
struct vc4_context *vc4 = vc4_context(pctx);
- vc4->blend_color = *blend_color;
+ vc4->blend_color.f = *blend_color;
+ for (int i = 0; i < 4; i++)
+ vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);
vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
}
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 85d6998205e..f5ad481f186 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
cl_aligned_f(&uniforms,
- CLAMP(vc4->blend_color.color[uinfo->contents[i] -
- QUNIFORM_BLEND_CONST_COLOR_X],
+ CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -
+ QUNIFORM_BLEND_CONST_COLOR_X],
0, 1));
break;
+ case QUNIFORM_BLEND_CONST_COLOR_RGBA: {
+ const uint8_t *format_swiz =
+ vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);
+ uint32_t color = 0;
+ for (int i = 0; i < 4; i++) {
+ if (format_swiz[i] >= 4)
+ continue;
+
+ color |= (vc4->blend_color.ub[format_swiz[i]] <<
+ (i * 8));
+ }
+ cl_aligned_u32(&uniforms, color);
+ break;
+ }
+
+ case QUNIFORM_BLEND_CONST_COLOR_AAAA: {
+ uint8_t a = vc4->blend_color.ub[3];
+ cl_aligned_u32(&uniforms, ((a) |
+ (a << 8) |
+ (a << 16) |
+ (a << 24)));
+ break;
+ }
+
case QUNIFORM_STENCIL:
cl_aligned_u32(&uniforms,
vc4->zsa->stencil_uniforms[uinfo->data[i]] |
@@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
case QUNIFORM_BLEND_CONST_COLOR_Y:
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
+ case QUNIFORM_BLEND_CONST_COLOR_RGBA:
+ case QUNIFORM_BLEND_CONST_COLOR_AAAA:
dirty |= VC4_DIRTY_BLEND_COLOR;
break;
From ea421e919ae6e72e1319fb205c42a6fb53ca2f82 Mon Sep 17 00:00:00 2001
From: Jose Fonseca
Date: Fri, 23 Oct 2015 11:40:25 +0100
Subject: [PATCH 062/266] gallivm: Explicitly disable unsupported CPU features.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92214
CC: "10.6 11.0"
Reviewed-by: Roland Scheidegger
---
src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 72 +++++++++----------
1 file changed, 34 insertions(+), 38 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index e70a75fc663..7bda1184ee9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -498,50 +498,46 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
}
llvm::SmallVector MAttrs;
- if (util_cpu_caps.has_sse) {
- MAttrs.push_back("+sse");
- }
- if (util_cpu_caps.has_sse2) {
- MAttrs.push_back("+sse2");
- }
- if (util_cpu_caps.has_sse3) {
- MAttrs.push_back("+sse3");
- }
- if (util_cpu_caps.has_ssse3) {
- MAttrs.push_back("+ssse3");
- }
- if (util_cpu_caps.has_sse4_1) {
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * We need to unset attributes because sometimes LLVM mistakenly assumes
+ * certain features are present given the processor name.
+ *
+ * https://bugs.freedesktop.org/show_bug.cgi?id=92214
+ * http://llvm.org/PR25021
+ * http://llvm.org/PR19429
+ * http://llvm.org/PR16721
+ */
+ MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" );
+ MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" );
+ MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" );
+ MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" );
#if HAVE_LLVM >= 0x0304
- MAttrs.push_back("+sse4.1");
+ MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
#else
- MAttrs.push_back("+sse41");
+ MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41" : "-sse41" );
#endif
- }
- if (util_cpu_caps.has_sse4_2) {
#if HAVE_LLVM >= 0x0304
- MAttrs.push_back("+sse4.2");
+ MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
#else
- MAttrs.push_back("+sse42");
+ MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42" : "-sse42" );
#endif
- }
- if (util_cpu_caps.has_avx) {
- /*
- * AVX feature is not automatically detected from CPUID by the X86 target
- * yet, because the old (yet default) JIT engine is not capable of
- * emitting the opcodes. On newer llvm versions it is and at least some
- * versions (tested with 3.3) will emit avx opcodes without this anyway.
- */
- MAttrs.push_back("+avx");
- if (util_cpu_caps.has_f16c) {
- MAttrs.push_back("+f16c");
- }
- if (util_cpu_caps.has_avx2) {
- MAttrs.push_back("+avx2");
- }
- }
- if (util_cpu_caps.has_altivec) {
- MAttrs.push_back("+altivec");
- }
+ /*
+ * AVX feature is not automatically detected from CPUID by the X86 target
+ * yet, because the old (yet default) JIT engine is not capable of
+ * emitting the opcodes. On newer llvm versions it is and at least some
+ * versions (tested with 3.3) will emit avx opcodes without this anyway.
+ */
+ MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
+ MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
+ MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
+#endif
+
+#if defined(PIPE_ARCH_PPC)
+ MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#endif
+
builder.setMAttrs(MAttrs);
#if HAVE_LLVM >= 0x0305
From e05021ff72abb7de6506c90dd70a9f7ab490bf90 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Fri, 23 Oct 2015 02:14:31 -0400
Subject: [PATCH 063/266] nvc0: respect edgeflag attribute width
The edgeflag comes in as ubyte with glEdgeFlagPointer but as float with
plain immediate glEdgeFlag. Avoid reading bytes that weren't meant for
the edgeflag in the pointer case.
Fixes intermittent failures with gl-2.0-edgeflag piglit (and valgrind
complaints about reading uninitialized memory).
Signed-off-by: Ilia Mirkin
Cc: mesa-stable@lists.freedesktop.org
---
.../drivers/nouveau/nvc0/nvc0_vbo_translate.c | 40 +++++++++++++++----
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 8b23a4887da..9c19ba20a7e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -27,6 +27,7 @@ struct push_context {
struct {
bool enabled;
bool value;
+ uint8_t width;
unsigned stride;
const uint8_t *data;
} edgeflag;
@@ -53,6 +54,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
/* silence warnings */
ctx->edgeflag.data = NULL;
ctx->edgeflag.stride = 0;
+ ctx->edgeflag.width = 0;
}
static inline void
@@ -100,6 +102,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
struct nv04_resource *buf = nv04_resource(vb->buffer);
ctx->edgeflag.stride = vb->stride;
+ ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
if (buf) {
unsigned offset = vb->buffer_offset + ve->src_offset;
ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
@@ -137,10 +140,17 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
}
static inline bool
-ef_value(const struct push_context *ctx, uint32_t index)
+ef_value_8(const struct push_context *ctx, uint32_t index)
{
- float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
- return *pf ? true : false;
+ uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return !!*pf;
+}
+
+static inline bool
+ef_value_32(const struct push_context *ctx, uint32_t index)
+{
+ uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return !!*pf;
}
static inline bool
@@ -154,7 +164,11 @@ static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -162,7 +176,11 @@ static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -170,7 +188,11 @@ static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -178,7 +200,11 @@ static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
return i;
}
From bf0d0ce0d57dce5df8195942d2eda6389d341fea Mon Sep 17 00:00:00 2001
From: Samuel Li
Date: Fri, 21 Aug 2015 15:35:46 -0400
Subject: [PATCH 064/266] radeonsi: add support for Stoney asics (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2 (agd): rebase on mesa master, split pci ids to
separate commit
v3 (agd): use carrizo for llvm processor name for
llvm 3.7 and older
Reviewed-by: Marek Olšák
Signed-off-by: Samuel Li
Cc: mesa-stable@lists.freedesktop.org
---
src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++++++
src/gallium/drivers/radeon/radeon_winsys.h | 1 +
src/gallium/drivers/radeonsi/si_state.c | 1 +
src/gallium/winsys/amdgpu/drm/amdgpu_id.h | 8 ++++++--
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 +++++-
5 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 7ac94caad9f..4ce0c6a1994 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -416,6 +416,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
+ case CHIP_STONEY: return "AMD STONEY";
default: return "AMD unknown";
}
}
@@ -540,6 +541,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
case CHIP_FIJI: return "fiji";
+#if HAVE_LLVM <= 0x0307
+ case CHIP_STONEY: return "carrizo";
+#else
+ case CHIP_STONEY: return "stoney";
+#endif
default: return "";
}
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b91e1adf41d..5f13c1ebc26 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -137,6 +137,7 @@ enum radeon_family {
CHIP_ICELAND,
CHIP_CARRIZO,
CHIP_FIJI,
+ CHIP_STONEY,
CHIP_LAST,
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 243bdc6e6d7..a71ff4926ab 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3336,6 +3336,7 @@ static void si_init_config(struct si_context *sctx)
break;
case CHIP_KABINI:
case CHIP_MULLINS:
+ case CHIP_STONEY:
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
index 8882c418e12..90fe0cd50f1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
@@ -151,11 +151,15 @@ enum {
/* CZ specific rev IDs */
enum {
- CZ_CARRIZO_A0 = 0x01,
+ CARRIZO_A0 = 0x01,
+ STONEY_A0 = 0x61,
CZ_UNKNOWN = 0xFF
};
#define ASICREV_IS_CARRIZO(eChipRev) \
- (eChipRev >= CARRIZO_A0)
+ ((eChipRev >= CARRIZO_A0) && (eChipRev < STONEY_A0))
+
+#define ASICREV_IS_STONEY(eChipRev) \
+ ((eChipRev >= STONEY_A0) && (eChipRev < CZ_UNKNOWN))
#endif /* AMDGPU_ID_H */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index c8772490e74..32cd9d9aa50 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -226,7 +226,11 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
break;
case CHIP_CARRIZO:
ws->family = FAMILY_CZ;
- ws->rev_id = CZ_CARRIZO_A0;
+ ws->rev_id = CARRIZO_A0;
+ break;
+ case CHIP_STONEY:
+ ws->family = FAMILY_CZ;
+ ws->rev_id = STONEY_A0;
break;
case CHIP_FIJI:
ws->family = FAMILY_VI;
From 98546bfd038bc07a8cc7fed259c5022486bba473 Mon Sep 17 00:00:00 2001
From: Samuel Li
Date: Thu, 22 Oct 2015 12:06:43 -0400
Subject: [PATCH 065/266] radeonsi: add Stoney pci ids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Marek Olšák
Reviewed-by: Michel Dänzer
Signed-off-by: Samuel Li
Cc: mesa-stable@lists.freedesktop.org
---
include/pci_ids/radeonsi_pci_ids.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index 52eada1d3d5..bcf15a186c6 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO)
CHIPSET(0x9877, CARRIZO_, CARRIZO)
CHIPSET(0x7300, FIJI_, FIJI)
+
+CHIPSET(0x98E4, STONEY_, STONEY)
From dd05824b8968c723fba767698b496691e8dc81e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 19:46:07 +0200
Subject: [PATCH 066/266] st/mesa: don't load state parameters if there are
none
Out of 7063 shaders from my shader-db:
- 6564 (93%) shaders don't have any state parameters.
- 347 (5%) shaders have 1 state parameter for WPOS lowering.
- The remaining 2% have more state parameters, usually matrices.
Reviewed-by: Brian Paul
---
src/mesa/state_tracker/st_atom_constbuf.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index acaa85d9356..20f8b3df99d 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -73,7 +73,8 @@ void st_upload_constants( struct st_context *st,
* the parameters list are explicitly set by the user with glUniform,
* glProgramParameter(), etc.
*/
- _mesa_load_state_parameters(st->ctx, params);
+ if (params->StateFlags)
+ _mesa_load_state_parameters(st->ctx, params);
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization.
From d842d2f25190fea353b0eb104ae83062fbd2c245 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:14:53 +0200
Subject: [PATCH 067/266] gallium/util: add a test for NULL fragment shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Just to validate that radeonsi doesn't crash.
Reviewed-by: Michel Dänzer
---
src/gallium/auxiliary/util/u_tests.c | 38 ++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/src/gallium/auxiliary/util/u_tests.c b/src/gallium/auxiliary/util/u_tests.c
index a94e5cc2949..006dfa95af2 100644
--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -450,6 +450,43 @@ null_constant_buffer(struct pipe_context *ctx)
util_report_result(pass);
}
+static void
+null_fragment_shader(struct pipe_context *ctx)
+{
+ struct cso_context *cso;
+ struct pipe_resource *cb;
+ void *vs;
+ struct pipe_rasterizer_state rs = {0};
+ struct pipe_query *query;
+ union pipe_query_result qresult;
+
+ cso = cso_create_context(ctx);
+ cb = util_create_texture2d(ctx->screen, 256, 256,
+ PIPE_FORMAT_R8G8B8A8_UNORM);
+ util_set_common_states_and_clear(cso, ctx, cb);
+
+ /* No rasterization. */
+ rs.rasterizer_discard = 1;
+ cso_set_rasterizer(cso, &rs);
+
+ vs = util_set_passthrough_vertex_shader(cso, ctx, false);
+
+ query = ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
+ ctx->begin_query(ctx, query);
+ util_draw_fullscreen_quad(cso);
+ ctx->end_query(ctx, query);
+ ctx->get_query_result(ctx, query, true, &qresult);
+
+ /* Cleanup. */
+ cso_destroy_context(cso);
+ ctx->delete_vs_state(ctx, vs);
+ ctx->destroy_query(ctx, query);
+ pipe_resource_reference(&cb, NULL);
+
+ /* Check PRIMITIVES_GENERATED. */
+ util_report_result(qresult.u64 == 2);
+}
+
/**
* Run all tests. This should be run with a clean context after
* context_create.
@@ -459,6 +496,7 @@ util_run_tests(struct pipe_screen *screen)
{
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
+ null_fragment_shader(ctx);
tgsi_vs_window_space_position(ctx);
null_sampler_view(ctx, TGSI_TEXTURE_2D);
null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);
From ed95cb3a310951c9623fa6757226359c60fa7375 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:17:28 +0200
Subject: [PATCH 068/266] radeonsi: add checks for a NULL pixel shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This will allow removing the dummy PS.
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state.c | 1 +
.../drivers/radeonsi/si_state_shaders.c | 71 +++++++++++--------
2 files changed, 41 insertions(+), 31 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index a71ff4926ab..e91b6c7c665 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
if (blend->dual_src_blend &&
+ sctx->ps_shader.cso &&
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index eea00e0fafc..ce8d1cca212 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -956,13 +956,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
- struct tgsi_shader_info *psinfo = &ps->selector->info;
+ struct tgsi_shader_info *psinfo;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
unsigned i, j, tmp, num_written = 0;
- if (!ps->nparam)
+ if (!ps || !ps->nparam)
return;
+ psinfo = &ps->selector->info;
+
radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
for (i = 0; i < psinfo->num_inputs; i++) {
@@ -1025,7 +1027,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
- unsigned input_ena = ps->spi_ps_input_ena;
+ unsigned input_ena;
+
+ if (!ps)
+ return;
+
+ input_ena = ps->spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
@@ -1531,23 +1538,38 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
- r = si_shader_select(ctx, &sctx->ps_shader);
- if (r)
- return false;
- si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
+ if (sctx->ps_shader.cso) {
+ r = si_shader_select(ctx, &sctx->ps_shader);
+ if (r)
+ return false;
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
- if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
- sctx->sprite_coord_enable != rs->sprite_coord_enable ||
- sctx->flatshade != rs->flatshade) {
- sctx->sprite_coord_enable = rs->sprite_coord_enable;
- sctx->flatshade = rs->flatshade;
- si_mark_atom_dirty(sctx, &sctx->spi_map);
- }
+ if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
+ sctx->sprite_coord_enable != rs->sprite_coord_enable ||
+ sctx->flatshade != rs->flatshade) {
+ sctx->sprite_coord_enable = rs->sprite_coord_enable;
+ sctx->flatshade = rs->flatshade;
+ si_mark_atom_dirty(sctx, &sctx->spi_map);
+ }
- if (si_pm4_state_changed(sctx, ps) ||
- sctx->force_persample_interp != rs->force_persample_interp) {
- sctx->force_persample_interp = rs->force_persample_interp;
- si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ if (si_pm4_state_changed(sctx, ps) ||
+ sctx->force_persample_interp != rs->force_persample_interp) {
+ sctx->force_persample_interp = rs->force_persample_interp;
+ si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ }
+
+ if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
+ sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
+
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+
+ if (sctx->b.chip_class == SI)
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
}
if (si_pm4_state_changed(sctx, ls) ||
@@ -1559,19 +1581,6 @@ bool si_update_shaders(struct si_context *sctx)
if (!si_update_spi_tmpring_size(sctx))
return false;
}
-
- if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
-
- if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
- si_mark_atom_dirty(sctx, &sctx->msaa_config);
-
- if (sctx->b.chip_class == SI)
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
return true;
}
From 50bb2decf70b59ac8021048c0f8baf05ab3766f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:18:49 +0200
Subject: [PATCH 069/266] radeonsi: add draw_vbo check for a NULL pixel shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state.c | 1 +
src/gallium/drivers/radeonsi/si_state.h | 1 +
src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++++++-
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e91b6c7c665..aef85579085 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -698,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index fba6619d2fd..8b9a311cd3f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -61,6 +61,7 @@ struct si_state_rasterizer {
bool poly_smooth;
bool uses_poly_offset;
bool clamp_fragment_color;
+ bool rasterizer_discard;
};
struct si_dsa_stencil_ref_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ce6c98c3124..59845c4b43b 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -728,6 +728,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_index_buffer ib = {};
unsigned mask;
@@ -735,7 +736,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
+ if (!sctx->vs_shader.cso) {
+ assert(0);
+ return;
+ }
+ if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) {
assert(0);
return;
}
From 07b3cc6ecf6588647094ff73c3dbbdc227c7f92e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:19:34 +0200
Subject: [PATCH 070/266] radeonsi: allow unbinding pixel shaders and remove
the dummy shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_pipe.c | 2 --
src/gallium/drivers/radeonsi/si_pipe.h | 3 ---
.../drivers/radeonsi/si_state_shaders.c | 18 +-----------------
3 files changed, 1 insertion(+), 22 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5f910c95ef3..e211e9274a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context)
if (sctx->pstipple_sampler_state)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
- if (sctx->dummy_pixel_shader)
- sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
if (sctx->fixed_func_tcs_shader.cso)
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
if (sctx->custom_dsa_flush)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d7a2282952a..63057241f5e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -203,9 +203,6 @@ struct si_context {
struct si_pm4_state *init_config;
bool init_config_has_vgt_flush;
struct si_pm4_state *vgt_shader_config[4];
- /* With rasterizer discard, there doesn't have to be a pixel shader.
- * In that case, we bind this one: */
- void *dummy_pixel_shader;
/* shaders */
struct si_shader_ctx_state ps_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ce8d1cca212..25cba844784 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_update_viewports_and_scissors(sctx);
}
-static void si_make_dummy_ps(struct si_context *sctx)
-{
- if (!sctx->dummy_pixel_shader) {
- sctx->dummy_pixel_shader =
- util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
- TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT);
- }
-}
-
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
if (sctx->ps_shader.cso == sel)
return;
- /* use a dummy shader if binding a NULL shader */
- if (!sel) {
- si_make_dummy_ps(sctx);
- sel = sctx->dummy_pixel_shader;
- }
-
sctx->ps_shader.cso = sel;
- sctx->ps_shader.current = sel->first_variant;
+ sctx->ps_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
From 3a157e6e68b3f01fe5814f6313ec0811115b0694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 21:32:23 +0200
Subject: [PATCH 071/266] radeonsi: allow unbinding vertex shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Draw calls without a vertex shader are skipped.
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 25cba844784..4a3a04caa52 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- if (sctx->vs_shader.cso == sel || !sel)
+ if (sctx->vs_shader.cso == sel)
return;
sctx->vs_shader.cso = sel;
- sctx->vs_shader.current = sel->first_variant;
+ sctx->vs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
si_update_viewports_and_scissors(sctx);
}
From 7e056f872fb3f476ff48f159b72e29905afad6aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 21:28:54 +0200
Subject: [PATCH 072/266] radeonsi: remove unnecessary PARTIAL_VS_WAVE setting
for streamout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
hardware does this automatically
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 59845c4b43b..635fb5fe644 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -269,10 +269,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
wd_switch_on_eop = true;
}
- if (sctx->b.streamout.streamout_enabled ||
- sctx->b.streamout.prims_gen_query_enabled)
- partial_vs_wave = true;
-
if (sctx->b.chip_class >= CIK) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
From 96d5879d3850a7e0c921da0c8bfa7ba9993b6aa6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 21:43:30 +0200
Subject: [PATCH 073/266] radeonsi: add SWITCH_ON_EOI requirement for 4 SE
parts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 635fb5fe644..96448eeb7f3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -292,6 +292,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if (info->count_from_stream_output)
wd_switch_on_eop = true;
+ /* Required on CIK and later. */
+ if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
+ ia_switch_on_eoi = true;
+
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
From a6b5684e998845f0c503e5a2c2b40bb47c5daa57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 21:51:41 +0200
Subject: [PATCH 074/266] radeonsi: only apply the instancing bug workaround to
Bonaire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 96448eeb7f3..1aa5456f6a1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -296,6 +296,11 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
+ /* Instancing bug on Bonaire. */
+ if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
+ partial_vs_wave = true;
+
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
@@ -308,11 +313,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
u_prims_for_vertices(info->mode, info->count) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
- /* Instancing bug on 2 SE chips. */
- if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
- (info->indirect || info->instance_count > 1))
- partial_vs_wave = true;
-
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
From 2070af2fb121674ba21ae38c6c2ded8ae8ae413f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 22:07:01 +0200
Subject: [PATCH 075/266] radeonsi: add one more SWITCH_ON_EOI requirement for
Hawaii and VI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VI condition depends on geometry shaders and MAX_PRIMGRP_IN_WAVE.
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1aa5456f6a1..3b606b2c7dc 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned prim = info->mode;
unsigned primgroup_size = 128; /* recommended without a GS */
+ unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
@@ -296,6 +297,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
+ /* Required by Hawaii and, for some special cases, by VI. */
+ if (ia_switch_on_eoi &&
+ (sctx->b.family == CHIP_HAWAII ||
+ (sctx->b.chip_class == VI &&
+ (sctx->gs_shader.cso || max_primgroup_in_wave != 2))))
+ partial_vs_wave = true;
+
/* Instancing bug on Bonaire. */
if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi &&
(info->indirect || info->instance_count > 1))
@@ -319,7 +327,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
- S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ?
+ max_primgroup_in_wave : 0);
}
static unsigned si_get_ls_hs_config(struct si_context *sctx,
From ca18f12dbb8d7b8542b33aac70866a27bb5d89e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 22:17:04 +0200
Subject: [PATCH 076/266] radeonsi: make PARTIAL_ES_WAVE globally dependent on
SWITCH_ON_EOI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This catches the other cases that enable SWITCH_ON_EOI.
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3b606b2c7dc..396200375c2 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -247,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* primgroup_size must be set to a multiple of NUM_PATCHES */
primgroup_size = (primgroup_size / num_patches) * num_patches;
- /* SWITCH_ON_EOI must be set if PrimID is used.
- * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
- sctx->tes_shader.cso->info.uses_primid) {
+ sctx->tes_shader.cso->info.uses_primid)
ia_switch_on_eoi = true;
- partial_es_wave = true;
- }
/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
if ((sctx->b.family == CHIP_TAHITI ||
@@ -313,6 +310,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
+ /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if (ia_switch_on_eoi)
+ partial_es_wave = true;
+
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
* on multi-SE chips. */
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
From 0d2cb35f689675bfe00c281513547544fe46d894 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 18 Oct 2015 22:22:22 +0200
Subject: [PATCH 077/266] radeonsi: merge two ifs setting WD_SWITCH_ON_EOP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 396200375c2..e58e474e320 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -276,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
prim == PIPE_PRIM_LINE_LOOP ||
prim == PIPE_PRIM_TRIANGLE_FAN ||
prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY ||
- info->primitive_restart)
+ info->primitive_restart ||
+ info->count_from_stream_output)
wd_switch_on_eop = true;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
@@ -286,10 +287,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
(info->indirect || info->instance_count > 1))
wd_switch_on_eop = true;
- /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
- if (info->count_from_stream_output)
- wd_switch_on_eop = true;
-
/* Required on CIK and later. */
if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
From 06083046a4a6e8321cc0230f84208c8e10947105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 19 Oct 2015 02:45:56 +0200
Subject: [PATCH 078/266] radeonsi: add another requirement for PARTIAL_ES_WAVE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_pipe.c | 29 +++++++++++++++++++-
src/gallium/drivers/radeonsi/si_pipe.h | 2 ++
src/gallium/drivers/radeonsi/si_state.c | 2 +-
src/gallium/drivers/radeonsi/si_state_draw.c | 4 +++
4 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e211e9274a4..e65379993f7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -576,6 +576,32 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
return true;
}
+static bool si_init_gs_info(struct si_screen *sscreen)
+{
+ switch (sscreen->b.family) {
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ case CHIP_ICELAND:
+ case CHIP_CARRIZO:
+ sscreen->gs_table_depth = 16;
+ return true;
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ sscreen->gs_table_depth = 32;
+ return true;
+ default:
+ return false;
+ }
+}
+
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
@@ -593,7 +619,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen)) {
+ !si_initialize_pipe_config(sscreen) ||
+ !si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 63057241f5e..53529779481 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -42,6 +42,7 @@
#define SI_BASE_VERTEX_UNKNOWN INT_MIN
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
+#define SI_GS_PER_ES 128
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -85,6 +86,7 @@ struct si_compute;
struct si_screen {
struct r600_common_screen b;
+ unsigned gs_table_depth;
};
struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index aef85579085..264ab1b3817 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3264,7 +3264,7 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e58e474e320..822e2d55441 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -311,6 +311,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if (ia_switch_on_eoi)
partial_es_wave = true;
+ /* GS requirement. */
+ if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
+ partial_es_wave = true;
+
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
* on multi-SE chips. */
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
From e1c098f2381de53bd0a3260d330b057c8bfb3156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 23:32:16 +0200
Subject: [PATCH 079/266] util/format: add helper util_format_is_snorm8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Michel Dänzer
---
src/gallium/auxiliary/util/u_format.c | 19 +++++++++++++++++++
src/gallium/auxiliary/util/u_format.h | 3 +++
2 files changed, 22 insertions(+)
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index c1ce408119f..79630bf6dc3 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -169,6 +169,25 @@ util_format_is_snorm(enum pipe_format format)
desc->channel[i].normalized;
}
+boolean
+util_format_is_snorm8(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ int i;
+
+ if (desc->is_mixed)
+ return FALSE;
+
+ i = util_format_get_first_non_void_channel(format);
+ if (i == -1)
+ return FALSE;
+
+ return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED &&
+ !desc->channel[i].pure_integer &&
+ desc->channel[i].normalized &&
+ desc->channel[i].size == 8;
+}
+
boolean
util_format_is_luminance_alpha(enum pipe_format format)
{
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 42b39ff04fd..a1b1b28fa41 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -686,6 +686,9 @@ util_format_is_pure_uint(enum pipe_format format);
boolean
util_format_is_snorm(enum pipe_format format);
+boolean
+util_format_is_snorm8(enum pipe_format format);
+
/**
* Check if the src format can be blitted to the destination format with
* a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not
From edf6a4537c5febbe38780819df30cbfffc74c329 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 23:36:11 +0200
Subject: [PATCH 080/266] radeonsi: only apply the SNORM blit workaround to
*8_SNORM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Like the comment says. This fixes DCC, which doesn't like blitting RG16
as RGBA8.
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_blit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 082ea850675..aa526ee720e 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
src_force_level = src_level;
} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
/* also *8_SNORM has precision issues, use UNORM instead */
- util_format_is_snorm(src->format)) {
+ util_format_is_snorm8(src->format)) {
if (util_format_is_subsampled_422(src->format)) {
src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
From afa357c3b01322df31034f84613a4d8401a9486e Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Wed, 21 Oct 2015 00:10:36 +0200
Subject: [PATCH 081/266] radeonsi: Allocate buffers for DCC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As the alignment requirements can be 32 KiB or more, also adding
an aligned buffer creation function.
DCC is disabled for textures that can be shared as sharing the
DCC buffers has not been implemented yet.
Signed-off-by: Bas Nieuwenhuizen
Signed-off-by: Marek Olšák
---
.../drivers/radeon/r600_buffer_common.c | 21 +++++++++
src/gallium/drivers/radeon/r600_pipe_common.h | 6 +++
src/gallium/drivers/radeon/r600_texture.c | 18 +++++++
src/gallium/drivers/radeon/radeon_winsys.h | 5 ++
.../winsys/amdgpu/drm/amdgpu_surface.c | 47 +++++++++++++++++--
5 files changed, 92 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index f341ecb41a5..0dc6c918331 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -443,6 +443,27 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
return &rbuffer->b.b;
}
+struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned bind,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment)
+{
+ struct pipe_resource buffer;
+
+ memset(&buffer, 0, sizeof buffer);
+ buffer.target = PIPE_BUFFER;
+ buffer.format = PIPE_FORMAT_R8_UNORM;
+ buffer.bind = bind;
+ buffer.usage = usage;
+ buffer.flags = 0;
+ buffer.width0 = size;
+ buffer.height0 = 1;
+ buffer.depth0 = 1;
+ buffer.array_size = 1;
+ return r600_buffer_create(screen, &buffer, alignment);
+}
+
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index b58b500bd76..1eec596b258 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -214,6 +214,7 @@ struct r600_texture {
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
+ struct r600_resource *dcc_buffer;
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
@@ -489,6 +490,11 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
+struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned bind,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fc69f48bb70..fffb9ef4cbe 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -268,6 +268,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
if (rtex->cmask_buffer != &rtex->resource) {
pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
}
+ pipe_resource_reference((struct pipe_resource**)&rtex->dcc_buffer, NULL);
pb_reference(&resource->buf, NULL);
FREE(rtex);
}
@@ -482,6 +483,20 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
+static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ rtex->dcc_buffer = (struct r600_resource *)
+ r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment);
+ if (rtex->dcc_buffer == NULL) {
+ return;
+ }
+
+ r600_screen_clear_buffer(rscreen, &rtex->dcc_buffer->b.b, 0, rtex->surface.dcc_size,
+ 0xFFFFFFFF, true);
+}
+
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
@@ -621,6 +636,9 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
}
+ if (rtex->surface.dcc_enabled) {
+ vi_texture_alloc_dcc_separate(rscreen, rtex);
+ }
}
/* Now create the backing buffer. */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 5f13c1ebc26..0178643549e 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -332,6 +332,7 @@ struct radeon_surf_level {
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode;
+ uint64_t dcc_offset;
};
struct radeon_surf {
@@ -367,6 +368,10 @@ struct radeon_surf {
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t pipe_config;
uint32_t num_banks;
+
+ uint64_t dcc_size;
+ uint64_t dcc_alignment;
+ bool dcc_enabled;
};
struct radeon_bo_list_item {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 358df381011..b442174b7b8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -175,7 +175,9 @@ static int compute_level(struct amdgpu_winsys *ws,
struct radeon_surf *surf, bool is_stencil,
unsigned level, unsigned type, bool compressed,
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut)
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
+ ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
{
struct radeon_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
@@ -248,6 +250,30 @@ static int compute_level(struct amdgpu_winsys *ws,
surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
+
+ if (surf->dcc_enabled) {
+ AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
+ AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
+ AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
+ AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeDccInfo(ws->addrlib,
+ AddrDccIn,
+ AddrDccOut);
+
+ if (ret == ADDR_OK) {
+ surf_level->dcc_offset = surf->dcc_size;
+ surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
+ surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
+ } else {
+ surf->dcc_enabled = false;
+ surf_level->dcc_offset = 0;
+ }
+ } else {
+ surf_level->dcc_offset = 0;
+ }
+
return 0;
}
@@ -259,6 +285,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
bool compressed;
ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
+ ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
+ ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
ADDR_TILEINFO AddrTileInfoIn = {0};
ADDR_TILEINFO AddrTileInfoOut = {0};
int r;
@@ -269,6 +297,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
+ AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
+ AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
type = RADEON_SURF_GET(surf->flags, TYPE);
@@ -318,12 +348,16 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
}
}
else {
- AddrSurfInfoIn.bpp = surf->bpe * 8;
+ AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
}
- AddrSurfInfoIn.numSamples = surf->nsamples;
+ AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
AddrSurfInfoIn.tileIndex = -1;
+ surf->dcc_enabled = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
+ !(surf->flags & RADEON_SURF_SCANOUT) &&
+ !compressed && AddrDccIn.numSamples <= 1;
+
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
@@ -339,6 +373,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
AddrSurfInfoIn.flags.degrade4Space = 1;
+ AddrSurfInfoIn.flags.dccCompatible = surf->dcc_enabled;
/* This disables incorrect calculations (hacks) in addrlib. */
AddrSurfInfoIn.flags.noStencil = 1;
@@ -375,11 +410,13 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
}
surf->bo_size = 0;
+ surf->dcc_size = 0;
+ surf->dcc_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, false, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
return r;
@@ -406,7 +443,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, true, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
return r;
From bb77467df9863c522c8d8550e295e2ad7bbef37c Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Wed, 21 Oct 2015 00:10:37 +0200
Subject: [PATCH 082/266] radeonsi: Disable operations that do not work with
DCC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Bas Nieuwenhuizen
Signed-off-by: Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 5 +++++
src/gallium/drivers/radeonsi/cik_sdma.c | 3 ++-
src/gallium/drivers/radeonsi/si_blit.c | 3 ++-
src/gallium/drivers/radeonsi/si_dma.c | 3 ++-
4 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fffb9ef4cbe..c2a692c9af1 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1290,6 +1290,11 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
+ /* CMASK clear does not work for DCC compressed textures */
+ if (tex->surface.dcc_enabled) {
+ continue;
+ }
+
/* ensure CMASK is enabled */
r600_texture_alloc_cmask_separate(rctx->screen, tex);
if (tex->cmask.size == 0) {
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 6454b8ce8c0..25fd09a4725 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
- (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
+ rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index aa526ee720e..ab2523a24b7 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
info->src.box.depth == 1 &&
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
- (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) {
+ (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */
+ !dst->surface.dcc_enabled) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
(info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 31b0b41e5a4..73c026cc0cd 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx,
if (src->format != dst->format || src_box->depth > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
- rsrc->cmask.size || rsrc->fmask.size) {
+ rsrc->cmask.size || rsrc->fmask.size ||
+ rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
goto fallback;
}
From 81ebd6a88289663677ebb97f788afb53fb84dfd5 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Wed, 21 Oct 2015 00:10:38 +0200
Subject: [PATCH 083/266] radeonsi: Add FLUSH_AND_INV_CB_DATA_TS for DCC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Bas Nieuwenhuizen
Signed-off-by: Marek Olšák
---
src/gallium/drivers/radeonsi/si_state_draw.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 822e2d55441..cf0891a2ab7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -647,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (sctx->chip_class >= VI) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute);
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+ EVENT_INDEX(5));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ }
}
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
From 48b5f104ac4e0c3ddbff87520adb7a9d2a254c67 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Wed, 21 Oct 2015 00:10:39 +0200
Subject: [PATCH 084/266] radeonsi: Enable DCC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Bas Nieuwenhuizen
Signed-off-by: Marek Olšák
---
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeon/r600_texture.c | 2 +
src/gallium/drivers/radeon/r600d_common.h | 1 +
src/gallium/drivers/radeonsi/si_descriptors.c | 5 ++
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_state.c | 46 ++++++++++++++++---
6 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 1eec596b258..f21a0b3a188 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -244,6 +244,7 @@ struct r600_surface {
unsigned cb_color_dim; /* EG only */
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
+ unsigned cb_dcc_base; /* VI and later */
unsigned cb_color_attrib; /* EG and later */
unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index c2a692c9af1..03140497629 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -495,6 +495,8 @@ static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
r600_screen_clear_buffer(rscreen, &rtex->dcc_buffer->b.b, 0, rtex->surface.dcc_size,
0xFFFFFFFF, true);
+
+ rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
}
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
index 115042d153e..a3d182cd30f 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -202,6 +202,7 @@
#define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17)
#define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13)
+#define VI_S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28)
/*CIK+*/
#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 13738da5e2c..5548cba3a24 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->resource));
+ if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ rview->dcc_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_DCC);
+
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 53529779481..42cd8803c36 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -98,6 +98,7 @@ struct si_sampler_view {
struct pipe_sampler_view base;
struct list_head list;
struct r600_resource *resource;
+ struct r600_resource *dcc_buffer;
/* [0..7] = image descriptor
* [4..7] = buffer descriptor */
uint32_t state[8];
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 264ab1b3817..384c8e28faa 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,8 +1926,25 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
- if (sctx->b.chip_class >= VI)
- surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+ if (sctx->b.chip_class >= VI) {
+ unsigned max_uncompressed_block_size = 2;
+
+ if (rtex->surface.nsamples > 1) {
+ if (rtex->surface.bpe == 1)
+ max_uncompressed_block_size = 0;
+ else if (rtex->surface.bpe == 2)
+ max_uncompressed_block_size = 1;
+ }
+
+ surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
+ S_028C78_INDEPENDENT_64B_BLOCKS(1);
+
+ if (rtex->surface.dcc_enabled) {
+ uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
+
+ surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ }
+ }
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
@@ -2251,6 +2268,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
RADEON_PRIO_CMASK);
}
+ if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ tex->dcc_buffer, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_DCC);
+ }
+
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
@@ -2268,7 +2291,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
if (sctx->b.chip_class >= VI)
- radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
+ radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
@@ -2635,8 +2658,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(last_layer));
- view->state[6] = 0;
- view->state[7] = 0;
+
+ if (tmp->surface.dcc_enabled) {
+ uint64_t dcc_offset = surflevel[base_level].dcc_offset;
+ unsigned swap = r600_translate_colorswap(pipe_format);
+
+ view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
+ view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ view->dcc_buffer = tmp->dcc_buffer;
+ } else {
+ view->state[6] = 0;
+ view->state[7] = 0;
+ }
/* Initialize the sampler view for FMASK. */
if (tmp->fmask.size) {
@@ -3409,7 +3442,8 @@ static void si_init_config(struct si_context *sctx)
if (sctx->b.chip_class >= VI) {
si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
- S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
+ S_028424_OVERWRITE_COMBINER_WATERMARK(4));
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}
From 830e57b82d21fd324059b7a7074b047ad41d0aa4 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 23 Oct 2015 18:31:57 -0400
Subject: [PATCH 085/266] radeonsi: add Stoney to si_init_gs_info()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch was originally written before stoney support
was merged. Add stoney.
Reviewed-by: Marek Olšák
Signed-off-by: Alex Deucher
---
src/gallium/drivers/radeonsi/si_pipe.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e65379993f7..047bbf4e537 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -586,6 +586,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
case CHIP_MULLINS:
case CHIP_ICELAND:
case CHIP_CARRIZO:
+ case CHIP_STONEY:
sscreen->gs_table_depth = 16;
return true;
case CHIP_TAHITI:
From fe707c0373cf73f9323014f7bc38f20120d4f096 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Thu, 22 Oct 2015 22:26:52 +0200
Subject: [PATCH 086/266] llvmpipe: fix using non-zero layer in non-array view
from array resource
Just need to use resource target not view target when calculating
first-layer based mip offsets. (This is a gl specific problem since
d3d10 does not distinguish between non-array and array resources neither
at the resource nor view level, only at the shader level.)
Fixes new piglit arb_texture_view sampling-2d-array-as-2d-layer test.
Reviewed-by: Dave Airlie
Reviewed-by: Jose Fonseca
---
src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++----
src/gallium/drivers/llvmpipe/lp_state_sampler.c | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 4c8167a9e7d..1778b13f9dd 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (res->target == PIPE_TEXTURE_1D_ARRAY ||
+ res->target == PIPE_TEXTURE_2D_ARRAY ||
+ res->target == PIPE_TEXTURE_CUBE ||
+ res->target == PIPE_TEXTURE_CUBE_ARRAY) {
/*
* For array textures, we don't have first_layer, instead
* adjust last_layer (stored as depth) plus the mip level offsets
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index b205f02fdba..1e055878f7c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -275,10 +275,10 @@ prepare_shader_sampling(
row_stride[j] = lp_tex->row_stride[j];
img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
From fb586e1edb207328534f554a9afaf2c07434e08e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Thu, 22 Oct 2015 22:28:28 +0200
Subject: [PATCH 087/266] softpipe: fix using non-zero layer in non-array view
from array resource
For vertex/geometry shader sampling, this is the same as for llvmpipe - just
use the original resource target.
For fragment shader sampling though (which does not use first-layer based mip
offsets) adjust the sampling code to use first_layer in the non-array cases.
While here also fix up some code which looked wrong wrt buffer texel fetch
(no piglit change).
Reviewed-by: Dave Airlie
Reviewed-by: Jose Fonseca
---
.../drivers/softpipe/sp_state_sampler.c | 8 ++---
src/gallium/drivers/softpipe/sp_tex_sample.c | 32 ++++++++++++++++---
2 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index d7a3360713f..23ec4ef3cb6 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -214,10 +214,10 @@ prepare_shader_sampling(
row_stride[j] = sp_tex->stride[j];
img_stride[j] = sp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 8a0935062b6..e3e28a3ef32 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1033,6 +1033,7 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
/* Can we fetch all four at once:
*/
@@ -1081,6 +1082,7 @@ img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1111,6 +1113,7 @@ img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
x0 = util_ifloor(u);
if (x0 < 0)
@@ -1154,7 +1157,8 @@ img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
- out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
+ out = get_texel_1d_array(sp_sview, sp_samp, addr, x,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1215,6 +1219,7 @@ img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
@@ -1396,8 +1401,10 @@ img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
- tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
- tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
+ tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0,
+ sp_sview->base.u.tex.first_layer);
+ tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1,
+ sp_sview->base.u.tex.first_layer);
/* interpolate R, G, B, A */
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1523,6 +1530,7 @@ img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
@@ -3252,10 +3260,22 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
switch (sp_sview->base.target) {
case PIPE_BUFFER:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ const int x = CLAMP(v_i[j] + offset[0] +
+ sp_sview->base.u.buf.first_element,
+ sp_sview->base.u.buf.first_element,
+ sp_sview->base.u.buf.last_element);
+ tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = tx[c];
+ }
+ }
+ break;
case PIPE_TEXTURE_1D:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
+ tx = get_texel_2d_no_border(sp_sview, addr, x,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
}
@@ -3277,7 +3297,8 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
- tx = get_texel_2d_no_border(sp_sview, addr, x, y);
+ tx = get_texel_3d_no_border(sp_sview, addr, x, y,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
}
@@ -3307,6 +3328,7 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
}
break;
case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
+ case PIPE_TEXTURE_CUBE_ARRAY:
default:
assert(!"Unknown or CUBE texture type in TXF processing\n");
break;
From 71ff5af5dd308e4a53b66b7530cc01ec4bf5715e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Thu, 22 Oct 2015 23:49:41 +0200
Subject: [PATCH 088/266] gallivm: fix sampling with texture offsets in SoA
path
When using nearest filtering and clamp / clamp to edge wrapping results could
be wrong for negative offsets. Fix this by adding the offset before doing
the conversion to int coords (could also use floor instead of trunc int
conversion but probably more complex on "typical" cpu).
This fixes the piglit texwrap offset failures with this filter/wrap combo
(which only leaves the linear/mirror repeat combination broken).
Reviewed-by: Dave Airlie
Reviewed-by: Jose Fonseca
---
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index b5c06b69571..125505eeb4b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -567,12 +567,13 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
+ if (offset) {
+ offset = lp_build_int_to_float(coord_bld, offset);
+ coord = lp_build_add(coord_bld, coord, offset);
+ }
/* floor */
/* use itrunc instead since we clamp to 0 anyway */
icoord = lp_build_itrunc(coord_bld, coord);
- if (offset) {
- icoord = lp_build_add(int_coord_bld, icoord, offset);
- }
/* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
@@ -2586,6 +2587,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
}
+ /*
+ * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
+ * so AoS path could be used. Not sure it's worth the trouble...
+ */
min_img_filter = derived_sampler_state.min_img_filter;
mag_img_filter = derived_sampler_state.mag_img_filter;
From 205a3ce5c158349dda7082f44e95301526b9f183 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Thu, 22 Oct 2015 23:58:50 +0200
Subject: [PATCH 089/266] gallivm: fix tex offsets with mirror repeat linear
Can't see why anyone would ever want to use this, but it was clearly broken.
This fixes the piglit texwrap offset test using this combination.
Reviewed-by: Dave Airlie
Reviewed-by: Jose Fonseca
---
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 125505eeb4b..26bfa0d2677 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -405,16 +405,17 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ if (offset) {
+ offset = lp_build_int_to_float(coord_bld, offset);
+ offset = lp_build_div(coord_bld, offset, length_f);
+ coord = lp_build_add(coord_bld, coord, offset);
+ }
/* compute mirror function */
coord = lp_build_coord_mirror(bld, coord);
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- if (offset) {
- offset = lp_build_int_to_float(coord_bld, offset);
- coord = lp_build_add(coord_bld, coord, offset);
- }
/* convert to int, compute lerp weight */
lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
From 6529daca391912246c13e6f76e885026d2ce88be Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Sat, 24 Oct 2015 01:47:45 +0200
Subject: [PATCH 090/266] radeonsi: Implement DCC fast clear.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Uses the DCC buffer instead of the CMASK buffer. The ELIMINATE_FAST_CLEAR
still works. Furthermore, with DCC compression we can directly clear
to a limited set of colors such that we do not need a postprocessing step.
v2 Marek: check dcc_buffer && dirty_level_mask in set_sampler_view
Signed-off-by: Bas Nieuwenhuizen
Signed-off-by: Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 109 ++++++++++++++++--
src/gallium/drivers/radeonsi/si_blit.c | 4 +-
src/gallium/drivers/radeonsi/si_descriptors.c | 3 +-
3 files changed, 101 insertions(+), 15 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 03140497629..f7a11a2534c 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1239,6 +1239,81 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
+static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
+ const union pipe_color_union *color,
+ uint32_t* reset_value,
+ bool* clear_words_needed)
+{
+ bool values[4] = {};
+ int i;
+ bool main_value = false;
+ bool extra_value = false;
+ int extra_channel;
+ const struct util_format_description *desc = util_format_description(surface_format);
+
+ *clear_words_needed = true;
+ *reset_value = 0x20202020U;
+
+ /* If we want to clear without needing a fast clear eliminate step, we
+ * can set each channel to 0 or 1 (or 0/max for integer formats). We
+ * have two sets of flags, one for the last or first channel(extra) and
+ * one for the other channels(main).
+ */
+
+ if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
+ surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
+ surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
+ extra_channel = -1;
+ } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+ if(r600_translate_colorswap(surface_format) <= 1)
+ extra_channel = desc->nr_channels - 1;
+ else
+ extra_channel = 0;
+ } else
+ return;
+
+ for (i = 0; i < 4; ++i) {
+ int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X;
+
+ if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X ||
+ desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W)
+ continue;
+
+ if (util_format_is_pure_sint(surface_format)) {
+ values[i] = color->i[i] != 0;
+ if (color->i[i] != 0 && color->i[i] != INT32_MAX)
+ return;
+ } else if (util_format_is_pure_uint(surface_format)) {
+ values[i] = color->ui[i] != 0U;
+ if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX)
+ return;
+ } else {
+ values[i] = color->f[i] != 0.0F;
+ if (color->f[i] != 0.0F && color->f[i] != 1.0F)
+ return;
+ }
+
+ if (index == extra_channel)
+ extra_value = values[i];
+ else
+ main_value = values[i];
+ }
+
+ for (int i = 0; i < 4; ++i)
+ if (values[i] != main_value &&
+ desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel &&
+ desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W)
+ return;
+
+ *clear_words_needed = false;
+ if (main_value)
+ *reset_value |= 0x80808080U;
+
+ if (extra_value)
+ *reset_value |= 0x40404040U;
+}
+
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
@@ -1292,23 +1367,33 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
- /* CMASK clear does not work for DCC compressed textures */
if (tex->surface.dcc_enabled) {
- continue;
+ uint32_t reset_value;
+ bool clear_words_needed;
+
+ vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
+
+ rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b,
+ 0, tex->surface.dcc_size, reset_value, true);
+
+ if (clear_words_needed)
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+ } else {
+ /* ensure CMASK is enabled */
+ r600_texture_alloc_cmask_separate(rctx->screen, tex);
+ if (tex->cmask.size == 0) {
+ continue;
+ }
+
+ /* Do the fast clear. */
+ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
+ tex->cmask.offset, tex->cmask.size, 0, true);
+
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
}
- /* ensure CMASK is enabled */
- r600_texture_alloc_cmask_separate(rctx->screen, tex);
- if (tex->cmask.size == 0) {
- continue;
- }
-
- /* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
- rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
- tex->cmask.offset, tex->cmask.size, 0, true);
- tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
if (dirty_cbufs)
*dirty_cbufs |= 1 << i;
rctx->set_atom_dirty(rctx, fb_state, true);
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index ab2523a24b7..a2264363db9 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);
tex = (struct r600_texture *)view->texture;
- assert(tex->cmask.size || tex->fmask.size);
+ assert(tex->cmask.size || tex->fmask.size || tex->surface.dcc_enabled);
si_blit_decompress_color(&sctx->b.b, tex,
view->u.tex.first_level, view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
level, level,
first_layer, last_layer);
- } else if (rtex->fmask.size || rtex->cmask.size) {
+ } else if (rtex->fmask.size || rtex->cmask.size || rtex->surface.dcc_enabled) {
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 5548cba3a24..a8ff6f27319 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -234,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
} else {
samplers->depth_texture_mask &= ~(1 << slot);
}
- if (rtex->cmask.size || rtex->fmask.size) {
+ if (rtex->cmask.size || rtex->fmask.size ||
+ (rtex->dcc_buffer && rtex->dirty_level_mask)) {
samplers->compressed_colortex_mask |= 1 << slot;
} else {
samplers->compressed_colortex_mask &= ~(1 << slot);
From 1e8d0cc62897fc90ac7dc9a92e80e714e52d3e77 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 23 Oct 2015 13:37:26 -0400
Subject: [PATCH 091/266] freedreno: remove unnecessary null checks
According to piglit/xonotic/neverball/stc, blend/rasterize/zsa state
will always be bound (never null). And the null checks were in-
consistent anyways, so remove them.
Signed-off-by: Rob Clark
---
src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 10 +++++-----
src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 2 +-
src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 12 ++++++------
src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 2 +-
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index a9498835011..3906c9b996e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+ if (ctx->rasterizer->point_size_per_vertex &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
@@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
- .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ .color_two_side = ctx->rasterizer->light_twoside,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd3_ctx->fsaturate_t,
.fsaturate_r = fd3_ctx->fsaturate_r,
},
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
- .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 411f5b76329..8f9c8b0623c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
- if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 025753c037e..7bd5163529a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
- .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+ .color_two_side = ctx->rasterizer->light_twoside,
+ .rasterflat = ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
- .ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
+ .ucp_enables = ctx->rasterizer->clip_plane_enable,
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
@@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
- .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index c7ed1d2e379..cf5dd7b0f17 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
- if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
+ if ((dirty & FD_DIRTY_BLEND)) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;
From b0342f48d07568f77cde035b861f8eb67f941ae4 Mon Sep 17 00:00:00 2001
From: Fabio Pedretti
Date: Thu, 15 Oct 2015 10:00:23 +0200
Subject: [PATCH 092/266] docs/index.html: fix typo
Reviewed-by: Boyan Ding
Signed-off-by: Emil Velikov
---
docs/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/index.html b/docs/index.html
index 138447fc500..e28c63d41bc 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -28,7 +28,7 @@ This is a bug-fix release.
This is a bug-fix release.
NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
-series. Users of 10.5 are encouraged to migrate to the 11.0 series in order
+series. Users of 10.6 are encouraged to migrate to the 11.0 series in order
to obtain future fixes.
From 9ecfc6baf1c92a9050942e0f015ba4e9e2619cb9 Mon Sep 17 00:00:00 2001
From: Ben Widawsky
Date: Fri, 23 Oct 2015 14:38:39 -0700
Subject: [PATCH 093/266] i965: Remove unused devinfo revision
I left the function to obtain the revision because it is, and will continue to
be useful in the future. I'd rather not have to dig it up every time we need it.
Comments left at the implementation to say as much.
This was accidentally left here when I moved the early platform support:
commit 28ed1e08e8ba98ebd4ff0b56326372f0df9c73ad
Author: Ben Widawsky
Date: Fri Aug 7 13:58:37 2015 -0700
i965/skl: Remove early platform support
Signed-off-by: Ben Widawsky
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_device_info.c | 2 +-
src/mesa/drivers/dri/i965/brw_device_info.h | 2 +-
src/mesa/drivers/dri/i965/intel_screen.c | 14 +++++++++++---
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index a6a3bb670ca..e86b5309688 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -359,7 +359,7 @@ static const struct brw_device_info brw_device_info_bxt = {
};
const struct brw_device_info *
-brw_get_device_info(int devid, int revision)
+brw_get_device_info(int devid)
{
const struct brw_device_info *devinfo;
switch (devid) {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 2a73e937d9f..4911c233dea 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -86,4 +86,4 @@ struct brw_device_info
/** @} */
};
-const struct brw_device_info *brw_get_device_info(int devid, int revision);
+const struct brw_device_info *brw_get_device_info(int devid);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 590c45d93ea..fb95fb629ad 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen)
}
}
-static int
+/**
+ * Return the revision (generally the revid field of the PCI header) of the
+ * graphics device.
+ *
+ * XXX: This function is useful to keep around even if it is not currently in
+ * use. It is necessary for new platforms and revision specific workarounds or
+ * features. Please don't remove it so that we know it at least continues to
+ * build.
+ */
+static __attribute__((__unused__)) int
brw_get_revision(int fd)
{
struct drm_i915_getparam gp;
@@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
return false;
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
- intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
- brw_get_revision(psp->fd));
+ intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
if (!intelScreen->devinfo)
return false;
From f09ed63f4342846e361242233162799140674d5f Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 16:30:30 -0700
Subject: [PATCH 094/266] vc4: Fix the test for skipping raw MOVs.
I don't know what previous test was trying to do, but it dates back to the
first add of vc4_qpu_emit.c. No change to shader-db.
---
src/gallium/drivers/vc4/vc4_qir.c | 8 ++++++++
src/gallium/drivers/vc4/vc4_qir.h | 1 +
src/gallium/drivers/vc4/vc4_qpu_emit.c | 2 +-
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e7d0d664e19..3d3989fa95d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -189,6 +189,14 @@ qir_is_mul(struct qinst *inst)
}
}
+bool
+qir_is_raw_mov(struct qinst *inst)
+{
+ return (inst->op == QOP_MOV &&
+ !inst->dst.pack &&
+ !inst->src[0].pack);
+}
+
bool
qir_is_tex(struct qinst *inst)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index fa1b50f3d10..aa4d9328f37 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -463,6 +463,7 @@ bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
+bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 4c81deb8eaa..2055fa5d231 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -266,7 +266,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
switch (qinst->op) {
case QOP_MOV:
/* Skip emitting the MOV if it's a no-op. */
- if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
+ if (qir_is_raw_mov(qinst) ||
dst.mux != src[0].mux || dst.addr != src[0].addr) {
queue(c, qpu_a_MOV(dst, src[0]));
}
From ae1d3322ccfd4758614167f4c6087d8acabe3cc5 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 17:04:49 -0700
Subject: [PATCH 095/266] vc4: Take advantage of the 8888 pack function in
pack_unorm_4x8.
One instruction instead of four, and it turns out you do this a lot for
the Over operator.
total uniforms in shared programs: 32168 -> 32087 (-0.25%)
uniforms in affected programs: 318 -> 237 (-25.47%)
total instructions in shared programs: 89830 -> 89472 (-0.40%)
instructions in affected programs: 6434 -> 6076 (-5.56%)
---
src/gallium/drivers/vc4/vc4_program.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 5d7564b46d5..a48dad804e2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -738,6 +738,20 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
}
+ /* If the pack is replicating the same channel 4 times, use the 8888
+ * pack flag. This is common for blending using the alpha
+ * channel.
+ */
+ if (instr->src[0].swizzle[0] == instr->src[0].swizzle[1] &&
+ instr->src[0].swizzle[0] == instr->src[0].swizzle[2] &&
+ instr->src[0].swizzle[0] == instr->src[0].swizzle[3]) {
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = qir_PACK_8888_F(c,
+ ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[0]));
+ return;
+ }
+
for (int i = 0; i < 4; i++) {
int swiz = instr->src[0].swizzle[i];
struct qreg src;
From a238ad372d0914b8a95c3521c52639a4ddee7bae Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 17:38:26 -0700
Subject: [PATCH 096/266] vc4: Don't try to register coalesce into the VPM
across non-raw MOVs.
No known bugs, just something I noticed while updating optimization code
for other changes.
---
src/gallium/drivers/vc4/vc4_opt_vpm_writes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
index f2cdf8f694f..73ded766db9 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -58,7 +58,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
}
for (int i = 0; i < vpm_write_count; i++) {
- if (vpm_writes[i]->op != QOP_MOV ||
+ if (!qir_is_raw_mov(vpm_writes[i]) ||
vpm_writes[i]->src[0].file != QFILE_TEMP) {
continue;
}
From a2eba3362f1c120e5e70b56568d44470cb75fdc8 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 4 Aug 2015 21:23:53 -0700
Subject: [PATCH 097/266] vc4: Fix names of the 16-bit unpacks
They're only f16-to-f32 on a float operation, otherwise they're
i16-to-i32.
---
src/gallium/drivers/vc4/vc4_qpu_defines.h | 4 ++--
src/gallium/drivers/vc4/vc4_qpu_disasm.c | 4 ++--
src/gallium/drivers/vc4/vc4_qpu_emit.c | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index eb3dfb33827..626dc3be6be 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -200,8 +200,8 @@ enum qpu_pack_a {
enum qpu_unpack {
QPU_UNPACK_NOP,
- QPU_UNPACK_16A_TO_F32,
- QPU_UNPACK_16B_TO_F32,
+ QPU_UNPACK_16A,
+ QPU_UNPACK_16B,
QPU_UNPACK_8D_REP,
QPU_UNPACK_8A,
QPU_UNPACK_8B,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 0879787ec03..a854db219ff 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -98,8 +98,8 @@ static const char *qpu_pack_mul[] = {
*/
static const char *qpu_unpack[] = {
[QPU_UNPACK_NOP] = "",
- [QPU_UNPACK_16A_TO_F32] = "16a",
- [QPU_UNPACK_16B_TO_F32] = "16b",
+ [QPU_UNPACK_16A] = "16a",
+ [QPU_UNPACK_16B] = "16b",
[QPU_UNPACK_8D_REP] = "8d_rep",
[QPU_UNPACK_8A] = "8a",
[QPU_UNPACK_8B] = "8b",
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 2055fa5d231..cf493d6e47b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -140,8 +140,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
QPU_UNPACK_8B,
QPU_UNPACK_8C,
QPU_UNPACK_8D,
- QPU_UNPACK_16A_TO_F32,
- QPU_UNPACK_16B_TO_F32,
+ QPU_UNPACK_16A,
+ QPU_UNPACK_16B,
};
list_inithead(&c->qpu_inst_list);
From fcb39f5b6a0c7cc92b4260b44a3f327ec84ef47a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Sat, 25 Jul 2015 21:29:28 -0700
Subject: [PATCH 098/266] i965: Make brw_varying_to_offset take a const pointer
to the VUE map.
It doesn't modify it.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_compiler.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 68a93a690dd..bb2fbd4a7b2 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -465,8 +465,8 @@ static inline GLuint brw_vue_slot_to_offset(GLuint slot)
* Convert a vertex output (brw_varying_slot) into a byte offset within the
* VUE.
*/
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
+static inline
+GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
{
return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
}
From bddb7a51c3d0403767dc691e91e6186b3c3c1c1f Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Sat, 24 Oct 2015 19:34:01 +0100
Subject: [PATCH 099/266] docs: add release notes for 11.0.4
Signed-off-by: Emil Velikov
(cherry picked from commit 31bf24703193cc23961923e01548b1acb2760a93)
---
docs/relnotes/11.0.4.html | 167 ++++++++++++++++++++++++++++++++++++++
1 file changed, 167 insertions(+)
create mode 100644 docs/relnotes/11.0.4.html
diff --git a/docs/relnotes/11.0.4.html b/docs/relnotes/11.0.4.html
new file mode 100644
index 00000000000..6359434f669
--- /dev/null
+++ b/docs/relnotes/11.0.4.html
@@ -0,0 +1,167 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 11.0.4 Release Notes / October 24, 2015
+
+
+Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
+
+
+Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+TBD
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+
+Bug 86281 - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)
+
+Bug 86720 - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)
+
+Bug 91788 - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%
+
+Bug 92304 - [cts] cts.shaders.negative conformance tests fail
+
+
+
+
+
Changes
+
+
Alejandro Piñeiro (2):
+
+ i965/vec4: check writemask when bailing out at register coalesce
+ i965/vec4: fill src_reg type using the constructor type parameter
+
+
+
Brian Paul (2):
+
+ vbo: fix incorrect switch statement in init_mat_currval()
+ mesa: fix incorrect opcode in save_BlendFunci()
+
+
+
Chih-Wei Huang (3):
+
+ mesa: android: Fix the incorrect path of sse_minmax.c
+ nv50/ir: use C++11 standard std::unordered_map if possible
+ nv30: include the header of ffs prototype
+
+
+
Chris Wilson (1):
+
+ i965: Remove early release of DRI2 miptree
+
+
+
Dave Airlie (1):
+
+ mesa/uniforms: fix get_uniform for doubles (v2)
+
+
+
Emil Velikov (1):
+
+ docs: add sha256 checksums for 11.0.3
+
+
+
Francisco Jerez (5):
+
+ i965: Don't tell the hardware about our UAV access.
+ mesa: Expose function to calculate whether a shader image unit is valid.
+ mesa: Skip redundant texture completeness checking during image validation.
+ i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.
+ mesa: Get rid of texture-dependent image unit derived state.
+
+
+
Ian Romanick (8):
+
+ glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00
+ ff_fragment_shader: Use binding to set the sampler unit
+ glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms
+ glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform
+ glsl: Only set ir_variable::constant_value for const-decorated variables
+ glsl: Restrict initializers for global variables to constant expression in ES
+ glsl: Add method to determine whether an expression contains the sequence operator
+ glsl: In later GLSL versions, sequence operator is cannot be a constant expression
+
+
+
Ilia Mirkin (1):
+
+ nouveau: make sure there's always room to emit a fence
+
+
+
Indrajit Das (1):
+
+ st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage
+
+
+
Jonathan Gray (1):
+
+ configure.ac: ensure RM is set
+
+
+
Krzysztof Sobiecki (1):
+
+ st/fbo: use pipe_surface_release instead of pipe_surface_reference
+
+
+
Leo Liu (1):
+
+ st/omx/dec/h264: fix field picture type 0 poc disorder
+
+
+
Marek Olšák (3):
+
+ st/mesa: fix clip state dependencies
+ radeonsi: fix a GS copy shader leak
+ gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT
+
+
+
Nicolai Hähnle (1):
+
+ u_vbuf: fix vb slot assignment for translated buffers
+
+
+
Rob Clark (1):
+
+ freedreno/a3xx: cache-flush is needed after MEM_WRITE
+
+
+
Tapani Pälli (3):
+
+ mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span
+ mesa: Set api prefix to version string when overriding version
+ mesa: fix ARRAY_SIZE query for GetProgramResourceiv
+
+
+
+
+
+
From 47dd80a35df94dc9b5ba16bf102b85751024ca09 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Sun, 25 Oct 2015 10:04:09 +0000
Subject: [PATCH 100/266] docs: add sha256 checksums for 11.0.4
Signed-off-by: Emil Velikov
(cherry picked from commit ec14e6f8fd05999b482e0785d8cd286042c9c254)
---
docs/relnotes/11.0.4.html | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/docs/relnotes/11.0.4.html b/docs/relnotes/11.0.4.html
index 6359434f669..a777b9de506 100644
--- a/docs/relnotes/11.0.4.html
+++ b/docs/relnotes/11.0.4.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
SHA256 checksums
-TBD
+ed412ca6a46d1bd055120e5c12806c15419ae8c4dd6d3f6ea20a83091d5c78bf mesa-11.0.4.tar.gz
+40201bf7fc6fa12a6d9edfe870b41eb4dd6669154e3c42c48a96f70805f5483d mesa-11.0.4.tar.xz
From a305d59baa4ad9f8ebbe74b53f022e8ff6636c29 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Sun, 25 Oct 2015 10:17:08 +0000
Subject: [PATCH 101/266] docs: add news item and link release notes for 11.0.4
Signed-off-by: Emil Velikov
---
docs/index.html | 6 ++++++
docs/relnotes.html | 1 +
2 files changed, 7 insertions(+)
diff --git a/docs/index.html b/docs/index.html
index e28c63d41bc..c8d4a5c2699 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,12 @@
News
+October 24, 2015
+
+Mesa 11.0.4 is released.
+This is a bug-fix release.
+
+
October 10, 2015
Mesa 11.0.3 is released.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 074c3b6a612..d1dde4fd726 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
+11.0.4 release notes
11.0.3 release notes
10.6.9 release notes
11.0.2 release notes
From 8ae4317c3643f1728f99172812df6df2d30456bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Mon, 26 Oct 2015 11:13:14 +0200
Subject: [PATCH 102/266] mesa: add additional checks for uniform location
query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Patch adds additional check to make sure we don't return locations for
structures or arrays of structures.
From page 79 of the OpenGL 4.2 spec:
"A valid name cannot be a structure, an array of structures, or any
portion of a single vector or a matrix."
v2: use without-array() to simplify code (Timothy)
No Piglit or CTS regressions observed.
Signed-off-by: Tapani Pälli
Reviewed-by: Samuel Iglesias Gonsálvez
---
src/mesa/main/shader_query.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 8182d3dcc04..84cbfbcd4af 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -808,6 +808,14 @@ program_resource_location(struct gl_shader_program *shProg,
if (RESOURCE_UNI(res)->builtin)
return -1;
+ /* From page 79 of the OpenGL 4.2 spec:
+ *
+ * "A valid name cannot be a structure, an array of structures, or any
+ * portion of a single vector or a matrix."
+ */
+ if (RESOURCE_UNI(res)->type->without_array()->is_record())
+ return -1;
+
/* From the GL_ARB_uniform_buffer_object spec:
*
* "The value -1 will be returned if does not correspond to an
From a61be1a79897931e3efb5b9119c48e1fb1257db4 Mon Sep 17 00:00:00 2001
From: Julien Isorce
Date: Fri, 23 Oct 2015 13:25:47 +0100
Subject: [PATCH 103/266] st/va: pass picture desc to begin and decode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
At least vl_mpeg12_decoder uses the picture
desc in begin_frame and decode_bitstream.
https://bugs.freedesktop.org/show_bug.cgi?id=92634
Signed-off-by: Julien Isorce
Reviewed-by: Christian König
---
src/gallium/state_trackers/va/picture.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index 9b94b397b07..a0d530bb797 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -58,7 +58,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
return VA_STATUS_ERROR_INVALID_SURFACE;
context->target = surf->buffer;
- context->decoder->begin_frame(context->decoder, context->target, NULL);
+ context->decoder->begin_frame(context->decoder, context->target, &context->desc.base);
return VA_STATUS_SUCCESS;
}
@@ -517,7 +517,7 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
buffers[num_buffers] = buf->data;
sizes[num_buffers] = buf->size;
++num_buffers;
- context->decoder->decode_bitstream(context->decoder, context->target, NULL,
+ context->decoder->decode_bitstream(context->decoder, context->target, &context->desc.base,
num_buffers, (const void * const*)buffers, sizes);
}
From 711489648bcce5cd8fcf14e73e5affe069010c01 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Mon, 26 Oct 2015 16:44:47 +0100
Subject: [PATCH 104/266] gallivm: disable f16c when not using AVX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
f16c intrinsic can only be emitted when AVX is used. So when we disable AVX
due to forcing 128bit vectors we must not use this intrinsic (depending on
llvm version, this worked previously because llvm used AVX even when we didn't
tell it to, however I've seen this fail with llvm 3.3 since
718249843b915decf8fccec92e466ac1a6219934 which seems to have the side effect
of disabling avx in llvm albeit it only touches sse flags really, but
with ea421e919ae6e72e1319fb205c42a6fb53ca2f82 it's now really disabled).
Albeit being able to use AVX with 128bit vectors also would have its uses, the
code as is really was meant to emulate jit code creation for less capable cpus.
v2: add some (ifdefed out) missing de-featuring options for simulating
less capable cpus.
Reviewed-by: Nicolai Hähnle
Reviewed-by: Jose Fonseca
---
src/gallium/auxiliary/gallivm/lp_bld_init.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 017d0752060..96aba7370c1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -427,6 +427,7 @@ lp_build_init(void)
*/
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
+ util_cpu_caps.has_f16c = 0;
}
#ifdef PIPE_ARCH_PPC_64
@@ -458,7 +459,9 @@ lp_build_init(void)
util_cpu_caps.has_sse3 = 0;
util_cpu_caps.has_ssse3 = 0;
util_cpu_caps.has_sse4_1 = 0;
+ util_cpu_caps.has_sse4_2 = 0;
util_cpu_caps.has_avx = 0;
+ util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
#endif
From a3d0359aff7a9be90149c416844f330b4f9a15ed Mon Sep 17 00:00:00 2001
From: Timothy Arceri
Date: Tue, 27 Oct 2015 06:58:15 +1100
Subject: [PATCH 105/266] glsl: keep track of intra-stage indices for atomics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is more optimal as it means we no longer have to upload the same set
of ABO surfaces to all stages in the program.
This also fixes a bug where since commit c0cd5b var->data.binding was
being used as a replacement for atomic buffer index, but they don't have
to be the same value they just happened to end up the same when binding is 0.
Reviewed-by: Francisco Jerez
Reviewed-by: Samuel Iglesias Gonsálvez
Cc: Ilia Mirkin
Cc: Alejandro Piñeiro
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175
---
src/glsl/link_atomics.cpp | 43 +++++++++++++++++--
src/glsl/nir/glsl_to_nir.cpp | 2 -
src/glsl/nir/nir.h | 4 +-
src/glsl/nir/nir_lower_atomics.c | 25 ++++++++---
src/mesa/drivers/dri/i965/brw_context.h | 2 +-
.../drivers/dri/i965/brw_gs_surface_state.c | 4 +-
src/mesa/drivers/dri/i965/brw_nir.c | 6 ++-
src/mesa/drivers/dri/i965/brw_shader.cpp | 4 +-
.../drivers/dri/i965/brw_vs_surface_state.c | 4 +-
.../drivers/dri/i965/brw_wm_surface_state.c | 35 +++++++--------
src/mesa/main/mtypes.h | 5 ++-
11 files changed, 95 insertions(+), 39 deletions(-)
diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index 70ef0e1c891..cdcc06d53e2 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
struct gl_shader_program *prog)
{
unsigned num_buffers;
+ unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
active_atomic_buffer *abs =
find_active_atomic_counters(ctx, prog, &num_buffers);
@@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
}
/* Assign stage-specific fields. */
- for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
- mab.StageReferences[j] =
- (ab.stage_references[j] ? GL_TRUE : GL_FALSE);
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (ab.stage_references[j]) {
+ mab.StageReferences[j] = GL_TRUE;
+ num_atomic_buffers[j]++;
+ } else {
+ mab.StageReferences[j] = GL_FALSE;
+ }
+ }
i++;
}
+ /* Store a list pointers to atomic buffers per stage and store the index
+ * to the intra-stage buffer list in uniform storage.
+ */
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
+ prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
+ prog->_LinkedShaders[j]->AtomicBuffers =
+ rzalloc_array(prog, gl_active_atomic_buffer *,
+ num_atomic_buffers[j]);
+
+ unsigned intra_stage_idx = 0;
+ for (unsigned i = 0; i < num_buffers; i++) {
+ struct gl_active_atomic_buffer *atomic_buffer =
+ &prog->AtomicBuffers[i];
+ if (atomic_buffer->StageReferences[j]) {
+ prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
+ atomic_buffer;
+
+ for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
+ intra_stage_idx;
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
+ true;
+ }
+
+ intra_stage_idx++;
+ }
+ }
+ }
+ }
+
delete [] abs;
assert(i == num_buffers);
}
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 9b50a93e7f6..01f16d70eb1 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -392,8 +392,6 @@ nir_visitor::visit(ir_variable *ir)
var->data.index = ir->data.index;
var->data.binding = ir->data.binding;
- /* XXX Get rid of buffer_index */
- var->data.atomic.buffer_index = ir->data.binding;
var->data.atomic.offset = ir->data.atomic.offset;
var->data.image.read_only = ir->data.image_read_only;
var->data.image.write_only = ir->data.image_write_only;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e3777f926e2..04a21a7ead6 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -308,7 +308,6 @@ typedef struct {
* Location an atomic counter is stored at.
*/
struct {
- unsigned buffer_index;
unsigned offset;
} atomic;
@@ -1978,7 +1977,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
void nir_lower_two_sided_color(nir_shader *shader);
-void nir_lower_atomics(nir_shader *shader);
+void nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
void nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 46e137652a1..40ca3de96cf 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -25,17 +25,24 @@
*
*/
+#include "ir_uniform.h"
#include "nir.h"
#include "main/config.h"
#include
+typedef struct {
+ const struct gl_shader_program *shader_program;
+ nir_shader *shader;
+} lower_atomic_state;
+
/*
* replace atomic counter intrinsics that use a variable with intrinsics
* that directly store the buffer index and byte offset
*/
static void
-lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
+lower_instr(nir_intrinsic_instr *instr,
+ lower_atomic_state *state)
{
nir_intrinsic_op op;
switch (instr->intrinsic) {
@@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
return; /* atomics passed as function arguments can't be lowered */
void *mem_ctx = ralloc_parent(instr);
+ unsigned uniform_loc = instr->variables[0]->var->data.location;
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
new_instr->const_index[0] =
- (int) instr->variables[0]->var->data.atomic.buffer_index;
+ state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
@@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state)
{
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_intrinsic)
- lower_instr(nir_instr_as_intrinsic(instr), state);
+ lower_instr(nir_instr_as_intrinsic(instr),
+ (lower_atomic_state *) state);
}
return true;
}
void
-nir_lower_atomics(nir_shader *shader)
+nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
{
+ lower_atomic_state state = {
+ .shader = shader,
+ .shader_program = shader_program,
+ };
+
nir_foreach_overload(shader, overload) {
if (overload->impl) {
- nir_foreach_block(overload->impl, lower_block, overload->impl);
+ nir_foreach_block(overload->impl, lower_block, (void *) &state);
nir_metadata_preserve(overload->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0fdc83ef7e1..18c361ea8cd 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1463,7 +1463,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
struct brw_stage_prog_data *prog_data,
bool dword_pitch);
void brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
void brw_upload_image_surfaces(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 00125c0f405..76ed237d88a 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_GS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
- &brw->gs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+ &brw->gs.base, &brw->gs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 1b4dace84fb..11f111382f4 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -249,8 +249,10 @@ brw_create_nir(struct brw_context *brw,
nir_lower_system_values(nir);
nir_validate_shader(nir);
- nir_lower_atomics(nir);
- nir_validate_shader(nir);
+ if (shader_prog) {
+ nir_lower_atomics(nir, shader_prog);
+ nir_validate_shader(nir);
+ }
nir_optimize(nir, is_scalar);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 204935641f3..4ea297ade4c 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -1191,9 +1191,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
- if (shader_prog && shader_prog->NumAtomicBuffers) {
+ if (shader && shader->NumAtomicBuffers) {
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += shader_prog->NumAtomicBuffers;
+ next_binding_table_offset += shader->NumAtomicBuffers;
} else {
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index f65258a52a5..d7473845c72 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_VS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
- &brw->vs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+ &brw->vs.base, &brw->vs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6ebe6481c32..f88f8d59196 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
void
brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data)
{
@@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
uint32_t *surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.abo_start];
- for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
- struct gl_atomic_buffer_binding *binding =
- &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
- struct intel_buffer_object *intel_bo =
- intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo = intel_bufferobj_buffer(
- brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
+ if (shader && shader->NumAtomicBuffers) {
+ for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
+ struct gl_atomic_buffer_binding *binding =
+ &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo = intel_bufferobj_buffer(
+ brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
- brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
- binding->Offset, BRW_SURFACEFORMAT_RAW,
- bo->size - binding->Offset, 1, true);
- }
+ brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
+ binding->Offset, BRW_SURFACEFORMAT_RAW,
+ bo->size - binding->Offset, 1, true);
+ }
- if (prog->NumAtomicBuffers)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
}
static void
@@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_FS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
- &brw->wm.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+ &brw->wm.base, &brw->wm.prog_data->base);
}
}
@@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_CS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
- &brw->cs.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ &brw->cs.base, &brw->cs.prog_data->base);
}
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 20dd70ef734..34120cf5777 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2389,6 +2389,9 @@ struct gl_shader
*/
GLuint NumImages;
+ struct gl_active_atomic_buffer **AtomicBuffers;
+ unsigned NumAtomicBuffers;
+
/**
* Whether early fragment tests are enabled as defined by
* ARB_shader_image_load_store.
@@ -4496,7 +4499,7 @@ static inline bool
_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
{
return ctx->Shader._CurrentFragmentProgram != NULL &&
- ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
+ ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
}
#ifdef __cplusplus
From 7070c8879adff2a1204d7473f119d8194eff919b Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Thu, 15 Oct 2015 12:50:12 -0700
Subject: [PATCH 106/266] i965: Fix is-renderable check in
intel_image_target_renderbuffer_storage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Previously we could create a renderbuffer with format
MESA_FORMAT_R8G8B8A8_UNORM, convert that renderbuffer to an EGLImage,
then FAIL to convert the EGLImage back to a renderbuffer because
reasons. Just use the same check in
intel_image_target_renderbuffer_storage that brw_render_target_supported
uses.
There are more checks in brw_render_target_supported, but I don't think
they are necessary here. A different approach would be to refactor
brw_render_target_supported to take rb->Format and rb->NumSamples as
parameters (instead of a gl_renderbuffer) and use the new function here.
Fixes:
ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image
Signed-off-by: Ian Romanick
Reviewed-by: Anuj Phogat
Tested-by: Tapani Pälli
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92476
Cc: "10.3 10.4 10.5 10.6 11.0"
---
src/mesa/drivers/dri/i965/intel_fbo.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 5a6b0dd1ec5..7f281fafc77 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -348,14 +348,10 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
}
/* __DRIimage is opaque to the core so it has to be checked here */
- switch (image->format) {
- case MESA_FORMAT_R8G8B8A8_UNORM:
+ if (!brw->format_supported_as_render_target[image->format]) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(unsupported image format");
return;
- break;
- default:
- break;
}
irb = intel_renderbuffer(rb);
From 8f84a8e257291168ec4b394b1eaa6acef1538cea Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Fri, 16 Oct 2015 09:18:24 -0700
Subject: [PATCH 107/266] i965: Add missing close-parenthesis in error messages
Trivial.
Signed-off-by: Ian Romanick
---
src/mesa/drivers/dri/i965/intel_fbo.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 7f281fafc77..3a4a53a07e6 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -343,14 +343,14 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
if (image->planar_format && image->planar_format->nplanes > 1) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(planar buffers are not "
- "supported as render targets.");
+ "supported as render targets.)");
return;
}
/* __DRIimage is opaque to the core so it has to be checked here */
if (!brw->format_supported_as_render_target[image->format]) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetRenderbufferStorage(unsupported image format");
+ "glEGLImageTargetRenderbufferStorage(unsupported image format)");
return;
}
From 8034e7d6f1be3c9c1a9626b64830617ccf000ecc Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Tue, 28 Jul 2015 18:16:37 -0700
Subject: [PATCH 108/266] glsl: Convert TES gl_PatchVerticesIn into a constant
when using a TCS.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a TCS is present, the TES input gl_PatchVerticesIn is actually a
constant - it's simply the # of output vertices specified by the TCS
layout qualifiers. So, we can replace the system value with a constant,
which may allow further optimization, and will likely be more efficient.
If the TCS is absent, we can't do this optimization.
Signed-off-by: Kenneth Graunke
Reviewed-by: Marek Olšák
---
src/glsl/linker.cpp | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 424b92a1783..cfd8f81c867 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2282,6 +2282,22 @@ resize_tes_inputs(struct gl_context *ctx,
foreach_in_list(ir_instruction, ir, tes->ir) {
ir->accept(&input_resize_visitor);
}
+
+ if (tcs) {
+ /* Convert the gl_PatchVerticesIn system value into a constant, since
+ * the value is known at this point.
+ */
+ foreach_in_list(ir_instruction, ir, tes->ir) {
+ ir_variable *var = ir->as_variable();
+ if (var && var->data.mode == ir_var_system_value &&
+ var->data.location == SYSTEM_VALUE_VERTICES_IN) {
+ void *mem_ctx = ralloc_parent(var);
+ var->data.mode = ir_var_auto;
+ var->data.location = 0;
+ var->constant_value = new(mem_ctx) ir_constant(num_vertices);
+ }
+ }
+ }
}
/**
From 0ccacfa0170f19e08228e7404e77a9da8f5f633c Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 17:33:30 -0700
Subject: [PATCH 109/266] vc4: If a QIR source has an unpack set, print it.
Not used yet, but will be.
---
src/gallium/drivers/vc4/vc4_qir.c | 1 +
src/gallium/drivers/vc4/vc4_qpu.h | 3 +++
src/gallium/drivers/vc4/vc4_qpu_disasm.c | 12 +++++++++---
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 3d3989fa95d..d9f51bb4a3f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -313,6 +313,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
fprintf(stderr, ", ");
qir_print_reg(c, inst->src[i], false);
+ vc4_qpu_disasm_unpack(stderr, inst->src[i].pack);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index 0719d2828b5..866ca5c1300 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -212,6 +212,9 @@ vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack);
void
vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack);
+void
+vc4_qpu_disasm_unpack(FILE *out, uint32_t pack);
+
void
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index a854db219ff..c46fd1a0e3f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -257,6 +257,13 @@ vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
fprintf(out, "%s", DESC(qpu_pack_a, pack));
}
+void
+vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack)
+{
+ if (unpack != QPU_UNPACK_NOP)
+ fprintf(out, ".%s", DESC(qpu_unpack, unpack));
+}
+
static void
print_alu_dst(uint64_t inst, bool is_mul)
{
@@ -315,10 +322,9 @@ print_alu_src(uint64_t inst, uint32_t mux)
fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
}
- if (unpack != QPU_UNPACK_NOP &&
- ((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
+ if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
(mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
- fprintf(stderr, ".%s", DESC(qpu_unpack, unpack));
+ vc4_qpu_disasm_unpack(stderr, unpack);
}
}
From 6eb0760f4896d2dd45820ec1fcab36496228b761 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 17:49:03 -0700
Subject: [PATCH 110/266] vc4: Only copy propagate raw MOVs.
No problems being fixed, but needed for the new unpack changes.
---
src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index fd2539aed95..b46be24ad0c 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -45,14 +45,9 @@ qir_opt_copy_propagation(struct vc4_compile *c)
int index = inst->src[i].index;
if (inst->src[i].file == QFILE_TEMP &&
c->defs[index] &&
- c->defs[index]->op == QOP_MOV &&
+ qir_is_raw_mov(c->defs[index]) &&
(c->defs[index]->src[0].file == QFILE_TEMP ||
c->defs[index]->src[0].file == QFILE_UNIF)) {
- /* If it has a pack, it shouldn't be an SSA
- * def.
- */
- assert(!c->defs[index]->dst.pack);
-
if (debug) {
fprintf(stderr, "Copy propagate: ");
qir_dump_inst(c, inst);
From 3d7a088608b38b3a460ad7ba5c7fb575815c8a43 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 13:57:57 -0700
Subject: [PATCH 111/266] vc4: Don't try to follow MOVs across a pack.
---
src/gallium/drivers/vc4/vc4_qir.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index d9f51bb4a3f..59a4e62159c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -429,7 +429,8 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg)
{
while (reg.file == QFILE_TEMP &&
c->defs[reg.index] &&
- c->defs[reg.index]->op == QOP_MOV) {
+ c->defs[reg.index]->op == QOP_MOV &&
+ !c->defs[reg.index]->dst.pack) {
reg = c->defs[reg.index]->src[0];
}
From 652a864b257650e730ecec9e5882d765840a02e1 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 13:17:33 -0700
Subject: [PATCH 112/266] vc4: Fix up the test for whether the unpack can be
from r4.
We can do 16a/16b from float as well. No difference on shader-db.
---
src/gallium/drivers/vc4/vc4_qir.c | 24 +++++++++++++++++++
src/gallium/drivers/vc4/vc4_qir.h | 1 +
.../drivers/vc4/vc4_register_allocate.c | 10 ++------
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 59a4e62159c..ce6618fe7d0 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -189,6 +189,30 @@ qir_is_mul(struct qinst *inst)
}
}
+bool
+qir_is_float_input(struct qinst *inst)
+{
+ switch (inst->op) {
+ case QOP_FMUL:
+ case QOP_FADD:
+ case QOP_FSUB:
+ case QOP_FMIN:
+ case QOP_FMAX:
+ case QOP_FMINABS:
+ case QOP_FMAXABS:
+ case QOP_FTOI:
+ case QOP_UNPACK_8A_F:
+ case QOP_UNPACK_8B_F:
+ case QOP_UNPACK_8C_F:
+ case QOP_UNPACK_8D_F:
+ case QOP_UNPACK_16A_F:
+ case QOP_UNPACK_16B_F:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool
qir_is_raw_mov(struct qinst *inst)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index aa4d9328f37..1a1e0f32386 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -465,6 +465,7 @@ bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
+bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 3ced50f3a44..2a1b6c35d69 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -283,20 +283,14 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
if (qir_src_needs_a_file(inst)) {
- switch (inst->op) {
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
+ if (qir_is_float_input(inst)) {
/* Special case: these can be done as R4
* unpacks, as well.
*/
class_bits[inst->src[0].index] &= (CLASS_BIT_A |
CLASS_BIT_R4);
- break;
- default:
+ } else {
class_bits[inst->src[0].index] &= CLASS_BIT_A;
- break;
}
}
ip++;
From a7b424e835269d9aab6ba9b7b23f836474682725 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 14:07:44 -0700
Subject: [PATCH 113/266] vc4: Reduce MOV special-casing in QIR-to-QPU.
I'm going to introduce some more types of MOV, which also want the elision
of raw MOVs.
---
src/gallium/drivers/vc4/vc4_qpu_emit.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index cf493d6e47b..21db3674313 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -209,6 +209,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
M(V8ADDS),
M(V8SUBS),
M(MUL24),
+
+ /* If we replicate src[0] out to src[1], this works
+ * out the same as a MOV.
+ */
+ [QOP_MOV] = { QPU_A_OR },
};
struct qpu_reg src[4];
@@ -264,14 +269,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
}
switch (qinst->op) {
- case QOP_MOV:
- /* Skip emitting the MOV if it's a no-op. */
- if (qir_is_raw_mov(qinst) ||
- dst.mux != src[0].mux || dst.addr != src[0].addr) {
- queue(c, qpu_a_MOV(dst, src[0]));
- }
- break;
-
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
@@ -490,6 +487,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
+ /* Skip emitting the MOV if it's a no-op. */
+ if (qir_is_raw_mov(qinst) &&
+ dst.mux == src[0].mux && dst.addr == src[0].addr) {
+ break;
+ }
+
/* If we have only one source, put it in the second
* argument slot as well so that we don't take up
* another raddr just to get unused data.
From 548b05d53f3c89630aa77fc92ff174f5d8162ab2 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 13:22:18 -0700
Subject: [PATCH 114/266] vc4: Drop some confused code about pack/unpack
handling.
At one point I thought packs and unpacks were in the same field of the
instruction. They aren't. These instructions therefore never cause a
pack.
total instructions in shared programs: 89472 -> 89390 (-0.09%)
instructions in affected programs: 15261 -> 15179 (-0.54%)
---
src/gallium/drivers/vc4/vc4_qpu_emit.c | 27 ++++----------------------
1 file changed, 4 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 21db3674313..94fd187dc0a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -431,7 +431,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8C_F:
case QOP_UNPACK_8D_F:
case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F: {
+ case QOP_UNPACK_16B_F:
if (src[0].mux == QPU_MUX_R4) {
queue(c, qpu_a_MOV(dst, src[0]));
*last_inst(c) |= QPU_PM;
@@ -442,22 +442,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
} else {
assert(src[0].mux == QPU_MUX_A);
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst),
- src[0], src[0]));
+ queue(c, qpu_a_FMAX(dst, src[0], src[0]));
*last_inst(c) |=
QPU_SET_FIELD(unpack_map[qinst->op -
QOP_UNPACK_8A_F],
QPU_UNPACK);
-
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
- }
}
- }
break;
case QOP_UNPACK_8A_I:
@@ -465,22 +455,13 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8C_I:
case QOP_UNPACK_8D_I:
case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I: {
+ case QOP_UNPACK_16B_I:
assert(src[0].mux == QPU_MUX_A);
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst), src[0]));
+ queue(c, qpu_a_MOV(dst, src[0]));
*last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
QOP_UNPACK_8A_I],
QPU_UNPACK);
-
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
- }
- }
break;
default:
From 99a9a5a345fab8bbf36ab4e42581f8ee04a59a63 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sat, 24 Oct 2015 17:35:03 -0700
Subject: [PATCH 115/266] vc4: Switch the unpack ops to being unpack flags on a
mov.
This paves the way for copy propagating our unpacks. We end up with a
small change on shader-db:
total instructions in shared programs: 89390 -> 89251 (-0.16%)
instructions in affected programs: 19041 -> 18902 (-0.73%)
which appears to be because we no longer convert MOVs for an FMAX dst,
r4.unpack, r4.unpack (instead of the previous MOV dst, r4.unpack), and
this ends up with a slightly better schedule.
---
src/gallium/drivers/vc4/vc4_opt_algebraic.c | 1 +
.../drivers/vc4/vc4_opt_small_immediates.c | 4 +-
src/gallium/drivers/vc4/vc4_qir.c | 45 ++-------------
src/gallium/drivers/vc4/vc4_qir.h | 34 ++++-------
src/gallium/drivers/vc4/vc4_qpu_emit.c | 57 ++++---------------
.../drivers/vc4/vc4_register_allocate.c | 24 +++++---
6 files changed, 42 insertions(+), 123 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 5b435832b92..f1bab810eff 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -64,6 +64,7 @@ is_constant_value(struct vc4_compile *c, struct qreg reg,
uint32_t val)
{
if (reg.file == QFILE_UNIF &&
+ !reg.pack &&
c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
c->uniform_data[reg.index] == val) {
return true;
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index d6e98f0aebf..e61562171aa 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -56,6 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c)
struct qreg src = qir_follow_movs(c, inst->src[i]);
if (src.file != QFILE_UNIF ||
+ src.pack ||
c->uniform_contents[src.index] !=
QUNIFORM_CONSTANT) {
continue;
@@ -72,9 +73,6 @@ qir_opt_small_immediates(struct vc4_compile *c)
continue;
}
- if (qir_src_needs_a_file(inst))
- continue;
-
uint32_t imm = c->uniform_data[src.index];
uint32_t small_imm = qpu_encode_small_immediate(imm);
if (small_imm == ~0)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index ce6618fe7d0..bba4f6d82ac 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -36,6 +36,7 @@ struct qir_op_info {
static const struct qir_op_info qir_op_info[] = {
[QOP_MOV] = { "mov", 1, 1 },
+ [QOP_FMOV] = { "fmov", 1, 1 },
[QOP_FADD] = { "fadd", 1, 2 },
[QOP_FSUB] = { "fsub", 1, 2 },
[QOP_FMUL] = { "fmul", 1, 2 },
@@ -100,18 +101,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
- [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
- [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
- [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 },
- [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 },
- [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 },
- [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 },
- [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 },
- [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 },
- [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 },
- [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 },
- [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 },
};
static const char *
@@ -193,6 +182,7 @@ bool
qir_is_float_input(struct qinst *inst)
{
switch (inst->op) {
+ case QOP_FMOV:
case QOP_FMUL:
case QOP_FADD:
case QOP_FSUB:
@@ -201,12 +191,6 @@ qir_is_float_input(struct qinst *inst)
case QOP_FMINABS:
case QOP_FMAXABS:
case QOP_FTOI:
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
return true;
default:
return false;
@@ -216,7 +200,8 @@ qir_is_float_input(struct qinst *inst)
bool
qir_is_raw_mov(struct qinst *inst)
{
- return (inst->op == QOP_MOV &&
+ return ((inst->op == QOP_MOV ||
+ inst->op == QOP_FMOV) &&
!inst->dst.pack &&
!inst->src[0].pack);
}
@@ -245,28 +230,6 @@ qir_depends_on_flags(struct qinst *inst)
}
}
-bool
-qir_src_needs_a_file(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I:
- return true;
- default:
- return false;
- }
-}
-
bool
qir_writes_r4(struct qinst *inst)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 1a1e0f32386..393749b2f60 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -37,6 +37,7 @@
#include "util/u_math.h"
#include "vc4_screen.h"
+#include "vc4_qpu_defines.h"
#include "pipe/p_state.h"
struct nir_builder;
@@ -64,6 +65,7 @@ struct qreg {
enum qop {
QOP_UNDEF,
QOP_MOV,
+ QOP_FMOV,
QOP_FADD,
QOP_FSUB,
QOP_FMUL,
@@ -128,20 +130,6 @@ enum qop {
QOP_FRAG_W,
QOP_FRAG_REV_FLAG,
- QOP_UNPACK_8A_F,
- QOP_UNPACK_8B_F,
- QOP_UNPACK_8C_F,
- QOP_UNPACK_8D_F,
- QOP_UNPACK_16A_F,
- QOP_UNPACK_16B_F,
-
- QOP_UNPACK_8A_I,
- QOP_UNPACK_8B_I,
- QOP_UNPACK_8C_I,
- QOP_UNPACK_8D_I,
- QOP_UNPACK_16A_I,
- QOP_UNPACK_16B_I,
-
/** Texture x coordinate parameter write */
QOP_TEX_S,
/** Texture y coordinate parameter write */
@@ -468,7 +456,6 @@ bool qir_is_tex(struct qinst *inst);
bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
void qir_dump(struct vc4_compile *c);
@@ -569,6 +556,7 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
}
QIR_ALU1(MOV)
+QIR_ALU1(FMOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
@@ -635,32 +623,32 @@ QIR_NODST_1(TLB_STENCIL_SETUP)
static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 94fd187dc0a..a3eff844137 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -134,15 +134,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t vpm_read_fifo_count = 0;
uint32_t vpm_read_offset = 0;
int last_vpm_read_index = -1;
- /* Map from the QIR ops enum order to QPU unpack bits. */
- static const uint32_t unpack_map[] = {
- QPU_UNPACK_8A,
- QPU_UNPACK_8B,
- QPU_UNPACK_8C,
- QPU_UNPACK_8D,
- QPU_UNPACK_16A,
- QPU_UNPACK_16B,
- };
list_inithead(&c->qpu_inst_list);
@@ -214,8 +205,10 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
* out the same as a MOV.
*/
[QOP_MOV] = { QPU_A_OR },
+ [QOP_FMOV] = { QPU_A_FMAX },
};
+ uint64_t unpack = 0;
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
@@ -225,6 +218,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QFILE_TEMP:
src[i] = temp_registers[index];
+ if (qinst->src[i].pack) {
+ assert(!unpack ||
+ unpack == qinst->src[i].pack);
+ unpack = QPU_SET_FIELD(qinst->src[i].pack,
+ QPU_UNPACK);
+ if (src[i].mux == QPU_MUX_R4)
+ unpack |= QPU_PM;
+ }
break;
case QFILE_UNIF:
src[i] = qpu_unif();
@@ -426,44 +427,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
- if (src[0].mux == QPU_MUX_R4) {
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_UNPACK_8A_F),
- QPU_UNPACK);
- } else {
- assert(src[0].mux == QPU_MUX_A);
-
- queue(c, qpu_a_FMAX(dst, src[0], src[0]));
- *last_inst(c) |=
- QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
- }
- break;
-
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I:
- assert(src[0].mux == QPU_MUX_A);
-
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_I],
- QPU_UNPACK);
- break;
-
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 2a1b6c35d69..bca36c3e7f4 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -282,17 +282,23 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
class_bits[inst->dst.index] &= CLASS_BIT_A;
}
- if (qir_src_needs_a_file(inst)) {
- if (qir_is_float_input(inst)) {
- /* Special case: these can be done as R4
- * unpacks, as well.
- */
- class_bits[inst->src[0].index] &= (CLASS_BIT_A |
- CLASS_BIT_R4);
- } else {
- class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ /* Apply restrictions for src unpacks. The integer unpacks
+ * can only be done from regfile A, while float unpacks can be
+ * either A or R4.
+ */
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP &&
+ inst->src[i].pack) {
+ if (qir_is_float_input(inst)) {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A | CLASS_BIT_R4;
+ } else {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A;
+ }
}
}
+
ip++;
}
From 72fa2ae20b979ced1b4dde16f81b28d02a2e3b7b Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 14:16:19 -0700
Subject: [PATCH 116/266] vc4: Move dst pack setup out to a helper function
with more asserts.
---
src/gallium/drivers/vc4/vc4_qpu_emit.c | 32 ++++++++++++++++++--------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index a3eff844137..7d4144ac30f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -125,6 +125,27 @@ fixup_raddr_conflict(struct vc4_compile *c,
}
}
+static void
+set_last_dst_pack(struct vc4_compile *c, struct qinst *inst)
+{
+ bool had_pm = *last_inst(c) & QPU_PM;
+ bool had_ws = *last_inst(c) & QPU_WS;
+ uint32_t unpack = QPU_GET_FIELD(*last_inst(c), QPU_UNPACK);
+
+ if (!inst->dst.pack)
+ return;
+
+ *last_inst(c) |= QPU_SET_FIELD(inst->dst.pack, QPU_PACK);
+
+ if (qir_is_mul(inst)) {
+ assert(!unpack || had_pm);
+ *last_inst(c) |= QPU_PM;
+ } else {
+ assert(!unpack || !had_pm);
+ assert(!had_ws); /* dst must be a-file to pack. */
+ }
+}
+
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
@@ -450,21 +471,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
- if (qinst->dst.pack) {
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
- QPU_PACK);
- }
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
- if (qinst->dst.pack) {
- assert(dst.mux == QPU_MUX_A);
- *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
- QPU_PACK);
- }
}
+ set_last_dst_pack(c, qinst);
break;
}
From 01ca4f207efac555ff5f729dce1687a68ba65400 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 26 Oct 2015 13:45:06 -0700
Subject: [PATCH 117/266] vc4: Rewrite the pack instructions as a MOV with a
dst pack flag
Another step in reducing the special-casing of instructions.
---
src/gallium/drivers/vc4/vc4_qir.c | 10 ++++------
src/gallium/drivers/vc4/vc4_qir.h | 25 +++++++++++++------------
src/gallium/drivers/vc4/vc4_qpu_emit.c | 20 +-------------------
3 files changed, 18 insertions(+), 37 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index bba4f6d82ac..7894b081b19 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -37,6 +37,7 @@ struct qir_op_info {
static const struct qir_op_info qir_op_info[] = {
[QOP_MOV] = { "mov", 1, 1 },
[QOP_FMOV] = { "fmov", 1, 1 },
+ [QOP_MMOV] = { "mmov", 1, 1 },
[QOP_FADD] = { "fadd", 1, 2 },
[QOP_FSUB] = { "fsub", 1, 2 },
[QOP_FMUL] = { "fmul", 1, 2 },
@@ -77,11 +78,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
- [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
- [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
- [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
- [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
@@ -165,6 +161,7 @@ bool
qir_is_mul(struct qinst *inst)
{
switch (inst->op) {
+ case QOP_MMOV:
case QOP_FMUL:
case QOP_MUL24:
case QOP_V8MULD:
@@ -201,7 +198,8 @@ bool
qir_is_raw_mov(struct qinst *inst)
{
return ((inst->op == QOP_MOV ||
- inst->op == QOP_FMOV) &&
+ inst->op == QOP_FMOV ||
+ inst->op == QOP_MMOV) &&
!inst->dst.pack &&
!inst->src[0].pack);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 393749b2f60..a92ad93ee07 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -66,6 +66,7 @@ enum qop {
QOP_UNDEF,
QOP_MOV,
QOP_FMOV,
+ QOP_MMOV,
QOP_FADD,
QOP_FSUB,
QOP_FMUL,
@@ -112,11 +113,6 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_PACK_8888_F,
- QOP_PACK_8A_F,
- QOP_PACK_8B_F,
- QOP_PACK_8C_F,
- QOP_PACK_8D_F,
QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
QOP_TLB_Z_WRITE,
@@ -557,6 +553,7 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
QIR_ALU1(MOV)
QIR_ALU1(FMOV)
+QIR_ALU1(MMOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
@@ -597,11 +594,6 @@ QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
-QIR_ALU1(PACK_8888_F)
-QIR_PACK(PACK_8A_F)
-QIR_PACK(PACK_8B_F)
-QIR_PACK(PACK_8C_F)
-QIR_PACK(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
@@ -652,12 +644,21 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
return t;
}
-static inline struct qreg
+static inline void
qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
{
- qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+ assert(!dest.pack);
+ dest.pack = QPU_PACK_MUL_8A + chan;
+ qir_emit(c, qir_inst(QOP_MMOV, dest, val, c->undef));
if (dest.file == QFILE_TEMP)
c->defs[dest.index] = NULL;
+}
+
+static inline struct qreg
+qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)
+{
+ struct qreg dest = qir_MMOV(c, val);
+ c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;
return dest;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 7d4144ac30f..d06f8b27d29 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -227,6 +227,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*/
[QOP_MOV] = { QPU_A_OR },
[QOP_FMOV] = { QPU_A_FMAX },
+ [QOP_MMOV] = { QPU_M_V8MIN },
};
uint64_t unpack = 0;
@@ -348,25 +349,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_PACK_8888_F:
- queue(c, qpu_m_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
- QPU_PACK);
- break;
-
- case QOP_PACK_8A_F:
- case QOP_PACK_8B_F:
- case QOP_PACK_8C_F:
- case QOP_PACK_8D_F:
- queue(c,
- qpu_m_MOV(dst, src[0]) |
- QPU_PM |
- QPU_SET_FIELD(QPU_PACK_MUL_8A +
- qinst->op - QOP_PACK_8A_F,
- QPU_PACK));
- break;
-
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,
qpu_ra(QPU_R_XY_PIXEL_COORD)));
From 3359ad6cda49fb977d837eb00e8ae4d781d95c2a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 5 Aug 2015 20:05:56 -0700
Subject: [PATCH 118/266] vc4: Add support for copy propagation with unpack
flags present.
total instructions in shared programs: 89251 -> 87862 (-1.56%)
instructions in affected programs: 52971 -> 51582 (-2.62%)
---
.../drivers/vc4/vc4_opt_copy_propagation.c | 90 ++++++++++++++-----
src/gallium/drivers/vc4/vc4_qpu_emit.c | 61 +++++++++----
2 files changed, 112 insertions(+), 39 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index b46be24ad0c..0eee5c34e1d 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -41,29 +41,77 @@ qir_opt_copy_propagation(struct vc4_compile *c)
bool debug = false;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- int index = inst->src[i].index;
- if (inst->src[i].file == QFILE_TEMP &&
- c->defs[index] &&
- qir_is_raw_mov(c->defs[index]) &&
- (c->defs[index]->src[0].file == QFILE_TEMP ||
- c->defs[index]->src[0].file == QFILE_UNIF)) {
- if (debug) {
- fprintf(stderr, "Copy propagate: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
- }
+ int nsrc = qir_get_op_nsrc(inst->op);
+ for (int i = 0; i < nsrc; i++) {
+ if (inst->src[i].file != QFILE_TEMP)
+ continue;
- inst->src[i] = c->defs[index]->src[0];
-
- if (debug) {
- fprintf(stderr, "to: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
- }
-
- progress = true;
+ struct qinst *mov = c->defs[inst->src[i].index];
+ if (!mov ||
+ (mov->op != QOP_MOV &&
+ mov->op != QOP_FMOV &&
+ mov->op != QOP_MMOV)) {
+ continue;
}
+
+ if (mov->src[0].file != QFILE_TEMP &&
+ mov->src[0].file != QFILE_UNIF) {
+ continue;
+ }
+
+ if (mov->dst.pack)
+ continue;
+
+ uint8_t unpack;
+ if (mov->src[0].pack) {
+ /* Make sure that the meaning of the unpack
+ * would be the same between the two
+ * instructions.
+ */
+ if (qir_is_float_input(inst) !=
+ qir_is_float_input(mov)) {
+ continue;
+ }
+
+ /* There's only one unpack field, so make sure
+ * this instruction doesn't already use it.
+ */
+ bool already_has_unpack = false;
+ for (int j = 0; j < nsrc; j++) {
+ if (inst->src[j].pack)
+ already_has_unpack = true;
+ }
+ if (already_has_unpack)
+ continue;
+
+ /* A destination pack requires the PM bit to
+ * be set to a specific value already, which
+ * may be different from ours.
+ */
+ if (inst->dst.pack)
+ continue;
+
+ unpack = mov->src[0].pack;
+ } else {
+ unpack = inst->src[i].pack;
+ }
+
+ if (debug) {
+ fprintf(stderr, "Copy propagate: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ }
+
+ inst->src[i] = mov->src[0];
+ inst->src[i].pack = unpack;
+
+ if (debug) {
+ fprintf(stderr, "to: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ }
+
+ progress = true;
}
}
return progress;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index d06f8b27d29..133e1385178 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src)
static void
fixup_raddr_conflict(struct vc4_compile *c,
struct qpu_reg dst,
- struct qpu_reg *src0, struct qpu_reg *src1)
+ struct qpu_reg *src0, struct qpu_reg *src1,
+ struct qinst *inst, uint64_t *unpack)
{
uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
@@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c,
return;
if (mux0 == QPU_MUX_A) {
- queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+ /* Make sure we use the same type of MOV as the instruction,
+ * in case of unpacks.
+ */
+ if (qir_is_float_input(inst))
+ queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0));
+ else
+ queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+
+ /* If we had an unpack on this A-file source, we need to put
+ * it into this MOV, not into the later move from regfile B.
+ */
+ if (inst->src[0].pack) {
+ *last_inst(c) |= *unpack;
+ *unpack = 0;
+ }
*src0 = qpu_rb(31);
} else {
queue(c, qpu_a_MOV(qpu_ra(31), *src0));
@@ -296,7 +311,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
case QOP_SEL_X_0_NC:
- queue(c, qpu_a_MOV(dst, src[0]));
+ queue(c, qpu_a_MOV(dst, src[0]) | unpack);
set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
QPU_COND_ZS);
@@ -310,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
queue(c, qpu_a_MOV(dst, src[0]));
+ if (qinst->src[0].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
QPU_COND_ZS);
queue(c, qpu_a_MOV(dst, src[1]));
+ if (qinst->src[1].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
1) + QPU_COND_ZS);
@@ -326,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
switch (qinst->op) {
case QOP_RCP:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_RSQ:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_EXP2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_LOG2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
- src[0]));
+ src[0]) | unpack);
break;
default:
abort();
@@ -373,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TLB_DISCARD_SETUP:
discard = true;
- queue(c, qpu_a_MOV(src[0], src[0]));
+ queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
*last_inst(c) |= QPU_SF;
break;
case QOP_TLB_STENCIL_SETUP:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
+ assert(!unpack);
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
+ src[0]) | unpack);
break;
case QOP_TLB_Z_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
+ src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
@@ -398,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QOP_TLB_COLOR_WRITE:
- queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
+ queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
break;
case QOP_VARY_ADD_C:
- queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
+ queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;
case QOP_TEX_S:
@@ -414,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TEX_B:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
(qinst->op - QOP_TEX_S)),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_TEX_DIRECT:
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
- queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
+ queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
+ src[0], src[1]) | unpack);
break;
case QOP_TEX_RESULT:
@@ -447,16 +471,17 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
if (qir_is_mul(qinst)) {
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
+ src[0], src[1]) | unpack);
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
+ src[0], src[1]) | unpack);
}
set_last_dst_pack(c, qinst);
From 4565b6f4fb59d261f9128ffe91c0787af4b15ea4 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Wed, 21 Oct 2015 14:34:29 +0200
Subject: [PATCH 119/266] main: Remove interface block array index for doing
the name comparison
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From ARB_program_query_interface spec:
"uint GetProgramResourceIndex(uint program, enum programInterface,
const char *name);
[...]
If exactly matches the name string of one of the active resources
for , the index of the matched resource is returned.
Additionally, if would exactly match the name string of an active
resource if "[0]" were appended to , the index of the matched
resource is returned. [...]"
"A string provided to GetProgramResourceLocation or
GetProgramResourceLocationIndex is considered to match an active variable
if:
[...]
* if the string identifies the base name of an active array, where the
string would exactly match the name of the variable if the suffix
"[0]" were appended to the string;
[...]
"
Fixes the following two dEQP-GLES31 tests:
dEQP-GLES31.functional.program_interface_query.shader_storage_block.resource_list.block_array
dEQP-GLES31.functional.program_interface_query.shader_storage_block.resource_list.block_array_single_element
v2:
- Add AoA support (Timothy)
- Apply it too for GetUniformLocation(), GetUniformName() and others
because ARB_program_interface_query says that they are equivalent
to GetProgramResourceLocation() and GetProgramResourceName() (Tapani)
Signed-off-by: Samuel Iglesias Gonsalvez
Reviewed-by: Tapani Pälli
---
src/mesa/main/shader_query.cpp | 43 +++++++++++++++++++++++++++++++++-
1 file changed, 42 insertions(+), 1 deletion(-)
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 84cbfbcd4af..59ec3d7f693 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -543,8 +543,49 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
/* Resource basename. */
const char *rname = _mesa_program_resource_name(res);
unsigned baselen = strlen(rname);
+ unsigned baselen_without_array_index = baselen;
+ const char *rname_last_square_bracket = strrchr(rname, '[');
+ bool found = false;
+ bool rname_has_array_index_zero = false;
+ /* From ARB_program_interface_query spec:
+ *
+ * "uint GetProgramResourceIndex(uint program, enum programInterface,
+ * const char *name);
+ * [...]
+ * If exactly matches the name string of one of the active
+ * resources for , the index of the matched resource is
+ * returned. Additionally, if would exactly match the name string
+ * of an active resource if "[0]" were appended to , the index of
+ * the matched resource is returned. [...]"
+ *
+ * "A string provided to GetProgramResourceLocation or
+ * GetProgramResourceLocationIndex is considered to match an active variable
+ * if:
+ *
+ * * the string exactly matches the name of the active variable;
+ *
+ * * if the string identifies the base name of an active array, where the
+ * string would exactly match the name of the variable if the suffix
+ * "[0]" were appended to the string; [...]"
+ */
+ /* Remove array's index from interface block name comparison only if
+ * array's index is zero and the resulting string length is the same
+ * than the provided name's length.
+ */
+ if (rname_last_square_bracket) {
+ baselen_without_array_index -= strlen(rname_last_square_bracket);
+ rname_has_array_index_zero =
+ (strncmp(rname_last_square_bracket, "[0]\0", 4) == 0) &&
+ (baselen_without_array_index == strlen(name));
+ }
- if (strncmp(rname, name, baselen) == 0) {
+ if (strncmp(rname, name, baselen) == 0)
+ found = true;
+ else if (rname_has_array_index_zero &&
+ strncmp(rname, name, baselen_without_array_index) == 0)
+ found = true;
+
+ if (found) {
switch (programInterface) {
case GL_UNIFORM_BLOCK:
case GL_SHADER_STORAGE_BLOCK:
From 2c91e086563541271668f9ea6ca58689c0c97d44 Mon Sep 17 00:00:00 2001
From: Marta Lofstedt
Date: Mon, 26 Oct 2015 11:22:27 +0100
Subject: [PATCH 120/266] mesa: Draw Indirect return wrong error code on
unalinged
From OpenGL 4.4 specification, section 10.4 and
Open GL Es 3.1 section 10.5:
"An INVALID_VALUE error is generated if indirect is not a multiple
of the size, in basic machine units, of uint."
However, the current code follow the ARB_draw_indirect:
https://www.opengl.org/registry/specs/ARB/draw_indirect.txt
"INVALID_OPERATION is generated by DrawArraysIndirect and
DrawElementsIndirect if commands source data beyond the end
of a buffer object or if is not word aligned."
V2: After discussions on the list, it was suggested to
only keep the INVALID_VALUE error.
Signed-off-by: Marta Lofstedt
Reviewed-by: Ian Romanick
Reviewed-by: Ilia Mirkin
---
src/mesa/main/api_validate.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index a46c1944e96..e4cfc9bfdaf 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -702,12 +702,14 @@ valid_draw_indirect(struct gl_context *ctx,
return GL_FALSE;
- /* From the ARB_draw_indirect specification:
- * "An INVALID_OPERATION error is generated [...] if is no
- * word aligned."
+ /* From OpenGL version 4.4. section 10.5
+ * and OpenGL ES 3.1, section 10.6:
+ *
+ * "An INVALID_VALUE error is generated if indirect is not a
+ * multiple of the size, in basic machine units, of uint."
*/
if ((GLsizeiptr)indirect & (sizeof(GLuint) - 1)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, GL_INVALID_VALUE,
"%s(indirect is not aligned)", name);
return GL_FALSE;
}
From 3daa7e5147f164e822269f13c3bcccfa6446fa83 Mon Sep 17 00:00:00 2001
From: Marta Lofstedt
Date: Mon, 26 Oct 2015 11:50:24 +0100
Subject: [PATCH 121/266] mesa: Draw indirect is not allowed when xfb is active
and unpaused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
OpenGL ES 3.1 specification, section 10.5:
"An INVALID_OPERATION error is generated if
transform feedback is active and not paused."
Signed-off-by: Marta Lofstedt
Reviewed-by: Ian Romanick
Reviewed-by: Tapani Pälli
---
src/mesa/main/api_validate.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index e4cfc9bfdaf..e936d62d488 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -701,6 +701,15 @@ valid_draw_indirect(struct gl_context *ctx,
if (!_mesa_valid_prim_mode(ctx, mode, name))
return GL_FALSE;
+ /* OpenGL ES 3.1 specification, section 10.5:
+ *
+ * "An INVALID_OPERATION error is generated if
+ * transform feedback is active and not paused."
+ */
+ if (_mesa_is_gles31(ctx) && _mesa_is_xfb_active_and_unpaused(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(TransformFeedback is active and not paused)", name);
+ }
/* From OpenGL version 4.4. section 10.5
* and OpenGL ES 3.1, section 10.6:
From 5bc5dca0cbcb1a13fbe9b3a33489e88531d1eb33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 11:10:14 +0200
Subject: [PATCH 122/266] radeonsi: simplify DCC handling in
si_initialize_color_surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeonsi/si_state.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 384c8e28faa..c87f661f278 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,8 +1926,9 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
- if (sctx->b.chip_class >= VI) {
+ if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
unsigned max_uncompressed_block_size = 2;
+ uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
if (rtex->surface.nsamples > 1) {
if (rtex->surface.bpe == 1)
@@ -1938,12 +1939,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
S_028C78_INDEPENDENT_64B_BLOCKS(1);
-
- if (rtex->surface.dcc_enabled) {
- uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
-
- surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
- }
+ surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
}
if (rtex->fmask.size) {
From 235d38584cd47faa2837cd66ebdc770f295f47c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:48:32 +0200
Subject: [PATCH 123/266] radeonsi: properly check if DCC is enabled and
allocated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_texture.c | 2 +-
src/gallium/drivers/radeonsi/cik_sdma.c | 2 +-
src/gallium/drivers/radeonsi/si_blit.c | 6 +++---
src/gallium/drivers/radeonsi/si_dma.c | 2 +-
src/gallium/drivers/radeonsi/si_state.c | 4 ++--
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index f7a11a2534c..40075ae7031 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1367,7 +1367,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
- if (tex->surface.dcc_enabled) {
+ if (tex->dcc_buffer) {
uint32_t reset_value;
bool clear_words_needed;
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 25fd09a4725..e53af1dd6b5 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -243,7 +243,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
- rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index a2264363db9..302b75ccce4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);
tex = (struct r600_texture *)view->texture;
- assert(tex->cmask.size || tex->fmask.size || tex->surface.dcc_enabled);
+ assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer);
si_blit_decompress_color(&sctx->b.b, tex,
view->u.tex.first_level, view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
level, level,
first_layer, last_layer);
- } else if (rtex->fmask.size || rtex->cmask.size || rtex->surface.dcc_enabled) {
+ } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) {
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
@@ -676,7 +676,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
(!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */
- !dst->surface.dcc_enabled) {
+ !dst->dcc_buffer) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
(info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 73c026cc0cd..581e89f42d8 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -249,7 +249,7 @@ void si_dma_copy(struct pipe_context *ctx,
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
rsrc->cmask.size || rsrc->fmask.size ||
- rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index c87f661f278..18b64056bc7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,7 +1926,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
- if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
+ if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
unsigned max_uncompressed_block_size = 2;
uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
@@ -2655,7 +2655,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(last_layer));
- if (tmp->surface.dcc_enabled) {
+ if (tmp->dcc_buffer) {
uint64_t dcc_offset = surflevel[base_level].dcc_offset;
unsigned swap = r600_translate_colorswap(pipe_format);
From 3aebc596b339b1b787ed0dfc27793263d48b2819 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 22 Oct 2015 22:55:19 +0200
Subject: [PATCH 124/266] radeonsi: add debug flags that disable DCC and DCC
fast clear
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
For debugging, bug reports, etc.
This is not in the radeonsi directory, but it is about radeonsi.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_pipe_common.c | 2 ++
src/gallium/drivers/radeon/r600_pipe_common.h | 2 ++
src/gallium/drivers/radeon/r600_texture.c | 6 ++++++
3 files changed, 10 insertions(+)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 4ce0c6a1994..0ad36849645 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -360,6 +360,8 @@ static const struct debug_named_value common_debug_options[] = {
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
+ { "nodcc", DBG_NO_DCC, "Disable DCC." },
+ { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
DEBUG_NAMED_VALUE_END /* must be last */
};
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index f21a0b3a188..c300c0b3332 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -99,6 +99,8 @@
#define DBG_INFO (1llu << 40)
#define DBG_NO_WC (1llu << 41)
#define DBG_CHECK_VM (1llu << 42)
+#define DBG_NO_DCC (1llu << 43)
+#define DBG_NO_DCC_CLEAR (1llu << 44)
#define R600_MAP_BUFFER_ALIGNMENT 64
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 40075ae7031..789c66fd169 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -486,6 +486,9 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
+ if (rscreen->debug_flags & DBG_NO_DCC)
+ return;
+
rtex->dcc_buffer = (struct r600_resource *)
r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment);
@@ -1371,6 +1374,9 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
uint32_t reset_value;
bool clear_words_needed;
+ if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
+ continue;
+
vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b,
From 93eb4f9287576e346838e7b38fec9b42518605f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 26 Oct 2015 11:11:44 +0100
Subject: [PATCH 125/266] winsys/amdgpu: remove the dcc_enable surface flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
dcc_size is sufficient and doesn't need a further comment in my opinion.
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_texture.c | 3 +--
src/gallium/drivers/radeon/radeon_winsys.h | 1 -
src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 13 ++++++-------
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 789c66fd169..edfdfe33187 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -641,9 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
}
- if (rtex->surface.dcc_enabled) {
+ if (rtex->surface.dcc_size)
vi_texture_alloc_dcc_separate(rscreen, rtex);
- }
}
/* Now create the backing buffer. */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 0178643549e..8bf1e15f3be 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -371,7 +371,6 @@ struct radeon_surf {
uint64_t dcc_size;
uint64_t dcc_alignment;
- bool dcc_enabled;
};
struct radeon_bo_list_item {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index b442174b7b8..3006bd17958 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -251,7 +251,7 @@ static int compute_level(struct amdgpu_winsys *ws,
surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
- if (surf->dcc_enabled) {
+ if (AddrSurfInfoIn->flags.dccCompatible) {
AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
@@ -267,10 +267,11 @@ static int compute_level(struct amdgpu_winsys *ws,
surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
} else {
- surf->dcc_enabled = false;
+ surf->dcc_size = 0;
surf_level->dcc_offset = 0;
}
} else {
+ surf->dcc_size = 0;
surf_level->dcc_offset = 0;
}
@@ -354,10 +355,6 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
AddrSurfInfoIn.tileIndex = -1;
- surf->dcc_enabled = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
- !(surf->flags & RADEON_SURF_SCANOUT) &&
- !compressed && AddrDccIn.numSamples <= 1;
-
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
@@ -373,7 +370,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
AddrSurfInfoIn.flags.degrade4Space = 1;
- AddrSurfInfoIn.flags.dccCompatible = surf->dcc_enabled;
+ AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
+ !(surf->flags & RADEON_SURF_SCANOUT) &&
+ !compressed && AddrDccIn.numSamples <= 1;
/* This disables incorrect calculations (hacks) in addrlib. */
AddrSurfInfoIn.flags.noStencil = 1;
From 16c49da63a72aa4b1dce5c90397ee4af2f6a8f9d Mon Sep 17 00:00:00 2001
From: Marta Lofstedt
Date: Mon, 26 Oct 2015 10:58:37 +0100
Subject: [PATCH 126/266] mesa: Draw indirect is not allowed if the default VAO
is bound.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From OpenGL ES 3.1 specification, section 10.5:
"DrawArraysIndirect requires that all data sourced for the
command, including the DrawArraysIndirectCommand
structure, be in buffer objects, and may not be called when
the default vertex array object is bound."
Signed-off-by: Marta Lofstedt
Reviewed-by: Tapani Pälli
---
src/mesa/main/api_validate.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index e936d62d488..06efe02e445 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -698,6 +698,18 @@ valid_draw_indirect(struct gl_context *ctx,
{
const GLsizeiptr end = (GLsizeiptr)indirect + size;
+ /* OpenGL ES 3.1 spec. section 10.5:
+ *
+ * "DrawArraysIndirect requires that all data sourced for the
+ * command, including the DrawArraysIndirectCommand
+ * structure, be in buffer objects, and may not be called when
+ * the default vertex array object is bound."
+ */
+ if (ctx->Array.VAO == ctx->Array.DefaultVAO) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "(no VAO bound)");
+ return GL_FALSE;
+ }
+
if (!_mesa_valid_prim_mode(ctx, mode, name))
return GL_FALSE;
From 231605d14d3ca75ce99ea01d75868a2f2862504d Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Fri, 23 Oct 2015 12:12:19 -0400
Subject: [PATCH 127/266] vl: add RefPicList defines for VAAPI HEVC decode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyuan Zhang
Reviewed-by: Christian König
Reviewed-by: Leo Liu
---
src/gallium/include/pipe/p_video_state.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 7d13151e643..d353be60759 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -479,6 +479,8 @@ struct pipe_h265_picture_desc
uint8_t RefPicSetStCurrBefore[8];
uint8_t RefPicSetStCurrAfter[8];
uint8_t RefPicSetLtCurr[8];
+ uint8_t RefPicList[2][15];
+ bool UseRefPicList;
};
#ifdef __cplusplus
From 38c3d7cfc42f00c47f720ab293ad88b9a0f637f2 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Fri, 23 Oct 2015 12:30:33 -0400
Subject: [PATCH 128/266] radeon/uvd: implement and add flag for VAAPI HEVC
decode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyuan Zhang
Reviewed-by: Christian König
Reviewed-by: Leo Liu
---
src/gallium/drivers/radeon/radeon_uvd.c | 7 +++++++
src/gallium/drivers/radeon/radeon_uvd.h | 9 +++++++++
2 files changed, 16 insertions(+)
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index c3ac7e7f2ef..33b01361aa5 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -478,6 +478,8 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
+ if (pic->UseRefPicList == true)
+ result.sps_info_flags |= 1 << 10;
result.chroma_format = pic->pps->sps->chroma_format_idc;
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
@@ -586,6 +588,11 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+ for (i = 0 ; i < 2 ; i++) {
+ for (int j = 0 ; j < 15 ; j++)
+ result.direct_reflist[i][j] = pic->RefPicList[i][j];
+ }
+
/* TODO
result.highestTid;
result.isNonRef;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
index 452fbd60880..9cc0a694c30 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -233,6 +233,15 @@ struct ruvd_h265 {
uint8_t highestTid;
uint8_t isNonRef;
+
+ uint8_t p010_mode;
+ uint8_t msb_mode;
+ uint8_t luma_10to8;
+ uint8_t chroma_10to8;
+ uint8_t sclr_luma10to8;
+ uint8_t sclr_chroma10to8;
+
+ uint8_t direct_reflist[2][15];
};
struct ruvd_vc1 {
From ad2752e94b535dbcf01829464431ce242068de53 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Fri, 23 Oct 2015 13:37:48 -0400
Subject: [PATCH 129/266] st/va: add VAAPI HEVC decode support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyuan Zhang
Reviewed-by: Christian König
Reviewed-by: Leo Liu
---
configure.ac | 2 +-
src/gallium/state_trackers/va/config.c | 2 +-
src/gallium/state_trackers/va/context.c | 20 +++
src/gallium/state_trackers/va/picture.c | 182 +++++++++++++++++++++
src/gallium/state_trackers/va/va_private.h | 5 +
5 files changed, 209 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index 5aa19a5a6de..3f15881a18e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -81,7 +81,7 @@ PRESENTPROTO_REQUIRED=1.0
LIBUDEV_REQUIRED=151
GLPROTO_REQUIRED=1.4.14
LIBOMXIL_BELLAGIO_REQUIRED=0.0
-LIBVA_REQUIRED=0.35.0
+LIBVA_REQUIRED=0.38.0
VDPAU_REQUIRED=1.1
WAYLAND_REQUIRED=1.2.0
XCB_REQUIRED=1.9.3
diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c
index cfb0b25b71f..5030f9e5d43 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -45,7 +45,7 @@ vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, int *num_
*num_profiles = 0;
pscreen = VL_VA_PSCREEN(ctx);
- for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; ++p)
+ for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_HEVC_MAIN_444; ++p)
if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) {
vap = PipeToProfile(p);
if (vap != VAProfileNone)
diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 8b003aedaec..5adbe768532 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -194,6 +194,21 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width,
}
}
+ if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_HEVC) {
+ context->desc.h265.pps = CALLOC_STRUCT(pipe_h265_pps);
+ if (!context->desc.h265.pps) {
+ FREE(context);
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+ }
+ context->desc.h265.pps->sps = CALLOC_STRUCT(pipe_h265_sps);
+ if (!context->desc.h265.pps->sps) {
+ FREE(context->desc.h265.pps);
+ FREE(context);
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+ }
+ }
+
context->desc.base.profile = config_id;
*context_id = handle_table_add(drv->htab, context);
@@ -216,6 +231,11 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id)
FREE(context->desc.h264.pps->sps);
FREE(context->desc.h264.pps);
}
+ if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_HEVC) {
+ FREE(context->desc.h265.pps->sps);
+ FREE(context->desc.h265.pps);
+ }
context->decoder->destroy(context->decoder);
FREE(context);
handle_table_remove(drv->htab, context_id);
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index a0d530bb797..9cd496f6417 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -81,6 +81,7 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *
VAPictureParameterBufferH264 *h264;
VAPictureParameterBufferVC1 * vc1;
VAPictureParameterBufferMPEG4 *mpeg4;
+ VAPictureParameterBufferHEVC *hevc;
vlVaSurface *surf_forward;
vlVaSurface *surf_backward;
unsigned int i;
@@ -286,6 +287,157 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ assert(buf->size >= sizeof(VAPictureParameterBufferHEVC) && buf->num_elements == 1);
+ hevc = buf->data;
+ context->desc.h265.pps->sps->chroma_format_idc = hevc->pic_fields.bits.chroma_format_idc;
+ context->desc.h265.pps->sps->separate_colour_plane_flag =
+ hevc->pic_fields.bits.separate_colour_plane_flag;
+ context->desc.h265.pps->sps->pic_width_in_luma_samples = hevc->pic_width_in_luma_samples;
+ context->desc.h265.pps->sps->pic_height_in_luma_samples = hevc->pic_height_in_luma_samples;
+ context->desc.h265.pps->sps->bit_depth_luma_minus8 = hevc->bit_depth_luma_minus8;
+ context->desc.h265.pps->sps->bit_depth_chroma_minus8 = hevc->bit_depth_chroma_minus8;
+ context->desc.h265.pps->sps->log2_max_pic_order_cnt_lsb_minus4 =
+ hevc->log2_max_pic_order_cnt_lsb_minus4;
+ context->desc.h265.pps->sps->sps_max_dec_pic_buffering_minus1 =
+ hevc->sps_max_dec_pic_buffering_minus1;
+ context->desc.h265.pps->sps->log2_min_luma_coding_block_size_minus3 =
+ hevc->log2_min_luma_coding_block_size_minus3;
+ context->desc.h265.pps->sps->log2_diff_max_min_luma_coding_block_size =
+ hevc->log2_diff_max_min_luma_coding_block_size;
+ context->desc.h265.pps->sps->log2_min_transform_block_size_minus2 =
+ hevc->log2_min_transform_block_size_minus2;
+ context->desc.h265.pps->sps->log2_diff_max_min_transform_block_size =
+ hevc->log2_diff_max_min_transform_block_size;
+ context->desc.h265.pps->sps->max_transform_hierarchy_depth_inter =
+ hevc->max_transform_hierarchy_depth_inter;
+ context->desc.h265.pps->sps->max_transform_hierarchy_depth_intra =
+ hevc->max_transform_hierarchy_depth_intra;
+ context->desc.h265.pps->sps->scaling_list_enabled_flag =
+ hevc->pic_fields.bits.scaling_list_enabled_flag;
+ context->desc.h265.pps->sps->amp_enabled_flag = hevc->pic_fields.bits.amp_enabled_flag;
+ context->desc.h265.pps->sps->sample_adaptive_offset_enabled_flag =
+ hevc->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag;
+ context->desc.h265.pps->sps->pcm_enabled_flag = hevc->pic_fields.bits.pcm_enabled_flag;
+ if (hevc->pic_fields.bits.pcm_enabled_flag == 1) {
+ context->desc.h265.pps->sps->pcm_sample_bit_depth_luma_minus1 =
+ hevc->pcm_sample_bit_depth_luma_minus1;
+ context->desc.h265.pps->sps->pcm_sample_bit_depth_chroma_minus1 =
+ hevc->pcm_sample_bit_depth_chroma_minus1;
+ context->desc.h265.pps->sps->log2_min_pcm_luma_coding_block_size_minus3 =
+ hevc->log2_min_pcm_luma_coding_block_size_minus3;
+ context->desc.h265.pps->sps->log2_diff_max_min_pcm_luma_coding_block_size =
+ hevc->log2_diff_max_min_pcm_luma_coding_block_size;
+ context->desc.h265.pps->sps->pcm_loop_filter_disabled_flag =
+ hevc->pic_fields.bits.pcm_loop_filter_disabled_flag;
+ }
+ context->desc.h265.pps->sps->num_short_term_ref_pic_sets = hevc->num_short_term_ref_pic_sets;
+ context->desc.h265.pps->sps->long_term_ref_pics_present_flag =
+ hevc->slice_parsing_fields.bits.long_term_ref_pics_present_flag;
+ context->desc.h265.pps->sps->num_long_term_ref_pics_sps = hevc->num_long_term_ref_pic_sps;
+ context->desc.h265.pps->sps->sps_temporal_mvp_enabled_flag =
+ hevc->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag;
+ context->desc.h265.pps->sps->strong_intra_smoothing_enabled_flag =
+ hevc->pic_fields.bits.strong_intra_smoothing_enabled_flag;
+
+ context->desc.h265.pps->dependent_slice_segments_enabled_flag =
+ hevc->slice_parsing_fields.bits.dependent_slice_segments_enabled_flag;
+ context->desc.h265.pps->output_flag_present_flag =
+ hevc->slice_parsing_fields.bits.output_flag_present_flag;
+ context->desc.h265.pps->num_extra_slice_header_bits = hevc->num_extra_slice_header_bits;
+ context->desc.h265.pps->sign_data_hiding_enabled_flag =
+ hevc->pic_fields.bits.sign_data_hiding_enabled_flag;
+ context->desc.h265.pps->cabac_init_present_flag =
+ hevc->slice_parsing_fields.bits.cabac_init_present_flag;
+ context->desc.h265.pps->num_ref_idx_l0_default_active_minus1 =
+ hevc->num_ref_idx_l0_default_active_minus1;
+ context->desc.h265.pps->num_ref_idx_l1_default_active_minus1 =
+ hevc->num_ref_idx_l1_default_active_minus1;
+ context->desc.h265.pps->init_qp_minus26 = hevc->init_qp_minus26;
+ context->desc.h265.pps->constrained_intra_pred_flag =
+ hevc->pic_fields.bits.constrained_intra_pred_flag;
+ context->desc.h265.pps->transform_skip_enabled_flag =
+ hevc->pic_fields.bits.transform_skip_enabled_flag;
+ context->desc.h265.pps->cu_qp_delta_enabled_flag =
+ hevc->pic_fields.bits.cu_qp_delta_enabled_flag;
+ context->desc.h265.pps->diff_cu_qp_delta_depth = hevc->diff_cu_qp_delta_depth;
+ context->desc.h265.pps->pps_cb_qp_offset = hevc->pps_cb_qp_offset;
+ context->desc.h265.pps->pps_cr_qp_offset = hevc->pps_cr_qp_offset;
+ context->desc.h265.pps->pps_slice_chroma_qp_offsets_present_flag =
+ hevc->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag;
+ context->desc.h265.pps->weighted_pred_flag = hevc->pic_fields.bits.weighted_pred_flag;
+ context->desc.h265.pps->weighted_bipred_flag = hevc->pic_fields.bits.weighted_bipred_flag;
+ context->desc.h265.pps->transquant_bypass_enabled_flag =
+ hevc->pic_fields.bits.transquant_bypass_enabled_flag;
+ context->desc.h265.pps->tiles_enabled_flag = hevc->pic_fields.bits.tiles_enabled_flag;
+ context->desc.h265.pps->entropy_coding_sync_enabled_flag =
+ hevc->pic_fields.bits.entropy_coding_sync_enabled_flag;
+ if (hevc->pic_fields.bits.tiles_enabled_flag == 1) {
+ context->desc.h265.pps->num_tile_columns_minus1 = hevc->num_tile_columns_minus1;
+ context->desc.h265.pps->num_tile_rows_minus1 = hevc->num_tile_rows_minus1;
+ for (i = 0 ; i < 19 ; i++)
+ context->desc.h265.pps->column_width_minus1[i] = hevc->column_width_minus1[i];
+ for (i = 0 ; i < 21 ; i++)
+ context->desc.h265.pps->row_height_minus1[i] = hevc->row_height_minus1[i];
+ context->desc.h265.pps->loop_filter_across_tiles_enabled_flag =
+ hevc->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
+ }
+ context->desc.h265.pps->pps_loop_filter_across_slices_enabled_flag =
+ hevc->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag;
+ context->desc.h265.pps->deblocking_filter_override_enabled_flag =
+ hevc->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag;
+ context->desc.h265.pps->pps_deblocking_filter_disabled_flag =
+ hevc->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag;
+ context->desc.h265.pps->pps_beta_offset_div2 = hevc->pps_beta_offset_div2;
+ context->desc.h265.pps->pps_tc_offset_div2 = hevc->pps_tc_offset_div2;
+ context->desc.h265.pps->lists_modification_present_flag =
+ hevc->slice_parsing_fields.bits.lists_modification_present_flag;
+ context->desc.h265.pps->log2_parallel_merge_level_minus2 =
+ hevc->log2_parallel_merge_level_minus2;
+ context->desc.h265.pps->slice_segment_header_extension_present_flag =
+ hevc->slice_parsing_fields.bits.slice_segment_header_extension_present_flag;
+
+ context->desc.h265.IDRPicFlag = hevc->slice_parsing_fields.bits.IdrPicFlag;
+ context->desc.h265.RAPPicFlag = hevc->slice_parsing_fields.bits.RapPicFlag;
+
+ context->desc.h265.CurrPicOrderCntVal = hevc->CurrPic.pic_order_cnt;
+
+ for (i = 0 ; i < 8 ; i++) {
+ context->desc.h265.RefPicSetStCurrBefore[i] = 0xFF;
+ context->desc.h265.RefPicSetStCurrAfter[i] = 0xFF;
+ context->desc.h265.RefPicSetLtCurr[i] = 0xFF;
+ }
+ context->desc.h265.NumPocStCurrBefore = 0;
+ context->desc.h265.NumPocStCurrAfter = 0;
+ context->desc.h265.NumPocLtCurr = 0;
+ unsigned int iBefore = 0;
+ unsigned int iAfter = 0;
+ unsigned int iCurr = 0;
+ for (i = 0 ; i < 15 ; i++) {
+ context->desc.h265.PicOrderCntVal[i] = hevc->ReferenceFrames[i].pic_order_cnt;
+
+ unsigned int index = hevc->ReferenceFrames[i].picture_id & 0x7F;
+
+ if (index == 0x7F)
+ continue;
+
+ getReferenceFrame(drv, hevc->ReferenceFrames[i].picture_id, &context->desc.h265.ref[i]);
+
+ if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) && (iBefore < 8)) {
+ context->desc.h265.RefPicSetStCurrBefore[iBefore++] = i;
+ context->desc.h265.NumPocStCurrBefore++;
+ }
+ if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_ST_CURR_AFTER) && (iAfter < 8)) {
+ context->desc.h265.RefPicSetStCurrAfter[iAfter++] = i;
+ context->desc.h265.NumPocStCurrAfter++;
+ }
+ if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_LT_CURR) && (iCurr < 8)) {
+ context->desc.h265.RefPicSetLtCurr[iCurr++] = i;
+ context->desc.h265.NumPocLtCurr++;
+ }
+ }
+ break;
+
default:
break;
}
@@ -297,6 +449,7 @@ handleIQMatrixBuffer(vlVaContext *context, vlVaBuffer *buf)
VAIQMatrixBufferMPEG2 *mpeg2;
VAIQMatrixBufferH264 *h264;
VAIQMatrixBufferMPEG4 *mpeg4;
+ VAIQMatrixBufferHEVC *h265;
switch (u_reduce_video_profile(context->decoder->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
@@ -320,6 +473,17 @@ handleIQMatrixBuffer(vlVaContext *context, vlVaBuffer *buf)
memcpy(&context->desc.h264.pps->ScalingList8x8, h264->ScalingList8x8, 2 * 64);
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ assert(buf->size >= sizeof(VAIQMatrixBufferH264) && buf->num_elements == 1);
+ h265 = buf->data;
+ memcpy(&context->desc.h265.pps->sps->ScalingList4x4, h265->ScalingList4x4, 6 * 16);
+ memcpy(&context->desc.h265.pps->sps->ScalingList8x8, h265->ScalingList8x8, 6 * 64);
+ memcpy(&context->desc.h265.pps->sps->ScalingList16x16, h265->ScalingList16x16, 6 * 64);
+ memcpy(&context->desc.h265.pps->sps->ScalingList32x32, h265->ScalingList32x32, 2 * 64);
+ memcpy(&context->desc.h265.pps->sps->ScalingListDCCoeff16x16, h265->ScalingListDC16x16, 6);
+ memcpy(&context->desc.h265.pps->sps->ScalingListDCCoeff32x32, h265->ScalingListDC32x32, 2);
+ break;
+
case PIPE_VIDEO_FORMAT_MPEG4:
assert(buf->size >= sizeof(VAIQMatrixBufferMPEG4) && buf->num_elements == 1);
mpeg4 = buf->data;
@@ -345,6 +509,7 @@ handleSliceParameterBuffer(vlVaContext *context, vlVaBuffer *buf)
{
VASliceParameterBufferH264 *h264;
VASliceParameterBufferMPEG4 *mpeg4;
+ VASliceParameterBufferHEVC *h265;
switch (u_reduce_video_profile(context->decoder->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
@@ -361,6 +526,15 @@ handleSliceParameterBuffer(vlVaContext *context, vlVaBuffer *buf)
context->mpeg4.quant_scale = mpeg4->quant_scale;
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ assert(buf->size >= sizeof(VASliceParameterBufferHEVC) && buf->num_elements == 1);
+ h265 = buf->data;
+ for (int i = 0 ; i < 2 ; i++) {
+ for (int j = 0 ; j < 15 ; j++)
+ context->desc.h265.RefPicList[i][j] = h265->RefPicList[i][j];
+ }
+ context->desc.h265.UseRefPicList = true;
+ break;
default:
break;
}
@@ -483,6 +657,7 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
void * const *buffers[2];
unsigned sizes[2];
static const uint8_t start_code_h264[] = { 0x00, 0x00, 0x01 };
+ static const uint8_t start_code_h265[] = { 0x00, 0x00, 0x01 };
static const uint8_t start_code_vc1[] = { 0x00, 0x00, 0x01, 0x0d };
format = u_reduce_video_profile(context->decoder->profile);
@@ -494,6 +669,13 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
buffers[num_buffers] = (void *const)&start_code_h264;
sizes[num_buffers++] = sizeof(start_code_h264);
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ if (bufHasStartcode(buf, 0x000001, 24))
+ break;
+
+ buffers[num_buffers] = (void *const)&start_code_h265;
+ sizes[num_buffers++] = sizeof(start_code_h265);
+ break;
case PIPE_VIDEO_FORMAT_VC1:
if (bufHasStartcode(buf, 0x0000010d, 32) ||
bufHasStartcode(buf, 0x0000010c, 32) ||
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 1ea7be79aa3..93af1be19a7 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -112,6 +112,8 @@ PipeToProfile(enum pipe_video_profile profile)
return VAProfileH264High;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
return VAProfileNone;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ return VAProfileHEVCMain;
default:
assert(0);
return -1;
@@ -142,6 +144,8 @@ ProfileToPipe(VAProfile profile)
return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN;
case VAProfileH264High:
return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
+ case VAProfileHEVCMain:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN;
default:
return PIPE_VIDEO_PROFILE_UNKNOWN;
}
@@ -174,6 +178,7 @@ typedef struct {
struct pipe_mpeg4_picture_desc mpeg4;
struct pipe_vc1_picture_desc vc1;
struct pipe_h264_picture_desc h264;
+ struct pipe_h265_picture_desc h265;
} desc;
struct {
From 03c92ffbf636ef9333f4575602e4de471ec7cdba Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Fri, 23 Oct 2015 13:44:23 -0400
Subject: [PATCH 130/266] st/vdpau: disable RefPicList for Vdpau HEVC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Boyuan Zhang
Reviewed-by: Christian König
Reviewed-by: Leo Liu
---
src/gallium/state_trackers/vdpau/decode.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 3233799d650..f85bce823bb 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -518,6 +518,7 @@ vlVdpDecoderRenderH265(struct pipe_h265_picture_desc *picture,
memcpy(picture->RefPicSetStCurrBefore, picture_info->RefPicSetStCurrBefore, 8);
memcpy(picture->RefPicSetStCurrAfter, picture_info->RefPicSetStCurrAfter, 8);
memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8);
+ picture->UseRefPicList = false;
return VDP_STATUS_OK;
}
From 74fcc4c41fe547ea5fb8e23f25fc71b25f5afe96 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Tue, 27 Oct 2015 09:33:01 +0100
Subject: [PATCH 131/266] main: fix GL_MAX_NUM_ACTIVE_VARIABLES value for
shader storage blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The maximum number of active variables for shader storage blocks should
take into account the specific rules for shader storage blocks, i.e. for
an active shader storage block member declared as an array, an entry
will be generated only for the first array element, regardless of its type.
Fixes 3 dEQP-GLES31.functional.* tests:
dEQP-GLES31.functional.program_interface_query.shader_storage_block.active_variables.named_block
dEQP-GLES31.functional.program_interface_query.shader_storage_block.active_variables.unnamed_block
dEQP-GLES31.functional.program_interface_query.shader_storage_block.active_variables.block_array
Signed-off-by: Samuel Iglesias Gonsalvez
Reviewed-by: Tapani Pälli
---
src/mesa/main/program_resource.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index eb71fdde703..b7e25fe3840 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -119,7 +119,6 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
case GL_MAX_NUM_ACTIVE_VARIABLES:
switch (programInterface) {
case GL_UNIFORM_BLOCK:
- case GL_SHADER_STORAGE_BLOCK:
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type == programInterface) {
struct gl_uniform_block *block =
@@ -129,6 +128,26 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
}
}
break;
+ case GL_SHADER_STORAGE_BLOCK:
+ for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
+ if (shProg->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_block *block =
+ (struct gl_uniform_block *)
+ shProg->ProgramResourceList[i].Data;
+ GLint block_params = 0;
+ for (unsigned j = 0; j < block->NumUniforms; j++) {
+ const char *iname = block->Uniforms[j].IndexName;
+ struct gl_program_resource *uni =
+ _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+ iname, NULL);
+ if (!uni)
+ continue;
+ block_params++;
+ }
+ *params = MAX2(*params, block_params);
+ }
+ }
+ break;
case GL_ATOMIC_COUNTER_BUFFER:
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type == programInterface) {
From 0325f6822899fa58963094e3bb6433e3c6d5b374 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 7 Oct 2015 12:38:11 +0100
Subject: [PATCH 132/266] i965: remove cache_aux_free_func array
There is only one function that can be called, which is well known at
compilation time.
The abstraction used here seems unnecessary, so let's use a direct call
to brw_stage_prog_data_free() when appropriate, cut down the size of
struct brw_cache.
Signed-off-by: Emil Velikov
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_context.h | 5 -----
src/mesa/drivers/dri/i965/brw_state_cache.c | 12 +++++-------
2 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 18c361ea8cd..e075395ef49 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -501,8 +501,6 @@ struct brw_cache_item {
};
-typedef void (*cache_aux_free_func)(const void *aux);
-
struct brw_cache {
struct brw_context *brw;
@@ -512,9 +510,6 @@ struct brw_cache {
uint32_t next_offset;
bool bo_used_by_gpu;
-
- /** Optional functions for freeing other pointers attached to a prog_data. */
- cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 2fbcd146750..7f07bef253a 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -349,11 +349,6 @@ brw_init_caches(struct brw_context *brw)
4096, 64);
if (brw->has_llc)
drm_intel_gem_bo_map_unsynchronized(cache->bo);
-
- cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free;
}
static void
@@ -367,9 +362,12 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
- if (cache->aux_free[c->cache_id]) {
+ if (c->cache_id == BRW_CACHE_VS_PROG ||
+ c->cache_id == BRW_CACHE_GS_PROG ||
+ c->cache_id == BRW_CACHE_FS_PROG ||
+ c->cache_id == BRW_CACHE_CS_PROG) {
const void *item_aux = c->key + c->key_size;
- cache->aux_free[c->cache_id](item_aux);
+ brw_stage_prog_data_free(item_aux);
}
free((void *)c->key);
free(c);
From bfc73ff10eafad59b6ae9ca3991f9f1a3700b3a1 Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Wed, 7 Oct 2015 12:38:12 +0100
Subject: [PATCH 133/266] i965: remove unneeded src_reg copy in
emit_shader_time_write
The variable is already of type src_reg. creating a new instance only to
destroy it seems unnecessary.
Signed-off-by: Emil Velikov
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 71a5a4403f4..f87b86fbd3e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1788,7 +1788,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(time, src_reg(value)));
+ emit(MOV(time, value));
vec4_instruction *inst =
emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
From 6a4dc1ad492d7d53c5c17e6eb975f0ed44b6741d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 27 Oct 2015 11:11:19 +0100
Subject: [PATCH 134/266] mesa: set TargetIndex in VDPAURegister*SurfaceNV (v2)
We initialized Target, but not TargetIndex.
This is required since 7d7dd1871174905dfdd3ca874a09d9.
v2: do it in the right place. Noticed by Brian Paul.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92645
Reviewed-by: Brian Paul
---
src/mesa/main/vdpau.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/mesa/main/vdpau.c b/src/mesa/main/vdpau.c
index 0efa56e4f41..44be3a37443 100644
--- a/src/mesa/main/vdpau.c
+++ b/src/mesa/main/vdpau.c
@@ -163,9 +163,10 @@ register_surface(struct gl_context *ctx, GLboolean isOutput,
return (GLintptr)NULL;
}
- if (tex->Target == 0)
+ if (tex->Target == 0) {
tex->Target = target;
- else if (tex->Target != target) {
+ tex->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ } else if (tex->Target != target) {
_mesa_unlock_texture(ctx, tex);
free(surf);
_mesa_error(ctx, GL_INVALID_OPERATION,
From e82c527f1fc2f8ddc64954ecd06b0de3cea92e93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 24 Aug 2015 01:08:48 +0200
Subject: [PATCH 135/266] radeonsi: allow copying between compatible compressed
and uncompressed formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
which is where a block in src maps to a pixel in dst and vice versa.
e.g. DXT1 <-> R32G32_UINT
DXT5 <-> R32G32B32A32_UINT
Reviewed-by: Michel Dänzer
---
src/gallium/drivers/radeonsi/si_blit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 302b75ccce4..fce014a1e6b 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
util_blitter_default_src_texture(&src_templ, src, src_level);
- if (util_format_is_compressed(src->format) &&
+ if (util_format_is_compressed(src->format) ||
util_format_is_compressed(dst->format)) {
unsigned blocksize = util_format_get_blocksize(src->format);
From ce9db16e1c101204cdb6b2482aa1b4c7d0c59d41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 24 Aug 2015 01:19:35 +0200
Subject: [PATCH 136/266] gallium: add
PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS
For ARB_copy_image.
Reviewed-by: Brian Paul
---
src/gallium/docs/source/screen.rst | 4 +++-
src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
src/gallium/drivers/i915/i915_screen.c | 1 +
src/gallium/drivers/ilo/ilo_screen.c | 1 +
src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 +
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 +
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 +
src/gallium/drivers/r300/r300_screen.c | 1 +
src/gallium/drivers/r600/r600_pipe.c | 1 +
src/gallium/drivers/radeonsi/si_pipe.c | 1 +
src/gallium/drivers/softpipe/sp_screen.c | 1 +
src/gallium/drivers/svga/svga_screen.c | 1 +
src/gallium/drivers/vc4/vc4_screen.c | 1 +
src/gallium/include/pipe/p_defines.h | 1 +
15 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 151afb2dffe..91fdb43cfbb 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -278,7 +278,9 @@ The integer capabilities:
in the shader.
* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
pipe_context.
-
+* ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``:
+ Whether copying between compressed and plain formats is supported where
+ a compressed block is copied to/from a plain pixel of the same size.
.. _pipe_capf:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 50d140fe903..9f8c33263fb 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 5812af626cb..2d2fd375656 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index e1a7dc56685..888f7aa6782 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index e2ed267da78..d1c50aefc84 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 03301649e38..bdecb0a32b3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ec51d00f266..2c8884edc2d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -217,6 +217,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index af8e5f72670..1646a9dbcbe 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -203,6 +203,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a576abdfaf2..d5981248a86 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -198,6 +198,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9a97de9965e..9f4cda2c142 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -344,6 +344,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 047bbf4e537..60baad3d13c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -298,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index e7006d2fa0d..c0fc82b2f2c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -249,6 +249,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index f6fafca5c0b..5aa7b0d86eb 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -382,6 +382,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 774ec095652..432b1d1b86e 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -183,6 +183,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* Stream output. */
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 1ad545aae09..b15c8809c1d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -633,6 +633,7 @@ enum pipe_cap
PIPE_CAP_TGSI_TXQS,
PIPE_CAP_FORCE_PERSAMPLE_INTERP,
PIPE_CAP_SHAREABLE_SHADERS,
+ PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
From f04f13622f3e71bee057d60a6be9c53b92b56cc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 24 Aug 2015 02:55:20 +0200
Subject: [PATCH 137/266] st/mesa: implement ARB_copy_image
I wonder if the craziness was worth it.
Reviewed-by: Brian Paul
---
docs/GL3.txt | 2 +-
docs/relnotes/11.1.0.html | 1 +
src/mesa/Makefile.sources | 2 +
src/mesa/state_tracker/st_cb_copyimage.c | 578 +++++++++++++++++++++++
src/mesa/state_tracker/st_cb_copyimage.h | 33 ++
src/mesa/state_tracker/st_cb_texture.c | 51 --
src/mesa/state_tracker/st_context.c | 2 +
src/mesa/state_tracker/st_extensions.c | 1 +
8 files changed, 618 insertions(+), 52 deletions(-)
create mode 100644 src/mesa/state_tracker/st_cb_copyimage.c
create mode 100644 src/mesa/state_tracker/st_cb_copyimage.h
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 167321676df..549c4dbfda7 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -153,7 +153,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
GL_ARB_clear_buffer_object DONE (all drivers)
GL_ARB_compute_shader in progress (jljusten)
- GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
+ GL_ARB_copy_image DONE (i965, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 3e1cd41dc57..3ecf287e315 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_blend_func_extended on freedreno (a3xx)
+GL_ARB_copy_image on radeonsi
GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
GL_ARB_shader_stencil_export on i965 (gen9+)
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 4bcaa62f48e..de0e330b7d1 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -423,6 +423,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_clear.h \
state_tracker/st_cb_condrender.c \
state_tracker/st_cb_condrender.h \
+ state_tracker/st_cb_copyimage.c \
+ state_tracker/st_cb_copyimage.h \
state_tracker/st_cb_drawpixels.c \
state_tracker/st_cb_drawpixels.h \
state_tracker/st_cb_drawpixels_shader.c \
diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
new file mode 100644
index 00000000000..c94a0b22ea8
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_copyimage.h"
+#include "state_tracker/st_cb_fbo.h"
+#include "state_tracker/st_texture.h"
+
+#include "util/u_box.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+
+
+/**
+ * Return an equivalent canonical format without "X" channels.
+ *
+ * Copying between incompatible formats is easier when the format is
+ * canonicalized, meaning that it is in a standard form.
+ *
+ * The returned format has the same component sizes and swizzles as
+ * the source format, the type is changed to UINT or UNORM, depending on
+ * which one has the most swizzle combinations in their group.
+ *
+ * If it's not an array format, return a memcpy-equivalent array format.
+ *
+ * The key feature is that swizzled versions of formats of the same
+ * component size always return the same component type.
+ *
+ * X returns A.
+ * Luminance, intensity, alpha, depth, stencil, and 8-bit and 16-bit packed
+ * formats are not supported. (same as ARB_copy_image)
+ */
+static enum pipe_format
+get_canonical_format(enum pipe_format format)
+{
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ /* Packed formats. Return the equivalent array format. */
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
+ format == PIPE_FORMAT_R9G9B9E5_FLOAT)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+
+ if (desc->nr_channels == 4 &&
+ desc->channel[0].size == 10 &&
+ desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 &&
+ desc->channel[3].size == 2) {
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+
+ return PIPE_FORMAT_NONE;
+ }
+
+#define RETURN_FOR_SWIZZLE1(x, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x) \
+ return format
+
+#define RETURN_FOR_SWIZZLE2(x, y, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y) \
+ return format
+
+#define RETURN_FOR_SWIZZLE3(x, y, z, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z) \
+ return format
+
+#define RETURN_FOR_SWIZZLE4(x, y, z, w, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z && \
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_##w) \
+ return format
+
+ /* Array formats. */
+ if (desc->is_array) {
+ switch (desc->nr_channels) {
+ case 1:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R8_UINT);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R32_UINT);
+ break;
+ }
+ break;
+
+ case 2:
+ switch (desc->channel[0].size) {
+ case 8:
+ /* All formats in each group must be of the same type.
+ * We can't use UINT for R8G8 while using UNORM for G8R8.
+ */
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R8G8_UNORM);
+ RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G8R8_UNORM);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R16G16_UNORM);
+ RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G16R16_UNORM);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R32G32_UINT);
+ break;
+ }
+ break;
+
+ case 3:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R8G8B8_UINT);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R16G16B16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R32G32B32_UINT);
+ break;
+ }
+ break;
+
+ case 4:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R8G8B8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R8G8B8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(Z, Y, X, W, PIPE_FORMAT_B8G8R8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(Z, Y, X, 1, PIPE_FORMAT_B8G8R8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(W, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM);
+ RETURN_FOR_SWIZZLE4(1, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM);
+ RETURN_FOR_SWIZZLE4(W, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM);
+ RETURN_FOR_SWIZZLE4(1, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R16G16B16A16_UINT);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R16G16B16A16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R32G32B32A32_UINT);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R32G32B32A32_UINT);
+ break;
+ }
+ }
+
+ assert(!"unknown array format");
+ return PIPE_FORMAT_NONE;
+ }
+
+ assert(!"unknown packed format");
+ return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Return true if the swizzle is XYZW in case of a 4-channel format,
+ * XY in case of a 2-channel format, or X in case of a 1-channel format.
+ */
+static bool
+has_identity_swizzle(const struct util_format_description *desc)
+{
+ int i;
+
+ for (i = 0; i < desc->nr_channels; i++)
+ if (desc->swizzle[i] != UTIL_FORMAT_SWIZZLE_X + i)
+ return false;
+
+ return true;
+}
+
+/**
+ * Return a canonical format for the given bits and channel size.
+ */
+static enum pipe_format
+canonical_format_from_bits(unsigned bits, unsigned channel_size)
+{
+ switch (bits) {
+ case 8:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8_UINT);
+ break;
+
+ case 16:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8G8_UINT);
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16_UINT);
+ break;
+
+ case 32:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16G16_UINT);
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32_UINT);
+ break;
+
+ case 64:
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16G16B16A16_UINT);
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32G32_UINT);
+ break;
+
+ case 128:
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32G32B32A32_UINT);
+ break;
+ }
+
+ assert(!"impossible format");
+ return PIPE_FORMAT_NONE;
+}
+
+static void
+blit(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ enum pipe_format dst_format,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ enum pipe_format src_format,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct pipe_blit_info blit = {{0}};
+
+ blit.src.resource = src;
+ blit.dst.resource = dst;
+ blit.src.format = src_format;
+ blit.dst.format = dst_format;
+ blit.src.level = src_level;
+ blit.dst.level = dst_level;
+ blit.src.box = *src_box;
+ u_box_3d(dstx, dsty, dstz, src_box->width, src_box->height,
+ src_box->depth, &blit.dst.box);
+ blit.mask = PIPE_MASK_RGBA;
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ pipe->blit(pipe, &blit);
+}
+
+static void
+swizzled_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ const struct util_format_description *src_desc, *dst_desc;
+ unsigned bits;
+ enum pipe_format blit_src_format, blit_dst_format;
+
+ /* Get equivalent canonical formats. Those are always array formats and
+ * copying between compatible canonical formats behaves either like
+ * memcpy or like swizzled memcpy. The idea is that we won't have to care
+ * about the channel type from this point on.
+ * Only the swizzle and channel size.
+ */
+ blit_src_format = get_canonical_format(src->format);
+ blit_dst_format = get_canonical_format(dst->format);
+
+ assert(blit_src_format != PIPE_FORMAT_NONE);
+ assert(blit_dst_format != PIPE_FORMAT_NONE);
+
+ src_desc = util_format_description(blit_src_format);
+ dst_desc = util_format_description(blit_dst_format);
+
+ assert(src_desc->block.bits == dst_desc->block.bits);
+ bits = src_desc->block.bits;
+
+ if (dst_desc->channel[0].size == src_desc->channel[0].size) {
+ /* Only the swizzle is different, which means we can just blit,
+ * e.g. RGBA -> BGRA.
+ */
+ } else if (has_identity_swizzle(src_desc)) {
+ /* Src is unswizzled and dst can be swizzled, so src is typecast
+ * to an equivalent dst-compatible format.
+ * e.g. R32 -> BGRA8 is realized as RGBA8 -> BGRA8
+ */
+ blit_src_format =
+ canonical_format_from_bits(bits, dst_desc->channel[0].size);
+ } else if (has_identity_swizzle(dst_desc)) {
+ /* Dst is unswizzled and src can be swizzled, so dst is typecast
+ * to an equivalent src-compatible format.
+ * e.g. BGRA8 -> R32 is realized as BGRA8 -> RGBA8
+ */
+ blit_dst_format =
+ canonical_format_from_bits(bits, src_desc->channel[0].size);
+ } else {
+ assert(!"This should have been handled by handle_complex_copy.");
+ return;
+ }
+
+ blit(pipe, dst, blit_dst_format, dst_level, dstx, dsty, dstz,
+ src, blit_src_format, src_level, src_box);
+}
+
+static bool
+same_size_and_swizzle(const struct util_format_description *d1,
+ const struct util_format_description *d2)
+{
+ int i;
+
+ if (d1->layout != d2->layout ||
+ d1->nr_channels != d2->nr_channels ||
+ d1->is_array != d2->is_array)
+ return false;
+
+ for (i = 0; i < d1->nr_channels; i++) {
+ if (d1->channel[i].size != d2->channel[i].size)
+ return false;
+
+ if (d1->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W &&
+ d2->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W &&
+ d1->swizzle[i] != d2->swizzle[i])
+ return false;
+ }
+
+ return true;
+}
+
+static struct pipe_resource *
+create_texture(struct pipe_screen *screen, enum pipe_format format,
+ unsigned width, unsigned height, unsigned depth)
+{
+ struct pipe_resource templ;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = format;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
+ templ.array_size = depth;
+ templ.usage = PIPE_USAGE_DEFAULT;
+ templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+
+ if (depth > 1)
+ templ.target = PIPE_TEXTURE_2D_ARRAY;
+ else
+ templ.target = PIPE_TEXTURE_2D;
+
+ return screen->resource_create(screen, &templ);
+}
+
+/**
+ * Handle complex format conversions using 2 blits with a temporary texture
+ * in between, e.g. blitting from B10G10R10A2 to G16R16.
+ *
+ * This example is implemented this way:
+ * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it
+ * can be reinterpreted as a different canonical format of the same bpp,
+ * such as R16G16. This blit only swaps R and B 10-bit components.
+ * 2) Finally, blit the result, which is R10G10B10A2, as R16G16 to G16R16.
+ * This blit only swaps R and G 16-bit components.
+ */
+static bool
+handle_complex_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box,
+ enum pipe_format noncanon_format,
+ enum pipe_format canon_format)
+{
+ struct pipe_box temp_box;
+ struct pipe_resource *temp = NULL;
+ const struct util_format_description *src_desc, *dst_desc;
+ const struct util_format_description *canon_desc, *noncanon_desc;
+ bool src_is_canon;
+ bool src_is_noncanon;
+ bool dst_is_canon;
+ bool dst_is_noncanon;
+
+ src_desc = util_format_description(src->format);
+ dst_desc = util_format_description(dst->format);
+ canon_desc = util_format_description(canon_format);
+ noncanon_desc = util_format_description(noncanon_format);
+
+ src_is_canon = same_size_and_swizzle(src_desc, canon_desc);
+ dst_is_canon = same_size_and_swizzle(dst_desc, canon_desc);
+ src_is_noncanon = same_size_and_swizzle(src_desc, noncanon_desc);
+ dst_is_noncanon = same_size_and_swizzle(dst_desc, noncanon_desc);
+
+ if (src_is_noncanon) {
+ /* Simple case - only types differ (e.g. UNORM and UINT). */
+ if (dst_is_noncanon) {
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src,
+ noncanon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Simple case - only types and swizzles differ. */
+ if (dst_is_canon) {
+ blit(pipe, dst, canon_format, dst_level, dstx, dsty, dstz, src,
+ noncanon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Use the temporary texture. Src is converted to a canonical format,
+ * then proceed the generic swizzled_copy.
+ */
+ temp = create_texture(pipe->screen, canon_format, src_box->width,
+ src_box->height, src_box->depth);
+
+ u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
+ &temp_box);
+
+ blit(pipe, temp, canon_format, 0, 0, 0, 0, src, noncanon_format,
+ src_level, src_box);
+ swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, temp, 0,
+ &temp_box);
+ pipe_resource_reference(&temp, NULL);
+ return true;
+ }
+
+ if (dst_is_noncanon) {
+ /* Simple case - only types and swizzles differ. */
+ if (src_is_canon) {
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src,
+ canon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Use the temporary texture. First, use the generic copy, but use
+ * a canonical format in the destination. Then convert */
+ temp = create_texture(pipe->screen, canon_format, src_box->width,
+ src_box->height, src_box->depth);
+
+ u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
+ &temp_box);
+
+ swizzled_copy(pipe, temp, 0, 0, 0, 0, src, src_level, src_box);
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, temp,
+ canon_format, 0, &temp_box);
+ pipe_resource_reference(&temp, NULL);
+ return true;
+ }
+
+ return false;
+}
+
+static void
+copy_image(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ if (src->format == dst->format ||
+ util_format_is_compressed(src->format) ||
+ util_format_is_compressed(dst->format)) {
+ pipe->resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+ return;
+ }
+
+ /* Copying to/from B10G10R10*2 needs 2 blits with R10G10B10A2
+ * as a temporary texture in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_B10G10R10A2_UINT,
+ PIPE_FORMAT_R10G10B10A2_UINT))
+ return;
+
+ /* Copying to/from G8R8 needs 2 blits with R8G8 as a temporary texture
+ * in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_G8R8_UNORM,
+ PIPE_FORMAT_R8G8_UNORM))
+ return;
+
+ /* Copying to/from G16R16 needs 2 blits with R16G16 as a temporary texture
+ * in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_G16R16_UNORM,
+ PIPE_FORMAT_R16G16_UNORM))
+ return;
+
+ /* Only allow non-identity swizzling on RGBA8 formats. */
+
+ /* Simple copy, memcpy with swizzling, no format conversion. */
+ swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level,
+ src_box);
+}
+
+static void
+st_CopyImageSubData(struct gl_context *ctx,
+ struct gl_texture_image *src_image,
+ struct gl_renderbuffer *src_renderbuffer,
+ int src_x, int src_y, int src_z,
+ struct gl_texture_image *dst_image,
+ struct gl_renderbuffer *dst_renderbuffer,
+ int dst_x, int dst_y, int dst_z,
+ int src_width, int src_height)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_resource *src_res, *dst_res;
+ struct pipe_box box;
+ int src_level, dst_level;
+
+ if (src_image) {
+ struct st_texture_image *src = st_texture_image(src_image);
+ src_res = src->pt;
+ src_level = src_image->Level;
+ src_z += src_image->Face;
+ } else {
+ struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
+ src_res = src->texture;
+ src_level = 0;
+ }
+
+ if (dst_image) {
+ struct st_texture_image *dst = st_texture_image(dst_image);
+ dst_res = dst->pt;
+ dst_level = dst_image->Level;
+ dst_z += dst_image->Face;
+ } else {
+ struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
+ dst_res = dst->texture;
+ dst_level = 0;
+ }
+
+ u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
+
+ copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z,
+ src_res, src_level, &box);
+}
+
+void
+st_init_copy_image_functions(struct dd_function_table *functions)
+{
+ functions->CopyImageSubData = st_CopyImageSubData;
+}
diff --git a/src/mesa/state_tracker/st_cb_copyimage.h b/src/mesa/state_tracker/st_cb_copyimage.h
new file mode 100644
index 00000000000..d17f35c0953
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_copyimage.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef ST_CB_COPY_IMAGE_H
+#define ST_CB_COPY_IMAGE_H
+
+struct dd_function_table;
+
+extern void
+st_init_copy_image_functions(struct dd_function_table *functions);
+
+#endif /* ST_CB_COPY_IMAGE_H */
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 5d25fed317e..d4c916e8057 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1873,55 +1873,6 @@ st_TextureView(struct gl_context *ctx,
return GL_TRUE;
}
-/* HACK: this is only enough for the most basic uses of CopyImage. Must fix
- * before actually exposing the extension.
- */
-static void
-st_CopyImageSubData(struct gl_context *ctx,
- struct gl_texture_image *src_image,
- struct gl_renderbuffer *src_renderbuffer,
- int src_x, int src_y, int src_z,
- struct gl_texture_image *dst_image,
- struct gl_renderbuffer *dst_renderbuffer,
- int dst_x, int dst_y, int dst_z,
- int src_width, int src_height)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_resource *src_res, *dst_res;
- struct pipe_box box;
- int src_level, dst_level;
-
- if (src_image) {
- struct st_texture_image *src = st_texture_image(src_image);
- src_res = src->pt;
- src_level = src_image->Level;
- }
- else {
- struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
- src_res = src->texture;
- src_level = 0;
- }
-
- if (dst_image) {
- struct st_texture_image *dst = st_texture_image(dst_image);
- dst_res = dst->pt;
- dst_level = dst_image->Level;
- }
- else {
- struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
- dst_res = dst->texture;
- dst_level = 0;
- }
-
- u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
- pipe->resource_copy_region(pipe, dst_res, dst_level,
- dst_x, dst_y, dst_z,
- src_res, src_level,
- &box);
-}
-
-
void
st_init_texture_functions(struct dd_function_table *functions)
{
@@ -1953,6 +1904,4 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->AllocTextureStorage = st_AllocTextureStorage;
functions->TextureView = st_TextureView;
-
- functions->CopyImageSubData = st_CopyImageSubData;
}
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 5abb17385c2..6e20fd1fda2 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -44,6 +44,7 @@
#include "st_cb_bufferobjects.h"
#include "st_cb_clear.h"
#include "st_cb_condrender.h"
+#include "st_cb_copyimage.h"
#include "st_cb_drawpixels.h"
#include "st_cb_rasterpos.h"
#include "st_cb_drawtex.h"
@@ -430,6 +431,7 @@ void st_init_driver_functions(struct pipe_screen *screen,
st_init_bufferobject_functions(functions);
st_init_clear_functions(functions);
st_init_bitmap_functions(functions);
+ st_init_copy_image_functions(functions);
st_init_drawpixels_functions(functions);
st_init_rasterpos_functions(functions);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index d4724b46e0a..bd7cbccc20c 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -439,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_base_instance), PIPE_CAP_START_INSTANCE },
{ o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
{ o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED },
+ { o(ARB_copy_image), PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
{ o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE },
{ o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP },
{ o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC },
From 8e9c3070bf45cd33a77537c6769d422d2c9fa8c3 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 28 Oct 2015 11:03:21 -0600
Subject: [PATCH 138/266] mesa: minor formatting fix in
get_tex_rgba_compressed()
---
src/mesa/main/texgetimage.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 682b72755c7..945890aeeb5 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -297,8 +297,7 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
uint8_t rebaseSwizzle[4];
/* Decompress into temp float buffer, then pack into user buffer */
- tempImage = malloc(width * height * depth
- * 4 * sizeof(GLfloat));
+ tempImage = malloc(width * height * depth * 4 * sizeof(GLfloat));
if (!tempImage) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
return;
From 2bf224b3f96f926ba442d2b33a6f50d33e31cee0 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Tue, 27 Oct 2015 13:50:10 -0600
Subject: [PATCH 139/266] vbo: replace assertion with conditional in
vbo_compute_max_verts()
With just the right sequence of per-vertex commands and state changes,
it's possible for this assertion to fail (such as with viewperf11's
lightwave-06-1 test). Instead of asserting, return 0 so that the
caller knows the VBO is full and needs to be flushed.
Reviewed-by: Charmaine Lee
---
src/mesa/vbo/vbo_context.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index e6b9d890d5f..6293a8b9edc 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -207,7 +207,8 @@ vbo_compute_max_verts(const struct vbo_exec_context *exec)
{
unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat));
- assert(n > 0);
+ if (n == 0)
+ return 0;
/* Subtract one so we're always sure to have room for an extra
* vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion.
*/
From 3bbb8715acd1cb85ea7aa7763c06cd12347a1a9a Mon Sep 17 00:00:00 2001
From: Julien Isorce
Date: Tue, 20 Oct 2015 17:34:23 +0100
Subject: [PATCH 140/266] nvc0: fix crash when nv50_miptree_from_handle fails
Signed-off-by: Julien Isorce
Reviewed-by: Samuel Pitoiset
---
src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
index 12b5a025064..15c803c4307 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen,
} else {
struct pipe_resource *res = nv50_miptree_from_handle(screen,
templ, whandle);
- nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+ if (res)
+ nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
return res;
}
}
From ebbd7b41c0be8666f47b06cb83ae5e5117088ea1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Wed, 28 Oct 2015 16:18:18 -0400
Subject: [PATCH 141/266] nvc0: add ARB_copy_image support
Signed-off-by: Ilia Mirkin
---
docs/GL3.txt | 2 +-
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 16 ++++++++++------
3 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 549c4dbfda7..7387ee2ee31 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -153,7 +153,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
GL_ARB_clear_buffer_object DONE (all drivers)
GL_ARB_compute_shader in progress (jljusten)
- GL_ARB_copy_image DONE (i965, radeonsi)
+ GL_ARB_copy_image DONE (i965, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 1646a9dbcbe..422d6d45369 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -179,6 +179,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -203,7 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index dbdf292c862..4d8b5e65848 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -225,10 +225,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
if (m2mf) {
+ struct nv50_miptree *src_mt = nv50_miptree(src);
+ struct nv50_miptree *dst_mt = nv50_miptree(dst);
struct nv50_m2mf_rect drect, srect;
unsigned i;
- unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
- unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
+ << src_mt->ms_x;
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
+ << src_mt->ms_y;
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
nv50_m2mf_rect_setup(&srect, src, src_level,
@@ -237,15 +241,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
for (i = 0; i < src_box->depth; ++i) {
nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
- if (nv50_miptree(dst)->layout_3d)
+ if (dst_mt->layout_3d)
drect.z++;
else
- drect.base += nv50_miptree(dst)->layer_stride;
+ drect.base += dst_mt->layer_stride;
- if (nv50_miptree(src)->layout_3d)
+ if (src_mt->layout_3d)
srect.z++;
else
- srect.base += nv50_miptree(src)->layer_stride;
+ srect.base += src_mt->layer_stride;
}
return;
}
From d0693d7515efb9a747b7fe1d0a21a1ca4bdf33a0 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Wed, 28 Oct 2015 20:52:50 -0400
Subject: [PATCH 142/266] nv50: add ARB_copy_image support
Signed-off-by: Ilia Mirkin
---
docs/GL3.txt | 2 +-
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
src/gallium/drivers/nouveau/nv50/nv50_surface.c | 16 ++++++++++------
3 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7387ee2ee31..d3743c7b4b7 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -153,7 +153,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
GL_ARB_clear_buffer_object DONE (all drivers)
GL_ARB_compute_shader in progress (jljusten)
- GL_ARB_copy_image DONE (i965, nvc0, radeonsi)
+ GL_ARB_copy_image DONE (i965, nv50, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 2c8884edc2d..bcad90128d2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -180,6 +180,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -217,7 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 64348b3c378..0b96e528070 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe,
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
if (m2mf) {
+ struct nv50_miptree *src_mt = nv50_miptree(src);
+ struct nv50_miptree *dst_mt = nv50_miptree(dst);
struct nv50_m2mf_rect drect, srect;
unsigned i;
- unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
- unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
+ << src_mt->ms_x;
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
+ << src_mt->ms_y;
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
nv50_m2mf_rect_setup(&srect, src, src_level,
@@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe,
for (i = 0; i < src_box->depth; ++i) {
nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
- if (nv50_miptree(dst)->layout_3d)
+ if (dst_mt->layout_3d)
drect.z++;
else
- drect.base += nv50_miptree(dst)->layer_stride;
+ drect.base += dst_mt->layer_stride;
- if (nv50_miptree(src)->layout_3d)
+ if (src_mt->layout_3d)
srect.z++;
else
- srect.base += nv50_miptree(src)->layer_stride;
+ srect.base += src_mt->layer_stride;
}
return;
}
From 37402014e80f991169f9e05c1520667ba3930baf Mon Sep 17 00:00:00 2001
From: Laurent Carlier
Date: Wed, 28 Oct 2015 15:47:09 +0100
Subject: [PATCH 143/266] clover: fix building fix clang-3.8
https://bugs.freedesktop.org/show_bug.cgi?id=92705
v2.1: use Linker::Flags::None instead of 0 and emplace_back()
Signed-off-by: Laurent Carlier
Reviewed-by: Francisco Jerez
---
src/gallium/state_trackers/clover/llvm/invocation.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index d74b50df45a..3b37f0802b0 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -247,8 +247,12 @@ namespace {
// attribute. This attribute will prevent Clang from creating
// illegal uses of barrier() (e.g. Moving barrier() inside a conditional
// that is no executed by all threads) during its optimizaton passes.
+#if HAVE_LLVM >= 0x0308
+ c.getCodeGenOpts().LinkBitcodeFiles.emplace_back(llvm::Linker::Flags::None,
+ libclc_path);
+#else
c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
-
+#endif
optimization_level = c.getCodeGenOpts().OptimizationLevel;
// Compile the code
From bf05af3f0e8769f417bbd995470dc1b8083a0df9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 28 Oct 2015 00:53:20 -0700
Subject: [PATCH 144/266] i965: Fix missing BRW_NEW_*_PROG_DATA flagging caused
by cache reuse.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Consider the case of two nearly identical GLSL fragment shaders:
out vec4 color;
void main() { color = vec4(1); }
and
layout(early_fragment_tests) in;
out vec4 color;
void main() { color = vec4(1); }
These shaders compile to the exact same assembly, but have distinct
values for brw_wm_prog_data::early_fragment_tests.
Since these are two independent GLSL shaders, they have different
program keys - notably, brw_wm_prog_key::program_string_id differs.
When uploading the second, brw_upload_cache will find an existing copy
of the assembly in the cache BO, which means matching_data will be
non-NULL. Although we create a second cache item (with the new key
and prog_data), we set item->offset to the existing copy and avoid
re-uploading duplicate assembly.
However, brw_search_cache() would only flag BRW_NEW_*_PROG_DATA if
item->offset differed from the supplied offset. With reuse, both
programs have the same offset, but prog_data changed. We have to
flag it, but failed to.
To fix this, we simply need to check if the aux (prog_data) pointer
changed. If either the assembly or the prog_data differs, flag it.
This fixes a regression since 1bba29ed403e735ba0bf04ed8aa2e571884f,
where Topi fixed brw_upload_cache() to actually reuse identical
assembly. Prior to that, reuse basically never happened due to bugs.
Unfortunately, this code apparently wasn't prepared to handle reuse!
Fixes GPU hangs in Dolphin on Broadwell.
Huge thanks to Pierre Bourdon and Ilia Mirkin for debugging this
and helping track down the real issue.
Cc: "11.0"
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92623
Signed-off-by: Kenneth Graunke
Reviewed-by: Topi Pohjolainen
Reviewed-by: Kristian Høgsberg
Tested-by: Pierre Bourdon
---
src/mesa/drivers/dri/i965/brw_state.h | 2 +-
src/mesa/drivers/dri/i965/brw_state_cache.c | 7 ++++---
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index dc2b9415673..6fc9c14f6b4 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -220,7 +220,7 @@ bool brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
- uint32_t *inout_offset, void *out_aux);
+ uint32_t *inout_offset, void *inout_aux);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 7f07bef253a..f7c0a2037d9 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -137,7 +137,7 @@ bool
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
- uint32_t *inout_offset, void *out_aux)
+ uint32_t *inout_offset, void *inout_aux)
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
@@ -155,11 +155,12 @@ brw_search_cache(struct brw_cache *cache,
if (item == NULL)
return false;
- *(void **)out_aux = ((char *)item->key + item->key_size);
+ void *aux = ((char *) item->key) + item->key_size;
- if (item->offset != *inout_offset) {
+ if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
brw->ctx.NewDriverState |= (1 << cache_id);
*inout_offset = item->offset;
+ *((void **) inout_aux) = aux;
}
return true;
From 9c8208f2c15445b00ba170ff96a9ea4ce6c083e1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 8 Oct 2015 23:53:47 -0700
Subject: [PATCH 145/266] nir: Add intrinsics for tessellation shader system
values.
nir_intrinsic_load_patch_vertices_in corresponds to gl_PatchVerticesIn,
a special input in both the TCS and TES stages.
nir_intrinsic_load_tess_coord corresponds to gl_TessCoord, a special
tessellation evaluation shader input.
nir_intrinsic_load_tess_level_outer/inner correspond to the
gl_TessLevelOuter[] and gl_TessLevelInner[] evaluation shader inputs,
which we treat as system values because they're stored specially.
(These intrinsics are only for the TES - the TCS uses output variables.)
Signed-off-by: Kenneth Graunke
Reviewed-by: Iago Toral Quiroga
---
src/glsl/nir/nir.c | 17 ++++++++++-------
src/glsl/nir/nir_intrinsics.h | 4 ++++
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 793bdafb54b..5f03095d673 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1557,12 +1557,14 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_num_work_groups;
case SYSTEM_VALUE_PRIMITIVE_ID:
return nir_intrinsic_load_primitive_id;
- /* FINISHME: Add tessellation intrinsics.
case SYSTEM_VALUE_TESS_COORD:
- case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_tess_coord;
case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ return nir_intrinsic_load_tess_level_outer;
case SYSTEM_VALUE_TESS_LEVEL_INNER:
- */
+ return nir_intrinsic_load_tess_level_inner;
+ case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_patch_vertices_in;
default:
unreachable("system value does not directly correspond to intrinsic");
}
@@ -1598,13 +1600,14 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_WORK_GROUP_ID;
case nir_intrinsic_load_primitive_id:
return SYSTEM_VALUE_PRIMITIVE_ID;
- /* FINISHME: Add tessellation intrinsics.
+ case nir_intrinsic_load_tess_coord:
return SYSTEM_VALUE_TESS_COORD;
- return SYSTEM_VALUE_VERTICES_IN;
- return SYSTEM_VALUE_PRIMITIVE_ID;
+ case nir_intrinsic_load_tess_level_outer:
return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+ case nir_intrinsic_load_tess_level_inner:
return SYSTEM_VALUE_TESS_LEVEL_INNER;
- */
+ case nir_intrinsic_load_patch_vertices_in:
+ return SYSTEM_VALUE_VERTICES_IN;
default:
unreachable("intrinsic doesn't produce a system value");
}
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 49bf3b22aed..c1fb1bad4bc 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -198,6 +198,10 @@ SYSTEM_VALUE(sample_pos, 2, 0)
SYSTEM_VALUE(sample_mask_in, 1, 0)
SYSTEM_VALUE(primitive_id, 1, 0)
SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(tess_coord, 3, 0)
+SYSTEM_VALUE(tess_level_outer, 4, 0)
+SYSTEM_VALUE(tess_level_inner, 2, 0)
+SYSTEM_VALUE(patch_vertices_in, 1, 0)
SYSTEM_VALUE(local_invocation_id, 3, 0)
SYSTEM_VALUE(work_group_id, 3, 0)
SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
From 77f58c04cc287b72e2b859d71591da158db6830a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 2 Oct 2015 00:01:23 -0700
Subject: [PATCH 146/266] nir: Copy "patch" flag from ir_variable to
nir_variable.
This was introduced in GLSL IR after NIR development had branched.
Signed-off-by: Kenneth Graunke
Reviewed-by: Iago Toral Quiroga
---
src/glsl/nir/glsl_to_nir.cpp | 1 +
src/glsl/nir/nir.h | 1 +
src/glsl/nir/nir_print.c | 5 +++--
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 01f16d70eb1..7275b48eff0 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -303,6 +303,7 @@ nir_visitor::visit(ir_variable *ir)
var->data.read_only = ir->data.read_only;
var->data.centroid = ir->data.centroid;
var->data.sample = ir->data.sample;
+ var->data.patch = ir->data.patch;
var->data.invariant = ir->data.invariant;
var->data.location = ir->data.location;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 04a21a7ead6..ac422514d52 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -171,6 +171,7 @@ typedef struct {
unsigned read_only:1;
unsigned centroid:1;
unsigned sample:1;
+ unsigned patch:1;
unsigned invariant:1;
/**
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 09663996869..30220c5e48d 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -228,12 +228,13 @@ print_var_decl(nir_variable *var, print_state *state)
const char *const cent = (var->data.centroid) ? "centroid " : "";
const char *const samp = (var->data.sample) ? "sample " : "";
+ const char *const patch = (var->data.patch) ? "patch " : "";
const char *const inv = (var->data.invariant) ? "invariant " : "";
const char *const mode[] = { "shader_in ", "shader_out ", "", "",
"uniform ", "shader_storage", "system " };
- fprintf(fp, "%s%s%s%s%s ",
- cent, samp, inv, mode[var->data.mode],
+ fprintf(fp, "%s%s%s%s%s%s ",
+ cent, samp, patch, inv, mode[var->data.mode],
glsl_interp_qualifier_name(var->data.interpolation));
glsl_print_type(var->type, fp);
From b75fff70d82474a571c59c2a3a01e4f9f02286a7 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Tue, 20 Oct 2015 18:03:40 -0400
Subject: [PATCH 147/266] nvc0: do upload-time fixups for interpolation
parameters
Unfortunately flatshading is an all-or-nothing proposition on nvc0,
while GL 3.0 calls for the ability to selectively specify explicit
interpolation parameters on gl_Color/gl_SecondaryColor which would
override the flatshading setting. This allows us to fix up the
interpolation settings after shader generation based on rasterizer
settings.
While we're at it, we can add support for dynamically forcing all
(non-flat) shader inputs to be interpolated per-sample, which allows
st/mesa to not generate variants for these.
Fixes the remaining failing glsl-1.30/execution/interpolation piglits.
Signed-off-by: Ilia Mirkin
---
.../drivers/nouveau/codegen/nv50_ir_driver.h | 5 ++
.../nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++-
.../nouveau/codegen/nv50_ir_emit_gm107.cpp | 26 ++++++++++
.../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 30 +++++++++++-
.../nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 ++++--
.../nouveau/codegen/nv50_ir_target.cpp | 39 ++++++++++++++-
.../drivers/nouveau/codegen/nv50_ir_target.h | 21 ++++++++
.../drivers/nouveau/nvc0/nvc0_program.c | 25 +++++++++-
.../drivers/nouveau/nvc0/nvc0_program.h | 7 ++-
.../drivers/nouveau/nvc0/nvc0_screen.c | 4 +-
.../drivers/nouveau/nvc0/nvc0_screen.h | 1 +
.../drivers/nouveau/nvc0/nvc0_shader_state.c | 48 ++++++++++++++++++-
src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 3 --
.../nouveau/nvc0/nvc0_state_validate.c | 2 +-
.../drivers/nouveau/nvc0/nvc0_stateobj.h | 2 +-
15 files changed, 239 insertions(+), 19 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 2b9edcf9172..3cc467fdb13 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -99,6 +99,7 @@ struct nv50_ir_prog_info
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
+ void *interpData;
struct nv50_ir_prog_symbol *syms;
uint16_t numSyms;
} bin;
@@ -198,6 +199,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t libPos,
uint32_t dataPos);
+extern void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+ bool force_per_sample, bool flatshade);
+
/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 8f1542959c9..d712c9c300a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0xff;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 1] &= ~(0xf << 19);
+ code[loc + 1] |= (ipa & 0x3) << 21;
+ code[loc + 1] |= (ipa & 0xc) << (19 - 2);
+ code[loc + 0] &= ~(0xff << 23);
+ code[loc + 0] |= reg << 23;
+}
+
void
CodeEmitterGK110::emitINTERP(const Instruction *i)
{
@@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
if (i->saturate)
code[1] |= 1 << 18;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 23);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0xff << 23;
+ addInterp(i->ipa, 0xff, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 6e22788341f..a327d572470 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P()
emitGPR (0x00, insn->def(0));
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0xff;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 1] &= ~(0xf << 0x14);
+ code[loc + 1] |= (ipa & 0x3) << 0x16;
+ code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
+ code[loc + 0] &= ~(0xff << 0x14);
+ code[loc + 0] |= reg << 0x14;
+}
+
void
CodeEmitterGM107::emitIPA()
{
@@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA()
emitGPR(0x14, insn->src(1));
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(2));
+ addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
} else {
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(1));
emitGPR(0x14);
+ addInterp(insn->ipa, 0xff, interpApply);
}
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 6bf5219d346..fd103146c72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i)
}
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0x3f;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 0] &= ~(0xf << 6);
+ code[loc + 0] |= ipa << 6;
+ code[loc + 0] &= ~(0x3f << 26);
+ code[loc + 0] |= reg << 26;
+}
+
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
@@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i)
if (i->saturate)
code[0] |= 1 << 5;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 26);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0x3f << 26;
+ addInterp(i->ipa, 0x3f, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 20);
} else {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index c8efaf5947a..f27a78e715d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1054,7 +1054,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
default:
break;
}
- if (decl->Interp.Location || info->io.sampleInterp)
+ if (decl->Interp.Location)
info->in[i].centroid = 1;
}
@@ -1119,6 +1119,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->io.sampleInterp = 1;
+ break;
default:
break;
}
@@ -1338,6 +1342,8 @@ private:
void handleINTERP(Value *dst0[4]);
+ uint8_t translateInterpMode(const struct nv50_ir_varying *var,
+ operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@@ -1451,8 +1457,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
return sym;
}
-static inline uint8_t
-translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+uint8_t
+Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
@@ -1468,7 +1474,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op)
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
- if (var->centroid)
+ if (var->centroid || info->io.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index fe530c76b62..afc8ff1374f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -166,7 +166,7 @@ void Target::destroy(Target *targ)
delete targ;
}
-CodeEmitter::CodeEmitter(const Target *target) : targ(target)
+CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
{
}
@@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
}
}
info->bin.relocData = emit->getRelocInfo();
+ info->bin.interpData = emit->getInterpInfo();
emitSymbolTable(info);
@@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
return true;
}
+bool
+CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
+{
+ unsigned int n = interpInfo ? interpInfo->count : 0;
+
+ if (!(n % RELOC_ALLOC_INCREMENT)) {
+ size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
+ interpInfo = reinterpret_cast(
+ REALLOC(interpInfo, n ? size : 0,
+ size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
+ if (!interpInfo)
+ return false;
+ if (n == 0)
+ memset(interpInfo, 0, sizeof(InterpInfo));
+ }
+ ++interpInfo->count;
+
+ interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
+ interpInfo->apply = apply;
+
+ return true;
+}
+
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
@@ -471,6 +495,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
info->entry[i].apply(code, info);
}
+void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ nv50_ir::InterpInfo *info = reinterpret_cast(
+ interpData);
+
+ // force_persample_interp: all non-flat -> per-sample
+ // flatshade: all color -> flat
+ for (unsigned i = 0; i < info->count; ++i)
+ info->apply(&info->entry[i], code, force_persample_interp, flatshade);
+}
+
void
nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index 591916eb412..4e33997e1c1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -58,6 +58,23 @@ struct RelocInfo
RelocEntry entry[0];
};
+struct InterpEntry
+{
+ InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {}
+ uint32_t ipa:4; // SC mode used to identify colors
+ uint32_t reg:8; // The reg used for perspective division
+ uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders
+};
+
+typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool);
+
+struct InterpInfo
+{
+ uint32_t count;
+ InterpApply apply;
+ InterpEntry entry[0];
+};
+
class CodeEmitter
{
public:
@@ -78,6 +95,9 @@ public:
inline void *getRelocInfo() const { return relocInfo; }
+ bool addInterp(int ipa, int reg, InterpApply apply);
+ inline void *getInterpInfo() const { return interpInfo; }
+
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
@@ -92,6 +112,7 @@ protected:
uint32_t codeSizeLimit;
RelocInfo *relocInfo;
+ InterpInfo *interpInfo;
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index a168dd684ab..b70b08a4a1e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -424,6 +424,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
for (i = 0; i < info->numInputs; ++i) {
m = nvc0_hdr_interp_mode(&info->in[i]);
+ if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
+ fp->fp.colors |= 1 << info->in[i].si;
+ if (info->in[i].sc)
+ fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
+ }
for (c = 0; c < 4; ++c) {
if (!(info->in[i].mask & (1 << c)))
continue;
@@ -531,7 +536,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.genUserClip = prog->vp.num_ucps;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
- info->io.sampleInterp = prog->fp.sample_interp;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@@ -575,6 +579,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
+ prog->interps = info->bin.interpData;
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->num_barriers = info->numBarriers;
@@ -713,6 +718,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
+ if (prog->interps) {
+ nv50_ir_change_interp(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ prog->fp.flatshade);
+ for (int i = 0; i < 2; i++) {
+ unsigned mask = prog->fp.color_interp[i] >> 4;
+ unsigned interp = prog->fp.color_interp[i] & 3;
+ if (!mask)
+ continue;
+ prog->hdr[14] &= ~(0xff << (8 * i));
+ if (prog->fp.flatshade)
+ interp = NVC0_INTERP_FLAT;
+ for (int c = 0; c < 4; c++)
+ if (mask & (1 << c))
+ prog->hdr[14] |= interp << (2 * (4 * i + c));
+ }
+ }
#ifdef DEBUG
if (debug_get_bool_option("NV50_PROG_DEBUG", false))
@@ -773,6 +795,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->code); /* may be 0 for hardcoded shaders */
FREE(prog->immd_data);
FREE(prog->relocs);
+ FREE(prog->interps);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
FREE(prog->cp.syms);
if (prog->tfb) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 390e0c7a4f0..9c45e7b3e31 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -45,8 +45,10 @@ struct nvc0_program {
} vp;
struct {
uint8_t early_z;
- uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
- uint8_t sample_interp;
+ uint8_t colors;
+ uint8_t color_interp[2];
+ bool force_persample_interp;
+ bool flatshade;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */
@@ -61,6 +63,7 @@ struct nvc0_program {
uint8_t num_barriers;
void *relocs;
+ void *interps;
struct nvc0_transform_feedback_state *tfb;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 422d6d45369..d48c32d2b93 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -180,6 +180,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -202,7 +203,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
@@ -828,6 +828,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 857eb0316c7..8b73102b98b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -38,6 +38,7 @@ struct nvc0_graph_state {
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ bool flatshade;
uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index af837fc4a33..8595800592c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *fp = nvc0->fragprog;
+ struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
- fp->fp.sample_interp = nvc0->min_samples > 1;
+ if (fp->fp.force_persample_interp != rast->force_persample_interp) {
+ /* Force the program to be reuploaded, which will trigger interp fixups
+ * to get applied
+ */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.force_persample_interp = rast->force_persample_interp;
+ }
+
+ /* Shade model works well enough when both colors follow it. However if one
+ * (or both) is explicitly set, then we have to go the patching route.
+ */
+ bool has_explicit_color = fp->fp.colors &&
+ (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
+ ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
+ bool hwflatshade = false;
+ if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
+ /* Force re-upload */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.flatshade = rast->flatshade;
+
+ /* Always smooth-shade in this mode, the shader will decide on its own
+ * when to flat-shade.
+ */
+ } else if (!has_explicit_color) {
+ hwflatshade = rast->flatshade;
+
+ /* No need to binary-patch the shader each time, make sure that it's set
+ * up for the default behaviour.
+ */
+ fp->fp.flatshade = 0;
+ }
+
+ if (hwflatshade != nvc0->state.flatshade) {
+ nvc0->state.flatshade = hwflatshade;
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ }
+
+ if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
+ return;
+ }
if (!nvc0_program_validate(nvc0, fp))
return;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 742bef39247..e95554df12e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
* always emit 16 commands, one for each scissor rectangle, here.
*/
- SB_BEGIN_3D(so, SHADE_MODEL, 1);
- SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
- NVC0_3D_SHADE_MODEL_SMOOTH);
SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index aec06097bbd..f86de31c889 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -645,7 +645,7 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
- { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
index 8bc33c6a0e0..f9680f5a90f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -23,7 +23,7 @@ struct nvc0_blend_stateobj {
struct nvc0_rasterizer_stateobj {
struct pipe_rasterizer_state pipe;
int size;
- uint32_t state[44];
+ uint32_t state[42];
};
struct nvc0_zsa_stateobj {
From a5bae7b31d642ad55bc7bed4f2130788413a1d21 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Tue, 20 Oct 2015 18:50:54 -0400
Subject: [PATCH 148/266] nvc0: share shaders between contexts and build
immediately
Avoid deferring building shaders until draw time, should hopefully
reduce any stuttering, as well as enable shader-db style analysis.
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 3 +++
src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 3 +++
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index d48c32d2b93..6aa4f0b5f24 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -181,6 +181,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -203,7 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index e95554df12e..ba1714da010 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -680,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
+ prog->translated = nvc0_program_translate(
+ prog, nvc0_context(pipe)->screen->base.device->chipset);
+
return (void *)prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index f86de31c889..205e7dc6ae9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
}
+ /* Reset tfb as the shader that owns it may have been deleted. */
+ ctx_to->state.tfb = NULL;
+
if (!ctx_to->vertex)
ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
if (!ctx_to->idxbuf.buffer)
From 5227e915803079e5e72a0b2fde3a11d62af8df99 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Fri, 23 Oct 2015 23:25:33 -0400
Subject: [PATCH 149/266] nv50/ir: adapt to new method for passing in cull/clip
distance masks
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 1 -
src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 5 ++---
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 12 ++++++++----
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 10 ++++------
4 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index cce60550ae5..6ad9dd31681 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
info->prop.gp.instanceCount = 1;
info->prop.gp.maxVertices = 1;
}
- info->io.clipDistance = 0xff;
info->io.pointSize = 0xff;
info->io.instanceId = 0xff;
info->io.vertexId = 0xff;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3cc467fdb13..391130e67af 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -155,9 +155,8 @@ struct nv50_ir_prog_info
uint8_t numBarriers;
struct {
- uint8_t clipDistance; /* index of first clip distance output */
- uint8_t clipDistanceMask; /* mask of clip distances defined */
- uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
+ uint8_t clipDistances; /* number of clip distance outputs */
+ uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f27a78e715d..eb28622eb59 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -910,7 +910,7 @@ bool Source::scanSource()
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
if (info->io.genUserClip > 0) {
- info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
+ info->io.clipDistances = info->io.genUserClip;
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
@@ -919,7 +919,7 @@ bool Source::scanSource()
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
- info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
+ info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
}
}
@@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
else
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->io.clipDistances = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->io.cullDistances = prop->u[0].Data;
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
- info->io.clipDistanceMask |=
- decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
case TGSI_SEMANTIC_SAMPLEMASK:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index b70b08a4a1e..68048f9d6c0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
}
}
- vp->vp.clip_enable = info->io.clipDistanceMask;
- for (i = 0; i < 8; ++i)
- if (info->io.cullDistanceMask & (1 << i))
- vp->vp.clip_mode |= 1 << (i * 4);
+ vp->vp.clip_enable =
+ (1 << (info->io.clipDistances + info->io.cullDistances)) - 1;
+ for (i = 0; i < info->io.cullDistances; ++i)
+ vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
if (info->io.genUserClip < 0)
vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
@@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
vp->hdr[0] = 0x20061 | (1 << 10);
vp->hdr[4] = 0xff000;
- vp->hdr[18] = info->io.clipDistanceMask;
-
return nvc0_vtgp_gen_header(vp, info);
}
From 6ae47a3eb4e30fa92dd60e4a16e016e8585c3c48 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 26 Oct 2015 01:02:18 -0700
Subject: [PATCH 150/266] i965: Update stale comment about unused VUE map
slots.
I changed this from COUNT to PAD in commit 268008f98c3810b9f276df985dc93ef.
Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
---
src/mesa/drivers/dri/i965/brw_compiler.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index bb2fbd4a7b2..91eabaf7787 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -441,9 +441,7 @@ struct brw_vue_map {
* directly correspond to a gl_varying_slot, the value comes from
* brw_varying_slot.
*
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
*/
signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
From b94cdcdada251bb8e866cb7af0f2ff222b55a918 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 26 Oct 2015 01:03:12 -0700
Subject: [PATCH 151/266] i965/fs: Properly check for PAD in fragment shaders
with > 16 varyings.
Commit 268008f98c3810b9f276df985dc93efc0c49f33e changed unused VUE map
slots to be initialized with BRW_VARYING_SLOT_PAD, not COUNT. I missed
updating this. It also means that commit message was wrong, as some
code *did* rely slots being initialized to COUNT.
This may fix a bug with SSO programs with > 16 FS input varyings.
I think we probably just emitted extra pointless code, but probably
didn't break anything. We might also just have no tests for that.
Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 75f1a926c1d..2eef7af4e11 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1456,10 +1456,7 @@ fs_visitor::calculate_urb_setup()
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
slot++) {
int varying = prev_stage_vue_map.slot_to_varying[slot];
- /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is
- * unused.
- */
- if (varying != BRW_VARYING_SLOT_COUNT &&
+ if (varying != BRW_VARYING_SLOT_PAD &&
(nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
From b3d19d20f27c9169641051ac6f855481e58623b3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 26 Oct 2015 00:14:13 -0700
Subject: [PATCH 152/266] glsl: Mark gl_ViewportIndex and gl_Layer varyings as
flat.
Integer varyings need to be flat qualified - all others were already.
I think we just missed this. Presumably some hardware passes this via
sideband and ignores attribute interpolation, so no one has noticed.
Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
---
src/glsl/builtin_variables.cpp | 37 +++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index a6ad1050552..925ffa165bb 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -887,16 +887,22 @@ builtin_variable_generator::generate_uniforms()
void
builtin_variable_generator::generate_vs_special_vars()
{
+ ir_variable *var;
+
if (state->is_version(130, 300))
add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID");
if (state->ARB_draw_instanced_enable)
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID");
- if (state->AMD_vertex_shader_layer_enable)
- add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- if (state->AMD_vertex_shader_viewport_index_enable)
- add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ if (state->AMD_vertex_shader_layer_enable) {
+ var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
+ if (state->AMD_vertex_shader_viewport_index_enable) {
+ var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
if (compatibility) {
add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex");
add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal");
@@ -954,9 +960,14 @@ builtin_variable_generator::generate_tes_special_vars()
void
builtin_variable_generator::generate_gs_special_vars()
{
- add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- if (state->is_version(410, 0) || state->ARB_viewport_array_enable)
- add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ ir_variable *var;
+
+ var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ if (state->is_version(410, 0) || state->ARB_viewport_array_enable) {
+ var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable)
add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID");
@@ -970,7 +981,6 @@ builtin_variable_generator::generate_gs_special_vars()
* the specific case of gl_PrimitiveIDIn. So we don't need to treat
* gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable.
*/
- ir_variable *var;
var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn");
var->data.interpolation = INTERP_QUALIFIER_FLAT;
var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
@@ -984,14 +994,15 @@ builtin_variable_generator::generate_gs_special_vars()
void
builtin_variable_generator::generate_fs_special_vars()
{
+ ir_variable *var;
+
add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord");
add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing");
if (state->is_version(120, 100))
add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord");
if (state->is_version(150, 0)) {
- ir_variable *var =
- add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+ var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
var->data.interpolation = INTERP_QUALIFIER_FLAT;
}
@@ -1043,8 +1054,10 @@ builtin_variable_generator::generate_fs_special_vars()
}
if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) {
- add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
}
}
From 5392328a3218c0a69301badefe8fed79ca0d8642 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 26 Oct 2015 00:52:14 -0700
Subject: [PATCH 153/266] i965: Make calculate_attr_overrides return the URB
read offset.
Traditionally, we've hardcoded "URB Entry Read Offset" to 1 (which
represents 2 vec4 varying slots) to skip over the 8 DWord VUE header.
In order to support ARB_fragment_layer_viewport, we'll need to read
from that header. This patch adds the basic plumbing necessary to
calculate a value dynamically and hook it up in the SBE packets.
Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
---
src/mesa/drivers/dri/i965/brw_state.h | 3 ++-
src/mesa/drivers/dri/i965/gen6_sf_state.c | 13 ++++++++-----
src/mesa/drivers/dri/i965/gen7_sf_state.c | 5 +++--
src/mesa/drivers/dri/i965/gen8_sf_state.c | 6 ++++--
4 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 6fc9c14f6b4..78389011015 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -345,7 +345,8 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length);
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset);
/* gen6_surface_state.c */
void gen6_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 4068f2844a2..0c8c05304f5 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -159,14 +159,16 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length)
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset)
{
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint32_t max_source_attr = 0;
*point_sprite_enables = 0;
*flat_enables = 0;
+ *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
+
/* _NEW_LIGHT */
bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
@@ -228,7 +230,7 @@ calculate_attr_overrides(const struct brw_context *brw,
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
uint16_t attr_override = point_sprite ? 0 :
get_attr_override(&brw->vue_map_geom_out,
- urb_entry_read_offset, attr,
+ *urb_entry_read_offset, attr,
brw->ctx.VertexProgram._TwoSideEnabled,
&max_source_attr);
@@ -276,7 +278,6 @@ upload_sf_state(struct brw_context *brw)
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
uint16_t attr_overrides[16];
uint32_t point_sprite_origin;
@@ -411,8 +412,10 @@ upload_sf_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 698b3d491bc..b1f13aceba4 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -40,7 +40,6 @@ upload_sbe_state(struct brw_context *brw)
uint32_t point_sprite_enables;
uint32_t flat_enables;
int i;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint16_t attr_overrides[16];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
@@ -65,8 +64,10 @@ upload_sbe_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 6b655ee493e..8b6f31f3be6 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -37,6 +37,7 @@ upload_sbe(struct brw_context *brw)
uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
uint16_t attr_overrides[VARYING_SLOT_MAX];
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
uint32_t point_sprite_enables;
uint32_t flat_enables;
int sbe_cmd_length;
@@ -66,7 +67,8 @@ upload_sbe(struct brw_context *brw)
calculate_attr_overrides(brw, attr_overrides,
&point_sprite_enables,
&flat_enables,
- &urb_entry_read_length);
+ &urb_entry_read_length,
+ &urb_entry_read_offset);
/* Typically, the URB entry read length and offset should be programmed in
* 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active stage
@@ -78,7 +80,7 @@ upload_sbe(struct brw_context *brw)
*/
dw1 |=
urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- BRW_SF_URB_ENTRY_READ_OFFSET << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
+ urb_entry_read_offset << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET;
From 8c902a580a490181e7cde29073b11181db4614f8 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 17 Jun 2015 13:06:18 -0700
Subject: [PATCH 154/266] i965: Implement ARB_fragment_layer_viewport.
Normally, we could read gl_Layer from bits 26:16 of R0.0. However, the
specification requires that bogus out-of-range 32-bit values written by
previous stages need to appear in the fragment shader as-written.
Instead, we pass in the full 32-bit value from the VUE header as an
extra flat-shaded varying. We have the SF override the value to 0
when the previous stage didn't actually write a value (it's actually
defined to return 0).
Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 7 ++++-
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 +++++
src/mesa/drivers/dri/i965/gen6_sf_state.c | 31 ++++++++++++++++++++
src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
4 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2eef7af4e11..5a82b041370 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1442,6 +1442,9 @@ fs_visitor::calculate_urb_setup()
}
}
} else {
+ bool include_vue_header =
+ nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
/* We have enough input varyings that the SF/SBE pipeline stage can't
* arbitrarily rearrange them to suit our whim; we have to put them
* in an order that matches the output of the previous pipeline stage
@@ -1451,7 +1454,9 @@ fs_visitor::calculate_urb_setup()
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader);
- int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+ int first_slot =
+ include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
slot++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 4950ba4fe67..9c1f95c2229 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -71,6 +71,14 @@ fs_visitor::nir_setup_inputs()
var->data.origin_upper_left);
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
input, reg), 0xF);
+ } else if (var->data.location == VARYING_SLOT_LAYER) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
+ } else if (var->data.location == VARYING_SLOT_VIEWPORT) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 0c8c05304f5..2634e6ba6fd 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -60,6 +60,23 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
/* Find the VUE slot for this attribute. */
int slot = vue_map->varying_to_slot[fs_attr];
+ /* Viewport and Layer are stored in the VUE header. We need to override
+ * them to zero if earlier stages didn't write them, as GL requires that
+ * they read back as zero when not explicitly set.
+ */
+ if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
+ unsigned override =
+ ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W |
+ ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT;
+
+ if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+ override |= ATTRIBUTE_0_OVERRIDE_Y;
+ if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+ override |= ATTRIBUTE_0_OVERRIDE_Z;
+
+ return override;
+ }
+
/* If there was only a back color written but not front, use back
* as the color instead of undefined
*/
@@ -169,6 +186,20 @@ calculate_attr_overrides(const struct brw_context *brw,
*urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
+ /* BRW_NEW_FRAGMENT_PROGRAM
+ *
+ * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
+ * the full vertex header. Otherwise, we can program the SF to start
+ * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
+ * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+ * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
+ */
+
+ bool fs_needs_vue_header = brw->fragment_program->Base.InputsRead &
+ (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+ *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+
/* _NEW_LIGHT */
bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index c6826d63dde..4433410b412 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -287,6 +287,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_conditional_render_inverted = true;
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.ARB_ES3_compatibility = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
From cf93251bedc831dc252afaec82d654de3cbd9a48 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 28 Oct 2015 22:02:39 -0700
Subject: [PATCH 155/266] docs: Mark GL_ARB_fragment_layer_viewport as done on
i965.
---
docs/GL3.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index d3743c7b4b7..7964a5e0c40 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_copy_image DONE (i965, nv50, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
- GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
+ GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdata DONE (all drivers)
From 85f1f0441326ef3f49a4edeac474599db471ef7f Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez
Date: Thu, 22 Oct 2015 10:07:54 +0200
Subject: [PATCH 156/266] glsl: fix GL_BUFFER_DATA_SIZE value for shader
storage blocks with unsize arrays
From ARB_program_interface_query:
"For the property of BUFFER_DATA_SIZE, then the implementation-dependent
minimum total buffer object size, in basic machine units, required to hold
all active variables associated with an active uniform block, shader
storage block, or atomic counter buffer is written to . If the
final member of an active shader storage block is array with no declared
size, the minimum buffer size is computed assuming the array was declared
as an array with one element."
Fixes the following dEQP-GLES31 tests:
dEQP-GLES31.functional.program_interface_query.shader_storage_block.buffer_data_size.named_block
dEQP-GLES31.functional.program_interface_query.shader_storage_block.buffer_data_size.unnamed_block
dEQP-GLES31.functional.program_interface_query.shader_storage_block.buffer_data_size.block_array
v2:
- Fix comment's indentation and explain that the parser already
checked that unsized array is in last element of a shader
storage block (Iago).
- Add assert (Iago).
Signed-off-by: Samuel Iglesias Gonsalvez
Reviewed-by: Iago Toral Quiroga
---
src/glsl/link_uniform_blocks.cpp | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 5285d8d01e4..d5d30bb0a0d 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -100,7 +100,7 @@ private:
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major, const glsl_type *,
const unsigned packing,
- bool /* last_field */)
+ bool last_field)
{
assert(this->index < this->num_variables);
@@ -131,12 +131,28 @@ private:
unsigned alignment = 0;
unsigned size = 0;
+ /* From ARB_program_interface_query:
+ *
+ * "If the final member of an active shader storage block is array
+ * with no declared size, the minimum buffer size is computed
+ * assuming the array was declared as an array with one element."
+ *
+ * For that reason, we use the base type of the unsized array to calculate
+ * its size. We don't need to check if the unsized array is the last member
+ * of a shader storage block (that check was already done by the parser).
+ */
+ const glsl_type *type_for_size = type;
+ if (type->is_unsized_array()) {
+ assert(last_field);
+ type_for_size = type->without_array();
+ }
+
if (packing == GLSL_INTERFACE_PACKING_STD430) {
alignment = type->std430_base_alignment(v->RowMajor);
- size = type->std430_size(v->RowMajor);
+ size = type_for_size->std430_size(v->RowMajor);
} else {
alignment = type->std140_base_alignment(v->RowMajor);
- size = type->std140_size(v->RowMajor);
+ size = type_for_size->std140_size(v->RowMajor);
}
this->offset = glsl_align(this->offset, alignment);
From 6ce0857e30a8484f1b190ccf37631d64e629a468 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 27 Oct 2015 13:18:41 +0200
Subject: [PATCH 157/266] mesa: add fragdata_arrays list to gl_shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is required to store information about fragdata arrays, currently
these variables get lost and cannot be retrieved later in sensible way
for program interface queries. List will be utilized by next patch.
Patch also modifies opt_dead_builtin_varyings pass to build list when
lowering fragdata arrays. This is identical approach as taken with
packed varyings pass.
Signed-off-by: Tapani Pälli
Reviewed-by: Marta Lofstedt
---
src/glsl/opt_dead_builtin_varyings.cpp | 42 ++++++++++++++++----------
src/mesa/main/mtypes.h | 1 +
2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp
index 31719d20c05..68b70eedf92 100644
--- a/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/src/glsl/opt_dead_builtin_varyings.cpp
@@ -269,14 +269,14 @@ public:
*/
class replace_varyings_visitor : public ir_rvalue_visitor {
public:
- replace_varyings_visitor(exec_list *ir,
+ replace_varyings_visitor(struct gl_shader *sha,
const varying_info_visitor *info,
unsigned external_texcoord_usage,
unsigned external_color_usage,
bool external_has_fog)
- : info(info), new_fog(NULL)
+ : shader(sha), info(info), new_fog(NULL)
{
- void *const ctx = ir;
+ void *const ctx = shader->ir;
memset(this->new_fragdata, 0, sizeof(this->new_fragdata));
memset(this->new_texcoord, 0, sizeof(this->new_texcoord));
@@ -293,14 +293,16 @@ public:
* occurrences of gl_TexCoord will be replaced with.
*/
if (info->lower_texcoord_array) {
- prepare_array(ir, this->new_texcoord, ARRAY_SIZE(this->new_texcoord),
+ prepare_array(shader->ir, this->new_texcoord,
+ ARRAY_SIZE(this->new_texcoord),
VARYING_SLOT_TEX0, "TexCoord", mode_str,
info->texcoord_usage, external_texcoord_usage);
}
/* Handle gl_FragData in the same way like gl_TexCoord. */
if (info->lower_fragdata_array) {
- prepare_array(ir, this->new_fragdata, ARRAY_SIZE(this->new_fragdata),
+ prepare_array(shader->ir, this->new_fragdata,
+ ARRAY_SIZE(this->new_fragdata),
FRAG_RESULT_DATA0, "FragData", mode_str,
info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1);
}
@@ -340,7 +342,7 @@ public:
}
/* Now do the replacing. */
- visit_list_elements(this, ir);
+ visit_list_elements(this, shader->ir);
}
void prepare_array(exec_list *ir,
@@ -389,6 +391,13 @@ public:
/* Remove the gl_FragData array. */
if (this->info->lower_fragdata_array &&
var == this->info->fragdata_array) {
+
+ /* Clone variable for program resource list before it is removed. */
+ if (!shader->fragdata_arrays)
+ shader->fragdata_arrays = new (shader) exec_list;
+
+ shader->fragdata_arrays->push_tail(var->clone(shader, NULL));
+
var->remove();
}
@@ -487,6 +496,7 @@ public:
}
private:
+ struct gl_shader *shader;
const varying_info_visitor *info;
ir_variable *new_fragdata[MAX_DRAW_BUFFERS];
ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
@@ -498,20 +508,20 @@ private:
} /* anonymous namespace */
static void
-lower_texcoord_array(exec_list *ir, const varying_info_visitor *info)
+lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info)
{
- replace_varyings_visitor(ir, info,
+ replace_varyings_visitor(shader, info,
(1 << MAX_TEXTURE_COORD_UNITS) - 1,
1 | 2, true);
}
static void
-lower_fragdata_array(exec_list *ir)
+lower_fragdata_array(struct gl_shader *shader)
{
varying_info_visitor info(ir_var_shader_out, true);
- info.get(ir, 0, NULL);
+ info.get(shader->ir, 0, NULL);
- replace_varyings_visitor(ir, &info, 0, 0, 0);
+ replace_varyings_visitor(shader, &info, 0, 0, 0);
}
@@ -523,7 +533,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
{
/* Lower the gl_FragData array to separate variables. */
if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) {
- lower_fragdata_array(consumer->ir);
+ lower_fragdata_array(consumer);
}
/* Lowering of built-in varyings has no effect with the core context and
@@ -544,7 +554,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (!consumer) {
/* At least eliminate unused gl_TexCoord elements. */
if (producer_info.lower_texcoord_array) {
- lower_texcoord_array(producer->ir, &producer_info);
+ lower_texcoord_array(producer, &producer_info);
}
return;
}
@@ -556,7 +566,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (!producer) {
/* At least eliminate unused gl_TexCoord elements. */
if (consumer_info.lower_texcoord_array) {
- lower_texcoord_array(consumer->ir, &consumer_info);
+ lower_texcoord_array(consumer, &consumer_info);
}
return;
}
@@ -566,7 +576,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (producer_info.lower_texcoord_array ||
producer_info.color_usage ||
producer_info.has_fog) {
- replace_varyings_visitor(producer->ir,
+ replace_varyings_visitor(producer,
&producer_info,
consumer_info.texcoord_usage,
consumer_info.color_usage,
@@ -587,7 +597,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (consumer_info.lower_texcoord_array ||
consumer_info.color_usage ||
consumer_info.has_fog) {
- replace_varyings_visitor(consumer->ir,
+ replace_varyings_visitor(consumer,
&consumer_info,
producer_info.texcoord_usage,
producer_info.color_usage,
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 34120cf5777..10e158618f7 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2292,6 +2292,7 @@ struct gl_shader
struct exec_list *ir;
struct exec_list *packed_varyings;
+ struct exec_list *fragdata_arrays;
struct glsl_symbol_table *symbols;
bool uses_builtin_functions;
From afbe8b6085c4761c25b7bc49a051a08e10a87805 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 27 Oct 2015 13:18:42 +0200
Subject: [PATCH 158/266] glsl: add fragdata arrays to program resource list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This makes sure that user is still able to query properties about
variables that have gotten removed by opt_dead_builtin_varyings pass.
Fixes following OpenGL ES 3.1 test:
ES31-CTS.program_interface_query.output-layout
No Piglit regressions.
v2: cleanup, drop extra parenthesis (Topi)
Signed-off-by: Tapani Pälli
Reviewed-by: Marta Lofstedt
---
src/glsl/linker.cpp | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index cfd8f81c867..48dd2d38633 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3386,6 +3386,12 @@ add_interface_variables(struct gl_shader_program *shProg,
if (strncmp(var->name, "packed:", 7) == 0)
continue;
+ /* Skip fragdata arrays, these are handled separately
+ * by add_fragdata_arrays.
+ */
+ if (strncmp(var->name, "gl_out_FragData", 15) == 0)
+ continue;
+
if (!add_program_resource(shProg, programInterface, var,
build_stageref(shProg, var->name,
var->data.mode) | mask))
@@ -3425,6 +3431,26 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage)
return true;
}
+static bool
+add_fragdata_arrays(struct gl_shader_program *shProg)
+{
+ struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ if (!sh || !sh->fragdata_arrays)
+ return true;
+
+ foreach_in_list(ir_instruction, node, sh->fragdata_arrays) {
+ ir_variable *var = node->as_variable();
+ if (var) {
+ assert(var->data.mode == ir_var_shader_out);
+ if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, var,
+ 1 << MESA_SHADER_FRAGMENT))
+ return false;
+ }
+ }
+ return true;
+}
+
static char*
get_top_level_name(const char *name)
{
@@ -3701,6 +3727,9 @@ build_program_resource_list(struct gl_shader_program *shProg)
return;
}
+ if (!add_fragdata_arrays(shProg))
+ return;
+
/* Add inputs and outputs to the resource list. */
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
GL_PROGRAM_INPUT))
From 6166a8e369b86395ffec7229257f797662f9d1aa Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Wed, 28 Oct 2015 15:38:53 -0400
Subject: [PATCH 159/266] st/mesa: create temporary textures with the same
nr_samples as source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Not sure if this is actually reachable in practice (to have a complex
copy with MS textures).
Signed-off-by: Ilia Mirkin
Reviewed-by: Marek Olšák
---
src/mesa/state_tracker/st_cb_copyimage.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index c94a0b22ea8..75114cdb712 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -359,6 +359,7 @@ same_size_and_swizzle(const struct util_format_description *d1,
static struct pipe_resource *
create_texture(struct pipe_screen *screen, enum pipe_format format,
+ unsigned nr_samples,
unsigned width, unsigned height, unsigned depth)
{
struct pipe_resource templ;
@@ -369,6 +370,7 @@ create_texture(struct pipe_screen *screen, enum pipe_format format,
templ.height0 = height;
templ.depth0 = 1;
templ.array_size = depth;
+ templ.nr_samples = nr_samples;
templ.usage = PIPE_USAGE_DEFAULT;
templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
@@ -439,7 +441,8 @@ handle_complex_copy(struct pipe_context *pipe,
/* Use the temporary texture. Src is converted to a canonical format,
* then proceed the generic swizzled_copy.
*/
- temp = create_texture(pipe->screen, canon_format, src_box->width,
+ temp = create_texture(pipe->screen, canon_format, src->nr_samples,
+ src_box->width,
src_box->height, src_box->depth);
u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
@@ -463,7 +466,8 @@ handle_complex_copy(struct pipe_context *pipe,
/* Use the temporary texture. First, use the generic copy, but use
* a canonical format in the destination. Then convert */
- temp = create_texture(pipe->screen, canon_format, src_box->width,
+ temp = create_texture(pipe->screen, canon_format, dst->nr_samples,
+ src_box->width,
src_box->height, src_box->depth);
u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
From 0260620ab35615838a76bf218788adc957212934 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 28 Oct 2015 11:20:36 +0100
Subject: [PATCH 160/266] nvc0: expose a group of performance metrics on Fermi
This allows to monitor those performance metrics through
GL_AMD_performance_monitor.
Signed-off-by: Samuel Pitoiset
---
src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 14 +++++++++++++-
src/gallium/drivers/nouveau/nvc0/nvc0_query.h | 3 ++-
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 2 +-
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index e4752e2dbc5..f53921092a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -28,6 +28,7 @@
#include "nvc0/nvc0_query.h"
#include "nvc0/nvc0_query_sw.h"
#include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
#include "nvc0/nvc0_query_hw_sm.h"
static struct pipe_query *
@@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
count++;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
- count++;
+ count += 2;
}
}
}
@@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 1;
}
}
+ } else
+ if (id == NVC0_HW_METRIC_QUERY_GROUP) {
+ if (screen->compute) {
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ info->name = "Performance metrics";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ info->max_active_queries = 1;
+ info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
+ return 1;
+ }
+ }
}
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index 6883ab6ab9d..c46361c31aa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe)
* Driver queries groups:
*/
#define NVC0_HW_SM_QUERY_GROUP 0
-#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
+#define NVC0_HW_METRIC_QUERY_GROUP 1
+#define NVC0_SW_QUERY_DRV_STAT_GROUP 2
void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 25aa09be42a..fb2806a805e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
id = nvc0_hw_metric_get_next_query_id(queries, id);
info->name = nvc0_hw_metric_names[id];
info->query_type = NVC0_HW_METRIC_QUERY(id);
- info->group_id = -1;
+ info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
return 1;
}
}
From 8090a1c32692e89b79d89f4dc959a26532dd2f91 Mon Sep 17 00:00:00 2001
From: Nanley Chery
Date: Tue, 13 Oct 2015 21:05:07 -0700
Subject: [PATCH 161/266] mesa/texcompress: Restrict FXT1 format to desktop GL
subset
In agreement with the extension spec and commit
dd0eb004874645135b9aaac3ebbd0aaf274079ea, filter FXT1 formats to the
desktop GL profiles. Now we no longer advertise such formats as supported
in an ES context and then throw an INVALID_ENUM error when the client
tries to use such formats with CompressedTexImage2D.
Fixes the following 26 dEQP tests:
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_neg_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_neg_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_neg_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_pos_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_pos_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_border_cube_pos_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_invalid_size
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_level_max_cube_pos
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_level_max_tex2d
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_level_cube
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_level_tex2d
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_neg_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_neg_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_neg_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_pos_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_pos_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_cube_pos_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_neg_width_height_tex2d
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_neg_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_neg_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_neg_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_pos_x
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_pos_y
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_cube_pos_z
* dEQP-GLES2.functional.negative_api.texture.compressedteximage2d_width_height_max_tex2d
v2. Use _mesa_is_desktop_gl() (Ilia, Ian)
Reviewed-by: Ian Romanick
Signed-off-by: Nanley Chery
---
src/mesa/main/texcompress.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 84973d3fe5d..7fcaa424583 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -286,7 +286,8 @@ GLuint
_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
{
GLuint n = 0;
- if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+ if (_mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.TDFX_texture_compression_FXT1) {
if (formats) {
formats[n++] = GL_COMPRESSED_RGB_FXT1_3DFX;
formats[n++] = GL_COMPRESSED_RGBA_FXT1_3DFX;
From 65f6caf43e8bbea7c8a0d4e146ad7186c276ff9a Mon Sep 17 00:00:00 2001
From: Nanley Chery
Date: Wed, 14 Oct 2015 14:32:43 -0700
Subject: [PATCH 162/266] mesa: Enable ASTC in GLES'
[NUM_]COMPRESSED_TEXTURE_FORMATS queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In OpenGL ES, the COMPRESSED_TEXTURE_FORMATS query returns the set of
supported specific compressed formats. Since ASTC formats fit within
that category, include them in the set and update the
NUM_COMPRESSED_TEXTURE_FORMATS query as well.
This enables GLES2-based ASTC dEQP tests to run. See the Bugzilla for
more info.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92193
Reported-by: Tapani Pälli
Suggested-by: Ian Romanick
Signed-off-by: Nanley Chery
Reviewed-by: Ian Romanick
---
src/mesa/main/texcompress.c | 85 +++++++++++++++++++++++++++----------
1 file changed, 63 insertions(+), 22 deletions(-)
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 7fcaa424583..a8ac19e40d7 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -243,28 +243,6 @@ _mesa_gl_compressed_format_base_format(GLenum format)
* what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
* GL_COMPRESSED_TEXTURE_FORMATS return."
*
- * The KHR_texture_compression_astc_hdr spec says:
- *
- * "Interactions with OpenGL 4.2
- *
- * OpenGL 4.2 supports the feature that compressed textures can be
- * compressed online, by passing the compressed texture format enum as
- * the internal format when uploading a texture using TexImage1D,
- * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
- * Specification, subsection Encoding of Special Internal Formats).
- *
- * Due to the complexity of the ASTC compression algorithm, it is not
- * usually suitable for online use, and therefore ASTC support will be
- * limited to pre-compressed textures only. Where on-device compression
- * is required, a domain-specific limited compressor will typically
- * be used, and this is therefore not suitable for implementation in
- * the driver.
- *
- * In particular, the ASTC format specifiers will not be added to
- * Table 3.14, and thus will not be accepted by the TexImage*D
- * functions, and will not be returned by the (already deprecated)
- * COMPRESSED_TEXTURE_FORMATS query."
- *
* There is no formal spec for GL_ATI_texture_compression_3dc. Since the
* formats added by this extension are luminance-alpha formats, it is
* reasonable to expect them to follow the same rules as
@@ -397,6 +375,69 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
n += 10;
}
}
+
+ /* The KHR_texture_compression_astc_hdr spec says:
+ *
+ * "Interactions with OpenGL 4.2
+ *
+ * OpenGL 4.2 supports the feature that compressed textures can be
+ * compressed online, by passing the compressed texture format enum as
+ * the internal format when uploading a texture using TexImage1D,
+ * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
+ * Specification, subsection Encoding of Special Internal Formats).
+ *
+ * Due to the complexity of the ASTC compression algorithm, it is not
+ * usually suitable for online use, and therefore ASTC support will be
+ * limited to pre-compressed textures only. Where on-device compression
+ * is required, a domain-specific limited compressor will typically
+ * be used, and this is therefore not suitable for implementation in
+ * the driver.
+ *
+ * In particular, the ASTC format specifiers will not be added to
+ * Table 3.14, and thus will not be accepted by the TexImage*D
+ * functions, and will not be returned by the (already deprecated)
+ * COMPRESSED_TEXTURE_FORMATS query."
+ *
+ * The ES and the desktop specs diverge here. In OpenGL ES, the COMPRESSED_TEXTURE_FORMATS
+ * query returns the set of supported specific compressed formats.
+ */
+ if (ctx->API == API_OPENGLES2 &&
+ ctx->Extensions.KHR_texture_compression_astc_ldr) {
+ if (formats) {
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x4_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x8_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x10_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x10_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x12_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR;
+ }
+ else {
+ n += 28;
+ }
+ }
+
return n;
}
From 24c90888aeaf90b13700389b91b74bf63ee9f28d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?=
Date: Fri, 23 Oct 2015 01:06:15 +0200
Subject: [PATCH 163/266] st/mesa: fix mipmap generation for immutable textures
with incomplete pyramids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .
A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).
v2: also bypass all calls to st_finalize_texture (suggested by Marek Olšák)
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Marek Olšák
---
src/mesa/state_tracker/st_gen_mipmap.c | 62 ++++++++++++++------------
1 file changed, 33 insertions(+), 29 deletions(-)
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21f6c5..b3700406df0 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+ if (texObj->Immutable)
+ numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
return numLevels;
@@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
*/
stObj->lastLevel = lastLevel;
- if (pt->last_level < lastLevel) {
- /* The current gallium texture doesn't have space for all the
- * mipmap levels we need to generate. So allocate a new texture.
- */
- struct pipe_resource *oldTex = stObj->pt;
+ if (!texObj->Immutable) {
+ if (pt->last_level < lastLevel) {
+ /* The current gallium texture doesn't have space for all the
+ * mipmap levels we need to generate. So allocate a new texture.
+ */
+ struct pipe_resource *oldTex = stObj->pt;
- /* create new texture with space for more levels */
- stObj->pt = st_texture_create(st,
- oldTex->target,
- oldTex->format,
- lastLevel,
- oldTex->width0,
- oldTex->height0,
- oldTex->depth0,
- oldTex->array_size,
- 0,
- oldTex->bind);
+ /* create new texture with space for more levels */
+ stObj->pt = st_texture_create(st,
+ oldTex->target,
+ oldTex->format,
+ lastLevel,
+ oldTex->width0,
+ oldTex->height0,
+ oldTex->depth0,
+ oldTex->array_size,
+ 0,
+ oldTex->bind);
- /* This will copy the old texture's base image into the new texture
- * which we just allocated.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
+ /* This will copy the old texture's base image into the new texture
+ * which we just allocated.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
- /* release the old tex (will likely be freed too) */
- pipe_resource_reference(&oldTex, NULL);
- st_texture_release_all_sampler_views(st, stObj);
- }
- else {
- /* Make sure that the base texture image data is present in the
- * texture buffer.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
+ /* release the old tex (will likely be freed too) */
+ pipe_resource_reference(&oldTex, NULL);
+ st_texture_release_all_sampler_views(st, stObj);
+ }
+ else {
+ /* Make sure that the base texture image data is present in the
+ * texture buffer.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+ }
}
pt = stObj->pt;
From f75f21a24ae2dd83507f3d4d8007f0fcfe6db802 Mon Sep 17 00:00:00 2001
From: Ivan Kalvachev
Date: Sun, 25 Oct 2015 01:16:58 +0300
Subject: [PATCH 164/266] r600g: Fix special negative immediate constants when
using ABS modifier.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG modifier.
However some shaders define -1.0 constant and want to use it as 1.0.
They do so by using ABS modifier. But r600_bytecode_special_constants()
set NEG in addition to ABS. Since NEG modifier have priority over ABS one,
we get -|1.0| as result, instead of |1.0|.
The patch simply prevents the additional switching of NEG when ABS is set.
[According to Ivan Kalvachev, this bug was fond via
https://github.com/iXit/Mesa-3D/issues/126 and
https://github.com/iXit/Mesa-3D/issues/127]
Signed-off-by: Ivan Kalvachev
Reviewed-by: Nicolai Hähnle
CC:
---
src/gallium/drivers/r600/r600_asm.c | 8 ++++----
src/gallium/drivers/r600/r600_asm.h | 2 +-
src/gallium/drivers/r600/r600_shader.c | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index bc6980660a5..ee7beee3001 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,7 +635,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
return 0;
}
-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
{
switch(value) {
case 0:
@@ -655,11 +655,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
break;
case 0xBF800000: /* -1.0f */
*sel = V_SQ_ALU_SRC_1;
- *neg ^= 1;
+ *neg ^= !abs;
break;
case 0xBF000000: /* -0.5f */
*sel = V_SQ_ALU_SRC_0_5;
- *neg ^= 1;
+ *neg ^= !abs;
break;
default:
*sel = V_SQ_ALU_SRC_LITERAL;
@@ -1208,7 +1208,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
}
if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
r600_bytecode_special_constants(nalu->src[i].value,
- &nalu->src[i].sel, &nalu->src[i].neg);
+ &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
}
if (nalu->dst.sel >= bc->ngpr) {
bc->ngpr = nalu->dst.sel + 1;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 7cf3a090908..d48ad1ebf01 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -255,7 +255,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
const struct r600_bytecode_alu *alu, unsigned type);
void r600_bytecode_special_constants(uint32_t value,
- unsigned *sel, unsigned *neg);
+ unsigned *sel, unsigned *neg, unsigned abs);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8efe902a329..50c03297c1c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1008,7 +1008,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
- r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
return;
}
From 73ff0ead3688519eb76ea8bc32eabb9004e6f37b Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 22 Oct 2015 16:04:15 -0700
Subject: [PATCH 165/266] i965/vec4: Convert src_reg/dst_reg to brw_reg at the
end of the visitor.
This patch makes the visitor convert registers to the HW_REG file at the
very end, after register allocation, post-RA scheduling, and dependency
control flagging. After that, everything is in fixed brw_regs.
This simplifies the code generator, as it can just use the hardware
registers rather than having to interpret our abstract files. In
particular, interpreting the UNIFORM file meant reading prog_data
to figure out where push constants are supposed to start.
Having the part of the code that performs register allocation also
translate everything to hardware registers seems sensible.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_ir_vec4.h | 3 -
src/mesa/drivers/dri/i965/brw_vec4.cpp | 86 ++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
.../drivers/dri/i965/brw_vec4_generator.cpp | 112 +-----------------
4 files changed, 92 insertions(+), 110 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index cc4104c8eb3..29642c6d2a4 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -161,9 +161,6 @@ public:
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
- struct brw_reg get_dst(unsigned gen);
- struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
-
dst_reg dst;
src_reg src[3];
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f87b86fbd3e..272277196f7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1795,6 +1795,90 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
inst->mlen = 2;
}
+void
+vec4_visitor::convert_to_hw_regs()
+{
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ for (int i = 0; i < 3; i++) {
+ struct src_reg &src = inst->src[i];
+ struct brw_reg reg;
+ switch (src.file) {
+ case GRF:
+ reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+ break;
+
+ case IMM:
+ reg = brw_imm_reg(src.type);
+ reg.dw1.ud = src.fixed_hw_reg.dw1.ud;
+ break;
+
+ case UNIFORM:
+ reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
+ (src.reg + src.reg_offset) / 2,
+ ((src.reg + src.reg_offset) % 2) * 4),
+ 0, 4, 1);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+
+ /* This should have been moved to pull constants. */
+ assert(!src.reladdr);
+ break;
+
+ case HW_REG:
+ assert(src.type == src.fixed_hw_reg.type);
+ continue;
+
+ case BAD_FILE:
+ /* Probably unused. */
+ reg = brw_null_reg();
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ src.fixed_hw_reg = reg;
+ }
+
+ dst_reg &dst = inst->dst;
+ struct brw_reg reg;
+
+ switch (inst->dst.file) {
+ case GRF:
+ reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case MRF:
+ assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ reg = brw_message_reg(dst.reg + dst.reg_offset);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case HW_REG:
+ assert(dst.type == dst.fixed_hw_reg.type);
+ reg = dst.fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ reg = brw_null_reg();
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ dst.fixed_hw_reg = reg;
+ }
+}
+
bool
vec4_visitor::run()
{
@@ -1916,6 +2000,8 @@ vec4_visitor::run()
opt_set_dependency_control();
+ convert_to_hw_regs();
+
if (last_scratch > 0) {
prog_data->base.total_scratch =
brw_get_scratch_size(last_scratch * REG_SIZE);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 89b6f912272..81ffa561013 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -159,6 +159,7 @@ public:
bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
void opt_set_dependency_control();
void opt_schedule_instructions();
+ void convert_to_hw_regs();
vec4_instruction *emit(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a84f6c47471..b5a1e09ac5b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -34,108 +34,6 @@ extern "C" {
namespace brw {
-struct brw_reg
-vec4_instruction::get_dst(unsigned gen)
-{
- struct brw_reg brw_reg;
-
- switch (dst.file) {
- case GRF:
- brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case MRF:
- assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
- brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case HW_REG:
- assert(dst.type == dst.fixed_hw_reg.type);
- brw_reg = dst.fixed_hw_reg;
- break;
-
- case BAD_FILE:
- brw_reg = brw_null_reg();
- break;
-
- default:
- unreachable("not reached");
- }
- return brw_reg;
-}
-
-struct brw_reg
-vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
-{
- struct brw_reg brw_reg;
-
- switch (src[i].file) {
- case GRF:
- brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
- break;
-
- case IMM:
- switch (src[i].type) {
- case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(src[i].fixed_hw_reg.dw1.f);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(src[i].fixed_hw_reg.dw1.d);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud);
- break;
- case BRW_REGISTER_TYPE_VF:
- brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud);
- break;
- default:
- unreachable("not reached");
- }
- break;
-
- case UNIFORM:
- brw_reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
- (src[i].reg + src[i].reg_offset) / 2,
- ((src[i].reg + src[i].reg_offset) % 2) * 4),
- 0, 4, 1);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
-
- /* This should have been moved to pull constants. */
- assert(!src[i].reladdr);
- break;
-
- case HW_REG:
- assert(src[i].type == src[i].fixed_hw_reg.type);
- brw_reg = src[i].fixed_hw_reg;
- break;
-
- case BAD_FILE:
- /* Probably unused. */
- brw_reg = brw_null_reg();
- break;
- case ATTR:
- default:
- unreachable("not reached");
- }
-
- return brw_reg;
-}
-
vec4_generator::vec4_generator(const struct brw_compiler *compiler,
void *log_data,
struct brw_vue_prog_data *prog_data,
@@ -476,7 +374,7 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
/* We pass the temporary passed in src0 as the writeback register */
brw_urb_WRITE(p,
- inst->get_src(this->prog_data, 0), /* dest */
+ inst->src[0].fixed_hw_reg, /* dest */
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_ALLOCATE_COMPLETE,
@@ -489,8 +387,8 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
- get_element_ud(inst->get_src(this->prog_data, 0), 0));
+ brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
+ get_element_ud(inst->src[0].fixed_hw_reg, 0));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
@@ -1154,9 +1052,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
- src[i] = inst->get_src(this->prog_data, i);
+ src[i] = inst->src[i].fixed_hw_reg;
}
- dst = inst->get_dst(devinfo->gen);
+ dst = inst->dst.fixed_hw_reg;
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
From 4cba8f5d21e4b50343e7c7bfbeb603b59c5d71dd Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 22 Oct 2015 15:01:27 -0700
Subject: [PATCH 166/266] i965/vec4: Wrap vec4_generator in a C function.
vec4_generator is a class for convenience, but only exports a single
method as its public API. It makes much more sense to just export a
single function.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +++---
src/mesa/drivers/dri/i965/brw_vec4.h | 9 +++++++++
.../drivers/dri/i965/brw_vec4_generator.cpp | 19 +++++++++++++++++++
.../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 ++++++------
4 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 272277196f7..3353e1ebf3a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2108,9 +2108,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
return NULL;
}
- vec4_generator g(compiler, log_data, &prog_data->base,
- mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
- assembly = g.generate_assembly(v.cfg, final_assembly_size, shader);
+ assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
return assembly;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 81ffa561013..5a385c13950 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -52,6 +52,15 @@ extern "C" {
extern "C" {
#endif
+const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size);
+
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index b5a1e09ac5b..029a594e040 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1578,4 +1578,23 @@ vec4_generator::generate_assembly(const cfg_t *cfg,
return brw_get_program(p, assembly_size);
}
+extern "C" const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size)
+{
+ const char *stage_name = _mesa_shader_stage_to_string(nir->stage);
+ const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
+ bool debug_flag = INTEL_DEBUG &
+ intel_debug_flag_for_shader_stage(nir->stage);
+
+ vec4_generator g(compiler, log_data, prog_data, mem_ctx,
+ debug_flag, stage_name, stage_abbrev);
+ return g.generate_assembly(cfg, out_assembly_size, nir);
+}
+
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 0b1c2ecd7f0..cfb5cd95cb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -824,9 +824,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- return g.generate_assembly(v.cfg, final_assembly_size, shader);
+ return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
}
}
@@ -875,9 +875,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
+ ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
+ &prog_data->base, gs->cfg,
+ final_assembly_size);
}
delete gs;
From 1a094a2ee2d63073ac12c8ab0dbd38c0e9270cf5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 22 Oct 2015 15:04:52 -0700
Subject: [PATCH 167/266] i965/vec4: Move vec4_generator class definition into
the .cpp file.
The public API for the generator is brw_vec4_generate_code(); nobody
actually needs to use the class. This means we can extend it without
triggering the recompiles associated with altering brw_vec4.h.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
src/mesa/drivers/dri/i965/brw_vec4.h | 111 ------------------
.../drivers/dri/i965/brw_vec4_generator.cpp | 110 +++++++++++++++++
2 files changed, 110 insertions(+), 111 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5a385c13950..ec8abf49cd8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -392,117 +392,6 @@ private:
unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
-
-/**
- * The vertex shader code generator.
- *
- * Translates VS IR to actual i965 assembly code.
- */
-class vec4_generator
-{
-public:
- vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev);
- ~vec4_generator();
-
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
- const nir_shader *nir);
-
-private:
- void generate_code(const cfg_t *cfg, const nir_shader *nir);
-
- void generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
-
- void generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index);
-
- void generate_vs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write_allocate(vec4_instruction *inst);
- void generate_gs_thread_end(vec4_instruction *inst);
- void generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
- void generate_gs_prepare_channel_masks(struct brw_reg dst);
- void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
- void generate_gs_get_instance_id(struct brw_reg dst);
- void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2);
- void generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_primitive_id(struct brw_reg dst);
- void generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index);
- void generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
- void generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index);
- void generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset);
- void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst);
-
- void generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
-
- void generate_unpack_flags(struct brw_reg dst);
-
- const struct brw_compiler *compiler;
- void *log_data; /* Passed to compiler->*_log functions */
-
- const struct brw_device_info *devinfo;
-
- struct brw_codegen *p;
-
- struct brw_vue_prog_data *prog_data;
-
- void *mem_ctx;
- const char *stage_name;
- const char *stage_abbrev;
- const bool debug_flag;
-};
-
} /* namespace brw */
#endif /* __cplusplus */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 029a594e040..96a52c61bb3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -34,6 +34,116 @@ extern "C" {
namespace brw {
+/**
+ * The vertex shader code generator.
+ *
+ * Translates VS IR to actual i965 assembly code.
+ */
+class vec4_generator
+{
+public:
+ vec4_generator(const struct brw_compiler *compiler, void *log_data,
+ struct brw_vue_prog_data *prog_data,
+ void *mem_ctx,
+ bool debug_flag,
+ const char *stage_name,
+ const char *stage_abbrev);
+ ~vec4_generator();
+
+ const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
+ const nir_shader *nir);
+
+private:
+ void generate_code(const cfg_t *cfg, const nir_shader *nir);
+
+ void generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+ void generate_tex(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg sampler_index);
+
+ void generate_vs_urb_write(vec4_instruction *inst);
+ void generate_gs_urb_write(vec4_instruction *inst);
+ void generate_gs_urb_write_allocate(vec4_instruction *inst);
+ void generate_gs_thread_end(vec4_instruction *inst);
+ void generate_gs_set_write_offset(struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_gs_set_vertex_count(struct brw_reg dst,
+ struct brw_reg src);
+ void generate_gs_svb_write(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_gs_svb_set_destination_index(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
+ void generate_gs_prepare_channel_masks(struct brw_reg dst);
+ void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
+ void generate_gs_get_instance_id(struct brw_reg dst);
+ void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2);
+ void generate_gs_ff_sync(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_gs_set_primitive_id(struct brw_reg dst);
+ void generate_oword_dual_block_offsets(struct brw_reg m1,
+ struct brw_reg index);
+ void generate_scratch_write(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+ void generate_scratch_read(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index);
+ void generate_pull_constant_load(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset);
+ void generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
+ void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
+ struct brw_reg dst);
+
+ void generate_get_buffer_size(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+
+ void generate_unpack_flags(struct brw_reg dst);
+
+ const struct brw_compiler *compiler;
+ void *log_data; /* Passed to compiler->*_log functions */
+
+ const struct brw_device_info *devinfo;
+
+ struct brw_codegen *p;
+
+ struct brw_vue_prog_data *prog_data;
+
+ void *mem_ctx;
+ const char *stage_name;
+ const char *stage_abbrev;
+ const bool debug_flag;
+};
+
vec4_generator::vec4_generator(const struct brw_compiler *compiler,
void *log_data,
struct brw_vue_prog_data *prog_data,
From c6b24448b578c4a8ab031923df3ef1e2d865d5bb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 22 Oct 2015 15:35:15 -0700
Subject: [PATCH 168/266] i965/vec4: Eliminate the vec4_generator class
altogether.
We really weren't taking advantage of vec4_generator being a class.
By adding a "p" parameter to the helper methods, and "prog_data" to
ones which need binding table information, we can convert everything
to static functions.
Signed-off-by: Kenneth Graunke
Reviewed-by: Matt Turner
---
.../drivers/dri/i965/brw_vec4_generator.cpp | 464 +++++++-----------
1 file changed, 180 insertions(+), 284 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 96a52c61bb3..bd816e3926f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -32,142 +32,13 @@ extern "C" {
#include "program/prog_parameter.h"
};
-namespace brw {
+using namespace brw;
-/**
- * The vertex shader code generator.
- *
- * Translates VS IR to actual i965 assembly code.
- */
-class vec4_generator
-{
-public:
- vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev);
- ~vec4_generator();
-
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
- const nir_shader *nir);
-
-private:
- void generate_code(const cfg_t *cfg, const nir_shader *nir);
-
- void generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
-
- void generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index);
-
- void generate_vs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write_allocate(vec4_instruction *inst);
- void generate_gs_thread_end(vec4_instruction *inst);
- void generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
- void generate_gs_prepare_channel_masks(struct brw_reg dst);
- void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
- void generate_gs_get_instance_id(struct brw_reg dst);
- void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2);
- void generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_primitive_id(struct brw_reg dst);
- void generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index);
- void generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
- void generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index);
- void generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset);
- void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst);
-
- void generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
-
- void generate_unpack_flags(struct brw_reg dst);
-
- const struct brw_compiler *compiler;
- void *log_data; /* Passed to compiler->*_log functions */
-
- const struct brw_device_info *devinfo;
-
- struct brw_codegen *p;
-
- struct brw_vue_prog_data *prog_data;
-
- void *mem_ctx;
- const char *stage_name;
- const char *stage_abbrev;
- const bool debug_flag;
-};
-
-vec4_generator::vec4_generator(const struct brw_compiler *compiler,
- void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev)
- : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
- prog_data(prog_data),
- mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
- debug_flag(debug_flag)
-{
- p = rzalloc(mem_ctx, struct brw_codegen);
- brw_init_codegen(devinfo, p, mem_ctx);
-}
-
-vec4_generator::~vec4_generator()
-{
-}
-
-void
-vec4_generator::generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_math1_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
gen4_math(p,
dst,
@@ -186,11 +57,12 @@ check_gen6_math_src_arg(struct brw_reg src)
assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}
-void
-vec4_generator::generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math_gen6(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
@@ -204,11 +76,12 @@ vec4_generator::generate_math_gen6(vec4_instruction *inst,
brw_set_default_access_mode(p, BRW_ALIGN_16);
}
-void
-vec4_generator::generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math2_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
* "Message Payload":
@@ -237,12 +110,15 @@ vec4_generator::generate_math2_gen4(vec4_instruction *inst,
BRW_MATH_PRECISION_FULL);
}
-void
-vec4_generator::generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index)
+static void
+generate_tex(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg sampler_index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
int msg_type = -1;
if (devinfo->gen >= 5) {
@@ -448,8 +324,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
+static void
+generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -462,8 +338,8 @@ vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
+static void
+generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -477,8 +353,8 @@ vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
+static void
+generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
@@ -503,8 +379,8 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
+static void
+generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -518,10 +394,11 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_set_write_offset(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.3):
@@ -544,7 +421,7 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- assert(devinfo->gen >= 7 &&
+ assert(p->devinfo->gen >= 7 &&
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
@@ -559,14 +436,15 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_vertex_count(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (devinfo->gen >= 8) {
+ if (p->devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
@@ -593,11 +471,13 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_svb_write(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
int binding = inst->sol_binding;
bool final_write = inst->sol_final_write;
@@ -631,12 +511,12 @@ vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_svb_set_destination_index(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
-
int vertex = inst->sol_vertex;
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -645,8 +525,10 @@ vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
+static void
+generate_gs_set_dword_2(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -655,8 +537,9 @@ vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
+static void
+generate_gs_prepare_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to left shift just DWORD 4 (the x component belonging to the
* second geometry shader invocation) by 4 bits. So generate the
@@ -672,9 +555,10 @@ vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.5):
@@ -735,8 +619,9 @@ vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
+static void
+generate_gs_get_instance_id(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
* and store into dst.0 & dst.4. So generate the instruction:
@@ -752,11 +637,12 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2)
+static void
+generate_gs_ff_sync_set_primitives(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -773,11 +659,12 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_ff_sync(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* This opcode uses an implied MRF register for:
* - the header of the ff_sync message. And as such it is expected to be
@@ -819,8 +706,8 @@ vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
+static void
+generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
/* In gen6, PrimitiveID is delivered in R0.1 of the payload */
struct brw_reg src = brw_vec8_grf(0, 0);
@@ -831,13 +718,14 @@ vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index)
+static void
+generate_oword_dual_block_offsets(struct brw_codegen *p,
+ struct brw_reg m1,
+ struct brw_reg index)
{
int second_vertex_offset;
- if (devinfo->gen >= 6)
+ if (p->devinfo->gen >= 6)
second_vertex_offset = 1;
else
second_vertex_offset = 16;
@@ -868,8 +756,9 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_unpack_flags(struct brw_reg dst)
+static void
+generate_unpack_flags(struct brw_codegen *p,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -886,16 +775,18 @@ vec4_generator::generate_unpack_flags(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index)
+static void
+generate_scratch_read(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
uint32_t msg_type;
@@ -914,7 +805,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (devinfo->gen < 6)
- brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
+ brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
brw_set_dp_read_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
@@ -925,12 +816,14 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
1 /* rlen */);
}
-void
-vec4_generator::generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index)
+static void
+generate_scratch_write(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
bool write_commit;
@@ -941,7 +834,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
brw_MOV(p,
@@ -998,12 +891,15 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
write_commit);
}
-void
-vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset)
{
+ const struct brw_device_info *devinfo = p->devinfo;
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
@@ -1044,13 +940,15 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index);
}
-void
-vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg surf_index)
+static void
+generate_get_buffer_size(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
{
- assert(devinfo->gen >= 7);
+ assert(p->devinfo->gen >= 7);
assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
surf_index.file == BRW_IMMEDIATE_VALUE);
@@ -1070,11 +968,13 @@ vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
-void
-vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load_gen7(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
{
assert(surf_index.type == BRW_REGISTER_TYPE_UD);
@@ -1131,9 +1031,10 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst)
+static void
+generate_set_simd4x2_header_gen9(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -1148,9 +1049,18 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
+static void
+generate_code(struct brw_codegen *p,
+ const struct brw_compiler *compiler,
+ void *log_data,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg)
{
+ const struct brw_device_info *devinfo = p->devinfo;
+ const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
+ bool debug_flag = INTEL_DEBUG &
+ intel_debug_flag_for_shader_stage(nir->stage);
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
int loop_count = 0;
@@ -1391,9 +1301,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
brw_null_reg());
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], brw_null_reg());
+ generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
} else {
- generate_math1_gen4(inst, dst, src[0]);
+ generate_math1_gen4(p, inst, dst, src[0]);
}
break;
@@ -1404,9 +1314,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
if (devinfo->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], src[1]);
+ generate_math_gen6(p, inst, dst, src[0], src[1]);
} else {
- generate_math2_gen4(inst, dst, src[0], src[1]);
+ generate_math2_gen4(p, inst, dst, src[0], src[1]);
}
break;
@@ -1420,92 +1330,92 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
- generate_tex(inst, dst, src[0], src[1]);
+ generate_tex(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_URB_WRITE:
- generate_vs_urb_write(inst);
+ generate_vs_urb_write(p, inst);
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
- generate_scratch_read(inst, dst, src[0]);
+ generate_scratch_read(p, inst, dst, src[0]);
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- generate_scratch_write(inst, dst, src[0], src[1]);
+ generate_scratch_write(p, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD:
- generate_pull_constant_load(inst, dst, src[0], src[1]);
+ generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
- generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
- generate_set_simd4x2_header_gen9(inst, dst);
+ generate_set_simd4x2_header_gen9(p, inst, dst);
break;
case VS_OPCODE_GET_BUFFER_SIZE:
- generate_get_buffer_size(inst, dst, src[0], src[1]);
+ generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_URB_WRITE:
- generate_gs_urb_write(inst);
+ generate_gs_urb_write(p, inst);
break;
case GS_OPCODE_URB_WRITE_ALLOCATE:
- generate_gs_urb_write_allocate(inst);
+ generate_gs_urb_write_allocate(p, inst);
break;
case GS_OPCODE_SVB_WRITE:
- generate_gs_svb_write(inst, dst, src[0], src[1]);
+ generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_SVB_SET_DST_INDEX:
- generate_gs_svb_set_destination_index(inst, dst, src[0]);
+ generate_gs_svb_set_destination_index(p, inst, dst, src[0]);
break;
case GS_OPCODE_THREAD_END:
- generate_gs_thread_end(inst);
+ generate_gs_thread_end(p, inst);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
- generate_gs_set_write_offset(dst, src[0], src[1]);
+ generate_gs_set_write_offset(p, dst, src[0], src[1]);
break;
case GS_OPCODE_SET_VERTEX_COUNT:
- generate_gs_set_vertex_count(dst, src[0]);
+ generate_gs_set_vertex_count(p, dst, src[0]);
break;
case GS_OPCODE_FF_SYNC:
- generate_gs_ff_sync(inst, dst, src[0], src[1]);
+ generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
- generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]);
+ generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]);
break;
case GS_OPCODE_SET_PRIMITIVE_ID:
- generate_gs_set_primitive_id(dst);
+ generate_gs_set_primitive_id(p, dst);
break;
case GS_OPCODE_SET_DWORD_2:
- generate_gs_set_dword_2(dst, src[0]);
+ generate_gs_set_dword_2(p, dst, src[0]);
break;
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
- generate_gs_prepare_channel_masks(dst);
+ generate_gs_prepare_channel_masks(p, dst);
break;
case GS_OPCODE_SET_CHANNEL_MASKS:
- generate_gs_set_channel_masks(dst, src[0]);
+ generate_gs_set_channel_masks(p, dst, src[0]);
break;
case GS_OPCODE_GET_INSTANCE_ID:
- generate_gs_get_instance_id(dst);
+ generate_gs_get_instance_id(p, dst);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
@@ -1564,7 +1474,7 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
- generate_unpack_flags(dst);
+ generate_unpack_flags(p, dst);
break;
case VEC4_OPCODE_MOV_BYTES: {
@@ -1677,17 +1587,6 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
before_size, after_size);
}
-const unsigned *
-vec4_generator::generate_assembly(const cfg_t *cfg,
- unsigned *assembly_size,
- const nir_shader *nir)
-{
- brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(cfg, nir);
-
- return brw_get_program(p, assembly_size);
-}
-
extern "C" const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
void *log_data,
@@ -1697,14 +1596,11 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler,
const struct cfg_t *cfg,
unsigned *out_assembly_size)
{
- const char *stage_name = _mesa_shader_stage_to_string(nir->stage);
- const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
- bool debug_flag = INTEL_DEBUG &
- intel_debug_flag_for_shader_stage(nir->stage);
+ struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
+ brw_init_codegen(compiler->devinfo, p, mem_ctx);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
- vec4_generator g(compiler, log_data, prog_data, mem_ctx,
- debug_flag, stage_name, stage_abbrev);
- return g.generate_assembly(cfg, out_assembly_size, nir);
+ generate_code(p, compiler, log_data, nir, prog_data, cfg);
+
+ return brw_get_program(p, out_assembly_size);
}
-
-} /* namespace brw */
From 744cc036b9734da13be85183e668536a69b7d224 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Fri, 30 Oct 2015 10:39:13 +1000
Subject: [PATCH 169/266] r600: enable SB for geom shaders on pre-evergreen
I've checked with piglit and one tests fails, but it fails
on evergreen as well, so will get fixed later.
Otherwise SB seems to be working fine for geom shaders on my
rv635.
Signed-off-by: Dave Airlie
---
src/gallium/drivers/r600/r600_shader.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 50c03297c1c..fc6335ae8bc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -162,10 +162,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
goto error;
}
- /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */
- if (rctx->b.chip_class <= R700) {
- use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
- }
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
From 11a7b6bbaaa8790f580ccdd99ac1798629df2041 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Fri, 23 Oct 2015 23:13:07 -0700
Subject: [PATCH 170/266] i965/vec4: Remove unnecessary #includes from the
generator.
Reviewed-by: Iago Toral Quiroga
---
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 8 --------
1 file changed, 8 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index bd816e3926f..c704e238142 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -20,18 +20,10 @@
* IN THE SOFTWARE.
*/
-#include
#include "glsl/glsl_parser_extras.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
-extern "C" {
-#include "brw_eu.h"
-#include "main/macros.h"
-#include "program/prog_print.h"
-#include "program/prog_parameter.h"
-};
-
using namespace brw;
static void
From fa142773d9a7d3249396fe2547da24eaf58962c1 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Fri, 23 Oct 2015 23:15:03 -0700
Subject: [PATCH 171/266] i965/vec4: Drop brw_set_default_* before popping insn
state.
Reviewed-by: Ben Widawsky
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index c704e238142..586f45ed260 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -367,7 +367,6 @@ generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
get_element_ud(inst->src[0].fixed_hw_reg, 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
@@ -424,7 +423,6 @@ generate_gs_set_write_offset(struct brw_codegen *p,
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
retype(src1, BRW_REGISTER_TYPE_UW));
}
- brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
@@ -458,7 +456,6 @@ generate_gs_set_vertex_count(struct brw_codegen *p,
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
}
brw_pop_insn_state(p);
}
From e09e5f992eb233a4e2afb505e150befd7a67deac Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sun, 25 Oct 2015 17:20:54 -0700
Subject: [PATCH 172/266] i965: Set correct field for indirect align16 addrimm.
This has been wrong since the initial import of the i965 driver.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index ebd811f9674..df4859086dd 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -410,7 +410,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
} else {
- brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset);
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
}
}
From 5916b073f6b60b66e86a71b11cc3dc856d780144 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sun, 25 Oct 2015 17:44:59 -0700
Subject: [PATCH 173/266] i965/disasm: Remove unused _addr_mode argument from
src_ia1().
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_disasm.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 35cf99efc10..7cbdbafa536 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -908,7 +908,6 @@ src_ia1(FILE *file,
unsigned _addr_subreg_nr,
unsigned _negate,
unsigned __abs,
- unsigned _addr_mode,
unsigned _horiz_stride, unsigned _width, unsigned _vert_stride)
{
int err = 0;
@@ -1143,7 +1142,6 @@ src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src0_ia_subreg_nr(devinfo, inst),
brw_inst_src0_negate(devinfo, inst),
brw_inst_src0_abs(devinfo, inst),
- brw_inst_src0_address_mode(devinfo, inst),
brw_inst_src0_hstride(devinfo, inst),
brw_inst_src0_width(devinfo, inst),
brw_inst_src0_vstride(devinfo, inst));
@@ -1200,7 +1198,6 @@ src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src1_ia_subreg_nr(devinfo, inst),
brw_inst_src1_negate(devinfo, inst),
brw_inst_src1_abs(devinfo, inst),
- brw_inst_src1_address_mode(devinfo, inst),
brw_inst_src1_hstride(devinfo, inst),
brw_inst_src1_width(devinfo, inst),
brw_inst_src1_vstride(devinfo, inst));
From 9115fa1d132b3ed38180f05c303f9190cde23878 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Wed, 28 Oct 2015 21:11:46 -0700
Subject: [PATCH 174/266] i965/cfg: Handle no-idom case in
cfg_t::dump_domtree().
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_cfg.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 10bcd4bafd4..5d46615bc7b 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -528,7 +528,9 @@ cfg_t::dump_domtree()
{
printf("digraph DominanceTree {\n");
foreach_block(block, this) {
- printf("\t%d -> %d\n", block->idom->num, block->num);
+ if (block->idom) {
+ printf("\t%d -> %d\n", block->idom->num, block->num);
+ }
}
printf("}\n");
}
From 8c4151b866181198cb850137a6b65052e79554b1 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Thu, 29 Oct 2015 10:29:55 -0700
Subject: [PATCH 175/266] i965/fs: Use group(4, 0) to emit an exec-size 4 MOV.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5a82b041370..c40ca9177ec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2642,8 +2642,9 @@ fs_visitor::emit_repclear_shader()
int base_mrf = 1;
int color_mrf = base_mrf + 2;
- fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ fs_inst *mov = bld.exec_all().group(4, 0)
+ .MOV(brw_message_reg(color_mrf),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
fs_inst *write;
if (key->nr_color_regions == 1) {
From ee46c1e6261584326e9153a22861a16778803506 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Wed, 28 Oct 2015 21:19:52 -0700
Subject: [PATCH 176/266] i965/vec4: Test against BRW_IMMEDIATE_VALUE, not IMM.
No functional change, since they were both 3, but BRW_IMMEDIATE_VALUE is
the hardware value and IMM was the IR value -- and you can see that
BRW_IMMEDIATE_VALUE was correctly used in the context of this patch.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 586f45ed260..d74b1b2939d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -416,7 +416,7 @@ generate_gs_set_write_offset(struct brw_codegen *p,
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
- if (src0.file == IMM) {
+ if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
} else {
From bcdf664682ea027b38e3c610e4e038956b1bb0de Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Mon, 26 Oct 2015 04:09:35 -0700
Subject: [PATCH 177/266] i965: Test fixed_hw_reg.file against
BRW_IMMEDIATE_VALUE, not IMM.
No functional change, since they were both 3, but BRW_IMMEDIATE_VALUE is
the hardware value and IMM was the IR value -- and you can see that
BRW_IMMEDIATE_VALUE was correctly used in the context of this patch.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index b710c60148c..cd1f21e1253 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -822,7 +822,7 @@ fs_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -951,7 +951,7 @@ fs_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -1080,7 +1080,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
/* No reads from MRF, and ATTR is already translated away */
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
@@ -1177,7 +1177,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
From 18b194925f5f80eccb53e07609083049b981d60c Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sun, 25 Oct 2015 19:16:39 -0700
Subject: [PATCH 178/266] i965: Print the type and writemask on null
destinations.
These are often useful in debugging, and the writemask (actually
"Channel Enables") determines more than just what goes into the
destination.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_disasm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 7cbdbafa536..df747107188 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -726,7 +726,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
string(file, "null");
- return -1;
+ break;
case BRW_ARF_ADDRESS:
format(file, "a%d", _reg_nr & 0x0f);
break;
From 93268939e44fec7065ceda737fbe6a063203d127 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sun, 25 Oct 2015 19:05:56 -0700
Subject: [PATCH 179/266] i965: Add INTEL_DEBUG=hex to print the hex with the
disassembly.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_eu.c | 2 +-
src/mesa/drivers/dri/i965/intel_debug.c | 1 +
src/mesa/drivers/dri/i965/intel_debug.h | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 1f4a3516fa2..40ec87d38f0 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -261,7 +261,7 @@ void
brw_disassemble(const struct brw_device_info *devinfo,
void *assembly, int start, int end, FILE *out)
{
- bool dump_hex = false;
+ bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
for (int offset = start; offset < end;) {
brw_inst *insn = assembly + offset;
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index f7c02c8a38d..31821affa21 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -73,6 +73,7 @@ static const struct debug_control debug_control[] = {
{ "spill_fs", DEBUG_SPILL_FS },
{ "spill_vec4", DEBUG_SPILL_VEC4 },
{ "cs", DEBUG_CS },
+ { "hex", DEBUG_HEX },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 0a6e1b90b98..25ea32c6771 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -67,6 +67,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_SPILL_FS (1ull << 31)
#define DEBUG_SPILL_VEC4 (1ull << 32)
#define DEBUG_CS (1ull << 33)
+#define DEBUG_HEX (1ull << 34)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
From 85ee2f7fcf59991308632d7e3a64e9a1d22fe24c Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Thu, 29 Oct 2015 16:08:45 -0700
Subject: [PATCH 180/266] i965: Add INTEL_DEBUG=nocompact to disable
instruction compaction.
Reviewed-by: Kenneth Graunke
---
src/mesa/drivers/dri/i965/brw_eu_compact.c | 3 +++
src/mesa/drivers/dri/i965/intel_debug.c | 1 +
src/mesa/drivers/dri/i965/intel_debug.h | 1 +
3 files changed, 5 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index f787ea3d4f8..07ace6bfbcb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -1407,6 +1407,9 @@ void
brw_compact_instructions(struct brw_codegen *p, int start_offset,
int num_annotations, struct annotation *annotation)
{
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
+ return;
+
const struct brw_device_info *devinfo = p->devinfo;
void *store = p->store + start_offset / 16;
/* For an instruction at byte offset 16*i before compaction, this is the
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index 31821affa21..c00d2e786f3 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -74,6 +74,7 @@ static const struct debug_control debug_control[] = {
{ "spill_vec4", DEBUG_SPILL_VEC4 },
{ "cs", DEBUG_CS },
{ "hex", DEBUG_HEX },
+ { "nocompact", DEBUG_NO_COMPACTION },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 25ea32c6771..98bd7e93956 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -68,6 +68,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_SPILL_VEC4 (1ull << 32)
#define DEBUG_CS (1ull << 33)
#define DEBUG_HEX (1ull << 34)
+#define DEBUG_NO_COMPACTION (1ull << 35)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
From f768eaa87d3413610df645dcc94b06fa1fbe0005 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Thu, 29 Oct 2015 22:18:25 -0400
Subject: [PATCH 181/266] nv50: allow per-sample interpolation to be forced via
rast
Uses the same technique as for nvc0 of fixups before upload, and
evicting in case of state change. Removes one source of variants kept by
st/mesa.
Signed-off-by: Ilia Mirkin
---
.../drivers/nouveau/codegen/nv50_ir_driver.h | 2 +-
.../nouveau/codegen/nv50_ir_emit_nv50.cpp | 26 +++++++++++++++++++
.../nouveau/codegen/nv50_ir_from_tgsi.cpp | 4 +--
.../drivers/nouveau/nv50/nv50_program.c | 6 ++++-
.../drivers/nouveau/nv50/nv50_program.h | 3 ++-
.../drivers/nouveau/nv50/nv50_screen.c | 2 +-
.../drivers/nouveau/nv50/nv50_shader_state.c | 16 ++++++++++--
.../nouveau/nv50/nv50_state_validate.c | 2 +-
8 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 391130e67af..c0cab3299b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -144,6 +144,7 @@ struct nv50_ir_prog_info
bool earlyFragTests;
bool separateFragData;
bool usesDiscard;
+ bool sampleInterp; /* perform sample interp on all fp inputs */
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@@ -168,7 +169,6 @@ struct nv50_ir_prog_info
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
- bool sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
bool fp64; /* program uses fp64 math */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 90147668c91..e9b1cef2aa0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -876,6 +876,30 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i)
emitFlagsRd(i);
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int encSize = entry->reg;
+ int loc = entry->loc;
+
+ if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ if (force_persample_interp) {
+ if (encSize == 8)
+ code[loc + 1] |= 1 << 16;
+ else
+ code[loc + 0] |= 1 << 24;
+ } else {
+ if (encSize == 8)
+ code[loc + 1] &= ~(1 << 16);
+ else
+ code[loc + 0] &= ~(1 << 24);
+ }
+ }
+}
+
void
CodeEmitterNV50::emitINTERP(const Instruction *i)
{
@@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
code[0] |= 1;
emitFlagsRd(i);
}
+
+ addInterp(i->ipa, i->encSize, interpApply);
}
void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index eb28622eb59..0031b5e15e8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1125,7 +1125,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
break;
case TGSI_SEMANTIC_SAMPLEID:
case TGSI_SEMANTIC_SAMPLEPOS:
- info->io.sampleInterp = 1;
+ info->prop.fp.sampleInterp = 1;
break;
default:
break;
@@ -1478,7 +1478,7 @@ Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
- if (var->centroid || info->io.sampleInterp)
+ if (var->centroid || info->prop.fp.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index eff4477472c..26ba2c2c380 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info->io.ucpCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
- info->io.sampleInterp = prog->fp.sample_interp;
info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
@@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->fixups = info->bin.relocData;
+ prog->interps = info->bin.interpData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
@@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+ if (prog->interps)
+ nv50_ir_change_interp(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
(prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index f4e8e9402ca..24cc96567d7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -86,7 +86,7 @@ struct nv50_program {
uint32_t interp; /* 0x1988 */
uint32_t colors; /* 0x1904 */
uint8_t has_samplemask;
- uint8_t sample_interp;
+ uint8_t force_persample_interp;
} fp;
struct {
@@ -99,6 +99,7 @@ struct nv50_program {
} gp;
void *fixups; /* relocation records */
+ void *interps; /* interpolation records */
struct nouveau_heap *mem;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index bcad90128d2..398abfb97ba 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -192,6 +192,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return class_3d >= NVA3_3D_CLASS;
/* unsupported caps */
@@ -216,7 +217,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 941555ffbf8..b3fece07c3f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -168,11 +168,23 @@ nv50_fragprog_validate(struct nv50_context *nv50)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_program *fp = nv50->fragprog;
+ struct pipe_rasterizer_state *rast = &nv50->rast->pipe;
- fp->fp.sample_interp = nv50->min_samples > 1;
+ if (fp->fp.force_persample_interp != rast->force_persample_interp) {
+ /* Force the program to be reuploaded, which will trigger interp fixups
+ * to get applied
+ */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.force_persample_interp = rast->force_persample_interp;
+ }
+
+ if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
+ return;
if (!nv50_program_validate(nv50, fp))
- return;
+ return;
nv50_program_update_context_state(nv50, fp, 1);
BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 66dcf43533b..b6181edf24f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -487,7 +487,7 @@ static struct state_validate {
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
- { nv50_fragprog_validate, NV50_NEW_FRAGPROG |
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_MIN_SAMPLES },
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
From 06fa2e864acea8f34eb3821523b1924fe8efdc9b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Thu, 29 Oct 2015 23:25:08 -0400
Subject: [PATCH 182/266] nv50: mark contexts shareable, compile at creation
time
Signed-off-by: Ilia Mirkin
---
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
src/gallium/drivers/nouveau/nv50/nv50_state.c | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 398abfb97ba..a9e0c478322 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -181,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -217,7 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 410e6311e60..671c8ab629c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -725,6 +725,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
+ prog->translated = nv50_program_translate(
+ prog, nv50_context(pipe)->screen->base.device->chipset);
+
return (void *)prog;
}
From 04c42f3ab56a19089b46dea48615aeef8b8225da Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 28 Jul 2015 11:35:03 -0700
Subject: [PATCH 183/266] vc4: Allow user index buffers, to avoid slow readback
for shadow IBs.
Improves low-settings openarena performance by 31.9975% +/- 0.659931%
(n=7).
---
src/gallium/drivers/vc4/vc4_draw.c | 13 +++++++++++--
src/gallium/drivers/vc4/vc4_resource.c | 18 ++++++++++++------
src/gallium/drivers/vc4/vc4_screen.c | 2 +-
src/gallium/drivers/vc4/vc4_state.c | 2 +-
4 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index da81599fcc3..624a236c573 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -25,6 +25,7 @@
#include "util/u_prim.h"
#include "util/u_format.h"
#include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
#include "indices/u_primconvert.h"
#include "vc4_context.h"
@@ -319,7 +320,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
info->count, &offset);
index_size = 2;
} else {
- prsc = vc4->indexbuf.buffer;
+ if (vc4->indexbuf.user_buffer) {
+ prsc = NULL;
+ u_upload_data(vc4->uploader, 0,
+ info->count * index_size,
+ vc4->indexbuf.user_buffer,
+ &offset, &prsc);
+ } else {
+ prsc = vc4->indexbuf.buffer;
+ }
}
struct vc4_resource *rsc = vc4_resource(prsc);
@@ -334,7 +343,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
cl_u32(&bcl, vc4->max_index);
- if (vc4->indexbuf.index_size == 4)
+ if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
pipe_resource_reference(&prsc, NULL);
} else {
cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 5d5166fd818..122bda0bac6 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -667,11 +667,16 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
shadow_offset, &shadow_rsc, &data);
uint16_t *dst = data;
- struct pipe_transfer *src_transfer;
- uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
- ib->offset,
- count * 4,
- PIPE_TRANSFER_READ, &src_transfer);
+ struct pipe_transfer *src_transfer = NULL;
+ uint32_t *src;
+ if (ib->user_buffer) {
+ src = ib->user_buffer;
+ } else {
+ src = pipe_buffer_map_range(pctx, &orig->base.b,
+ ib->offset,
+ count * 4,
+ PIPE_TRANSFER_READ, &src_transfer);
+ }
for (int i = 0; i < count; i++) {
uint32_t src_index = src[i];
@@ -679,7 +684,8 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
dst[i] = src_index;
}
- pctx->transfer_unmap(pctx, src_transfer);
+ if (src_transfer)
+ pctx->transfer_unmap(pctx, src_transfer);
return shadow_rsc;
}
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 432b1d1b86e..bb867611804 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -94,6 +94,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
return 1;
/* lying for GL 2.0 */
@@ -152,7 +153,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 147694644f0..78aa344ab1d 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -305,10 +305,10 @@ vc4_set_index_buffer(struct pipe_context *pctx,
struct vc4_context *vc4 = vc4_context(pctx);
if (ib) {
- assert(!ib->user_buffer);
pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
vc4->indexbuf.index_size = ib->index_size;
vc4->indexbuf.offset = ib->offset;
+ vc4->indexbuf.user_buffer = ib->user_buffer;
} else {
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
}
From 6f231fddff1661c2ca2cfb7bb7a0e6a970bcbf40 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 5 Jun 2015 19:20:57 -0400
Subject: [PATCH 184/266] i965: fix cycle estimates when there's a pipeline
stall
The issue time for an instruction is how many cycles it takes to
actually put it into the pipeline. If there's a pipeline stall that
causes the instruction to be delayed, we should first take that into
account to figure out when the instruction would start executing and
*then* add the issue time. The old code had it backwards, and so we
would underestimate the total time whenever we thought there would be a
pipeline stall by up to the issue time of the instruction.
Reviewed-by: Jason Ekstrand
---
.../dri/i965/brw_schedule_instructions.cpp | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index cd1f21e1253..3e86cb05cce 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1405,18 +1405,19 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
instructions_to_schedule--;
update_register_pressure(chosen->inst);
+ /* If we expected a delay for scheduling, then bump the clock to reflect
+ * that. In reality, the hardware will switch to another hyperthread
+ * and may not return to dispatching our thread for a while even after
+ * we're unblocked. After this, we have the time when the chosen
+ * instruction will start executing.
+ */
+ time = MAX2(time, chosen->unblocked_time);
+
/* Update the clock for how soon an instruction could start after the
* chosen one.
*/
time += issue_time(chosen->inst);
- /* If we expected a delay for scheduling, then bump the clock to reflect
- * that as well. In reality, the hardware will switch to another
- * hyperthread and may not return to dispatching our thread for a while
- * even after we're unblocked.
- */
- time = MAX2(time, chosen->unblocked_time);
-
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
bs->dump_instruction(chosen->inst);
From 85fce2d2f50335b1d204dbaedc36cdd37164a9c0 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Sun, 7 Jun 2015 00:37:27 -0400
Subject: [PATCH 185/266] i965/sched: write-after-read dependencies are free
Although write-after-write dependencies have the same latency as
read-after-write dependencies due to how the register scoreboard works,
write-after-read dependencies aren't checked by the EU at all, so
they're purely a constraint on how the scheduler can order the
instructions.
v2: fix accumulator dependencies too.
Reviewed-by: Jason Ekstrand
---
.../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 3e86cb05cce..094c47aba37 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -927,10 +927,10 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_read(i); r++)
- add_dep(n, last_grf_write[inst->src[i].reg + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg + r], 0);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0);
}
}
} else if (inst->src[i].file == HW_REG &&
@@ -941,12 +941,12 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
for (int r = 0; r < size; r++)
- add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
+ add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
} else {
- add_dep(n, last_fixed_grf_write);
+ add_dep(n, last_fixed_grf_write, 0);
}
} else if (inst->src[i].is_accumulator()) {
- add_dep(n, last_accumulator_write);
+ add_dep(n, last_accumulator_write, 0);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
From 486268bdb03a36faf09d84e0458ff49dd1325c40 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Sat, 6 Jun 2015 13:32:21 -0400
Subject: [PATCH 186/266] i965: always run the post-RA scheduler
Before, we would only do scheduling after register allocation if we
spilled, despite the fact that the pre-RA scheduler was only supposed to
be for register pressure and set the latencies of every instruction to
1. This meant that unless we spilled, which we rarely do, then we never
considered instruction latencies at all, and we usually never bothered
to try and hide texture fetch latency. Although a later commit removes
the setting the latency to 1 part, we still want to always run the
post-RA scheduler since it's able to take the false dependencies that
the register allocator creates into account, and it can be more
aggressive than the pre-RA scheduler since it doesn't have to worry
about register pressure at all.
Test master post-ra-sched diff %diff
bench_OglPSBump2 396.730 402.386 5.656 +1.400%
bench_OglPSBump8 244.370 247.591 3.221 +1.300%
bench_OglPSPhong 241.117 242.002 0.885 +0.300%
bench_OglPSPom 59.555 59.725 0.170 +0.200%
bench_OglShMapPcf 86.149 102.346 16.197 +18.800%
bench_OglVSTangent 388.849 395.489 6.640 +1.700%
bench_trex 65.471 65.862 0.390 +0.500%
bench_trexoff 69.562 70.150 0.588 +0.800%
bench_heaven 25.179 25.254 0.074 +0.200%
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c40ca9177ec..d9e2f2c450f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4979,8 +4979,7 @@ fs_visitor::allocate_registers()
if (failed)
return;
- if (!allocated_without_spills)
- schedule_instructions(SCHEDULE_POST);
+ schedule_instructions(SCHEDULE_POST);
if (last_scratch > 0)
prog_data->total_scratch = brw_get_scratch_size(last_scratch);
From 45cd76e342d1e8ecea38e2048b96cf5be3a30fab Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Sat, 6 Jun 2015 10:55:21 -0400
Subject: [PATCH 187/266] i965: dump scheduling cycle estimates
The heuristic we're using is rather lame, since it assumes everything is
non-uniform and loops execute 10 times, but it should be enough for
measuring improvements in the scheduler that don't result in a change in
the number of instructions.
v2:
- Switch loops and cycle counts to be compatible with older shader-db.
- Make loop heuristic 10x to match with spilling code.
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_cfg.h | 4 ++++
.../drivers/dri/i965/brw_fs_generator.cpp | 11 +++++-----
.../dri/i965/brw_schedule_instructions.cpp | 20 +++++++++++++++++++
.../drivers/dri/i965/brw_vec4_generator.cpp | 11 +++++-----
4 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index a06b0aa1cd0..69e39e8964d 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -90,6 +90,8 @@ struct bblock_t {
struct exec_list parents;
struct exec_list children;
int num;
+
+ unsigned cycle_count;
};
static inline struct backend_instruction *
@@ -285,6 +287,8 @@ struct cfg_t {
int num_blocks;
bool idom_dirty;
+
+ unsigned cycle_count;
};
/* Note that this is implemented with a double for loop -- break will
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 139d1dd17ec..58bd23f6cbc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2269,9 +2269,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
if (unlikely(debug_flag)) {
fprintf(stderr, "Native code for %s\n"
- "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
+ "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
" bytes (%.0f%%)\n",
- shader_name, dispatch_width, before_size / 16, loop_count,
+ shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count,
spill_count, fill_count, promoted_constants, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
@@ -2281,12 +2281,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
}
compiler->shader_debug_log(log_data,
- "%s SIMD%d shader: %d inst, %d loops, "
+ "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
"%d:%d spills:fills, Promoted %u constants, "
"compacted %d to %d bytes.\n",
stage_abbrev, dispatch_width, before_size / 16,
- loop_count, spill_count, fill_count,
- promoted_constants, before_size, after_size);
+ loop_count, cfg->cycle_count, spill_count,
+ fill_count, promoted_constants, before_size,
+ after_size);
return start_offset;
}
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 094c47aba37..46da3251ea7 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
if (block->end()->opcode == BRW_OPCODE_NOP)
block->end()->remove(block);
assert(instructions_to_schedule == 0);
+
+ block->cycle_count = time;
+}
+
+static unsigned get_cycle_count(cfg_t *cfg)
+{
+ unsigned count = 0, multiplier = 1;
+ foreach_block(block, cfg) {
+ if (block->start()->opcode == BRW_OPCODE_DO)
+ multiplier *= 10; /* assume that loops execute ~10 times */
+
+ count += block->cycle_count * multiplier;
+
+ if (block->end()->opcode == BRW_OPCODE_WHILE)
+ multiplier /= 10;
+ }
+
+ return count;
}
void
@@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg)
post_reg_alloc);
bs->dump_instructions();
}
+
+ cfg->cycle_count = get_cycle_count(cfg);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d74b1b2939d..8bc21df5ffc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1558,10 +1558,10 @@ generate_code(struct brw_codegen *p,
nir->info.label ? nir->info.label : "unnamed",
_mesa_shader_stage_to_string(nir->stage), nir->info.name);
- fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
- " bytes (%.0f%%)\n",
+ fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles."
+ "Compacted %d to %d bytes (%.0f%%)\n",
stage_abbrev,
- before_size / 16, loop_count, before_size, after_size,
+ before_size / 16, loop_count, cfg->cycle_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
@@ -1570,9 +1570,10 @@ generate_code(struct brw_codegen *p,
}
compiler->shader_debug_log(log_data,
- "%s vec4 shader: %d inst, %d loops, "
+ "%s vec4 shader: %d inst, %d loops, %u cycles, "
"compacted %d to %d bytes.\n",
- stage_abbrev, before_size / 16, loop_count,
+ stage_abbrev, before_size / 16,
+ loop_count, cfg->cycle_count,
before_size, after_size);
}
From c1860299b807c6a7c237962977294580f9f17c86 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 12 Jun 2015 12:01:35 -0700
Subject: [PATCH 188/266] i965/fs: split out calculation of payload live ranges
We'll need this for the scheduler too, since it wants to know when the
live ranges of payload registers end in order to model them in our
register pressure calculations.
Reviewed-by: Jason Ekstrand
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 +
.../drivers/dri/i965/brw_fs_reg_allocate.cpp | 51 +++++++++++--------
2 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d98769df4dc..8058b344b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -145,6 +145,8 @@ public:
void assign_vs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
+ void calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip);
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
int first_payload_node);
int choose_spill_reg(struct ra_graph *g);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 36388fad98d..9251d9552a5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -330,32 +330,12 @@ count_to_loop_end(const bblock_t *block)
unreachable("not reached");
}
-/**
- * Sets up interference between thread payload registers and the virtual GRFs
- * to be allocated for program temporaries.
- *
- * We want to be able to reallocate the payload for our virtual GRFs, notably
- * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
- * our 128 registers.
- *
- * The layout of the payload registers is:
- *
- * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
- * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data
- * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
- *
- * And we have payload_node_count nodes covering these registers in order
- * (note that in SIMD16, a node is two registers).
- */
-void
-fs_visitor::setup_payload_interference(struct ra_graph *g,
- int payload_node_count,
- int first_payload_node)
+void fs_visitor::calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip)
{
int loop_depth = 0;
int loop_end_ip = 0;
- int payload_last_use_ip[payload_node_count];
for (int i = 0; i < payload_node_count; i++)
payload_last_use_ip[i] = -1;
@@ -426,6 +406,33 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
ip++;
}
+}
+
+
+/**
+ * Sets up interference between thread payload registers and the virtual GRFs
+ * to be allocated for program temporaries.
+ *
+ * We want to be able to reallocate the payload for our virtual GRFs, notably
+ * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
+ * our 128 registers.
+ *
+ * The layout of the payload registers is:
+ *
+ * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
+ * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data
+ * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
+ *
+ * And we have payload_node_count nodes covering these registers in order
+ * (note that in SIMD16, a node is two registers).
+ */
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+ int payload_node_count,
+ int first_payload_node)
+{
+ int payload_last_use_ip[payload_node_count];
+ calculate_payload_ranges(payload_node_count, payload_last_use_ip);
for (int i = 0; i < payload_node_count; i++) {
if (payload_last_use_ip[i] == -1)
From 73caa26e4319f4e0bbc0bf1d5d455ab0d53d20a3 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 9 Jun 2015 10:26:53 -0700
Subject: [PATCH 189/266] i965/sched: use liveness analysis for computing
register pressure
Previously, we were using some heuristics to try and detect when a write
was about to begin a live range, or when a read was about to end a live
range. We never used the liveness analysis information used by the
register allocator, though, which meant that the scheduler's and the
allocator's ideas of when a live range began and ended were different.
Not only did this make our estimate of the register pressure benefit of
scheduling an instruction wrong in some cases, but it was preventing us
from knowing the actual register pressure when scheduling each
instruction, which we want to have in order to switch to register
pressure scheduling only when the register pressure is too high.
This commit rewrites the register pressure tracking code to use the same
model as our register allocator currently uses. We use the results of
liveness analysis, as well as the compute_payload_ranges() function that
we split out in the last commit. This means that we compute live ranges
twice on each round through the register allocator, although we could
speed it up by only recomputing the ranges and not the live in/live out
sets after scheduling, since we only shuffle around instructions within
a single basic block when we schedule.
Shader-db results on bdw:
total instructions in shared programs: 7130187 -> 7129880 (-0.00%)
instructions in affected programs: 1744 -> 1437 (-17.60%)
helped: 1
HURT: 1
total cycles in shared programs: 172535126 -> 172473226 (-0.04%)
cycles in affected programs: 11338636 -> 11276736 (-0.55%)
helped: 876
HURT: 873
LOST: 8
GAINED: 0
v2: use regs_read() in more places.
Reviewed-by: Jason Ekstrand