ac/nir: support 16-bit data in buffer_load_format opcodes

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5003>
2020-05-04 10:43:07 -04:00 · 2020-05-04 10:43:07 -04:00 · c3e0ba52a0
parent b819ba949b
commit c3e0ba52a0
5 changed files with 18 additions and 7 deletions
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@ -1314,6 +1314,10 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];

+	/* D16 is only supported on gfx8+ */
+	assert((channel_type != ctx->f16 && channel_type != ctx->i16) ||
+	       ctx->chip_class >= GFX8);
+
 	LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
 	ac_build_type_name_for_intr(type, type_name, sizeof(type_name));

@ -1389,10 +1393,12 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					 LLVMValueRef voffset,
 					 unsigned num_channels,
 					 unsigned cache_policy,
-					 bool can_speculate)
+					 bool can_speculate,
+					 bool d16)
 {
 	return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
-					   ctx->i32_0, num_channels, ctx->f32,
+					   ctx->i32_0, num_channels,
+					   d16 ? ctx->f16 : ctx->f32,
 					   cache_policy, can_speculate,
 					   true, true);
 }
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@ -338,7 +338,8 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					 LLVMValueRef voffset,
 					 unsigned num_channels,
 					 unsigned cache_policy,
-					 bool can_speculate);
+					 bool can_speculate,
+					 bool d16);

 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@ -1436,12 +1436,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
 		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

+		assert(instr->dest.is_ssa);
 		return ac_build_buffer_load_format(&ctx->ac,
 			                           args->resource,
 			                           args->coords[0],
 			                           ctx->ac.i32_0,
 			                           util_last_bit(mask),
-			                           0, true);
+			                           0, true,
+						   instr->dest.ssa.bit_size == 16);
 	}

 	args->opcode = ac_image_sample;
@ -2782,11 +2784,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");

+		assert(instr->dest.is_ssa);
 		bool can_speculate = access & ACCESS_CAN_REORDER;
 		res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
 						  ctx->ac.i32_0, num_channels,
 						  args.cache_policy,
-						  can_speculate);
+						  can_speculate,
+						  instr->dest.ssa.bit_size == 16);
 		res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);

 		res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@ -460,7 +460,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
   if (key->opt.cs_indexed) {
      for (unsigned i = 0; i < 3; i++) {
         index[i] = ac_build_buffer_load_format(&ctx->ac, input_indexbuf, index[i], ctx->ac.i32_0,
-                                                1, 0, true);
+                                                1, 0, true, false);
         index[i] = ac_to_integer(&ctx->ac, index[i]);
      }
   }
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@ -152,7 +152,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
   for (unsigned i = 0; i < num_fetches; ++i) {
      LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
      fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
-                                               channels_per_fetch, 0, true);
+                                               channels_per_fetch, 0, true, false);
   }

   if (num_fetches == 1 && channels_per_fetch > 1) {