From c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 22 Feb 2010 22:02:58 -0500 Subject: [PATCH 01/19] gallium/draw: initial code to properly support llvm in the draw module code generate big chunks of the vertex pipeline in order to speed up software vertex processing. --- src/gallium/auxiliary/SConscript | 2 + src/gallium/auxiliary/draw/draw_context.c | 26 +- src/gallium/auxiliary/draw/draw_context.h | 10 +- src/gallium/auxiliary/draw/draw_llvm.c | 311 +++++++++ src/gallium/auxiliary/draw/draw_llvm.h | 154 +++++ .../auxiliary/draw/draw_llvm_translate.c | 653 ++++++++++++++++++ src/gallium/auxiliary/draw/draw_private.h | 11 + src/gallium/auxiliary/draw/draw_pt.c | 4 +- src/gallium/auxiliary/draw/draw_pt.h | 1 + .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 432 ++++++++++++ src/gallium/auxiliary/draw/draw_vs_llvm.c | 9 +- src/gallium/drivers/llvmpipe/lp_context.c | 6 +- 12 files changed, 1601 insertions(+), 18 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_llvm.c create mode 100644 src/gallium/auxiliary/draw/draw_llvm.h create mode 100644 src/gallium/auxiliary/draw/draw_llvm_translate.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index b531ad2dbd9..51c4a0cbbe3 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -194,6 +194,8 @@ if drawllvm: 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', + 'draw/draw_llvm.c', + 'draw/draw_pt_fetch_shade_pipeline_llvm.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d5ddc4a6a92..6fa73ad56ba 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -44,6 +44,18 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +boolean draw_init(struct draw_context *draw) +{ ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -57,22 +69,18 @@ struct draw_context *draw_create( void ) if (!draw_pipeline_init( draw )) - goto fail; + return FALSE; if (!draw_pt_init( draw )) - goto fail; + return FALSE; if (!draw_vs_init( draw )) - goto fail; + return FALSE; if (!draw_gs_init( draw )) - goto fail; + return FALSE; - return draw; - -fail: - draw_destroy( draw ); - return NULL; + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index acd81b9712d..d42e4003183 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -40,6 +40,9 @@ #include "pipe/p_state.h" +#ifdef DRAW_LLVM +#include +#endif struct pipe_context; struct draw_context; @@ -197,6 +200,11 @@ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned prim ); - +#ifdef DRAW_LLVM +/******************************************************************************* + * LLVM integration + */ +struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine); +#endif #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c new file mode 100644 index 00000000000..6b0ddfd064d --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -0,0 +1,311 @@ +#include "draw_llvm.h" + +#include "draw_context.h" +#include "draw_vs.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" + +#include "util/u_cpu_detect.h" + +#include + +static void +init_globals(struct draw_llvm *llvm) +{ + LLVMTypeRef vertex_header; + LLVMTypeRef texture_type; + + /* struct vertex_header */ + { + LLVMTypeRef elem_types[3]; + + elem_types[0] = LLVMIntType(32); + elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[2] = LLVMArrayType(elem_types[1], 0); + + vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + + /* these are bit-fields and we can't take address of them + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIPMASK); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_EDGEFLAG); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_PAD); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_VERTEX_ID); + */ + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_DATA); + + LP_CHECK_STRUCT_SIZE(struct vertex_header, + llvm->target, vertex_header); + + LLVMAddTypeName(llvm->module, "vertex_header", vertex_header); + + llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); + } + /* struct draw_jit_texture */ + { + LLVMTypeRef elem_types[4]; + + elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, + llvm->target, texture_type); + + LLVMAddTypeName(llvm->module, "texture", texture_type); + } + + + /* struct draw_jit_context */ + { + LLVMTypeRef elem_types[3]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + + context_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + llvm->target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + llvm->target, context_type, + DRAW_JIT_CONTEXT_TEXTURES_INDEX); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + llvm->target, context_type); + + LLVMAddTypeName(llvm->module, "context", context_type); + + llvm->context_ptr_type = LLVMPointerType(context_type, 0); + } +} + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw) +{ + struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm ); + + util_cpu_detect(); + + llvm->draw = draw; + llvm->engine = draw->engine; + + debug_assert(llvm->engine); + + llvm->module = LLVMModuleCreateWithName("draw_llvm"); + llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + + LLVMAddModuleProvider(llvm->engine, llvm->provider); + + llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); + + llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); + LLVMAddTargetData(llvm->target, llvm->pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(llvm->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(llvm->pass); + } + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddGVNPass(llvm->pass); + LLVMAddCFGSimplificationPass(llvm->pass); + + init_globals(llvm); + + +#if 1 + LLVMDumpModule(llvm->module); +#endif + + return llvm; +} + +void +draw_llvm_destroy(struct draw_llvm *llvm) +{ + free(llvm); +} + +void +draw_llvm_prepare(struct draw_llvm *llvm) +{ +} + + +struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine) +{ + struct draw_context *draw = CALLOC_STRUCT( draw_context ); + if (draw == NULL) + goto fail; + draw->engine = engine; + + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +static void +generate_vs(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef context_ptr, + LLVMValueRef io) +{ + const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; + struct lp_type vs_type = lp_type_float(32); + LLVMValueRef vs_consts; + const LLVMValueRef (*inputs)[NUM_CHANNELS]; + LLVMValueRef (*outputs)[NUM_CHANNELS]; + + lp_build_tgsi_soa(builder, + tokens, + vs_type, + NULL /*struct lp_build_mask_context *mask*/, + vs_consts, + NULL /*pos*/, + inputs, + outputs, + NULL/*sampler*/); +} + +void +draw_llvm_generate(struct draw_llvm *llvm) +{ + LLVMTypeRef arg_types[5]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef function; + LLVMValueRef start, end, count, stride, step; + LLVMValueRef io_ptr; + unsigned i; + unsigned chan; + struct lp_build_context bld; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float(32); + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = LLVMInt32Type(); /* start */ + arg_types[3] = LLVMInt32Type(); /* count */ + arg_types[4] = LLVMInt32Type(); /* stride */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + LLVMSetFunctionCallConv(function, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(function, 0); + io_ptr = LLVMGetParam(function, 1); + start = LLVMGetParam(function, 2); + count = LLVMGetParam(function, 3); + stride = LLVMGetParam(function, 4); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + lp_build_name(stride, "stride"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + + end = lp_build_add(&bld, start, count); + + step = LLVMConstInt(LLVMInt32Type(), 1, 0); + lp_build_loop_begin(builder, start, &lp_loop); + { + LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); + + generate_vs(llvm, + builder, + context_ptr, + io); + } + lp_build_loop_end(builder, end, step, &lp_loop); + + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ + +#ifdef DEBUG + if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + LLVMDumpValue(function); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, function); + + if (1) { + LLVMDumpValue(function); + debug_printf("\n"); + } + + llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function); + + if (1) + lp_disassemble(llvm->jit_func); +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h new file mode 100644 index 00000000000..0a1845f1fb5 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -0,0 +1,154 @@ +#ifndef DRAW_LLVM_H +#define DRAW_LLVM_H + +#include "draw/draw_private.h" + +#include "pipe/p_context.h" + +#include +#include +#include +#include + +struct draw_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + +enum { + DRAW_JIT_TEXTURE_WIDTH = 0, + DRAW_JIT_TEXTURE_HEIGHT, + DRAW_JIT_TEXTURE_STRIDE, + DRAW_JIT_TEXTURE_DATA +}; + +enum { + DRAW_JIT_VERTEX_VERTEX_ID = 0, + DRAW_JIT_VERTEX_CLIP, + DRAW_JIT_VERTEX_DATA +}; + +/** + * This structure is passed directly to the generated vertex shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_jit_context_* macros. + * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_jit_context_vs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "vs_constants") + +#define draw_jit_context_gs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "gs_constants") + +#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 + +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + +/* we are construction a function of the form: + +struct vertex_header { + uint32 vertex_id; + + float clip[4]; + float data[][4]; +}; + +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; + const void *vbuffers; +}; + +void +draw_shader(struct draw_jit_context *context, + struct vertex_header *io, + unsigned start, + unsigned count, + unsigned stride) +{ + // do a fetch and a run vertex shader + for (int i = 0; i < count; ++i) { + struct vertex_header *header = &io[i]; + header->vertex_id = 0xffff; + // follows code-genarted fetch/translate section + // for each vertex_element ... + codegened_translate(header->data[num_element], + context->vertex_elements[num_element], + context->vertex_buffers, + context->vbuffers); + + codegened_vertex_shader(header->data, context->vs_constants); + } + + for (int i = 0; i < count; i += context->primitive_size) { + struct vertex_header *prim[MAX_PRIMITIVE_SIZE]; + for (int j = 0; j < context->primitive_size; ++j) { + header[j] = &io[i + j]; + } + codegened_geometry_shader(prim, gs_constants); + } +} +*/ + +typedef void +(*draw_jit_vert_func)(struct draw_jit_context *context, + struct vertex_header *io, + unsigned start, + unsigned count, + unsigned stride); + +struct draw_llvm { + struct draw_context *draw; + + struct draw_jit_context jit_context; + + draw_jit_vert_func jit_func; + + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMTargetDataRef target; + LLVMPassManagerRef pass; + + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; +}; + + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw); + +void +draw_llvm_destroy(struct draw_llvm *llvm); + +void +draw_llvm_prepare(struct draw_llvm *llvm); + +/* generates the draw jit function */ +void +draw_llvm_generate(struct draw_llvm *llvm); + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c new file mode 100644 index 00000000000..588e97b29ce --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -0,0 +1,653 @@ + + + +#include "util/u_memory.h" +#include "pipe/p_state.h" +#include "translate.h" + + +#define DRAW_DBG 0 + +typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*emit_func)(const float *attrib, void *ptr); + + + +struct translate_generic { + struct translate translate; + + struct { + enum translate_element_type type; + + fetch_func fetch; + unsigned buffer; + unsigned input_offset; + unsigned instance_divisor; + + emit_func emit; + unsigned output_offset; + + char *input_ptr; + unsigned input_stride; + + } attrib[PIPE_MAX_ATTRIBS]; + + unsigned nr_attrib; +}; + + +static struct translate_generic *translate_generic( struct translate *translate ) +{ + return (struct translate_generic *)translate; +} + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \ +static void \ +fetch_##NAME(const void *ptr, float *attrib) \ +{ \ + const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \ + unsigned i; \ + \ + for (i = 0; i < SZ; i++) { \ + attrib[i] = FROM(i); \ + } \ + \ + for (; i < 4; i++) { \ + attrib[i] = defaults[i]; \ + } \ +} \ + \ +static void \ +emit_##NAME(const float *attrib, void *ptr) \ +{ \ + unsigned i; \ + TYPE *out = (TYPE *)ptr; \ + \ + for (i = 0; i < SZ; i++) { \ + out[i] = TO(attrib[i]); \ + } \ +} + +{ + + return conv = instr(builder, bc, ""); +} + +static INLINE LLVMValueRef +from_64_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMDoubleType() , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMFloatType() , ""); + return LLVMBuildLoad(builder, bc, ""); +} + +static INLINE LLVMValueRef +from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(16) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(32) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(16) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(32) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + + +static INLINE LLVMValueRef +from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 255.)); +} + +static INLINE LLVMValueRef +from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(16) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 65535.)); +} + +static INLINE LLVMValueRef +from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(32) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 4294967295.)); +} + +static INLINE LLVMValueRef +from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 127.0)); +} + +static INLINE LLVMValueRef +from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(16) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 32767.0f)); +} + +static INLINE LLVMValueRef +from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(32) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 2147483647.0)); +} + +static INLINE LLVMValueRef +from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMIntType(32) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(builder, 65536.0)); +} + +static INLINE LLVMValueRef +to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); +} + +static INLINE LLVMValueRef +to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + return LLVMBuildLoad(builder, fp, ""); +} + +atic INLINE LLVMValueRef +to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(builder, 255.)); +} + +static INLINE LLVMValueRef +to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(builder, 65535.)); +} + +static INLINE LLVMValueRef +to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(builder, 4294967295.)); +} + +static INLINE LLVMValueRef +to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMUL(builder, uscaled, + LLVMConstReal(builder, 127.0)); +} + +static INLINE LLVMValueRef +to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(builder, 32767.0f)); +} + +static INLINE LLVMValueRef +to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMUL(builder, uscaled, + LLVMConstReal(builder, 2147483647.0)); +} + +static INLINE LLVMValueRef +to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(builder, 65536.0)); +} + +static LLVMValueRef +fetch(LLVMValueRef ptr, int val_size, int nr_components, + LLVMValueRef res) +{ + int i; + int offset = 0; + + for (i = 0; i < nr_components; ++i) { + LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + //getelementptr i8* ptr, i64 offset + LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, ""); + //getelementptr float* res, i64 i + LLVMValueRef res_tmp = LLVMBuildGEP(builder, res, &dst_index, 1, ""); + //bitcast i8* src, to res_type* + //load res_type src + //convert res_type src to float + //store float src, float *dst src + offset += val_size; + } +} + + +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = FROM_8_UNORM(0); + attrib[1] = FROM_8_UNORM(1); + attrib[0] = FROM_8_UNORM(2); + attrib[3] = FROM_8_UNORM(3); +} + +static void +emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[2] = TO_8_UNORM(attrib[0]); + out[1] = TO_8_UNORM(attrib[1]); + out[0] = TO_8_UNORM(attrib[2]); + out[3] = TO_8_UNORM(attrib[3]); +} + +static void +fetch_NULL( const void *ptr, float *attrib ) +{ + attrib[0] = 0; + attrib[1] = 0; + attrib[2] = 0; + attrib[3] = 1; +} + +static void +emit_NULL( const float *attrib, void *ptr ) +{ + /* do nothing is the only sensible option */ +} + +typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef); +typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef); + +struct draw_llvm_translate { + int format; + from_func from; + to_func to; + LLVMTypeRef type; + int num_components; +} translates[] = +{ + {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 1}, + {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 2}, + {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 3}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 4}, + + {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 1}, + {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 2}, + {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 3}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 4}, + + {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 1}, + {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 2}, + {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 3}, + {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 4}, + + {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 1}, + {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 2}, + {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 3}, + {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 4}, + + {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 1}, + {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 2}, + {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 3}, + {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 4}, + + {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 1}, + {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 2}, + {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 3}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 4}, + + {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 1}, + {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 2}, + {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 3}, + {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 4}, + + {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 1}, + {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 2}, + {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 3}, + {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 4}, + + {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 1}, + {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 2}, + {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 3}, + {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 4}, + + {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 1}, + {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 2}, + {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 3}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 4}, + + {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 1}, + {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 2}, + {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 3}, + {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4}, + + {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 1}, + {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 2}, + {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 3}, + {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 4}, + + {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 1}, + {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 2}, + {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 3}, + {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 4}, + + {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 1}, + {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 2}, + {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 3}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 4}, + + {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 1}, + {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 2}, + {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 3}, + {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 4}, + + {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4}, + {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(), 4}, +}; + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void PIPE_CDECL generic_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + unsigned instance_id, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = *elts++; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + const char *src; + + char *dst = (vert + + tg->attrib[attr].output_offset); + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + + if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void PIPE_CDECL generic_run( struct translate *translate, + unsigned start, + unsigned count, + unsigned instance_id, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = start + i; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + char *dst = (vert + + tg->attrib[attr].output_offset); + + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + } else { + data[0] = (float)instance_id; + } + + if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void generic_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_generic *tg = translate_generic(translate); + unsigned i; + + for (i = 0; i < tg->nr_attrib; i++) { + if (tg->attrib[i].buffer == buf) { + tg->attrib[i].input_ptr = ((char *)ptr + + tg->attrib[i].input_offset); + tg->attrib[i].input_stride = stride; + } + } +} + + +static void generic_release( struct translate *translate ) +{ + /* Refcount? + */ + FREE(translate); +} + +struct translate *translate_generic_create( const struct translate_key *key ) +{ + struct translate_generic *tg = CALLOC_STRUCT(translate_generic); + unsigned i; + + if (tg == NULL) + return NULL; + + tg->translate.key = *key; + tg->translate.release = generic_release; + tg->translate.set_buffer = generic_set_buffer; + tg->translate.run_elts = generic_run_elts; + tg->translate.run = generic_run; + + for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; + + tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); + tg->attrib[i].buffer = key->element[i].input_buffer; + tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; + + tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + tg->attrib[i].output_offset = key->element[i].output_offset; + + } + + tg->nr_attrib = key->nr_elements; + + + return &tg->translate; +} diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1e6e01af9e2..7e24e5fd6fc 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -46,6 +46,10 @@ #include "tgsi/tgsi_scan.h" +#ifdef DRAW_LLVM +#include +#endif + struct pipe_context; struct draw_vertex_shader; @@ -237,9 +241,16 @@ struct draw_context unsigned instance_id; +#ifdef DRAW_LLVM + LLVMExecutionEngineRef engine; +#endif void *driver_private; }; +/******************************************************************************* + * Draw common initialization code + */ +boolean draw_init(struct draw_context *draw); /******************************************************************************* * Vertex shader code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 341353f6289..9b1e319551c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -142,7 +142,9 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); + if (!draw->pt.middle.general) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index d5e0d92a605..c2797a759ee 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -147,6 +147,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c new file mode 100644 index 00000000000..ae5956333eb --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -0,0 +1,432 @@ +/************************************************************************** + * + * Copyright 2010 VMWare, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "draw/draw_gs.h" +#include "draw/draw_llvm.h" + +#include "translate/translate.h" + + +struct llvm_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; + + struct draw_llvm *llvm; +}; + + +static void +llvm_middle_end_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + unsigned i; + unsigned instance_id_index = ~0; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + vs->info.num_inputs, + fpme->vertex_size, + instance_id_index ); + if (opt & PT_SHADE) { + vs->prepare(vs, draw); + draw_geometry_shader_prepare(gs, draw); + } + + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + (boolean)draw->bypass_clipping, + (boolean)(draw->identity_viewport || + draw->rasterizer->bypass_vs_clip_and_viewport), + (boolean)draw->rasterizer->gl_rasterization_rules, + (draw->vs.edgeflag_output ? true : false) ); + + if (!(opt & PT_PIPELINE)) { + draw_pt_emit_prepare( fpme->emit, + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } + + /* return even number */ + *max_vertices = *max_vertices & ~1; + + draw_llvm_prepare(fpme->llvm); +} + + + +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, eg if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place.*/ + if (opt & PT_SHADE) + { + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + +static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + fpme->llvm->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + start, + count, + fpme->vertex_size ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fpme->vertex_size, + count ); + } + + FREE(pipeline_verts); +} + + + +static boolean +llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) + return FALSE; + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); + return TRUE; +} + + + +static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + if (fpme->llvm) + draw_llvm_destroy( fpme->llvm ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +{ + struct llvm_middle_end *fpme = 0; + + if (!draw->engine) + return NULL; + + fpme = CALLOC_STRUCT( llvm_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = llvm_middle_end_prepare; + fpme->base.run = llvm_middle_end_run; + fpme->base.run_linear = llvm_middle_end_linear_run; + fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts; + fpme->base.finish = llvm_middle_end_finish; + fpme->base.destroy = llvm_middle_end_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; + + fpme->llvm = draw_llvm_create(draw); + if (!fpme->llvm) + goto fail; + + return &fpme->base; + + fail: + if (fpme) + llvm_middle_end_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 5f7a645f5d8..0c483de4071 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_parse.h" -#ifdef MESA_LLVM +#ifdef DRAW_LLVM struct draw_llvm_vertex_shader { struct draw_vertex_shader base; @@ -64,12 +64,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, unsigned input_stride, unsigned output_stride ) { - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; } - - static void vs_llvm_delete( struct draw_vertex_shader *base ) { @@ -90,6 +86,7 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { +#if 0 struct draw_llvm_vertex_shader *vs; vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); @@ -113,6 +110,8 @@ draw_create_vs_llvm(struct draw_context *draw, vs->machine = draw->vs.machine; return &vs->base; +#endif + return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 9120226de0c..3edc62d0c69 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -40,6 +40,7 @@ #include "lp_context.h" #include "lp_flush.h" #include "lp_perf.h" +#include "lp_screen.h" #include "lp_state.h" #include "lp_surface.h" #include "lp_query.h" @@ -105,6 +106,7 @@ struct pipe_context * llvmpipe_create_context( struct pipe_screen *screen, void *priv ) { struct llvmpipe_context *llvmpipe; + struct llvmpipe_screen *llvmscreen = llvmpipe_screen(screen); llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); if (!llvmpipe) @@ -174,8 +176,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* * Create drawing context and plug our rendering stage into it. */ - llvmpipe->draw = draw_create(); - if (!llvmpipe->draw) + llvmpipe->draw = draw_create_with_llvm(llvmscreen->engine); + if (!llvmpipe->draw) goto fail; /* FIXME: devise alternative to draw_texture_samplers */ From f44af927ff90a9fe1256d8c6f4869a39a55043d3 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 29 Mar 2010 13:19:16 -0400 Subject: [PATCH 02/19] draw llvmpipe: lots of fixes for fetch/emit the values passed are still not right, but the general scheme is looking good. --- src/gallium/auxiliary/draw/draw_llvm.c | 155 ++++++++++++++-- src/gallium/auxiliary/draw/draw_llvm.h | 8 +- .../auxiliary/draw/draw_llvm_translate.c | 165 ++---------------- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 4 +- 4 files changed, 161 insertions(+), 171 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 6b0ddfd064d..91fe838b8b7 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -114,6 +114,10 @@ init_globals(struct draw_llvm *llvm) llvm->context_ptr_type = LLVMPointerType(context_type, 0); } + { + LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMOpaqueType(), 0); + llvm->buffer_ptr_type = LLVMArrayType(buffer_ptr, PIPE_MAX_ATTRIBS); + } } struct draw_llvm * @@ -171,6 +175,7 @@ draw_llvm_destroy(struct draw_llvm *llvm) void draw_llvm_prepare(struct draw_llvm *llvm) { + draw_llvm_generate(llvm); } @@ -194,14 +199,14 @@ fail: static void generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef context_ptr, LLVMValueRef io) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type = lp_type_float(32); LLVMValueRef vs_consts; - const LLVMValueRef (*inputs)[NUM_CHANNELS]; - LLVMValueRef (*outputs)[NUM_CHANNELS]; lp_build_tgsi_soa(builder, tokens, @@ -214,28 +219,129 @@ generate_vs(struct draw_llvm *llvm, NULL/*sampler*/); } +static void +generate_fetch(LLVMBuilderRef builder, + const LLVMValueRef vbuffers_ptr, + LLVMValueRef *res, + struct pipe_vertex_element *velem, + struct pipe_vertex_buffer *vbuf, + LLVMValueRef index) +{ + LLVMValueRef indices = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, 0); + LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, + &indices, 1, ""); + LLVMValueRef stride = LLVMBuildMul(builder, + LLVMConstInt(LLVMInt32Type(), vbuf->stride, 0), + index, ""); + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), vbuf->buffer_offset, 0), + ""); + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + ""); + + vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); + + *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); +} + +static LLVMValueRef +aos_to_soa(LLVMBuilderRef builder, + LLVMValueRef val0, + LLVMValueRef val1, + LLVMValueRef val2, + LLVMValueRef val3, + LLVMValueRef channel) +{ + LLVMValueRef ex, res; + + ex = LLVMBuildExtractElement(builder, val0, + channel, ""); + res = LLVMBuildInsertElement(builder, + LLVMConstNull(LLVMTypeOf(val0)), + ex, + LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val1, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 1, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val2, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 2, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val3, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 3, 0), + ""); + + return res; +} + +static void +convert_to_soa(LLVMBuilderRef builder, + LLVMValueRef (*aos)[NUM_CHANNELS], + LLVMValueRef (*soa)[NUM_CHANNELS], + int num_attribs) +{ + int i; + + debug_assert(NUM_CHANNELS == 4); + + for (i = 0; i < num_attribs; ++i) { + LLVMValueRef val0 = aos[i][0]; + LLVMValueRef val1 = aos[i][1]; + LLVMValueRef val2 = aos[i][2]; + LLVMValueRef val3 = aos[i][3]; + + soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 0, 0)); + soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 1, 0)); + soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 2, 0)); + soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 3, 0)); + + } +} + void draw_llvm_generate(struct draw_llvm *llvm) { - LLVMTypeRef arg_types[5]; + LLVMTypeRef arg_types[6]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef function; LLVMValueRef start, end, count, stride, step; - LLVMValueRef io_ptr; - unsigned i; + LLVMValueRef io_ptr, vbuffers_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; unsigned chan; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float(32); + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ - arg_types[2] = LLVMInt32Type(); /* start */ - arg_types[3] = LLVMInt32Type(); /* count */ - arg_types[4] = LLVMInt32Type(); /* stride */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMInt32Type(); /* start */ + arg_types[4] = LLVMInt32Type(); /* count */ + arg_types[5] = LLVMInt32Type(); /* stride */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -247,12 +353,14 @@ draw_llvm_generate(struct draw_llvm *llvm) context_ptr = LLVMGetParam(function, 0); io_ptr = LLVMGetParam(function, 1); - start = LLVMGetParam(function, 2); - count = LLVMGetParam(function, 3); - stride = LLVMGetParam(function, 4); + vbuffers_ptr = LLVMGetParam(function, 2); + start = LLVMGetParam(function, 3); + count = LLVMGetParam(function, 4); + stride = LLVMGetParam(function, 5); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); lp_build_name(start, "start"); lp_build_name(count, "count"); lp_build_name(stride, "stride"); @@ -269,13 +377,34 @@ draw_llvm_generate(struct draw_llvm *llvm) end = lp_build_add(&bld, start, count); - step = LLVMConstInt(LLVMInt32Type(), 1, 0); + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); lp_build_loop_begin(builder, start, &lp_loop); { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + struct pipe_vertex_buffer *vbuf = &draw->pt.vertex_buffer[ + velem->vertex_buffer_index]; + + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vbuf, true_index); + } + } + convert_to_soa(builder, inputs, aos_attribs, + draw->pt.nr_vertex_elements); + generate_vs(llvm, builder, + outputs, + inputs, context_ptr, io); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 0a1845f1fb5..cbbfeeccc56 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -78,12 +78,12 @@ struct draw_jit_context const float *gs_constants; struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; - const void *vbuffers; }; void draw_shader(struct draw_jit_context *context, struct vertex_header *io, + const void *vbuffers[PIPE_MAX_ATTRIBS], unsigned start, unsigned count, unsigned stride) @@ -115,6 +115,7 @@ draw_shader(struct draw_jit_context *context, typedef void (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, + const void *vbuffers[PIPE_MAX_ATTRIBS], unsigned start, unsigned count, unsigned stride); @@ -134,6 +135,7 @@ struct draw_llvm { LLVMTypeRef context_ptr_type; LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef buffer_ptr_type; }; @@ -150,5 +152,9 @@ draw_llvm_prepare(struct draw_llvm *llvm); void draw_llvm_generate(struct draw_llvm *llvm); +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format); #endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 588e97b29ce..012ca3f6f03 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -492,162 +492,17 @@ struct draw_llvm_translate { {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(), 4}, }; -/** - * Fetch vertex attributes for 'count' vertices. - */ -static void PIPE_CDECL generic_run_elts( struct translate *translate, - const unsigned *elts, - unsigned count, - unsigned instance_id, - void *output_buffer ) + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format) { - struct translate_generic *tg = translate_generic(translate); - char *vert = output_buffer; - unsigned nr_attrs = tg->nr_attrib; - unsigned attr; - unsigned i; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - unsigned elt = *elts++; - - for (attr = 0; attr < nr_attrs; attr++) { - float data[4]; - const char *src; - - char *dst = (vert + - tg->attrib[attr].output_offset); - - if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); - } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; - } - - tg->attrib[attr].fetch( src, data ); - - if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", - i, elt, attr, data[0], data[1], data[2], data[3]); - - tg->attrib[attr].emit( data, dst ); - } - - vert += tg->translate.key.output_stride; - } -} - - - -static void PIPE_CDECL generic_run( struct translate *translate, - unsigned start, - unsigned count, - unsigned instance_id, - void *output_buffer ) -{ - struct translate_generic *tg = translate_generic(translate); - char *vert = output_buffer; - unsigned nr_attrs = tg->nr_attrib; - unsigned attr; - unsigned i; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - unsigned elt = start + i; - - for (attr = 0; attr < nr_attrs; attr++) { - float data[4]; - - char *dst = (vert + - tg->attrib[attr].output_offset); - - if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { - const char *src; - - if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); - } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; - } - - tg->attrib[attr].fetch( src, data ); - } else { - data[0] = (float)instance_id; - } - - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", - i, attr, data[0], data[1], data[2], data[3]); - - tg->attrib[attr].emit( data, dst ); - } - - vert += tg->translate.key.output_stride; - } -} - - - -static void generic_set_buffer( struct translate *translate, - unsigned buf, - const void *ptr, - unsigned stride ) -{ - struct translate_generic *tg = translate_generic(translate); - unsigned i; - - for (i = 0; i < tg->nr_attrib; i++) { - if (tg->attrib[i].buffer == buf) { - tg->attrib[i].input_ptr = ((char *)ptr + - tg->attrib[i].input_offset); - tg->attrib[i].input_stride = stride; + int i; + for (i = 0; i < Elements(translates); ++i) { + if (translates[i].format == from_format) { + return translates.from_func(builder, vbuffer, from_format); } } -} - - -static void generic_release( struct translate *translate ) -{ - /* Refcount? - */ - FREE(translate); -} - -struct translate *translate_generic_create( const struct translate_key *key ) -{ - struct translate_generic *tg = CALLOC_STRUCT(translate_generic); - unsigned i; - - if (tg == NULL) - return NULL; - - tg->translate.key = *key; - tg->translate.release = generic_release; - tg->translate.set_buffer = generic_set_buffer; - tg->translate.run_elts = generic_run_elts; - tg->translate.run = generic_run; - - for (i = 0; i < key->nr_elements; i++) { - tg->attrib[i].type = key->element[i].type; - - tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); - tg->attrib[i].buffer = key->element[i].input_buffer; - tg->attrib[i].input_offset = key->element[i].input_offset; - tg->attrib[i].instance_divisor = key->element[i].instance_divisor; - - tg->attrib[i].emit = get_emit_func(key->element[i].output_format); - tg->attrib[i].output_offset = key->element[i].output_offset; - - } - - tg->nr_attrib = key->nr_elements; - - - return &tg->translate; + return LLVMGetUndef(LLVMTypeOf(vbuffer)); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index ae5956333eb..38b1c4462db 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -110,8 +110,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, */ draw_pt_post_vs_prepare( fpme->post_vs, (boolean)draw->bypass_clipping, - (boolean)(draw->identity_viewport || - draw->rasterizer->bypass_vs_clip_and_viewport), + (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? true : false) ); @@ -242,6 +241,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, fpme->llvm->jit_func( &fpme->llvm->jit_context, pipeline_verts, + draw->pt.user.vbuffer, start, count, fpme->vertex_size ); From 1963112f9d0a2ed8e237641eef8eb384365d1375 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 30 Mar 2010 12:35:40 -0400 Subject: [PATCH 03/19] draw llvm: various fixes for the translation code the from translation isn't quite right yet --- src/gallium/auxiliary/SConscript | 3 +- src/gallium/auxiliary/draw/draw_gs.c | 12 +- src/gallium/auxiliary/draw/draw_llvm.c | 12 +- src/gallium/auxiliary/draw/draw_llvm.h | 2 +- .../auxiliary/draw/draw_llvm_translate.c | 396 ++++++++---------- 5 files changed, 194 insertions(+), 231 deletions(-) diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 76675fb69c2..b234b2f5f4e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -205,7 +205,8 @@ if drawllvm: 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', - 'draw/draw_pt_fetch_shade_pipeline_llvm.c' + 'draw/draw_pt_fetch_shade_pipeline_llvm.c', + 'draw/draw_llvm_translate.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 7069aa6b181..131deed43e4 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -342,10 +342,10 @@ void draw_geometry_shader_delete(struct draw_geometry_shader *shader) void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw) { - if (shader->machine->Tokens != shader->state.tokens) { - tgsi_exec_machine_bind_shader(shader->machine, - shader->state.tokens, - draw->gs.num_samplers, - draw->gs.samplers); - } + if (shader && shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 91fe838b8b7..aa4a310a07d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -115,8 +115,8 @@ init_globals(struct draw_llvm *llvm) llvm->context_ptr_type = LLVMPointerType(context_type, 0); } { - LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMOpaqueType(), 0); - llvm->buffer_ptr_type = LLVMArrayType(buffer_ptr, PIPE_MAX_ATTRIBS); + LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); + llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); } } @@ -221,19 +221,19 @@ generate_vs(struct draw_llvm *llvm, static void generate_fetch(LLVMBuilderRef builder, - const LLVMValueRef vbuffers_ptr, + LLVMValueRef vbuffers_ptr, LLVMValueRef *res, struct pipe_vertex_element *velem, struct pipe_vertex_buffer *vbuf, LLVMValueRef index) { - LLVMValueRef indices = LLVMConstInt(LLVMInt32Type(), - velem->vertex_buffer_index, 0); + LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef stride = LLVMBuildMul(builder, LLVMConstInt(LLVMInt32Type(), vbuf->stride, 0), index, ""); + stride = LLVMBuildAdd(builder, stride, LLVMConstInt(LLVMInt32Type(), vbuf->buffer_offset, 0), ""); @@ -393,7 +393,7 @@ draw_llvm_generate(struct draw_llvm *llvm) struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; struct pipe_vertex_buffer *vbuf = &draw->pt.vertex_buffer[ velem->vertex_buffer_index]; - + LLVMDumpValue(function); generate_fetch(builder, vbuffers_ptr, &aos_attribs[j][i], velem, vbuf, true_index); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index cbbfeeccc56..e375008f3b4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -115,7 +115,7 @@ draw_shader(struct draw_jit_context *context, typedef void (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, - const void *vbuffers[PIPE_MAX_ATTRIBS], + const char *vbuffers[PIPE_MAX_ATTRIBS], unsigned start, unsigned count, unsigned stride); diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 012ca3f6f03..6cbe98c4893 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -1,86 +1,23 @@ +#include "draw_private.h" +#include "draw_context.h" +#include "draw_llvm.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" #include "util/u_memory.h" #include "pipe/p_state.h" -#include "translate.h" #define DRAW_DBG 0 -typedef void (*fetch_func)(const void *ptr, float *attrib); -typedef void (*emit_func)(const float *attrib, void *ptr); - - - -struct translate_generic { - struct translate translate; - - struct { - enum translate_element_type type; - - fetch_func fetch; - unsigned buffer; - unsigned input_offset; - unsigned instance_divisor; - - emit_func emit; - unsigned output_offset; - - char *input_ptr; - unsigned input_stride; - - } attrib[PIPE_MAX_ATTRIBS]; - - unsigned nr_attrib; -}; - - -static struct translate_generic *translate_generic( struct translate *translate ) -{ - return (struct translate_generic *)translate; -} - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \ -static void \ -fetch_##NAME(const void *ptr, float *attrib) \ -{ \ - const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \ - unsigned i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib[i] = FROM(i); \ - } \ - \ - for (; i < 4; i++) { \ - attrib[i] = defaults[i]; \ - } \ -} \ - \ -static void \ -emit_##NAME(const float *attrib, void *ptr) \ -{ \ - unsigned i; \ - TYPE *out = (TYPE *)ptr; \ - \ - for (i = 0; i < SZ; i++) { \ - out[i] = TO(attrib[i]); \ - } \ -} - -{ - - return conv = instr(builder, bc, ""); -} - -static INLINE LLVMValueRef +static LLVMValueRef from_64_float(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, @@ -89,7 +26,7 @@ from_64_float(LLVMBuilderRef builder, LLVMValueRef val) return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); } -static INLINE LLVMValueRef +static LLVMValueRef from_32_float(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, @@ -154,7 +91,7 @@ from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef l = LLVMBuildLoad(builder, val, ""); LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 255.)); + LLVMConstReal(LLVMFloatType(), 255.), ""); } static INLINE LLVMValueRef @@ -165,7 +102,7 @@ from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 65535.)); + LLVMConstReal(LLVMFloatType(), 65535.), ""); } static INLINE LLVMValueRef @@ -177,7 +114,7 @@ from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 4294967295.)); + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); } static INLINE LLVMValueRef @@ -186,7 +123,7 @@ from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef l = LLVMBuildLoad(builder, val, ""); LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 127.0)); + LLVMConstReal(LLVMFloatType(), 127.0), ""); } static INLINE LLVMValueRef @@ -197,7 +134,7 @@ from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 32767.0f)); + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); } static INLINE LLVMValueRef @@ -209,7 +146,7 @@ from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 2147483647.0)); + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); } static INLINE LLVMValueRef @@ -221,23 +158,23 @@ from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(builder, 65536.0)); + LLVMConstReal(LLVMFloatType(), 65536.0), ""); } -static INLINE LLVMValueRef +static LLVMValueRef to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) { LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); } -static INLINE LLVMValueRef +static LLVMValueRef to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) { return LLVMBuildLoad(builder, fp, ""); } -atic INLINE LLVMValueRef +static INLINE LLVMValueRef to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) { LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); @@ -285,7 +222,7 @@ to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(builder, 255.)); + LLVMConstReal(LLVMFloatType(), 255.), ""); } static INLINE LLVMValueRef @@ -294,7 +231,7 @@ to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(builder, 65535.)); + LLVMConstReal(LLVMFloatType(), 65535.), ""); } static INLINE LLVMValueRef @@ -304,7 +241,7 @@ to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(builder, 4294967295.)); + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); } static INLINE LLVMValueRef @@ -312,8 +249,8 @@ to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef l = LLVMBuildLoad(builder, val, ""); LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); - return LLVMBuildFMUL(builder, uscaled, - LLVMConstReal(builder, 127.0)); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); } static INLINE LLVMValueRef @@ -322,7 +259,7 @@ to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(builder, 32767.0f)); + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); } static INLINE LLVMValueRef @@ -331,8 +268,8 @@ to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); - return LLVMBuildFMUL(builder, uscaled, - LLVMConstReal(builder, 2147483647.0)); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); } static INLINE LLVMValueRef @@ -342,157 +279,177 @@ to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp) LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(builder, 65536.0)); -} - -static LLVMValueRef -fetch(LLVMValueRef ptr, int val_size, int nr_components, - LLVMValueRef res) -{ - int i; - int offset = 0; - - for (i = 0; i < nr_components; ++i) { - LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); - LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); - //getelementptr i8* ptr, i64 offset - LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, ""); - //getelementptr float* res, i64 i - LLVMValueRef res_tmp = LLVMBuildGEP(builder, res, &dst_index, 1, ""); - //bitcast i8* src, to res_type* - //load res_type src - //convert res_type src to float - //store float src, float *dst src - offset += val_size; - } -} - - -static void -fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) -{ - attrib[2] = FROM_8_UNORM(0); - attrib[1] = FROM_8_UNORM(1); - attrib[0] = FROM_8_UNORM(2); - attrib[3] = FROM_8_UNORM(3); -} - -static void -emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) -{ - ubyte *out = (ubyte *)ptr; - out[2] = TO_8_UNORM(attrib[0]); - out[1] = TO_8_UNORM(attrib[1]); - out[0] = TO_8_UNORM(attrib[2]); - out[3] = TO_8_UNORM(attrib[3]); -} - -static void -fetch_NULL( const void *ptr, float *attrib ) -{ - attrib[0] = 0; - attrib[1] = 0; - attrib[2] = 0; - attrib[3] = 1; -} - -static void -emit_NULL( const float *attrib, void *ptr ) -{ - /* do nothing is the only sensible option */ + LLVMConstReal(LLVMFloatType(), 65536.0), ""); } typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef); typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef); +/* so that underneath can avoid function calls which are prohibited + * for static initialization we need this conversion */ +enum ll_type { + LL_Double, + LL_Float, + LL_Int32, + LL_Int16, + LL_Int8 +}; + +static INLINE LLVMTypeRef +ll_type_to_llvm(enum ll_type type) +{ + switch (type) { + case LL_Double: + return LLVMDoubleType(); + case LL_Float: + return LLVMFloatType(); + case LL_Int32: + return LLVMInt32Type(); + case LL_Int16: + return LLVMIntType(16); + case LL_Int8: + return LLVMIntType(8); + } + return LLVMIntType(8); +} + +static INLINE int +ll_type_size(enum ll_type type) +{ + switch (type) { + case LL_Double: + return 8; + case LL_Float: + return 4; + case LL_Int32: + return 4; + case LL_Int16: + return 2; + case LL_Int8: + return 1; + } + return 1; +} + struct draw_llvm_translate { int format; from_func from; to_func to; - LLVMTypeRef type; + enum ll_type type; int num_components; } translates[] = { - {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 1}, - {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 2}, - {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 3}, - {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 4}, + {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LL_Double, 1}, + {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LL_Double, 2}, + {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LL_Double, 3}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LL_Double, 4}, + {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LL_Float, 1}, + {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LL_Float, 2}, + {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LL_Float, 3}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LL_Float, 4}, - {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 1}, - {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 2}, - {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 3}, - {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 4}, + {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 4}, - {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 1}, - {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 2}, - {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 3}, - {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 4}, + {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 4}, - {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 1}, - {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 2}, - {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 3}, - {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 4}, + {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 4}, - {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 1}, - {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 2}, - {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 3}, - {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 4}, + {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 4}, - {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 1}, - {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 2}, - {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 3}, - {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 4}, + {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 4}, - {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 1}, - {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 2}, - {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 3}, - {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 4}, + {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 4}, - {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 1}, - {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 2}, - {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 3}, - {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 4}, + {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 4}, - {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 1}, - {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 2}, - {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 3}, - {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 4}, + {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 4}, - {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 1}, - {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 2}, - {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 3}, - {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 4}, + {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, - {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 1}, - {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 2}, - {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 3}, - {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4}, + {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 4}, - {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 1}, - {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 2}, - {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 3}, - {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 4}, + {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 4}, - {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 1}, - {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 2}, - {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 3}, - {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 4}, + {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 4}, - {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 1}, - {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 2}, - {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 3}, - {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 4}, + {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 4}, - {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 1}, - {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 2}, - {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 3}, - {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 4}, - - {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4}, - {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(), 4}, + {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, }; +static LLVMValueRef +fetch(LLVMBuilderRef builder, + LLVMValueRef ptr, int val_size, int nr_components, + from_func func) +{ + int i; + int offset = 0; + LLVMValueRef res = LLVMConstNull( + LLVMVectorType(LLVMFloatType(), 4)); + + for (i = 0; i < nr_components; ++i) { + LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, ""); + LLVMValueRef component; + + src_tmp = LLVMBuildLoad(builder, src_tmp, ""); + + /* convert src_tmp to float */ + component = func(builder, src_tmp); + + /* vec.comp = component */ + res = LLVMBuildInsertElement(builder, + res, + component, + dst_index, ""); + offset += val_size; + } + return res; +} + + LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, LLVMValueRef vbuffer, @@ -501,8 +458,13 @@ draw_llvm_translate_from(LLVMBuilderRef builder, int i; for (i = 0; i < Elements(translates); ++i) { if (translates[i].format == from_format) { - return translates.from_func(builder, vbuffer, from_format); + /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/ + return fetch(builder, + vbuffer, + ll_type_size(translates[i].type), + translates[i].num_components, + translates[i].from); } } - return LLVMGetUndef(LLVMTypeOf(vbuffer)); + return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); } From 6f34339af421d4001dcc01114be462d2ea381f16 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 30 Mar 2010 12:52:09 -0400 Subject: [PATCH 04/19] draw llvm: we translate between pointers, not values directly --- .../auxiliary/draw/draw_llvm_translate.c | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 6cbe98c4893..74830f1b46c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -21,7 +21,7 @@ static LLVMValueRef from_64_float(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMDoubleType() , ""); + LLVMPointerType(LLVMDoubleType(), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); } @@ -30,7 +30,7 @@ static LLVMValueRef from_32_float(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMFloatType() , ""); + LLVMPointerType(LLVMFloatType(), 0) , ""); return LLVMBuildLoad(builder, bc, ""); } @@ -45,7 +45,7 @@ static INLINE LLVMValueRef from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(16) , ""); + LLVMPointerType(LLVMIntType(16), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); } @@ -54,7 +54,7 @@ static INLINE LLVMValueRef from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(32) , ""); + LLVMPointerType(LLVMIntType(32), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); } @@ -70,7 +70,7 @@ static INLINE LLVMValueRef from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(16) , ""); + LLVMPointerType(LLVMIntType(16), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); } @@ -79,7 +79,7 @@ static INLINE LLVMValueRef from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(32) , ""); + LLVMPointerType(LLVMIntType(32), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); } @@ -98,7 +98,7 @@ static INLINE LLVMValueRef from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(16) , ""); + LLVMPointerType(LLVMIntType(16), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, @@ -109,7 +109,7 @@ static INLINE LLVMValueRef from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(32) , ""); + LLVMPointerType(LLVMIntType(32), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); @@ -130,7 +130,7 @@ static INLINE LLVMValueRef from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(16) , ""); + LLVMPointerType(LLVMIntType(16), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); return LLVMBuildFDiv(builder, uscaled, @@ -141,7 +141,7 @@ static INLINE LLVMValueRef from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(32) , ""); + LLVMPointerType(LLVMIntType(32), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); @@ -153,7 +153,7 @@ static INLINE LLVMValueRef from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) { LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMIntType(32) , ""); + LLVMPointerType(LLVMIntType(32), 0) , ""); LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); @@ -436,6 +436,7 @@ fetch(LLVMBuilderRef builder, src_tmp = LLVMBuildLoad(builder, src_tmp, ""); + LLVMDumpValue(src_tmp); /* convert src_tmp to float */ component = func(builder, src_tmp); From 8bc07fc83e344c9e693bdec4148009320a1731f1 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 30 Mar 2010 16:16:36 -0400 Subject: [PATCH 05/19] draw llvm: fix some silly mistakes the vs_type selection isn't ideal, but for now both llvmpipe's fs and vs do the same thing which is operate on 4xfloat vector as the base type --- src/gallium/auxiliary/draw/draw_llvm.c | 22 +++++++++++++------ .../auxiliary/draw/draw_llvm_translate.c | 1 - 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index aa4a310a07d..cd183d5087c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -205,14 +205,24 @@ generate_vs(struct draw_llvm *llvm, LLVMValueRef io) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; - struct lp_type vs_type = lp_type_float(32); - LLVMValueRef vs_consts; + struct lp_type vs_type; + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + + memset(&vs_type, 0, sizeof vs_type); + vs_type.floating = TRUE; /* floating point values */ + vs_type.sign = TRUE; /* values are signed */ + vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + vs_type.width = 32; /* 32-bit float */ + vs_type.length = 4; /* 4 elements per vector */ +#if 0 + num_vs = 4; /* number of vertices per block */ +#endif lp_build_tgsi_soa(builder, tokens, vs_type, NULL /*struct lp_build_mask_context *mask*/, - vs_consts, + consts_ptr, NULL /*pos*/, inputs, outputs, @@ -312,7 +322,6 @@ convert_to_soa(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), 2, 0)); soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, LLVMConstInt(LLVMInt32Type(), 3, 0)); - } } @@ -329,7 +338,6 @@ draw_llvm_generate(struct draw_llvm *llvm) LLVMValueRef io_ptr, vbuffers_ptr; struct draw_context *draw = llvm->draw; unsigned i, j; - unsigned chan; struct lp_build_context bld; struct lp_build_loop_state lp_loop; struct lp_type vs_type = lp_type_float_vec(32); @@ -393,12 +401,11 @@ draw_llvm_generate(struct draw_llvm *llvm) struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; struct pipe_vertex_buffer *vbuf = &draw->pt.vertex_buffer[ velem->vertex_buffer_index]; - LLVMDumpValue(function); generate_fetch(builder, vbuffers_ptr, &aos_attribs[j][i], velem, vbuf, true_index); } } - convert_to_soa(builder, inputs, aos_attribs, + convert_to_soa(builder, aos_attribs, inputs, draw->pt.nr_vertex_elements); generate_vs(llvm, @@ -407,6 +414,7 @@ draw_llvm_generate(struct draw_llvm *llvm) inputs, context_ptr, io); + LLVMDumpModule(llvm->module); } lp_build_loop_end(builder, end, step, &lp_loop); diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 74830f1b46c..260568aaa4a 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -436,7 +436,6 @@ fetch(LLVMBuilderRef builder, src_tmp = LLVMBuildLoad(builder, src_tmp, ""); - LLVMDumpValue(src_tmp); /* convert src_tmp to float */ component = func(builder, src_tmp); From b0f946e90069f34e69a0492caa7a2867ae184e9a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 30 Mar 2010 17:21:11 -0400 Subject: [PATCH 06/19] draw llvm: actually set the constant buffers on the context --- src/gallium/auxiliary/draw/draw_llvm.c | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index cd183d5087c..1127d5417e8 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -110,7 +110,7 @@ init_globals(struct draw_llvm *llvm) LP_CHECK_STRUCT_SIZE(struct draw_jit_context, llvm->target, context_type); - LLVMAddTypeName(llvm->module, "context", context_type); + LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); llvm->context_ptr_type = LLVMPointerType(context_type, 0); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 38b1c4462db..f93df37d92b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -130,6 +130,12 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, *max_vertices = *max_vertices & ~1; draw_llvm_prepare(fpme->llvm); + + /*XXX we only support one constant buffer */ + fpme->llvm->jit_context.vs_constants = + draw->pt.user.vs_constants[0]; + fpme->llvm->jit_context.gs_constants = + draw->pt.user.gs_constants[0]; } @@ -241,7 +247,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, fpme->llvm->jit_func( &fpme->llvm->jit_context, pipeline_verts, - draw->pt.user.vbuffer, + (const char **)draw->pt.user.vbuffer, start, count, fpme->vertex_size ); From 93e342574f5fc95789028dbe7cf637257562e9bb Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 30 Mar 2010 21:09:51 -0400 Subject: [PATCH 07/19] draw llvm: fix a warning --- src/gallium/auxiliary/draw/draw_llvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 1127d5417e8..6347acb0b96 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -391,6 +391,7 @@ draw_llvm_generate(struct draw_llvm *llvm) LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; for (i = 0; i < NUM_CHANNELS; ++i) { LLVMValueRef true_index = LLVMBuildAdd( @@ -408,17 +409,17 @@ draw_llvm_generate(struct draw_llvm *llvm) convert_to_soa(builder, aos_attribs, inputs, draw->pt.nr_vertex_elements); + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; generate_vs(llvm, builder, outputs, - inputs, + ptr_aos, context_ptr, io); LLVMDumpModule(llvm->module); } lp_build_loop_end(builder, end, step, &lp_loop); - LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); From ae5487d4276007e466b6a7c783d6fb740f9539c5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 31 Mar 2010 22:15:12 -0400 Subject: [PATCH 08/19] draw llvm: fix iteration over buffers fetching was converting garbage --- src/gallium/auxiliary/draw/draw_llvm.c | 25 +++++++++++++++++-- .../auxiliary/draw/draw_llvm_translate.c | 5 ++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 6347acb0b96..2978621826b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -10,6 +10,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" #include "util/u_cpu_detect.h" @@ -229,6 +230,23 @@ generate_vs(struct draw_llvm *llvm, NULL/*sampler*/); } + +static void print_vectorf(LLVMBuilderRef builder, + LLVMValueRef vec) +{ + LLVMValueRef val[4]; + val[0] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + val[1] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + val[2] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + val[3] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", + val[0], val[1], val[2], val[3]); +} + static void generate_fetch(LLVMBuilderRef builder, LLVMValueRef vbuffers_ptr, @@ -244,6 +262,8 @@ generate_fetch(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), vbuf->stride, 0), index, ""); + vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); + stride = LLVMBuildAdd(builder, stride, LLVMConstInt(LLVMInt32Type(), vbuf->buffer_offset, 0), ""); @@ -251,6 +271,7 @@ generate_fetch(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), ""); + /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); @@ -386,6 +407,7 @@ draw_llvm_generate(struct draw_llvm *llvm) end = lp_build_add(&bld, start, count); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + lp_build_loop_begin(builder, start, &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -404,6 +426,7 @@ draw_llvm_generate(struct draw_llvm *llvm) velem->vertex_buffer_index]; generate_fetch(builder, vbuffers_ptr, &aos_attribs[j][i], velem, vbuf, true_index); + /*print_vectorf(builder, aos_attribs[j][i]);*/ } } convert_to_soa(builder, aos_attribs, inputs, @@ -416,7 +439,6 @@ draw_llvm_generate(struct draw_llvm *llvm) ptr_aos, context_ptr, io); - LLVMDumpModule(llvm->module); } lp_build_loop_end(builder, end, step, &lp_loop); @@ -441,7 +463,6 @@ draw_llvm_generate(struct draw_llvm *llvm) LLVMDumpValue(function); debug_printf("\n"); } - llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function); if (1) diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 260568aaa4a..bc17d3861aa 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -10,6 +10,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" #include "util/u_memory.h" #include "pipe/p_state.h" @@ -431,10 +432,10 @@ fetch(LLVMBuilderRef builder, for (i = 0; i < nr_components; ++i) { LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, ""); + LLVMValueRef src_tmp; LLVMValueRef component; - src_tmp = LLVMBuildLoad(builder, src_tmp, ""); + src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp"); /* convert src_tmp to float */ component = func(builder, src_tmp); From ab5c09738760bc1b665b9809eaf921f4ac27057e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 1 Apr 2010 18:58:51 -0400 Subject: [PATCH 09/19] draw llvm: a lot better storing implementation --- src/gallium/auxiliary/draw/draw_llvm.c | 148 +++++++++++++++++- src/gallium/auxiliary/draw/draw_llvm.h | 11 ++ .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 + 3 files changed, 157 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 2978621826b..30058c8cff3 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -13,6 +13,7 @@ #include "gallivm/lp_bld_printf.h" #include "util/u_cpu_detect.h" +#include "tgsi/tgsi_dump.h" #include @@ -202,8 +203,7 @@ generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], - LLVMValueRef context_ptr, - LLVMValueRef io) + LLVMValueRef context_ptr) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; @@ -219,6 +219,7 @@ generate_vs(struct draw_llvm *llvm, num_vs = 4; /* number of vertices per block */ #endif + tgsi_dump(tokens, 0); lp_build_tgsi_soa(builder, tokens, vs_type, @@ -319,6 +320,41 @@ aos_to_soa(LLVMBuilderRef builder, return res; } +static void +soa_to_aos(LLVMBuilderRef builder, + LLVMValueRef soa[NUM_CHANNELS], + LLVMValueRef aos[NUM_CHANNELS]) +{ + LLVMValueRef comp; + int i = 0; + + debug_assert(NUM_CHANNELS == 4); + + aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); + aos[1] = aos[2] = aos[3] = aos[0]; + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); + + } +} + static void convert_to_soa(LLVMBuilderRef builder, LLVMValueRef (*aos)[NUM_CHANNELS], @@ -346,6 +382,107 @@ convert_to_soa(LLVMBuilderRef builder, } } +static void +store_aos(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMValueRef indices[2]; + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = index; + + /* undefined vertex */ + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), + 0xffff, 0), id_ptr); + + + data_ptr = LLVMBuildInBoundsGEP(builder, data_ptr, indices, 2, ""); + lp_build_printf(builder, " ---- storing at %d (%p) ", index, data_ptr); + print_vectorf(builder, value); + data_ptr = LLVMBuildBitCast(builder, data_ptr, + LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0), + "datavec"); + LLVMBuildStore(builder, value, data_ptr); + lp_build_printf(builder, " ++ stored\n"); +} + +static void +store_aos_array(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef aos[NUM_CHANNELS], + LLVMValueRef start_index, + int attrib, + int num_outputs) +{ + LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); + LLVMValueRef ind0 = start_index; + LLVMValueRef ind1 = + LLVMBuildAdd(builder, start_index, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + LLVMValueRef ind2 = + LLVMBuildAdd(builder, start_index, + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + LLVMValueRef ind3 = + LLVMBuildAdd(builder, start_index, + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + + debug_assert(NUM_CHANNELS == 4); + + io0_ptr = LLVMBuildGEP(builder, io_ptr, + &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, + &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, + &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, + &ind3, 1, ""); + + store_aos(builder, io0_ptr, attr_index, aos[0]); + store_aos(builder, io1_ptr, attr_index, aos[1]); + store_aos(builder, io2_ptr, attr_index, aos[2]); + store_aos(builder, io3_ptr, attr_index, aos[3]); +} + +static void +convert_to_aos(LLVMBuilderRef builder, + LLVMValueRef io, + LLVMValueRef (*outputs)[NUM_CHANNELS], + int num_outputs, + int max_vertices, + LLVMValueRef start_index) +{ + unsigned chan, attrib; + + for (attrib = 0; attrib < num_outputs; ++attrib) { + LLVMValueRef soa[4]; + LLVMValueRef aos[4]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); + lp_build_printf(builder, "output %d : %d ", + LLVMConstInt(LLVMInt32Type(), attrib, 0), + LLVMConstInt(LLVMInt32Type(), chan, 0)); + print_vectorf(builder, out); + soa[chan] = out; + } else + soa[chan] = 0; + } + soa_to_aos(builder, soa, aos); + store_aos_array(builder, + io, + aos, + start_index, + attrib, + num_outputs); + } +} + void draw_llvm_generate(struct draw_llvm *llvm) { @@ -437,8 +574,11 @@ draw_llvm_generate(struct draw_llvm *llvm) builder, outputs, ptr_aos, - context_ptr, - io); + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices, lp_loop.counter); } lp_build_loop_end(builder, end, step, &lp_loop); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index e375008f3b4..46ce236e3cd 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -63,6 +63,17 @@ struct draw_jit_context #define draw_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + + +#define draw_jit_header_id(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 0, "id") + +#define draw_jit_header_clip(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "clip") + +#define draw_jit_header_data(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 2, "data") + /* we are construction a function of the form: struct vertex_header { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f93df37d92b..07d464b1593 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -245,6 +245,8 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, return; } + debug_printf("--- pipe verts data[0] = %p, data[1] = %p\n", + pipeline_verts->data[0], pipeline_verts->data[1]); fpme->llvm->jit_func( &fpme->llvm->jit_context, pipeline_verts, (const char **)draw->pt.user.vbuffer, From 557b75248a3ebc6daabe3c2b69ac24d409aaa1e0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 2 Apr 2010 15:56:34 -0400 Subject: [PATCH 10/19] draw llvm: fix translation of formats with variable components --- src/gallium/auxiliary/draw/draw_llvm_translate.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index bc17d3861aa..b29ebdec3a9 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -428,6 +428,12 @@ fetch(LLVMBuilderRef builder, int offset = 0; LLVMValueRef res = LLVMConstNull( LLVMVectorType(LLVMFloatType(), 4)); + LLVMValueRef defaults[4]; + + defaults[0] = LLVMConstReal(LLVMFloatType(), 0); + defaults[1] = LLVMConstReal(LLVMFloatType(), 0); + defaults[2] = LLVMConstReal(LLVMFloatType(), 0); + defaults[3] = LLVMConstReal(LLVMFloatType(), 1); for (i = 0; i < nr_components; ++i) { LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); @@ -447,6 +453,13 @@ fetch(LLVMBuilderRef builder, dst_index, ""); offset += val_size; } + for (; i < 4; ++i) { + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(builder, + res, + defaults[i], + dst_index, ""); + } return res; } From cb31d3b5fec6260142ed27cc37f7155915ecfe89 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 2 Apr 2010 18:52:32 -0400 Subject: [PATCH 11/19] draw llvm: fix storing of outputs for the rest of the pipeline there's no good way of aligning the output's, and since the vertex_header is variable sized in the first place we need to extract elements from a vector and store them individually into an array. this gets the basic examples working again --- src/gallium/auxiliary/draw/draw_llvm.c | 141 +++++++++++------- src/gallium/auxiliary/draw/draw_llvm.h | 2 +- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 3 + 4 files changed, 94 insertions(+), 54 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 30058c8cff3..388c9910756 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -20,48 +20,9 @@ static void init_globals(struct draw_llvm *llvm) { - LLVMTypeRef vertex_header; - LLVMTypeRef texture_type; + LLVMTypeRef texture_type; - /* struct vertex_header */ - { - LLVMTypeRef elem_types[3]; - - elem_types[0] = LLVMIntType(32); - elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); - elem_types[2] = LLVMArrayType(elem_types[1], 0); - - vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); - - /* these are bit-fields and we can't take address of them - LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_CLIPMASK); - LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_EDGEFLAG); - LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_PAD); - LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_VERTEX_ID); - */ - LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_CLIP); - LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, - llvm->target, vertex_header, - DRAW_JIT_VERTEX_DATA); - - LP_CHECK_STRUCT_SIZE(struct vertex_header, - llvm->target, vertex_header); - - LLVMAddTypeName(llvm->module, "vertex_header", vertex_header); - - llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); - } - /* struct draw_jit_texture */ + /* struct draw_jit_texture */ { LLVMTypeRef elem_types[4]; @@ -122,6 +83,45 @@ init_globals(struct draw_llvm *llvm) } } +static LLVMTypeRef +create_vertex_header(struct draw_llvm *llvm, int data_elems) +{ + /* struct vertex_header */ + LLVMTypeRef elem_types[3]; + LLVMTypeRef vertex_header; + + elem_types[0] = LLVMIntType(32); + elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[2] = LLVMArrayType(elem_types[1], data_elems); + + vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + + /* these are bit-fields and we can't take address of them + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIPMASK); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_EDGEFLAG); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_PAD); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_VERTEX_ID); + */ + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_DATA); + + LLVMAddTypeName(llvm->module, "vertex_header", vertex_header); + + return LLVMPointerType(vertex_header, 0); +} + struct draw_llvm * draw_llvm_create(struct draw_context *draw) { @@ -175,8 +175,9 @@ draw_llvm_destroy(struct draw_llvm *llvm) } void -draw_llvm_prepare(struct draw_llvm *llvm) +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) { + llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm); } @@ -390,24 +391,59 @@ store_aos(LLVMBuilderRef builder, { LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); - LLVMValueRef indices[2]; + LLVMValueRef indices[3]; indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = index; + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); /* undefined vertex */ LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0xffff, 0), id_ptr); - - data_ptr = LLVMBuildInBoundsGEP(builder, data_ptr, indices, 2, ""); - lp_build_printf(builder, " ---- storing at %d (%p) ", index, data_ptr); - print_vectorf(builder, value); +#if 0 + /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); + print_vectorf(builder, value);*/ data_ptr = LLVMBuildBitCast(builder, data_ptr, - LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0), + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), "datavec"); + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); + LLVMBuildStore(builder, value, data_ptr); - lp_build_printf(builder, " ++ stored\n"); +#else + { + LLVMValueRef x, y, z, w; + LLVMValueRef idx0, idx1, idx2, idx3; + LLVMValueRef gep0, gep1, gep2, gep3; + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + + idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + x = LLVMBuildExtractElement(builder, value, + idx0, ""); + y = LLVMBuildExtractElement(builder, value, + idx1, ""); + z = LLVMBuildExtractElement(builder, value, + idx2, ""); + w = LLVMBuildExtractElement(builder, value, + idx3, ""); + + gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); + gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); + gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); + gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); + + /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", + x, gep0, y, gep1, z, gep2, w, gep3);*/ + LLVMBuildStore(builder, x, gep0); + LLVMBuildStore(builder, y, gep1); + LLVMBuildStore(builder, z, gep2); + LLVMBuildStore(builder, w, gep3); + } +#endif } static void @@ -442,6 +478,8 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + /*lp_build_printf(builder, "io = %d\n", start_index);*/ + store_aos(builder, io0_ptr, attr_index, aos[0]); store_aos(builder, io1_ptr, attr_index, aos[1]); store_aos(builder, io2_ptr, attr_index, aos[2]); @@ -465,10 +503,10 @@ convert_to_aos(LLVMBuilderRef builder, if(outputs[attrib][chan]) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); - lp_build_printf(builder, "output %d : %d ", + /*lp_build_printf(builder, "output %d : %d ", LLVMConstInt(LLVMInt32Type(), attrib, 0), LLVMConstInt(LLVMInt32Type(), chan, 0)); - print_vectorf(builder, out); + print_vectorf(builder, out);*/ soa[chan] = out; } else soa[chan] = 0; @@ -563,7 +601,6 @@ draw_llvm_generate(struct draw_llvm *llvm) velem->vertex_buffer_index]; generate_fetch(builder, vbuffers_ptr, &aos_attribs[j][i], velem, vbuf, true_index); - /*print_vectorf(builder, aos_attribs[j][i]);*/ } } convert_to_soa(builder, aos_attribs, inputs, diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 46ce236e3cd..afafa15e687 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -157,7 +157,7 @@ void draw_llvm_destroy(struct draw_llvm *llvm); void -draw_llvm_prepare(struct draw_llvm *llvm); +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); /* generates the draw jit function */ void diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 07d464b1593..f65cf3822c4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -129,7 +129,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, /* return even number */ *max_vertices = *max_vertices & ~1; - draw_llvm_prepare(fpme->llvm); + draw_llvm_prepare(fpme->llvm, nr); /*XXX we only support one constant buffer */ fpme->llvm->jit_context.vs_constants = diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9728d5c2bdf..7afbf6fa461 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,6 +108,9 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; + debug_printf("%d) %p %p = [%f, %f, %f, %f]\n", + j, out, position, position[0], position[1], position[2], position[3]); + out->clip[0] = position[0]; out->clip[1] = position[1]; out->clip[2] = position[2]; From 1b0bab167cd541f70c32249ca3e70da88b8c93c5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 5 Apr 2010 16:43:53 -0400 Subject: [PATCH 12/19] draw llvm: when generating the vertex_header struct adjust its name change the name to not clash and accuretly represent the number of inputs we store in the data member --- src/gallium/auxiliary/draw/draw_llvm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 388c9910756..382f765e16c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -89,6 +89,9 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems) /* struct vertex_header */ LLVMTypeRef elem_types[3]; LLVMTypeRef vertex_header; + char struct_name[24]; + + snprintf(struct_name, 23, "vertex_header%d", data_elems); elem_types[0] = LLVMIntType(32); elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); @@ -117,7 +120,7 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems) llvm->target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(llvm->module, "vertex_header", vertex_header); + LLVMAddTypeName(llvm->module, struct_name, vertex_header); return LLVMPointerType(vertex_header, 0); } From 9dd70e7b85ddbc73bd976c4dab81476aa36c557e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 00:13:20 -0400 Subject: [PATCH 13/19] draw llvm: fix loop iteration and vertex header offsets the loop was doing a NE comparison which we could have skipped if the prim was triangles (3 verts) and our step was 4 verts. also fix offsets in conversion to aos. --- src/gallium/auxiliary/draw/draw_llvm.c | 44 +++++++++++++------ .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 - src/gallium/auxiliary/gallivm/lp_bld_flow.c | 29 ++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_flow.h | 7 +++ 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 382f765e16c..27df59653c2 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -17,6 +17,8 @@ #include +#define DEBUG_STORE 0 + static void init_globals(struct draw_llvm *llvm) { @@ -235,7 +237,7 @@ generate_vs(struct draw_llvm *llvm, NULL/*sampler*/); } - +#if DEBUG_STORE static void print_vectorf(LLVMBuilderRef builder, LLVMValueRef vec) { @@ -251,6 +253,7 @@ static void print_vectorf(LLVMBuilderRef builder, lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", val[0], val[1], val[2], val[3]); } +#endif static void generate_fetch(LLVMBuilderRef builder, @@ -404,6 +407,9 @@ store_aos(LLVMBuilderRef builder, LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0xffff, 0), id_ptr); +#if DEBUG_STORE + lp_build_printf(builder, " ---- %p storing at %d (%p)\n", io_ptr, index, data_ptr); +#endif #if 0 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); print_vectorf(builder, value);*/ @@ -458,16 +464,10 @@ store_aos_array(LLVMBuilderRef builder, int num_outputs) { LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); - LLVMValueRef ind0 = start_index; - LLVMValueRef ind1 = - LLVMBuildAdd(builder, start_index, - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); - LLVMValueRef ind2 = - LLVMBuildAdd(builder, start_index, - LLVMConstInt(LLVMInt32Type(), 2, 0), ""); - LLVMValueRef ind3 = - LLVMBuildAdd(builder, start_index, - LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; debug_assert(NUM_CHANNELS == 4); @@ -481,7 +481,10 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); - /*lp_build_printf(builder, "io = %d\n", start_index);*/ +#if DEBUG_STORE + lp_build_printf(builder, "io = %d, indexes[%d, %d, %d, %d]\n", + start_index, ind0, ind1, ind2, ind3); +#endif store_aos(builder, io0_ptr, attr_index, aos[0]); store_aos(builder, io1_ptr, attr_index, aos[1]); @@ -499,6 +502,9 @@ convert_to_aos(LLVMBuilderRef builder, { unsigned chan, attrib; +#if DEBUG_STORE + lp_build_printf(builder, " # storing begin\n"); +#endif for (attrib = 0; attrib < num_outputs; ++attrib) { LLVMValueRef soa[4]; LLVMValueRef aos[4]; @@ -522,6 +528,9 @@ convert_to_aos(LLVMBuilderRef builder, attrib, num_outputs); } +#if DEBUG_STORE + lp_build_printf(builder, " # storing end\n"); +#endif } void @@ -586,13 +595,20 @@ draw_llvm_generate(struct draw_llvm *llvm) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); +#if DEBUG_STORE + lp_build_printf(builder, "start = %d, end = %d, step = %d\n", + start, end, step); +#endif lp_build_loop_begin(builder, start, &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; - +#if DEBUG_STORE + lp_build_printf(builder, " --- loop counter %d\n", + lp_loop.counter); +#endif for (i = 0; i < NUM_CHANNELS; ++i) { LLVMValueRef true_index = LLVMBuildAdd( builder, @@ -620,7 +636,7 @@ draw_llvm_generate(struct draw_llvm *llvm) draw->vs.vertex_shader->info.num_outputs, max_vertices, lp_loop.counter); } - lp_build_loop_end(builder, end, step, &lp_loop); + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); LLVMBuildRetVoid(builder); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f65cf3822c4..aebfe40a037 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -245,8 +245,6 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, return; } - debug_printf("--- pipe verts data[0] = %p, data[1] = %p\n", - pipeline_verts->data[0], pipeline_verts->data[1]); fpme->llvm->jit_func( &fpme->llvm->jit_context, pipeline_verts, (const char **)draw->pt.user.vbuffer, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 106fc03e46f..e60ab4f6ba1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -570,6 +570,35 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } +void +lp_build_loop_end_cond(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + int llvm_cond, + struct lp_build_loop_state *state) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMValueRef next; + LLVMValueRef cond; + LLVMBasicBlockRef after_block; + + if (!step) + step = LLVMConstInt(LLVMTypeOf(end), 1, 0); + + next = LLVMBuildAdd(builder, state->counter, step, ""); + + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); + + after_block = LLVMAppendBasicBlock(function, ""); + + LLVMBuildCondBr(builder, cond, after_block, state->block); + + LLVMAddIncoming(state->counter, &next, &block, 1); + + LLVMPositionBuilderAtEnd(builder, after_block); +} + /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index c2b50e1b602..745838570c8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -124,6 +124,13 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMValueRef step, struct lp_build_loop_state *state); +void +lp_build_loop_end_cond(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + int cond, /* LLVM condition */ + struct lp_build_loop_state *state); + From aeaf2cf18fc74f2d65fcadfad8c19f244ccd4206 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 11:00:35 -0400 Subject: [PATCH 14/19] draw llvm: fix draw arrays we don't index within the outputs but only within the inputs --- src/gallium/auxiliary/draw/draw_llvm.c | 21 ++++++++++--------- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 ++ src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 27df59653c2..eac5e917d5b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -408,7 +408,7 @@ store_aos(LLVMBuilderRef builder, 0xffff, 0), id_ptr); #if DEBUG_STORE - lp_build_printf(builder, " ---- %p storing at %d (%p)\n", io_ptr, index, data_ptr); + lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); #endif #if 0 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); @@ -459,7 +459,6 @@ static void store_aos_array(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], - LLVMValueRef start_index, int attrib, int num_outputs) { @@ -482,8 +481,8 @@ store_aos_array(LLVMBuilderRef builder, &ind3, 1, ""); #if DEBUG_STORE - lp_build_printf(builder, "io = %d, indexes[%d, %d, %d, %d]\n", - start_index, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n", + io_ptr, ind0, ind1, ind2, ind3); #endif store_aos(builder, io0_ptr, attr_index, aos[0]); @@ -497,8 +496,7 @@ convert_to_aos(LLVMBuilderRef builder, LLVMValueRef io, LLVMValueRef (*outputs)[NUM_CHANNELS], int num_outputs, - int max_vertices, - LLVMValueRef start_index) + int max_vertices) { unsigned chan, attrib; @@ -524,7 +522,6 @@ convert_to_aos(LLVMBuilderRef builder, store_aos_array(builder, io, aos, - start_index, attrib, num_outputs); } @@ -542,7 +539,7 @@ draw_llvm_generate(struct draw_llvm *llvm) LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef function; - LLVMValueRef start, end, count, stride, step; + LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr; struct draw_context *draw = llvm->draw; unsigned i, j; @@ -595,6 +592,7 @@ draw_llvm_generate(struct draw_llvm *llvm) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + io_itr = LLVMConstInt(LLVMInt32Type(), 0, 0); #if DEBUG_STORE lp_build_printf(builder, "start = %d, end = %d, step = %d\n", start, end, step); @@ -603,7 +601,7 @@ draw_llvm_generate(struct draw_llvm *llvm) { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); + LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; #if DEBUG_STORE lp_build_printf(builder, " --- loop counter %d\n", @@ -634,7 +632,10 @@ draw_llvm_generate(struct draw_llvm *llvm) convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, - max_vertices, lp_loop.counter); + max_vertices); + + io_itr = LLVMBuildAdd(builder, io_itr, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); } lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index aebfe40a037..307e6cc3a4d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -245,6 +245,8 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, return; } + debug_printf("#### Pipeline = %p (data = %p)\n", + pipeline_verts, pipeline_verts->data); fpme->llvm->jit_func( &fpme->llvm->jit_context, pipeline_verts, (const char **)draw->pt.user.vbuffer, diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 7afbf6fa461..af07926a55c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,7 +108,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; - debug_printf("%d) %p %p = [%f, %f, %f, %f]\n", + debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", j, out, position, position[0], position[1], position[2], position[3]); out->clip[0] = position[0]; From de0647dbad96db222b5643d03b3f61b093e7ef76 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 12:07:33 -0400 Subject: [PATCH 15/19] draw llvm: iterate with the correct stop over the outputs it's whatever the var step is (4 usually) not an unconditional 1 --- src/gallium/auxiliary/draw/draw_llvm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index eac5e917d5b..021662e75e0 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -634,8 +634,7 @@ draw_llvm_generate(struct draw_llvm *llvm) draw->vs.vertex_shader->info.num_outputs, max_vertices); - io_itr = LLVMBuildAdd(builder, io_itr, - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + io_itr = LLVMBuildAdd(builder, io_itr, step, ""); } lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); From 1e0bf24139f6047f505b138392fc0f1d6584d6bc Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 12:37:31 -0400 Subject: [PATCH 16/19] draw llvm: fix iteration for larger vertex arrays we were trying to store the outputs starting at the same offset we were using for the input arrays, which was writing beyond the end of the output array. --- src/gallium/auxiliary/draw/draw_llvm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 021662e75e0..a09e2a9f0c2 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -17,7 +17,7 @@ #include -#define DEBUG_STORE 0 +#define DEBUG_STORE 1 static void init_globals(struct draw_llvm *llvm) @@ -481,7 +481,7 @@ store_aos_array(LLVMBuilderRef builder, &ind3, 1, ""); #if DEBUG_STORE - lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n", + lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", io_ptr, ind0, ind1, ind2, ind3); #endif @@ -592,7 +592,6 @@ draw_llvm_generate(struct draw_llvm *llvm) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); - io_itr = LLVMConstInt(LLVMInt32Type(), 0, 0); #if DEBUG_STORE lp_build_printf(builder, "start = %d, end = %d, step = %d\n", start, end, step); @@ -601,11 +600,14 @@ draw_llvm_generate(struct draw_llvm *llvm) { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); + LLVMValueRef io; const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); #if DEBUG_STORE - lp_build_printf(builder, " --- loop counter %d\n", - lp_loop.counter); + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); #endif for (i = 0; i < NUM_CHANNELS; ++i) { LLVMValueRef true_index = LLVMBuildAdd( @@ -633,8 +635,6 @@ draw_llvm_generate(struct draw_llvm *llvm) convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, max_vertices); - - io_itr = LLVMBuildAdd(builder, io_itr, step, ""); } lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); From 323fdd8ae5baf16df6c57754e58adc8e22d28e10 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 16:28:48 -0400 Subject: [PATCH 17/19] draw llvm: implement simple pipeline caching using variants --- src/gallium/auxiliary/draw/draw_llvm.c | 75 +++++++++++++------ src/gallium/auxiliary/draw/draw_llvm.h | 27 +++++-- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 36 +++++++-- 3 files changed, 103 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a09e2a9f0c2..26f756c6f8a 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -17,7 +17,12 @@ #include -#define DEBUG_STORE 1 +#define DEBUG_STORE 0 + + +/* generates the draw jit function */ +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -179,11 +184,18 @@ draw_llvm_destroy(struct draw_llvm *llvm) free(llvm); } -void +struct draw_llvm_variant * draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) { + struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + + draw_llvm_make_variant_key(llvm, &variant->key); + llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); - draw_llvm_generate(llvm); + + draw_llvm_generate(llvm, variant); + + return variant; } @@ -530,15 +542,14 @@ convert_to_aos(LLVMBuilderRef builder, #endif } -void -draw_llvm_generate(struct draw_llvm *llvm) +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { LLVMTypeRef arg_types[6]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef function; LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr; struct draw_context *draw = llvm->draw; @@ -558,18 +569,18 @@ draw_llvm_generate(struct draw_llvm *llvm) func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); - LLVMSetFunctionCallConv(function, LLVMCCallConv); + variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); - context_ptr = LLVMGetParam(function, 0); - io_ptr = LLVMGetParam(function, 1); - vbuffers_ptr = LLVMGetParam(function, 2); - start = LLVMGetParam(function, 3); - count = LLVMGetParam(function, 4); - stride = LLVMGetParam(function, 5); + context_ptr = LLVMGetParam(variant->function, 0); + io_ptr = LLVMGetParam(variant->function, 1); + vbuffers_ptr = LLVMGetParam(variant->function, 2); + start = LLVMGetParam(variant->function, 3); + count = LLVMGetParam(variant->function, 4); + stride = LLVMGetParam(variant->function, 5); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -582,7 +593,7 @@ draw_llvm_generate(struct draw_llvm *llvm) * Function body */ - block = LLVMAppendBasicBlock(function, "entry"); + block = LLVMAppendBasicBlock(variant->function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); @@ -647,20 +658,40 @@ draw_llvm_generate(struct draw_llvm *llvm) */ #ifdef DEBUG - if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { - LLVMDumpValue(function); + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); assert(0); } #endif - LLVMRunFunctionPassManager(llvm->pass, function); + LLVMRunFunctionPassManager(llvm->pass, variant->function); if (1) { - LLVMDumpValue(function); + LLVMDumpValue(variant->function); debug_printf("\n"); } - llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function); + variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); if (1) - lp_disassemble(llvm->jit_func); + lp_disassemble(variant->jit_func); +} + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key) +{ + key->nr_vertex_buffers = llvm->draw->pt.nr_vertex_buffers; + key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + + memcpy(key->vertex_buffer, + llvm->draw->pt.vertex_buffer, + sizeof(struct pipe_vertex_buffer) * PIPE_MAX_ATTRIBS); + + memcpy(key->vertex_element, + llvm->draw->pt.vertex_element, + sizeof(struct pipe_vertex_element) * PIPE_MAX_ATTRIBS); + + memcpy(&key->vs, + &llvm->draw->vs.vertex_shader->state, + sizeof(struct pipe_shader_state)); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index afafa15e687..774eb16d90c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -136,8 +136,6 @@ struct draw_llvm { struct draw_jit_context jit_context; - draw_jit_vert_func jit_func; - LLVMModuleRef module; LLVMExecutionEngineRef engine; LLVMModuleProviderRef provider; @@ -150,22 +148,39 @@ struct draw_llvm { }; +struct draw_llvm_variant_key +{ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + draw_jit_vert_func jit_func; + + struct draw_llvm_variant *next; +}; + struct draw_llvm * draw_llvm_create(struct draw_context *draw); void draw_llvm_destroy(struct draw_llvm *llvm); -void +struct draw_llvm_variant * draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); -/* generates the draw jit function */ void -draw_llvm_generate(struct draw_llvm *llvm); +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key); LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, LLVMValueRef vbuffer, enum pipe_format from_format); - #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 307e6cc3a4d..0ed043d9506 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -53,6 +53,8 @@ struct llvm_middle_end { unsigned opt; struct draw_llvm *llvm; + struct draw_llvm_variant *variants; + struct draw_llvm_variant *current_variant; }; @@ -66,6 +68,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_llvm_variant_key key; + struct draw_llvm_variant *variant = NULL; unsigned i; unsigned instance_id_index = ~0; @@ -129,7 +133,22 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, /* return even number */ *max_vertices = *max_vertices & ~1; - draw_llvm_prepare(fpme->llvm, nr); + draw_llvm_make_variant_key(fpme->llvm, &key); + + variant = fpme->variants; + while(variant) { + if(memcmp(&variant->key, &key, sizeof key) == 0) + break; + + variant = variant->next; + } + + if (!variant) { + variant = draw_llvm_prepare(fpme->llvm, nr); + variant->next = fpme->variants; + fpme->variants = variant; + } + fpme->current_variant = variant; /*XXX we only support one constant buffer */ fpme->llvm->jit_context.vs_constants = @@ -247,12 +266,12 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, debug_printf("#### Pipeline = %p (data = %p)\n", pipeline_verts, pipeline_verts->data); - fpme->llvm->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size ); + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size ); if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, @@ -430,6 +449,9 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont if (!fpme->llvm) goto fail; + fpme->variants = NULL; + fpme->current_variant = NULL; + return &fpme->base; fail: From 06bda76c08618eaf68de70f4f776329e6ef1f196 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 17:14:13 -0400 Subject: [PATCH 18/19] draw llvm: disable debugging output --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 ++ src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 0ed043d9506..d2ed0eb3d18 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -264,8 +264,10 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, return; } +#if 0 debug_printf("#### Pipeline = %p (data = %p)\n", pipeline_verts, pipeline_verts->data); +#endif fpme->current_variant->jit_func( &fpme->llvm->jit_context, pipeline_verts, (const char **)draw->pt.user.vbuffer, diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index af07926a55c..5525dfc748d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,8 +108,10 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; +#if 0 debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", j, out, position, position[0], position[1], position[2], position[3]); +#endif out->clip[0] = position[0]; out->clip[1] = position[1]; From 695a029e9b8c70a34c5cde01ab32ac377e513707 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 Apr 2010 17:14:30 -0400 Subject: [PATCH 19/19] llvmpipe: use a define to decide whether to use draw llvm paths right now disabled by default --- src/gallium/drivers/llvmpipe/lp_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 383e4b0614d..5c476f693f2 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -47,7 +47,7 @@ #include "lp_setup.h" - +#define USE_DRAW_LLVM 0 static void llvmpipe_destroy( struct pipe_context *pipe ) @@ -182,7 +182,11 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* * Create drawing context and plug our rendering stage into it. */ +#if USE_DRAW_LLVM llvmpipe->draw = draw_create_with_llvm(llvmscreen->engine); +#else + llvmpipe->draw = draw_create(); +#endif if (!llvmpipe->draw) goto fail;