More GLSL code - more support for vec4 operations on x86 back-end.
This commit is contained in:
parent
9f344b3e7d
commit
edefc658e4
|
@ -1463,6 +1463,16 @@ void main () {
|
|||
|
||||
printMESA (normalize (_Two4.xyz));
|
||||
printMESA (normalize (_Three4));
|
||||
|
||||
vec4 tmp = _Two4;
|
||||
printMESA (tmp);
|
||||
|
||||
printMESA (_Two4 == _Three4);
|
||||
printMESA (_Two4 != _Three4);
|
||||
printMESA (_Two4 == _Two4);
|
||||
printMESA (_Three4 != _Three4);
|
||||
printMESA (_Two4 != vec4 (_Two4.xyz, 999.0));
|
||||
printMESA (_Two4 != vec4 (999.0, _Two4.yzw));
|
||||
}
|
||||
|
||||
$output
|
||||
|
@ -1552,3 +1562,14 @@ $output
|
|||
0.507392
|
||||
0.522768
|
||||
|
||||
2.1
|
||||
2.2
|
||||
2.3
|
||||
2.4
|
||||
|
||||
false
|
||||
true
|
||||
true
|
||||
false
|
||||
true
|
||||
true
|
||||
|
|
|
@ -320,7 +320,7 @@ void RenderScene (void)
|
|||
break;
|
||||
}
|
||||
if (fabs (value - attribs[i].data[j]) > EPSILON)
|
||||
printf ("*** %s\n", "Values are different");
|
||||
printf ("*** %s, is %f, should be %f\n", "Values are different", value, attribs[i].data[j]);
|
||||
p = strchr (p, '\n');
|
||||
if (p != NULL)
|
||||
p++;
|
||||
|
|
|
@ -398,74 +398,95 @@ GLboolean _slang_cleanup_stack (slang_assemble_ctx *A, slang_operation *op)
|
|||
|
||||
/* _slang_assemble_operation() */
|
||||
|
||||
static GLboolean dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
|
||||
GLuint *size, slang_swizzle *swz, GLboolean is_swizzled)
|
||||
static GLboolean
|
||||
dereference_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *size, slang_swizzle *swz,
|
||||
GLboolean is_swizzled)
|
||||
{
|
||||
GLuint i;
|
||||
GLuint src_offset;
|
||||
slang_assembly_type ty;
|
||||
|
||||
for (i = agg->count; i > 0; i--)
|
||||
{
|
||||
const slang_storage_array *arr = &agg->arrays[i - 1];
|
||||
GLuint j;
|
||||
*size -= _slang_sizeof_type (type);
|
||||
|
||||
for (j = arr->length; j > 0; j--)
|
||||
{
|
||||
if (arr->type == slang_stor_aggregate)
|
||||
{
|
||||
if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
GLuint src_offset;
|
||||
slang_assembly_type ty;
|
||||
/* If swizzling is taking place, we are forced to use scalar operations, even if we have
|
||||
* vec4 instructions enabled (this should actually be done with special vec4 shuffle
|
||||
* instructions).
|
||||
* Adjust the size and calculate the offset within source variable to read.
|
||||
*/
|
||||
if (is_swizzled)
|
||||
src_offset = swz->swizzle[*size / 4] * 4;
|
||||
else
|
||||
src_offset = *size;
|
||||
|
||||
*size -= 4;
|
||||
/* dereference data slot of a basic type */
|
||||
if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
|
||||
return GL_FALSE;
|
||||
if (!PUSH (A->file, slang_asm_addr_deref))
|
||||
return GL_FALSE;
|
||||
if (src_offset != 0) {
|
||||
if (!PLAB (A->file, slang_asm_addr_push, src_offset))
|
||||
return GL_FALSE;
|
||||
if (!PUSH (A->file, slang_asm_addr_add))
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
/* calculate the offset within source variable to read */
|
||||
if (is_swizzled)
|
||||
{
|
||||
/* swizzle the index to get the actual offset */
|
||||
src_offset = swz->swizzle[*size / 4] * 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no swizzling - read sequentially */
|
||||
src_offset = *size;
|
||||
}
|
||||
switch (type) {
|
||||
case slang_stor_bool:
|
||||
ty = slang_asm_bool_deref;
|
||||
break;
|
||||
case slang_stor_int:
|
||||
ty = slang_asm_int_deref;
|
||||
break;
|
||||
case slang_stor_float:
|
||||
ty = slang_asm_float_deref;
|
||||
break;
|
||||
#if defined(USE_X86_ASM) || defined(SLANG_X86)
|
||||
case slang_stor_vec4:
|
||||
ty = slang_asm_vec4_deref;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
_mesa_problem(NULL, "Unexpected arr->type in dereference_basic");
|
||||
ty = slang_asm_none;
|
||||
}
|
||||
|
||||
/* dereference data slot of a basic type */
|
||||
if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
|
||||
return GL_FALSE;
|
||||
if (!PUSH (A->file, slang_asm_addr_deref))
|
||||
return GL_FALSE;
|
||||
if (!PLAB (A->file, slang_asm_addr_push, src_offset))
|
||||
return GL_FALSE;
|
||||
if (!PUSH (A->file, slang_asm_addr_add))
|
||||
return GL_FALSE;
|
||||
return PUSH (A->file, ty);
|
||||
}
|
||||
|
||||
switch (arr->type)
|
||||
{
|
||||
case slang_stor_bool:
|
||||
ty = slang_asm_bool_deref;
|
||||
break;
|
||||
case slang_stor_int:
|
||||
ty = slang_asm_int_deref;
|
||||
break;
|
||||
case slang_stor_float:
|
||||
ty = slang_asm_float_deref;
|
||||
break;
|
||||
default:
|
||||
_mesa_problem(NULL, "Unexpected arr->type in dereference_aggregate");
|
||||
ty = slang_asm_none;
|
||||
}
|
||||
if (!PUSH (A->file, ty))
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
static GLboolean
|
||||
dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *size,
|
||||
slang_swizzle *swz, GLboolean is_swizzled)
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
return GL_TRUE;
|
||||
for (i = agg->count; i > 0; i--) {
|
||||
const slang_storage_array *arr = &agg->arrays[i - 1];
|
||||
GLuint j;
|
||||
|
||||
for (j = arr->length; j > 0; j--) {
|
||||
if (arr->type == slang_stor_aggregate) {
|
||||
if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
if (is_swizzled && arr->type == slang_stor_vec4) {
|
||||
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
if (!dereference_basic (A, arr->type, size, swz, is_swizzled))
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
GLboolean _slang_dereference (slang_assemble_ctx *A, slang_operation *op)
|
||||
|
@ -694,35 +715,40 @@ static GLboolean call_asm_instruction (slang_assemble_ctx *A, slang_atom a_name)
|
|||
return GL_TRUE;
|
||||
}
|
||||
|
||||
static GLboolean equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
|
||||
GLuint *index, GLuint size, GLuint z_label)
|
||||
static GLboolean
|
||||
equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
|
||||
GLuint size, GLuint z_label)
|
||||
{
|
||||
GLuint i;
|
||||
GLuint i;
|
||||
|
||||
for (i = 0; i < agg->count; i++)
|
||||
{
|
||||
const slang_storage_array *arr = &agg->arrays[i];
|
||||
GLuint j;
|
||||
for (i = 0; i < agg->count; i++) {
|
||||
const slang_storage_array *arr = &agg->arrays[i];
|
||||
GLuint j;
|
||||
|
||||
for (j = 0; j < arr->length; j++)
|
||||
{
|
||||
if (arr->type == slang_stor_aggregate)
|
||||
{
|
||||
if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
|
||||
return GL_FALSE;
|
||||
*index += 4;
|
||||
if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < arr->length; j++) {
|
||||
if (arr->type == slang_stor_aggregate) {
|
||||
if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
#if defined(USE_X86_ASM) || defined(SLANG_X86)
|
||||
if (arr->type == slang_stor_vec4) {
|
||||
if (!PLAB2 (A->file, slang_asm_vec4_equal_int, size + *index, *index))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
|
||||
return GL_FALSE;
|
||||
|
||||
return GL_TRUE;
|
||||
*index += _slang_sizeof_type (arr->type);
|
||||
if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
static GLboolean equality (slang_assemble_ctx *A, slang_operation *op, GLboolean equal)
|
||||
|
|
|
@ -105,6 +105,9 @@ typedef enum slang_assembly_type_
|
|||
slang_asm_vec4_divide,
|
||||
slang_asm_vec4_negate,
|
||||
slang_asm_vec4_dot,
|
||||
slang_asm_vec4_copy,
|
||||
slang_asm_vec4_deref,
|
||||
slang_asm_vec4_equal_int,
|
||||
/* not a real assembly instruction */
|
||||
slang_asm__last
|
||||
} slang_assembly_type;
|
||||
|
|
|
@ -53,66 +53,89 @@
|
|||
* +------------------+
|
||||
*/
|
||||
|
||||
static GLboolean assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
|
||||
GLuint *index, GLuint size)
|
||||
static GLboolean
|
||||
assign_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *index, GLuint size)
|
||||
{
|
||||
GLuint i;
|
||||
GLuint dst_offset, dst_addr_loc;
|
||||
slang_assembly_type ty;
|
||||
|
||||
for (i = 0; i < agg->count; i++)
|
||||
{
|
||||
const slang_storage_array *arr = &agg->arrays[i];
|
||||
GLuint j;
|
||||
/* Calculate the offset within destination variable to write. */
|
||||
if (A->swz.num_components != 0)
|
||||
dst_offset = A->swz.swizzle[*index / 4] * 4;
|
||||
else
|
||||
dst_offset = *index;
|
||||
|
||||
for (j = 0; j < arr->length; j++)
|
||||
{
|
||||
if (arr->type == slang_stor_aggregate)
|
||||
{
|
||||
if (!assign_aggregate (A, arr->aggregate, index, size))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
GLuint dst_addr_loc, dst_offset;
|
||||
slang_assembly_type ty;
|
||||
switch (type) {
|
||||
case slang_stor_bool:
|
||||
ty = slang_asm_bool_copy;
|
||||
break;
|
||||
case slang_stor_int:
|
||||
ty = slang_asm_int_copy;
|
||||
break;
|
||||
case slang_stor_float:
|
||||
ty = slang_asm_float_copy;
|
||||
break;
|
||||
#if defined(USE_X86_ASM) || defined(SLANG_X86)
|
||||
case slang_stor_vec4:
|
||||
ty = slang_asm_vec4_copy;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
_mesa_problem(NULL, "Unexpected arr->type in assign_basic");
|
||||
ty = slang_asm_none;
|
||||
}
|
||||
|
||||
/* calculate the distance from top of the stack to the destination address */
|
||||
dst_addr_loc = size - *index;
|
||||
/* Calculate the distance from top of the stack to the destination address. As the
|
||||
* copy operation progresses, components of the source are being successively popped
|
||||
* off the stack, each time by the amount that *index is increased.
|
||||
*/
|
||||
dst_addr_loc = size - *index;
|
||||
|
||||
/* calculate the offset within destination variable to write */
|
||||
if (A->swz.num_components != 0)
|
||||
{
|
||||
/* swizzle the index to get the actual offset */
|
||||
dst_offset = A->swz.swizzle[*index / 4] * 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no swizzling - write sequentially */
|
||||
dst_offset = *index;
|
||||
}
|
||||
if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
|
||||
return GL_FALSE;
|
||||
*index += _slang_sizeof_type (type);
|
||||
|
||||
switch (arr->type)
|
||||
{
|
||||
case slang_stor_bool:
|
||||
ty = slang_asm_bool_copy;
|
||||
break;
|
||||
case slang_stor_int:
|
||||
ty = slang_asm_int_copy;
|
||||
break;
|
||||
case slang_stor_float:
|
||||
ty = slang_asm_float_copy;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
|
||||
return GL_FALSE;
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
*index += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
static GLboolean
|
||||
assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
|
||||
GLuint size)
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
return GL_TRUE;
|
||||
for (i = 0; i < agg->count; i++) {
|
||||
const slang_storage_array *arr = &agg->arrays[i];
|
||||
GLuint j;
|
||||
|
||||
for (j = 0; j < arr->length; j++) {
|
||||
if (arr->type == slang_stor_aggregate) {
|
||||
if (!assign_aggregate (A, arr->aggregate, index, size))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
/* When the destination is swizzled, we are forced to do float_copy, even if
|
||||
* the vec4 extension, with its vec4_copy operation, is enabled.
|
||||
*/
|
||||
if (A->swz.num_components != 0 && arr->type == slang_stor_vec4) {
|
||||
if (!assign_basic (A, slang_stor_float, index, size))
|
||||
return GL_FALSE;
|
||||
if (!assign_basic (A, slang_stor_float, index, size))
|
||||
return GL_FALSE;
|
||||
if (!assign_basic (A, slang_stor_float, index, size))
|
||||
return GL_FALSE;
|
||||
if (!assign_basic (A, slang_stor_float, index, size))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
if (!assign_basic (A, arr->type, index, size))
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
GLboolean _slang_assemble_assignment (slang_assemble_ctx *A, slang_operation *op)
|
||||
|
|
|
@ -143,38 +143,32 @@ GLvoid _slang_multiply_swizzles (slang_swizzle *dst, const slang_swizzle *left,
|
|||
|
||||
/* _slang_assemble_constructor() */
|
||||
|
||||
static GLboolean sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
|
||||
static GLboolean
|
||||
sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
|
||||
{
|
||||
slang_assembly_typeinfo ti;
|
||||
GLboolean result = GL_FALSE;
|
||||
slang_storage_aggregate agg, flat_agg;
|
||||
slang_assembly_typeinfo ti;
|
||||
GLboolean result = GL_FALSE;
|
||||
slang_storage_aggregate agg;
|
||||
|
||||
if (!slang_assembly_typeinfo_construct (&ti))
|
||||
return GL_FALSE;
|
||||
if (!_slang_typeof_operation (A, op, &ti))
|
||||
goto end1;
|
||||
if (!slang_assembly_typeinfo_construct (&ti))
|
||||
return GL_FALSE;
|
||||
if (!_slang_typeof_operation (A, op, &ti))
|
||||
goto end1;
|
||||
|
||||
if (!slang_storage_aggregate_construct (&agg))
|
||||
goto end1;
|
||||
if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
|
||||
A->space.vars, A->mach, A->file, A->atoms))
|
||||
goto end2;
|
||||
if (!slang_storage_aggregate_construct (&agg))
|
||||
goto end1;
|
||||
if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
|
||||
A->space.vars, A->mach, A->file, A->atoms))
|
||||
goto end;
|
||||
|
||||
if (!slang_storage_aggregate_construct (&flat_agg))
|
||||
goto end2;
|
||||
if (!_slang_flatten_aggregate (&flat_agg, &agg))
|
||||
goto end;
|
||||
*size = _slang_sizeof_aggregate (&agg);
|
||||
result = GL_TRUE;
|
||||
|
||||
*size = flat_agg.count * 4;
|
||||
|
||||
result = GL_TRUE;
|
||||
end:
|
||||
slang_storage_aggregate_destruct (&flat_agg);
|
||||
end2:
|
||||
slang_storage_aggregate_destruct (&agg);
|
||||
slang_storage_aggregate_destruct (&agg);
|
||||
end1:
|
||||
slang_assembly_typeinfo_destruct (&ti);
|
||||
return result;
|
||||
slang_assembly_typeinfo_destruct (&ti);
|
||||
return result;
|
||||
}
|
||||
|
||||
static GLboolean constructor_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *flat,
|
||||
|
@ -270,7 +264,7 @@ GLboolean _slang_assemble_constructor (slang_assemble_ctx *A, slang_operation *o
|
|||
arg_sums[1] = 0; /* will hold all argument's size sum */
|
||||
for (i = 0; i < op->num_children; i++)
|
||||
{
|
||||
GLuint arg_size;
|
||||
GLuint arg_size = 0;
|
||||
|
||||
if (!sizeof_argument (A, &arg_size, &op->children[i]))
|
||||
goto end;
|
||||
|
|
|
@ -50,6 +50,7 @@ typedef struct
|
|||
struct x86_reg r_eax;
|
||||
struct x86_reg r_ecx;
|
||||
struct x86_reg r_edx;
|
||||
struct x86_reg r_ebx;
|
||||
struct x86_reg r_esp;
|
||||
struct x86_reg r_ebp;
|
||||
struct x86_reg r_st0;
|
||||
|
@ -183,7 +184,7 @@ static GLvoid do_print_bool (slang_info_log **infolog, GLfloat x)
|
|||
|
||||
static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log **infolog)
|
||||
{
|
||||
GLint disp;
|
||||
GLint disp, i;
|
||||
|
||||
switch (a->type)
|
||||
{
|
||||
|
@ -517,128 +518,133 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log *
|
|||
break;
|
||||
case slang_asm_vec4_add:
|
||||
/* [vec4] | vec4 */
|
||||
x87_fld (&G->f, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
|
||||
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_faddp (&G->f, G->r_st4);
|
||||
x87_faddp (&G->f, G->r_st4);
|
||||
x87_faddp (&G->f, G->r_st4);
|
||||
x87_faddp (&G->f, G->r_st4);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_faddp (&G->f, G->r_st4);
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
|
||||
break;
|
||||
case slang_asm_vec4_subtract:
|
||||
/* [vec4] | vec4 */
|
||||
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fld (&G->f, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
|
||||
x87_fsubp (&G->f, G->r_st4);
|
||||
x87_fsubp (&G->f, G->r_st4);
|
||||
x87_fsubp (&G->f, G->r_st4);
|
||||
x87_fsubp (&G->f, G->r_st4);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fsubp (&G->f, G->r_st4);
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
|
||||
break;
|
||||
case slang_asm_vec4_multiply:
|
||||
/* [vec4] | vec4 */
|
||||
x87_fld (&G->f, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
|
||||
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
|
||||
break;
|
||||
case slang_asm_vec4_divide:
|
||||
/* [vec4] | vec4 */
|
||||
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fld (&G->f, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
|
||||
x87_fdivp (&G->f, G->r_st4);
|
||||
x87_fdivp (&G->f, G->r_st4);
|
||||
x87_fdivp (&G->f, G->r_st4);
|
||||
x87_fdivp (&G->f, G->r_st4);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fdivp (&G->f, G->r_st4);
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
|
||||
break;
|
||||
case slang_asm_vec4_negate:
|
||||
/* [vec4] */
|
||||
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fchs (&G->f);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fchs (&G->f);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fchs (&G->f);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fchs (&G->f);
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++) {
|
||||
x87_fchs (&G->f);
|
||||
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
|
||||
}
|
||||
break;
|
||||
case slang_asm_vec4_dot:
|
||||
/* [vec4] | vec4 */
|
||||
x87_fld (&G->f, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
|
||||
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
|
||||
x87_fld (&G->f, x86_deref (G->r_eax));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
x87_faddp (&G->f, G->r_st1);
|
||||
x87_faddp (&G->f, G->r_st1);
|
||||
x87_faddp (&G->f, G->r_st1);
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
|
||||
for (i = 0; i < 4; i++)
|
||||
x87_fmulp (&G->f, G->r_st4);
|
||||
for (i = 0; i < 3; i++)
|
||||
x87_faddp (&G->f, G->r_st1);
|
||||
x87_fstp (&G->f, x86_deref (G->r_eax));
|
||||
break;
|
||||
default:
|
||||
assert (0);
|
||||
}
|
||||
case slang_asm_vec4_copy:
|
||||
/* [vec4] | vec4 */
|
||||
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0]));
|
||||
x86_pop (&G->f, G->r_ecx);
|
||||
x86_pop (&G->f, G->r_edx);
|
||||
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx);
|
||||
x86_pop (&G->f, G->r_ebx);
|
||||
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 4), G->r_edx);
|
||||
x86_pop (&G->f, G->r_ecx);
|
||||
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 8), G->r_ebx);
|
||||
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 12), G->r_ecx);
|
||||
break;
|
||||
case slang_asm_vec4_deref:
|
||||
/* [vec4] */
|
||||
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
|
||||
x86_mov (&G->f, G->r_ecx, x86_make_disp (G->r_eax, 12));
|
||||
x86_mov (&G->f, G->r_edx, x86_make_disp (G->r_eax, 8));
|
||||
x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
|
||||
x86_mov (&G->f, G->r_ebx, x86_make_disp (G->r_eax, 4));
|
||||
x86_push (&G->f, G->r_edx);
|
||||
x86_mov (&G->f, G->r_ecx, x86_deref (G->r_eax));
|
||||
x86_push (&G->f, G->r_ebx);
|
||||
x86_push (&G->f, G->r_ecx);
|
||||
break;
|
||||
case slang_asm_vec4_equal_int:
|
||||
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4));
|
||||
x86_mov_reg_imm (&G->f, G->r_edx, 0x4000);
|
||||
for (i = 0; i < 4; i++) {
|
||||
x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4 + i * 4));
|
||||
x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4 + i * 4));
|
||||
x87_fnstsw (&G->f, G->r_eax);
|
||||
x86_and (&G->f, G->r_edx, G->r_eax);
|
||||
}
|
||||
/* TODO: use test r8,imm8 */
|
||||
x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000);
|
||||
x86_test (&G->f, G->r_edx, G->r_ecx);
|
||||
{
|
||||
GLubyte *lab0, *lab1;
|
||||
|
||||
/* TODO: use jcc rel8 */
|
||||
lab0 = x86_jcc_forward (&G->f, cc_E);
|
||||
x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE);
|
||||
/* TODO: use jmp rel8 */
|
||||
lab1 = x86_jmp_forward (&G->f);
|
||||
x86_fixup_fwd_jump (&G->f, lab0);
|
||||
x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO);
|
||||
x86_fixup_fwd_jump (&G->f, lab1);
|
||||
x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert (0);
|
||||
}
|
||||
}
|
||||
|
||||
GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GLuint start)
|
||||
|
@ -664,6 +670,7 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL
|
|||
G.r_eax = x86_make_reg (file_REG32, reg_AX);
|
||||
G.r_ecx = x86_make_reg (file_REG32, reg_CX);
|
||||
G.r_edx = x86_make_reg (file_REG32, reg_DX);
|
||||
G.r_ebx = x86_make_reg (file_REG32, reg_BX);
|
||||
G.r_esp = x86_make_reg (file_REG32, reg_SP);
|
||||
G.r_ebp = x86_make_reg (file_REG32, reg_BP);
|
||||
G.r_st0 = x86_make_reg (file_x87, 0);
|
||||
|
|
|
@ -211,14 +211,22 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
|
|||
return aggregate_vector (agg, slang_stor_float, 2);
|
||||
case slang_spec_vec3:
|
||||
return aggregate_vector (agg, slang_stor_float, 3);
|
||||
case slang_spec_vec4:
|
||||
return aggregate_vector (agg, slang_stor_float, 4);
|
||||
case slang_spec_vec4:
|
||||
#if defined(USE_X86_ASM) || defined(SLANG_X86)
|
||||
return aggregate_vector (agg, slang_stor_vec4, 1);
|
||||
#else
|
||||
return aggregate_vector (agg, slang_stor_float, 4);
|
||||
#endif
|
||||
case slang_spec_mat2:
|
||||
return aggregate_matrix (agg, slang_stor_float, 2);
|
||||
case slang_spec_mat3:
|
||||
return aggregate_matrix (agg, slang_stor_float, 3);
|
||||
case slang_spec_mat4:
|
||||
return aggregate_matrix (agg, slang_stor_float, 4);
|
||||
case slang_spec_mat4:
|
||||
#if defined(USE_X86_ASM) || defined(SLANG_X86)
|
||||
return aggregate_vector (agg, slang_stor_vec4, 4);
|
||||
#else
|
||||
return aggregate_matrix (agg, slang_stor_float, 4);
|
||||
#endif
|
||||
case slang_spec_sampler1D:
|
||||
case slang_spec_sampler2D:
|
||||
case slang_spec_sampler3D:
|
||||
|
@ -258,54 +266,77 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
|
|||
}
|
||||
}
|
||||
|
||||
/* _slang_sizeof_type() */
|
||||
|
||||
GLuint
|
||||
_slang_sizeof_type (slang_storage_type type)
|
||||
{
|
||||
if (type == slang_stor_aggregate)
|
||||
return 0;
|
||||
if (type == slang_stor_vec4)
|
||||
return 4 * sizeof (GLfloat);
|
||||
return sizeof (GLfloat);
|
||||
}
|
||||
|
||||
/* _slang_sizeof_aggregate() */
|
||||
|
||||
GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg)
|
||||
{
|
||||
GLuint i, size = 0;
|
||||
GLuint i, size = 0;
|
||||
|
||||
for (i = 0; i < agg->count; i++)
|
||||
{
|
||||
GLuint element_size;
|
||||
for (i = 0; i < agg->count; i++) {
|
||||
slang_storage_array *arr = &agg->arrays[i];
|
||||
GLuint element_size;
|
||||
|
||||
if (agg->arrays[i].type == slang_stor_aggregate)
|
||||
element_size = _slang_sizeof_aggregate (agg->arrays[i].aggregate);
|
||||
else
|
||||
element_size = sizeof (GLfloat);
|
||||
size += element_size * agg->arrays[i].length;
|
||||
}
|
||||
return size;
|
||||
if (arr->type == slang_stor_aggregate)
|
||||
element_size = _slang_sizeof_aggregate (arr->aggregate);
|
||||
else
|
||||
element_size = _slang_sizeof_type (arr->type);
|
||||
size += element_size * arr->length;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/* _slang_flatten_aggregate () */
|
||||
|
||||
GLboolean _slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
|
||||
GLboolean
|
||||
_slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
|
||||
{
|
||||
GLuint i;
|
||||
GLuint i;
|
||||
|
||||
for (i = 0; i < agg->count; i++)
|
||||
{
|
||||
GLuint j;
|
||||
for (i = 0; i < agg->count; i++) {
|
||||
GLuint j;
|
||||
|
||||
for (j = 0; j < agg->arrays[i].length; j++)
|
||||
{
|
||||
if (agg->arrays[i].type == slang_stor_aggregate)
|
||||
{
|
||||
if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
slang_storage_array *arr;
|
||||
for (j = 0; j < agg->arrays[i].length; j++) {
|
||||
if (agg->arrays[i].type == slang_stor_aggregate) {
|
||||
if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
GLuint k, count;
|
||||
slang_storage_type type;
|
||||
|
||||
arr = slang_storage_aggregate_push_new (flat);
|
||||
if (arr == NULL)
|
||||
return GL_FALSE;
|
||||
arr->type = agg->arrays[i].type;
|
||||
arr->length = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return GL_TRUE;
|
||||
if (agg->arrays[i].type == slang_stor_vec4) {
|
||||
count = 4;
|
||||
type = slang_stor_float;
|
||||
}
|
||||
else {
|
||||
count = 1;
|
||||
type = agg->arrays[i].type;
|
||||
}
|
||||
|
||||
for (k = 0; k < count; k++) {
|
||||
slang_storage_array *arr;
|
||||
|
||||
arr = slang_storage_aggregate_push_new (flat);
|
||||
if (arr == NULL)
|
||||
return GL_FALSE;
|
||||
arr->type = type;
|
||||
arr->length = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,13 +41,19 @@ extern "C" {
|
|||
*
|
||||
* For now, only the three basic types are supported, that is bool, int and float. Other built-in
|
||||
* types like vector or matrix can easily be decomposed into a series of basic types.
|
||||
*
|
||||
* If the vec4 module is enabled, 4-component vectors of floats are used when possible. 4x4 matrices
|
||||
* are constructed of 4 vec4 slots.
|
||||
*/
|
||||
typedef enum slang_storage_type_
|
||||
{
|
||||
slang_stor_aggregate,
|
||||
slang_stor_bool,
|
||||
slang_stor_int,
|
||||
slang_stor_float
|
||||
/* core */
|
||||
slang_stor_aggregate,
|
||||
slang_stor_bool,
|
||||
slang_stor_int,
|
||||
slang_stor_float,
|
||||
/* vec4 */
|
||||
slang_stor_vec4
|
||||
} slang_storage_type;
|
||||
|
||||
/*
|
||||
|
@ -105,6 +111,14 @@ _slang_evaluate_int(slang_assembly_file *file,
|
|||
GLuint *pint,
|
||||
slang_atom_pool *atoms);
|
||||
|
||||
/*
|
||||
* Returns the size (in machine units) of the given storage type.
|
||||
* It is an error to pass in slang_stor_aggregate.
|
||||
* Returns 0 on error.
|
||||
*/
|
||||
extern GLuint
|
||||
_slang_sizeof_type (slang_storage_type);
|
||||
|
||||
/*
|
||||
* Returns total size (in machine units) of the given aggregate.
|
||||
* Returns 0 on error.
|
||||
|
|
|
@ -367,6 +367,20 @@ void x86_sub( struct x86_function *p,
|
|||
emit_op_modrm(p, 0x2b, 0x29, dst, src );
|
||||
}
|
||||
|
||||
void x86_or( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_op_modrm( p, 0x0b, 0x09, dst, src );
|
||||
}
|
||||
|
||||
void x86_and( struct x86_function *p,
|
||||
struct x86_reg dst,
|
||||
struct x86_reg src )
|
||||
{
|
||||
emit_op_modrm( p, 0x23, 0x21, dst, src );
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
|
|
|
@ -172,12 +172,14 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src
|
|||
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
|
||||
|
||||
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_dec( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_inc( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_mul( struct x86_function *p, struct x86_reg src );
|
||||
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_pop( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_push( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_ret( struct x86_function *p );
|
||||
|
|
Loading…
Reference in New Issue