More GLSL code - more support for vec4 operations on x86 back-end.

This commit is contained in:
Michal Krol 2006-08-09 20:05:26 +00:00
parent 9f344b3e7d
commit edefc658e4
11 changed files with 437 additions and 302 deletions

View File

@ -1463,6 +1463,16 @@ void main () {
printMESA (normalize (_Two4.xyz));
printMESA (normalize (_Three4));
vec4 tmp = _Two4;
printMESA (tmp);
printMESA (_Two4 == _Three4);
printMESA (_Two4 != _Three4);
printMESA (_Two4 == _Two4);
printMESA (_Three4 != _Three4);
printMESA (_Two4 != vec4 (_Two4.xyz, 999.0));
printMESA (_Two4 != vec4 (999.0, _Two4.yzw));
}
$output
@ -1552,3 +1562,14 @@ $output
0.507392
0.522768
2.1
2.2
2.3
2.4
false
true
true
false
true
true

View File

@ -320,7 +320,7 @@ void RenderScene (void)
break;
}
if (fabs (value - attribs[i].data[j]) > EPSILON)
printf ("*** %s\n", "Values are different");
printf ("*** %s, is %f, should be %f\n", "Values are different", value, attribs[i].data[j]);
p = strchr (p, '\n');
if (p != NULL)
p++;

View File

@ -398,74 +398,95 @@ GLboolean _slang_cleanup_stack (slang_assemble_ctx *A, slang_operation *op)
/* _slang_assemble_operation() */
static GLboolean dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
GLuint *size, slang_swizzle *swz, GLboolean is_swizzled)
static GLboolean
dereference_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *size, slang_swizzle *swz,
GLboolean is_swizzled)
{
GLuint i;
GLuint src_offset;
slang_assembly_type ty;
for (i = agg->count; i > 0; i--)
{
const slang_storage_array *arr = &agg->arrays[i - 1];
GLuint j;
*size -= _slang_sizeof_type (type);
for (j = arr->length; j > 0; j--)
{
if (arr->type == slang_stor_aggregate)
{
if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
return GL_FALSE;
}
else
{
GLuint src_offset;
slang_assembly_type ty;
/* If swizzling is taking place, we are forced to use scalar operations, even if we have
* vec4 instructions enabled (this should be actually done with special vec4 shuffle
* instructions).
* Adjust the size and calculate the offset within source variable to read.
*/
if (is_swizzled)
src_offset = swz->swizzle[*size / 4] * 4;
else
src_offset = *size;
*size -= 4;
/* dereference data slot of a basic type */
if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
return GL_FALSE;
if (!PUSH (A->file, slang_asm_addr_deref))
return GL_FALSE;
if (src_offset != 0) {
if (!PLAB (A->file, slang_asm_addr_push, src_offset))
return GL_FALSE;
if (!PUSH (A->file, slang_asm_addr_add))
return GL_FALSE;
}
/* calculate the offset within source variable to read */
if (is_swizzled)
{
/* swizzle the index to get the actual offset */
src_offset = swz->swizzle[*size / 4] * 4;
}
else
{
/* no swizzling - read sequentially */
src_offset = *size;
}
switch (type) {
case slang_stor_bool:
ty = slang_asm_bool_deref;
break;
case slang_stor_int:
ty = slang_asm_int_deref;
break;
case slang_stor_float:
ty = slang_asm_float_deref;
break;
#if defined(USE_X86_ASM) || defined(SLANG_X86)
case slang_stor_vec4:
ty = slang_asm_vec4_deref;
break;
#endif
default:
_mesa_problem(NULL, "Unexpected arr->type in dereference_basic");
ty = slang_asm_none;
}
/* dereference data slot of a basic type */
if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
return GL_FALSE;
if (!PUSH (A->file, slang_asm_addr_deref))
return GL_FALSE;
if (!PLAB (A->file, slang_asm_addr_push, src_offset))
return GL_FALSE;
if (!PUSH (A->file, slang_asm_addr_add))
return GL_FALSE;
return PUSH (A->file, ty);
}
switch (arr->type)
{
case slang_stor_bool:
ty = slang_asm_bool_deref;
break;
case slang_stor_int:
ty = slang_asm_int_deref;
break;
case slang_stor_float:
ty = slang_asm_float_deref;
break;
default:
_mesa_problem(NULL, "Unexpected arr->type in dereference_aggregate");
ty = slang_asm_none;
}
if (!PUSH (A->file, ty))
return GL_FALSE;
}
}
}
static GLboolean
dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *size,
slang_swizzle *swz, GLboolean is_swizzled)
{
GLuint i;
return GL_TRUE;
for (i = agg->count; i > 0; i--) {
const slang_storage_array *arr = &agg->arrays[i - 1];
GLuint j;
for (j = arr->length; j > 0; j--) {
if (arr->type == slang_stor_aggregate) {
if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
return GL_FALSE;
}
else {
if (is_swizzled && arr->type == slang_stor_vec4) {
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
return GL_FALSE;
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
return GL_FALSE;
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
return GL_FALSE;
if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
return GL_FALSE;
}
else {
if (!dereference_basic (A, arr->type, size, swz, is_swizzled))
return GL_FALSE;
}
}
}
}
return GL_TRUE;
}
GLboolean _slang_dereference (slang_assemble_ctx *A, slang_operation *op)
@ -694,35 +715,40 @@ static GLboolean call_asm_instruction (slang_assemble_ctx *A, slang_atom a_name)
return GL_TRUE;
}
static GLboolean equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
GLuint *index, GLuint size, GLuint z_label)
static GLboolean
equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
GLuint size, GLuint z_label)
{
GLuint i;
GLuint i;
for (i = 0; i < agg->count; i++)
{
const slang_storage_array *arr = &agg->arrays[i];
GLuint j;
for (i = 0; i < agg->count; i++) {
const slang_storage_array *arr = &agg->arrays[i];
GLuint j;
for (j = 0; j < arr->length; j++)
{
if (arr->type == slang_stor_aggregate)
{
if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
return GL_FALSE;
}
else
{
if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
return GL_FALSE;
*index += 4;
if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
return GL_FALSE;
}
}
}
for (j = 0; j < arr->length; j++) {
if (arr->type == slang_stor_aggregate) {
if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
return GL_FALSE;
}
else {
#if defined(USE_X86_ASM) || defined(SLANG_X86)
if (arr->type == slang_stor_vec4) {
if (!PLAB2 (A->file, slang_asm_vec4_equal_int, size + *index, *index))
return GL_FALSE;
}
else
#endif
if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
return GL_FALSE;
return GL_TRUE;
*index += _slang_sizeof_type (arr->type);
if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
return GL_FALSE;
}
}
}
return GL_TRUE;
}
static GLboolean equality (slang_assemble_ctx *A, slang_operation *op, GLboolean equal)

View File

@ -105,6 +105,9 @@ typedef enum slang_assembly_type_
slang_asm_vec4_divide,
slang_asm_vec4_negate,
slang_asm_vec4_dot,
slang_asm_vec4_copy,
slang_asm_vec4_deref,
slang_asm_vec4_equal_int,
/* not a real assembly instruction */
slang_asm__last
} slang_assembly_type;

View File

@ -53,66 +53,89 @@
* +------------------+
*/
static GLboolean assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
GLuint *index, GLuint size)
static GLboolean
assign_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *index, GLuint size)
{
GLuint i;
GLuint dst_offset, dst_addr_loc;
slang_assembly_type ty;
for (i = 0; i < agg->count; i++)
{
const slang_storage_array *arr = &agg->arrays[i];
GLuint j;
/* Calculate the offset within destination variable to write. */
if (A->swz.num_components != 0)
dst_offset = A->swz.swizzle[*index / 4] * 4;
else
dst_offset = *index;
for (j = 0; j < arr->length; j++)
{
if (arr->type == slang_stor_aggregate)
{
if (!assign_aggregate (A, arr->aggregate, index, size))
return GL_FALSE;
}
else
{
GLuint dst_addr_loc, dst_offset;
slang_assembly_type ty;
switch (type) {
case slang_stor_bool:
ty = slang_asm_bool_copy;
break;
case slang_stor_int:
ty = slang_asm_int_copy;
break;
case slang_stor_float:
ty = slang_asm_float_copy;
break;
#if defined(USE_X86_ASM) || defined(SLANG_X86)
case slang_stor_vec4:
ty = slang_asm_vec4_copy;
break;
#endif
default:
_mesa_problem(NULL, "Unexpected arr->type in assign_basic");
ty = slang_asm_none;
}
/* calculate the distance from top of the stack to the destination address */
dst_addr_loc = size - *index;
/* Calculate the distance from top of the stack to the destination address. As the
* copy operation progresses, components of the source are being successively popped
* off the stack by the amount of *index increase step.
*/
dst_addr_loc = size - *index;
/* calculate the offset within destination variable to write */
if (A->swz.num_components != 0)
{
/* swizzle the index to get the actual offset */
dst_offset = A->swz.swizzle[*index / 4] * 4;
}
else
{
/* no swizzling - write sequentially */
dst_offset = *index;
}
if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
return GL_FALSE;
*index += _slang_sizeof_type (type);
switch (arr->type)
{
case slang_stor_bool:
ty = slang_asm_bool_copy;
break;
case slang_stor_int:
ty = slang_asm_int_copy;
break;
case slang_stor_float:
ty = slang_asm_float_copy;
break;
default:
break;
}
if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
return GL_FALSE;
return GL_TRUE;
}
*index += 4;
}
}
}
static GLboolean
assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
GLuint size)
{
GLuint i;
return GL_TRUE;
for (i = 0; i < agg->count; i++) {
const slang_storage_array *arr = &agg->arrays[i];
GLuint j;
for (j = 0; j < arr->length; j++) {
if (arr->type == slang_stor_aggregate) {
if (!assign_aggregate (A, arr->aggregate, index, size))
return GL_FALSE;
}
else {
/* When the destination is swizzled, we are forced to do float_copy, even if
* vec4 extension is enabled with vec4_copy operation.
*/
if (A->swz.num_components != 0 && arr->type == slang_stor_vec4) {
if (!assign_basic (A, slang_stor_float, index, size))
return GL_FALSE;
if (!assign_basic (A, slang_stor_float, index, size))
return GL_FALSE;
if (!assign_basic (A, slang_stor_float, index, size))
return GL_FALSE;
if (!assign_basic (A, slang_stor_float, index, size))
return GL_FALSE;
}
else {
if (!assign_basic (A, arr->type, index, size))
return GL_FALSE;
}
}
}
}
return GL_TRUE;
}
GLboolean _slang_assemble_assignment (slang_assemble_ctx *A, slang_operation *op)

View File

@ -143,38 +143,32 @@ GLvoid _slang_multiply_swizzles (slang_swizzle *dst, const slang_swizzle *left,
/* _slang_assemble_constructor() */
static GLboolean sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
static GLboolean
sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
{
slang_assembly_typeinfo ti;
GLboolean result = GL_FALSE;
slang_storage_aggregate agg, flat_agg;
slang_assembly_typeinfo ti;
GLboolean result = GL_FALSE;
slang_storage_aggregate agg;
if (!slang_assembly_typeinfo_construct (&ti))
return GL_FALSE;
if (!_slang_typeof_operation (A, op, &ti))
goto end1;
if (!slang_assembly_typeinfo_construct (&ti))
return GL_FALSE;
if (!_slang_typeof_operation (A, op, &ti))
goto end1;
if (!slang_storage_aggregate_construct (&agg))
goto end1;
if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
A->space.vars, A->mach, A->file, A->atoms))
goto end2;
if (!slang_storage_aggregate_construct (&agg))
goto end1;
if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
A->space.vars, A->mach, A->file, A->atoms))
goto end;
if (!slang_storage_aggregate_construct (&flat_agg))
goto end2;
if (!_slang_flatten_aggregate (&flat_agg, &agg))
goto end;
*size = _slang_sizeof_aggregate (&agg);
result = GL_TRUE;
*size = flat_agg.count * 4;
result = GL_TRUE;
end:
slang_storage_aggregate_destruct (&flat_agg);
end2:
slang_storage_aggregate_destruct (&agg);
slang_storage_aggregate_destruct (&agg);
end1:
slang_assembly_typeinfo_destruct (&ti);
return result;
slang_assembly_typeinfo_destruct (&ti);
return result;
}
static GLboolean constructor_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *flat,
@ -270,7 +264,7 @@ GLboolean _slang_assemble_constructor (slang_assemble_ctx *A, slang_operation *o
arg_sums[1] = 0; /* will hold all argument's size sum */
for (i = 0; i < op->num_children; i++)
{
GLuint arg_size;
GLuint arg_size = 0;
if (!sizeof_argument (A, &arg_size, &op->children[i]))
goto end;

View File

@ -50,6 +50,7 @@ typedef struct
struct x86_reg r_eax;
struct x86_reg r_ecx;
struct x86_reg r_edx;
struct x86_reg r_ebx;
struct x86_reg r_esp;
struct x86_reg r_ebp;
struct x86_reg r_st0;
@ -183,7 +184,7 @@ static GLvoid do_print_bool (slang_info_log **infolog, GLfloat x)
static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log **infolog)
{
GLint disp;
GLint disp, i;
switch (a->type)
{
@ -517,128 +518,133 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log *
break;
case slang_asm_vec4_add:
/* [vec4] | vec4 */
x87_fld (&G->f, x86_deref (G->r_esp));
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_faddp (&G->f, G->r_st4);
x87_faddp (&G->f, G->r_st4);
x87_faddp (&G->f, G->r_st4);
x87_faddp (&G->f, G->r_st4);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
x87_fstp (&G->f, x86_deref (G->r_eax));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++)
x87_faddp (&G->f, G->r_st4);
for (i = 0; i < 4; i++)
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_subtract:
/* [vec4] | vec4 */
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_fld (&G->f, x86_deref (G->r_esp));
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x87_fsubp (&G->f, G->r_st4);
x87_fsubp (&G->f, G->r_st4);
x87_fsubp (&G->f, G->r_st4);
x87_fsubp (&G->f, G->r_st4);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
x87_fstp (&G->f, x86_deref (G->r_eax));
for (i = 0; i < 4; i++)
x87_fsubp (&G->f, G->r_st4);
for (i = 0; i < 4; i++)
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_multiply:
/* [vec4] | vec4 */
x87_fld (&G->f, x86_deref (G->r_esp));
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
x87_fstp (&G->f, x86_deref (G->r_eax));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++)
x87_fmulp (&G->f, G->r_st4);
for (i = 0; i < 4; i++)
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_divide:
/* [vec4] | vec4 */
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_fld (&G->f, x86_deref (G->r_esp));
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x87_fdivp (&G->f, G->r_st4);
x87_fdivp (&G->f, G->r_st4);
x87_fdivp (&G->f, G->r_st4);
x87_fdivp (&G->f, G->r_st4);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
x87_fstp (&G->f, x86_deref (G->r_eax));
for (i = 0; i < 4; i++)
x87_fdivp (&G->f, G->r_st4);
for (i = 0; i < 4; i++)
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_negate:
/* [vec4] */
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_fchs (&G->f);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
x87_fchs (&G->f);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
x87_fchs (&G->f);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
x87_fchs (&G->f);
x87_fstp (&G->f, x86_deref (G->r_eax));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++) {
x87_fchs (&G->f);
x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
}
break;
case slang_asm_vec4_dot:
/* [vec4] | vec4 */
x87_fld (&G->f, x86_deref (G->r_esp));
x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
x87_fld (&G->f, x86_deref (G->r_eax));
x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_fmulp (&G->f, G->r_st4);
x87_faddp (&G->f, G->r_st1);
x87_faddp (&G->f, G->r_st1);
x87_faddp (&G->f, G->r_st1);
for (i = 0; i < 4; i++)
x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
for (i = 0; i < 4; i++)
x87_fmulp (&G->f, G->r_st4);
for (i = 0; i < 3; i++)
x87_faddp (&G->f, G->r_st1);
x87_fstp (&G->f, x86_deref (G->r_eax));
break;
default:
assert (0);
}
case slang_asm_vec4_copy:
/* [vec4] | vec4 */
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0]));
x86_pop (&G->f, G->r_ecx);
x86_pop (&G->f, G->r_edx);
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx);
x86_pop (&G->f, G->r_ebx);
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 4), G->r_edx);
x86_pop (&G->f, G->r_ecx);
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 8), G->r_ebx);
x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 12), G->r_ecx);
break;
case slang_asm_vec4_deref:
/* [vec4] */
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
x86_mov (&G->f, G->r_ecx, x86_make_disp (G->r_eax, 12));
x86_mov (&G->f, G->r_edx, x86_make_disp (G->r_eax, 8));
x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
x86_mov (&G->f, G->r_ebx, x86_make_disp (G->r_eax, 4));
x86_push (&G->f, G->r_edx);
x86_mov (&G->f, G->r_ecx, x86_deref (G->r_eax));
x86_push (&G->f, G->r_ebx);
x86_push (&G->f, G->r_ecx);
break;
case slang_asm_vec4_equal_int:
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4));
x86_mov_reg_imm (&G->f, G->r_edx, 0x4000);
for (i = 0; i < 4; i++) {
x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4 + i * 4));
x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4 + i * 4));
x87_fnstsw (&G->f, G->r_eax);
x86_and (&G->f, G->r_edx, G->r_eax);
}
/* TODO: use test r8,imm8 */
x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000);
x86_test (&G->f, G->r_edx, G->r_ecx);
{
GLubyte *lab0, *lab1;
/* TODO: use jcc rel8 */
lab0 = x86_jcc_forward (&G->f, cc_E);
x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE);
/* TODO: use jmp rel8 */
lab1 = x86_jmp_forward (&G->f);
x86_fixup_fwd_jump (&G->f, lab0);
x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO);
x86_fixup_fwd_jump (&G->f, lab1);
x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
}
break;
default:
assert (0);
}
}
GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GLuint start)
@ -664,6 +670,7 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL
G.r_eax = x86_make_reg (file_REG32, reg_AX);
G.r_ecx = x86_make_reg (file_REG32, reg_CX);
G.r_edx = x86_make_reg (file_REG32, reg_DX);
G.r_ebx = x86_make_reg (file_REG32, reg_BX);
G.r_esp = x86_make_reg (file_REG32, reg_SP);
G.r_ebp = x86_make_reg (file_REG32, reg_BP);
G.r_st0 = x86_make_reg (file_x87, 0);

View File

@ -211,14 +211,22 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
return aggregate_vector (agg, slang_stor_float, 2);
case slang_spec_vec3:
return aggregate_vector (agg, slang_stor_float, 3);
case slang_spec_vec4:
return aggregate_vector (agg, slang_stor_float, 4);
case slang_spec_vec4:
#if defined(USE_X86_ASM) || defined(SLANG_X86)
return aggregate_vector (agg, slang_stor_vec4, 1);
#else
return aggregate_vector (agg, slang_stor_float, 4);
#endif
case slang_spec_mat2:
return aggregate_matrix (agg, slang_stor_float, 2);
case slang_spec_mat3:
return aggregate_matrix (agg, slang_stor_float, 3);
case slang_spec_mat4:
return aggregate_matrix (agg, slang_stor_float, 4);
case slang_spec_mat4:
#if defined(USE_X86_ASM) || defined(SLANG_X86)
return aggregate_vector (agg, slang_stor_vec4, 4);
#else
return aggregate_matrix (agg, slang_stor_float, 4);
#endif
case slang_spec_sampler1D:
case slang_spec_sampler2D:
case slang_spec_sampler3D:
@ -258,54 +266,77 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
}
}
/*
 * _slang_sizeof_type()
 *
 * Size, in machine units, of a single slot of the given storage type.
 * An aggregate has no slot size of its own and yields 0; a vec4 slot is
 * four floats wide; every other storage type occupies one float.
 */
GLuint
_slang_sizeof_type (slang_storage_type type)
{
   switch (type) {
   case slang_stor_aggregate:
      /* not a basic type - reported as 0 */
      return 0;
   case slang_stor_vec4:
      return 4 * sizeof (GLfloat);
   default:
      return sizeof (GLfloat);
   }
}
/* _slang_sizeof_aggregate() */
GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg)
{
GLuint i, size = 0;
GLuint i, size = 0;
for (i = 0; i < agg->count; i++)
{
GLuint element_size;
for (i = 0; i < agg->count; i++) {
slang_storage_array *arr = &agg->arrays[i];
GLuint element_size;
if (agg->arrays[i].type == slang_stor_aggregate)
element_size = _slang_sizeof_aggregate (agg->arrays[i].aggregate);
else
element_size = sizeof (GLfloat);
size += element_size * agg->arrays[i].length;
}
return size;
if (arr->type == slang_stor_aggregate)
element_size = _slang_sizeof_aggregate (arr->aggregate);
else
element_size = _slang_sizeof_type (arr->type);
size += element_size * arr->length;
}
return size;
}
/* _slang_flatten_aggregate () */
GLboolean _slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
GLboolean
_slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
{
GLuint i;
GLuint i;
for (i = 0; i < agg->count; i++)
{
GLuint j;
for (i = 0; i < agg->count; i++) {
GLuint j;
for (j = 0; j < agg->arrays[i].length; j++)
{
if (agg->arrays[i].type == slang_stor_aggregate)
{
if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
return GL_FALSE;
}
else
{
slang_storage_array *arr;
for (j = 0; j < agg->arrays[i].length; j++) {
if (agg->arrays[i].type == slang_stor_aggregate) {
if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
return GL_FALSE;
}
else {
GLuint k, count;
slang_storage_type type;
arr = slang_storage_aggregate_push_new (flat);
if (arr == NULL)
return GL_FALSE;
arr->type = agg->arrays[i].type;
arr->length = 1;
}
}
}
return GL_TRUE;
if (agg->arrays[i].type == slang_stor_vec4) {
count = 4;
type = slang_stor_float;
}
else {
count = 1;
type = agg->arrays[i].type;
}
for (k = 0; k < count; k++) {
slang_storage_array *arr;
arr = slang_storage_aggregate_push_new (flat);
if (arr == NULL)
return GL_FALSE;
arr->type = type;
arr->length = 1;
}
}
}
}
return GL_TRUE;
}

View File

@ -41,13 +41,19 @@ extern "C" {
*
* For now, only the three basic types are supported, that is bool, int and float. Other built-in
* types like vector or matrix can easily be decomposed into a series of basic types.
*
* If the vec4 module is enabled, 4-component vectors of floats are used when possible. 4x4 matrices
* are constructed of 4 vec4 slots.
*/
typedef enum slang_storage_type_
{
slang_stor_aggregate,
slang_stor_bool,
slang_stor_int,
slang_stor_float
/* core */
slang_stor_aggregate,
slang_stor_bool,
slang_stor_int,
slang_stor_float,
/* vec4 */
slang_stor_vec4
} slang_storage_type;
/*
@ -105,6 +111,14 @@ _slang_evaluate_int(slang_assembly_file *file,
GLuint *pint,
slang_atom_pool *atoms);
/*
 * Returns the size (in machine units) of a single slot of the given storage type.
 * It is an error to pass in slang_stor_aggregate, which has no slot size of its own;
 * use _slang_sizeof_aggregate() for aggregates instead.
 * Returns 0 on error.
 */
extern GLuint
_slang_sizeof_type (slang_storage_type);
/*
* Returns total size (in machine units) of the given aggregate.
* Returns 0 on error.

View File

@ -367,6 +367,20 @@ void x86_sub( struct x86_function *p,
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
/*
 * Emits the encoding of a 32-bit OR instruction with operands dst and src.
 * NOTE(review): which of the two opcodes gets used is decided inside
 * emit_op_modrm(), presumably by whether dst is a register or a memory
 * operand - confirm against its definition.
 */
void x86_or( struct x86_function *p,
             struct x86_reg dst,
             struct x86_reg src )
{
   /* IA-32 opcode map: 0x0b is OR r32, r/m32; 0x09 is OR r/m32, r32. */
   enum {
      op_or_r32_rm32 = 0x0b,
      op_or_rm32_r32 = 0x09
   };

   emit_op_modrm( p, op_or_r32_rm32, op_or_rm32_r32, dst, src );
}
/*
 * Emits the encoding of a 32-bit AND instruction with operands dst and src.
 * NOTE(review): which of the two opcodes gets used is decided inside
 * emit_op_modrm(), presumably by whether dst is a register or a memory
 * operand - confirm against its definition.
 */
void x86_and( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   /* IA-32 opcode map: 0x23 is AND r32, r/m32; 0x21 is AND r/m32, r32. */
   enum {
      op_and_r32_rm32 = 0x23,
      op_and_rm32_r32 = 0x21
   };

   emit_op_modrm( p, op_and_r32_rm32, op_and_rm32_r32, dst, src );
}
/***********************************************************************

View File

@ -172,12 +172,14 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );