cell: move command processing code into new spu_command.c file

Brian Paul 2008-09-26 10:15:11 -06:00
parent bb01c1a78e
commit 9d00cd3fc7
4 changed files with 611 additions and 556 deletions

Makefile

@ -16,8 +16,9 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
SOURCES = \
spu_funcs.c \
spu_command.c \
spu_dcache.c \
spu_funcs.c \
spu_main.c \
spu_per_fragment_op.c \
spu_render.c \

spu_command.c

@ -0,0 +1,599 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* SPU command processing code
*/
#include <stdio.h>
#include <libmisc.h>
#include "pipe/p_defines.h"
#include "spu_command.h"
#include "spu_main.h"
#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_vertex_shader.h"
#include "spu_dcache.h"
#include "spu_debug.h"
#include "cell/common.h"
struct spu_vs_context draw;
/**
* Buffers containing dynamically generated SPU code:
*/
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
* This is done by writing a 16-byte batch-buffer-status block back into
* main memory (in cell_context->buffer_status[]).
*/
static void
release_buffer(uint buffer)
{
/* Evidently, using less than a 16-byte status doesn't work reliably */
static const uint status[4] ALIGN16_ATTRIB
= {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
uint *dst = spu.init.buffer_status + index;
ASSERT(buffer < CELL_NUM_BUFFERS);
mfc_put((void *) &status, /* src in local memory */
(unsigned int) dst, /* dst in main memory */
sizeof(status), /* size */
TAG_MISC, /* tag is unimportant */
0, /* tid */
0 /* rid */);
}
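/*
 * Sketch only, not part of this commit: on the PPU side, a buffer can
 * presumably be reused once every SPU has written CELL_BUFFER_STATUS_FREE
 * into its slot of cell_context->buffer_status[].  The indexing mirrors
 * release_buffer() above; the 'num_spus' field and the helper itself are
 * illustrative assumptions about the PPU-side code.
 */
static boolean
ppu_buffer_is_free(const struct cell_context *cell, uint buffer)
{
   uint spu_id;
   for (spu_id = 0; spu_id < cell->num_spus; spu_id++) {
      const uint index = 4 * (spu_id * CELL_NUM_BUFFERS + buffer);
      if (cell->buffer_status[index] != CELL_BUFFER_STATUS_FREE)
         return FALSE;
   }
   return TRUE;
}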
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
if (clear->surface == 0) {
spu.fb.color_clear_value = clear->value;
if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
uint x = (spu.init.id << 4) | (spu.init.id << 12) |
(spu.init.id << 20) | (spu.init.id << 28);
spu.fb.color_clear_value ^= x;
}
}
else {
spu.fb.depth_clear_value = clear->value;
}
#define CLEAR_OPT 1
#if CLEAR_OPT
/* Simply set all tiles' status to CLEAR.
* When we actually begin rendering into a tile, we'll initialize it to
* the clear value. If any tiles go untouched during the frame,
* really_clear_tiles() will set them to the clear value.
*/
if (clear->surface == 0) {
memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
}
else {
memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
}
#else
/*
* This path clears the whole framebuffer to the clear color right now.
*/
/*
printf("SPU: %s num=%d w=%d h=%d\n",
__FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
*/
/* init a single tile to the clear value */
if (clear->surface == 0) {
clear_c_tile(&spu.ctile);
}
else {
clear_z_tile(&spu.ztile);
}
/* walk over my tiles, writing the 'clear' tile's data */
{
const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
uint i;
for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
uint tx = i % spu.fb.width_tiles;
uint ty = i / spu.fb.width_tiles;
if (clear->surface == 0)
put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
else
put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
}
}
if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
wait_on_mask(1 << TAG_SURFACE_CLEAR);
}
#endif /* CLEAR_OPT */
DEBUG_PRINTF("CLEAR SURF done\n");
}
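/*
 * Sketch (assumption): with CLEAR_OPT enabled above, nothing is written to
 * memory at clear time, so a pass roughly like the one below is expected to
 * run later (see really_clear_tiles() in cmd_finish()) to push the clear
 * value into any tile that was never rendered during the frame.  The
 * [ty][tx] indexing of ctile_status is an assumption.
 */
static void
flush_untouched_color_tiles(void)
{
   const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
   uint i;
   clear_c_tile(&spu.ctile);
   for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
      const uint tx = i % spu.fb.width_tiles;
      const uint ty = i / spu.fb.width_tiles;
      if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR)
         put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
   }
}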
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
ASSERT(release->vertex_buf != ~0U);
release_buffer(release->vertex_buf);
}
/**
* Process a CELL_CMD_STATE_FRAGMENT_OPS command.
* This involves installing new fragment ops SPU code.
* If this function is never called, we'll use a regular C fallback function
* for fragment processing.
*/
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
static int warned = 0;
DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
/* Parity twist! For now, always use the fallback code by default,
* only switching to codegen when specifically requested. This
* allows us to develop freely without risking taking down the
* branch.
*
* Later, the parity of this check will be reversed, so that
* codegen is *always* used, unless we specifically indicate that
* we don't want it.
*
* Eventually, the option will be removed completely, because in
* final code we'll always use codegen and won't even provide the
* raw state records that the fallback code requires.
*/
if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
}
else {
/* otherwise, the default fallback code remains in place */
if (!warned) {
fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
warned = 1;
}
}
spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
}
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_program_code, fp->code,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
#if 01
/* Point function pointer at new code */
spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
#endif
}
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
cmd->width,
cmd->height,
cmd->color_start,
cmd->color_format,
cmd->depth_format);
ASSERT_ALIGN16(cmd->color_start);
ASSERT_ALIGN16(cmd->depth_start);
spu.fb.color_start = cmd->color_start;
spu.fb.depth_start = cmd->depth_start;
spu.fb.color_format = cmd->color_format;
spu.fb.depth_format = cmd->depth_format;
spu.fb.width = cmd->width;
spu.fb.height = cmd->height;
spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z32_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0xffffffffu;
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0x00ffffffu;
break;
case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
spu.fb.zscale = (float) 0xffffu;
break;
default:
spu.fb.zsize = 0;
break;
}
}
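/*
 * Minimal sketch (assumption): how the zscale value chosen above might be
 * used when converting a fragment's float depth into the framebuffer's
 * integer Z format.  The helper name is illustrative.
 */
static inline uint
float_z_to_fb_z(float z)
{
   /* z is expected in [0,1]; zscale maps it onto the format's integer
    * range (e.g. 0..0x00ffffff for the 24-bit formats handled above). */
   return (uint) (z * spu.fb.zscale);
}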
static void
cmd_state_sampler(const struct cell_command_sampler *sampler)
{
DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);
spu.sampler[sampler->unit] = sampler->state;
if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
spu.sample_texture[sampler->unit] = sample_texture_bilinear;
else
spu.sample_texture[sampler->unit] = sample_texture_nearest;
}
static void
cmd_state_texture(const struct cell_command_texture *texture)
{
const uint unit = texture->unit;
const uint width = texture->width;
const uint height = texture->height;
DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n",
texture->unit, texture->start,
texture->width, texture->height);
spu.texture[unit].start = texture->start;
spu.texture[unit].width = width;
spu.texture[unit].height = height;
spu.texture[unit].tiles_per_row = width / TILE_SIZE;
spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0};
spu.texture[unit].tex_size_mask = (vector unsigned int)
{ width - 1, height - 1, 0, 0 };
spu.texture[unit].tex_size_x_mask = spu_splats(width - 1);
spu.texture[unit].tex_size_y_mask = spu_splats(height - 1);
}
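/*
 * Minimal sketch (assumption): the width-1 / height-1 masks set up above
 * only work for power-of-two texture sizes, where repeat-style wrapping of
 * integer texel coordinates reduces to a bitwise AND.  The helper below is
 * illustrative, not this commit's sampling code.
 */
static inline vector unsigned int
wrap_texcoords_repeat(uint unit, vector unsigned int itexcoords)
{
   return spu_and(itexcoords, spu.texture[unit].tex_size_mask);
}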
static void
cmd_state_vertex_info(const struct vertex_info *vinfo)
{
DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
ASSERT(vinfo->num_attribs >= 1);
ASSERT(vinfo->num_attribs <= 8);
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
}
static void
cmd_state_vs_array_info(const struct cell_array_info *vs_info)
{
const unsigned attr = vs_info->attr;
ASSERT(attr < PIPE_MAX_ATTRIBS);
draw.vertex_fetch.src_ptr[attr] = vs_info->base;
draw.vertex_fetch.pitch[attr] = vs_info->pitch;
draw.vertex_fetch.size[attr] = vs_info->size;
draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
draw.vertex_fetch.dirty = 1;
}
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
mfc_get(attribute_fetch_code_buffer,
(unsigned int) code->base, /* src */
code->size,
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
draw.vertex_fetch.code = attribute_fetch_code_buffer;
}
static void
cmd_finish(void)
{
DEBUG_PRINTF("FINISH\n");
really_clear_tiles(0);
/* wait for all outstanding DMAs to finish */
mfc_write_tag_mask(~0);
mfc_read_tag_status_all();
/* send mbox message to PPU */
spu_write_out_mbox(CELL_CMD_FINISH);
}
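/*
 * Sketch (assumption): the PPU presumably packs the CELL_CMD_BATCH opcode
 * word so that cmd_batch() below can recover the buffer index and size:
 * the low byte holds the opcode, bits 8..15 the buffer index, and bits
 * 16..31 the byte count.  The helper name is illustrative.
 */
static inline uint
make_batch_opcode(uint buffer_index, uint size_in_bytes)
{
   ASSERT(buffer_index < CELL_NUM_BUFFERS);
   ASSERT(size_in_bytes <= CELL_BUFFER_SIZE);
   return CELL_CMD_BATCH | (buffer_index << 8) | (size_in_bytes << 16);
}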
/**
* Execute a batch of commands which was sent to us by the PPU.
* See the cell_emit_state.c code to see where the commands come from.
*
* The opcode param encodes the location of the buffer and its size.
*/
static void
cmd_batch(uint opcode)
{
const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16);
uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
const unsigned usize = size / sizeof(buffer[0]);
uint pos;
DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
buf, size, spu.init.buffers[buf]);
ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
ASSERT_ALIGN16(spu.init.buffers[buf]);
size = ROUNDUP16(size);
ASSERT_ALIGN16(spu.init.buffers[buf]);
mfc_get(buffer, /* dest */
(unsigned int) spu.init.buffers[buf], /* src */
size,
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
DEBUG_PRINTF("release batch buf %u\n", buf);
release_buffer(buf);
/*
* Loop over commands in the batch buffer
*/
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
/*
* rendering commands
*/
case CELL_CMD_CLEAR_SURFACE:
{
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *) &buffer[pos];
cmd_clear_surface(clr);
pos += sizeof(*clr) / 8;
}
break;
case CELL_CMD_RENDER:
{
struct cell_command_render *render
= (struct cell_command_render *) &buffer[pos];
uint pos_incr;
cmd_render(render, &pos_incr);
pos += pos_incr;
}
break;
/*
* state-update commands
*/
case CELL_CMD_STATE_FRAMEBUFFER:
{
struct cell_command_framebuffer *fb
= (struct cell_command_framebuffer *) &buffer[pos];
cmd_state_framebuffer(fb);
pos += sizeof(*fb) / 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_OPS:
{
struct cell_command_fragment_ops *fops
= (struct cell_command_fragment_ops *) &buffer[pos];
cmd_state_fragment_ops(fops);
pos += sizeof(*fops) / 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
{
struct cell_command_fragment_program *fp
= (struct cell_command_fragment_program *) &buffer[pos];
cmd_state_fragment_program(fp);
pos += sizeof(*fp) / 8;
}
break;
case CELL_CMD_STATE_SAMPLER:
{
struct cell_command_sampler *sampler
= (struct cell_command_sampler *) &buffer[pos];
cmd_state_sampler(sampler);
pos += sizeof(*sampler) / 8;
}
break;
case CELL_CMD_STATE_TEXTURE:
{
struct cell_command_texture *texture
= (struct cell_command_texture *) &buffer[pos];
cmd_state_texture(texture);
pos += sizeof(*texture) / 8;
}
break;
case CELL_CMD_STATE_VERTEX_INFO:
cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
break;
case CELL_CMD_STATE_VIEWPORT:
(void) memcpy(& draw.viewport, &buffer[pos+1],
sizeof(struct pipe_viewport_state));
pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
break;
case CELL_CMD_STATE_UNIFORMS:
draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
pos += 2;
break;
case CELL_CMD_STATE_VS_ARRAY_INFO:
cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
case CELL_CMD_STATE_BIND_VS:
#if 0
spu_bind_vertex_shader(&draw,
(struct cell_shader_info *) &buffer[pos+1]);
#endif
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
case CELL_CMD_STATE_ATTRIB_FETCH:
cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
/*
* misc commands
*/
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;
break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
= (struct cell_command_release_verts *) &buffer[pos];
cmd_release_verts(release);
pos += sizeof(*release) / 8;
}
break;
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
&buffer[pos+1];
spu_dcache_mark_dirty((unsigned) br->base, br->size);
pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
break;
}
default:
printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
ASSERT(0);
break;
}
}
DEBUG_PRINTF("BATCH complete\n");
}
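/*
 * Sketch (assumption): the 'pos += sizeof(*cmd) / 8' arithmetic above only
 * works because every command struct occupies a whole number of 8-byte
 * words in the batch buffer.  A PPU-side append helper would therefore
 * look roughly like this; the name and signature are illustrative.
 */
static void
ppu_append_command(uint64_t *batch, uint *pos, const void *cmd, uint bytes)
{
   ASSERT(bytes % 8 == 0);     /* command structs are 8-byte multiples */
   memcpy(&batch[*pos], cmd, bytes);
   *pos += bytes / 8;
}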
/**
* Main loop for SPEs: Get a command, execute it, repeat.
*/
void
command_loop(void)
{
struct cell_command cmd;
int exitFlag = 0;
DEBUG_PRINTF("Enter command loop\n");
ASSERT((sizeof(struct cell_command) & 0xf) == 0);
ASSERT_ALIGN16(&cmd);
while (!exitFlag) {
unsigned opcode;
int tag = 0;
DEBUG_PRINTF("Wait for cmd...\n");
/* read/wait from mailbox */
opcode = (unsigned int) spu_read_in_mbox();
DEBUG_PRINTF("got cmd 0x%x\n", opcode);
/* command payload */
mfc_get(&cmd, /* dest */
(unsigned int) spu.init.cmd, /* src */
sizeof(struct cell_command), /* bytes */
tag,
0, /* tid */
0 /* rid */);
wait_on_mask( 1 << tag );
/*
* NOTE: most commands should be contained in a batch buffer
*/
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
DEBUG_PRINTF("EXIT\n");
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
#if 0
spu_execute_vertex_shader(&draw, &cmd.vs);
#endif
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
break;
default:
printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
}
}
DEBUG_PRINTF("Exit command loop\n");
spu_dcache_report();
}
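/*
 * Sketch (assumption): on the PPU side this loop is presumably driven by
 * writing the opcode into each SPU's inbound mailbox, e.g. with libspe2's
 * spe_in_mbox_write().  The exact call and arguments are assumptions, not
 * part of this commit.
 */
static void
ppu_send_command(spe_context_ptr_t spe, unsigned int opcode)
{
   /* blocks until the SPU's inbound mailbox has room for the word */
   spe_in_mbox_write(spe, &opcode, 1, SPE_MBOX_ALL_BLOCKING);
}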

spu_command.h

@ -0,0 +1,7 @@
extern void
command_loop(void);

spu_main.c

@ -35,14 +35,11 @@
#include "pipe/p_defines.h"
#include "spu_funcs.h"
#include "spu_command.h"
#include "spu_main.h"
#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
//#include "spu_test.h"
#include "spu_vertex_shader.h"
#include "spu_dcache.h"
#include "spu_debug.h"
#include "cell/common.h"
@ -55,8 +52,6 @@ helpful headers:
struct spu_global spu;
struct spu_vs_context draw;
#if DEBUG
boolean Debug = FALSE;
@ -64,554 +59,6 @@ boolean force_fragment_ops_fallback = TRUE;
#endif
/**
* Buffers containing dynamically generated SPU code:
*/
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
* This is done by writing a 16-byte batch-buffer-status block back into
* main memory (in cell_context->buffer_status[]).
*/
static void
release_buffer(uint buffer)
{
/* Evidently, using less than a 16-byte status doesn't work reliably */
static const uint status[4] ALIGN16_ATTRIB
= {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
uint *dst = spu.init.buffer_status + index;
ASSERT(buffer < CELL_NUM_BUFFERS);
mfc_put((void *) &status, /* src in local memory */
(unsigned int) dst, /* dst in main memory */
sizeof(status), /* size */
TAG_MISC, /* tag is unimportant */
0, /* tid */
0 /* rid */);
}
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
if (clear->surface == 0) {
spu.fb.color_clear_value = clear->value;
if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
uint x = (spu.init.id << 4) | (spu.init.id << 12) |
(spu.init.id << 20) | (spu.init.id << 28);
spu.fb.color_clear_value ^= x;
}
}
else {
spu.fb.depth_clear_value = clear->value;
}
#define CLEAR_OPT 1
#if CLEAR_OPT
/* Simply set all tiles' status to CLEAR.
* When we actually begin rendering into a tile, we'll initialize it to
* the clear value. If any tiles go untouched during the frame,
* really_clear_tiles() will set them to the clear value.
*/
if (clear->surface == 0) {
memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
}
else {
memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
}
#else
/*
* This path clears the whole framebuffer to the clear color right now.
*/
/*
printf("SPU: %s num=%d w=%d h=%d\n",
__FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
*/
/* init a single tile to the clear value */
if (clear->surface == 0) {
clear_c_tile(&spu.ctile);
}
else {
clear_z_tile(&spu.ztile);
}
/* walk over my tiles, writing the 'clear' tile's data */
{
const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
uint i;
for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
uint tx = i % spu.fb.width_tiles;
uint ty = i / spu.fb.width_tiles;
if (clear->surface == 0)
put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
else
put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
}
}
if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
wait_on_mask(1 << TAG_SURFACE_CLEAR);
}
#endif /* CLEAR_OPT */
DEBUG_PRINTF("CLEAR SURF done\n");
}
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
ASSERT(release->vertex_buf != ~0U);
release_buffer(release->vertex_buf);
}
/**
* Process a CELL_CMD_STATE_FRAGMENT_OPS command.
* This involves installing new fragment ops SPU code.
* If this function is never called, we'll use a regular C fallback function
* for fragment processing.
*/
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
static int warned = 0;
DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
/* Parity twist! For now, always use the fallback code by default,
* only switching to codegen when specifically requested. This
* allows us to develop freely without risking taking down the
* branch.
*
* Later, the parity of this check will be reversed, so that
* codegen is *always* used, unless we specifically indicate that
* we don't want it.
*
* Eventually, the option will be removed completely, because in
* final code we'll always use codegen and won't even provide the
* raw state records that the fallback code requires.
*/
if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
}
else {
/* otherwise, the default fallback code remains in place */
if (!warned) {
fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
warned = 1;
}
}
spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
}
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_program_code, fp->code,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
#if 01
/* Point function pointer at new code */
spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
#endif
}
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
cmd->width,
cmd->height,
cmd->color_start,
cmd->color_format,
cmd->depth_format);
ASSERT_ALIGN16(cmd->color_start);
ASSERT_ALIGN16(cmd->depth_start);
spu.fb.color_start = cmd->color_start;
spu.fb.depth_start = cmd->depth_start;
spu.fb.color_format = cmd->color_format;
spu.fb.depth_format = cmd->depth_format;
spu.fb.width = cmd->width;
spu.fb.height = cmd->height;
spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z32_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0xffffffffu;
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0x00ffffffu;
break;
case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
spu.fb.zscale = (float) 0xffffu;
break;
default:
spu.fb.zsize = 0;
break;
}
}
static void
cmd_state_sampler(const struct cell_command_sampler *sampler)
{
DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);
spu.sampler[sampler->unit] = sampler->state;
if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
spu.sample_texture[sampler->unit] = sample_texture_bilinear;
else
spu.sample_texture[sampler->unit] = sample_texture_nearest;
}
static void
cmd_state_texture(const struct cell_command_texture *texture)
{
const uint unit = texture->unit;
const uint width = texture->width;
const uint height = texture->height;
DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n",
texture->unit, texture->start,
texture->width, texture->height);
spu.texture[unit].start = texture->start;
spu.texture[unit].width = width;
spu.texture[unit].height = height;
spu.texture[unit].tiles_per_row = width / TILE_SIZE;
spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0};
spu.texture[unit].tex_size_mask = (vector unsigned int)
{ width - 1, height - 1, 0, 0 };
spu.texture[unit].tex_size_x_mask = spu_splats(width - 1);
spu.texture[unit].tex_size_y_mask = spu_splats(height - 1);
}
static void
cmd_state_vertex_info(const struct vertex_info *vinfo)
{
DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
ASSERT(vinfo->num_attribs >= 1);
ASSERT(vinfo->num_attribs <= 8);
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
}
static void
cmd_state_vs_array_info(const struct cell_array_info *vs_info)
{
const unsigned attr = vs_info->attr;
ASSERT(attr < PIPE_MAX_ATTRIBS);
draw.vertex_fetch.src_ptr[attr] = vs_info->base;
draw.vertex_fetch.pitch[attr] = vs_info->pitch;
draw.vertex_fetch.size[attr] = vs_info->size;
draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
draw.vertex_fetch.dirty = 1;
}
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
mfc_get(attribute_fetch_code_buffer,
(unsigned int) code->base, /* src */
code->size,
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
draw.vertex_fetch.code = attribute_fetch_code_buffer;
}
static void
cmd_finish(void)
{
DEBUG_PRINTF("FINISH\n");
really_clear_tiles(0);
/* wait for all outstanding DMAs to finish */
mfc_write_tag_mask(~0);
mfc_read_tag_status_all();
/* send mbox message to PPU */
spu_write_out_mbox(CELL_CMD_FINISH);
}
/**
* Execute a batch of commands which was sent to us by the PPU.
* See the cell_emit_state.c code to see where the commands come from.
*
* The opcode param encodes the location of the buffer and its size.
*/
static void
cmd_batch(uint opcode)
{
const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16);
uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
const unsigned usize = size / sizeof(buffer[0]);
uint pos;
DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
buf, size, spu.init.buffers[buf]);
ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
ASSERT_ALIGN16(spu.init.buffers[buf]);
size = ROUNDUP16(size);
ASSERT_ALIGN16(spu.init.buffers[buf]);
mfc_get(buffer, /* dest */
(unsigned int) spu.init.buffers[buf], /* src */
size,
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
DEBUG_PRINTF("release batch buf %u\n", buf);
release_buffer(buf);
/*
* Loop over commands in the batch buffer
*/
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
/*
* rendering commands
*/
case CELL_CMD_CLEAR_SURFACE:
{
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *) &buffer[pos];
cmd_clear_surface(clr);
pos += sizeof(*clr) / 8;
}
break;
case CELL_CMD_RENDER:
{
struct cell_command_render *render
= (struct cell_command_render *) &buffer[pos];
uint pos_incr;
cmd_render(render, &pos_incr);
pos += pos_incr;
}
break;
/*
* state-update commands
*/
case CELL_CMD_STATE_FRAMEBUFFER:
{
struct cell_command_framebuffer *fb
= (struct cell_command_framebuffer *) &buffer[pos];
cmd_state_framebuffer(fb);
pos += sizeof(*fb) / 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_OPS:
{
struct cell_command_fragment_ops *fops
= (struct cell_command_fragment_ops *) &buffer[pos];
cmd_state_fragment_ops(fops);
pos += sizeof(*fops) / 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
{
struct cell_command_fragment_program *fp
= (struct cell_command_fragment_program *) &buffer[pos];
cmd_state_fragment_program(fp);
pos += sizeof(*fp) / 8;
}
break;
case CELL_CMD_STATE_SAMPLER:
{
struct cell_command_sampler *sampler
= (struct cell_command_sampler *) &buffer[pos];
cmd_state_sampler(sampler);
pos += sizeof(*sampler) / 8;
}
break;
case CELL_CMD_STATE_TEXTURE:
{
struct cell_command_texture *texture
= (struct cell_command_texture *) &buffer[pos];
cmd_state_texture(texture);
pos += sizeof(*texture) / 8;
}
break;
case CELL_CMD_STATE_VERTEX_INFO:
cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
break;
case CELL_CMD_STATE_VIEWPORT:
(void) memcpy(& draw.viewport, &buffer[pos+1],
sizeof(struct pipe_viewport_state));
pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
break;
case CELL_CMD_STATE_UNIFORMS:
draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
pos += 2;
break;
case CELL_CMD_STATE_VS_ARRAY_INFO:
cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
case CELL_CMD_STATE_BIND_VS:
#if 0
spu_bind_vertex_shader(&draw,
(struct cell_shader_info *) &buffer[pos+1]);
#endif
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
case CELL_CMD_STATE_ATTRIB_FETCH:
cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
/*
* misc commands
*/
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;
break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
= (struct cell_command_release_verts *) &buffer[pos];
cmd_release_verts(release);
pos += sizeof(*release) / 8;
}
break;
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
&buffer[pos+1];
spu_dcache_mark_dirty((unsigned) br->base, br->size);
pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
break;
}
default:
printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
ASSERT(0);
break;
}
}
DEBUG_PRINTF("BATCH complete\n");
}
/**
* Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
*/
static void
main_loop(void)
{
struct cell_command cmd;
int exitFlag = 0;
DEBUG_PRINTF("Enter main loop\n");
ASSERT((sizeof(struct cell_command) & 0xf) == 0);
ASSERT_ALIGN16(&cmd);
while (!exitFlag) {
unsigned opcode;
int tag = 0;
DEBUG_PRINTF("Wait for cmd...\n");
/* read/wait from mailbox */
opcode = (unsigned int) spu_read_in_mbox();
DEBUG_PRINTF("got cmd 0x%x\n", opcode);
/* command payload */
mfc_get(&cmd, /* dest */
(unsigned int) spu.init.cmd, /* src */
sizeof(struct cell_command), /* bytes */
tag,
0, /* tid */
0 /* rid */);
wait_on_mask( 1 << tag );
/*
* NOTE: most commands should be contained in a batch buffer
*/
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
DEBUG_PRINTF("EXIT\n");
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
#if 0
spu_execute_vertex_shader(&draw, &cmd.vs);
#endif
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
break;
default:
printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
}
}
DEBUG_PRINTF("Exit main loop\n");
spu_dcache_report();
}
static void
one_time_init(void)
{
@ -658,6 +105,7 @@ main(main_param_t speid, main_param_t argp)
DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid);
D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
/* get initialization data */
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */
sizeof(struct cell_init_info), /* bytes */
@ -675,7 +123,7 @@ main(main_param_t speid, main_param_t argp)
spu_test_misc(spu.init.id);
#endif
main_loop();
command_loop();
return 0;
}