From 9d00cd3fc726a3fe01b98fd222dd4c71b3e95d44 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 26 Sep 2008 10:15:11 -0600 Subject: [PATCH] cell: move command processing code into new spu_command.c file --- src/gallium/drivers/cell/spu/Makefile | 3 +- src/gallium/drivers/cell/spu/spu_command.c | 599 +++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_command.h | 7 + src/gallium/drivers/cell/spu/spu_main.c | 558 +------------------ 4 files changed, 611 insertions(+), 556 deletions(-) create mode 100644 src/gallium/drivers/cell/spu/spu_command.c create mode 100644 src/gallium/drivers/cell/spu/spu_command.h diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index c2db85247e0..116453b79c5 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -16,8 +16,9 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ - spu_funcs.c \ + spu_command.c \ spu_dcache.c \ + spu_funcs.c \ spu_main.c \ spu_per_fragment_op.c \ spu_render.c \ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 00000000000..ec9da5d8870 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,599 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include +#include + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "spu_debug.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] + ALIGN16_ATTRIB; + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } + } + else { + spu.fb.depth_clear_value = clear->value; + } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + } + +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); + } + + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ + + DEBUG_PRINTF("CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + static int warned = 0; + + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info (for fallback case only) */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. + * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + else { + /* otherwise, the default fallback code remains in place */ + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + } + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; +} + + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; + break; + case PIPE_FORMAT_Z16_UNORM: + spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; + break; + default: + spu.fb.zsize = 0; + break; + } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ + DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); + + spu.sampler[sampler->unit] = sampler->state; + if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture[sampler->unit] = sample_texture_bilinear; + else + spu.sample_texture[sampler->unit] = sample_texture_nearest; +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + const uint unit = texture->unit; + const uint width = texture->width; + const uint height = texture->height; + + DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", + texture->unit, texture->start, + texture->width, texture->height); + + spu.texture[unit].start = texture->start; + spu.texture[unit].width = width; + spu.texture[unit].height = height; + + spu.texture[unit].tiles_per_row = width / TILE_SIZE; + + spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; + spu.texture[unit].tex_size_mask = (vector unsigned int) + { width - 1, height - 1, 0, 0 }; + spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); + spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_MAX_ATTRIBS); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ + DEBUG_PRINTF("FINISH\n"); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + DEBUG_PRINTF("release batch buf %u\n", buf); + release_buffer(buf); + + /* + * Loop over commands in the batch buffer + */ + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + /* + * rendering commands + */ + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 8; + } + break; + case CELL_CMD_STATE_SAMPLER: + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 8; + } + break; + case CELL_CMD_STATE_TEXTURE: + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 8; + } + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + case CELL_CMD_STATE_BIND_VS: +#if 0 + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); +#endif + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); + break; + } + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + DEBUG_PRINTF("BATCH complete\n"); +} + + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. + */ +void +command_loop(void) +{ + struct cell_command cmd; + int exitFlag = 0; + + DEBUG_PRINTF("Enter command loop\n"); + + ASSERT((sizeof(struct cell_command) & 0xf) == 0); + ASSERT_ALIGN16(&cmd); + + while (!exitFlag) { + unsigned opcode; + int tag = 0; + + DEBUG_PRINTF("Wait for cmd...\n"); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + + DEBUG_PRINTF("got cmd 0x%x\n", opcode); + + /* command payload */ + mfc_get(&cmd, /* dest */ + (unsigned int) spu.init.cmd, /* src */ + sizeof(struct cell_command), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + /* + * NOTE: most commands should be contained in a batch buffer + */ + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + DEBUG_PRINTF("EXIT\n"); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: +#if 0 + spu_execute_vertex_shader(&draw, &cmd.vs); +#endif + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); + } + + } + + DEBUG_PRINTF("Exit command loop\n"); + + spu_dcache_report(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 00000000000..853e9aa5498 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,7 @@ + + + +extern void +command_loop(void); + + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index bc94674fe82..4becd0f92a4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -35,14 +35,11 @@ #include "pipe/p_defines.h" #include "spu_funcs.h" +#include "spu_command.h" #include "spu_main.h" -#include "spu_render.h" #include "spu_per_fragment_op.h" #include "spu_texture.h" -#include "spu_tile.h" //#include "spu_test.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" #include "spu_debug.h" #include "cell/common.h" @@ -55,8 +52,6 @@ helpful headers: struct spu_global spu; -struct spu_vs_context draw; - #if DEBUG boolean Debug = FALSE; @@ -64,554 +59,6 @@ boolean force_fragment_ops_fallback = TRUE; #endif -/** - * Buffers containing dynamically generated SPU code: - */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; - - - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). - */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { - uint x = (spu.init.id << 4) | (spu.init.id << 12) | - (spu.init.id << 20) | (spu.init.id << 28); - spu.fb.color_clear_value ^= x; - } - } - else { - spu.fb.depth_clear_value = clear->value; - } - -#define CLEAR_OPT 1 -#if CLEAR_OPT - - /* Simply set all tiles' status to CLEAR. - * When we actually begin rendering into a tile, we'll initialize it to - * the clear value. If any tiles go untouched during the frame, - * really_clear_tiles() will set them to the clear value. - */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - } - else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - } - -#else - - /* - * This path clears the whole framebuffer to the clear color right now. - */ - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - /* init a single tile to the clear value */ - if (clear->surface == 0) { - clear_c_tile(&spu.ctile); - } - else { - clear_z_tile(&spu.ztile); - } - - /* walk over my tiles, writing the 'clear' tile's data */ - { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - } - } - - if (spu.init.debug_flags & CELL_DEBUG_SYNC) { - wait_on_mask(1 << TAG_SURFACE_CLEAR); - } - -#endif /* CLEAR_OPT */ - - DEBUG_PRINTF("CLEAR SURF done\n"); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -/** - * Process a CELL_CMD_STATE_FRAGMENT_OPS command. - * This involves installing new fragment ops SPU code. - * If this function is never called, we'll use a regular C fallback function - * for fragment processing. - */ -static void -cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) -{ - static int warned = 0; - - DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info (for fallback case only) */ - memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); - memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - - /* Parity twist! For now, always use the fallback code by default, - * only switching to codegen when specifically requested. This - * allows us to develop freely without risking taking down the - * branch. - * - * Later, the parity of this check will be reversed, so that - * codegen is *always* used, unless we specifically indicate that - * we don't want it. - * - * Eventually, the option will be removed completely, because in - * final code we'll always use codegen and won't even provide the - * raw state records that the fallback code requires. - */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; - } - else { - /* otherwise, the default fallback code remains in place */ - if (!warned) { - fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); - warned = 1; - } - } - - spu.read_depth = spu.depth_stencil_alpha.depth.enabled; - spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; -} - - -static void -cmd_state_fragment_program(const struct cell_command_fragment_program *fp) -{ - DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_program_code, fp->code, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); -#if 01 - /* Point function pointer at new code */ - spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; -#endif -} - - -static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z32_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0xffffffffu; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0x00ffffffu; - break; - case PIPE_FORMAT_Z16_UNORM: - spu.fb.zsize = 2; - spu.fb.zscale = (float) 0xffffu; - break; - default: - spu.fb.zsize = 0; - break; - } -} - - -static void -cmd_state_sampler(const struct cell_command_sampler *sampler) -{ - DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); - - spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) - spu.sample_texture[sampler->unit] = sample_texture_bilinear; - else - spu.sample_texture[sampler->unit] = sample_texture_nearest; -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - const uint unit = texture->unit; - const uint width = texture->width; - const uint height = texture->height; - - DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", - texture->unit, texture->start, - texture->width, texture->height); - - spu.texture[unit].start = texture->start; - spu.texture[unit].width = width; - spu.texture[unit].height = height; - - spu.texture[unit].tiles_per_row = width / TILE_SIZE; - - spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; - spu.texture[unit].tex_size_mask = (vector unsigned int) - { width - 1, height - 1, 0, 0 }; - spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_MAX_ATTRIBS); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.size[attr] = vs_info->size; - draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) -{ - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; -} - - -static void -cmd_finish(void) -{ - DEBUG_PRINTF("FINISH\n"); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands which was sent to us by the PPU. - * See the cell_emit_state.c code to see where the commands come from. - * - * The opcode param encodes the location of the buffer and its size. - */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; - const unsigned usize = size / sizeof(buffer[0]); - uint pos; - - DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", - buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - DEBUG_PRINTF("release batch buf %u\n", buf); - release_buffer(buf); - - /* - * Loop over commands in the batch buffer - */ - for (pos = 0; pos < usize; /* no incr */) { - switch (buffer[pos]) { - /* - * rendering commands - */ - case CELL_CMD_CLEAR_SURFACE: - { - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 8; - } - break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += pos_incr; - } - break; - /* - * state-update commands - */ - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_OPS: - { - struct cell_command_fragment_ops *fops - = (struct cell_command_fragment_ops *) &buffer[pos]; - cmd_state_fragment_ops(fops); - pos += sizeof(*fops) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_PROGRAM: - { - struct cell_command_fragment_program *fp - = (struct cell_command_fragment_program *) &buffer[pos]; - cmd_state_fragment_program(fp); - pos += sizeof(*fp) / 8; - } - break; - case CELL_CMD_STATE_SAMPLER: - { - struct cell_command_sampler *sampler - = (struct cell_command_sampler *) &buffer[pos]; - cmd_state_sampler(sampler); - pos += sizeof(*sampler) / 8; - } - break; - case CELL_CMD_STATE_TEXTURE: - { - struct cell_command_texture *texture - = (struct cell_command_texture *) &buffer[pos]; - cmd_state_texture(texture); - pos += sizeof(*texture) / 8; - } - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); - break; - case CELL_CMD_STATE_UNIFORMS: - draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; - pos += 2; - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); - break; - case CELL_CMD_STATE_BIND_VS: -#if 0 - spu_bind_vertex_shader(&draw, - (struct cell_shader_info *) &buffer[pos+1]); -#endif - pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); - break; - case CELL_CMD_STATE_ATTRIB_FETCH: - cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); - break; - /* - * misc commands - */ - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; - } - break; - case CELL_CMD_FLUSH_BUFFER_RANGE: { - struct cell_buffer_range *br = (struct cell_buffer_range *) - &buffer[pos+1]; - - spu_dcache_mark_dirty((unsigned) br->base, br->size); - pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); - break; - } - default: - printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); - ASSERT(0); - break; - } - } - - DEBUG_PRINTF("BATCH complete\n"); -} - - -/** - * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. - */ -static void -main_loop(void) -{ - struct cell_command cmd; - int exitFlag = 0; - - DEBUG_PRINTF("Enter main loop\n"); - - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - - while (!exitFlag) { - unsigned opcode; - int tag = 0; - - DEBUG_PRINTF("Wait for cmd...\n"); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - - DEBUG_PRINTF("got cmd 0x%x\n", opcode); - - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - DEBUG_PRINTF("EXIT\n"); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: -#if 0 - spu_execute_vertex_shader(&draw, &cmd.vs); -#endif - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); - } - - } - - DEBUG_PRINTF("Exit main loop\n"); - - spu_dcache_report(); -} - - - static void one_time_init(void) { @@ -658,6 +105,7 @@ main(main_param_t speid, main_param_t argp) DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + /* get initialization data */ mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ sizeof(struct cell_init_info), /* bytes */ @@ -675,7 +123,7 @@ main(main_param_t speid, main_param_t argp) spu_test_misc(spu.init.id); #endif - main_loop(); + command_loop(); return 0; }