Cell: Initial pass at unified data cache
This commit is contained in:
parent
8be9bc08e1
commit
7c74037852
|
@ -93,6 +93,7 @@
|
||||||
#define CELL_CMD_STATE_BLEND 19
|
#define CELL_CMD_STATE_BLEND 19
|
||||||
#define CELL_CMD_STATE_ATTRIB_FETCH 20
|
#define CELL_CMD_STATE_ATTRIB_FETCH 20
|
||||||
#define CELL_CMD_VS_EXECUTE 21
|
#define CELL_CMD_VS_EXECUTE 21
|
||||||
|
#define CELL_CMD_FLUSH_BUFFER_RANGE 22
|
||||||
|
|
||||||
|
|
||||||
#define CELL_NUM_BUFFERS 4
|
#define CELL_NUM_BUFFERS 4
|
||||||
|
@ -144,6 +145,13 @@ struct cell_attribute_fetch_code {
|
||||||
uint size;
|
uint size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Payload that follows a CELL_CMD_FLUSH_BUFFER_RANGE opcode in a batch
 * buffer.  Describes one range of memory by start address and length.
 */
struct cell_buffer_range {
   uint64_t base;       /**< start address of the range, stored as 64 bits */
   unsigned int size;   /**< length of the range (presumably in bytes) */
};
|
||||||
|
|
||||||
|
|
||||||
struct cell_shader_info
|
struct cell_shader_info
|
||||||
{
|
{
|
||||||
uint64_t declarations;
|
uint64_t declarations;
|
||||||
|
|
|
@ -49,9 +49,12 @@ cell_map_constant_buffers(struct cell_context *sp)
|
||||||
struct pipe_winsys *ws = sp->pipe.winsys;
|
struct pipe_winsys *ws = sp->pipe.winsys;
|
||||||
uint i;
|
uint i;
|
||||||
for (i = 0; i < 2; i++) {
|
for (i = 0; i < 2; i++) {
|
||||||
if (sp->constants[i].size)
|
if (sp->constants[i].size) {
|
||||||
sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
|
sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
|
||||||
PIPE_BUFFER_USAGE_CPU_READ);
|
PIPE_BUFFER_USAGE_CPU_READ);
|
||||||
|
cell_flush_buffer_range(sp, sp->mapped_constants[i],
|
||||||
|
sp->constants[i].buffer->size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
draw_set_mapped_constant_buffer(sp->draw,
|
draw_set_mapped_constant_buffer(sp->draw,
|
||||||
|
@ -124,6 +127,7 @@ cell_draw_elements(struct pipe_context *pipe,
|
||||||
void *buf = pipe->winsys->buffer_map(pipe->winsys,
|
void *buf = pipe->winsys->buffer_map(pipe->winsys,
|
||||||
sp->vertex_buffer[i].buffer,
|
sp->vertex_buffer[i].buffer,
|
||||||
PIPE_BUFFER_USAGE_CPU_READ);
|
PIPE_BUFFER_USAGE_CPU_READ);
|
||||||
|
cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size);
|
||||||
draw_set_mapped_vertex_buffer(draw, i, buf);
|
draw_set_mapped_vertex_buffer(draw, i, buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,3 +82,17 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags)
|
||||||
|
|
||||||
flushing = FALSE;
|
flushing = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
cell_flush_buffer_range(struct cell_context *cell, void *ptr,
|
||||||
|
unsigned size)
|
||||||
|
{
|
||||||
|
uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)];
|
||||||
|
struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1];
|
||||||
|
|
||||||
|
batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
|
||||||
|
br->base = (uintptr_t) ptr;
|
||||||
|
br->size = size;
|
||||||
|
cell_batch_append(cell, batch, sizeof(batch));
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
* (C) Copyright IBM Corporation 2008
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "spu_main.h"
|
||||||
|
#include "spu_dcache.h"
|
||||||
|
|
||||||
|
#define CACHE_NAME data
|
||||||
|
#define CACHED_TYPE qword
|
||||||
|
#define CACHE_TYPE CACHE_TYPE_RO
|
||||||
|
#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
|
||||||
|
#define CACHE_LOG2NNWAY 2
|
||||||
|
#define CACHE_LOG2NSETS 6
|
||||||
|
#include <cache-api.h>
|
||||||
|
|
||||||
|
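/* Yes folks, this is ugly.  After including cache-api.h, CACHE_NWAY and
 * CACHE_NSETS are re-defined below as plain constants so that the
 * invalidation loop in spu_dcache_mark_dirty() can use them.  The values
 * must stay in sync with CACHE_LOG2NNWAY and CACHE_LOG2NSETS above
 * (4 == 1 << 2, and 1U << 6).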
|
||||||
|
*/
|
||||||
|
#undef CACHE_NWAY
|
||||||
|
#undef CACHE_NSETS
|
||||||
|
#define CACHE_NAME data
|
||||||
|
#define CACHE_NWAY 4
|
||||||
|
#define CACHE_NSETS (1U << 6)
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch between arbitrary number of bytes from an unaligned address
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
|
||||||
|
{
|
||||||
|
const int shift = ea & 0x0f;
|
||||||
|
const unsigned aligned_start_ea = ea & ~0x0f;
|
||||||
|
const unsigned aligned_end_ea = (ea + size) & ~0x0f;
|
||||||
|
const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
|
||||||
|
if (shift == 0) {
|
||||||
|
/* Data is already aligned. Fetch directly into the destination buffer.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < num_entries; i++) {
|
||||||
|
dst[i] = cache_rd(data, (ea & ~0x0f) + (i * 16));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
qword tmp[2] ALIGN16_ATTRIB;
|
||||||
|
|
||||||
|
|
||||||
|
tmp[0] = cache_rd(data, (ea & ~0x0f));
|
||||||
|
for (i = 0; i < (num_entries & ~1); i++) {
|
||||||
|
const unsigned curr = i & 1;
|
||||||
|
const unsigned next = curr ^ 1;
|
||||||
|
|
||||||
|
tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16));
|
||||||
|
|
||||||
|
dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift),
|
||||||
|
(qword) spu_rlmaskqwbyte(tmp[next], shift - 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i < num_entries) {
|
||||||
|
dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift),
|
||||||
|
si_il(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
spu_dcache_mark_dirty(unsigned ea, unsigned size)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
(void) ea;
|
||||||
|
(void) size;
|
||||||
|
|
||||||
|
/* Invalidate the whole cache for now.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
|
||||||
|
CACHELINE_CLEARVALID(i);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
/*
|
||||||
|
* (C) Copyright IBM Corporation 2008
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SPU_DCACHE_H
|
||||||
|
#define SPU_DCACHE_H
|
||||||
|
|
||||||
|
extern void
|
||||||
|
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size);
|
||||||
|
|
||||||
|
extern void
|
||||||
|
spu_dcache_mark_dirty(unsigned ea, unsigned size);
|
||||||
|
|
||||||
|
#endif /* SPU_DCACHE_H */
|
|
@ -72,6 +72,7 @@
|
||||||
#include "spu_exec.h"
|
#include "spu_exec.h"
|
||||||
#include "spu_main.h"
|
#include "spu_main.h"
|
||||||
#include "spu_vertex_shader.h"
|
#include "spu_vertex_shader.h"
|
||||||
|
#include "spu_dcache.h"
|
||||||
|
|
||||||
#define TILE_TOP_LEFT 0
|
#define TILE_TOP_LEFT 0
|
||||||
#define TILE_TOP_RIGHT 1
|
#define TILE_TOP_RIGHT 1
|
||||||
|
@ -352,19 +353,17 @@ fetch_src_file_channel(
|
||||||
case TGSI_EXTSWIZZLE_W:
|
case TGSI_EXTSWIZZLE_W:
|
||||||
switch( file ) {
|
switch( file ) {
|
||||||
case TGSI_FILE_CONSTANT: {
|
case TGSI_FILE_CONSTANT: {
|
||||||
unsigned char buffer[32] ALIGN16_ATTRIB;
|
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
const float *ptr = mach->Consts[index->i[i]];
|
const float *ptr = mach->Consts[index->i[i]];
|
||||||
const uint64_t addr = (uint64_t)(uintptr_t) ptr;
|
float tmp[4];
|
||||||
const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
|
|
||||||
|
|
||||||
mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
|
spu_dcache_fetch_unaligned((qword *) tmp,
|
||||||
wait_on_mask(1 << TAG_VERTEX_BUFFER);
|
(uintptr_t)(ptr + swizzle),
|
||||||
|
sizeof(float));
|
||||||
|
|
||||||
(void) memcpy(& chan->f[i], &buffer[(addr & 0x0f)
|
chan->f[i] = tmp[0];
|
||||||
+ (sizeof(float) * swizzle)], sizeof(float));
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1899,32 +1898,30 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
|
||||||
/* execute declarations (interpolants) */
|
/* execute declarations (interpolants) */
|
||||||
if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
|
if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
|
||||||
for (i = 0; i < mach->NumDeclarations; i++) {
|
for (i = 0; i < mach->NumDeclarations; i++) {
|
||||||
uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB;
|
union {
|
||||||
struct tgsi_full_declaration decl;
|
struct tgsi_full_declaration decl;
|
||||||
unsigned long decl_addr = (unsigned long) (mach->Declarations+i);
|
qword buffer[2 * ((sizeof(struct tgsi_full_declaration) + 31)
|
||||||
unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f);
|
/ 32)];
|
||||||
|
} d ALIGN16_ATTRIB;
|
||||||
|
unsigned ea = (unsigned) (mach->Declarations + pc);
|
||||||
|
|
||||||
mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0);
|
spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
|
||||||
wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
|
|
||||||
|
|
||||||
memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl));
|
exec_declaration( mach, &d.decl );
|
||||||
exec_declaration( mach, &decl );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* execute instructions, until pc is set to -1 */
|
/* execute instructions, until pc is set to -1 */
|
||||||
while (pc != -1) {
|
while (pc != -1) {
|
||||||
uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB;
|
union {
|
||||||
struct tgsi_full_instruction inst;
|
struct tgsi_full_instruction inst;
|
||||||
unsigned long inst_addr = (unsigned long) (mach->Instructions + pc);
|
qword buffer[2 * ((sizeof(struct tgsi_full_instruction) + 31)
|
||||||
unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f);
|
/ 32)];
|
||||||
|
} i ALIGN16_ATTRIB;
|
||||||
|
unsigned ea = (unsigned) (mach->Instructions + pc);
|
||||||
|
|
||||||
assert(pc < mach->NumInstructions);
|
spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
|
||||||
mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0);
|
exec_instruction( mach, & i.inst, &pc );
|
||||||
wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
|
|
||||||
|
|
||||||
memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst));
|
|
||||||
exec_instruction( mach, & inst, &pc );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
|
@ -462,6 +462,14 @@ cmd_batch(uint opcode)
|
||||||
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
|
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CELL_CMD_FLUSH_BUFFER_RANGE: {
|
||||||
|
struct cell_buffer_range *br = (struct cell_buffer_range *)
|
||||||
|
&buffer[pos+1];
|
||||||
|
|
||||||
|
spu_dcache_mark_dirty((unsigned) br->base, br->size);
|
||||||
|
pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
|
printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
|
||||||
ASSERT(0);
|
ASSERT(0);
|
||||||
|
|
|
@ -40,25 +40,7 @@
|
||||||
#include "spu_exec.h"
|
#include "spu_exec.h"
|
||||||
#include "spu_vertex_shader.h"
|
#include "spu_vertex_shader.h"
|
||||||
#include "spu_main.h"
|
#include "spu_main.h"
|
||||||
|
#include "spu_dcache.h"
|
||||||
#define CACHE_NAME attribute
|
|
||||||
#define CACHED_TYPE qword
|
|
||||||
#define CACHE_TYPE CACHE_TYPE_RO
|
|
||||||
#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
|
|
||||||
#define CACHE_LOG2NNWAY 2
|
|
||||||
#define CACHE_LOG2NSETS 6
|
|
||||||
#include <cache-api.h>
|
|
||||||
|
|
||||||
/* Yes folks, this is ugly.
|
|
||||||
*/
|
|
||||||
#undef CACHE_NWAY
|
|
||||||
#undef CACHE_NSETS
|
|
||||||
#define CACHE_NAME attribute
|
|
||||||
#define CACHE_NWAY 4
|
|
||||||
#define CACHE_NSETS (1U << 6)
|
|
||||||
|
|
||||||
|
|
||||||
#define DRAW_DBG 0
|
|
||||||
|
|
||||||
typedef void (*spu_fetch_func)(qword *out, const qword *in,
|
typedef void (*spu_fetch_func)(qword *out, const qword *in,
|
||||||
const qword *shuffle_data);
|
const qword *shuffle_data);
|
||||||
|
@ -102,44 +84,6 @@ static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fetch between 1 and 32 bytes from an unaligned address
|
|
||||||
*/
|
|
||||||
static INLINE void
|
|
||||||
fetch_unaligned(qword *dst, unsigned ea, unsigned size)
|
|
||||||
{
|
|
||||||
qword tmp[4] ALIGN16_ATTRIB;
|
|
||||||
const int shift = ea & 0x0f;
|
|
||||||
const unsigned aligned_start_ea = ea & ~0x0f;
|
|
||||||
const unsigned aligned_end_ea = (ea + size) & ~0x0f;
|
|
||||||
const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1;
|
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
|
|
||||||
if (shift == 0) {
|
|
||||||
/* Data is already aligned. Fetch directly into the destination buffer.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < num_entries; i++) {
|
|
||||||
dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* Fetch data from the cache to the local buffer.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < num_entries; i++) {
|
|
||||||
tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Fix the alignment of the data and write to the destination buffer.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < ((size + 15) / 16); i++) {
|
|
||||||
dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift),
|
|
||||||
(qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch vertex attributes for 'count' vertices.
|
* Fetch vertex attributes for 'count' vertices.
|
||||||
*/
|
*/
|
||||||
|
@ -182,7 +126,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
||||||
printf("SPU: fetching = 0x%llx\n", addr);
|
printf("SPU: fetching = 0x%llx\n", addr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fetch_unaligned(& in[idx], addr, bytes_per_entry);
|
spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry);
|
||||||
idx += quads_per_entry;
|
idx += quads_per_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,15 +144,5 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
||||||
|
|
||||||
void spu_update_vertex_fetch( struct spu_vs_context *draw )
|
void spu_update_vertex_fetch( struct spu_vs_context *draw )
|
||||||
{
|
{
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
|
|
||||||
/* Invalidate the vertex cache.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
|
|
||||||
CACHELINE_CLEARVALID(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
draw->vertex_fetch.fetch_func = generic_vertex_fetch;
|
draw->vertex_fetch.fetch_func = generic_vertex_fetch;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue