nv50: add remapping of buffers/images into unified space
This allows us to use up to 15 images and buffers in total (the limit is shared between the two, not 15 of each). GL supports the concept of combined resource maximums though. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Acked-by: Pierre Moreau <dev@pmoreau.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10164>
This commit is contained in:
parent
58d47ca324
commit
f451854f39
|
@ -26,7 +26,6 @@
|
|||
|
||||
extern "C" {
|
||||
#include "nouveau_debug.h"
|
||||
#include "nv50/nv50_program.h"
|
||||
}
|
||||
|
||||
namespace nv50_ir {
|
||||
|
|
|
@ -184,6 +184,13 @@ struct nv50_ir_prog_info_out
|
|||
bool readsSampleLocations : 1;
|
||||
bool separateFragData : 1;
|
||||
} fp;
|
||||
/* Compute-shader output properties. */
struct {
|
||||
/* Unified global-memory slot table: entry N describes which buffer or
 * image is bound to unified slot N. Entries are filled in declaration
 * order while scanning TGSI_FILE_BUFFER / TGSI_FILE_IMAGE declarations
 * of compute shaders on pre-GF100 targets. */
struct {
|
||||
/* set to 1 when a buffer or image was assigned to this slot */
unsigned valid : 1;
|
||||
/* 1 when the slot holds an image, 0 when it holds a buffer */
unsigned image : 1;
|
||||
/* original TGSI declaration index of the buffer/image */
unsigned slot : 6;
|
||||
} gmem[16]; /* nv50 only */
|
||||
} cp;
|
||||
} prop;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -1012,6 +1012,9 @@ public:
|
|||
std::map<int, std::pair<int, int> > tempArrayInfo;
|
||||
std::vector<int> tempArrayId;
|
||||
|
||||
std::map<int, int> bufferIds;
|
||||
std::map<int, int> imageIds;
|
||||
|
||||
int clipVertexOutput;
|
||||
|
||||
struct TextureView {
|
||||
|
@ -1041,6 +1044,7 @@ public:
|
|||
} immd;
|
||||
|
||||
private:
|
||||
int gmemSlot;
|
||||
nv50_ir::Program *prog;
|
||||
int inferSysValDirection(unsigned sn) const;
|
||||
bool scanDeclaration(const struct tgsi_full_declaration *);
|
||||
|
@ -1056,7 +1060,8 @@ private:
|
|||
|
||||
Source::Source(struct nv50_ir_prog_info *info, struct nv50_ir_prog_info_out *info_out,
|
||||
nv50_ir::Program *prog)
|
||||
: insns(NULL), info(info), info_out(info_out), clipVertexOutput(-1), prog(prog)
|
||||
: insns(NULL), info(info), info_out(info_out), clipVertexOutput(-1),
|
||||
gmemSlot(0), prog(prog)
|
||||
{
|
||||
tokens = (const struct tgsi_token *)info->bin.source;
|
||||
|
||||
|
@ -1437,12 +1442,27 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
case TGSI_FILE_BUFFER:
|
||||
for (i = first; i <= last; ++i)
|
||||
bufferAtomics[i] = decl->Declaration.Atomic;
|
||||
if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {
|
||||
for (i = first; i <= last; i++) {
|
||||
bufferIds.insert(std::make_pair(i, gmemSlot));
|
||||
info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .slot = i};
|
||||
assert(gmemSlot < 16);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_IMAGE:
|
||||
if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {
|
||||
for (i = first; i <= last; i++) {
|
||||
imageIds.insert(std::make_pair(i, gmemSlot));
|
||||
info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = i};
|
||||
assert(gmemSlot < 16);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_ADDRESS:
|
||||
case TGSI_FILE_CONSTANT:
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_IMAGE:
|
||||
break;
|
||||
default:
|
||||
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
|
||||
|
@ -1677,6 +1697,8 @@ private:
|
|||
|
||||
// Symbol *getResourceBase(int r);
|
||||
void getImageCoords(std::vector<Value *>&, int s);
|
||||
int remapImageId(int);
|
||||
int remapBufferId(int);
|
||||
|
||||
void handleLOAD(Value *dst0[4]);
|
||||
void handleSTORE();
|
||||
|
@ -2610,12 +2632,30 @@ Converter::getImageCoords(std::vector<Value *> &coords, int s)
|
|||
coords.push_back(fetchSrc(s, 3));
|
||||
}
|
||||
|
||||
// Translate a TGSI buffer index into its unified global-memory slot.
// The index is looked up in code->bufferIds (populated while scanning
// TGSI_FILE_BUFFER declarations of compute shaders on pre-GF100 targets).
// Returns the mapped slot, or the index unchanged if no mapping exists.
int
|
||||
Converter::remapBufferId(int id)
|
||||
{
|
||||
std::map<int, int>::const_iterator it = code->bufferIds.find(id);
|
||||
if (it != code->bufferIds.end())
|
||||
return it->second;
|
||||
// No remapping recorded for this index: pass it through as-is.
return id;
|
||||
}
|
||||
|
||||
// Translate a TGSI image index into its unified global-memory slot.
// The index is looked up in code->imageIds (populated while scanning
// TGSI_FILE_IMAGE declarations of compute shaders on pre-GF100 targets).
// Returns the mapped slot, or the index unchanged if no mapping exists.
int
|
||||
Converter::remapImageId(int id)
|
||||
{
|
||||
std::map<int, int>::const_iterator it = code->imageIds.find(id);
|
||||
if (it != code->imageIds.end())
|
||||
return it->second;
|
||||
// No remapping recorded for this index: pass it through as-is.
return id;
|
||||
}
|
||||
|
||||
// For raw loads, granularity is 4 byte.
|
||||
// Usage of the texture read mask on OP_SULDP is not allowed.
|
||||
void
|
||||
Converter::handleLOAD(Value *dst0[4])
|
||||
{
|
||||
const int r = tgsi.getSrc(0).getIndex(0);
|
||||
int r = tgsi.getSrc(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
Value *ind = NULL;
|
||||
|
@ -2625,6 +2665,8 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
r = remapBufferId(r);
|
||||
/* fallthrough */
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
|
@ -2648,7 +2690,7 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
|
||||
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER &&
|
||||
code->bufferAtomics[r])
|
||||
code->bufferAtomics[tgsi.getSrc(0).getIndex(0)])
|
||||
ld->cache = nv50_ir::CACHE_CG;
|
||||
else
|
||||
ld->cache = tgsi.getCacheMode();
|
||||
|
@ -2657,6 +2699,7 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
}
|
||||
break;
|
||||
default: {
|
||||
r = remapImageId(r);
|
||||
getImageCoords(off, 1);
|
||||
def.resize(4);
|
||||
|
||||
|
@ -2764,7 +2807,7 @@ Converter::handleLOAD(Value *dst0[4])
|
|||
void
|
||||
Converter::handleSTORE()
|
||||
{
|
||||
const int r = tgsi.getDst(0).getIndex(0);
|
||||
int r = tgsi.getDst(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
Value *ind = NULL;
|
||||
|
@ -2774,6 +2817,8 @@ Converter::handleSTORE()
|
|||
|
||||
switch (tgsi.getDst(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
r = remapBufferId(r);
|
||||
/* fallthrough */
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(tgsi.getDst(0).getMask() & (1 << c)))
|
||||
|
@ -2798,6 +2843,7 @@ Converter::handleSTORE()
|
|||
}
|
||||
break;
|
||||
default: {
|
||||
r = remapImageId(r);
|
||||
getImageCoords(off, 0);
|
||||
src = off;
|
||||
|
||||
|
@ -2881,7 +2927,7 @@ Converter::handleSTORE()
|
|||
void
|
||||
Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
{
|
||||
const int r = tgsi.getSrc(0).getIndex(0);
|
||||
int r = tgsi.getSrc(0).getIndex(0);
|
||||
std::vector<Value *> srcv;
|
||||
std::vector<Value *> defv;
|
||||
LValue *dst = getScratch();
|
||||
|
@ -2892,6 +2938,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
|||
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
r = remapBufferId(r);
|
||||
/* fallthrough */
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
|
@ -2920,6 +2968,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
|||
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
|
||||
break;
|
||||
default: {
|
||||
r = remapImageId(r);
|
||||
getImageCoords(srcv, 1);
|
||||
defv.push_back(dst);
|
||||
srcv.push_back(fetchSrc(2, 0));
|
||||
|
|
|
@ -1117,7 +1117,7 @@ NV50LoweringPreSSA::handleSUQ(TexInstruction *suq)
|
|||
const int dim = suq->tex.target.getDim();
|
||||
const int arg = dim + (suq->tex.target.isArray() || suq->tex.target.isCube());
|
||||
int mask = suq->tex.mask;
|
||||
int slot = suq->tex.r + 7;
|
||||
int slot = suq->tex.r;
|
||||
int c, d;
|
||||
|
||||
for (c = 0, d = 0; c < 3; ++c, mask >>= 1) {
|
||||
|
@ -1661,7 +1661,7 @@ getShaderType(const ImgType type) {
|
|||
Value *
|
||||
NV50LoweringPreSSA::processSurfaceCoords(TexInstruction *su)
|
||||
{
|
||||
const int slot = su->tex.r + 7;
|
||||
const int slot = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
|
||||
|
@ -1796,7 +1796,7 @@ NV50LoweringPreSSA::processSurfaceCoords(TexInstruction *su)
|
|||
bool
|
||||
NV50LoweringPreSSA::handleSULDP(TexInstruction *su)
|
||||
{
|
||||
const int slot = su->tex.r + 7;
|
||||
const int slot = su->tex.r;
|
||||
assert(!su->getIndirectR());
|
||||
|
||||
bld.setPosition(su, false);
|
||||
|
@ -1917,7 +1917,7 @@ NV50LoweringPreSSA::handleSULDP(TexInstruction *su)
|
|||
bool
|
||||
NV50LoweringPreSSA::handleSUREDP(TexInstruction *su)
|
||||
{
|
||||
const int slot = su->tex.r + 7;
|
||||
const int slot = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
assert(!su->getIndirectR());
|
||||
|
@ -1943,7 +1943,7 @@ NV50LoweringPreSSA::handleSUREDP(TexInstruction *su)
|
|||
bool
|
||||
NV50LoweringPreSSA::handleSUSTP(TexInstruction *su)
|
||||
{
|
||||
const int slot = su->tex.r + 7;
|
||||
const int slot = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
assert(!su->getIndirectR());
|
||||
|
|
|
@ -142,6 +142,9 @@ nv50_ir_prog_info_out_serialize(struct blob *blob,
|
|||
case PIPE_SHADER_FRAGMENT:
|
||||
blob_write_bytes(blob, &info_out->prop.fp, sizeof(info_out->prop.fp));
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
blob_write_bytes(blob, &info_out->prop.cp, sizeof(info_out->prop.cp));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -259,6 +262,9 @@ nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
|
|||
case PIPE_SHADER_FRAGMENT:
|
||||
blob_copy_bytes(&reader, &info_out->prop.fp, sizeof(info_out->prop.fp));
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
blob_copy_bytes(&reader, &info_out->prop.cp, sizeof(info_out->prop.cp));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -275,46 +275,6 @@ nv50_compute_validate_constbufs(struct nv50_context *nv50)
|
|||
nv50_compute_invalidate_constbufs(nv50);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_compute_validate_buffers(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 7; i++) {
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
|
||||
unsigned width;
|
||||
if (nv50->buffers[i].buffer) {
|
||||
struct nv04_resource *res =
|
||||
nv04_resource(nv50->buffers[i].buffer);
|
||||
PUSH_DATAh(push, res->address + nv50->buffers[i].buffer_offset);
|
||||
PUSH_DATA (push, res->address + nv50->buffers[i].buffer_offset);
|
||||
PUSH_DATA (push, 0); /* pitch? */
|
||||
PUSH_DATA (push, ALIGN(nv50->buffers[i].buffer_size, 256) - 1);
|
||||
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
|
||||
BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
|
||||
util_range_add(&res->base, &res->valid_buffer_range,
|
||||
nv50->buffers[i].buffer_offset,
|
||||
nv50->buffers[i].buffer_offset +
|
||||
nv50->buffers[i].buffer_size);
|
||||
width = nv50->buffers[i].buffer_size;
|
||||
} else {
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
width = 0;
|
||||
}
|
||||
|
||||
PUSH_SPACE(push, 1 + 3);
|
||||
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
|
||||
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
|
||||
PUSH_DATA (push, width);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_get_surface_dims(const struct pipe_image_view *view,
|
||||
int *width, int *height, int *depth)
|
||||
|
@ -416,13 +376,34 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
|
|||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
struct pipe_image_view *view = &nv50->images[i];
|
||||
for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
|
||||
struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
|
||||
int width, height, depth;
|
||||
uint64_t address = 0;
|
||||
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL(7 + i)), 5);
|
||||
if (view->resource) {
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
|
||||
|
||||
if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
|
||||
struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
|
||||
struct nv04_resource *res = nv04_resource(buffer->buffer);
|
||||
PUSH_DATAh(push, res->address + buffer->buffer_offset);
|
||||
PUSH_DATA (push, res->address + buffer->buffer_offset);
|
||||
PUSH_DATA (push, 0); /* pitch? */
|
||||
PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
|
||||
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
|
||||
BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
|
||||
util_range_add(&res->base, &res->valid_buffer_range,
|
||||
buffer->buffer_offset,
|
||||
buffer->buffer_offset +
|
||||
buffer->buffer_size);
|
||||
|
||||
PUSH_SPACE(push, 1 + 3);
|
||||
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
|
||||
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
|
||||
PUSH_DATA (push, buffer->buffer_size);
|
||||
} else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
|
||||
struct pipe_image_view *view = &nv50->images[gmem->slot];
|
||||
struct nv04_resource *res = nv04_resource(view->resource);
|
||||
|
||||
/* get surface dimensions based on the target. */
|
||||
|
@ -483,6 +464,12 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
|
|||
}
|
||||
|
||||
BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);
|
||||
|
||||
PUSH_SPACE(push, 12 + 3);
|
||||
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
|
||||
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
|
||||
nv50_set_surface_info(push, view, width, height, depth);
|
||||
} else {
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
|
@ -490,12 +477,6 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
|
|||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
PUSH_SPACE(push, 12 + 3);
|
||||
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
|
||||
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(7 + i) << (8 - 2) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
|
||||
nv50_set_surface_info(push, view, width, height, depth);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -518,8 +499,9 @@ static struct nv50_state_validate
|
|||
validate_list_cp[] = {
|
||||
{ nv50_compprog_validate, NV50_NEW_CP_PROGRAM },
|
||||
{ nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF },
|
||||
{ nv50_compute_validate_buffers, NV50_NEW_CP_BUFFERS },
|
||||
{ nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES },
|
||||
{ nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES |
|
||||
NV50_NEW_CP_BUFFERS |
|
||||
NV50_NEW_CP_PROGRAM },
|
||||
{ nv50_compute_validate_textures, NV50_NEW_CP_TEXTURES },
|
||||
{ nv50_compute_validate_samplers, NV50_NEW_CP_SAMPLERS },
|
||||
{ nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
#include "nv50/nv50_program.h"
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_program.h"
|
||||
|
||||
#include "codegen/nv50_ir_driver.h"
|
||||
|
||||
|
@ -434,6 +434,15 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
|||
break;
|
||||
}
|
||||
prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024);
|
||||
} else
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
for (i = 0; i < NV50_MAX_GLOBALS; i++) {
|
||||
prog->cp.gmem[i] = (struct nv50_gmem_state){
|
||||
.valid = info_out.prop.cp.gmem[i].valid,
|
||||
.image = info_out.prop.cp.gmem[i].image,
|
||||
.slot = info_out.prop.cp.gmem[i].slot
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (prog->pipe.stream_output.num_outputs)
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
struct nv50_context;
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
struct nv50_varying {
|
||||
uint8_t id; /* tgsi index */
|
||||
|
@ -49,6 +48,12 @@ struct nv50_stream_output_state
|
|||
uint8_t map[128];
|
||||
};
|
||||
|
||||
/* Describes one unified global-memory slot of a compute program.
 * nv50_program_translate() copies these entries from the compiler's
 * info_out.prop.cp.gmem[] table, and nv50_compute_validate_surfaces()
 * reads them to decide whether GLOBAL(i) is backed by
 * nv50->buffers[slot] or nv50->images[slot]. */
struct nv50_gmem_state {
|
||||
unsigned valid : 1; /* whether there's something there */
|
||||
unsigned image : 1; /* buffer or image */
|
||||
unsigned slot : 6; /* slot in the relevant resource arrays */
|
||||
};
|
||||
|
||||
struct nv50_program {
|
||||
struct pipe_shader_state pipe;
|
||||
|
||||
|
@ -104,6 +109,7 @@ struct nv50_program {
|
|||
struct {
|
||||
uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
|
||||
uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
|
||||
struct nv50_gmem_state gmem[NV50_MAX_GLOBALS];
|
||||
} cp;
|
||||
|
||||
bool mul_zero_wins;
|
||||
|
|
Loading…
Reference in New Issue