nv50: add remapping of buffers/images into unified space

This allows us to use up to 15 images or buffers in total (in any mix, but
not 15 of each). GL supports the concept of combined resource maximums though.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Acked-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10164>
This commit is contained in:
Ilia Mirkin 2021-03-19 20:10:24 -04:00 committed by Marge Bot
parent 58d47ca324
commit f451854f39
8 changed files with 124 additions and 66 deletions

View File

@ -26,7 +26,6 @@
extern "C" {
#include "nouveau_debug.h"
#include "nv50/nv50_program.h"
}
namespace nv50_ir {

View File

@ -184,6 +184,13 @@ struct nv50_ir_prog_info_out
bool readsSampleLocations : 1;
bool separateFragData : 1;
} fp;
/* Compute-shader properties. */
struct {
/* Unified global-memory slot table, indexed by the remapped slot number
 * the shader uses; each entry names the buffer or image binding that is
 * expected to back that slot. */
struct {
unsigned valid : 1; /* set when this slot is in use */
unsigned image : 1; /* set when the slot refers to an image, clear for a buffer */
unsigned slot : 6; /* TGSI index of the buffer or image assigned to this slot */
} gmem[16]; /* nv50 only */
} cp;
} prop;
struct {

View File

@ -1012,6 +1012,9 @@ public:
std::map<int, std::pair<int, int> > tempArrayInfo;
std::vector<int> tempArrayId;
std::map<int, int> bufferIds;
std::map<int, int> imageIds;
int clipVertexOutput;
struct TextureView {
@ -1041,6 +1044,7 @@ public:
} immd;
private:
int gmemSlot;
nv50_ir::Program *prog;
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
@ -1056,7 +1060,8 @@ private:
Source::Source(struct nv50_ir_prog_info *info, struct nv50_ir_prog_info_out *info_out,
nv50_ir::Program *prog)
: insns(NULL), info(info), info_out(info_out), clipVertexOutput(-1), prog(prog)
: insns(NULL), info(info), info_out(info_out), clipVertexOutput(-1),
gmemSlot(0), prog(prog)
{
tokens = (const struct tgsi_token *)info->bin.source;
@ -1437,12 +1442,27 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_BUFFER:
for (i = first; i <= last; ++i)
bufferAtomics[i] = decl->Declaration.Atomic;
if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {
for (i = first; i <= last; i++) {
bufferIds.insert(std::make_pair(i, gmemSlot));
info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .slot = i};
assert(gmemSlot < 16);
}
}
break;
case TGSI_FILE_IMAGE:
if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {
for (i = first; i <= last; i++) {
imageIds.insert(std::make_pair(i, gmemSlot));
info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = i};
assert(gmemSlot < 16);
}
}
break;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_IMAGE:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@ -1677,6 +1697,8 @@ private:
// Symbol *getResourceBase(int r);
void getImageCoords(std::vector<Value *>&, int s);
int remapImageId(int);
int remapBufferId(int);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
@ -2610,12 +2632,30 @@ Converter::getImageCoords(std::vector<Value *> &coords, int s)
coords.push_back(fetchSrc(s, 3));
}
// Map a TGSI buffer index to the unified global-memory slot recorded for it
// in code->bufferIds. An index with no recorded mapping is returned as-is.
int
Converter::remapBufferId(int id)
{
   const auto entry = code->bufferIds.find(id);
   return entry == code->bufferIds.end() ? id : entry->second;
}
// Map a TGSI image index to the unified global-memory slot recorded for it
// in code->imageIds. An index with no recorded mapping is returned as-is.
int
Converter::remapImageId(int id)
{
   const auto entry = code->imageIds.find(id);
   return entry == code->imageIds.end() ? id : entry->second;
}
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
void
Converter::handleLOAD(Value *dst0[4])
{
const int r = tgsi.getSrc(0).getIndex(0);
int r = tgsi.getSrc(0).getIndex(0);
int c;
std::vector<Value *> off, src, ldv, def;
Value *ind = NULL;
@ -2625,6 +2665,8 @@ Converter::handleLOAD(Value *dst0[4])
switch (tgsi.getSrc(0).getFile()) {
case TGSI_FILE_BUFFER:
r = remapBufferId(r);
/* fallthrough */
case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!dst0[c])
@ -2648,7 +2690,7 @@ Converter::handleLOAD(Value *dst0[4])
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER &&
code->bufferAtomics[r])
code->bufferAtomics[tgsi.getSrc(0).getIndex(0)])
ld->cache = nv50_ir::CACHE_CG;
else
ld->cache = tgsi.getCacheMode();
@ -2657,6 +2699,7 @@ Converter::handleLOAD(Value *dst0[4])
}
break;
default: {
r = remapImageId(r);
getImageCoords(off, 1);
def.resize(4);
@ -2764,7 +2807,7 @@ Converter::handleLOAD(Value *dst0[4])
void
Converter::handleSTORE()
{
const int r = tgsi.getDst(0).getIndex(0);
int r = tgsi.getDst(0).getIndex(0);
int c;
std::vector<Value *> off, src, dummy;
Value *ind = NULL;
@ -2774,6 +2817,8 @@ Converter::handleSTORE()
switch (tgsi.getDst(0).getFile()) {
case TGSI_FILE_BUFFER:
r = remapBufferId(r);
/* fallthrough */
case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
@ -2798,6 +2843,7 @@ Converter::handleSTORE()
}
break;
default: {
r = remapImageId(r);
getImageCoords(off, 0);
src = off;
@ -2881,7 +2927,7 @@ Converter::handleSTORE()
void
Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
{
const int r = tgsi.getSrc(0).getIndex(0);
int r = tgsi.getSrc(0).getIndex(0);
std::vector<Value *> srcv;
std::vector<Value *> defv;
LValue *dst = getScratch();
@ -2892,6 +2938,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
switch (tgsi.getSrc(0).getFile()) {
case TGSI_FILE_BUFFER:
r = remapBufferId(r);
/* fallthrough */
case TGSI_FILE_MEMORY:
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
@ -2920,6 +2968,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
break;
default: {
r = remapImageId(r);
getImageCoords(srcv, 1);
defv.push_back(dst);
srcv.push_back(fetchSrc(2, 0));

View File

@ -1117,7 +1117,7 @@ NV50LoweringPreSSA::handleSUQ(TexInstruction *suq)
const int dim = suq->tex.target.getDim();
const int arg = dim + (suq->tex.target.isArray() || suq->tex.target.isCube());
int mask = suq->tex.mask;
int slot = suq->tex.r + 7;
int slot = suq->tex.r;
int c, d;
for (c = 0, d = 0; c < 3; ++c, mask >>= 1) {
@ -1661,7 +1661,7 @@ getShaderType(const ImgType type) {
Value *
NV50LoweringPreSSA::processSurfaceCoords(TexInstruction *su)
{
const int slot = su->tex.r + 7;
const int slot = su->tex.r;
const int dim = su->tex.target.getDim();
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
@ -1796,7 +1796,7 @@ NV50LoweringPreSSA::processSurfaceCoords(TexInstruction *su)
bool
NV50LoweringPreSSA::handleSULDP(TexInstruction *su)
{
const int slot = su->tex.r + 7;
const int slot = su->tex.r;
assert(!su->getIndirectR());
bld.setPosition(su, false);
@ -1917,7 +1917,7 @@ NV50LoweringPreSSA::handleSULDP(TexInstruction *su)
bool
NV50LoweringPreSSA::handleSUREDP(TexInstruction *su)
{
const int slot = su->tex.r + 7;
const int slot = su->tex.r;
const int dim = su->tex.target.getDim();
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
assert(!su->getIndirectR());
@ -1943,7 +1943,7 @@ NV50LoweringPreSSA::handleSUREDP(TexInstruction *su)
bool
NV50LoweringPreSSA::handleSUSTP(TexInstruction *su)
{
const int slot = su->tex.r + 7;
const int slot = su->tex.r;
const int dim = su->tex.target.getDim();
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
assert(!su->getIndirectR());

View File

@ -142,6 +142,9 @@ nv50_ir_prog_info_out_serialize(struct blob *blob,
case PIPE_SHADER_FRAGMENT:
blob_write_bytes(blob, &info_out->prop.fp, sizeof(info_out->prop.fp));
break;
case PIPE_SHADER_COMPUTE:
blob_write_bytes(blob, &info_out->prop.cp, sizeof(info_out->prop.cp));
break;
default:
break;
}
@ -259,6 +262,9 @@ nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
case PIPE_SHADER_FRAGMENT:
blob_copy_bytes(&reader, &info_out->prop.fp, sizeof(info_out->prop.fp));
break;
case PIPE_SHADER_COMPUTE:
blob_copy_bytes(&reader, &info_out->prop.cp, sizeof(info_out->prop.cp));
break;
default:
break;
}

View File

@ -275,46 +275,6 @@ nv50_compute_validate_constbufs(struct nv50_context *nv50)
nv50_compute_invalidate_constbufs(nv50);
}
/* Emit the compute GLOBAL(0..6) state from nv50->buffers[0..6] and, for each
 * slot, write the bound buffer's size (0 when nothing is bound) through
 * CB_ADDR/CB_DATA at NV50_CB_AUX_BUF_INFO(i). */
static void
nv50_compute_validate_buffers(struct nv50_context *nv50)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
int i;
for (i = 0; i < 7; i++) {
/* The method header announces 5 data words; both branches below push
 * exactly 5. */
BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
unsigned width;
if (nv50->buffers[i].buffer) {
struct nv04_resource *res =
nv04_resource(nv50->buffers[i].buffer);
/* NOTE(review): presumably the high and low halves of the 64-bit
 * address -- confirm against the PUSH_DATAh definition. */
PUSH_DATAh(push, res->address + nv50->buffers[i].buffer_offset);
PUSH_DATA (push, res->address + nv50->buffers[i].buffer_offset);
PUSH_DATA (push, 0); /* pitch? */
/* Size rounded up to a multiple of 256, minus one. */
PUSH_DATA (push, ALIGN(nv50->buffers[i].buffer_size, 256) - 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
/* Record [offset, offset + size) in the resource's valid range. */
util_range_add(&res->base, &res->valid_buffer_range,
nv50->buffers[i].buffer_offset,
nv50->buffers[i].buffer_offset +
nv50->buffers[i].buffer_size);
width = nv50->buffers[i].buffer_size;
} else {
/* Nothing bound: fill all five words of the entry with zero. */
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
width = 0;
}
PUSH_SPACE(push, 1 + 3);
/* Select the per-slot info location, then write the unaligned size. */
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
PUSH_DATA (push, width);
}
}
static void
nv50_get_surface_dims(const struct pipe_image_view *view,
int *width, int *height, int *depth)
@ -416,13 +376,34 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
int i;
for (i = 0; i < 8; i++) {
struct pipe_image_view *view = &nv50->images[i];
for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
int width, height, depth;
uint64_t address = 0;
BEGIN_NV04(push, NV50_CP(GLOBAL(7 + i)), 5);
if (view->resource) {
BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
struct nv04_resource *res = nv04_resource(buffer->buffer);
PUSH_DATAh(push, res->address + buffer->buffer_offset);
PUSH_DATA (push, res->address + buffer->buffer_offset);
PUSH_DATA (push, 0); /* pitch? */
PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
util_range_add(&res->base, &res->valid_buffer_range,
buffer->buffer_offset,
buffer->buffer_offset +
buffer->buffer_size);
PUSH_SPACE(push, 1 + 3);
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
PUSH_DATA (push, buffer->buffer_size);
} else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
struct pipe_image_view *view = &nv50->images[gmem->slot];
struct nv04_resource *res = nv04_resource(view->resource);
/* get surface dimensions based on the target. */
@ -483,6 +464,12 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
}
BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);
PUSH_SPACE(push, 12 + 3);
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
nv50_set_surface_info(push, view, width, height, depth);
} else {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
@ -490,12 +477,6 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
}
PUSH_SPACE(push, 12 + 3);
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(7 + i) << (8 - 2) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
nv50_set_surface_info(push, view, width, height, depth);
}
}
@ -518,8 +499,9 @@ static struct nv50_state_validate
validate_list_cp[] = {
{ nv50_compprog_validate, NV50_NEW_CP_PROGRAM },
{ nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF },
{ nv50_compute_validate_buffers, NV50_NEW_CP_BUFFERS },
{ nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES },
{ nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES |
NV50_NEW_CP_BUFFERS |
NV50_NEW_CP_PROGRAM },
{ nv50_compute_validate_textures, NV50_NEW_CP_TEXTURES },
{ nv50_compute_validate_samplers, NV50_NEW_CP_SAMPLERS },
{ nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },

View File

@ -24,8 +24,8 @@
#include "compiler/nir/nir.h"
#include "nv50/nv50_program.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_program.h"
#include "codegen/nv50_ir_driver.h"
@ -434,6 +434,15 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024);
} else
if (prog->type == PIPE_SHADER_COMPUTE) {
for (i = 0; i < NV50_MAX_GLOBALS; i++) {
prog->cp.gmem[i] = (struct nv50_gmem_state){
.valid = info_out.prop.cp.gmem[i].valid,
.image = info_out.prop.cp.gmem[i].image,
.slot = info_out.prop.cp.gmem[i].slot
};
}
}
if (prog->pipe.stream_output.num_outputs)

View File

@ -26,7 +26,6 @@
struct nv50_context;
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
struct nv50_varying {
uint8_t id; /* tgsi index */
@ -49,6 +48,12 @@ struct nv50_stream_output_state
uint8_t map[128];
};
/* One entry of a compute program's global-memory slot table. It records
 * whether the slot is used and, if so, which buffer or image binding index
 * should be bound to it. */
struct nv50_gmem_state {
unsigned valid : 1; /* whether there's something there */
unsigned image : 1; /* buffer or image */
unsigned slot : 6; /* slot in the relevant resource arrays */
};
struct nv50_program {
struct pipe_shader_state pipe;
@ -104,6 +109,7 @@ struct nv50_program {
struct {
uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
struct nv50_gmem_state gmem[NV50_MAX_GLOBALS];
} cp;
bool mul_zero_wins;