nvc0: use NOUVEAU_GETPARAM_GRAPH_UNITS to get MP count

This commit is contained in:
Christoph Bumiller 2013-03-27 23:38:29 +01:00
parent 443b247878
commit 25722e3454
4 changed files with 73 additions and 29 deletions

View File

@ -32,6 +32,10 @@
#include "nvc0_graph_macros.h"
/* Provide the getparam id when the included headers do not define it.
 * The value queried with this id is used in nvc0_screen_create(), where
 * bits 8 and up of the result are taken as the MP count.
 */
#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
#endif
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@ -494,6 +498,35 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
}
}
/**
 * Allocate a new TLS buffer object sized for the given request and install
 * it as screen->tls, replacing whatever buffer was referenced before.
 *
 * The per-unit size is (lpos + lneg) * 32 + cstack; it is then multiplied by
 * the maximum warp count for the chipset and by screen->mp_count, and rounded
 * up to a multiple of 128 KiB (1 << 17).
 *
 * @param screen  screen whose TLS buffer is replaced; screen->mp_count and
 *                screen->base.device must already be set up.
 * @param lpos    first local size term (presumably the positive-offset part
 *                of local memory -- TODO confirm against callers).
 * @param lneg    second local size term (presumably the negative-offset
 *                part -- TODO confirm against callers).
 * @param cstack  additional bytes added per unit (presumably the call stack
 *                size -- TODO confirm against callers).
 * @return TRUE if a new buffer was allocated and installed; FALSE if the
 *         request was too large or the allocation failed, in which case
 *         screen->tls is left untouched.
 */
boolean
nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
                            uint32_t lpos, uint32_t lneg, uint32_t cstack)
{
   struct nouveau_bo *bo = NULL;
   int ret;
   /* Widen to 64 bits before doing the arithmetic: computed in uint32_t,
    * large arguments would wrap around and slip past the limit check below.
    */
   uint64_t size = ((uint64_t)lpos + lneg) * 32 + cstack;

   if (size >= (1 << 20)) {
      NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
      return FALSE;
   }

   size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
   size *= screen->mp_count;

   size = align(size, 1 << 17);

   ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
                        NULL, &bo);
   if (ret) {
      NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
      return FALSE;
   }

   /* Only drop the old buffer once the new one exists, so a failed resize
    * leaves the previous TLS area in place.
    */
   nouveau_bo_ref(NULL, &screen->tls);
   screen->tls = bo;
   return TRUE;
}
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
@ -508,6 +541,7 @@ nvc0_screen_create(struct nouveau_device *dev)
struct pipe_screen *pscreen;
struct nouveau_object *chan;
struct nouveau_pushbuf *push;
uint64_t value;
uint32_t obj_class;
int ret;
unsigned i;
@ -733,18 +767,21 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
/* max MPs * max warps per MP (TODO: ask kernel) */
if (screen->eng3d->oclass >= NVE4_3D_CLASS)
screen->tls_size = 8 * 64 * 32;
else
screen->tls_size = 16 * 48 * 32;
screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
screen->tls_size = align(screen->tls_size, 1 << 17);
if (dev->drm_version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
if (ret) {
NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
goto fail;
}
} else {
if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
value = (8 << 8) | 4;
else
value = (16 << 8) | 4;
}
screen->mp_count = value >> 8;
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
screen->tls_size, NULL, &screen->tls);
if (ret)
goto fail;
nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
@ -752,8 +789,8 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
PUSH_DATA (push, screen->tls_size >> 32);
PUSH_DATA (push, screen->tls_size);
PUSH_DATA (push, screen->tls->size >> 32);
PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);

View File

@ -38,7 +38,7 @@ struct nvc0_screen {
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
uint64_t tls_size;
uint16_t mp_count;
struct nouveau_heap *text_heap;
struct nouveau_heap *lib_code; /* allocated from text_heap */
@ -86,6 +86,9 @@ int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
/* Allocates a new TLS buffer sized from lpos, lneg and cstack and installs it
 * as screen->tls. Returns TRUE on success, FALSE if the request is too large
 * or the allocation fails.
 */
boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
uint32_t lneg, uint32_t cstack);
static INLINE void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{

View File

@ -74,13 +74,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
* Actually this might be per-MP TEMP size and looks like I'm only using
* 2 MPs instead of all 8.
*/
BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
PUSH_DATAh(push, screen->tls_size / 2);
PUSH_DATA (push, screen->tls_size / 2);
BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, 0xff);
BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
PUSH_DATAh(push, screen->tls_size / 2);
PUSH_DATA (push, screen->tls_size / 2);
BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, 0xff);
/* Unified address space ? Who needs that ? Certainly not OpenCL.

View File

@ -8,10 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
- nve4_compute.xml ( 6352 bytes, from 2013-03-10 14:59:45)
- nve4_compute.xml ( 11117 bytes, from 2013-03-27 19:22:20)
- copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
- nvchipsets.xml ( 3870 bytes, from 2013-03-08 12:41:50)
- nv_object.xml ( 13238 bytes, from 2013-02-07 16:35:34)
- nvchipsets.xml ( 3954 bytes, from 2013-03-26 01:26:43)
- nv_object.xml ( 13792 bytes, from 2013-03-26 01:26:43)
- nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
- nv50_defs.xml ( 7783 bytes, from 2013-03-08 12:42:29)
@ -110,15 +110,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVE4_COMPUTE_LAUNCH 0x000002bc
#define NVE4_COMPUTE_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE__ESIZE 0x0000000c
#define NVE4_COMPUTE_TEMP_SIZE__LEN 0x00000002
#define NVE4_COMPUTE_MP_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE 0x0000000c
#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN 0x00000002
#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
#define NVE4_COMPUTE_UNK0310 0x00000310
@ -200,6 +200,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVE4_COMPUTE_UNK260c 0x0000260c
#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
#define NVE4_COMPUTE_LAUNCH_DESC_6 0x00000018
#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK 0x00000c00
#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT 10
#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030