diff --git a/configure.ac b/configure.ac index 99a08fd060f..ba4c203e5f9 100644 --- a/configure.ac +++ b/configure.ac @@ -33,7 +33,7 @@ LIBDRM_REQUIRED=2.4.24 LIBDRM_RADEON_REQUIRED=2.4.40 LIBDRM_INTEL_REQUIRED=2.4.38 LIBDRM_NVVIEUX_REQUIRED=2.4.33 -LIBDRM_NOUVEAU_REQUIRED=2.4.33 +LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41" DRI2PROTO_REQUIRED=2.6 GLPROTO_REQUIRED=1.4.14 LIBDRM_XORG_REQUIRED=2.4.24 diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources index 12eedf96231..33b90f290fb 100644 --- a/src/gallium/drivers/nvc0/Makefile.sources +++ b/src/gallium/drivers/nvc0/Makefile.sources @@ -14,4 +14,7 @@ C_SOURCES := \ nvc0_program.c \ nvc0_shader_state.c \ nvc0_query.c \ - nvc0_video.c + nvc0_video.c \ + nvc0_video_bsp.c \ + nvc0_video_vp.c \ + nvc0_video_ppp.c diff --git a/src/gallium/drivers/nvc0/nvc0_video.c b/src/gallium/drivers/nvc0/nvc0_video.c index 5cf16e79b2b..cdb80dba064 100644 --- a/src/gallium/drivers/nvc0/nvc0_video.c +++ b/src/gallium/drivers/nvc0/nvc0_video.c @@ -1,5 +1,5 @@ /* - * Copyright 2011 Maarten Lankhorst + * Copyright 2011-2013 Maarten Lankhorst * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -54,6 +54,116 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen, } } +static void +nvc0_decoder_decode_bitstream(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + struct nvc0_video_buffer *target = (struct nvc0_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nvc0_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nvc0_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? */ + assert(ret == 2); + + nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nvc0_decoder_ppp(dec, desc, target, comm_seq); +} + +static void +nvc0_decoder_flush(struct pipe_video_decoder *decoder) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + (void)dec; +} + +static void +nvc0_decoder_begin_frame(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nvc0_decoder_end_frame(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nvc0_decoder_destroy(struct pipe_video_decoder *decoder) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + int i; + + nouveau_bo_ref(NULL, &dec->ref_bo); + nouveau_bo_ref(NULL, &dec->bitplane_bo); + nouveau_bo_ref(NULL, &dec->inter_bo[0]); + nouveau_bo_ref(NULL, &dec->inter_bo[1]); +#ifdef NVC0_DEBUG_FENCE + nouveau_bo_ref(NULL, &dec->fence_bo); +#endif + nouveau_bo_ref(NULL, &dec->fw_bo); + + for (i = 0; i < NVC0_VIDEO_QDEPTH; ++i) + nouveau_bo_ref(NULL, &dec->bsp_bo[i]); + + nouveau_object_del(&dec->bsp); + nouveau_object_del(&dec->vp); + nouveau_object_del(&dec->ppp); + + if (dec->channel[0] != dec->channel[1]) { + for (i = 0; i < 3; ++i) { + nouveau_pushbuf_del(&dec->pushbuf[i]); + nouveau_object_del(&dec->channel[i]); + } + } else { + nouveau_pushbuf_del(dec->pushbuf); + nouveau_object_del(dec->channel); + } + + FREE(dec); +} + +static void nvc0_video_getpath(enum pipe_video_profile profile, char *path) +{ + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + sprintf(path, "/lib/firmware/nouveau/vuc-mpeg12-0"); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: { + sprintf(path, "/lib/firmware/nouveau/vuc-mpeg4-0"); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + sprintf(path, "/lib/firmware/nouveau/vuc-vc1-%u", profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + sprintf(path, "/lib/firmware/nouveau/vuc-h264-0"); + break; + } + default: assert(0); + } +} + struct pipe_video_decoder * nvc0_create_decoder(struct pipe_context *context, enum pipe_video_profile profile, @@ -62,6 +172,20 @@ nvc0_create_decoder(struct pipe_context *context, unsigned width, unsigned height, unsigned max_references, bool chunked_decode) { + struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; + struct nvc0_decoder *dec; + struct nouveau_pushbuf **push; + union nouveau_bo_config cfg; + bool kepler = screen->device->chipset >= 0xe0; + + cfg.nvc0.tile_mode = 0x10; + cfg.nvc0.memtype = 0xfe; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + if (getenv("XVMC_VL")) return vl_create_decoder(context, profile, entrypoint, chroma_format, width, height, @@ -72,6 +196,307 @@ nvc0_create_decoder(struct pipe_context *context, return NULL; } + dec = CALLOC_STRUCT(nvc0_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + + if (!kepler) { + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + } else { + dec->bsp_idx = 2; + dec->vp_idx = 2; + dec->ppp_idx = 2; + } + + for (i = 0; i < 3; ++i) + if (i && !kepler) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } else { + void *data; + u32 size; + struct nvc0_fifo nvc0_args = {}; + struct nve0_fifo nve0_args = {}; + + if (!kepler) { + size = sizeof(nvc0_args); + data = &nvc0_args; + } else { + unsigned engine[] = { + NVE0_FIFO_ENGINE_BSP, + NVE0_FIFO_ENGINE_VP, + NVE0_FIFO_ENGINE_PPP + }; + + nve0_args.engine = engine[i]; + size = sizeof(nve0_args); + data = &nve0_args; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &dec->channel[i]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, + 32 * 1024, true, &dec->pushbuf[i]); + if (ret) + break; + } + push = dec->pushbuf; + + if (!kepler) { + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); + } else { + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); + } + if (ret) + goto fail; + + BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + dec->base.context = context; + dec->base.profile = profile; + dec->base.entrypoint = entrypoint; + dec->base.chroma_format = chroma_format; + dec->base.width = width; + dec->base.height = height; + dec->base.max_references = max_references; + dec->base.destroy = nvc0_decoder_destroy; + dec->base.flush = nvc0_decoder_flush; + dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; + dec->base.begin_frame = nvc0_decoder_begin_frame; + dec->base.end_frame = nvc0_decoder_end_frame; + + for (i = 0; i < NVC0_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, &cfg, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); + if (!ret) { + if (!kepler) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + else + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, dec->inter_bo[0]->size, &cfg, + &dec->inter_bo[1]); + } + if (ret) + goto fail; + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + codec = 1; + assert(max_references <= 2); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: { + codec = 4; + tmp_size = mb(height)*16 * mb(width)*16; + assert(max_references <= 2); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(height)*16 * mb(width)*16; + assert(max_references <= 2); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + codec = 3; + dec->tmp_stride = 16 * mb_half(width) * nvc0_video_align(height) * 3 / 2; + tmp_size = dec->tmp_stride * (max_references + 1); + assert(max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + if (screen->device->chipset < 0xd0) { + int fd; + char path[PATH_MAX]; + ssize_t r; + uint32_t *end, endval; + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, &cfg, &dec->fw_bo); + if (!ret) + ret = nouveau_bo_map(dec->fw_bo, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + + nvc0_video_getpath(profile, path); + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "opening firmware file %s failed: %m\n", path); + goto fw_fail; + } + r = read(fd, dec->fw_bo->map, 0x4000); + if (r < 0) { + fprintf(stderr, "reading firmware file %s failed: %m\n", path); + goto fw_fail; + } + + if (r == 0x4000) { + close(fd); + fprintf(stderr, "firmware file %s too large!\n", path); + goto fw_fail; + } + + if (r & 0xff) { + close(fd); + fprintf(stderr, "firmware file %s wrong size!\n", path); + goto fw_fail; + } + + end = dec->fw_bo->map + r - 4; + endval = *end; + while (endval == *end) + end--; + + r = (intptr_t)end - (intptr_t)dec->fw_bo->map + 4; + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + assert((r & 0xff) == 0xe0); + dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: { + assert((r & 0xff) == 0xe0); + dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + assert((r & 0xff) == 0xac); + dec->fw_sizes = (0x3ac<<16) | (r - 0x3ac); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + assert((r & 0xff) == 0x70); + dec->fw_sizes = (0x370<<16) | (r - 0x370); + break; + } + default: + goto fw_fail; + } + munmap(dec->fw_bo->map, dec->fw_bo->size); + dec->fw_bo->map = NULL; + } + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, &cfg, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(width)*16 * (mb_half(height)*32 + nvc0_video_align(height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NVC0_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, &cfg, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? */ + BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 1); + PUSH_KICK (push[0]); + + BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 1); + PUSH_KICK (push[1]); + + BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 1); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] && + dec->fence_seq > dec->fence_map[4] && + dec->fence_seq > dec->fence_map[8]) { + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + nvc0_decoder_destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + nvc0_decoder_destroy(&dec->base); return NULL; } diff --git a/src/gallium/drivers/nvc0/nvc0_video.h b/src/gallium/drivers/nvc0/nvc0_video.h index e2cfc3d5902..4cc0ebbb605 100644 --- a/src/gallium/drivers/nvc0/nvc0_video.h +++ b/src/gallium/drivers/nvc0/nvc0_video.h @@ -1,5 +1,5 @@ /* - * Copyright 2011 Maarten Lankhorst + * Copyright 2011-2013 Maarten Lankhorst * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,6 +29,30 @@ #include "util/u_video.h" +#define SLICE_SIZE 0x200 +#define VP_OFFSET 0x200 +#define COMM_OFFSET 0x500 + +//#define NVC0_DEBUG_FENCE 1 + +#ifdef NVC0_DEBUG_FENCE +# define NVC0_VIDEO_QDEPTH 1 +#else +# define NVC0_VIDEO_QDEPTH 2 +#endif + +#define SUBC_BSP(m) dec->bsp_idx, (m) +#define SUBC_VP(m) dec->vp_idx, (m) +#define SUBC_PPP(m) dec->ppp_idx, (m) + +union pipe_desc { + struct pipe_picture_desc *base; + struct pipe_mpeg12_picture_desc *mpeg12; + struct pipe_mpeg4_picture_desc *mpeg4; + struct pipe_vc1_picture_desc *vc1; + struct pipe_h264_picture_desc *h264; +}; + struct nvc0_video_buffer { struct pipe_video_buffer base; unsigned num_planes, valid_ref; @@ -38,6 +62,79 @@ struct nvc0_video_buffer { struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; }; +struct nvc0_decoder { + struct pipe_video_decoder base; + struct nouveau_client *client; + struct nouveau_object *channel[3], *bsp, *vp, *ppp; + struct nouveau_pushbuf *pushbuf[3]; + +#ifdef NVC0_DEBUG_FENCE + /* dump fence and comm, as needed.. */ + unsigned *fence_map; + struct comm *comm; + + struct nouveau_bo *fence_bo; +#endif + + struct nouveau_bo *fw_bo, *bitplane_bo; + + // array size max_references + 2, contains unpostprocessed images + // added at the end of ref_bo is a tmp array + // tmp is an array for h264, with each member being used for a ref frame or current + // target.. size = (((mb(w)*((mb(h)+1)&~1))+3)>>2)<<8 * (max_references+1) + // for other codecs, it simply seems that size = w*h is enough + // unsure what it's supposed to contain.. + struct nouveau_bo *ref_bo; + + struct nouveau_bo *inter_bo[2]; + + struct nouveau_bo *bsp_bo[NVC0_VIDEO_QDEPTH]; + + // bo's used by each cycle: + + // bsp_bo: contains raw bitstream data and parameters for BSP and VP. + // inter_bo: contains data shared between BSP and VP + // ref_bo: reference image data, used by PPP and VP + // bitplane_bo: contain bitplane data (similar to ref_bo), used by BSP only + // fw_bo: used by VP only. + + // Needed amount of copies in optimal case: + // 2 copies of inter_bo, VP would process the last inter_bo, while BSP is + // writing out a new set. + // NVC0_VIDEO_QDEPTH copies of bsp_bo. We don't want to block the pipeline ever, + // and give shaders a chance to run as well. + + struct { + struct nvc0_video_buffer *vidbuf; + unsigned last_used; + unsigned field_pic_flag : 1; + unsigned decoded_top : 1; + unsigned decoded_bottom : 1; + } refs[17]; + unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride; + + unsigned bsp_idx, vp_idx, ppp_idx; +}; + +struct comm { + uint32_t bsp_cur_index; // 000 + uint32_t byte_ofs; // 004 + uint32_t status[0x10]; // 008 + uint32_t pos[0x10]; // 048 + uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted + + uint32_t pvp_cur_index; // 100 + uint32_t acked_byte_ofs; // 104 + uint32_t status_vp[0x10]; // 108 + uint16_t mb_y[0x10]; //148 + uint32_t pvp_stage; // 168 0xeeXX + uint16_t parse_endpos_index; // 16c + uint16_t irq_index; // 16e + uint8_t irq_470[0x10]; // 170 + uint32_t irq_pos[0x10]; // 180 + uint32_t parse_endpos[0x10]; // 1c0 +}; + static INLINE uint32_t nvc0_video_align(uint32_t h) { return ((h+0x3f)&~0x3f); @@ -52,3 +149,73 @@ static INLINE uint32_t mb_half(uint32_t coord) { return (coord + 0x1f)>>5; } + +static INLINE uint64_t +nvc0_video_addr(struct nvc0_decoder *dec, struct nvc0_video_buffer *target) +{ + uint64_t ret; + if (target) + ret = dec->ref_stride * target->valid_ref; + else + ret = dec->ref_stride * (dec->base.max_references+1); + return dec->ref_bo->offset + ret; +} + +static INLINE void +nvc0_decoder_ycbcr_offsets(struct nvc0_decoder *dec, uint32_t *y2, + uint32_t *cbcr, uint32_t *cbcr2) +{ + uint32_t w = mb(dec->base.width), size; + *y2 = mb_half(dec->base.height)*w; + *cbcr = *y2 * 2; + *cbcr2 = *cbcr + w * (nvc0_video_align(dec->base.height)>>6); + + /* The check here should never fail because it means a bug + * in the code rather than a bug in hardware.. + */ + size = (2 * (*cbcr2 - *cbcr) + *cbcr) << 8; + if (size > dec->ref_stride) { + debug_printf("Overshot ref_stride (%u) with size %u and ofs (%u,%u,%u)\n", + dec->ref_stride, size, *y2<<8, *cbcr<<8, *cbcr2<<8); + *y2 = *cbcr = *cbcr2 = 0; + assert(size <= dec->ref_stride); + } +} + +static INLINE void +nvc0_decoder_inter_sizes(struct nvc0_decoder *dec, uint32_t slice_count, + uint32_t *slice_size, uint32_t *bucket_size, + uint32_t *ring_size) +{ + *slice_size = (SLICE_SIZE * slice_count)>>8; + if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG12) + *bucket_size = 0; + else + *bucket_size = mb(dec->base.width) * 3; + *ring_size = (dec->inter_bo[0]->size >> 8) - *bucket_size - *slice_size; +} + +extern unsigned +nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, + union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, + unsigned *caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void +nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void +nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nvc0/nvc0_video_bsp.c new file mode 100644 index 00000000000..798b4bfacac --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_video_bsp.c @@ -0,0 +1,423 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_video.h" + +struct strparm_bsp { + uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi + uint32_t w1[4]; // bit 8-24 addr_lo + uint32_t unk20; // should be idx * 0x8000000, bitstream offset + uint32_t do_crypto_crap; // set to 0 +}; + +struct mpeg12_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t picture_structure; + uint8_t picture_coding_type; + uint8_t intra_dc_precision; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t intra_vlc_format; + uint16_t pad; + uint8_t f_code[2][2]; +}; + +struct mpeg4_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t vop_time_increment_size; + uint8_t interlaced; + uint8_t resync_marker_disable; +}; + +struct vc1_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t profile; // 04 0 simple, 1 main, 2 advanced + uint8_t postprocflag; // 05 + uint8_t pulldown; // 06 + uint8_t interlaced; // 07 + uint8_t tfcntrflag; // 08 + uint8_t finterpflag; // 09 + uint8_t psf; // 0a + uint8_t pad; // 0b + uint8_t multires; // 0c + uint8_t syncmarker; // 0d + uint8_t rangered; // 0e + uint8_t maxbframes; // 0f + uint8_t dquant; // 10 + uint8_t panscan_flag; // 11 + uint8_t refdist_flag; // 12 + uint8_t quantizer; // 13 + uint8_t extended_mv; // 14 + uint8_t extended_dmv; // 15 + uint8_t overlap; // 16 + uint8_t vstransform; // 17 +}; + +struct h264_picparm_bsp { + // 00 + uint32_t unk00; + // 04 + uint32_t log2_max_frame_num_minus4; // 04 checked + uint32_t pic_order_cnt_type; // 08 checked + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked + uint32_t delta_pic_order_always_zero_flag; // 10, or unknown + + uint32_t frame_mbs_only_flag; // 14, always 1? + uint32_t direct_8x8_inference_flag; // 18, always 1? + uint32_t width_mb; // 1c checked + uint32_t height_mb; // 20 checked + // 24 + //struct picparm2 + uint32_t entropy_coding_mode_flag; // 00, checked + uint32_t pic_order_present_flag; // 04 checked + uint32_t unk; // 08 seems to be 0? + uint32_t pad1; // 0c seems to be 0? + uint32_t pad2; // 10 always 0 ? + uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? + uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? + uint32_t weighted_pred_flag; // 1c checked + uint32_t weighted_bipred_idc; // 20 checked + uint32_t pic_init_qp_minus26; // 24 checked + uint32_t deblocking_filter_control_present_flag; // 28 always 1? + uint32_t redundant_pic_cnt_present_flag; // 2c always 0? + uint32_t transform_8x8_mode_flag; // 30 checked + uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish + uint8_t field_pic_flag; // 38 checked + uint8_t bottom_field_flag; // 39 checked + uint8_t real_pad[0x1b]; // XX why? +}; + +static uint32_t +nvc0_decoder_fill_picparm_mpeg12_bsp(struct nvc0_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + char *map) +{ + struct mpeg12_picparm_bsp *pic_bsp = (struct mpeg12_picparm_bsp *)map; + int i; + pic_bsp->width = dec->base.width; + pic_bsp->height = dec->base.height; + pic_bsp->picture_structure = desc->picture_structure; + pic_bsp->picture_coding_type = desc->picture_coding_type; + pic_bsp->intra_dc_precision = desc->intra_dc_precision; + pic_bsp->frame_pred_frame_dct = desc->frame_pred_frame_dct; + pic_bsp->concealment_motion_vectors = desc->concealment_motion_vectors; + pic_bsp->intra_vlc_format = desc->intra_vlc_format; + pic_bsp->pad = 0; + for (i = 0; i < 4; ++i) + pic_bsp->f_code[i/2][i%2] = desc->f_code[i/2][i%2] + 1; // FU + + return (desc->num_slices << 4) | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1); +} + +static uint32_t +nvc0_decoder_fill_picparm_mpeg4_bsp(struct nvc0_decoder *dec, + struct pipe_mpeg4_picture_desc *desc, + char *map) +{ + struct mpeg4_picparm_bsp *pic_bsp = (struct mpeg4_picparm_bsp *)map; + uint32_t t, bits = 0; + pic_bsp->width = dec->base.width; + pic_bsp->height = dec->base.height; + assert(desc->vop_time_increment_resolution > 0); + + t = desc->vop_time_increment_resolution - 1; + while (t) { + bits++; + t /= 2; + } + if (!bits) + bits = 1; + t = desc->vop_time_increment_resolution - 1; + pic_bsp->vop_time_increment_size = bits; + pic_bsp->interlaced = desc->interlaced; + pic_bsp->resync_marker_disable = desc->resync_marker_disable; + return 4; +} + +static uint32_t +nvc0_decoder_fill_picparm_vc1_bsp(struct nvc0_decoder *dec, + struct pipe_vc1_picture_desc *d, + char *map) +{ + struct vc1_picparm_bsp *vc = (struct vc1_picparm_bsp *)map; + uint32_t caps = (d->slice_count << 4)&0xfff0; + vc->width = dec->base.width; + vc->height = dec->base.height; + vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE; // 04 + vc->postprocflag = d->postprocflag; + vc->pulldown = d->pulldown; + vc->interlaced = d->interlace; + vc->tfcntrflag = d->tfcntrflag; // 08 + vc->finterpflag = d->finterpflag; + vc->psf = d->psf; + vc->pad = 0; + vc->multires = d->multires; // 0c + vc->syncmarker = d->syncmarker; + vc->rangered = d->rangered; + vc->maxbframes = d->maxbframes; + vc->dquant = d->dquant; // 10 + vc->panscan_flag = d->panscan_flag; + vc->refdist_flag = d->refdist_flag; + vc->quantizer = d->quantizer; + vc->extended_mv = d->extended_mv; // 14 + vc->extended_dmv = d->extended_dmv; + vc->overlap = d->overlap; + vc->vstransform = d->vstransform; + return caps | 2; +} + +static uint32_t +nvc0_decoder_fill_picparm_h264_bsp(struct nvc0_decoder *dec, + struct pipe_h264_picture_desc *d, + char *map) +{ + struct h264_picparm_bsp stub_h = {}, *h = &stub_h; + uint32_t caps = (d->slice_count << 4)&0xfff0; + + assert(!(d->slice_count & ~0xfff)); + if (d->slice_count & 0x1000) + caps |= 1 << 20; + + assert(offsetof(struct h264_picparm_bsp, bottom_field_flag) == (0x39 + 0x24)); + h->unk00 = 1; + h->pad1 = h->pad2 = 0; + h->unk = 0; + h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4; + h->frame_mbs_only_flag = d->frame_mbs_only_flag; + h->direct_8x8_inference_flag = d->direct_8x8_inference_flag; + h->width_mb = mb(dec->base.width); + h->height_mb = mb(dec->base.height); + h->entropy_coding_mode_flag = d->entropy_coding_mode_flag; + h->pic_order_present_flag = d->pic_order_present_flag; + h->pic_order_cnt_type = d->pic_order_cnt_type; + h->log2_max_pic_order_cnt_lsb_minus4 = d->log2_max_pic_order_cnt_lsb_minus4; + h->delta_pic_order_always_zero_flag = d->delta_pic_order_always_zero_flag; + h->num_ref_idx_l0_active_minus1 = d->num_ref_idx_l0_active_minus1; + h->num_ref_idx_l1_active_minus1 = d->num_ref_idx_l1_active_minus1; + h->weighted_pred_flag = d->weighted_pred_flag; + h->weighted_bipred_idc = d->weighted_bipred_idc; + h->pic_init_qp_minus26 = d->pic_init_qp_minus26; + h->deblocking_filter_control_present_flag = d->deblocking_filter_control_present_flag; + h->redundant_pic_cnt_present_flag = d->redundant_pic_cnt_present_flag; + h->transform_8x8_mode_flag = d->transform_8x8_mode_flag; + h->mb_adaptive_frame_field_flag = d->mb_adaptive_frame_field_flag; + h->field_pic_flag = d->field_pic_flag; + h->bottom_field_flag = d->bottom_field_flag; + memset(h->real_pad, 0, sizeof(h->real_pad)); + *(struct h264_picparm_bsp *)map = *h; + return caps | 3; +} + +#if NVC0_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile); + char *bsp; + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t endmarker, caps; + struct strparm_bsp *str_bsp; + int ret, i; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#ifdef NVC0_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + +#ifdef NVC0_DEBUG_FENCE + fence_extra = 4; +#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + bsp = bsp_bo->map; + /* + * 0x000..0x100: picparm_bsp + * 0x200..0x500: picparm_vp + * 0x500..0x700: comm + * 0x700..onward: raw bitstream + */ + + switch (codec){ + case PIPE_VIDEO_CODEC_MPEG12: + endmarker = 0xb7010000; + caps = nvc0_decoder_fill_picparm_mpeg12_bsp(dec, desc.mpeg12, bsp); + break; + case PIPE_VIDEO_CODEC_MPEG4: + endmarker = 0xb1010000; + caps = nvc0_decoder_fill_picparm_mpeg4_bsp(dec, desc.mpeg4, bsp); + break; + case PIPE_VIDEO_CODEC_VC1: { + endmarker = 0x0a010000; + caps = nvc0_decoder_fill_picparm_vc1_bsp(dec, desc.vc1, bsp); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + endmarker = 0x0b010000; + caps = nvc0_decoder_fill_picparm_h264_bsp(dec, desc.h264, bsp); + break; + } + default: assert(0); return -1; + } + + nvc0_decoder_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + PUSH_SPACE(push, 6 + (codec == PIPE_VIDEO_CODEC_MPEG4_AVC ? 9 : 7) + fence_extra + 2); + if (!dec->bitplane_bo) + num_refs--; + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + caps |= 0 << 16; // reset struct comm if flag is set + caps |= 1 << 17; // enable watchdog + caps |= 0 << 18; // do not report error to VP, so it can continue decoding what we have + caps |= 0 << 19; // if enabled, use crypto crap? + bsp += 0x100; + + str_bsp = (struct strparm_bsp *)bsp; + memset(str_bsp, 0, 0x80); + str_bsp->w0[0] = 16; + str_bsp->w1[0] = 0x1; + bsp += 0x100; + /* Reserved for picparm_vp */ + bsp += 0x300; + /* Reserved for comm */ +#if !NVC0_DEBUG_FENCE + memset(bsp, 0, 0x200); +#endif + bsp += 0x200; + for (i = 0; i < num_buffers; ++i) { + memcpy(bsp, data[i], num_bytes[i]); + bsp += num_bytes[i]; + str_bsp->w0[0] += num_bytes[i]; + } + + /* Append end sequence */ + *(uint32_t *)bsp = endmarker; + bsp += 4; + *(uint32_t *)bsp = 0x00000000; + bsp += 4; + *(uint32_t *)bsp = endmarker; + bsp += 4; + *(uint32_t *)bsp = 0x00000000; + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NVC0_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NVC0(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_CODEC_MPEG4_AVC) { + u32 bitplane_addr; + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 6); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } else { + nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, slice_size << 8); // 408 interparm size? + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NVC0_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nvc0/nvc0_video_ppp.c new file mode 100644 index 00000000000..2e99540d6d4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_video_ppp.c @@ -0,0 +1,145 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_video.h" + +static void +nvc0_decoder_setup_ppp(struct nvc0_decoder *dec, struct nvc0_video_buffer *target, uint32_t low700) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + uint32_t stride_in = mb(dec->base.width); + uint32_t stride_out = mb(target->resources[0]->width0); + uint32_t dec_h = mb(dec->base.height); + uint32_t dec_w = mb(dec->base.width); + uint64_t in_addr; + uint32_t y2, cbcr, cbcr2, i; + struct nouveau_pushbuf_refn bo_refs[] = { + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#ifdef NVC0_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + }; + unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + bo_refs[i].bo = mt->base.bo; + } + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + nvc0_decoder_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2); + + BEGIN_NVC0(push, SUBC_PPP(0x700), 10); + in_addr = nvc0_video_addr(dec, target) >> 8; + + PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700 + PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704 + assert(dec_w == stride_in); + + /* Input: */ + PUSH_DATA (push, in_addr); // 708 + PUSH_DATA (push, in_addr + y2); // 70c + PUSH_DATA (push, in_addr + cbcr); // 710 + PUSH_DATA (push, in_addr + cbcr2); // 714 + assert(target->resources[0]->width0 >= 16 * dec_w); + assert(target->resources[0]->height0 >= dec->base.height/2); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + + PUSH_DATA (push, mt->base.address >> 8); + PUSH_DATA (push, (mt->base.address + mt->total_size/2/mt->base.base.array_size) >> 8); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } +} + +static uint32_t +nvc0_decoder_vc1_ppp(struct nvc0_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nvc0_video_buffer *target) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + nvc0_decoder_setup_ppp(dec, target, 0x1412); + assert(!desc->deblockEnable); + assert(!(dec->base.width & 0xf)); + assert(!(dec->base.height & 0xf)); + + BEGIN_NVC0(push, SUBC_PPP(0x400), 1); + PUSH_DATA (push, desc->pquant << 11); + + // 728 = wtf? + return 0x10; +} + +void +nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc, struct nvc0_video_buffer *target, unsigned comm_seq) { + enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_pushbuf *push = dec->pushbuf[2]; + unsigned ppp_caps = 0x10; + unsigned fence_extra = 0; + +#if NVC0_DEBUG_FENCE + fence_extra = 4; +#endif + + PUSH_SPACE(push, 11 + (codec == PIPE_VIDEO_CODEC_VC1 ? 2 : 0) + 3 + fence_extra + 2); + + switch (codec) { + case PIPE_VIDEO_CODEC_MPEG12: { + unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1; + nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break; + case PIPE_VIDEO_CODEC_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break; + case PIPE_VIDEO_CODEC_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break; + default: assert(0); + } + BEGIN_NVC0(push, SUBC_PPP(0x734), 2); + PUSH_DATA (push, comm_seq); + PUSH_DATA (push, ppp_caps); + +#if NVC0_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_PPP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) + debug_printf("ppp%u: %u\n", dec->fence_seq, dec->fence_map[8]); + } while (dec->fence_seq > dec->fence_map[8]); + } +#else + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nvc0/nvc0_video_vp.c new file mode 100644 index 00000000000..84af0d68be9 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_video_vp.c @@ -0,0 +1,667 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_video.h" +#include + +struct mpeg12_picparm_vp { + uint16_t width; // 00 in mb units + uint16_t height; // 02 in mb units + + uint32_t unk04; // 04 stride for Y? + uint32_t unk08; // 08 stride for CbCr? + + uint32_t ofs[6]; // 1c..20 ofs + uint32_t bucket_size; // 24 + uint32_t inter_ring_data_size; // 28 + uint16_t unk2c; // 2c + uint16_t alternate_scan; // 2e + uint16_t unk30; // 30 not seen set yet + uint16_t picture_structure; // 32 + uint16_t pad2[3]; + uint16_t unk3a; // 3a set on I frame? + + uint32_t f_code[4]; // 3c + uint32_t picture_coding_type; // 4c + uint32_t intra_dc_precision; // 50 + uint32_t q_scale_type; // 54 + uint32_t top_field_first; // 58 + uint32_t full_pel_forward_vector; // 5c + uint32_t full_pel_backward_vector; // 60 + uint8_t intra_quantizer_matrix[0x40]; // 64 + uint8_t non_intra_quantizer_matrix[0x40]; // a4 +}; + +struct mpeg4_picparm_vp { + uint32_t width; // 00 in normal units + uint32_t height; // 04 in normal units + uint32_t unk08; // stride 1 + uint32_t unk0c; // stride 2 + uint32_t ofs[6]; // 10..24 ofs + uint32_t bucket_size; // 28 + uint32_t pad1; // 2c, pad + uint32_t pad2; // 30 + uint32_t inter_ring_data_size; // 34 + + uint32_t trd[2]; // 38, 3c + uint32_t trb[2]; // 40, 44 + uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile + uint16_t f_code_fw; // 4c + uint16_t f_code_bw; // 4e + uint8_t interlaced; // 50 + + uint8_t quant_type; // bool, written to 528 + uint8_t quarter_sample; // bool, written to 548 + uint8_t short_video_header; // bool, negated written to 528 shifted by 1 + uint8_t u54; // bool, written to 0x740 + uint8_t vop_coding_type; // 55 + uint8_t rounding_control; // 56 + uint8_t alternate_vertical_scan_flag; // 57 bool + uint8_t top_field_first; // bool, written to vuc + + uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob + uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY? + + uint32_t intra[0x10]; // 9c + uint32_t non_intra[0x10]; // bc + // udc..uff pad? +}; + +// Full version, with data pumped from BSP +struct vc1_picparm_vp { + uint32_t bucket_size; // 00 + uint32_t pad; // 04 + + uint32_t inter_ring_data_size; // 08 + uint32_t unk0c; // stride 1 + uint32_t unk10; // stride 2 + uint32_t ofs[6]; // 14..28 ofs + + uint16_t width; // 2c + uint16_t height; // 2e + + uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced + uint8_t loopfilter; // 31 written into vuc + uint8_t fastuvmc; // 32, written into vuc + uint8_t dquant; // 33 + + uint8_t overlap; // 34 + uint8_t quantizer; // 35 + uint8_t u36; // 36, bool + uint8_t pad2; // 37, to align to 0x38 +}; + +struct h264_picparm_vp { // 700..a00 + uint16_t width, height; + uint32_t stride1, stride2; // 04 08 + uint32_t ofs[6]; // 0c..24 in-image offset + + uint32_t u24; // nfi ac8 ? + uint32_t bucket_size; // 28 bucket size + uint32_t inter_ring_data_size; // 2c + + unsigned f0 : 1; // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something? + unsigned f1 : 1; // 1 0x02: into vuc ofs 56 + unsigned weighted_pred_flag : 1; // 2 0x04 + unsigned f3 : 1; // 3 0x08: into vuc ofs 68 + unsigned is_reference : 1; // 4 + unsigned interlace : 1; // 5 field_pic_flag + unsigned bottom_field_flag : 1; // 6 + unsigned f7 : 1; // 7 0x80: nfi yet + + signed log2_max_frame_num_minus4 : 4; // 31 0..3 + unsigned u31_45 : 2; // 31 4..5 + unsigned pic_order_cnt_type : 2; // 31 6..7 + signed pic_init_qp_minus26 : 6; // 32 0..5 + signed chroma_qp_index_offset : 5; // 32 6..10 + signed second_chroma_qp_index_offset : 5; // 32 11..15 + + unsigned weighted_bipred_idc : 2; // 34 0..1 + unsigned fifo_dec_index : 7; // 34 2..8 + unsigned tmp_idx : 5; // 34 9..13 + unsigned frame_number : 16; // 34 14..29 + unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30] + unsigned u34_3131 : 1; // 34 31..31 pad? + + uint32_t field_order_cnt[2]; // 38, 3c + + struct { // 40 + // 0x00223102 + // nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved.. + unsigned fifo_idx : 7; // 00 0..6 + unsigned tmp_idx : 5; // 00 7..11 + unsigned unk12 : 1; // 00 12 not seen yet, but set, maybe top_is_reference + unsigned unk13 : 1; // 00 13 not seen yet, but set, maybe bottom_is_reference? + unsigned unk14 : 1; // 00 14 skipped? + unsigned notseenyet : 1; // 00 15 pad? + unsigned unk16 : 1; // 00 16 + unsigned unk17 : 4; // 00 17..20 + unsigned unk21 : 4; // 00 21..24 + unsigned pad : 7; // 00 d25..31 + + uint32_t field_order_cnt[2]; // 04,08 + uint32_t frame_idx; // 0c + } refs[0x10]; + + uint8_t m4x4[6][16]; // 140 + uint8_t m8x8[2][64]; // 1a0 + uint32_t u220; // 220 number of extra reorder_list to append? + uint8_t u224[0x20]; // 224..244 reorder_list append ? + uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read +}; + +static void +nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffer *refs[16], unsigned seq, struct nvc0_video_buffer *target) +{ + unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG4_AVC; + unsigned i, idx, empty_spot = dec->base.max_references + 1; + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + continue; + + idx = refs[i]->valid_ref; + //debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx); + assert(target != refs[i] || + (h264 && empty_spot && + (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top))); + if (target == refs[i]) + empty_spot = 0; + assert(!h264 || + dec->refs[idx].last_used == seq - 1); + + if (dec->refs[idx].vidbuf != refs[i]) { + debug_printf("%p is not a real ref\n", refs[i]); + // FIXME: Maybe do m2mf copy here if a application really depends on it? + continue; + } + + assert(dec->refs[idx].vidbuf == refs[i]); + dec->refs[idx].last_used = seq; + } + if (!empty_spot) + return; + + /* Try to find a real empty spot first, there should be one.. + */ + for (i = 0; i < dec->base.max_references + 1; ++i) { + if (dec->refs[i].last_used < seq) { + if (!dec->refs[i].vidbuf) { + empty_spot = i; + break; + } + if (empty_spot < dec->base.max_references+1 && + dec->refs[empty_spot].last_used < dec->refs[i].last_used) + continue; + empty_spot = i; + } + } + assert(empty_spot < dec->base.max_references+1); + dec->refs[empty_spot].last_used = seq; +// debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i); + dec->refs[empty_spot].vidbuf = target; + dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0; + target->valid_ref = empty_spot; +} + +static void +nvc0_decoder_kick_ref(struct nvc0_decoder *dec, struct nvc0_video_buffer *target) +{ + dec->refs[target->valid_ref].vidbuf = NULL; + dec->refs[target->valid_ref].last_used = 0; +// debug_printf("Unreffed %p\n", target); +} + +static uint32_t +nvc0_decoder_fill_picparm_mpeg12_vp(struct nvc0_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + struct nvc0_video_buffer *refs[16], + unsigned *is_ref, + char *map) +{ + struct mpeg12_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub; + uint32_t i, ret = 0x01010, ring; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk; + assert(!(dec->base.width & 0xf)); + *is_ref = desc->picture_coding_type <= 2; + + if (dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1) + pic_vp->picture_structure = 3; + else + pic_vp->picture_structure = desc->picture_structure; + + assert(desc->picture_structure != 4); + if (desc->picture_structure == 4) // Untested, but should work + ret |= 0x100; + pic_vp->width = mb(dec->base.width); + pic_vp->height = mb(dec->base.height); + pic_vp->unk08 = pic_vp->unk04 = (dec->base.width+0xf)&~0xf; // Stride + + nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]); + pic_vp->ofs[5] = pic_vp->ofs[3]; + pic_vp->ofs[0] = pic_vp->ofs[2] = 0; + nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size); + + pic_vp->alternate_scan = desc->alternate_scan; + pic_vp->pad2[0] = pic_vp->pad2[1] = pic_vp->pad2[2] = 0; + pic_vp->unk30 = desc->picture_structure < 3 && (desc->picture_structure == 2 - desc->top_field_first); + pic_vp->unk3a = (desc->picture_coding_type == 1); + for (i = 0; i < 4; ++i) + pic_vp->f_code[i] = desc->f_code[i/2][i%2] + 1; // FU + pic_vp->picture_coding_type = desc->picture_coding_type; + pic_vp->intra_dc_precision = desc->intra_dc_precision; + pic_vp->q_scale_type = desc->q_scale_type; + pic_vp->top_field_first = desc->top_field_first; + pic_vp->full_pel_forward_vector = desc->full_pel_forward_vector; + pic_vp->full_pel_backward_vector = desc->full_pel_backward_vector; + memcpy(pic_vp->intra_quantizer_matrix, desc->intra_matrix, 0x40); + memcpy(pic_vp->non_intra_quantizer_matrix, desc->non_intra_matrix, 0x40); + memcpy(map, pic_vp, sizeof(*pic_vp)); + refs[0] = (struct nvc0_video_buffer *)desc->ref[0]; + refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1]; + return ret | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1); +} + +static uint32_t +nvc0_decoder_fill_picparm_mpeg4_vp(struct nvc0_decoder *dec, + struct pipe_mpeg4_picture_desc *desc, + struct nvc0_video_buffer *refs[16], + unsigned *is_ref, + char *map) +{ + struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub; + uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk; + assert(!(dec->base.width & 0xf)); + *is_ref = desc->vop_coding_type <= 1; + + pic_vp->width = dec->base.width; + pic_vp->height = mb(dec->base.height)<<4; + pic_vp->unk0c = pic_vp->unk08 = mb(dec->base.width)<<4; // Stride + + nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]); + pic_vp->ofs[5] = pic_vp->ofs[3]; + pic_vp->ofs[0] = pic_vp->ofs[2] = 0; + pic_vp->pad1 = pic_vp->pad2 = 0; + nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size); + + pic_vp->trd[0] = desc->trd[0]; + pic_vp->trd[1] = desc->trd[1]; + pic_vp->trb[0] = desc->trb[0]; + pic_vp->trb[1] = desc->trb[1]; + pic_vp->u48 = 0; // Codec? + pic_vp->pad1 = pic_vp->pad2 = 0; + pic_vp->f_code_fw = desc->vop_fcode_forward; + pic_vp->f_code_bw = desc->vop_fcode_backward; + pic_vp->interlaced = desc->interlaced; + pic_vp->quant_type = desc->quant_type; + pic_vp->quarter_sample = desc->quarter_sample; + pic_vp->short_video_header = desc->short_video_header; + pic_vp->u54 = 0; + pic_vp->vop_coding_type = desc->vop_coding_type; + pic_vp->rounding_control = desc->rounding_control; + pic_vp->alternate_vertical_scan_flag = desc->alternate_vertical_scan_flag; + pic_vp->top_field_first = desc->top_field_first; + + memcpy(pic_vp->intra, desc->intra_matrix, 0x40); + memcpy(pic_vp->non_intra, desc->non_intra_matrix, 0x40); + memcpy(map, pic_vp, sizeof(*pic_vp)); + refs[0] = (struct nvc0_video_buffer *)desc->ref[0]; + refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1]; + return ret; +} + +static uint32_t +nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec, + const struct pipe_h264_picture_desc *d, + struct nvc0_video_buffer *refs[16], + unsigned *is_ref, + char *map) +{ + struct h264_picparm_vp stub_h = {}, *h = &stub_h; + unsigned ring, i, j = 0; + assert(offsetof(struct h264_picparm_vp, u224) == 0x224); + *is_ref = d->is_reference; + assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num); + dec->last_frame_num = d->frame_num; + + h->width = mb(dec->base.width); + h->height = mb(dec->base.height); + h->stride1 = h->stride2 = mb(dec->base.width)*16; + nvc0_decoder_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]); + h->ofs[5] = h->ofs[3]; + h->ofs[0] = h->ofs[2] = 0; + h->u24 = dec->tmp_stride >> 8; + assert(h->u24); + nvc0_decoder_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size); + + h->u220 = 0; + h->f0 = d->mb_adaptive_frame_field_flag; + h->f1 = d->direct_8x8_inference_flag; + h->weighted_pred_flag = d->weighted_pred_flag; + h->f3 = d->constrained_intra_pred_flag; + h->is_reference = d->is_reference; + h->interlace = d->field_pic_flag; + h->bottom_field_flag = d->bottom_field_flag; + h->f7 = 0; // TODO: figure out when set.. + h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4; + h->u31_45 = 1; + + h->pic_order_cnt_type = d->pic_order_cnt_type; + h->pic_init_qp_minus26 = d->pic_init_qp_minus26; + h->chroma_qp_index_offset = d->chroma_qp_index_offset; + h->second_chroma_qp_index_offset = d->second_chroma_qp_index_offset; + h->weighted_bipred_idc = d->weighted_bipred_idc; + h->tmp_idx = 0; // set in h264_vp_refs below + h->fifo_dec_index = 0; // always set to 0 to be fifo compatible with other codecs + h->frame_number = d->frame_num; + h->u34_3030 = h->u34_3131 = 0; + h->field_order_cnt[0] = d->field_order_cnt[0]; + h->field_order_cnt[1] = d->field_order_cnt[1]; + memset(h->refs, 0, sizeof(h->refs)); + memcpy(h->m4x4, d->scaling_lists_4x4, sizeof(h->m4x4) + sizeof(h->m8x8)); + h->u220 = 0; + for (i = 0; i < d->num_ref_frames; ++i) { + if (!d->ref[i]) + break; + refs[j] = (struct nvc0_video_buffer *)d->ref[i]; + h->refs[j].fifo_idx = j + 1; + h->refs[j].tmp_idx = refs[j]->valid_ref; + h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0]; + h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1]; + h->refs[j].frame_idx = d->frame_num_list[i]; + if (!dec->refs[refs[j]->valid_ref].field_pic_flag) { + h->refs[j].unk12 = d->top_is_reference[i]; + h->refs[j].unk13 = d->bottom_is_reference[i]; + } + h->refs[j].unk14 = 0; + h->refs[j].notseenyet = 0; + h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag; + h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top && + d->top_is_reference[i]; + h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom && + d->bottom_is_reference[i]; + h->refs[j].pad = 0; + assert(!d->is_long_term[i]); + j++; + } + for (; i < 16; ++i) + assert(!d->ref[i]); + assert(d->num_ref_frames <= dec->base.max_references); + + for (; i < d->num_ref_frames; ++i) + h->refs[j].unk16 = d->field_pic_flag; + *(struct h264_picparm_vp *)map = *h; + + return 0x1113; +} + +static void +nvc0_decoder_fill_picparm_h264_vp_refs(struct nvc0_decoder *dec, + struct pipe_h264_picture_desc *d, + struct nvc0_video_buffer *refs[16], + struct nvc0_video_buffer *target, + char *map) +{ + struct h264_picparm_vp *h = (struct h264_picparm_vp *)map; + assert(dec->refs[target->valid_ref].vidbuf == target); +// debug_printf("Target: %p\n", target); + + h->tmp_idx = target->valid_ref; + dec->refs[target->valid_ref].field_pic_flag = d->field_pic_flag; + if (!d->field_pic_flag || d->bottom_field_flag) + dec->refs[target->valid_ref].decoded_bottom = 1; + if (!d->field_pic_flag || !d->bottom_field_flag) + dec->refs[target->valid_ref].decoded_top = 1; +} + +static uint32_t +nvc0_decoder_fill_picparm_vc1_vp(struct nvc0_decoder *dec, + struct pipe_vc1_picture_desc *d, + struct nvc0_video_buffer *refs[16], + unsigned *is_ref, + char *map) +{ + struct vc1_picparm_vp *vc = (struct vc1_picparm_vp *)map; + unsigned ring; + assert(dec->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE); + *is_ref = d->picture_type <= 1; + + nvc0_decoder_ycbcr_offsets(dec, &vc->ofs[1], &vc->ofs[3], &vc->ofs[4]); + vc->ofs[5] = vc->ofs[3]; + vc->ofs[0] = vc->ofs[2] = 0; + vc->width = dec->base.width; + vc->height = mb(dec->base.height)<<4; + vc->unk0c = vc->unk10 = mb(dec->base.width)<<4; // Stride + vc->pad = vc->pad2 = 0; + nvc0_decoder_inter_sizes(dec, 1, &ring, &vc->bucket_size, &vc->inter_ring_data_size); + vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE; + vc->loopfilter = d->loopfilter; + vc->fastuvmc = d->fastuvmc; + vc->dquant = d->dquant; + vc->overlap = d->overlap; + vc->quantizer = d->quantizer; + vc->u36 = 0; // ? No idea what this one is.. + refs[0] = (struct nvc0_video_buffer *)d->ref[0]; + refs[!!refs[0]] = (struct nvc0_video_buffer *)d->ref[1]; + return 0x12; +} + +#if NVC0_DEBUG_FENCE +static void dump_comm_vp(struct nvc0_decoder *dec, struct comm *comm, u32 comm_seq, + struct nouveau_bo *inter_bo, unsigned slice_size) +{ + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); +#if 0 + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); +#endif + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] == 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); +} +#endif + +void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq, + unsigned *caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]) +{ + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH]; + enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile); + char *vp = bsp_bo->map + VP_OFFSET; + + switch (codec){ + case PIPE_VIDEO_CODEC_MPEG12: + *caps = nvc0_decoder_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp); + nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target); + return; + case PIPE_VIDEO_CODEC_MPEG4: + *caps = nvc0_decoder_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp); + nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target); + return; + case PIPE_VIDEO_CODEC_VC1: { + *caps = nvc0_decoder_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp); + nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target); + return; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + *caps = nvc0_decoder_fill_picparm_h264_vp(dec, desc.h264, refs, is_ref, vp); + nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target); + nvc0_decoder_fill_picparm_h264_vp_refs(dec, desc.h264, refs, target, vp); + return; + } + default: assert(0); return; + } +} + +void +nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nvc0_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[1]; + uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr; + uint32_t slice_size, bucket_size, ring_size, i; + enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + u32 fence_extra = 0, codec_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#ifdef NVC0_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo; + +#if NVC0_DEBUG_FENCE + fence_extra = 4; +#endif + + if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) { + nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + codec_extra += 2; + } else + nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + + if (dec->base.max_references > 2) + codec_extra += 1 + (dec->base.max_references - 2); + + pic_addr[16] = nvc0_video_addr(dec, target) >> 8; + last_addr = null_addr = nvc0_video_addr(dec, NULL) >> 8; + + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + pic_addr[i] = last_addr; + else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i]) + last_addr = pic_addr[i] = nvc0_video_addr(dec, refs[i]) >> 8; + else + pic_addr[i] = null_addr; + } + if (!is_ref) + nvc0_decoder_kick_ref(dec, target); + + PUSH_SPACE(push, 8 + 3 * (codec != PIPE_VIDEO_CODEC_MPEG12) + + 6 + codec_extra + fence_extra + 2); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; +#if NVC0_DEBUG_FENCE + comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + inter_addr = inter_bo->offset >> 8; + if (dec->fw_bo) + ucode_addr = dec->fw_bo->offset >> 8; + else + ucode_addr = 0; + + BEGIN_NVC0(push, SUBC_VP(0x700), 7); + PUSH_DATA (push, caps); // 700 + PUSH_DATA (push, comm_seq); // 704 + PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0 + PUSH_DATA (push, dec->fw_sizes); // 70c + PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr + PUSH_DATA (push, inter_addr); // 714 inter_parm + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs + + if (bucket_size) { + uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2); + + BEGIN_NVC0(push, SUBC_VP(0x71c), 2); + PUSH_DATA (push, tmpimg_addr >> 8); // 71c + PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs + } + + BEGIN_NVC0(push, SUBC_VP(0x724), 5); + PUSH_DATA (push, comm_addr); // 724 + PUSH_DATA (push, ucode_addr); // 728 + PUSH_DATA (push, pic_addr[16]); // 734 + PUSH_DATA (push, pic_addr[0]); // 72c + PUSH_DATA (push, pic_addr[1]); // 730 + + if (dec->base.max_references > 2) { + int i; + + BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2); + for (i = 2; i < dec->base.max_references; ++i) { + assert(0x400 + (i - 2) * 4 < 0x438); + PUSH_DATA (push, pic_addr[i]); + } + } + + if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) { + BEGIN_NVC0(push, SUBC_VP(0x438), 1); + PUSH_DATA (push, desc.h264->slice_count); + } + + //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]); + +#if NVC0_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_VP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK(push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("vp%u: %u\n", dec->fence_seq, dec->fence_map[4]); + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); + } + } while (dec->fence_seq > dec->fence_map[4]); + } + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); +#else + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +}