5180 lines
199 KiB
C
5180 lines
199 KiB
C
/*
|
|
* Copyright © 2022 Imagination Technologies Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "pvr_device_info.h"
|
|
#include "pvr_pds.h"
|
|
#include "pvr_rogue_pds_defs.h"
|
|
#include "pvr_rogue_pds_disasm.h"
|
|
#include "pvr_rogue_pds_encode.h"
|
|
#include "util/log.h"
|
|
#include "util/macros.h"
|
|
|
|
#define H32(X) (uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL))
|
|
#define L32(X) (uint32_t)(((X)&0xFFFFFFFFUL))
|
|
|
|
/*****************************************************************************
|
|
Macro definitions
|
|
*****************************************************************************/
|
|
|
|
#define PVR_PDS_DWORD_SHIFT 2
|
|
|
|
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
|
|
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
|
|
#define PVR_PDS_TEMPS_BLOCK_BASE 128
|
|
#define PVR_PDS_TEMPS_BLOCK_SIZE 32
|
|
|
|
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
|
|
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK
|
|
|
|
/* Map PDS temp registers to the CDM values they contain Work-group IDs are only
|
|
* available in the coefficient sync task.
|
|
*/
|
|
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
|
|
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
|
|
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
|
|
/* Local IDs are available in every task. */
|
|
#define PVR_PDS_CDM_LOCAL_ID_X 0
|
|
#define PVR_PDS_CDM_LOCAL_ID_YZ 1
|
|
|
|
#define PVR_PDS_DOUTW_LOWER32 0x0
|
|
#define PVR_PDS_DOUTW_UPPER32 0x1
|
|
#define PVR_PDS_DOUTW_LOWER64 0x2
|
|
#define PVR_PDS_DOUTW_LOWER128 0x3
|
|
#define PVR_PDS_DOUTW_MAXMASK 0x4
|
|
|
|
#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
|
|
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
|
|
|
|
/*****************************************************************************
|
|
Static variables
|
|
*****************************************************************************/
|
|
|
|
static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
|
|
};
|
|
|
|
/* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
|
|
* cache_control_const[1].
|
|
*/
|
|
static const uint32_t cache_control_const[2][2] = {
|
|
{ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
|
|
{ 0, 0 }
|
|
};
|
|
|
|
/*****************************************************************************
|
|
Function definitions
|
|
*****************************************************************************/
|
|
|
|
uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
|
|
uint64_t count8,
|
|
uint64_t src_add,
|
|
bool cached,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint64_t encoded = 0;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
|
|
encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
|
|
: PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
|
|
}
|
|
|
|
encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
|
|
encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
|
|
encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
|
|
: PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
|
|
encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
|
|
|
|
return encoded;
|
|
}
|
|
|
|
uint64_t pvr_pds_encode_st_src0(uint64_t src,
|
|
uint64_t count4,
|
|
uint64_t dst_add,
|
|
bool write_through,
|
|
const struct pvr_device_info *device_info)
|
|
{
|
|
uint64_t encoded = 0;
|
|
|
|
if (device_info->features.has_slc_mcu_cache_controls) {
|
|
encoded |= (write_through
|
|
? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
|
|
: PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
|
|
}
|
|
|
|
encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
|
|
encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
|
|
encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
|
|
: PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
|
|
encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
|
|
|
|
return encoded;
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t
|
|
pvr_pds_encode_doutw_src1(uint32_t dest,
|
|
uint32_t dword_mask,
|
|
uint32_t flags,
|
|
bool cached,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
|
|
((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
|
|
(dword_mask < PVR_PDS_DOUTW_LOWER64));
|
|
|
|
uint32_t encoded =
|
|
(dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
|
|
|
|
encoded |= dword_mask_const[dword_mask];
|
|
|
|
encoded |= flags;
|
|
|
|
encoded |=
|
|
cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
|
|
: 1]
|
|
[cached ? 1 : 0];
|
|
return encoded;
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
|
|
uint32_t end,
|
|
uint32_t src1,
|
|
uint32_t src0)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
src1,
|
|
src0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
|
|
uint32_t end,
|
|
uint32_t src0)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
0,
|
|
src0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
|
|
uint32_t end)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
0,
|
|
0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
|
|
uint32_t end,
|
|
uint32_t src1,
|
|
uint32_t src0)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
src1,
|
|
src0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
|
|
uint32_t end,
|
|
uint32_t src0)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
0,
|
|
src0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutv(uint32_t cc,
|
|
uint32_t end,
|
|
uint32_t src1,
|
|
uint32_t src0)
|
|
{
|
|
return pvr_pds_inst_encode_dout(cc,
|
|
end,
|
|
src1,
|
|
src0,
|
|
PVR_ROGUE_PDSINST_DSTDOUT_DOUTV);
|
|
}
|
|
|
|
static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
|
|
uint32_t neg,
|
|
uint32_t setc,
|
|
int32_t relative_address)
|
|
{
|
|
/* Address should be signed but API only allows unsigned value. */
|
|
return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
|
|
}
|
|
|
|
/**
|
|
* Gets the next constant address and moves the next constant pointer along.
|
|
*
|
|
* \param next_constant Pointer to the next constant address.
|
|
* \param num_constants The number of constants required.
|
|
* \param count The number of constants allocated.
|
|
* \return The address of the next constant.
|
|
*/
|
|
static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
|
|
uint32_t num_constants,
|
|
uint32_t *count)
|
|
{
|
|
uint32_t constant;
|
|
|
|
/* Work out starting constant number. For even number of constants, start on
|
|
* a 64-bit boundary.
|
|
*/
|
|
if (num_constants & 1)
|
|
constant = *next_constant;
|
|
else
|
|
constant = (*next_constant + 1) & ~1;
|
|
|
|
/* Update the count with the number of constants actually allocated. */
|
|
*count += constant + num_constants - *next_constant;
|
|
|
|
/* Move the next constant pointer. */
|
|
*next_constant = constant + num_constants;
|
|
|
|
assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
|
|
|
|
return constant;
|
|
}
|
|
|
|
/**
|
|
* Gets the next temp address and moves the next temp pointer along.
|
|
*
|
|
* \param next_temp Pointer to the next temp address.
|
|
* \param num_temps The number of temps required.
|
|
* \param count The number of temps allocated.
|
|
* \return The address of the next temp.
|
|
*/
|
|
static uint32_t
|
|
pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
|
|
{
|
|
uint32_t temp;
|
|
|
|
/* Work out starting temp number. For even number of temps, start on a
|
|
* 64-bit boundary.
|
|
*/
|
|
if (num_temps & 1)
|
|
temp = *next_temp;
|
|
else
|
|
temp = (*next_temp + 1) & ~1;
|
|
|
|
/* Update the count with the number of temps actually allocated. */
|
|
*count += temp + num_temps - *next_temp;
|
|
|
|
/* Move the next temp pointer. */
|
|
*next_temp = temp + num_temps;
|
|
|
|
assert((temp + num_temps) <=
|
|
(PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
|
|
|
|
return temp;
|
|
}
|
|
|
|
/**
|
|
* Write a 32-bit constant indexed by the long range.
|
|
*
|
|
* \param data_block Pointer to data block to write to.
|
|
* \param index Index within the data to write to.
|
|
* \param dword The 32-bit constant to write.
|
|
*/
|
|
static void
|
|
pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
|
|
{
|
|
/* Check range. */
|
|
assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
|
|
PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
|
|
|
|
data_block[index + 0] = dword0;
|
|
|
|
PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
|
|
}
|
|
|
|
/**
|
|
* Write a 64-bit constant indexed by the long range.
|
|
*
|
|
* \param data_block Pointer to data block to write to.
|
|
* \param index Index within the data to write to.
|
|
* \param dword0 Lower half of the 64 bit constant.
|
|
* \param dword1 Upper half of the 64 bit constant.
|
|
*/
|
|
static void pvr_pds_write_constant64(uint32_t *data_block,
|
|
uint32_t index,
|
|
uint32_t dword0,
|
|
uint32_t dword1)
|
|
{
|
|
/* Has to be on 64 bit boundary. */
|
|
assert((index & 1) == 0);
|
|
|
|
/* Check range. */
|
|
assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
|
|
|
|
data_block[index + 0] = dword0;
|
|
data_block[index + 1] = dword1;
|
|
|
|
PVR_PDS_PRINT_DATA("WriteConstant64",
|
|
((uint64_t)dword0 << 32) | (uint64_t)dword1,
|
|
index);
|
|
}
|
|
|
|
/**
|
|
* Write a 64-bit constant from a single wide word indexed by the long-range
|
|
* number.
|
|
*
|
|
* \param data_block Pointer to data block to write to.
|
|
* \param index Index within the data to write to.
|
|
* \param word The 64-bit constant to write.
|
|
*/
|
|
|
|
static void
|
|
pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
|
|
{
|
|
/* Has to be on 64 bit boundary. */
|
|
assert((index & 1) == 0);
|
|
|
|
/* Check range. */
|
|
assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
|
|
|
|
data_block[index + 0] = L32(word);
|
|
data_block[index + 1] = H32(word);
|
|
|
|
PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
|
|
}
|
|
|
|
static void pvr_pds_write_dma_address(uint32_t *data_block,
|
|
uint32_t index,
|
|
uint64_t address,
|
|
bool coherent,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
/* Has to be on 64 bit boundary. */
|
|
assert((index & 1) == 0);
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
|
|
address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
|
|
|
|
/* Check range. */
|
|
assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
|
|
|
|
data_block[index + 0] = L32(address);
|
|
data_block[index + 1] = H32(address);
|
|
|
|
PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
|
|
}
|
|
|
|
/**
|
|
* External API to append a 64-bit constant to an existing data segment
|
|
* allocation.
|
|
*
|
|
* \param constants Pointer to start of data segment.
|
|
* \param constant_value Value to write to constant.
|
|
* \param data_size The number of constants allocated.
|
|
* \returns The address of the next constant.
|
|
*/
|
|
uint32_t pvr_pds_append_constant64(uint32_t *constants,
|
|
uint64_t constant_value,
|
|
uint32_t *data_size)
|
|
{
|
|
/* Calculate next constant from current data size. */
|
|
uint32_t next_constant = *data_size;
|
|
uint32_t constant = pvr_pds_get_constants(&next_constant, 2, data_size);
|
|
|
|
/* Set the value. */
|
|
pvr_pds_write_wide_constant(constants, constant, constant_value);
|
|
|
|
return constant;
|
|
}
|
|
|
|
void pvr_pds_pixel_shader_sa_initialize(
|
|
struct pvr_pds_pixel_shader_sa_program *program)
|
|
{
|
|
memset(program, 0, sizeof(*program));
|
|
}
|
|
|
|
/**
|
|
* Encode a DMA burst.
|
|
*
|
|
* \param dma_control DMA control words.
|
|
* \param dma_address DMA address.
|
|
* \param dest_offset Destination offset in the attribute.
|
|
* \param dma_size The size of the DMA in words.
|
|
* \param src_address Source address for the burst.
|
|
* \param dev_info PVR device info structure.
|
|
* \returns The number of DMA transfers required.
|
|
*/
|
|
|
|
uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
|
|
uint64_t *dma_address,
|
|
uint32_t dest_offset,
|
|
uint32_t dma_size,
|
|
uint64_t src_address,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
/* Simplified for MS2. */
|
|
|
|
/* Force to 1 DMA. */
|
|
const uint32_t num_kicks = 1;
|
|
|
|
dma_control[0] = dma_size
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
|
|
dma_control[0] |= dest_offset
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
|
|
|
|
dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
|
|
|
|
dma_address[0] = src_address;
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
|
|
dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
|
|
}
|
|
|
|
return num_kicks;
|
|
}
|
|
|
|
/* FIXME: use the csbgen interface and pvr_csb_pack.
|
|
* FIXME: use bool for phase_rate_change.
|
|
*/
|
|
/**
|
|
* Sets up the USC control words for a DOUTU.
|
|
*
|
|
* \param usc_task_control USC task control structure to be setup.
|
|
* \param execution_address USC execution virtual address.
|
|
* \param usc_temps Number of USC temps.
|
|
* \param sample_rate Sample rate for the DOUTU.
|
|
* \param phase_rate_change Phase rate change for the DOUTU.
|
|
*/
|
|
void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
|
|
uint64_t execution_address,
|
|
uint32_t usc_temps,
|
|
uint32_t sample_rate,
|
|
uint32_t phase_rate_change)
|
|
{
|
|
usc_task_control->src0 = UINT64_C(0);
|
|
|
|
/* Set the execution address. */
|
|
pvr_set_usc_execution_address64(&(usc_task_control->src0),
|
|
execution_address);
|
|
|
|
if (usc_temps > 0) {
|
|
/* Temps are allocated in blocks of 4 dwords. */
|
|
usc_temps =
|
|
DIV_ROUND_UP(usc_temps,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
|
|
|
|
/* Check for losing temps due to too many requested. */
|
|
assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
|
|
usc_temps);
|
|
|
|
usc_task_control->src0 |=
|
|
((uint64_t)(usc_temps &
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
|
|
}
|
|
|
|
if (sample_rate > 0) {
|
|
usc_task_control->src0 |=
|
|
((uint64_t)sample_rate)
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
|
|
}
|
|
|
|
if (phase_rate_change) {
|
|
usc_task_control->src0 |=
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Generates the PDS pixel event program.
|
|
*
|
|
* \param program Pointer to the PDS pixel event program.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Generate either a data segment or code segment.
|
|
* \param dev_info PVR device info structure.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *
|
|
pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
uint32_t *constants = buffer;
|
|
|
|
uint32_t data_size = 0;
|
|
|
|
/* Copy the DMA control words and USC task control words to constants, then
|
|
* arrange them so that the 64-bit words are together followed by the 32-bit
|
|
* words.
|
|
*/
|
|
uint32_t control_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
uint32_t emit_constant =
|
|
pvr_pds_get_constants(&next_constant,
|
|
(2 * program->num_emit_word_pairs),
|
|
&data_size);
|
|
|
|
uint32_t control_word_constant =
|
|
pvr_pds_get_constants(&next_constant,
|
|
program->num_emit_word_pairs,
|
|
&data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Src0 for DOUTU. */
|
|
pvr_pds_write_wide_constant(buffer,
|
|
control_constant,
|
|
program->task_control.src0); /* DOUTU */
|
|
/* 64-bit Src0. */
|
|
|
|
/* Emit words for end of tile program. */
|
|
for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
|
|
pvr_pds_write_constant64(constants,
|
|
emit_constant + (2 * i),
|
|
program->emit_words[(2 * i) + 0],
|
|
program->emit_words[(2 * i) + 1]);
|
|
}
|
|
|
|
/* Control words. */
|
|
for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
|
|
uint32_t doutw = pvr_pds_encode_doutw_src1(
|
|
(2 * i),
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
if (i == (program->num_emit_word_pairs - 1))
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
|
|
pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
|
|
}
|
|
}
|
|
|
|
else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* DOUTW the state into the shared register. */
|
|
for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
|
|
*/
|
|
}
|
|
|
|
/* Kick the USC. */
|
|
*buffer++ = pvr_pds_encode_doutu(
|
|
/* cc */ 0,
|
|
/* END */ 1,
|
|
/* SRC0 */ control_constant >> 1);
|
|
}
|
|
|
|
uint32_t code_size = 1 + program->num_emit_word_pairs;
|
|
|
|
/* Save the data segment Pointer and size. */
|
|
program->data_segment = constants;
|
|
program->data_size = data_size;
|
|
program->code_size = code_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return (constants + next_constant);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
|
|
return buffer;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Checks if any of the vertex streams contains instance data.
|
|
*
|
|
* \param streams Streams contained in the vertex shader.
|
|
* \param num_streams Number of vertex streams.
|
|
* \returns true if one or more of the given vertex streams contains
|
|
* instance data, otherwise false.
|
|
*/
|
|
static bool pvr_pds_vertex_streams_contains_instance_data(
|
|
const struct pvr_pds_vertex_stream *streams,
|
|
uint32_t num_streams)
|
|
{
|
|
for (uint32_t i = 0; i < num_streams; i++) {
|
|
const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
|
|
if (vertex_stream->instance_data)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
|
|
uint32_t *next_constant,
|
|
uint32_t num_constants,
|
|
uint32_t *count)
|
|
{
|
|
/* Allocate constant for PDS vertex shader where constant is divided into
|
|
* banks.
|
|
*/
|
|
uint32_t constant;
|
|
|
|
assert(num_constants == 1 || num_constants == 2);
|
|
|
|
if (*next_constant >= (num_backs << 3))
|
|
return pvr_pds_get_constants(next_constant, num_constants, count);
|
|
|
|
if ((*next_constant % 8) == 0) {
|
|
constant = *next_constant;
|
|
|
|
if (num_constants == 1)
|
|
*next_constant += 1;
|
|
else
|
|
*next_constant += 8;
|
|
} else if (num_constants == 1) {
|
|
constant = *next_constant;
|
|
*next_constant += 7;
|
|
} else {
|
|
*next_constant += 7;
|
|
constant = *next_constant;
|
|
|
|
if (*next_constant >= (num_backs << 3)) {
|
|
*next_constant += 2;
|
|
*count += 2;
|
|
} else {
|
|
*next_constant += 8;
|
|
}
|
|
}
|
|
return constant;
|
|
}
|
|
|
|
/**
|
|
* Generates a PDS program to load USC vertex inputs based from one or more
|
|
* vertex buffers, each containing potentially multiple elements, and then a
|
|
* DOUTU to execute the USC.
|
|
*
|
|
* \param program Pointer to the description of the program which should be
|
|
* generated.
|
|
* \param buffer Pointer to buffer that receives the output of this function.
|
|
* Will either be the data segment or code segment depending on
|
|
* gen_mode.
|
|
* \param gen_mode Which part to generate, either data segment or
|
|
* code segment. If PDS_GENERATE_SIZES is specified, nothing is
|
|
* written, but size information in program is updated.
|
|
* \param dev_info PVR device info structure.
|
|
* \returns Pointer to just beyond the buffer for the data - i.e the value
|
|
* of the buffer after writing its contents.
|
|
*/
|
|
uint32_t *
|
|
pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
uint32_t next_stream_constant;
|
|
uint32_t next_temp;
|
|
uint32_t usc_control_constant64;
|
|
uint32_t stride_constant32 = 0;
|
|
uint32_t dma_address_constant64 = 0;
|
|
uint32_t dma_control_constant64;
|
|
uint32_t multiplier_constant32 = 0;
|
|
uint32_t base_instance_const32 = 0;
|
|
|
|
uint32_t temp = 0;
|
|
uint32_t index_temp64 = 0;
|
|
uint32_t num_vertices_temp64 = 0;
|
|
uint32_t pre_index_temp = (uint32_t)(-1);
|
|
bool first_ddmadt = true;
|
|
uint32_t input_register0;
|
|
uint32_t input_register1;
|
|
uint32_t input_register2;
|
|
|
|
struct pvr_pds_vertex_stream *vertex_stream;
|
|
struct pvr_pds_vertex_element *vertex_element;
|
|
uint32_t shift_2s_comp;
|
|
|
|
uint32_t data_size = 0;
|
|
uint32_t code_size = 0;
|
|
uint32_t temps_used = 0;
|
|
|
|
bool direct_writes_needed = false;
|
|
|
|
uint32_t consts_size = 0;
|
|
uint32_t vertex_id_control_word_const32 = 0;
|
|
uint32_t instance_id_control_word_const32 = 0;
|
|
uint32_t instance_id_modifier_word_const32 = 0;
|
|
uint32_t geometry_id_control_word_const64 = 0;
|
|
uint32_t empty_dma_control_constant64 = 0;
|
|
|
|
bool any_instanced_stream =
|
|
pvr_pds_vertex_streams_contains_instance_data(program->streams,
|
|
program->num_streams);
|
|
|
|
uint32_t base_instance_register = 0;
|
|
uint32_t ddmadt_enables = 0;
|
|
|
|
bool issue_empty_ddmad = false;
|
|
uint32_t last_stream_index = program->num_streams - 1;
|
|
bool current_p0 = false;
|
|
uint32_t skip_stream_flag = 0;
|
|
|
|
/* Generate the PDS vertex shader data. */
|
|
|
|
#if defined(DEBUG)
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
for (uint32_t i = 0; i < program->data_size; i++)
|
|
buffer[i] = 0xDEADBEEF;
|
|
}
|
|
#endif
|
|
|
|
/* Generate the PDS vertex shader program */
|
|
next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
|
|
/* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
|
|
input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
|
|
/* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
|
|
input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
|
|
|
|
if (program->iterate_remap_id)
|
|
input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
|
|
else
|
|
input_register2 = 0; /* Not used, but need to silence the compiler. */
|
|
|
|
/* Generate the PDS vertex shader code. The constants in the data block are
|
|
* arranged as follows:
|
|
*
|
|
* 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank
|
|
* 3 Not used (tmps) Stride | Multiplier Address Control
|
|
*/
|
|
|
|
/* Find out how many constants are needed by streams. */
|
|
for (uint32_t stream = 0; stream < program->num_streams; stream++) {
|
|
pvr_pds_get_constants(&next_constant,
|
|
8 * program->streams[stream].num_elements,
|
|
&consts_size);
|
|
}
|
|
|
|
/* If there are no vertex streams allocate the first bank for USC Code
|
|
* Address.
|
|
*/
|
|
if (consts_size == 0)
|
|
pvr_pds_get_constants(&next_constant, 2, &consts_size);
|
|
else
|
|
next_constant = 8;
|
|
|
|
direct_writes_needed = program->iterate_instance_id ||
|
|
program->iterate_vtx_id || program->iterate_remap_id;
|
|
|
|
if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
|
|
/* Evaluate what config of DDMAD should be used for each stream. */
|
|
for (uint32_t stream = 0; stream < program->num_streams; stream++) {
|
|
vertex_stream = &program->streams[stream];
|
|
|
|
if (vertex_stream->use_ddmadt) {
|
|
ddmadt_enables |= (1 << stream);
|
|
|
|
/* The condition for index value is:
|
|
* index * stride + size <= bufferSize (all in unit of byte)
|
|
*/
|
|
if (vertex_stream->stride == 0) {
|
|
if (vertex_stream->elements[0].size <=
|
|
vertex_stream->buffer_size_in_bytes) {
|
|
/* index can be any value -> no need to use DDMADT. */
|
|
ddmadt_enables &= (~(1 << stream));
|
|
} else {
|
|
/* No index works -> no need to issue DDMAD instruction.
|
|
*/
|
|
skip_stream_flag |= (1 << stream);
|
|
}
|
|
} else {
|
|
/* index * stride + size <= bufferSize
|
|
*
|
|
* can be converted to:
|
|
* index <= (bufferSize - size) / stride
|
|
*
|
|
* where maximum index is:
|
|
* integer((bufferSize - size) / stride).
|
|
*/
|
|
if (vertex_stream->buffer_size_in_bytes <
|
|
vertex_stream->elements[0].size) {
|
|
/* No index works -> no need to issue DDMAD instruction.
|
|
*/
|
|
skip_stream_flag |= (1 << stream);
|
|
} else {
|
|
uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
|
|
vertex_stream->elements[0].size) /
|
|
vertex_stream->stride;
|
|
if (max_index == 0xFFFFFFFFu) {
|
|
/* No need to use DDMADT as all possible indices can
|
|
* pass the test.
|
|
*/
|
|
ddmadt_enables &= (~(1 << stream));
|
|
} else {
|
|
/* In this case, test condition can be changed to
|
|
* index < max_index + 1.
|
|
*/
|
|
program->streams[stream].num_vertices =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_constant32(
|
|
buffer,
|
|
program->streams[stream].num_vertices,
|
|
max_index + 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((skip_stream_flag & (1 << stream)) == 0) {
|
|
issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
|
|
last_stream_index = stream;
|
|
}
|
|
}
|
|
} else {
|
|
if (program->num_streams > 0 &&
|
|
program->streams[program->num_streams - 1].use_ddmadt) {
|
|
issue_empty_ddmad = true;
|
|
}
|
|
}
|
|
|
|
if (direct_writes_needed)
|
|
issue_empty_ddmad = false;
|
|
|
|
if (issue_empty_ddmad) {
|
|
/* An empty DMA control const (DMA size = 0) is required in case the
|
|
* last DDMADD is predicated out and last flag does not have any usage.
|
|
*/
|
|
empty_dma_control_constant64 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
2,
|
|
&consts_size);
|
|
}
|
|
|
|
/* Assign constants for non stream or base instance if there is any
|
|
* instanced stream.
|
|
*/
|
|
if (direct_writes_needed || any_instanced_stream ||
|
|
program->instance_ID_modifier) {
|
|
if (program->iterate_vtx_id) {
|
|
vertex_id_control_word_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
}
|
|
|
|
if (program->iterate_instance_id || program->instance_ID_modifier) {
|
|
if (program->instance_ID_modifier == 0) {
|
|
instance_id_control_word_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
} else {
|
|
instance_id_modifier_word_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
if ((instance_id_modifier_word_const32 % 2) == 0) {
|
|
instance_id_control_word_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
} else {
|
|
instance_id_control_word_const32 =
|
|
instance_id_modifier_word_const32;
|
|
instance_id_modifier_word_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (program->base_instance != 0) {
|
|
base_instance_const32 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
1,
|
|
&consts_size);
|
|
}
|
|
|
|
if (program->iterate_remap_id) {
|
|
geometry_id_control_word_const64 =
|
|
pvr_pds_get_bank_based_constants(program->num_streams,
|
|
&next_constant,
|
|
2,
|
|
&consts_size);
|
|
}
|
|
}
|
|
|
|
if (program->instance_ID_modifier != 0) {
|
|
/* This instanceID modifier is used when a draw array instanced call
|
|
* sourcing from client data cannot fit into vertex buffer and needs to
|
|
* be broken down into several draw calls.
|
|
*/
|
|
|
|
code_size += 1;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_constant32(buffer,
|
|
instance_id_modifier_word_const32,
|
|
program->instance_ID_modifier);
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_add32(
|
|
/* cc */ 0x0,
|
|
/* ALUM */ 0, /* Unsigned */
|
|
/* SNA */ 0, /* Add */
|
|
/* SRC0 32b */ instance_id_modifier_word_const32,
|
|
/* SRC1 32b */ input_register1,
|
|
/* DST 32b */ input_register1);
|
|
}
|
|
}
|
|
|
|
/* Adjust instanceID if necessary. */
|
|
if (any_instanced_stream || program->iterate_instance_id) {
|
|
if (program->base_instance != 0) {
|
|
assert(!program->draw_indirect);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_constant32(buffer,
|
|
base_instance_const32,
|
|
program->base_instance);
|
|
}
|
|
|
|
base_instance_register = base_instance_const32;
|
|
}
|
|
|
|
if (program->draw_indirect) {
|
|
assert((program->instance_ID_modifier == 0) &&
|
|
(program->base_instance == 0));
|
|
|
|
base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
|
|
}
|
|
}
|
|
|
|
next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
usc_control_constant64 =
|
|
pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
|
|
|
|
for (uint32_t stream = 0; stream < program->num_streams; stream++) {
|
|
bool instance_data_with_base_instance;
|
|
|
|
if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
|
|
((skip_stream_flag & (1 << stream)) != 0)) {
|
|
continue;
|
|
}
|
|
|
|
vertex_stream = &program->streams[stream];
|
|
|
|
instance_data_with_base_instance =
|
|
((vertex_stream->instance_data) &&
|
|
((program->base_instance > 0) || (program->draw_indirect)));
|
|
|
|
/* Get all 8 32-bit constants at once, only 6 for first stream due to
|
|
* USC constants.
|
|
*/
|
|
if (stream == 0) {
|
|
stride_constant32 =
|
|
pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
|
|
} else {
|
|
next_constant =
|
|
pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
|
|
|
|
/* Skip bank 0. */
|
|
stride_constant32 = next_constant + 2;
|
|
}
|
|
|
|
multiplier_constant32 = stride_constant32 + 1;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_constant32(buffer,
|
|
stride_constant32,
|
|
vertex_stream->stride);
|
|
|
|
/* Vertex stream frequency multiplier. */
|
|
if (vertex_stream->multiplier)
|
|
pvr_pds_write_constant32(buffer,
|
|
multiplier_constant32,
|
|
vertex_stream->multiplier);
|
|
}
|
|
|
|
/* Update the code size count and temps count for the above code
|
|
* segment.
|
|
*/
|
|
if (vertex_stream->current_state) {
|
|
code_size += 1;
|
|
temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
|
|
} else {
|
|
unsigned int num_temps_required = 0;
|
|
|
|
if (vertex_stream->multiplier) {
|
|
num_temps_required += 2;
|
|
code_size += 3;
|
|
|
|
if (vertex_stream->shift) {
|
|
code_size += 1;
|
|
|
|
if ((int32_t)vertex_stream->shift > 0)
|
|
code_size += 1;
|
|
}
|
|
} else if (vertex_stream->shift) {
|
|
code_size += 1;
|
|
num_temps_required += 1;
|
|
} else if (instance_data_with_base_instance) {
|
|
num_temps_required += 1;
|
|
}
|
|
|
|
if (num_temps_required != 0) {
|
|
temp = pvr_pds_get_temps(&next_temp,
|
|
num_temps_required,
|
|
&temps_used); /* 64-bit */
|
|
} else {
|
|
temp = vertex_stream->instance_data ? input_register1
|
|
: input_register0;
|
|
}
|
|
|
|
if (instance_data_with_base_instance)
|
|
code_size += 1;
|
|
}
|
|
|
|
/* The real code segment. */
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* If it's current state stream, then index = 0 always. */
|
|
if (vertex_stream->current_state) {
|
|
/* Put zero in temp. */
|
|
*buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
|
|
} else if (vertex_stream->multiplier) {
|
|
/* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
|
|
* new: Iout = (Iin * Multiplier) >> (shift+31)
|
|
*/
|
|
|
|
/* Put zero in temp. Need zero for add part of the following
|
|
* MAD. MAD source is 64 bit, so need two LIMMs.
|
|
*/
|
|
*buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
|
|
/* Put zero in temp. Need zero for add part of the following
|
|
* MAD.
|
|
*/
|
|
*buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
|
|
|
|
/* old: (Iin * (Multiplier+2^24))
|
|
* new: (Iin * Multiplier)
|
|
*/
|
|
*buffer++ = pvr_rogue_inst_encode_mad(
|
|
0, /* Sign of add is positive. */
|
|
0, /* Unsigned ALU mode */
|
|
0, /* Unconditional */
|
|
multiplier_constant32,
|
|
vertex_stream->instance_data ? input_register1 : input_register0,
|
|
temp / 2,
|
|
temp / 2);
|
|
|
|
if (vertex_stream->shift) {
|
|
int32_t shift = (int32_t)vertex_stream->shift;
|
|
|
|
/* new: >> (shift + 31) */
|
|
shift += 31;
|
|
shift *= -1;
|
|
|
|
if (shift < -31) {
|
|
/* >> (31) */
|
|
shift_2s_comp = 0xFFFE1;
|
|
*buffer++ = pvr_pds_inst_encode_stflp64(
|
|
/* cc */ 0,
|
|
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
|
|
/* IM */ 1, /* enable immediate */
|
|
/* SRC0 */ temp / 2,
|
|
/* SRC1 */ input_register0, /* This won't be used in
|
|
* a shift operation.
|
|
*/
|
|
/* SRC2 (Shift) */ shift_2s_comp,
|
|
/* DST */ temp / 2);
|
|
shift += 31;
|
|
}
|
|
|
|
/* old: >> (Shift+24)
|
|
* new: >> (shift + 31)
|
|
*/
|
|
shift_2s_comp = *((uint32_t *)&shift);
|
|
*buffer++ = pvr_pds_inst_encode_stflp64(
|
|
/* cc */ 0,
|
|
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
|
|
/* IM */ 1, /*enable immediate */
|
|
/* SRC0 */ temp / 2,
|
|
/* SRC1 */ input_register0, /* This won't be used in
|
|
* a shift operation.
|
|
*/
|
|
/* SRC2 (Shift) */ shift_2s_comp,
|
|
/* DST */ temp / 2);
|
|
}
|
|
|
|
if (instance_data_with_base_instance) {
|
|
*buffer++ =
|
|
pvr_pds_inst_encode_add32(0, /* cc */
|
|
0, /* ALNUM */
|
|
0, /* SNA */
|
|
base_instance_register, /* src0
|
|
*/
|
|
temp, /* src1 */
|
|
temp /* dst */
|
|
);
|
|
}
|
|
} else { /* NOT vertex_stream->multiplier */
|
|
if (vertex_stream->shift) {
|
|
/* Shift Index/InstanceNum Right by shift bits. Put result
|
|
* in a Temp.
|
|
*/
|
|
|
|
/* 2's complement of shift as this will be a right shift. */
|
|
shift_2s_comp = ~(vertex_stream->shift) + 1;
|
|
|
|
*buffer++ = pvr_pds_inst_encode_stflp32(
|
|
/* IM */ 1, /* enable immediate. */
|
|
/* cc */ 0,
|
|
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
|
|
/* SRC0 */ vertex_stream->instance_data ? input_register1
|
|
: input_register0,
|
|
/* SRC1 */ input_register0, /* This won't be used in
|
|
* a shift operation.
|
|
*/
|
|
/* SRC2 (Shift) */ shift_2s_comp,
|
|
/* DST */ temp);
|
|
|
|
if (instance_data_with_base_instance) {
|
|
*buffer++ =
|
|
pvr_pds_inst_encode_add32(0, /* cc */
|
|
0, /* ALNUM */
|
|
0, /* SNA */
|
|
base_instance_register, /* src0
|
|
*/
|
|
temp, /* src1 */
|
|
temp /* dst */
|
|
);
|
|
}
|
|
} else {
|
|
if (instance_data_with_base_instance) {
|
|
*buffer++ =
|
|
pvr_pds_inst_encode_add32(0, /* cc */
|
|
0, /* ALNUM */
|
|
0, /* SNA */
|
|
base_instance_register, /* src0
|
|
*/
|
|
input_register1, /* src1 */
|
|
temp /* dst */
|
|
);
|
|
} else {
|
|
/* If the shift instruction doesn't happen, use the IR
|
|
* directly into the following MAD.
|
|
*/
|
|
temp = vertex_stream->instance_data ? input_register1
|
|
: input_register0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
|
|
if (vertex_stream->use_ddmadt)
|
|
ddmadt_enables |= (1 << stream);
|
|
} else {
|
|
if ((ddmadt_enables & (1 << stream)) != 0) {
|
|
/* Emulate what DDMADT does for range checking. */
|
|
if (first_ddmadt) {
|
|
/* Get an 64 bits temp such that cmp current index with
|
|
* allowed vertex number can work.
|
|
*/
|
|
index_temp64 =
|
|
pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
|
|
*/
|
|
num_vertices_temp64 =
|
|
pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
|
|
*/
|
|
|
|
index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
|
|
num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
|
|
|
|
code_size += 3;
|
|
current_p0 = true;
|
|
}
|
|
|
|
code_size += (temp == pre_index_temp ? 1 : 2);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
if (first_ddmadt) {
|
|
/* Set predicate to be P0. */
|
|
*buffer++ = pvr_pds_encode_bra(
|
|
PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
|
|
*/
|
|
0, /* Neg */
|
|
PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
|
|
*/
|
|
1); /* Addr */
|
|
|
|
*buffer++ =
|
|
pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
|
|
*buffer++ =
|
|
pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
|
|
}
|
|
|
|
if (temp != pre_index_temp) {
|
|
*buffer++ = pvr_pds_inst_encode_stflp32(
|
|
/* IM */ 1, /* enable immediate. */
|
|
/* cc */ 0,
|
|
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
|
|
/* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
|
|
/* SRC1 */ 0,
|
|
/* SRC2 (Shift) */ 0,
|
|
/* DST */ index_temp64);
|
|
}
|
|
|
|
*buffer++ = pvr_pds_inst_encode_stflp32(
|
|
/* IM */ 1, /* enable immediate. */
|
|
/* cc */ 0,
|
|
/* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
|
|
/* SRC0 */ num_vertices_temp64 + 1,
|
|
/* SRC1 */ vertex_stream->num_vertices,
|
|
/* SRC2 (Shift) */ 0,
|
|
/* DST */ num_vertices_temp64);
|
|
}
|
|
|
|
first_ddmadt = false;
|
|
|
|
pre_index_temp = temp;
|
|
}
|
|
}
|
|
|
|
/* Process the elements in the stream. */
|
|
for (uint32_t element = 0; element < vertex_stream->num_elements;
|
|
element++) {
|
|
bool terminate = false;
|
|
|
|
vertex_element = &vertex_stream->elements[element];
|
|
/* Check if last DDMAD needs terminate or not. */
|
|
if ((element == (vertex_stream->num_elements - 1)) &&
|
|
(stream == last_stream_index)) {
|
|
terminate = !issue_empty_ddmad && !direct_writes_needed;
|
|
}
|
|
|
|
/* Get a new set of constants for this element. */
|
|
if (element) {
|
|
/* Get all 8 32 bit constants at once. */
|
|
next_constant =
|
|
pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
|
|
}
|
|
|
|
dma_address_constant64 = next_constant + 4;
|
|
dma_control_constant64 = dma_address_constant64 + 2;
|
|
|
|
if (vertex_element->component_size == 0) {
|
|
/* Standard DMA.
|
|
*
|
|
* Write the DMA transfer control words into the PDS data
|
|
* section.
|
|
*
|
|
* DMA Address is 40-bit.
|
|
*/
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t dma_control_word;
|
|
uint64_t dma_control_word64 = 0;
|
|
uint32_t dma_size;
|
|
|
|
/* Write the address to the constant. */
|
|
pvr_pds_write_dma_address(buffer,
|
|
dma_address_constant64,
|
|
vertex_stream->address +
|
|
(uint64_t)vertex_element->offset,
|
|
false,
|
|
dev_info);
|
|
{
|
|
if (program->stream_patch_offsets) {
|
|
program
|
|
->stream_patch_offsets[program->num_stream_patches++] =
|
|
(stream << 16) | (dma_address_constant64 >> 1);
|
|
}
|
|
}
|
|
|
|
/* Size is in bytes - round up to nearest 32 bit word. */
|
|
dma_size =
|
|
(vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
|
|
PVR_PDS_DWORD_SHIFT;
|
|
|
|
assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
|
|
|
|
/* Set up the dma transfer control word. */
|
|
dma_control_word =
|
|
dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
|
|
|
|
dma_control_word |=
|
|
vertex_element->reg
|
|
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
|
|
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
|
|
if ((ddmadt_enables & (1 << stream)) != 0) {
|
|
assert(
|
|
((((uint64_t)vertex_stream->buffer_size_in_bytes
|
|
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
|
|
~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
|
|
(uint64_t)vertex_stream->buffer_size_in_bytes);
|
|
dma_control_word64 =
|
|
(PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
|
|
(((uint64_t)vertex_stream->buffer_size_in_bytes
|
|
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
|
|
~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
|
|
}
|
|
}
|
|
/* If this is the last dma then also set the last flag. */
|
|
if (terminate) {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
|
|
}
|
|
|
|
/* Write the 32-Bit SRC3 word to a 64-bit constant as per
|
|
* spec.
|
|
*/
|
|
pvr_pds_write_wide_constant(buffer,
|
|
dma_control_constant64,
|
|
dma_control_word64 |
|
|
(uint64_t)dma_control_word);
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
|
|
if ((ddmadt_enables & (1 << stream)) != 0) {
|
|
*buffer++ = pvr_pds_inst_encode_cmp(
|
|
0, /* cc enable */
|
|
PVR_ROGUE_PDSINST_COP_LT, /* Operation */
|
|
index_temp64 >> 1, /* SRC0 (REGS64TP) */
|
|
(num_vertices_temp64 >> 1) +
|
|
PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
|
|
(REGS64)
|
|
*/
|
|
}
|
|
}
|
|
/* Multiply by the vertex stream stride and add the base
|
|
* followed by a DOUTD.
|
|
*
|
|
* dmad32 (C0 * T0) + C1, C2
|
|
* src0 = stride src1 = index src2 = baseaddr src3 =
|
|
* doutd part
|
|
*/
|
|
|
|
uint32_t cc;
|
|
if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
|
|
cc = 0;
|
|
else
|
|
cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
|
|
|
|
*buffer++ = pvr_pds_inst_encode_ddmad(
|
|
/* cc */ cc,
|
|
/* END */ 0,
|
|
/* SRC0 */ stride_constant32, /* Stride 32-bit*/
|
|
/* SRC1 */ temp, /* Index 32-bit*/
|
|
/* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
|
|
* Address
|
|
* +
|
|
* Offset
|
|
*/
|
|
/* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
|
|
* Transfer
|
|
* Control
|
|
* Word.
|
|
*/
|
|
);
|
|
}
|
|
|
|
if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
|
|
((ddmadt_enables & (1 << stream)) != 0)) {
|
|
code_size += 1;
|
|
}
|
|
code_size += 1;
|
|
} else {
|
|
/* Repeat DMA.
|
|
*
|
|
* Write the DMA transfer control words into the PDS data
|
|
* section.
|
|
*
|
|
* DMA address is 40-bit.
|
|
*/
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t dma_control_word;
|
|
|
|
/* Write the address to the constant. */
|
|
pvr_pds_write_dma_address(buffer,
|
|
dma_address_constant64,
|
|
vertex_stream->address +
|
|
(uint64_t)vertex_element->offset,
|
|
false,
|
|
dev_info);
|
|
|
|
/* Set up the DMA transfer control word. */
|
|
dma_control_word =
|
|
vertex_element->size
|
|
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
|
|
|
|
dma_control_word |=
|
|
vertex_element->reg
|
|
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
|
|
|
|
switch (vertex_element->component_size) {
|
|
case 4: {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
|
|
break;
|
|
}
|
|
case 3: {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
|
|
break;
|
|
}
|
|
case 2: {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
|
|
break;
|
|
}
|
|
default: {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
|
|
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
|
|
|
|
/* If this is the last dma then also set the last flag. */
|
|
if (terminate) {
|
|
dma_control_word |=
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
|
|
}
|
|
|
|
/* Write the 32-Bit SRC3 word to a 64-bit constant as per
|
|
* spec.
|
|
*/
|
|
pvr_pds_write_wide_constant(buffer,
|
|
dma_control_constant64,
|
|
(uint64_t)dma_control_word);
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Multiply by the vertex stream stride and add the base
|
|
* followed by a DOUTD.
|
|
*
|
|
* dmad32 (C0 * T0) + C1, C2
|
|
* src0 = stride src1 = index src2 = baseaddr src3 =
|
|
* doutd part
|
|
*/
|
|
*buffer++ = pvr_pds_inst_encode_ddmad(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC0 */ stride_constant32, /* Stride 32-bit*/
|
|
/* SRC1 */ temp, /* Index 32-bit*/
|
|
/* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
|
|
* Address
|
|
* +
|
|
* Offset.
|
|
*/
|
|
/* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
|
|
* Transfer
|
|
* Control
|
|
* Word.
|
|
*/
|
|
);
|
|
}
|
|
|
|
code_size += 1;
|
|
} /* End of repeat DMA. */
|
|
} /* Element loop */
|
|
} /* Stream loop */
|
|
|
|
if (issue_empty_ddmad) {
|
|
/* Issue an empty last DDMAD, always executed. */
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_wide_constant(
|
|
buffer,
|
|
empty_dma_control_constant64,
|
|
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
|
|
}
|
|
|
|
code_size += 1;
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_ddmad(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC0 */ stride_constant32, /* Stride 32-bit*/
|
|
/* SRC1 */ temp, /* Index 32-bit*/
|
|
/* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
|
|
*Address +
|
|
*Offset.
|
|
*/
|
|
/* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
|
|
* Transfer
|
|
* Control
|
|
* Word.
|
|
*/
|
|
);
|
|
}
|
|
}
|
|
|
|
if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
|
|
if (current_p0) {
|
|
code_size += 1;
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Revert predicate back to IF0 which is required by DOUTU. */
|
|
*buffer++ =
|
|
pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
|
|
*/
|
|
0, /* Neg */
|
|
PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
|
|
*/
|
|
1); /* Addr */
|
|
}
|
|
}
|
|
}
|
|
/* Send VertexID if requested. */
|
|
if (program->iterate_vtx_id) {
|
|
if (program->draw_indirect) {
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_add32(
|
|
/* cc */ 0x0,
|
|
/* ALUM */ 0, /* Unsigned */
|
|
/* SNA */ 1, /* Minus */
|
|
/* SRC0 32b */ input_register0, /* vertexID */
|
|
/* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
|
|
* vertexID.
|
|
*/
|
|
/* DST 32b */ input_register0);
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw = pvr_pds_encode_doutw_src1(
|
|
program->vtx_id_register,
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
if (!program->iterate_instance_id && !program->iterate_remap_id)
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
|
|
pvr_pds_write_constant32(buffer,
|
|
vertex_id_control_word_const32,
|
|
doutw);
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
|
|
*/
|
|
/* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
/* Send InstanceID if requested. */
|
|
if (program->iterate_instance_id) {
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw = pvr_pds_encode_doutw_src1(
|
|
program->instance_id_register,
|
|
PVR_PDS_DOUTW_UPPER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
if (!program->iterate_remap_id)
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
|
|
pvr_pds_write_constant32(buffer,
|
|
instance_id_control_word_const32,
|
|
doutw);
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
/* Send remapped index number to vi0. */
|
|
if (program->iterate_remap_id) {
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw = pvr_pds_encode_doutw_src1(
|
|
0 /* vi0 */,
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant64(buffer,
|
|
geometry_id_control_word_const64,
|
|
doutw,
|
|
0);
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
|
|
* Src1
|
|
*/
|
|
/* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
/* Copy the USC task control words to constants. */
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_wide_constant(buffer,
|
|
usc_control_constant64,
|
|
program->usc_task_control.src0); /* 64-bit
|
|
* Src0
|
|
*/
|
|
if (program->stream_patch_offsets) {
|
|
/* USC TaskControl is always the first patch. */
|
|
program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
|
|
}
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Conditionally (if last in task) issue the task to the USC
|
|
* (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
|
|
*/
|
|
|
|
*buffer++ = pvr_pds_encode_doutu(
|
|
/* cc */ 1,
|
|
/* END */ 1,
|
|
/* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
|
|
|
|
/* End the program if the Dout did not already end it. */
|
|
*buffer++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
|
|
code_size += 2;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Set the data segment pointer and ensure we return 1 past the buffer
|
|
* ptr.
|
|
*/
|
|
program->data_segment = buffer;
|
|
|
|
buffer += consts_size;
|
|
}
|
|
|
|
program->temps_used = temps_used;
|
|
program->data_size = consts_size;
|
|
program->code_size = code_size;
|
|
program->ddmadt_enables = ddmadt_enables;
|
|
if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
|
|
program->skip_stream_flag = skip_stream_flag;
|
|
|
|
return buffer;
|
|
}
|
|
|
|
/**
|
|
* Generates a PDS program to load USC compute shader global/local/workgroup
|
|
* sizes/ids and then a DOUTU to execute the USC.
|
|
*
|
|
* \param program Pointer to description of the program that should be
|
|
* generated.
|
|
* \param buffer Pointer to buffer that receives the output of this function.
|
|
* This will be either the data segment, or the code depending on
|
|
* gen_mode.
|
|
* \param gen_mode Which part to generate, either data segment or code segment.
|
|
* If PDS_GENERATE_SIZES is specified, nothing is written, but
|
|
* size information in program is updated.
|
|
* \param dev_info PVR device info struct.
|
|
* \returns Pointer to just beyond the buffer for the data - i.e. the value of
|
|
* the buffer after writing its contents.
|
|
*/
|
|
uint32_t *
|
|
pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t usc_control_constant64;
|
|
uint32_t usc_control_constant64_coeff_update = 0;
|
|
uint32_t zero_constant64 = 0;
|
|
|
|
uint32_t data_size = 0;
|
|
uint32_t code_size = 0;
|
|
uint32_t temps_used = 0;
|
|
uint32_t doutw = 0;
|
|
|
|
uint32_t barrier_ctrl_word = 0;
|
|
uint32_t barrier_ctrl_word2 = 0;
|
|
|
|
/* Even though there are 3 IDs for local and global we only need max one
|
|
* DOUTW for local, and two for global.
|
|
*/
|
|
uint32_t work_group_id_ctrl_words[2] = { 0 };
|
|
uint32_t local_id_ctrl_word = 0;
|
|
uint32_t local_input_register;
|
|
|
|
/* For the constant value to load into ptemp (SW fence). */
|
|
uint64_t predicate_ld_src0_constant = 0;
|
|
uint32_t cond_render_negate_constant = 0;
|
|
|
|
uint32_t cond_render_pred_temp;
|
|
uint32_t cond_render_negate_temp;
|
|
|
|
/* 2x 64 bit registers that will mask out the Predicate load. */
|
|
uint32_t cond_render_pred_mask_constant = 0;
|
|
|
|
#if defined(DEBUG)
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
for (uint32_t j = 0; j < program->data_size; j++)
|
|
buffer[j] = 0xDEADBEEF;
|
|
}
|
|
#endif
|
|
|
|
/* All the compute input registers are in temps. */
|
|
temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
|
|
|
|
uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
|
|
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
if (program->kick_usc) {
|
|
/* Copy the USC task control words to constants. */
|
|
usc_control_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
if (program->has_coefficient_update_task) {
|
|
usc_control_constant64_coeff_update =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
if (program->conditional_render) {
|
|
predicate_ld_src0_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
cond_render_negate_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
cond_render_pred_mask_constant =
|
|
pvr_pds_get_constants(&next_constant, 4, &data_size);
|
|
|
|
/* LD will load a 64 bit value. */
|
|
cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
|
|
cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
|
|
|
|
program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
|
|
program->cond_render_pred_temp = cond_render_pred_temp;
|
|
}
|
|
|
|
if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->clear_pds_barrier) ||
|
|
(program->kick_usc && program->conditional_render)) {
|
|
zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
|
barrier_ctrl_word2 =
|
|
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
}
|
|
}
|
|
|
|
if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
|
|
program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
work_group_id_ctrl_words[0] =
|
|
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
}
|
|
|
|
if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
work_group_id_ctrl_words[1] =
|
|
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
}
|
|
|
|
if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
}
|
|
|
|
if (program->add_base_workgroup) {
|
|
for (uint32_t workgroup_component = 0; workgroup_component < 3;
|
|
workgroup_component++) {
|
|
if (program->work_group_input_regs[workgroup_component] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
program
|
|
->base_workgroup_constant_offset_in_dwords[workgroup_component] =
|
|
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
if (program->kick_usc) {
|
|
/* Src0 for DOUTU */
|
|
pvr_pds_write_wide_constant(buffer,
|
|
usc_control_constant64,
|
|
program->usc_task_control.src0); /* 64-bit
|
|
* Src0.
|
|
*/
|
|
}
|
|
|
|
if (program->has_coefficient_update_task) {
|
|
/* Src0 for DOUTU. */
|
|
pvr_pds_write_wide_constant(
|
|
buffer,
|
|
usc_control_constant64_coeff_update,
|
|
program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
|
|
}
|
|
|
|
if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->clear_pds_barrier) ||
|
|
(program->kick_usc && program->conditional_render)) {
|
|
pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
|
|
* Src0
|
|
*/
|
|
}
|
|
|
|
if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
|
/* Write the constant for the coefficient register write. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->barrier_coefficient + 4,
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
true,
|
|
dev_info);
|
|
pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
|
|
}
|
|
/* Write the constant for the coefficient register write. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->barrier_coefficient,
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
/* Check whether the barrier is going to be the last DOUTW done by
|
|
* the coefficient sync task.
|
|
*/
|
|
if ((program->work_group_input_regs[0] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
|
|
(program->work_group_input_regs[1] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
|
|
(program->work_group_input_regs[2] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
}
|
|
|
|
pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
|
|
}
|
|
|
|
/* If we want work-group id X, see if we also want work-group id Y. */
|
|
if (program->work_group_input_regs[0] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
|
|
program->work_group_input_regs[1] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
/* Make sure we are going to DOUTW them into adjacent registers
|
|
* otherwise we can't do it in one.
|
|
*/
|
|
assert(program->work_group_input_regs[1] ==
|
|
(program->work_group_input_regs[0] + 1));
|
|
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->work_group_input_regs[0],
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
/* If we don't want the Z work-group id then this is the last one.
|
|
*/
|
|
if (program->work_group_input_regs[2] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
}
|
|
|
|
pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
|
|
}
|
|
/* If we only want one of X or Y then handle them separately. */
|
|
else {
|
|
if (program->work_group_input_regs[0] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->work_group_input_regs[0],
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
/* If we don't want the Z work-group id then this is the last
|
|
* one.
|
|
*/
|
|
if (program->work_group_input_regs[2] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
}
|
|
|
|
pvr_pds_write_constant32(buffer,
|
|
work_group_id_ctrl_words[0],
|
|
doutw);
|
|
} else if (program->work_group_input_regs[1] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->work_group_input_regs[1],
|
|
PVR_PDS_DOUTW_UPPER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
/* If we don't want the Z work-group id then this is the last
|
|
* one.
|
|
*/
|
|
if (program->work_group_input_regs[2] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
}
|
|
|
|
pvr_pds_write_constant32(buffer,
|
|
work_group_id_ctrl_words[0],
|
|
doutw);
|
|
}
|
|
}
|
|
|
|
/* Handle work-group id Z. */
|
|
if (program->work_group_input_regs[2] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->work_group_input_regs[2],
|
|
PVR_PDS_DOUTW_UPPER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
|
|
true,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
|
|
}
|
|
|
|
/* Handle the local IDs. */
|
|
if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
uint32_t dest_reg;
|
|
|
|
/* If we want local id Y and Z make sure the compiler wants them in
|
|
* the same register.
|
|
*/
|
|
if (!program->flattened_work_groups) {
|
|
if ((program->local_input_regs[1] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
|
|
(program->local_input_regs[2] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
assert(program->local_input_regs[1] ==
|
|
program->local_input_regs[2]);
|
|
}
|
|
}
|
|
|
|
if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
|
|
dest_reg = program->local_input_regs[1];
|
|
else
|
|
dest_reg = program->local_input_regs[2];
|
|
|
|
/* If we want local id X and (Y or Z) then we can do that in a
|
|
* single 64-bit DOUTW.
|
|
*/
|
|
if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
assert(dest_reg == (program->local_input_regs[0] + 1));
|
|
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->local_input_regs[0],
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
|
|
pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
|
|
}
|
|
/* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
|
|
*/
|
|
else {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
dest_reg,
|
|
PVR_PDS_DOUTW_UPPER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
|
|
true,
|
|
dev_info);
|
|
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
|
|
pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
|
|
}
|
|
}
|
|
/* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
|
|
*/
|
|
else if (program->local_input_regs[0] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->local_input_regs[0],
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
|
|
true,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
|
|
}
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
|
|
gen_mode == PDS_GENERATE_SIZES) {
|
|
const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
|
|
#define APPEND(X) \
|
|
if (encode) { \
|
|
*buffer = X; \
|
|
buffer++; \
|
|
} else { \
|
|
code_size += sizeof(uint32_t); \
|
|
}
|
|
|
|
/* Assert that coeff_update_task_branch_size is > 0 because if it is 0
|
|
* then we will be doing an infinite loop.
|
|
*/
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
|
|
assert(program->coeff_update_task_branch_size > 0);
|
|
|
|
/* Test whether this is the coefficient update task or not. */
|
|
APPEND(
|
|
pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
|
|
PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
|
|
PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
|
|
program->coeff_update_task_branch_size /* ADDR */));
|
|
|
|
/* Do we need to initialize the barrier coefficient? */
|
|
if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
|
/* Initialize the second barrier coefficient registers to zero.
|
|
*/
|
|
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
|
0, /* END */
|
|
barrier_ctrl_word2, /* SRC1 */
|
|
zero_constant64 >> 1)); /* SRC0 */
|
|
}
|
|
/* Initialize the coefficient register to zero. */
|
|
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
|
0, /* END */
|
|
barrier_ctrl_word, /* SRC1 */
|
|
zero_constant64 >> 1)); /* SRC0 */
|
|
}
|
|
|
|
if (program->add_base_workgroup) {
|
|
const uint32_t temp_values[3] = { 0, 1, 3 };
|
|
for (uint32_t workgroup_component = 0; workgroup_component < 3;
|
|
workgroup_component++) {
|
|
if (program->work_group_input_regs[workgroup_component] ==
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
|
|
continue;
|
|
|
|
APPEND(pvr_pds_inst_encode_add32(
|
|
/* cc */ 0x0,
|
|
/* ALUM */ 0,
|
|
/* SNA */ 0,
|
|
/* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
|
|
program->base_workgroup_constant_offset_in_dwords
|
|
[workgroup_component],
|
|
/* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
|
|
PVR_PDS_CDM_WORK_GROUP_ID_X +
|
|
temp_values[workgroup_component],
|
|
/* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
|
|
PVR_PDS_CDM_WORK_GROUP_ID_X +
|
|
temp_values[workgroup_component]));
|
|
}
|
|
}
|
|
|
|
/* If we are going to put the work-group IDs in coefficients then we
|
|
* just need to do the DOUTWs.
|
|
*/
|
|
if ((program->work_group_input_regs[0] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->work_group_input_regs[1] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
uint32_t dest_reg;
|
|
|
|
if (program->work_group_input_regs[0] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
|
|
} else {
|
|
dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
|
|
}
|
|
|
|
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
|
0, /* END */
|
|
work_group_id_ctrl_words[0], /* SRC1
|
|
*/
|
|
dest_reg >> 1)); /* SRC0 */
|
|
}
|
|
|
|
if (program->work_group_input_regs[2] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
APPEND(pvr_pds_encode_doutw64(
|
|
0, /* cc */
|
|
0, /* END */
|
|
work_group_id_ctrl_words[1], /* SRC1 */
|
|
(PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
|
|
1)); /* SRC0 */
|
|
}
|
|
|
|
/* Issue the task to the USC. */
|
|
if (program->kick_usc && program->has_coefficient_update_task) {
|
|
APPEND(pvr_pds_encode_doutu(0, /* cc */
|
|
1, /* END */
|
|
usc_control_constant64_coeff_update >>
|
|
1)); /* SRC0; DOUTU 64-bit Src0 */
|
|
}
|
|
|
|
/* Encode a HALT */
|
|
APPEND(pvr_pds_inst_encode_halt(0));
|
|
|
|
/* Set the branch size used to skip the coefficient sync task. */
|
|
program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
|
|
|
|
/* DOUTW in the local IDs. */
|
|
|
|
/* If we want X and Y or Z, we only need one DOUTW. */
|
|
if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
|
|
((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
|
|
local_input_register =
|
|
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
|
|
} else {
|
|
/* If we just want X. */
|
|
if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
local_input_register =
|
|
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
|
|
}
|
|
/* If we just want Y or Z. */
|
|
else if (program->local_input_regs[1] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
|
|
program->local_input_regs[2] !=
|
|
PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
|
|
local_input_register =
|
|
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
|
|
}
|
|
}
|
|
|
|
if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
|
|
(program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
|
|
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
|
0, /* END */
|
|
local_id_ctrl_word, /* SRC1 */
|
|
local_input_register >> 1)); /* SRC0
|
|
*/
|
|
}
|
|
|
|
if (program->clear_pds_barrier) {
|
|
/* Zero the persistent temp (SW fence for context switch). */
|
|
APPEND(pvr_pds_inst_encode_add64(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
|
|
PVR_ROGUE_PDSINST_MAD_SNA_ADD,
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(zero_constant64 >> 1), /* src0 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(zero_constant64 >> 1), /* src1 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
|
|
* ptemp64[0]
|
|
*/
|
|
}
|
|
|
|
/* If this is a fence, issue the DOUTC. */
|
|
if (program->fence) {
|
|
APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
|
|
0 /* END */));
|
|
}
|
|
|
|
if (program->kick_usc) {
|
|
if (program->conditional_render) {
|
|
/* Skip if coefficient update task. */
|
|
APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
|
|
0,
|
|
PVR_ROGUE_PDSINST_PREDICATE_KEEP,
|
|
16));
|
|
|
|
/* Load the predicate. */
|
|
APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
|
|
|
|
/* Load negate constant into temp for CMP. */
|
|
APPEND(pvr_pds_inst_encode_add64(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
|
|
PVR_ROGUE_PDSINST_MAD_SNA_ADD,
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(cond_render_negate_constant >> 1), /* src0 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(zero_constant64 >> 1), /* src1 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
|
|
(cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
|
|
*/
|
|
|
|
APPEND(pvr_pds_inst_encode_wdf(0));
|
|
|
|
for (uint32_t i = 0; i < 4; i++) {
|
|
APPEND(pvr_pds_inst_encode_stflp32(
|
|
1, /* enable immediate */
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
|
|
cond_render_pred_temp + i, /* SRC0 */
|
|
cond_render_pred_mask_constant + i, /* SRC1 */
|
|
0, /* SRC2 (Shift) */
|
|
cond_render_pred_temp + i)); /* DST */
|
|
|
|
APPEND(
|
|
pvr_pds_inst_encode_stflp32(1, /* enable immediate */
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_LOP_OR, /* LOP
|
|
*/
|
|
cond_render_pred_temp + i, /* SRC0
|
|
*/
|
|
cond_render_pred_temp, /* SRC1 */
|
|
0, /* SRC2 (Shift) */
|
|
cond_render_pred_temp)); /* DST */
|
|
}
|
|
|
|
APPEND(pvr_pds_inst_encode_limm(0, /* cc */
|
|
cond_render_pred_temp + 1, /* SRC1
|
|
*/
|
|
0, /* SRC0 */
|
|
0)); /* GLOBALREG */
|
|
|
|
APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
|
|
*/
|
|
cond_render_pred_temp, /* SRC0 */
|
|
cond_render_negate_temp, /* SRC1
|
|
*/
|
|
0, /* SRC2 (Shift) */
|
|
cond_render_pred_temp)); /* DST
|
|
*/
|
|
|
|
/* Check that the predicate is 0. */
|
|
APPEND(pvr_pds_inst_encode_cmpi(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
|
|
(cond_render_pred_temp >> 1) +
|
|
PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
|
|
0)); /* SRC1 */
|
|
|
|
/* If predicate is 0, skip DOUTU. */
|
|
APPEND(pvr_pds_inst_encode_bra(
|
|
PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
|
|
P0 */
|
|
0, /* NEG */
|
|
PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
|
|
keep
|
|
*/
|
|
2));
|
|
}
|
|
|
|
/* Issue the task to the USC.
|
|
* DoutU src1=USC Code Base address, src2=doutu word 2.
|
|
*/
|
|
APPEND(pvr_pds_encode_doutu(1, /* cc */
|
|
1, /* END */
|
|
usc_control_constant64 >> 1)); /* SRC0;
|
|
* DOUTU
|
|
* 64-bit
|
|
* Src0.
|
|
*/
|
|
}
|
|
|
|
/* End the program if the Dout did not already end it. */
|
|
APPEND(pvr_pds_inst_encode_halt(0));
|
|
#undef APPEND
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Set the data segment pointer and ensure we return 1 past the buffer
|
|
* ptr.
|
|
*/
|
|
program->data_segment = buffer;
|
|
|
|
buffer += next_constant;
|
|
}
|
|
|
|
/* Require at least one DWORD of PDS data so the program runs. */
|
|
data_size = MAX2(1, data_size);
|
|
|
|
program->temps_used = temps_used;
|
|
program->highest_temp = temps_used;
|
|
program->data_size = data_size;
|
|
if (gen_mode == PDS_GENERATE_SIZES)
|
|
program->code_size = code_size;
|
|
|
|
return buffer;
|
|
}
|
|
|
|
/**
|
|
* Generates the PDS vertex shader data or code block. This program will do a
|
|
* DMA into USC Constants followed by a DOUTU.
|
|
*
|
|
* \param program Pointer to the PDS vertex shader program.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Generate code or data.
|
|
* \param dev_info PVR device information struct.
|
|
* \returns Pointer to just beyond the code/data.
|
|
*/
|
|
uint32_t *pvr_pds_vertex_shader_sa(
|
|
struct pvr_pds_vertex_shader_sa_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t next_constant;
|
|
uint32_t data_size = 0;
|
|
uint32_t code_size = 0;
|
|
|
|
uint32_t usc_control_constant64 = 0;
|
|
uint32_t dma_address_constant64 = 0;
|
|
uint32_t dma_control_constant32 = 0;
|
|
uint32_t doutw_value_constant64 = 0;
|
|
uint32_t doutw_control_constant32 = 0;
|
|
uint32_t fence_constant_word = 0;
|
|
uint32_t *buffer_base;
|
|
uint32_t kick_index;
|
|
|
|
uint32_t total_num_doutw =
|
|
program->num_dword_doutw + program->num_q_word_doutw;
|
|
uint32_t total_size_dma =
|
|
program->num_dword_doutw + 2 * program->num_q_word_doutw;
|
|
|
|
next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
/* Copy the DMA control words and USC task control words to constants.
|
|
*
|
|
* Arrange them so that the 64-bit words are together followed by the 32-bit
|
|
* words.
|
|
*/
|
|
if (program->kick_usc) {
|
|
usc_control_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
if (program->clear_pds_barrier) {
|
|
fence_constant_word =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
dma_address_constant64 = pvr_pds_get_constants(&next_constant,
|
|
2 * program->num_dma_kicks,
|
|
&data_size);
|
|
|
|
/* Assign all unaligned constants together to avoid alignment issues caused
|
|
* by pvr_pds_get_constants with even allocation sizes.
|
|
*/
|
|
doutw_value_constant64 = pvr_pds_get_constants(
|
|
&next_constant,
|
|
total_size_dma + total_num_doutw + program->num_dma_kicks,
|
|
&data_size);
|
|
doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
|
|
dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
buffer_base = buffer;
|
|
|
|
if (program->kick_usc) {
|
|
/* Src0 for DOUTU. */
|
|
pvr_pds_write_wide_constant(buffer_base,
|
|
usc_control_constant64,
|
|
program->usc_task_control.src0); /* DOUTU
|
|
* 64-bit
|
|
* Src0.
|
|
*/
|
|
buffer += 2;
|
|
}
|
|
|
|
if (program->clear_pds_barrier) {
|
|
/* Encode the fence constant src0. Fence barrier is initialized to
|
|
* zero.
|
|
*/
|
|
pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
|
|
buffer += 2;
|
|
}
|
|
|
|
if (total_num_doutw > 0) {
|
|
for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
|
|
/* Write the constant for the coefficient register write. */
|
|
pvr_pds_write_constant64(buffer_base,
|
|
doutw_value_constant64,
|
|
program->q_word_doutw_value[2 * i],
|
|
program->q_word_doutw_value[2 * i + 1]);
|
|
pvr_pds_write_constant32(
|
|
buffer_base,
|
|
doutw_control_constant32,
|
|
program->q_word_doutw_control[i] |
|
|
((!program->num_dma_kicks && i == total_num_doutw - 1)
|
|
? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
|
|
: 0));
|
|
|
|
doutw_value_constant64 += 2;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
|
|
/* Write the constant for the coefficient register write. */
|
|
pvr_pds_write_constant32(buffer_base,
|
|
doutw_value_constant64,
|
|
program->dword_doutw_value[i]);
|
|
pvr_pds_write_constant32(
|
|
buffer_base,
|
|
doutw_control_constant32,
|
|
program->dword_doutw_control[i] |
|
|
((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
|
|
? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
|
|
: 0));
|
|
|
|
doutw_value_constant64 += 1;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
|
|
buffer += total_size_dma + total_num_doutw;
|
|
}
|
|
|
|
if (program->num_dma_kicks == 1) /* Most-common case. */
|
|
{
|
|
/* Src0 for DOUTD - Address. */
|
|
pvr_pds_write_dma_address(buffer_base,
|
|
dma_address_constant64,
|
|
program->dma_address[0],
|
|
false,
|
|
dev_info);
|
|
|
|
/* Src1 for DOUTD - Control Word. */
|
|
pvr_pds_write_constant32(
|
|
buffer_base,
|
|
dma_control_constant32,
|
|
program->dma_control[0] |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
|
|
|
|
/* Move the buffer ptr along as we will return 1 past the buffer. */
|
|
buffer += 3;
|
|
} else if (program->num_dma_kicks > 1) {
|
|
for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
|
|
kick_index++) {
|
|
/* Src0 for DOUTD - Address. */
|
|
pvr_pds_write_dma_address(buffer_base,
|
|
dma_address_constant64,
|
|
program->dma_address[kick_index],
|
|
false,
|
|
dev_info);
|
|
|
|
/* Src1 for DOUTD - Control Word. */
|
|
pvr_pds_write_constant32(buffer_base,
|
|
dma_control_constant32,
|
|
program->dma_control[kick_index]);
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
|
|
/* Src0 for DOUTD - Address. */
|
|
pvr_pds_write_dma_address(buffer_base,
|
|
dma_address_constant64,
|
|
program->dma_address[kick_index],
|
|
false,
|
|
dev_info);
|
|
|
|
/* Src1 for DOUTD - Control Word. */
|
|
pvr_pds_write_constant32(
|
|
buffer_base,
|
|
dma_control_constant32,
|
|
program->dma_control[kick_index] |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
|
|
|
|
buffer += 3 * program->num_dma_kicks;
|
|
}
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
if (program->clear_pds_barrier) {
|
|
/* Zero the persistent temp (SW fence for context switch). */
|
|
*buffer++ = pvr_pds_inst_encode_add64(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
|
|
PVR_ROGUE_PDSINST_MAD_SNA_ADD,
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(fence_constant_word >> 1), /* src0 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(fence_constant_word >> 1), /* src1 = 0 */
|
|
PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
|
|
* ptemp[0]
|
|
*/
|
|
}
|
|
|
|
if (total_num_doutw > 0) {
|
|
for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
|
|
/* Set the coefficient register to data value. */
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ !program->num_dma_kicks && !program->kick_usc &&
|
|
(i == total_num_doutw - 1),
|
|
/* SRC1 */ doutw_control_constant32,
|
|
/* SRC0 */ doutw_value_constant64 >> 1);
|
|
|
|
doutw_value_constant64 += 2;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
|
|
/* Set the coefficient register to data value. */
|
|
*buffer++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ !program->num_dma_kicks && !program->kick_usc &&
|
|
(i == program->num_dword_doutw - 1),
|
|
/* SRC1 */ doutw_control_constant32,
|
|
/* SRC0 */ doutw_value_constant64 >> 1);
|
|
|
|
doutw_value_constant64 += 1;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
}
|
|
|
|
if (program->num_dma_kicks != 0) {
|
|
/* DMA the state into the secondary attributes. */
|
|
|
|
if (program->num_dma_kicks == 1) /* Most-common case. */
|
|
{
|
|
*buffer++ = pvr_pds_encode_doutd(
|
|
/* cc */ 0,
|
|
/* END */ !program->kick_usc,
|
|
/* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
|
|
/* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
|
|
* Src0.
|
|
*/
|
|
} else {
|
|
for (kick_index = 0; kick_index < program->num_dma_kicks;
|
|
kick_index++) {
|
|
*buffer++ = pvr_pds_encode_doutd(
|
|
/* cc */ 0,
|
|
/* END */ (!program->kick_usc) &&
|
|
(kick_index + 1 == program->num_dma_kicks),
|
|
/* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
|
|
* Src1.
|
|
*/
|
|
/* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
|
|
* 64-bit
|
|
* Src0.
|
|
*/
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (program->kick_usc) {
|
|
/* Kick the USC. */
|
|
*buffer++ = pvr_pds_encode_doutu(
|
|
/* cc */ 0,
|
|
/* END */ 1,
|
|
/* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
|
|
*/
|
|
}
|
|
|
|
if (!program->kick_usc && program->num_dma_kicks == 0 &&
|
|
total_num_doutw == 0) {
|
|
*buffer++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
}
|
|
|
|
code_size = program->num_dma_kicks + total_num_doutw;
|
|
if (program->clear_pds_barrier)
|
|
code_size++; /* ADD64 instruction. */
|
|
|
|
if (program->kick_usc)
|
|
code_size++;
|
|
|
|
/* If there are no DMAs and no USC kick then code is HALT only. */
|
|
if (code_size == 0)
|
|
code_size = 1;
|
|
|
|
program->data_size = data_size;
|
|
program->code_size = code_size;
|
|
|
|
return buffer;
|
|
}
|
|
|
|
/**
|
|
* Writes the Uniform Data block for the PDS pixel shader secondary attributes
|
|
* program.
|
|
*
|
|
* \param program Pointer to the PDS pixel shader secondary attributes program.
|
|
* \param buffer Pointer to the buffer for the code/data.
|
|
* \param gen_mode Either code or data can be generated or sizes only updated.
|
|
* \returns Pointer to just beyond the buffer for the program/data.
|
|
*/
|
|
uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
|
|
struct pvr_pds_pixel_shader_sa_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode)
|
|
{
|
|
uint32_t *instruction;
|
|
uint32_t code_size = 0;
|
|
uint32_t data_size = 0;
|
|
uint32_t temps_used = 0;
|
|
uint32_t next_constant;
|
|
|
|
assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
|
|
0);
|
|
|
|
assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
|
|
|
|
/* clang-format off */
|
|
/* Shape of code segment (note: clear is different)
|
|
*
|
|
* Code
|
|
* +------------+
|
|
* | BRA if0 |
|
|
* | DOUTD |
|
|
* | ... |
|
|
* | DOUTD.halt |
|
|
* | uniform |
|
|
* | DOUTD |
|
|
* | ... |
|
|
* | ... |
|
|
* | DOUTW |
|
|
* | ... |
|
|
* | ... |
|
|
* | DOUTU.halt |
|
|
* | HALT |
|
|
* +------------+
|
|
*/
|
|
/* clang-format on */
|
|
instruction = buffer;
|
|
|
|
next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
/* The clear color can arrive packed in the right form in the first (or
|
|
* first 2) dwords of the shared registers and the program will issue a
|
|
* single doutw for this.
|
|
*/
|
|
if (program->clear && program->packed_clear) {
|
|
uint32_t color_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
uint32_t control_word_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* DOUTW the clear color to the USC constants. Predicate with
|
|
* uniform loading flag (IF0).
|
|
*/
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for uniform loading program. */
|
|
/* END */ program->kick_usc ? 0 : 1, /* Last
|
|
* instruction
|
|
* for a clear.
|
|
*/
|
|
/* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
|
|
|
|
code_size += 1;
|
|
}
|
|
} else if (program->clear) {
|
|
uint32_t color_constant1, color_constant2;
|
|
|
|
if (program->clear_color_dest_reg & 0x1) {
|
|
uint32_t color_constant3, control_word_constant1,
|
|
control_word_constant2, color_constant4;
|
|
|
|
color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
|
|
control_word_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_constant2 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* DOUTW the clear color to the USSE constants. Predicate with
|
|
* uniform loading flag (IF0).
|
|
*/
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for Uniform Loading program */
|
|
/* END */ 0,
|
|
/* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
|
|
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for Uniform Loading program */
|
|
/* END */ 0,
|
|
/* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
|
|
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for uniform loading program */
|
|
/* END */ program->kick_usc ? 0 : 1, /* Last
|
|
* instruction
|
|
* for a clear.
|
|
*/
|
|
/* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 3;
|
|
} else {
|
|
uint32_t control_word_constant, control_word_last_constant;
|
|
|
|
/* Put the clear color and control words into the first 8
|
|
* constants.
|
|
*/
|
|
color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_last_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* DOUTW the clear color to the USSE constants. Predicate with
|
|
* uniform loading flag (IF0).
|
|
*/
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for Uniform Loading program */
|
|
/* END */ 0,
|
|
/* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
|
|
/* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
|
|
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 1, /* Only for uniform loading program */
|
|
/* END */ program->kick_usc ? 0 : 1, /* Last
|
|
* instruction
|
|
* for a clear.
|
|
*/
|
|
/* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
|
|
*/
|
|
/* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 2;
|
|
}
|
|
|
|
if (program->kick_usc) {
|
|
uint32_t doutu_constant64;
|
|
|
|
doutu_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* Issue the task to the USC.
|
|
*
|
|
* dout ds1[constant_use], ds0[constant_use],
|
|
* ds1[constant_use], emit
|
|
*/
|
|
*instruction++ = pvr_pds_encode_doutu(
|
|
/* cc */ 0,
|
|
/* END */ 1,
|
|
/* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
|
|
*/
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* End the program. */
|
|
*instruction++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
code_size += 1;
|
|
} else {
|
|
uint32_t total_num_doutw =
|
|
program->num_dword_doutw + program->num_q_word_doutw;
|
|
bool both_textures_and_uniforms =
|
|
((program->num_texture_dma_kicks > 0) &&
|
|
((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
|
|
program->kick_usc));
|
|
uint32_t doutu_constant64 = 0;
|
|
|
|
if (both_textures_and_uniforms) {
|
|
/* If the size of a PDS data section is 0, the hardware won't run
|
|
* it. We therefore don't need to branch when there is only a
|
|
* texture OR a uniform update program.
|
|
*/
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
uint32_t branch_address =
|
|
MAX2(1 + program->num_texture_dma_kicks, 2);
|
|
|
|
/* Use If0 to BRAnch to uniform code. */
|
|
*instruction++ = pvr_pds_encode_bra(
|
|
/* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
|
|
/* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
|
|
/* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
|
|
/* ADDR */ branch_address);
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
if (program->num_texture_dma_kicks > 0) {
|
|
uint32_t dma_address_constant64;
|
|
uint32_t dma_control_constant32;
|
|
/* Allocate 3 constant spaces for each kick. The 64-bit constants
|
|
* come first followed by the 32-bit constants.
|
|
*/
|
|
dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
dma_control_constant32 =
|
|
dma_address_constant64 + (program->num_texture_dma_kicks * 2);
|
|
|
|
for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
|
|
code_size += 1;
|
|
if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
|
|
continue;
|
|
|
|
/* DMA the state into the secondary attributes. */
|
|
*instruction++ = pvr_pds_encode_doutd(
|
|
/* cc */ 0,
|
|
/* END */ dma == (program->num_texture_dma_kicks - 1),
|
|
/* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
|
|
/* SRC0 */ dma_address_constant64 >> 1); /* DOUT
|
|
* 64-bit
|
|
* Src0
|
|
*/
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
} else if (both_textures_and_uniforms) {
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* End the program. */
|
|
*instruction++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
|
|
/* Reserve space at the beginning of the data segment for the DOUTU Task
|
|
* Control if one is needed.
|
|
*/
|
|
if (program->kick_usc) {
|
|
doutu_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
/* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
|
|
* 64-bit constants come first followed by the 32-bit constants.
|
|
*/
|
|
uint32_t total_size_dma =
|
|
program->num_dword_doutw + 2 * program->num_q_word_doutw;
|
|
|
|
uint32_t dma_address_constant64 = pvr_pds_get_constants(
|
|
&next_constant,
|
|
program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
|
|
&data_size);
|
|
uint32_t doutw_value_constant64 =
|
|
dma_address_constant64 + program->num_uniform_dma_kicks * 2;
|
|
uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
|
|
uint32_t doutw_control_constant32 =
|
|
dma_control_constant32 + program->num_uniform_dma_kicks;
|
|
|
|
if (total_num_doutw > 0) {
|
|
pvr_pds_get_constants(&next_constant, 0, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
|
|
/* Set the coefficient register to data value. */
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ !program->num_uniform_dma_kicks &&
|
|
!program->kick_usc && (i == total_num_doutw - 1),
|
|
/* SRC1 */ doutw_control_constant32,
|
|
/* SRC0 */ doutw_value_constant64 >> 1);
|
|
|
|
doutw_value_constant64 += 2;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
|
|
/* Set the coefficient register to data value. */
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ !program->num_uniform_dma_kicks &&
|
|
!program->kick_usc && (i == program->num_dword_doutw - 1),
|
|
/* SRC1 */ doutw_control_constant32,
|
|
/* SRC0 */ doutw_value_constant64 >> 1);
|
|
|
|
doutw_value_constant64 += 1;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
}
|
|
code_size += total_num_doutw;
|
|
}
|
|
|
|
if (program->num_uniform_dma_kicks > 0) {
|
|
for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
|
|
code_size += 1;
|
|
|
|
if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
|
|
continue;
|
|
|
|
bool last_instruction = false;
|
|
if (!program->kick_usc &&
|
|
(dma == program->num_uniform_dma_kicks - 1)) {
|
|
last_instruction = true;
|
|
}
|
|
/* DMA the state into the secondary attributes. */
|
|
*instruction++ = pvr_pds_encode_doutd(
|
|
/* cc */ 0,
|
|
/* END */ last_instruction,
|
|
/* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
|
|
*/
|
|
/* SRC0 */ dma_address_constant64 >> 1); /* DOUT
|
|
* 64-bit
|
|
* Src0
|
|
*/
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
}
|
|
|
|
if (program->kick_usc) {
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* Issue the task to the USC.
|
|
*
|
|
* dout ds1[constant_use], ds0[constant_use],
|
|
* ds1[constant_use], emit
|
|
*/
|
|
|
|
*instruction++ = pvr_pds_encode_doutu(
|
|
/* cc */ 0,
|
|
/* END */ 1,
|
|
/* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
|
|
}
|
|
|
|
code_size += 1;
|
|
} else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* End the program. */
|
|
*instruction++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
|
|
code_size += 1;
|
|
}
|
|
}
|
|
|
|
/* Minimum temp count is 1. */
|
|
program->temps_used = MAX2(temps_used, 1);
|
|
program->code_size = code_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
|
|
return instruction;
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Writes the Uniform Data block for the PDS pixel shader secondary attributes
|
|
* program.
|
|
*
|
|
* \param program Pointer to the PDS pixel shader secondary attributes program.
|
|
* \param buffer Pointer to the buffer for the code/data.
|
|
* \param gen_mode Either code or data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information struct.
|
|
* \returns Pointer to just beyond the buffer for the program/data.
|
|
*/
|
|
uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
|
|
struct pvr_pds_pixel_shader_sa_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
bool uniform,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t *constants = buffer;
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
uint32_t temps_used = 0;
|
|
uint32_t data_size = 0;
|
|
|
|
assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
|
|
0);
|
|
|
|
assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
|
|
|
|
/* Shape of data segment (note: clear is different).
|
|
*
|
|
* Uniform Texture
|
|
* +--------------+ +-------------+
|
|
* | USC Task L | | USC Task L |
|
|
* | H | | H |
|
|
* | DMA1 Src0 L | | DMA1 Src0 L |
|
|
* | H | | H |
|
|
* | DMA2 Src0 L | | |
|
|
* | H | | |
|
|
* | DMA1 Src1 | | DMA1 Src1 |
|
|
* | DMA2 Src1 | | |
|
|
* | DOUTW0 Src1 | | |
|
|
* | DOUTW1 Src1 | | |
|
|
* | ... | | |
|
|
* | DOUTWn Srcn | | |
|
|
* | other data | | |
|
|
* +--------------+ +-------------+
|
|
*/
|
|
|
|
/* Generate the PDS pixel shader secondary attributes data.
|
|
*
|
|
* Packed Clear
|
|
* The clear color can arrive packed in the right form in the first (or
|
|
* first 2) dwords of the shared registers and the program will issue a
|
|
* single DOUTW for this.
|
|
*/
|
|
if (program->clear && uniform && program->packed_clear) {
|
|
uint32_t color_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
uint32_t control_word_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw;
|
|
|
|
pvr_pds_write_constant64(constants,
|
|
color_constant1,
|
|
program->clear_color[0],
|
|
program->clear_color[1]);
|
|
|
|
/* Load into first constant in common store. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->clear_color_dest_reg,
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
/* Set the last flag. */
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
|
|
}
|
|
} else if (program->clear && uniform) {
|
|
uint32_t color_constant1, color_constant2;
|
|
|
|
if (program->clear_color_dest_reg & 0x1) {
|
|
uint32_t color_constant3, control_word_constant1,
|
|
control_word_constant2, color_constant4;
|
|
|
|
color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
|
|
|
control_word_constant1 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_constant2 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw;
|
|
|
|
pvr_pds_write_constant32(constants,
|
|
color_constant1,
|
|
program->clear_color[0]);
|
|
|
|
pvr_pds_write_constant64(constants,
|
|
color_constant2,
|
|
program->clear_color[1],
|
|
program->clear_color[2]);
|
|
|
|
pvr_pds_write_constant32(constants,
|
|
color_constant3,
|
|
program->clear_color[3]);
|
|
|
|
/* Load into first constant in common store. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->clear_color_dest_reg,
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant64(constants,
|
|
control_word_constant1,
|
|
doutw,
|
|
0);
|
|
|
|
/* Move the destination register along. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->clear_color_dest_reg + 1,
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant64(constants,
|
|
control_word_constant2,
|
|
doutw,
|
|
0);
|
|
|
|
/* Move the destination register along. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->clear_color_dest_reg + 3,
|
|
PVR_PDS_DOUTW_LOWER32,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
/* Set the last flag. */
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
|
|
}
|
|
} else {
|
|
uint32_t control_word_constant, control_word_last_constant;
|
|
|
|
/* Put the clear color and control words into the first 8
|
|
* constants.
|
|
*/
|
|
color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_last_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t doutw;
|
|
pvr_pds_write_constant64(constants,
|
|
color_constant1,
|
|
program->clear_color[0],
|
|
program->clear_color[1]);
|
|
|
|
pvr_pds_write_constant64(constants,
|
|
color_constant2,
|
|
program->clear_color[2],
|
|
program->clear_color[3]);
|
|
|
|
/* Load into first constant in common store. */
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
program->clear_color_dest_reg,
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
|
|
|
|
/* Move the destination register along. */
|
|
doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
|
|
doutw |= (program->clear_color_dest_reg + 2)
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
|
|
|
|
/* Set the last flag. */
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
pvr_pds_write_constant64(constants,
|
|
control_word_last_constant,
|
|
doutw,
|
|
0);
|
|
}
|
|
}
|
|
|
|
/* Constants for the DOUTU Task Control, if needed. */
|
|
if (program->kick_usc) {
|
|
uint32_t doutu_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_wide_constant(
|
|
constants,
|
|
doutu_constant64,
|
|
program->usc_task_control.src0); /* 64-bit
|
|
*/
|
|
/* Src0 */
|
|
}
|
|
}
|
|
} else {
|
|
if (uniform) {
|
|
/* Reserve space at the beginning of the data segment for the DOUTU
|
|
* Task Control if one is needed.
|
|
*/
|
|
if (program->kick_usc) {
|
|
uint32_t doutu_constant64 =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
pvr_pds_write_wide_constant(
|
|
constants,
|
|
doutu_constant64,
|
|
program->usc_task_control.src0); /* 64-bit Src0 */
|
|
}
|
|
}
|
|
|
|
uint32_t total_num_doutw =
|
|
program->num_dword_doutw + program->num_q_word_doutw;
|
|
uint32_t total_size_dma =
|
|
program->num_dword_doutw + 2 * program->num_q_word_doutw;
|
|
|
|
/* Allocate 3 constant spaces for each kick. The 64-bit constants
|
|
* come first followed by the 32-bit constants.
|
|
*/
|
|
uint32_t dma_address_constant64 =
|
|
pvr_pds_get_constants(&next_constant,
|
|
program->num_uniform_dma_kicks * 3 +
|
|
total_size_dma + total_num_doutw,
|
|
&data_size);
|
|
uint32_t doutw_value_constant64 =
|
|
dma_address_constant64 + program->num_uniform_dma_kicks * 2;
|
|
uint32_t dma_control_constant32 =
|
|
doutw_value_constant64 + total_size_dma;
|
|
uint32_t doutw_control_constant32 =
|
|
dma_control_constant32 + program->num_uniform_dma_kicks;
|
|
|
|
if (total_num_doutw > 0) {
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
|
|
pvr_pds_write_constant64(
|
|
constants,
|
|
doutw_value_constant64,
|
|
program->q_word_doutw_value[2 * i],
|
|
program->q_word_doutw_value[2 * i + 1]);
|
|
pvr_pds_write_constant32(
|
|
constants,
|
|
doutw_control_constant32,
|
|
program->q_word_doutw_control[i] |
|
|
((!program->num_uniform_dma_kicks &&
|
|
i == total_num_doutw - 1)
|
|
? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
|
|
: 0));
|
|
|
|
doutw_value_constant64 += 2;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
|
|
pvr_pds_write_constant32(constants,
|
|
doutw_value_constant64,
|
|
program->dword_doutw_value[i]);
|
|
pvr_pds_write_constant32(
|
|
constants,
|
|
doutw_control_constant32,
|
|
program->dword_doutw_control[i] |
|
|
((!program->num_uniform_dma_kicks &&
|
|
i == program->num_dword_doutw - 1)
|
|
? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
|
|
: 0));
|
|
|
|
doutw_value_constant64 += 1;
|
|
doutw_control_constant32 += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (program->num_uniform_dma_kicks > 0) {
|
|
uint32_t kick;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
|
|
kick++) {
|
|
/* Copy the dma control words to constants. */
|
|
pvr_pds_write_dma_address(constants,
|
|
dma_address_constant64,
|
|
program->uniform_dma_address[kick],
|
|
false,
|
|
dev_info);
|
|
pvr_pds_write_constant32(constants,
|
|
dma_control_constant32,
|
|
program->uniform_dma_control[kick]);
|
|
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
|
|
pvr_pds_write_dma_address(constants,
|
|
dma_address_constant64,
|
|
program->uniform_dma_address[kick],
|
|
false,
|
|
dev_info);
|
|
pvr_pds_write_constant32(
|
|
constants,
|
|
dma_control_constant32,
|
|
program->uniform_dma_control[kick] |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
|
|
}
|
|
}
|
|
|
|
} else if (program->num_texture_dma_kicks > 0) {
|
|
/* Allocate 3 constant spaces for each kick. The 64-bit constants
|
|
* come first followed by the 32-bit constants.
|
|
*/
|
|
uint32_t dma_address_constant64 =
|
|
pvr_pds_get_constants(&next_constant,
|
|
program->num_texture_dma_kicks * 3,
|
|
&data_size);
|
|
uint32_t dma_control_constant32 =
|
|
dma_address_constant64 + (program->num_texture_dma_kicks * 2);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t kick;
|
|
for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
|
|
/* Copy the DMA control words to constants. */
|
|
pvr_pds_write_dma_address(constants,
|
|
dma_address_constant64,
|
|
program->texture_dma_address[kick],
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant32(constants,
|
|
dma_control_constant32,
|
|
program->texture_dma_control[kick]);
|
|
|
|
dma_address_constant64 += 2;
|
|
dma_control_constant32 += 1;
|
|
}
|
|
|
|
pvr_pds_write_dma_address(constants,
|
|
dma_address_constant64,
|
|
program->texture_dma_address[kick],
|
|
false,
|
|
dev_info);
|
|
|
|
pvr_pds_write_constant32(
|
|
constants,
|
|
dma_control_constant32,
|
|
program->texture_dma_control[kick] |
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Save the data segment pointer and size. */
|
|
program->data_segment = constants;
|
|
|
|
/* Minimum temp count is 1. */
|
|
program->temps_used = MAX2(temps_used, 1);
|
|
program->data_size = data_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return (constants + next_constant);
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Generates generic DOUTC PDS program.
|
|
*
|
|
* \param program Pointer to the PDS kick USC.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated, or sizes only updated.
|
|
* \returns Pointer to just beyond the buffer for the code or program segment.
|
|
*/
|
|
uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode)
|
|
{
|
|
uint32_t constant = 0;
|
|
|
|
/* Automatically get a data size of 1x 128bit chunks. */
|
|
uint32_t data_size = 0, code_size = 0;
|
|
|
|
/* Setup the data part. */
|
|
uint32_t *constants = buffer; /* Constants placed at front of buffer. */
|
|
uint32_t *instruction = buffer;
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
|
|
* dwords.
|
|
*/
|
|
|
|
/* Update the program sizes. */
|
|
program->data_size = data_size;
|
|
program->code_size = code_size;
|
|
program->data_segment = constants;
|
|
|
|
if (gen_mode == PDS_GENERATE_SIZES)
|
|
return NULL;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Copy the USC task control words to constants. */
|
|
|
|
constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
|
|
* Src0
|
|
*/
|
|
|
|
uint32_t control_word_constant =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
|
|
* Src1
|
|
*/
|
|
|
|
program->data_size = data_size;
|
|
buffer += data_size;
|
|
|
|
return buffer;
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
*instruction++ = pvr_pds_inst_encode_doutc(
|
|
/* cc */ 0,
|
|
/* END */ 0);
|
|
|
|
code_size++;
|
|
|
|
/* End the program. */
|
|
*instruction++ = pvr_pds_inst_encode_halt(0);
|
|
code_size++;
|
|
|
|
program->code_size = code_size;
|
|
}
|
|
|
|
return instruction;
|
|
}
|
|
|
|
/**
|
|
* Generates generic kick DOUTU PDS program in a single data+code block.
|
|
*
|
|
* \param control Pointer to the PDS kick USC.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information structure.
|
|
* \returns Pointer to just beyond the buffer for the code or program segment.
|
|
*/
|
|
uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
uint32_t doutw;
|
|
uint32_t data_size = 0, code_size = 0;
|
|
uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
|
|
uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
|
|
|
|
/* Assert if buffer is exceeded. */
|
|
assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
|
|
|
|
uint32_t *constants = buffer;
|
|
uint32_t *instruction = buffer;
|
|
|
|
/* Put the constants and control words interleaved in the data region. */
|
|
for (uint32_t const_pair = 0; const_pair < control->num_const64;
|
|
const_pair++) {
|
|
constant[const_pair] =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
control_word_constant[const_pair] =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Data segment points to start of constants. */
|
|
control->data_segment = constants;
|
|
|
|
for (uint32_t const_pair = 0; const_pair < control->num_const64;
|
|
const_pair++) {
|
|
pvr_pds_write_constant64(constants,
|
|
constant[const_pair],
|
|
H32(control->doutw_data[const_pair]),
|
|
L32(control->doutw_data[const_pair]));
|
|
|
|
/* Start loading at offset 0. */
|
|
if (control->dest_store == PDS_COMMON_STORE) {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
(2 * const_pair),
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
|
false,
|
|
dev_info);
|
|
} else {
|
|
doutw = pvr_pds_encode_doutw_src1(
|
|
(2 * const_pair),
|
|
PVR_PDS_DOUTW_LOWER64,
|
|
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
|
|
false,
|
|
dev_info);
|
|
}
|
|
|
|
if (const_pair + 1 == control->num_const64) {
|
|
/* Set the last flag for the MCU (assume there are no following
|
|
* DOUTD's).
|
|
*/
|
|
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
|
}
|
|
pvr_pds_write_constant64(constants,
|
|
control_word_constant[const_pair],
|
|
doutw,
|
|
0);
|
|
}
|
|
|
|
control->data_size = data_size;
|
|
} else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
|
|
/* Code section. */
|
|
|
|
for (uint32_t const_pair = 0; const_pair < control->num_const64;
|
|
const_pair++) {
|
|
/* DOUTW the PDS data to the USC constants. */
|
|
*instruction++ = pvr_pds_encode_doutw64(
|
|
/* cc */ 0,
|
|
/* END */ control->last_instruction &&
|
|
(const_pair + 1 == control->num_const64),
|
|
/* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
|
|
* Src1.
|
|
*/
|
|
/* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
|
|
|
|
code_size++;
|
|
}
|
|
|
|
if (control->last_instruction) {
|
|
/* End the program. */
|
|
*instruction++ = pvr_pds_inst_encode_halt(0);
|
|
code_size++;
|
|
}
|
|
|
|
control->code_size = code_size;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return (constants + next_constant);
|
|
else
|
|
return instruction;
|
|
}
|
|
|
|
/**
|
|
* Generates generic kick DOUTU PDS program in a single data+code block.
|
|
*
|
|
* \param program Pointer to the PDS kick USC.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param start_next_constant Next constant in data segment. Non-zero if another
|
|
* instruction precedes the DOUTU.
|
|
* \param cc_enabled If true then the DOUTU is predicated (cc set).
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \returns Pointer to just beyond the buffer for the code or program segment.
|
|
*/
|
|
uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
uint32_t start_next_constant,
|
|
bool cc_enabled,
|
|
enum pvr_pds_generate_mode gen_mode)
|
|
{
|
|
uint32_t constant = 0;
|
|
|
|
/* Automatically get a data size of 2 128bit chunks. */
|
|
uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
|
|
uint32_t code_size = 1; /* Single doutu */
|
|
uint32_t dummy_count = 0;
|
|
|
|
/* Setup the data part. */
|
|
uint32_t *constants = buffer; /* Constants placed at front of buffer. */
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
|
|
* dwords.
|
|
*/
|
|
|
|
/* Update the program sizes. */
|
|
program->data_size = data_size;
|
|
program->code_size = code_size;
|
|
program->data_segment = constants;
|
|
|
|
if (gen_mode == PDS_GENERATE_SIZES)
|
|
return NULL;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
|
|
gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
|
|
/* Copy the USC task control words to constants. */
|
|
|
|
constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
|
|
|
|
pvr_pds_write_wide_constant(constants,
|
|
constant + 0,
|
|
program->usc_task_control.src0); /* 64-bit
|
|
* Src0.
|
|
*/
|
|
buffer += data_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return buffer;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
|
|
gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
|
|
/* Generate the PDS pixel shader code. */
|
|
|
|
/* Setup the instruction pointer. */
|
|
uint32_t *instruction = buffer;
|
|
|
|
/* Issue the task to the USC.
|
|
*
|
|
* dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
|
|
* halt halt
|
|
*/
|
|
|
|
*instruction++ = pvr_pds_encode_doutu(
|
|
/* cc */ cc_enabled,
|
|
/* END */ 1,
|
|
/* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
|
|
* 64-bit Src0
|
|
*/
|
|
|
|
/* Return pointer to just after last instruction. */
|
|
return instruction;
|
|
}
|
|
|
|
/* Execution should never reach here; keep compiler happy. */
|
|
return NULL;
|
|
}
|
|
|
|
uint32_t *pvr_pds_generate_compute_barrier_conditional(
|
|
uint32_t *buffer,
|
|
enum pvr_pds_generate_mode gen_mode)
|
|
{
|
|
/* Compute barriers supported. Need to test for coeff sync task. */
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return buffer; /* No data segment. */
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Test whether this is the coefficient update task or not. */
|
|
*buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
|
|
*/
|
|
PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
|
|
*/
|
|
PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
|
|
*/
|
|
1 /* ADDR */);
|
|
|
|
/* Encode a HALT. */
|
|
*buffer++ = pvr_pds_inst_encode_halt(1);
|
|
|
|
/* Reset the default predicate to IF0. */
|
|
*buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
|
|
*/
|
|
PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
|
|
*/
|
|
PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
|
|
*/
|
|
1 /* ADDR */);
|
|
}
|
|
|
|
return buffer;
|
|
}
|
|
|
|
/**
|
|
* Generates program to kick the USC task to store shared.
|
|
*
|
|
* \param program Pointer to the PDS shared register.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information structure.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_generate_shared_storing_program(
|
|
struct pvr_pds_shared_storing_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
|
|
struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
|
|
|
|
if (gen_mode == PDS_GENERATE_SIZES)
|
|
return NULL;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t *constants = buffer;
|
|
|
|
constants =
|
|
pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
|
|
program->data_size = doutw_control->data_size;
|
|
|
|
constants = pvr_pds_kick_usc(kick_usc_program,
|
|
constants,
|
|
0,
|
|
program->cc_enable,
|
|
gen_mode);
|
|
program->data_size += kick_usc_program->data_size;
|
|
|
|
return constants;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Generate PDS code segment. */
|
|
uint32_t *instruction = buffer;
|
|
|
|
/* doutw vi1, vi0
|
|
* doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
|
|
* emit
|
|
*/
|
|
instruction =
|
|
pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
|
|
program->code_size = doutw_control->code_size;
|
|
|
|
/* Offset into data segment follows on from doutw data segment. */
|
|
instruction = pvr_pds_kick_usc(kick_usc_program,
|
|
instruction,
|
|
doutw_control->data_size,
|
|
program->cc_enable,
|
|
gen_mode);
|
|
program->code_size += kick_usc_program->code_size;
|
|
|
|
return instruction;
|
|
}
|
|
|
|
/* Execution should never reach here. */
|
|
return NULL;
|
|
}
|
|
|
|
uint32_t *pvr_pds_generate_fence_terminate_program(
|
|
struct pvr_pds_fence_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t data_size = 0;
|
|
uint32_t code_size = 0;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
/* Data segment. */
|
|
uint32_t *constants, *constants_base;
|
|
|
|
constants = constants_base = (uint32_t *)buffer;
|
|
|
|
/* DOUTC sources are not used, but they must be valid. */
|
|
pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
|
|
data_size += program->data_size;
|
|
|
|
if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
|
|
/* Append a 64-bit constant with value 1. Used to increment ptemp.
|
|
* Return the offset into the data segment.
|
|
*/
|
|
program->fence_constant_word =
|
|
pvr_pds_append_constant64(constants_base, 1, &data_size);
|
|
}
|
|
|
|
program->data_size = data_size;
|
|
return constants;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Code segment. */
|
|
uint32_t *instruction = (uint32_t *)buffer;
|
|
|
|
instruction = pvr_pds_generate_compute_barrier_conditional(
|
|
instruction,
|
|
PDS_GENERATE_CODE_SEGMENT);
|
|
code_size += 3;
|
|
|
|
if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
|
|
/* lock */
|
|
*instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
|
|
|
|
/* add64 pt[0], pt[0], #1 */
|
|
*instruction++ = pvr_pds_inst_encode_add64(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
|
|
PVR_ROGUE_PDSINST_MAD_SNA_ADD,
|
|
PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
|
|
*/
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(program->fence_constant_word >> 1), /* src1 = 1 */
|
|
PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
|
|
* ptemp[0]
|
|
*/
|
|
|
|
/* release */
|
|
*instruction++ = pvr_pds_inst_encode_release(0); /* cc */
|
|
|
|
/* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */
|
|
*instruction++ = pvr_pds_inst_encode_cmpi(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_COP_EQ,
|
|
PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
|
|
* = ptemp[0]
|
|
*/
|
|
PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
|
|
|
|
/* bra -1 */
|
|
*instruction++ =
|
|
pvr_pds_encode_bra(0, /* cc */
|
|
1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
|
|
*/
|
|
0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
|
|
*/
|
|
-1); /* bra PC */
|
|
code_size += 5;
|
|
}
|
|
|
|
/* DOUTC */
|
|
instruction = pvr_pds_generate_doutc(program,
|
|
instruction,
|
|
PDS_GENERATE_CODE_SEGMENT);
|
|
code_size += program->code_size;
|
|
|
|
program->code_size = code_size;
|
|
return instruction;
|
|
}
|
|
|
|
/* Execution should never reach here. */
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Generates program to kick the USC task to load shared registers from memory.
|
|
*
|
|
* \param program Pointer to the PDS shared register.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information struct.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_generate_compute_shared_loading_program(
|
|
struct pvr_pds_shared_storing_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
|
|
struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
|
|
|
|
uint32_t next_constant;
|
|
uint32_t data_size = 0;
|
|
uint32_t code_size = 0;
|
|
|
|
/* This needs to persist to the CODE_SEGMENT call. */
|
|
static uint32_t fence_constant_word = 0;
|
|
uint64_t zero_constant64 = 0;
|
|
|
|
if (gen_mode == PDS_GENERATE_SIZES)
|
|
return NULL;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t *constants = buffer;
|
|
|
|
constants = pvr_pds_generate_doutw(doutw_control,
|
|
constants,
|
|
PDS_GENERATE_DATA_SEGMENT,
|
|
dev_info);
|
|
data_size += doutw_control->data_size;
|
|
|
|
constants = pvr_pds_kick_usc(kick_usc_program,
|
|
constants,
|
|
0,
|
|
program->cc_enable,
|
|
gen_mode);
|
|
data_size += kick_usc_program->data_size;
|
|
|
|
/* Copy the fence constant value (64-bit). */
|
|
next_constant = data_size; /* Assumes data words fully packed. */
|
|
fence_constant_word =
|
|
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
/* Encode the fence constant src0 (offset measured from start of data
|
|
* buffer). Fence barrier is initialized to zero.
|
|
*/
|
|
pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
|
|
/* Update the const size. */
|
|
data_size += 2;
|
|
constants += 2;
|
|
|
|
program->data_size = data_size;
|
|
return constants;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* Generate PDS code segment. */
|
|
uint32_t *instruction = buffer;
|
|
|
|
/* add64 pt0, c0, c0
|
|
* IF [2x Phantoms]
|
|
* add64 pt1, c0, c0
|
|
* st [constant_mem_addr], pt0, 4
|
|
* ENDIF
|
|
* doutw vi1, vi0
|
|
* doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
|
|
* emit
|
|
*
|
|
* Zero the persistent temp (SW fence for context switch).
|
|
*/
|
|
*instruction++ = pvr_pds_inst_encode_add64(
|
|
0, /* cc */
|
|
PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
|
|
PVR_ROGUE_PDSINST_MAD_SNA_ADD,
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(fence_constant_word >> 1), /* src0
|
|
* = 0
|
|
*/
|
|
PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
|
|
(fence_constant_word >> 1), /* src1
|
|
* = 0
|
|
*/
|
|
PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
|
|
*/
|
|
code_size++;
|
|
|
|
instruction = pvr_pds_generate_doutw(doutw_control,
|
|
instruction,
|
|
PDS_GENERATE_CODE_SEGMENT,
|
|
dev_info);
|
|
code_size += doutw_control->code_size;
|
|
|
|
/* Offset into data segment follows on from doutw data segment. */
|
|
instruction = pvr_pds_kick_usc(kick_usc_program,
|
|
instruction,
|
|
doutw_control->data_size,
|
|
program->cc_enable,
|
|
gen_mode);
|
|
code_size += kick_usc_program->code_size;
|
|
|
|
program->code_size = code_size;
|
|
return instruction;
|
|
}
|
|
|
|
/* Execution should never reach here. */
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
|
|
* Relies on num_fpu_iterators being initialized for size calculation.
|
|
* Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
|
|
* initialized for program generation.
|
|
*
|
|
* \param program Pointer to the PDS pixel shader program.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_coefficient_loading(
|
|
struct pvr_pds_coeff_loading_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode)
|
|
{
|
|
uint32_t constant;
|
|
uint32_t *instruction;
|
|
uint32_t total_data_size, code_size;
|
|
|
|
/* Place constants at the front of the buffer. */
|
|
uint32_t *constants = buffer;
|
|
/* Start counting constants from 0. */
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
/* Save the data segment pointer and size. */
|
|
program->data_segment = constants;
|
|
|
|
total_data_size = 0;
|
|
code_size = 0;
|
|
|
|
total_data_size += 2 * program->num_fpu_iterators;
|
|
code_size += program->num_fpu_iterators;
|
|
|
|
/* Instructions start where constants finished, but we must take note of
|
|
* alignment.
|
|
*
|
|
* 128-bit boundary = 4 dwords.
|
|
*/
|
|
total_data_size = ALIGN_POT(total_data_size, 4);
|
|
if (gen_mode != PDS_GENERATE_SIZES) {
|
|
uint32_t data_size = 0;
|
|
uint32_t iterator = 0;
|
|
|
|
instruction = buffer + total_data_size;
|
|
|
|
while (iterator < program->num_fpu_iterators) {
|
|
uint64_t iterator_word;
|
|
|
|
/* Copy the USC task control words to constants. */
|
|
constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
|
|
|
/* Write the first iterator. */
|
|
iterator_word =
|
|
(uint64_t)program->FPU_iterators[iterator]
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
|
|
|
|
/* Write the destination. */
|
|
iterator_word |=
|
|
(uint64_t)program->destination[iterator++]
|
|
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
|
|
|
|
/* If this is the last DOUTI word the "Last Issue" bit should be
|
|
* set.
|
|
*/
|
|
if (iterator >= program->num_fpu_iterators) {
|
|
iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
|
|
}
|
|
|
|
/* Write the word to the buffer. */
|
|
pvr_pds_write_wide_constant(constants,
|
|
constant,
|
|
iterator_word); /* 64-bit
|
|
Src0
|
|
*/
|
|
|
|
/* Write the DOUT instruction. */
|
|
*instruction++ = pvr_pds_encode_douti(
|
|
/* cc */ 0,
|
|
/* END */ 0,
|
|
/* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
|
|
}
|
|
|
|
/* Update the last DOUTI instruction to have the END flag set. */
|
|
*(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
|
|
} else {
|
|
instruction = NULL;
|
|
}
|
|
|
|
/* Update the data size and code size. Minimum temp count is 1. */
|
|
program->temps_used = 1;
|
|
program->data_size = total_data_size;
|
|
program->code_size = code_size;
|
|
|
|
return instruction;
|
|
}
|
|
|
|
/**
|
|
* Generate a single ld/st instruction. This can correspond to one or more
|
|
* real ld/st instructions based on the value of count.
|
|
*
|
|
* \param ld true to generate load, false to generate store.
|
|
* \param control Cache mode control.
|
|
* \param temp_index Dest temp for load/source temp for store, in 32bits
|
|
* register index.
|
|
* \param address Source for load/dest for store in bytes.
|
|
* \param count Number of dwords for load/store.
|
|
* \param next_constant
|
|
* \param total_data_size
|
|
* \param total_code_size
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param data_fence Issue data fence.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information structure.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_generate_single_ldst_instruction(
|
|
bool ld,
|
|
const struct pvr_pds_ldst_control *control,
|
|
uint32_t temp_index,
|
|
uint64_t address,
|
|
uint32_t count,
|
|
uint32_t *next_constant,
|
|
uint32_t *total_data_size,
|
|
uint32_t *total_code_size,
|
|
uint32_t *restrict buffer,
|
|
bool data_fence,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
/* A single ld/ST here does NOT actually correspond to a single ld/ST
|
|
* instruction, but may needs multiple ld/ST instructions because each ld/ST
|
|
* instruction can only ld/ST a restricted max number of dwords which may
|
|
* less than count passed here.
|
|
*/
|
|
|
|
uint32_t num_inst;
|
|
uint32_t constant;
|
|
|
|
if (ld) {
|
|
/* ld must operate on 64bits unit, and it needs to load from and to 128
|
|
* bits aligned. Apart from the last ld, all the other need to ld 2x(x =
|
|
* 1, 2, ...) times 64bits unit.
|
|
*/
|
|
uint32_t per_inst_count = 0;
|
|
uint32_t last_inst_count;
|
|
|
|
assert((gen_mode == PDS_GENERATE_SIZES) ||
|
|
(((count % 2) == 0) && ((address % 16) == 0) &&
|
|
(temp_index % 2) == 0));
|
|
|
|
count >>= 1;
|
|
temp_index >>= 1;
|
|
|
|
/* Found out how many ld instructions are needed and ld size for the all
|
|
* possible ld instructions.
|
|
*/
|
|
if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
|
|
num_inst = 1;
|
|
last_inst_count = count;
|
|
} else {
|
|
per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
|
|
if ((per_inst_count % 2) != 0)
|
|
per_inst_count -= 1;
|
|
|
|
num_inst = count / per_inst_count;
|
|
last_inst_count = count - per_inst_count * num_inst;
|
|
num_inst += 1;
|
|
}
|
|
|
|
/* Generate all the instructions. */
|
|
for (uint32_t i = 0; i < num_inst; i++) {
|
|
if ((i == (num_inst - 1)) && (last_inst_count == 0))
|
|
break;
|
|
|
|
/* A single load instruction. */
|
|
constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint64_t ld_src0 = 0;
|
|
|
|
ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
|
|
ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
|
|
: per_inst_count) &
|
|
PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
|
|
ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
|
|
<< PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
|
|
|
|
if (!control) {
|
|
ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
|
|
ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
|
|
|
|
} else {
|
|
ld_src0 |= control->cache_control_const;
|
|
}
|
|
|
|
/* Write it to the constant. */
|
|
pvr_pds_write_constant64(buffer,
|
|
constant,
|
|
(uint32_t)(ld_src0),
|
|
(uint32_t)(ld_src0 >> 32));
|
|
|
|
/* Adjust value for next ld instruction. */
|
|
temp_index += per_inst_count;
|
|
address += (((uint64_t)(per_inst_count)) << 3);
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
|
|
|
|
if (data_fence)
|
|
*buffer++ = pvr_pds_inst_encode_wdf(0);
|
|
}
|
|
}
|
|
} else {
|
|
/* ST needs source memory address to be 32bits aligned. */
|
|
assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
|
|
|
|
/* Found out how many ST instructions are needed, each ST can only store
|
|
* PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
|
|
*/
|
|
num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
|
|
num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
|
|
|
|
/* Generate all the instructions. */
|
|
for (uint32_t i = 0; i < num_inst; i++) {
|
|
/* A single store instruction. */
|
|
constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
|
uint32_t per_inst_count =
|
|
(count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
|
|
? count
|
|
: PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
|
|
uint64_t st_src0 = 0;
|
|
|
|
st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
|
|
st_src0 |=
|
|
(((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
|
|
st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
|
|
<< PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
|
|
|
|
if (!control) {
|
|
st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
|
|
|
|
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
|
|
st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
|
|
}
|
|
|
|
} else {
|
|
st_src0 |= control->cache_control_const;
|
|
}
|
|
|
|
/* Write it to the constant. */
|
|
pvr_pds_write_constant64(buffer,
|
|
constant,
|
|
(uint32_t)(st_src0),
|
|
(uint32_t)(st_src0 >> 32));
|
|
|
|
/* Adjust value for next ST instruction. */
|
|
temp_index += per_inst_count;
|
|
count -= per_inst_count;
|
|
address += (((uint64_t)(per_inst_count)) << 2);
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
|
|
|
|
if (data_fence)
|
|
*buffer++ = pvr_pds_inst_encode_wdf(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
(*total_code_size) += num_inst;
|
|
if (data_fence)
|
|
(*total_code_size) += num_inst;
|
|
|
|
if (gen_mode != PDS_GENERATE_SIZES)
|
|
return buffer;
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Generate programs used to prepare stream out, i.e., clear stream out buffer
|
|
* overflow flags and update Persistent temps by a ld instruction.
|
|
*
|
|
* This must be used in PPP state update.
|
|
*
|
|
* \param program Pointer to the stream out program.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param store_mode If true then the data is stored to memory. If false then
|
|
* the data is loaded from memory.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device information structure.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_generate_stream_out_init_program(
|
|
struct pvr_pds_stream_out_init_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
bool store_mode,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t total_data_size = 0;
|
|
uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
|
|
|
|
/* Start counting constants from 0. */
|
|
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
uint32_t total_code_size = 1;
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* We only need to clear global stream out predicate, other predicates
|
|
* are not used during the stream out buffer overflow test.
|
|
*/
|
|
*buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
|
|
}
|
|
|
|
for (uint32_t index = 0; index < program->num_buffers; index++) {
|
|
if (program->dev_address_for_buffer_data[index] != 0) {
|
|
/* Generate load/store program to load/store persistent temps. */
|
|
|
|
/* NOTE: store_mode == true case should be handled by
|
|
* StreamOutTerminate.
|
|
*/
|
|
buffer = pvr_pds_generate_single_ldst_instruction(
|
|
!store_mode,
|
|
NULL,
|
|
PTDst,
|
|
program->dev_address_for_buffer_data[index],
|
|
program->pds_buffer_data_size[index],
|
|
&next_constant,
|
|
&total_data_size,
|
|
&total_code_size,
|
|
buffer,
|
|
false,
|
|
gen_mode,
|
|
dev_info);
|
|
}
|
|
|
|
PTDst += program->pds_buffer_data_size[index];
|
|
}
|
|
|
|
total_code_size += 2;
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
/* We need to fence the loading. */
|
|
*buffer++ = pvr_pds_inst_encode_wdf(0);
|
|
*buffer++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
|
|
/* Save size information to program */
|
|
program->stream_out_init_pds_data_size =
|
|
ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
|
|
/* PDS program code size. */
|
|
program->stream_out_init_pds_code_size = total_code_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return buffer + program->stream_out_init_pds_data_size;
|
|
else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
|
|
return buffer;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* Generate stream out terminate program for stream out.
|
|
*
|
|
* If pds_persistent_temp_size_to_store is 0, the final primitive written value
|
|
* will be stored.
|
|
*
|
|
* If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
|
|
* will be stored into memory.
|
|
*
|
|
* The stream out terminate program is used to update the PPP state and the data
|
|
* and code section cannot be separate.
|
|
*
|
|
* \param program Pointer to the stream out program.
|
|
* \param buffer Pointer to the buffer for the program.
|
|
* \param gen_mode Either code and data can be generated or sizes only updated.
|
|
* \param dev_info PVR device info structure.
|
|
* \returns Pointer to just beyond the buffer for the program.
|
|
*/
|
|
uint32_t *pvr_pds_generate_stream_out_terminate_program(
|
|
struct pvr_pds_stream_out_terminate_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
uint32_t next_constant;
|
|
uint32_t total_data_size = 0, total_code_size = 0;
|
|
|
|
/* Start counting constants from 0. */
|
|
next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
|
|
|
/* Generate store program to store persistent temps. */
|
|
buffer = pvr_pds_generate_single_ldst_instruction(
|
|
false,
|
|
NULL,
|
|
PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
|
|
program->dev_address_for_storing_persistent_temp,
|
|
program->pds_persistent_temp_size_to_store,
|
|
&next_constant,
|
|
&total_data_size,
|
|
&total_code_size,
|
|
buffer,
|
|
false,
|
|
gen_mode,
|
|
dev_info);
|
|
|
|
total_code_size += 2;
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
*buffer++ = pvr_pds_inst_encode_wdf(0);
|
|
*buffer++ = pvr_pds_inst_encode_halt(0);
|
|
}
|
|
|
|
/* Save size information to program. */
|
|
program->stream_out_terminate_pds_data_size =
|
|
ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
|
|
/* PDS program code size. */
|
|
program->stream_out_terminate_pds_code_size = total_code_size;
|
|
|
|
if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
|
|
return buffer + program->stream_out_terminate_pds_data_size;
|
|
else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
|
|
return buffer;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* DrawArrays works in several steps:
|
|
*
|
|
* 1) load data from draw_indirect buffer
|
|
* 2) tweak data to match hardware formats
|
|
* 3) write data to indexblock
|
|
* 4) signal the VDM to continue
|
|
*
|
|
* This is complicated by HW limitations on alignment, as well as a HWBRN.
|
|
*
|
|
* 1) Load data.
|
|
* Loads _must_ be 128-bit aligned. Because there is no such limitation in the
|
|
* spec we must deal with this by choosing an appropriate earlier address and
|
|
* loading enough dwords that we load the entirety of the buffer.
|
|
*
|
|
* if addr & 0xf:
|
|
* load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
|
|
* data = tmp[0 + (uiAddr & 0xf) >> 2]...
|
|
* else
|
|
* load [addr] 4 dwords -> tmp[0, 1, 2, 3]
|
|
* data = tmp[0]...
|
|
*
|
|
*
|
|
* 2) Tweak data.
|
|
* primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
|
|
* the VDM control stream. We must subtract 1 from the loaded primCount.
|
|
*
|
|
* However, there is a HWBRN that disallows the ADD32 instruction from sourcing
|
|
* a tmp that is non-64-bit-aligned. To work around this, we must move primCount
|
|
* into another tmp that has the correct alignment. Note: this is only required
|
|
* when data = tmp[even], as primCount is data+1:
|
|
*
|
|
* if data = tmp[even]:
|
|
* primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
|
|
* else:
|
|
* primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
|
|
*
|
|
* This boils down to:
|
|
*
|
|
* primCount = data[1]
|
|
* primCountSrc = data[1]
|
|
* if brn_present && (data is even):
|
|
* mov scratch, primCount
|
|
* primCountSrc = scratch
|
|
* endif
|
|
* sub primCount, primCountSrc, 1
|
|
*
|
|
* 3) Store Data.
|
|
* Write the now-tweaked data over the top of the indexblock.
|
|
* To ensure the write completes before the VDM re-reads the data, we must cause
|
|
* a data hazard by doing a dummy (dummy meaning we don't care about the
|
|
* returned data) load from the same addresses. Again, because the ld must
|
|
* always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
|
|
* index block is 128-bit aligned. This is the client driver's responsibility.
|
|
*
|
|
* st data[0, 1, 2] -> (idxblock + 4)
|
|
* load [idxblock] 4 dwords
|
|
*
|
|
* 4) Signal the VDM
|
|
* This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
|
|
* where it is currently fenced on a dummy idxblock that has been inserted by
|
|
* the driver.
|
|
*/
|
|
|
|
#include "pvr_draw_indirect_arrays0.h"
|
|
#include "pvr_draw_indirect_arrays1.h"
|
|
#include "pvr_draw_indirect_arrays2.h"
|
|
#include "pvr_draw_indirect_arrays3.h"
|
|
|
|
#include "pvr_draw_indirect_arrays_base_instance0.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance1.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance2.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance3.h"
|
|
|
|
#include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
|
|
#include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
|
|
|
|
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device) \
|
|
((device)->features.has_slc_mcu_cache_controls \
|
|
? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
|
|
: PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS)
|
|
|
|
void pvr_pds_generate_draw_arrays_indirect(
|
|
struct pvr_pds_drawindirect_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
|
|
(gen_mode == PDS_GENERATE_SIZES)) {
|
|
const struct pvr_psc_program_output *psc_program = NULL;
|
|
switch ((program->arg_buffer >> 2) % 4) {
|
|
case 0:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_arrays_base_instance_drawid0_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays0_program;
|
|
}
|
|
break;
|
|
case 1:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_arrays_base_instance_drawid1_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays1_program;
|
|
}
|
|
break;
|
|
case 2:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_arrays_base_instance_drawid2_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays2_program;
|
|
}
|
|
break;
|
|
case 3:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_arrays_base_instance_drawid3_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_arrays3_program;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
memcpy(buffer,
|
|
psc_program->code,
|
|
psc_program->code_size * sizeof(uint32_t));
|
|
#if defined(DUMP_PDS)
|
|
for (uint32_t i = 0; i < psc_program->code_size; i++)
|
|
PVR_PDS_PRINT_INST(buffer[i]);
|
|
#endif
|
|
}
|
|
|
|
program->program = *psc_program;
|
|
} else {
|
|
switch ((program->arg_buffer >> 2) % 4) {
|
|
case 0:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_arrays_base_instance0_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance0_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_arrays0_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays0_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays0_immediates(buffer);
|
|
}
|
|
break;
|
|
case 1:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_arrays_base_instance1_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance1_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_arrays1_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays1_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays1_immediates(buffer);
|
|
}
|
|
break;
|
|
case 2:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_arrays_base_instance2_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance2_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_arrays2_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays2_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays2_immediates(buffer);
|
|
}
|
|
break;
|
|
case 3:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_arrays_base_instance3_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays_base_instance3_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_arrays3_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_arrays3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer + 4);
|
|
pvr_write_draw_indirect_arrays3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_arrays3_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_arrays3_immediates(buffer);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
#include "pvr_draw_indirect_elements0.h"
|
|
#include "pvr_draw_indirect_elements1.h"
|
|
#include "pvr_draw_indirect_elements2.h"
|
|
#include "pvr_draw_indirect_elements3.h"
|
|
#include "pvr_draw_indirect_elements_base_instance0.h"
|
|
#include "pvr_draw_indirect_elements_base_instance1.h"
|
|
#include "pvr_draw_indirect_elements_base_instance2.h"
|
|
#include "pvr_draw_indirect_elements_base_instance3.h"
|
|
#include "pvr_draw_indirect_elements_base_instance_drawid0.h"
|
|
#include "pvr_draw_indirect_elements_base_instance_drawid1.h"
|
|
#include "pvr_draw_indirect_elements_base_instance_drawid2.h"
|
|
#include "pvr_draw_indirect_elements_base_instance_drawid3.h"
|
|
|
|
void pvr_pds_generate_draw_elements_indirect(
|
|
struct pvr_pds_drawindirect_program *restrict program,
|
|
uint32_t *restrict buffer,
|
|
enum pvr_pds_generate_mode gen_mode,
|
|
const struct pvr_device_info *dev_info)
|
|
{
|
|
if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
|
|
(gen_mode == PDS_GENERATE_SIZES)) {
|
|
const struct pvr_psc_program_output *psc_program = NULL;
|
|
switch ((program->arg_buffer >> 2) % 4) {
|
|
case 0:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_elements_base_instance_drawid0_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements_base_instance0_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements0_program;
|
|
}
|
|
break;
|
|
case 1:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_elements_base_instance_drawid1_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements_base_instance1_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements1_program;
|
|
}
|
|
break;
|
|
case 2:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_elements_base_instance_drawid2_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements_base_instance2_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements2_program;
|
|
}
|
|
break;
|
|
case 3:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
psc_program =
|
|
&pvr_draw_indirect_elements_base_instance_drawid3_program;
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements_base_instance3_program;
|
|
}
|
|
} else {
|
|
psc_program = &pvr_draw_indirect_elements3_program;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
|
|
memcpy(buffer,
|
|
psc_program->code,
|
|
psc_program->code_size * sizeof(uint32_t));
|
|
|
|
#if defined(DUMP_PDS)
|
|
for (uint32_t i = 0; i < psc_program->code_size; i++)
|
|
PVR_PDS_PRINT_INST(buffer[i]);
|
|
#endif
|
|
}
|
|
|
|
program->program = *psc_program;
|
|
} else {
|
|
switch ((program->arg_buffer >> 2) % 4) {
|
|
case 0:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_elements_base_instance0_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance0_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance0_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance0_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance0_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance0_immediates(
|
|
buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_elements0_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements0_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements0_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements0_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements0_idx_stride(buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements0_idx_base(buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements0_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements0_immediates(buffer);
|
|
}
|
|
break;
|
|
case 1:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_elements_base_instance1_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance1_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance1_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance1_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance1_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance1_immediates(
|
|
buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_elements1_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements1_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements1_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements1_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements1_idx_stride(buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements1_idx_base(buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements1_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements1_immediates(buffer);
|
|
}
|
|
break;
|
|
case 2:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_elements_base_instance2_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance2_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance2_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance2_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance2_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance2_immediates(
|
|
buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_elements2_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements2_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements2_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements2_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements2_idx_stride(buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements2_idx_base(buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements2_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements2_immediates(buffer);
|
|
}
|
|
break;
|
|
case 3:
|
|
if (program->support_base_instance) {
|
|
if (program->increment_draw_id) {
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
|
|
buffer);
|
|
} else {
|
|
pvr_write_draw_indirect_elements_base_instance3_di_data(
|
|
buffer,
|
|
program->arg_buffer & ~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements_base_instance3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance3_num_views(
|
|
buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements_base_instance3_idx_stride(
|
|
buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements_base_instance3_idx_base(
|
|
buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements_base_instance3_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements_base_instance3_immediates(
|
|
buffer);
|
|
}
|
|
} else {
|
|
pvr_write_draw_indirect_elements3_di_data(buffer,
|
|
program->arg_buffer &
|
|
~0xfull,
|
|
dev_info);
|
|
pvr_write_draw_indirect_elements3_write_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements3_flush_vdm(
|
|
buffer,
|
|
program->index_list_addr_buffer);
|
|
pvr_write_draw_indirect_elements3_num_views(buffer,
|
|
program->num_views);
|
|
pvr_write_draw_indirect_elements3_idx_stride(buffer,
|
|
program->index_stride);
|
|
pvr_write_draw_indirect_elements3_idx_base(buffer,
|
|
program->index_buffer);
|
|
pvr_write_draw_indirect_elements3_idx_header(
|
|
buffer,
|
|
program->index_block_header);
|
|
pvr_write_draw_indirect_elements3_immediates(buffer);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|